├── lib ├── models │ ├── ctrnet │ │ ├── __init__.py │ │ ├── mask_inference.py │ │ ├── keypoint_seg_resnet.py │ │ └── CtRNet.py │ ├── backbones │ │ ├── configs │ │ │ ├── hrnet_w48.yaml │ │ │ └── hrnet_w32.yaml │ │ └── Resnet.py │ └── depth_net.py ├── utils │ ├── urdfpytorch │ │ ├── version.py │ │ ├── __init__.py │ │ └── utils.py │ ├── transforms.py │ ├── mesh_renderer.py │ ├── metrics.py │ ├── integral.py │ ├── utils.py │ ├── geometries.py │ └── urdf_robot.py ├── dataset │ ├── samplers.py │ ├── multiepoch_dataloader.py │ ├── const.py │ ├── roboutils.py │ └── augmentations.py ├── config.py └── core │ └── config.py ├── assets └── holistic.gif ├── .gitignore ├── scripts ├── train.py └── train_full.py ├── configs ├── kuka │ ├── depthnet.yaml │ └── full.yaml ├── baxter │ ├── depthnet.yaml │ └── full.yaml └── panda │ ├── depthnet.yaml │ ├── full.yaml │ └── self_supervised │ ├── synth.yaml │ ├── orb.yaml │ ├── realsense.yaml │ ├── kinect.yaml │ └── azure.yaml ├── requirements.txt └── README.md /lib/models/ctrnet/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/utils/urdfpytorch/version.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.0.19' 2 | -------------------------------------------------------------------------------- /assets/holistic.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Oliverbansk/Holistic-Robot-Pose-Estimation/HEAD/assets/holistic.gif -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.pyc 3 | /.vscode 4 | 5 | /data 6 | /experiments 7 | /models 8 | /unit_test 9 | 10 | run.sh -------------------------------------------------------------------------------- /lib/dataset/samplers.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 4 | import torch 5 | from torch.utils.data import Sampler 6 | 7 | class PartialSampler(Sampler): 8 | def __init__(self, ds, epoch_size): 9 | self.n_items = len(ds) 10 | if epoch_size is not None: 11 | self.epoch_size = min(epoch_size, len(ds)) 12 | else: 13 | self.epoch_size = len(ds) 14 | super().__init__(None) 15 | 16 | def __len__(self): 17 | return self.epoch_size 18 | 19 | def __iter__(self): 20 | return (i.item() for i in torch.randperm(self.n_items)[:len(self)]) 21 | 22 | 23 | class ListSampler(Sampler): 24 | def __init__(self, ids): 25 | self.ids = ids 26 | 27 | def __len__(self): 28 | return len(self.ids) 29 | 30 | def __iter__(self): 31 | return iter(self.ids) 32 | -------------------------------------------------------------------------------- /lib/utils/urdfpytorch/__init__.py: -------------------------------------------------------------------------------- 1 | from .urdf import (URDFType, 2 | Box, Cylinder, Sphere, Mesh, Geometry, 3 | Texture, Material, 4 | Collision, Visual, Inertial, 5 | JointCalibration, JointDynamics, JointLimit, JointMimic, 6 | SafetyController, Actuator, TransmissionJoint, 7 | Transmission, Joint, Link, URDF) 8 | from .utils import (rpy_to_matrix, matrix_to_rpy, xyz_rpy_to_matrix, 9 | matrix_to_xyz_rpy) 10 | from .version import __version__ 11 | 12 | __all__ = [ 13 | 'URDFType', 
'Box', 'Cylinder', 'Sphere', 'Mesh', 'Geometry', 14 | 'Texture', 'Material', 'Collision', 'Visual', 'Inertial', 15 | 'JointCalibration', 'JointDynamics', 'JointLimit', 'JointMimic', 16 | 'SafetyController', 'Actuator', 'TransmissionJoint', 17 | 'Transmission', 'Joint', 'Link', 'URDF', 18 | 'rpy_to_matrix', 'matrix_to_rpy', 'xyz_rpy_to_matrix', 'matrix_to_xyz_rpy', 19 | '__version__' 20 | ] 21 | -------------------------------------------------------------------------------- /scripts/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 4 | import argparse 5 | import yaml 6 | from lib.config import LOCAL_DATA_DIR 7 | from lib.core.config import make_cfg 8 | from scripts.train_depthnet import train_depthnet 9 | from scripts.train_sim2real import train_sim2real 10 | from scripts.train_full import train_full 11 | 12 | 13 | if __name__ == '__main__': 14 | parser = argparse.ArgumentParser('Training') 15 | parser.add_argument('--config', '-c', type=str, required=True, default='configs/cfg.yaml', help="hyperparameters path") 16 | args = parser.parse_args() 17 | cfg = make_cfg(args) 18 | 19 | print("------------------- config for this experiment -------------------") 20 | print(cfg) 21 | print("----------------------------------------------------------------------") 22 | 23 | if cfg.use_rootnet_with_reg_int_shared_backbone: 24 | print(f"\n pipeline: full network training (JointNet/RotationNet/KeypointNet/DepthNet) \n") 25 | train_full(cfg) 26 | 27 | elif cfg.use_rootnet: 28 | print("\n pipeline: training DepthNet only \n") 29 | train_depthnet(cfg) 30 | 31 | elif cfg.use_sim2real: 32 | print("\n pipeline: self-supervised training on real datasets \n") 33 | train_sim2real(cfg) 34 | 35 | elif cfg.use_sim2real_real: 36 | print("\n pipeline: self-supervised training on my real datasets \n") 37 | # train_sim2real_real(cfg) 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /lib/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | from joblib import Memory 3 | from pathlib import Path 4 | import getpass 5 | import socket 6 | 7 | hostname = socket.gethostname() 8 | username = getpass.getuser() 9 | 10 | PROJECT_ROOT = Path(__file__).parent 11 | PROJECT_DIR = PROJECT_ROOT 12 | DATA_DIR = PROJECT_DIR / 'data' 13 | LOCAL_DATA_DIR = Path('data') 14 | TEST_DATA_DIR = LOCAL_DATA_DIR 15 | 16 | EXP_DIR = LOCAL_DATA_DIR / 'models' 17 | RESULTS_DIR = LOCAL_DATA_DIR / 'results' 18 | DEBUG_DATA_DIR = LOCAL_DATA_DIR / 'debug_data' 19 | DEPS_DIR = LOCAL_DATA_DIR / 'deps' 20 | CACHE_DIR = LOCAL_DATA_DIR / 'joblib_cache' 21 | assert LOCAL_DATA_DIR.exists() 22 | CACHE_DIR.mkdir(exist_ok=True) 23 | TEST_DATA_DIR.mkdir(exist_ok=True) 24 | RESULTS_DIR.mkdir(exist_ok=True) 25 | DEBUG_DATA_DIR.mkdir(exist_ok=True) 26 | 27 | ASSET_DIR = DATA_DIR / 'assets' 28 | MEMORY = Memory(CACHE_DIR, verbose=2) 29 | 30 | # ROBOTS URDF 31 | DREAM_DS_DIR = LOCAL_DATA_DIR / 'dream' 32 | 33 | PANDA_DESCRIPTION_PATH = os.path.abspath(DEPS_DIR / "panda-description/panda.urdf") 34 | PANDA_DESCRIPTION_PATH_VISUAL = os.path.abspath(DEPS_DIR / "panda-description/patched_urdf/panda.urdf") 35 | KUKA_DESCRIPTION_PATH = os.path.abspath(DEPS_DIR / "kuka-description/iiwa_description/urdf/iiwa7.urdf") 36 | BAXTER_DESCRIPTION_PATH = 
os.path.abspath("/DATA/disk1/cvda_share/robopose_data/deps/baxter-description/baxter_description/urdf/baxter.urdf") 37 | 38 | OWI_DESCRIPTION = os.path.abspath(DEPS_DIR / 'owi-description' / 'owi535_description' / 'owi535.urdf') 39 | OWI_KEYPOINTS_PATH = os.path.abspath(DEPS_DIR / 'owi-description' / 'keypoints.json') 40 | -------------------------------------------------------------------------------- /lib/dataset/multiepoch_dataloader.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 4 | from itertools import chain 5 | 6 | 7 | class MultiEpochDataLoader: 8 | def __init__(self, dataloader): 9 | self.dataloader = dataloader 10 | self.dataloader_iter = None 11 | self.epoch_id = -1 12 | self.batch_id = 0 13 | self.n_repeats_sampler = 1 14 | self.sampler_length = None 15 | self.id_in_sampler = None 16 | 17 | def __iter__(self): 18 | if self.dataloader_iter is None: 19 | self.dataloader_iter = iter(self.dataloader) 20 | 21 | self.sampler_length = len(self.dataloader) 22 | self.id_in_sampler = 0 23 | while self.sampler_length <= 2 * self.dataloader.num_workers: 24 | self.sampler_length += len(self.dataloader) 25 | next_index_sampler = iter(self.dataloader_iter._index_sampler) 26 | self.dataloader_iter._sampler_iter = chain( 27 | self.dataloader_iter._sampler_iter, next_index_sampler) 28 | 29 | self.epoch_id += 1 30 | self.batch_id = 0 31 | self.epoch_size = len(self.dataloader_iter) 32 | 33 | return self 34 | 35 | def __len__(self): 36 | return len(self.dataloader) 37 | 38 | def __next__(self): 39 | if self.batch_id == self.epoch_size: 40 | raise StopIteration 41 | 42 | elif self.id_in_sampler == self.sampler_length - 2 * self.dataloader.num_workers: 43 | next_index_sampler = iter(self.dataloader_iter._index_sampler) 44 | self.dataloader_iter._sampler_iter = next_index_sampler 45 | self.id_in_sampler = 0 46 | 47 | idx, batch = self.dataloader_iter._get_data() 48 | self.dataloader_iter._tasks_outstanding -= 1 49 | self.dataloader_iter._process_data(batch) 50 | 51 | self.batch_id += 1 52 | self.id_in_sampler += 1 53 | return batch 54 | 55 | def get_infos(self): 56 | return dict() 57 | 58 | def __del__(self): 59 | del self.dataloader_iter 60 | -------------------------------------------------------------------------------- /configs/kuka/depthnet.yaml: -------------------------------------------------------------------------------- 1 | 2 | # basic training 3 | no_cuda : False 4 | device_id : [0] 5 | 6 | # experiment name (also name of the saving directory) 7 | # model and log directory : {ROOT}/experiment/{exp_name}/ 8 | exp_name : "kuka_depthnet" 9 | 10 | # Data 11 | urdf_robot_name : "kuka" 12 | train_ds_names : "dream/synthetic/kuka_synth_train_dr" 13 | val_ds_names : None 14 | image_size : 256.0 15 | 16 | # Model 17 | backbone_name : "hrnet32" 18 | split_reg_head : False 19 | split_type : "2-first" 20 | use_rpmg: False 21 | 22 | # Optimizer 23 | lr : 1e-4 24 | weight_decay : 0. 
25 | use_schedule : False 26 | schedule_type : "linear" 27 | n_epochs_warmup : 15 28 | start_decay : 100 29 | end_decay: 300 30 | final_decay : 0.01 31 | exponent : 0.96 32 | clip_gradient : 1.0 33 | 34 | # Training 35 | batch_size : 64 36 | epoch_size : 104975 37 | n_epochs : 700 38 | n_dataloader_workers : 6 39 | save_epoch_interval : None 40 | 41 | # Method 42 | use_direct_reg_branch : False 43 | n_iter : 4 44 | pose_loss_func : "smoothl1" 45 | rot_loss_func : "smoothl1" 46 | trans_loss_func : "smoothl1" 47 | kp3d_loss_func : "l2norm" 48 | kp2d_loss_func : "l2norm" 49 | rot_loss_weight : 1.0 50 | trans_loss_weight : 1.0 51 | use_2d_reprojection_loss : False 52 | use_3d_loss : True 53 | error2d_loss_weight : 1e-5 54 | error3d_loss_weight : 10.0 55 | joint_individual_weights : None 56 | 57 | use_integral_3d_branch : False 58 | use_limb_loss : False 59 | limb_loss_func : "l1" 60 | limb_loss_weight : 1.0 61 | use_uvd_3d_loss : True 62 | integral_3d_loss_func : "l2norm" 63 | integral_3d_loss_weight : 1.0 64 | use_xyz_3d_loss : False 65 | integral_xyz_3d_loss_func : "l2norm" 66 | integral_xyz_3d_loss_weight : 1.0 67 | bbox_3d_shape : 68 | - 1300 69 | - 1300 70 | - 1300 71 | reference_keypoint_id : 3 # 0:base 72 | 73 | use_pretrained_direct_reg_weights: False 74 | pretrained_direct_reg_weights_path: None 75 | 76 | # rootnet 77 | use_rootnet: True 78 | depth_loss_func : "l1" 79 | use_rootnet_xy_branch : False 80 | xy_loss_func : "mse" 81 | use_origin_bbox : False 82 | use_extended_bbox : True 83 | extend_ratio : [0.2, 0.13] 84 | use_rootnet_with_angle: False 85 | 86 | # Resume 87 | resume_run : False 88 | resume_experiment_name : "" 89 | -------------------------------------------------------------------------------- /configs/baxter/depthnet.yaml: -------------------------------------------------------------------------------- 1 | 2 | # basic training 3 | no_cuda : False 4 | device_id : [0] 5 | 6 | # experiment name (also name of the saving directory) 7 | # model and log directory : {ROOT}/experiment/{exp_name}/ 8 | exp_name : "baxter_depthnet" 9 | 10 | # Data 11 | urdf_robot_name : "baxter" 12 | train_ds_names : "dream/synthetic/baxter_synth_train_dr" 13 | val_ds_names : None 14 | image_size : 256.0 15 | 16 | # Model 17 | backbone_name : "hrnet32" 18 | split_reg_head : False 19 | split_type : "2-first" 20 | use_rpmg: False 21 | 22 | # Optimizer 23 | lr : 1e-4 24 | weight_decay : 0. 
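# NOTE (added comment): this mirrors the Kuka DepthNet config apart from the experiment name,
# the training set (baxter_synth_train_dr), and reference_keypoint_id: 0 (the robot base) further down.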
25 | use_schedule : False 26 | schedule_type : "linear" 27 | n_epochs_warmup : 15 28 | start_decay : 100 29 | end_decay: 300 30 | final_decay : 0.01 31 | exponent : 0.96 32 | clip_gradient : 1.0 33 | 34 | # Training 35 | batch_size : 64 36 | epoch_size : 104975 37 | n_epochs : 700 38 | n_dataloader_workers : 6 39 | save_epoch_interval : None 40 | 41 | # Method 42 | use_direct_reg_branch : False 43 | n_iter : 4 44 | pose_loss_func : "smoothl1" 45 | rot_loss_func : "smoothl1" 46 | trans_loss_func : "smoothl1" 47 | kp3d_loss_func : "l2norm" 48 | kp2d_loss_func : "l2norm" 49 | rot_loss_weight : 1.0 50 | trans_loss_weight : 1.0 51 | use_2d_reprojection_loss : False 52 | use_3d_loss : True 53 | error2d_loss_weight : 1e-5 54 | error3d_loss_weight : 10.0 55 | joint_individual_weights : None 56 | 57 | use_integral_3d_branch : False 58 | use_limb_loss : False 59 | limb_loss_func : "l1" 60 | limb_loss_weight : 1.0 61 | use_uvd_3d_loss : True 62 | integral_3d_loss_func : "l2norm" 63 | integral_3d_loss_weight : 1.0 64 | use_xyz_3d_loss : False 65 | integral_xyz_3d_loss_func : "l2norm" 66 | integral_xyz_3d_loss_weight : 1.0 67 | bbox_3d_shape : 68 | - 1300 69 | - 1300 70 | - 1300 71 | reference_keypoint_id : 0 # 0:base 72 | 73 | use_pretrained_direct_reg_weights: False 74 | pretrained_direct_reg_weights_path: None 75 | 76 | # rootnet 77 | use_rootnet: True 78 | depth_loss_func : "l1" 79 | use_rootnet_xy_branch : False 80 | xy_loss_func : "mse" 81 | use_origin_bbox : False 82 | use_extended_bbox : True 83 | extend_ratio : [0.2, 0.13] 84 | use_rootnet_with_angle: False 85 | 86 | # Resume 87 | resume_run : False 88 | resume_experiment_name : "" 89 | -------------------------------------------------------------------------------- /configs/panda/depthnet.yaml: -------------------------------------------------------------------------------- 1 | 2 | # basic training 3 | no_cuda : False 4 | device_id : [0] 5 | 6 | # experiment name (also name of the saving directory) 7 | # model and log directory : {ROOT}/experiment/{exp_name}/ 8 | exp_name : "panda_depthnet" 9 | 10 | # Data 11 | urdf_robot_name : "panda" 12 | train_ds_names : "dream/synthetic/panda_synth_train_dr" 13 | val_ds_names : None 14 | image_size : 256.0 15 | 16 | # Model 17 | backbone_name : "hrnet32" 18 | split_reg_head : False 19 | split_type : "2-first" 20 | use_rpmg: False 21 | 22 | # Optimizer 23 | lr : 1e-4 24 | weight_decay : 0. 
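# NOTE (added comment): DepthNet here is supervised with the L1 depth loss on the reference
# keypoint (id 3) and crops from the extended bounding box (extend_ratio [0.2, 0.13]); see the
# rootnet block at the end of this file.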
25 | use_schedule : False 26 | schedule_type : "linear" 27 | n_epochs_warmup : 15 28 | start_decay : 100 29 | end_decay: 300 30 | final_decay : 0.01 31 | exponent : 0.96 32 | clip_gradient : 1.0 33 | 34 | # Training 35 | batch_size : 64 36 | epoch_size : 104950 37 | n_epochs : 700 38 | n_dataloader_workers : 6 39 | save_epoch_interval : None 40 | 41 | # Method 42 | use_direct_reg_branch : False 43 | n_iter : 4 44 | pose_loss_func : "smoothl1" 45 | rot_loss_func : "smoothl1" 46 | trans_loss_func : "smoothl1" 47 | kp3d_loss_func : "l2norm" 48 | kp2d_loss_func : "l2norm" 49 | rot_loss_weight : 1.0 50 | trans_loss_weight : 1.0 51 | use_2d_reprojection_loss : False 52 | use_3d_loss : True 53 | error2d_loss_weight : 1e-5 54 | error3d_loss_weight : 10.0 55 | joint_individual_weights : None 56 | 57 | use_integral_3d_branch : False 58 | use_limb_loss : False 59 | limb_loss_func : "l1" 60 | limb_loss_weight : 1.0 61 | use_uvd_3d_loss : True 62 | integral_3d_loss_func : "l2norm" 63 | integral_3d_loss_weight : 1.0 64 | use_xyz_3d_loss : False 65 | integral_xyz_3d_loss_func : "l2norm" 66 | integral_xyz_3d_loss_weight : 1.0 67 | bbox_3d_shape : 68 | - 1300 69 | - 1300 70 | - 1300 71 | reference_keypoint_id : 3 # 0:base 72 | 73 | use_pretrained_direct_reg_weights: False 74 | pretrained_direct_reg_weights_path: None 75 | 76 | # rootnet 77 | use_rootnet: True 78 | depth_loss_func : "l1" 79 | use_rootnet_xy_branch : False 80 | xy_loss_func : "mse" 81 | use_origin_bbox : False 82 | use_extended_bbox : True 83 | extend_ratio : [0.2, 0.13] 84 | use_rootnet_with_angle: False 85 | 86 | # Resume 87 | resume_run : False 88 | resume_experiment_name : "resume_name" 89 | -------------------------------------------------------------------------------- /lib/models/backbones/configs/hrnet_w48.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: 'data/coco/' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | 26 | MODEL: 27 | INIT_WEIGHTS: true 28 | NAME: pose_hrnet 29 | NUM_JOINTS: 7 30 | PRETRAINED: './models/hrnet_w48-8ef0771d.pth' 31 | TARGET_TYPE: gaussian 32 | IMAGE_SIZE: 33 | - 256 34 | - 256 35 | HEATMAP_SIZE: 36 | - 64 37 | - 64 38 | SIGMA: 2 39 | EXTRA: 40 | PRETRAINED_LAYERS: 41 | - 'conv1' 42 | - 'bn1' 43 | - 'conv2' 44 | - 'bn2' 45 | - 'layer1' 46 | - 'transition1' 47 | - 'stage2' 48 | - 'transition2' 49 | - 'stage3' 50 | - 'transition3' 51 | - 'stage4' 52 | FINAL_CONV_KERNEL: 1 53 | STAGE2: 54 | NUM_MODULES: 1 55 | NUM_BRANCHES: 2 56 | BLOCK: BASIC 57 | NUM_BLOCKS: 58 | - 4 59 | - 4 60 | NUM_CHANNELS: 61 | - 48 62 | - 96 63 | FUSE_METHOD: SUM 64 | STAGE3: 65 | NUM_MODULES: 4 66 | NUM_BRANCHES: 3 67 | BLOCK: BASIC 68 | NUM_BLOCKS: 69 | - 4 70 | - 4 71 | - 4 72 | NUM_CHANNELS: 73 | - 48 74 | - 96 75 | - 192 76 | FUSE_METHOD: SUM 77 | STAGE4: 78 | NUM_MODULES: 3 79 | NUM_BRANCHES: 4 80 | BLOCK: BASIC 81 | NUM_BLOCKS: 82 | - 4 83 | - 4 84 | - 4 85 | - 4 86 | NUM_CHANNELS: 87 | - 48 88 | - 96 89 | - 192 90 | - 384 91 | FUSE_METHOD: SUM 92 | 93 | # LOSS: 94 | # USE_TARGET_WEIGHT: true 95 | # TRAIN: 96 | # BATCH_SIZE_PER_GPU: 32 97 | # SHUFFLE: true 98 | # 
BEGIN_EPOCH: 0 99 | # END_EPOCH: 210 100 | # OPTIMIZER: adam 101 | # LR: 0.001 102 | # LR_FACTOR: 0.1 103 | # LR_STEP: 104 | # - 170 105 | # - 200 106 | # WD: 0.0001 107 | # GAMMA1: 0.99 108 | # GAMMA2: 0.0 109 | # MOMENTUM: 0.9 110 | # NESTEROV: false 111 | # DEBUG: 112 | # DEBUG: true 113 | # SAVE_BATCH_IMAGES_GT: true 114 | # SAVE_BATCH_IMAGES_PRED: true 115 | # SAVE_HEATMAPS_GT: true 116 | # SAVE_HEATMAPS_PRED: true 117 | -------------------------------------------------------------------------------- /configs/panda/full.yaml: -------------------------------------------------------------------------------- 1 | 2 | # basic training 3 | no_cuda : False 4 | device_id : [0] 5 | 6 | # experiment name (also name of the saving directory) 7 | # model and log directory : {ROOT}/experiment/{exp_name}/ 8 | exp_name : "panda_full2" 9 | 10 | # Data 11 | urdf_robot_name : "panda" 12 | train_ds_names : "dream/synthetic/panda_synth_train_dr" 13 | val_ds_names : None 14 | image_size : 256.0 15 | 16 | # Model 17 | backbone_name : "resnet50" 18 | rootnet_backbone_name : "hrnet32" 19 | rootnet_image_size : 256.0 20 | other_image_size : 256.0 21 | use_rpmg: False 22 | 23 | # Optimizer 24 | lr : 1e-4 25 | weight_decay : 0. 26 | use_schedule : True 27 | schedule_type : "exponential" 28 | n_epochs_warmup : 0 29 | start_decay : 45 30 | end_decay: 100 31 | final_decay : 0.01 32 | exponent : 0.95 33 | 34 | # Training 35 | batch_size : 64 36 | epoch_size : 104950 37 | n_epochs : 700 38 | n_dataloader_workers : 6 39 | save_epoch_interval : None 40 | clip_gradient : 5.0 41 | 42 | # Method 43 | use_direct_reg_branch : True 44 | n_iter : 4 45 | pose_loss_func : "mse" 46 | rot_loss_func : "mse" 47 | trans_loss_func : "l2norm" 48 | depth_loss_func : "l1" 49 | uv_loss_func : "l2norm" 50 | kp2d_loss_func : "l2norm" 51 | kp3d_loss_func : "l2norm" 52 | kp2d_int_loss_func : "l2norm" 53 | kp3d_int_loss_func : "l2norm" 54 | align_3d_loss_func : "l2norm" 55 | pose_loss_weight : 1.0 56 | rot_loss_weight : 1.0 57 | trans_loss_weight : 1.0 58 | depth_loss_weight : 10.0 59 | uv_loss_weight : 1.0 60 | kp2d_loss_weight : 10.0 61 | kp3d_loss_weight : 10.0 62 | kp2d_int_loss_weight : 10.0 63 | kp3d_int_loss_weight : 10.0 64 | align_3d_loss_weight : 0.0 65 | joint_individual_weights : None 66 | use_joint_valid_mask : False 67 | fix_root : True 68 | bbox_3d_shape : 69 | - 1300 70 | - 1300 71 | - 1300 72 | reference_keypoint_id : 3 # 0:base 73 | fix_truncation : False 74 | 75 | use_pretrained_direct_reg_weights: False 76 | pretrained_direct_reg_weights_path: None 77 | 78 | use_pretrained_integral : False 79 | pretrained_integral_weights_path: None 80 | 81 | 82 | # rootnet (+ integral/regression) 83 | use_rootnet: True 84 | resample : False 85 | rootnet_depth_loss_weight : 1.0 86 | depth_loss_func : "l1" 87 | use_rootnet_xy_branch : False 88 | xy_loss_func : "mse" 89 | pretrained_rootnet: "models/pretrained_depthnet/panda_pretrained_depthnet.pk" 90 | use_origin_bbox : False 91 | use_extended_bbox : True 92 | 93 | use_rootnet_with_reg_int_shared_backbone : True 94 | use_rootnet_with_reg_with_int_separate_backbone : False 95 | 96 | # Resume 97 | resume_run : False 98 | resume_experiment_name : "resume_experiment_name" 99 | -------------------------------------------------------------------------------- /configs/panda/self_supervised/synth.yaml: -------------------------------------------------------------------------------- 1 | 2 | # basic training 3 | no_cuda : False 4 | device_id : [0] 5 | 6 | # experiment name (also name of the saving 
directory) 7 | # model and log directory : {ROOT}/experiment/{exp_name}/ 8 | exp_name : "panda_synth_pretrain" 9 | 10 | # Data 11 | urdf_robot_name : "panda" 12 | train_ds_names : "dream/synthetic/panda_synth_train_dr" 13 | val_ds_names : None 14 | image_size : 256.0 15 | 16 | # Model 17 | backbone_name : "resnet50" 18 | # integral_backbone_name : "resnet34" 19 | rootnet_backbone_name : "hrnet32" 20 | rootnet_image_size : 256.0 21 | other_image_size : 256.0 22 | use_rpmg: False 23 | 24 | # Optimizer 25 | lr : 1e-4 26 | weight_decay : 0. 27 | use_schedule : False 28 | schedule_type : "linear" 29 | n_epochs_warmup : 15 30 | start_decay : 100 31 | end_decay: 300 32 | final_decay : 0.01 33 | exponent : 0.96 34 | 35 | # Training 36 | batch_size : 64 37 | epoch_size : 104950 38 | n_epochs : 700 39 | n_dataloader_workers : 6 40 | save_epoch_interval : None 41 | clip_gradient : 5.0 42 | 43 | # Method 44 | use_direct_reg_branch : True 45 | n_iter : 4 46 | pose_loss_func : "mse" 47 | rot_loss_func : "mse" 48 | trans_loss_func : "l2norm" 49 | depth_loss_func : "l1" 50 | uv_loss_func : "l2norm" 51 | kp2d_loss_func : "l2norm" 52 | kp3d_loss_func : "l2norm" 53 | kp2d_int_loss_func : "l2norm" 54 | kp3d_int_loss_func : "l2norm" 55 | align_3d_loss_func : "l2norm" 56 | pose_loss_weight : 1.0 57 | rot_loss_weight : 1.0 58 | trans_loss_weight : 1.0 59 | depth_loss_weight : 1.0 60 | uv_loss_weight : 1.0 61 | kp2d_loss_weight : 10.0 62 | kp3d_loss_weight : 10.0 63 | kp2d_int_loss_weight : 10.0 64 | kp3d_int_loss_weight : 10.0 65 | align_3d_loss_weight : 0.0 66 | joint_individual_weights : None 67 | use_joint_valid_mask : False 68 | fix_root : True 69 | bbox_3d_shape : 70 | - 1300 71 | - 1300 72 | - 1300 73 | reference_keypoint_id : 3 # 0:base 74 | fix_truncation : False 75 | rotation_dim : 6 76 | 77 | use_pretrained_direct_reg_weights: False 78 | pretrained_direct_reg_weights_path: None 79 | 80 | use_pretrained_integral : False 81 | pretrained_integral_weights_path: None 82 | 83 | 84 | # rootnet (+ integral/regression) 85 | use_rootnet: True 86 | resample : False 87 | rootnet_depth_loss_weight : 1.0 88 | depth_loss_func : "l1" 89 | use_rootnet_xy_branch : False 90 | xy_loss_func : "mse" 91 | pretrained_rootnet: "" 92 | use_origin_bbox : False 93 | use_extended_bbox : True 94 | 95 | use_rootnet_with_reg_int_shared_backbone : True 96 | use_rootnet_with_reg_with_int_separate_backbone : False 97 | 98 | # Resume 99 | resume_run : False 100 | resume_experiment_name : "resume_name" 101 | -------------------------------------------------------------------------------- /configs/baxter/full.yaml: -------------------------------------------------------------------------------- 1 | 2 | # basic training 3 | no_cuda : False 4 | device_id : [0] 5 | 6 | # experiment name (also name of the saving directory) 7 | # model and log directory : {ROOT}/experiment/{exp_name}/ 8 | exp_name : "baxter_full" 9 | 10 | # Data 11 | urdf_robot_name : "baxter" 12 | train_ds_names : "dream/synthetic/baxter_synth_train_dr" 13 | val_ds_names : None 14 | image_size : 256.0 15 | 16 | # Model 17 | backbone_name : "resnet50" 18 | # integral_backbone_name : "resnet34" 19 | rootnet_backbone_name : "hrnet32" 20 | rootnet_image_size : 256.0 21 | other_image_size : 256.0 22 | use_rpmg: False 23 | 24 | # Optimizer 25 | lr : 1e-4 26 | weight_decay : 0. 
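# NOTE (added comment, assumption): unlike the DepthNet configs, full-network training enables an
# lr schedule: schedule_type "exponential" with exponent 0.95, presumably applied per epoch between
# start_decay (23) and end_decay (90); the exact decay rule lives in the training scripts, which
# are not shown in this dump.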
27 | use_schedule : True 28 | schedule_type : "exponential" 29 | n_epochs_warmup : 0 30 | start_decay : 23 31 | end_decay: 90 32 | final_decay : 0.01 33 | exponent : 0.95 34 | clip_gradient : 5.0 35 | 36 | 37 | # Training 38 | batch_size : 64 39 | epoch_size : 104950 40 | n_epochs : 700 41 | n_dataloader_workers : 6 42 | save_epoch_interval : None 43 | 44 | # Method 45 | use_direct_reg_branch : True 46 | n_iter : 4 47 | pose_loss_func : "mse" 48 | rot_loss_func : "mse" 49 | trans_loss_func : "l2norm" 50 | depth_loss_func : "l1" 51 | uv_loss_func : "l2norm" 52 | kp2d_loss_func : "l2norm" 53 | kp3d_loss_func : "l2norm" 54 | kp2d_int_loss_func : "l2norm" 55 | kp3d_int_loss_func : "l2norm" 56 | align_3d_loss_func : "l2norm" 57 | pose_loss_weight : 1.0 58 | rot_loss_weight : 1.0 59 | trans_loss_weight : 1.0 60 | depth_loss_weight : 1.0 61 | uv_loss_weight : 1.0 62 | kp2d_loss_weight : 10.0 63 | kp3d_loss_weight : 10.0 64 | kp2d_int_loss_weight : 10.0 65 | kp3d_int_loss_weight : 10.0 66 | align_3d_loss_weight : 0.0 67 | joint_individual_weights : None 68 | use_joint_valid_mask : False 69 | rot_iterative_matmul : False 70 | fix_root : True 71 | bbox_3d_shape : 72 | - 1300 73 | - 1300 74 | - 1300 75 | reference_keypoint_id : 0 # 0:base 76 | fix_truncation : False 77 | 78 | use_pretrained_direct_reg_weights: False 79 | pretrained_direct_reg_weights_path: None 80 | 81 | use_pretrained_integral : False 82 | pretrained_integral_weights_path: None 83 | 84 | 85 | # rootnet (+ integral/regression) 86 | use_rootnet: True 87 | resample : False 88 | rootnet_depth_loss_weight : 1.0 89 | depth_loss_func : "l1" 90 | use_rootnet_xy_branch : False 91 | xy_loss_func : "mse" 92 | pretrained_rootnet: "experiments/baxter_rootnet_ref0_1028/ckpt/curr_best_root_depth_model.pk" 93 | use_origin_bbox : False 94 | use_extended_bbox : True 95 | 96 | use_rootnet_with_reg_int_shared_backbone : True 97 | use_rootnet_with_reg_with_int_separate_backbone : False 98 | 99 | # Resume 100 | resume_run : False 101 | resume_experiment_name : "panda_rootnetwithreguv_pretrainedrootnet_extendedbbox_transl2norm_3dw5_usejointmask_notruncate_ref3_lr1e-4con_0911" 102 | -------------------------------------------------------------------------------- /configs/kuka/full.yaml: -------------------------------------------------------------------------------- 1 | 2 | # basic training 3 | no_cuda : False 4 | device_id : [0] 5 | 6 | # experiment name (also name of the saving directory) 7 | # model and log directory : {ROOT}/experiment/{exp_name}/ 8 | exp_name : "kuka_full" 9 | 10 | # Data 11 | urdf_robot_name : "kuka" 12 | train_ds_names : "dream/synthetic/kuka_synth_train_dr" 13 | val_ds_names : None 14 | image_size : 256.0 15 | 16 | # Model 17 | backbone_name : "resnet50" 18 | rootnet_backbone_name : "hrnet32" 19 | rootnet_image_size : 256.0 20 | other_image_size : 256.0 21 | use_rpmg: False 22 | jitter: True 23 | occlusion : True 24 | other_aug : True 25 | 26 | # Optimizer 27 | lr : 1e-4 28 | weight_decay : 0. 
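# NOTE (added comment): same exponential schedule as the other full-network configs, with the decay
# window set to epochs 25-90 here; this config also spells out the augmentation flags
# (jitter / occlusion / other_aug) explicitly in the Model section above.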
29 | use_schedule : True 30 | schedule_type : "exponential" 31 | n_epochs_warmup : 0 32 | start_decay : 25 33 | end_decay: 90 34 | final_decay : 0.01 35 | exponent : 0.95 36 | 37 | # Training 38 | batch_size : 64 39 | epoch_size : 104950 40 | n_epochs : 700 41 | n_dataloader_workers : 6 42 | save_epoch_interval : None 43 | clip_gradient : 5.0 44 | 45 | 46 | # Method 47 | use_direct_reg_branch : True 48 | n_iter : 4 49 | pose_loss_func : "mse" 50 | rot_loss_func : "mse" 51 | trans_loss_func : "l2norm" 52 | depth_loss_func : "l1" 53 | uv_loss_func : "l2norm" 54 | kp2d_loss_func : "l2norm" 55 | kp3d_loss_func : "l2norm" 56 | kp2d_int_loss_func : "l2norm" 57 | kp3d_int_loss_func : "l2norm" 58 | align_3d_loss_func : "l2norm" 59 | pose_loss_weight : 1.0 60 | rot_loss_weight : 1.0 61 | trans_loss_weight : 1.0 62 | depth_loss_weight : 1.0 63 | uv_loss_weight : 1.0 64 | kp2d_loss_weight : 10.0 65 | kp3d_loss_weight : 10.0 66 | kp2d_int_loss_weight : 10.0 67 | kp3d_int_loss_weight : 10.0 68 | align_3d_loss_weight : 0.0 69 | joint_individual_weights : None 70 | use_joint_valid_mask : False 71 | rot_iterative_matmul : False 72 | fix_root : True 73 | bbox_3d_shape : 74 | - 1300 75 | - 1300 76 | - 1300 77 | reference_keypoint_id : 3 # 0:base 78 | fix_truncation : False 79 | 80 | use_pretrained_direct_reg_weights: False 81 | pretrained_direct_reg_weights_path: None 82 | 83 | use_pretrained_integral : False 84 | pretrained_integral_weights_path: None 85 | 86 | 87 | # rootnet (+ integral/regression) 88 | use_rootnet: True 89 | resample : False 90 | rootnet_depth_loss_weight : 1.0 91 | depth_loss_func : "l1" 92 | use_rootnet_xy_branch : False 93 | xy_loss_func : "mse" 94 | pretrained_rootnet: "experiments/kuka_rootnet_ref3/ckpt/curr_best_root_depth_model.pk" 95 | use_origin_bbox : False 96 | use_extended_bbox : True 97 | 98 | use_rootnet_with_reg_int_shared_backbone : True 99 | use_rootnet_with_reg_with_int_separate_backbone : False 100 | 101 | # Resume 102 | resume_run : False 103 | resume_experiment_name : "panda_rootnetwithreguv_pretrainedrootnet_extendedbbox_transl2norm_3dw5_usejointmask_notruncate_ref3_lr1e-4con_0911" 104 | -------------------------------------------------------------------------------- /lib/models/backbones/configs/hrnet_w32.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: 'data/coco/' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | 26 | MODEL: 27 | INIT_WEIGHTS: true 28 | NAME: pose_hrnet 29 | NUM_JOINTS: 7 30 | PRETRAINED: './models/hrnet_w32-36af842e_roc.pth' 31 | TARGET_TYPE: gaussian 32 | IMAGE_SIZE: 33 | - 256 34 | - 256 35 | HEATMAP_SIZE: 36 | - 64 37 | - 64 38 | SIGMA: 2 39 | EXTRA: 40 | PRETRAINED_LAYERS: 41 | - 'conv1' 42 | - 'bn1' 43 | - 'conv2' 44 | - 'bn2' 45 | - 'layer1' 46 | - 'transition1' 47 | - 'stage2' 48 | - 'transition2' 49 | - 'stage3' 50 | - 'transition3' 51 | - 'stage4' 52 | - 'incre_modules' 53 | 54 | FINAL_CONV_KERNEL: 1 55 | STAGE2: 56 | NUM_MODULES: 1 57 | NUM_BRANCHES: 2 58 | BLOCK: BASIC 59 | NUM_BLOCKS: 60 | - 4 61 | - 4 62 | NUM_CHANNELS: 63 | - 32 64 | - 64 65 | FUSE_METHOD: SUM 66 | 
STAGE3: 67 | NUM_MODULES: 4 68 | NUM_BRANCHES: 3 69 | BLOCK: BASIC 70 | NUM_BLOCKS: 71 | - 4 72 | - 4 73 | - 4 74 | NUM_CHANNELS: 75 | - 32 76 | - 64 77 | - 128 78 | FUSE_METHOD: SUM 79 | STAGE4: 80 | NUM_MODULES: 3 81 | NUM_BRANCHES: 4 82 | BLOCK: BASIC 83 | NUM_BLOCKS: 84 | - 4 85 | - 4 86 | - 4 87 | - 4 88 | NUM_CHANNELS: 89 | - 32 90 | - 64 91 | - 128 92 | - 256 93 | FUSE_METHOD: SUM 94 | 95 | # LOSS: 96 | # USE_TARGET_WEIGHT: true 97 | # TRAIN: 98 | # BATCH_SIZE_PER_GPU: 32 99 | # SHUFFLE: true 100 | # BEGIN_EPOCH: 0 101 | # END_EPOCH: 210 102 | # OPTIMIZER: adam 103 | # LR: 0.001 104 | # LR_FACTOR: 0.1 105 | # LR_STEP: 106 | # - 170 107 | # - 200 108 | # WD: 0.0001 109 | # GAMMA1: 0.99 110 | # GAMMA2: 0.0 111 | # MOMENTUM: 0.9 112 | # NESTEROV: false 113 | # TEST: 114 | # BATCH_SIZE_PER_GPU: 32 115 | # COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 116 | # BBOX_THRE: 1.0 117 | # IMAGE_THRE: 0.0 118 | # IN_VIS_THRE: 0.2 119 | # MODEL_FILE: '' 120 | # NMS_THRE: 1.0 121 | # OKS_THRE: 0.9 122 | # USE_GT_BBOX: true 123 | # FLIP_TEST: true 124 | # POST_PROCESS: true 125 | # SHIFT_HEATMAP: true 126 | # DEBUG: 127 | # DEBUG: true 128 | # SAVE_BATCH_IMAGES_GT: true 129 | # SAVE_BATCH_IMAGES_PRED: true 130 | # SAVE_HEATMAPS_GT: true 131 | # SAVE_HEATMAPS_PRED: true -------------------------------------------------------------------------------- /configs/panda/self_supervised/orb.yaml: -------------------------------------------------------------------------------- 1 | 2 | # basic training 3 | no_cuda : False 4 | device_id : [0] 5 | 6 | # experiment name (also name of the saving directory) 7 | # model and log directory : {ROOT}/experiment/{exp_name}/ 8 | exp_name : "panda_orb_self_supervised" 9 | 10 | # Data 11 | urdf_robot_name : "panda" 12 | train_ds_names : "dream/real/panda-orb" 13 | val_ds_names : None 14 | image_size : 256.0 15 | 16 | # Model 17 | backbone_name : "resnet50" 18 | rootnet_backbone_name : "hrnet32" 19 | rootnet_image_size : 256.0 20 | other_image_size : 256.0 21 | use_rpmg: False 22 | 23 | # Optimizer 24 | lr : 1e-7 25 | weight_decay : 0. 
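# NOTE (added comment): self-supervised fine-tuning on the real panda-orb split uses a very small
# constant lr (1e-7, schedule disabled); the network is initialised from the synthetic pretraining
# checkpoint given in pretrained_weight_on_synth near the end of this file.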
26 | use_schedule : False 27 | schedule_type : "exponential" 28 | n_epochs_warmup : 0 29 | start_decay : 20 30 | end_decay: 300 31 | final_decay : 0.01 32 | exponent : 0.78 33 | 34 | # Training 35 | batch_size : 32 36 | epoch_size : 104950 37 | n_epochs : 700 38 | n_dataloader_workers : 6 39 | save_epoch_interval : None 40 | clip_gradient : 10.0 41 | 42 | # Method 43 | use_direct_reg_branch : True 44 | n_iter : 4 45 | pose_loss_func : "mse" 46 | rot_loss_func : "mse" 47 | trans_loss_func : "l2norm" 48 | depth_loss_func : "l1" 49 | uv_loss_func : "l2norm" 50 | kp2d_loss_func : "l2norm" 51 | kp3d_loss_func : "l2norm" 52 | pose_loss_weight : 1.0 53 | rot_loss_weight : 1.0 54 | trans_loss_weight : 1.0 55 | depth_loss_weight : 1.0 56 | uv_loss_weight : 0.0 57 | kp2d_loss_weight : 10.0 58 | kp3d_loss_weight : 10.0 59 | reg_joint_map : False 60 | joint_conv_dim : [256,256,256] 61 | joint_individual_weights : None 62 | use_joint_valid_mask : True 63 | 64 | 65 | use_integral_3d_branch : False 66 | use_limb_loss : False 67 | limb_loss_func : "l1" 68 | limb_loss_weight : 1.0 69 | use_uvd_3d_loss : False 70 | integral_3d_loss_func : "l2norm" 71 | integral_3d_loss_weight : 1.0 72 | use_xyz_3d_loss : True 73 | integral_xyz_3d_loss_func : "l2norm" 74 | integral_xyz_3d_loss_weight : 1.0 75 | bbox_3d_shape : 76 | - 1300 77 | - 1300 78 | - 1300 79 | 80 | reference_keypoint_id : 3 # 0:base 81 | fix_truncation : False 82 | 83 | use_pretrained_direct_reg_weights: False 84 | pretrained_direct_reg_weights_path: None 85 | 86 | use_pretrained_integral : False 87 | pretrained_integral_weights_path: None 88 | 89 | 90 | # rootnet (+ integral/regression) 91 | use_rootnet: False 92 | resample : False 93 | rootnet_depth_loss_weight : 1.0 94 | depth_loss_func : "l1" 95 | use_rootnet_xy_branch : False 96 | xy_loss_func : "mse" 97 | pretrained_rootnet: None 98 | use_origin_bbox : False 99 | use_extended_bbox : True 100 | 101 | use_rootnet_with_regression_uv : False 102 | use_rootnet_with_reg_int_shared_backbone : True 103 | use_rootnet_with_reg_with_int_separate_backbone : False 104 | 105 | use_sim2real : True 106 | use_view : False 107 | pretrained_weight_on_synth : "panda_synth_pretrain/ckpt/curr_best_auc(add)_orb_model.pk" 108 | 109 | mask_loss_weight : 0.0 110 | iou_loss_weight : 1.0 111 | scale_loss_weight : 0.0 112 | align_3d_loss_weight : 1.0 113 | 114 | # Resume 115 | resume_run : False 116 | resume_experiment_name : "resume_name" 117 | -------------------------------------------------------------------------------- /configs/panda/self_supervised/realsense.yaml: -------------------------------------------------------------------------------- 1 | 2 | # basic training 3 | no_cuda : False 4 | device_id : [0] 5 | 6 | # experiment name (also name of the saving directory) 7 | # model and log directory : {ROOT}/experiment/{exp_name}/ 8 | exp_name : "panda_realsense_self_supervised" 9 | 10 | # Data 11 | urdf_robot_name : "panda" 12 | train_ds_names : "dream/real/panda-3cam_realsense" 13 | val_ds_names : None 14 | image_size : 256.0 15 | 16 | # Model 17 | backbone_name : "resnet50" 18 | rootnet_backbone_name : "hrnet32" 19 | rootnet_image_size : 256.0 20 | other_image_size : 256.0 21 | use_rpmg: False 22 | 23 | # Optimizer 24 | lr : 1e-7 25 | weight_decay : 0. 
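# NOTE (added comment): same self-supervised recipe for the panda-3cam_realsense split; the notable
# differences from the orb/kinect/azure configs are uv_loss_weight: 1.0 and
# use_joint_valid_mask: False below.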
26 | use_schedule : False 27 | schedule_type : "exponential" 28 | n_epochs_warmup : 0 29 | start_decay : 20 30 | end_decay: 300 31 | final_decay : 0.01 32 | exponent : 0.78 33 | 34 | # Training 35 | batch_size : 32 36 | epoch_size : 104950 37 | n_epochs : 700 38 | n_dataloader_workers : 6 39 | save_epoch_interval : None 40 | clip_gradient : 10.0 41 | 42 | # Method 43 | use_direct_reg_branch : True 44 | n_iter : 4 45 | pose_loss_func : "mse" 46 | rot_loss_func : "mse" 47 | trans_loss_func : "l2norm" 48 | depth_loss_func : "l1" 49 | uv_loss_func : "l2norm" 50 | kp2d_loss_func : "l2norm" 51 | kp3d_loss_func : "l2norm" 52 | pose_loss_weight : 1.0 53 | rot_loss_weight : 1.0 54 | trans_loss_weight : 1.0 55 | depth_loss_weight : 1.0 56 | uv_loss_weight : 1.0 57 | kp2d_loss_weight : 10.0 58 | kp3d_loss_weight : 10.0 59 | reg_joint_map : False 60 | joint_conv_dim : [256,256,256] 61 | joint_individual_weights : None 62 | use_joint_valid_mask : False 63 | 64 | 65 | use_integral_3d_branch : False 66 | use_limb_loss : False 67 | limb_loss_func : "l1" 68 | limb_loss_weight : 1.0 69 | use_uvd_3d_loss : False 70 | integral_3d_loss_func : "l2norm" 71 | integral_3d_loss_weight : 1.0 72 | use_xyz_3d_loss : True 73 | integral_xyz_3d_loss_func : "l2norm" 74 | integral_xyz_3d_loss_weight : 1.0 75 | bbox_3d_shape : 76 | - 1300 77 | - 1300 78 | - 1300 79 | 80 | reference_keypoint_id : 3 # 0:base 81 | fix_truncation : False 82 | 83 | use_pretrained_direct_reg_weights: False 84 | pretrained_direct_reg_weights_path: None 85 | 86 | use_pretrained_integral : False 87 | pretrained_integral_weights_path: None 88 | 89 | 90 | # rootnet (+ integral/regression) 91 | use_rootnet: False 92 | resample : False 93 | rootnet_depth_loss_weight : 1.0 94 | depth_loss_func : "l1" 95 | use_rootnet_xy_branch : False 96 | xy_loss_func : "mse" 97 | pretrained_rootnet: None 98 | use_origin_bbox : False 99 | use_extended_bbox : True 100 | 101 | use_rootnet_with_regression_uv : False 102 | use_rootnet_with_reg_int_shared_backbone : True 103 | use_rootnet_with_reg_with_int_separate_backbone : False 104 | 105 | use_sim2real : True 106 | use_view : False 107 | pretrained_weight_on_synth : "panda_synth_pretrain/ckpt/curr_best_auc(add)_realsense_model.pk" 108 | 109 | mask_loss_weight : 0.0 110 | iou_loss_weight : 1.0 111 | scale_loss_weight : 0.0 112 | align_3d_loss_weight : 1.0 113 | 114 | # Resume 115 | resume_run : False 116 | resume_experiment_name : "resume_name" 117 | -------------------------------------------------------------------------------- /configs/panda/self_supervised/kinect.yaml: -------------------------------------------------------------------------------- 1 | 2 | # basic training 3 | no_cuda : False 4 | device_id : [0] 5 | 6 | # experiment name (also name of the saving directory) 7 | # model and log directory : {ROOT}/experiment/{exp_name}/ 8 | exp_name : "panda_kinect_self_supervised" 9 | 10 | # Data 11 | urdf_robot_name : "panda" 12 | train_ds_names : "dream/real/panda-3cam_kinect360" 13 | val_ds_names : None 14 | image_size : 256.0 15 | 16 | # Model 17 | backbone_name : "resnet50" 18 | rootnet_backbone_name : "hrnet32" 19 | rootnet_image_size : 256.0 20 | other_image_size : 256.0 21 | use_rpmg: False 22 | 23 | # Optimizer 24 | lr : 3e-9 25 | weight_decay : 0. 
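# NOTE (added comment, assumption): the kinect360 split fine-tunes with an even smaller lr (3e-9);
# the schedule parameters below (start_decay 1, exponent 0.85) are presumably unused since
# use_schedule is False.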
26 | use_schedule : False 27 | schedule_type : "exponential" 28 | n_epochs_warmup : 0 29 | start_decay : 1 30 | end_decay: 300 31 | final_decay : 0.01 32 | exponent : 0.85 33 | 34 | # Training 35 | batch_size : 32 36 | epoch_size : 104950 37 | n_epochs : 700 38 | n_dataloader_workers : 6 39 | save_epoch_interval : None 40 | clip_gradient : 10.0 41 | 42 | # Method 43 | use_direct_reg_branch : True 44 | n_iter : 4 45 | pose_loss_func : "mse" 46 | rot_loss_func : "mse" 47 | trans_loss_func : "l2norm" 48 | depth_loss_func : "l1" 49 | uv_loss_func : "l2norm" 50 | kp2d_loss_func : "l2norm" 51 | kp3d_loss_func : "l2norm" 52 | pose_loss_weight : 1.0 53 | rot_loss_weight : 1.0 54 | trans_loss_weight : 1.0 55 | depth_loss_weight : 1.0 56 | uv_loss_weight : 0.0 57 | kp2d_loss_weight : 10.0 58 | kp3d_loss_weight : 10.0 59 | reg_joint_map : False 60 | joint_conv_dim : [256,256,256] 61 | joint_individual_weights : None 62 | use_joint_valid_mask : True 63 | 64 | 65 | use_integral_3d_branch : False 66 | use_limb_loss : False 67 | limb_loss_func : "l1" 68 | limb_loss_weight : 1.0 69 | use_uvd_3d_loss : False 70 | integral_3d_loss_func : "l2norm" 71 | integral_3d_loss_weight : 1.0 72 | use_xyz_3d_loss : True 73 | integral_xyz_3d_loss_func : "l2norm" 74 | integral_xyz_3d_loss_weight : 1.0 75 | bbox_3d_shape : 76 | - 1300 77 | - 1300 78 | - 1300 79 | 80 | reference_keypoint_id : 3 # 0:base 81 | fix_truncation : False 82 | 83 | use_pretrained_direct_reg_weights: False 84 | pretrained_direct_reg_weights_path: None 85 | 86 | use_pretrained_integral : False 87 | pretrained_integral_weights_path: None 88 | 89 | 90 | # rootnet (+ integral/regression) 91 | use_rootnet: False 92 | resample : False 93 | rootnet_depth_loss_weight : 1.0 94 | depth_loss_func : "l1" 95 | use_rootnet_xy_branch : False 96 | xy_loss_func : "mse" 97 | pretrained_rootnet: None 98 | use_origin_bbox : False 99 | use_extended_bbox : True 100 | 101 | use_rootnet_with_regression_uv : False 102 | use_rootnet_with_reg_int_shared_backbone : True 103 | use_rootnet_with_reg_with_int_separate_backbone : False 104 | 105 | use_sim2real : True 106 | use_view : False 107 | pretrained_weight_on_synth : "panda_synth_pretrain/ckpt/curr_best_auc(add)_kinect_model.pk" 108 | 109 | mask_loss_weight : 0.0 110 | iou_loss_weight : 1.0 111 | scale_loss_weight : 0.0 112 | align_3d_loss_weight : 1.0 113 | 114 | # Resume 115 | resume_run : False 116 | resume_experiment_name : "panda_sim2real_rootnet+reg1008_lr1e-4con_1011" 117 | -------------------------------------------------------------------------------- /configs/panda/self_supervised/azure.yaml: -------------------------------------------------------------------------------- 1 | 2 | # basic training 3 | no_cuda : False 4 | device_id : [0] 5 | 6 | # experiment name (also name of the saving directory) 7 | # model and log directory : {ROOT}/experiment/{exp_name}/ 8 | exp_name : "panda_azure_self_supervised" 9 | 10 | # Data 11 | urdf_robot_name : "panda" 12 | train_ds_names : "dream/real/panda-3cam_azure" 13 | val_ds_names : None 14 | image_size : 256.0 15 | 16 | # Model 17 | backbone_name : "resnet50" 18 | rootnet_backbone_name : "hrnet32" 19 | rootnet_image_size : 256.0 20 | other_image_size : 256.0 21 | use_rpmg: False 22 | 23 | # Optimizer 24 | lr : 1e-8 25 | weight_decay : 0. 
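# NOTE (added comment): azure fine-tuning uses lr 1e-8 with the same self-supervised objectives as
# the other real-camera configs (iou_loss_weight 1.0 and align_3d_loss_weight 1.0, with the mask and
# scale terms switched off).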
26 | use_schedule : False 27 | schedule_type : "exponential" 28 | n_epochs_warmup : 0 29 | start_decay : 20 30 | end_decay: 300 31 | final_decay : 0.01 32 | exponent : 0.78 33 | 34 | # Training 35 | batch_size : 32 36 | epoch_size : 104950 37 | n_epochs : 700 38 | n_dataloader_workers : 6 39 | save_epoch_interval : None 40 | clip_gradient : 10.0 41 | 42 | # Method 43 | use_direct_reg_branch : True 44 | n_iter : 4 45 | pose_loss_func : "mse" 46 | rot_loss_func : "mse" 47 | trans_loss_func : "l2norm" 48 | depth_loss_func : "l1" 49 | uv_loss_func : "l2norm" 50 | kp2d_loss_func : "l2norm" 51 | kp3d_loss_func : "l2norm" 52 | pose_loss_weight : 1.0 53 | rot_loss_weight : 1.0 54 | trans_loss_weight : 1.0 55 | depth_loss_weight : 1.0 56 | uv_loss_weight : 0.0 57 | kp2d_loss_weight : 10.0 58 | kp3d_loss_weight : 10.0 59 | reg_joint_map : False 60 | joint_conv_dim : [256,256,256] 61 | joint_individual_weights : None 62 | use_joint_valid_mask : True 63 | 64 | 65 | use_integral_3d_branch : False 66 | use_limb_loss : False 67 | limb_loss_func : "l1" 68 | limb_loss_weight : 1.0 69 | use_uvd_3d_loss : False 70 | integral_3d_loss_func : "l2norm" 71 | integral_3d_loss_weight : 1.0 72 | use_xyz_3d_loss : True 73 | integral_xyz_3d_loss_func : "l2norm" 74 | integral_xyz_3d_loss_weight : 1.0 75 | bbox_3d_shape : 76 | - 1300 77 | - 1300 78 | - 1300 79 | 80 | reference_keypoint_id : 3 # 0:base 81 | fix_truncation : False 82 | 83 | use_pretrained_direct_reg_weights: False 84 | pretrained_direct_reg_weights_path: None 85 | 86 | use_pretrained_integral : False 87 | pretrained_integral_weights_path: None 88 | 89 | 90 | # rootnet (+ integral/regression) 91 | use_rootnet: False 92 | resample : False 93 | rootnet_depth_loss_weight : 1.0 94 | depth_loss_func : "l1" 95 | use_rootnet_xy_branch : False 96 | xy_loss_func : "mse" 97 | pretrained_rootnet: None 98 | use_origin_bbox : False 99 | use_extended_bbox : True 100 | 101 | use_rootnet_with_regression_uv : False 102 | use_rootnet_with_reg_int_shared_backbone : True 103 | use_rootnet_with_reg_with_int_separate_backbone : False 104 | 105 | use_sim2real : True 106 | use_view : False 107 | pretrained_weight_on_synth : "panda_synth_pretrain/ckpt/curr_best_auc(add)_azure_model.pk" 108 | 109 | mask_loss_weight : 0.0 110 | iou_loss_weight : 1.0 111 | scale_loss_weight : 0.0 112 | align_3d_loss_weight : 1.0 113 | 114 | # Resume 115 | resume_run : False 116 | resume_experiment_name : "panda_sim2real_az_rri1026_lr1e-8con_iouloss+alignloss_fixbnrun_preepoch82_1031" 117 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==1.4.0 2 | aiofiles==22.1.0 3 | aiosqlite==0.18.0 4 | ansitable==0.9.7 5 | antlr4-python3-runtime==4.9.3 6 | anyio==3.6.2 7 | asttokens==2.2.1 8 | async-generator==1.10 9 | attrs==23.1.0 10 | backcall==0.2.0 11 | cachetools==5.3.0 12 | certifi==2023.5.7 13 | charset-normalizer==3.1.0 14 | cmake==3.26.3 15 | colorama==0.4.6 16 | colored==2.2.3 17 | comm==0.1.3 18 | contourpy==1.0.7 19 | cycler==0.11.0 20 | debugpy==1.6.7 21 | decorator==5.1.1 22 | easydict==1.10 23 | exceptiongroup==1.1.1 24 | executing==1.2.0 25 | filelock==3.12.0 26 | fonttools==4.39.4 27 | freetype-py==2.4.0 28 | google-auth==2.18.0 29 | google-auth-oauthlib==0.4.6 30 | grpcio==1.54.0 31 | h11==0.14.0 32 | HeapDict==1.0.1 33 | idna==3.4 34 | imageio==2.28.1 35 | importlib-metadata==6.6.0 36 | importlib-resources==5.12.0 37 | iopath==0.1.10 38 | 
ipykernel==6.23.0 39 | ipython==8.13.2 40 | jedi==0.18.2 41 | Jinja2==3.1.2 42 | joblib==1.2.0 43 | json-tricks==3.17.3 44 | jsonpatch==1.32 45 | jsonpointer==2.3 46 | jupyter_client==8.2.0 47 | jupyter_core==5.3.0 48 | kiwisolver==1.4.4 49 | kornia==0.7.0 50 | lit==16.0.3 51 | lxml==4.9.2 52 | Markdown==3.4.3 53 | MarkupSafe==2.1.2 54 | matplotlib==3.7.1 55 | matplotlib-inline==0.1.6 56 | mpmath==1.3.0 57 | nest-asyncio==1.5.6 58 | networkx==2.5 59 | numpy==1.22.4 60 | nvidia-cublas-cu11==11.10.3.66 61 | nvidia-cuda-cupti-cu11==11.7.101 62 | nvidia-cuda-nvrtc-cu11==11.7.99 63 | nvidia-cuda-runtime-cu11==11.7.99 64 | nvidia-cudnn-cu11==8.5.0.96 65 | nvidia-cufft-cu11==10.9.0.58 66 | nvidia-curand-cu11==10.2.10.91 67 | nvidia-cusolver-cu11==11.4.0.1 68 | nvidia-cusparse-cu11==11.7.4.91 69 | nvidia-nccl-cu11==2.14.3 70 | nvidia-nvtx-cu11==11.7.91 71 | oauthlib==3.2.2 72 | opencv-python==4.7.0.72 73 | outcome==1.2.0 74 | packaging==23.1 75 | pandas==1.5.3 76 | parso==0.8.3 77 | pexpect==4.8.0 78 | pgraph-python==0.6.2 79 | pickleshare==0.7.5 80 | Pillow==9.5.0 81 | pinocchio==0.3 82 | platformdirs==3.5.1 83 | portalocker==2.8.2 84 | progress==1.6 85 | prompt-toolkit==3.0.38 86 | protobuf==3.20.3 87 | psutil==5.9.5 88 | ptyprocess==0.7.0 89 | pure-eval==0.2.2 90 | pyarrow==12.0.0 91 | pyasn1==0.5.0 92 | pyasn1-modules==0.3.0 93 | pybullet==3.2.5 94 | pycocotools==2.0.7 95 | pycollada==0.6 96 | pyglet==2.0.7 97 | Pygments==2.15.1 98 | PyOpenGL==3.1.0 99 | pyparsing==3.0.9 100 | pyrender==0.1.45 101 | python-dateutil==2.8.2 102 | 103 | pytz==2023.3 104 | PyYAML==5.1 105 | pyzmq==25.0.2 106 | requests==2.30.0 107 | requests-oauthlib==1.3.1 108 | roboticstoolbox-python==1.0.1 109 | rsa==4.9 110 | rtb-data==1.0.1 111 | scikit-learn==1.2.2 112 | scipy==1.10.1 113 | seaborn==0.12.2 114 | shapely==2.0.1 115 | simplejson==3.17.0 116 | six==1.16.0 117 | smplx==0.1.28 118 | sniffio==1.3.0 119 | sortedcontainers==2.4.0 120 | soupsieve==2.4 121 | spatialgeometry==1.0.3 122 | spatialmath-python==1.0.5 123 | stack-data==0.6.2 124 | swift-sim==1.0.1 125 | sympy==1.12 126 | tblib==1.7.0 127 | tensorboard==2.11.2 128 | tensorboard-data-server==0.6.1 129 | tensorboard-plugin-wit==1.8.1 130 | tensorboardX==2.6.2 131 | termcolor==2.2.0 132 | terminado==0.17.1 133 | thop==0.1.1.post2209072238 134 | threadpoolctl==3.1.0 135 | tinycss2==1.2.1 136 | tomli==2.0.1 137 | toolz==0.12.0 138 | torch==1.13.1+cu117 139 | torch-summary==1.4.5 140 | torch-utils==0.1.2 141 | torchgeometry==0.1.2 142 | torchnet==0.0.4 143 | torchvision==0.14.1+cu117 144 | tornado==6.3.1 145 | tqdm==4.41.1 146 | traitlets==5.9.0 147 | transform3d==0.0.4 148 | transforms3d==0.3.1 149 | trimesh==3.18.1 150 | trio==0.22.0 151 | trio-websocket==0.9.2 152 | triton==2.0.0 153 | typing_extensions==4.5.0 154 | tzdata==2023.3 155 | 156 | uri-template==1.2.0 157 | urllib3==1.26.14 158 | visdom==0.2.3 159 | wcwidth==0.2.6 160 | webcolors==1.12 161 | webencodings==0.5.1 162 | websocket-client==1.5.0 163 | websockets==11.0.3 164 | Werkzeug==2.2.3 165 | wget==3.2 166 | wsproto==1.2.0 167 | xarray==0.14.1 168 | y-py==0.5.9 169 | ypy-websocket==0.8.2 170 | zict==2.2.0 171 | zipp==3.15.0 172 | -------------------------------------------------------------------------------- /lib/models/ctrnet/mask_inference.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | base_dir = os.path.abspath(".") 4 | sys.path.append(base_dir) 5 | import argparse 6 | import numpy as np 7 | import torch 8 | import 
torchvision.transforms as transforms 9 | from PIL import Image as PILImage 10 | from .CtRNet import CtRNet 11 | 12 | 13 | class seg_mask_inference(torch.nn.Module): 14 | def __init__(self, intrinsics, dataset, image_hw=(480, 640), scale=0.5): 15 | super(seg_mask_inference, self).__init__() 16 | self.args = self.set_args(intrinsics, dataset, image_hw, scale) 17 | self.net = CtRNet(self.args) 18 | self.trans_to_tensor = transforms.Compose([ 19 | transforms.ToTensor(), 20 | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), 21 | ]) 22 | 23 | def set_args(self, intrinsics, dataset, image_hw=(480, 640), scale=0.5): 24 | parser = argparse.ArgumentParser() 25 | args = parser.parse_args("") 26 | args.use_gpu = True 27 | args.robot_name = 'Panda' 28 | args.n_kp = 7 29 | args.scale = scale 30 | args.height, args.width = image_hw 31 | args.fx, args.fy, args.px, args.py = intrinsics 32 | args.width, args.height = int(args.width * args.scale), int(args.height * args.scale) 33 | args.fx, args.fy, args.px, args.py = args.fx * args.scale, args.fy * args.scale, args.px * args.scale, args.py * args.scale 34 | 35 | if "realsense" in dataset: 36 | args.keypoint_seg_model_path = "models/panda_segmentation/realsense.pth" 37 | elif "azure" in dataset: 38 | args.keypoint_seg_model_path = "models/panda_segmentation/azure.pth" 39 | elif "kinect" in dataset: 40 | args.keypoint_seg_model_path = "models/panda_segmentation/kinect.pth" 41 | elif "orb" in dataset: 42 | args.keypoint_seg_model_path = "models/panda_segmentation/orb.pth" 43 | else: 44 | args.keypoint_seg_model_path = "models/panda_segmentation/azure.pth" 45 | 46 | return args 47 | 48 | def preprocess_img_tensor(self, img_tensor): 49 | width, height = img_tensor.shape[3], img_tensor.shape[2] 50 | img_array = np.uint8(img_tensor.detach().cpu().numpy()).transpose(0, 2, 3, 1) 51 | new_size = (int(width*self.args.scale),int(height*self.args.scale)) 52 | pil_image = [self.trans_to_tensor(PILImage.fromarray(img).resize(new_size)) for img in img_array] 53 | return torch.stack(pil_image) 54 | 55 | def forward(self, img_tensor): 56 | 57 | image = self.preprocess_img_tensor(img_tensor).cuda() 58 | segmentation = self.net.inference_batch_images_onlyseg(image) 59 | 60 | return segmentation 61 | 62 | class seg_keypoint_inference(torch.nn.Module): 63 | def __init__(self, image_hw=(480, 640), scale=0.5): 64 | super(seg_keypoint_inference, self).__init__() 65 | self.args = self.set_args(image_hw, scale) 66 | self.net = CtRNet(self.args) 67 | self.trans_to_tensor = transforms.Compose([ 68 | transforms.ToTensor(), 69 | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), 70 | ]) 71 | 72 | def set_args(self, image_hw=(480, 640), scale=0.5): 73 | parser = argparse.ArgumentParser() 74 | args = parser.parse_args("") 75 | args.use_gpu = True 76 | args.robot_name = 'Panda' 77 | args.n_kp = 7 78 | args.scale = scale 79 | args.height, args.width = image_hw 80 | args.fx, args.fy, args.px, args.py = 320,320,320,240 81 | args.width, args.height = int(args.width * args.scale), int(args.height * args.scale) 82 | args.fx, args.fy, args.px, args.py = args.fx * args.scale, args.fy * args.scale, args.px * args.scale, args.py * args.scale 83 | args.keypoint_seg_model_path = "models/panda_segmentation/azure.pth" 84 | 85 | return args 86 | 87 | def preprocess_img_tensor(self, img_tensor): 88 | width, height = img_tensor.shape[3], img_tensor.shape[2] 89 | img_array = np.uint8(img_tensor.detach().cpu().numpy()).transpose(0, 2, 3, 1) 90 | new_size = 
(int(width*self.args.scale),int(height*self.args.scale)) 91 | pil_image = [self.trans_to_tensor(PILImage.fromarray(img).resize(new_size)) for img in img_array] 92 | return torch.stack(pil_image) 93 | 94 | def forward(self, img_tensor): 95 | 96 | image = self.preprocess_img_tensor(img_tensor).cuda() 97 | keypoints, segmentation = self.net.inference_batch_images_seg_kp(image) 98 | 99 | return keypoints, segmentation -------------------------------------------------------------------------------- /lib/core/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 4 | import yaml 5 | from lib.config import LOCAL_DATA_DIR 6 | from easydict import EasyDict 7 | 8 | def make_default_cfg(): 9 | cfg = EasyDict() 10 | 11 | # basic experiment info (must be overwritten) 12 | cfg.exp_name = "default" 13 | cfg.config_path = "default" 14 | 15 | # training 16 | cfg.no_cuda = False 17 | cfg.device_id = 0 18 | cfg.batch_size = 64 19 | cfg.epoch_size = 104950 # will get rid of this eventually, but right now let it be 20 | cfg.n_epochs = 700 21 | cfg.n_dataloader_workers = int(os.environ.get('N_CPUS', 10)) - 2 22 | cfg.clip_gradient = 10.0 23 | 24 | # data 25 | cfg.urdf_robot_name = "panda" 26 | cfg.train_ds_names = os.path.abspath(LOCAL_DATA_DIR / "dream/real/panda_synth_train_dr") 27 | cfg.image_size = 256.0 28 | 29 | # augmentation during training 30 | cfg.jitter = True 31 | cfg.other_aug = True 32 | cfg.occlusion = True 33 | cfg.occlu_p = 0.5 34 | cfg.padding = False 35 | cfg.fix_truncation = False 36 | cfg.truncation_padding = [120,120,120,120] 37 | cfg.rootnet_flip = False 38 | 39 | # pipeline 40 | cfg.use_rootnet = False 41 | cfg.use_rootnet_with_reg_int_shared_backbone = False 42 | cfg.use_sim2real = False 43 | cfg.use_sim2real_real = False 44 | cfg.pretrained_rootnet = None 45 | cfg.pretrained_weight_on_synth = None 46 | cfg.use_view = False 47 | cfg.known_joint = False 48 | 49 | # optimizer and scheduler 50 | cfg.lr = 1e-4 51 | cfg.weight_decay = 0.0 52 | cfg.use_schedule = False 53 | cfg.schedule_type = "" 54 | cfg.n_epochs_warmup = 0 55 | cfg.start_decay = 100 56 | cfg.end_decay = 200 57 | cfg.final_decay = 0.01 58 | cfg.exponent = 1.0 59 | cfg.step_decay = 0.1 60 | cfg.step = 5 61 | 62 | # model 63 | ## basic setting 64 | cfg.backbone_name = "resnet50" 65 | cfg.rootnet_backbone_name = "hrnet32" 66 | cfg.rootnet_image_size = (cfg.image_size, cfg.image_size) 67 | cfg.other_image_size = (cfg.image_size, cfg.image_size) 68 | ## Jointnet/RotationNet 69 | cfg.n_iter = 4 70 | cfg.p_dropout = 0.5 71 | cfg.use_rpmg = False 72 | cfg.reg_joint_map = False 73 | cfg.joint_conv_dim = [] 74 | cfg.rotation_dim = 6 75 | cfg.direct_reg_rot = False 76 | cfg.rot_iterative_matmul = False 77 | cfg.fix_root = True 78 | cfg.reg_from_bb_out = False 79 | cfg.depth_from_bb_out = False 80 | ## KeypointNet 81 | cfg.bbox_3d_shape = [1300, 1300, 1300] 82 | cfg.reference_keypoint_id = 3 83 | ## DepthNet 84 | cfg.resample = False 85 | cfg.use_origin_bbox = False 86 | cfg.use_extended_bbox = True 87 | cfg.extend_ratio = [0.2, 0.13] 88 | cfg.use_offset = False 89 | cfg.use_rootnet_xy_branch = False 90 | cfg.add_fc = False 91 | cfg.multi_kp = False 92 | cfg.kps_need_depth = None 93 | 94 | # loss 95 | ## for full network training 96 | cfg.pose_loss_func = "mse" 97 | cfg.rot_loss_func = "mse" 98 | cfg.trans_loss_func = "l2norm" 99 | cfg.uv_loss_func = "l2norm" 100 | cfg.depth_loss_func = "l1" 
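# NOTE (added comment): the loss settings in this block are defaults only; make_cfg() below
# overrides them from the experiment YAML and casts any key ending in "loss_weight" (plus lr and
# exponent) to float. depth_loss_func is also re-assigned in the DepthNet-specific block further down.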
101 | cfg.kp3d_loss_func = "l2norm" 102 | cfg.kp2d_loss_func = "l2norm" 103 | cfg.kp3d_int_loss_func = "l2norm" 104 | cfg.kp2d_int_loss_func = "l2norm" 105 | cfg.align_3d_loss_func = "l2norm" 106 | cfg.pose_loss_weight = 0.0 107 | cfg.rot_loss_weight = 0.0 108 | cfg.trans_loss_weight = 0.0 109 | cfg.uv_loss_weight = 0.0 110 | cfg.depth_loss_weight = 0.0 111 | cfg.kp2d_loss_weight = 0.0 112 | cfg.kp3d_loss_weight = 0.0 113 | cfg.kp2d_int_loss_weight = 0.0 114 | cfg.kp3d_int_loss_weight = 0.0 115 | cfg.align_3d_loss_weight = 0.0 116 | cfg.joint_individual_weights = None 117 | cfg.use_joint_valid_mask = False 118 | cfg.fix_mask = False 119 | ## for depthnet training 120 | cfg.rootnet_depth_loss_weight = 1.0 121 | cfg.depth_loss_func = "l1" 122 | cfg.xy_loss_func = "l1" 123 | ## for self-supervised training 124 | cfg.mask_loss_func = "mse_mean" 125 | cfg.mask_loss_weight = 0.0 126 | cfg.scale_loss_weight = 0.0 127 | cfg.iou_loss_weight = 0.0 128 | 129 | # resume 130 | cfg.resume_run = False 131 | cfg.resume_experiment_name = "resume_name" 132 | 133 | return cfg 134 | 135 | 136 | def make_cfg(args): 137 | 138 | cfg = make_default_cfg() 139 | cfg.config_path = args.config 140 | 141 | with open(args.config, encoding="utf-8") as f: 142 | config = yaml.load(f.read(), Loader=yaml.FullLoader) 143 | 144 | for k,v in config.items(): 145 | if k in cfg: 146 | if k == "n_dataloader_workers": 147 | cfg[k] = min(cfg[k], v) 148 | elif k == "train_ds_names": 149 | cfg[k] = os.path.abspath(LOCAL_DATA_DIR / v) 150 | if "move" in v: 151 | cfg[k] = v 152 | elif k in ["lr", "exponent"] or k.endswith("loss_weight"): 153 | cfg[k] = float(v) 154 | elif k in ["joint_individual_weights", "pretrained_rootnet", "pretrained_weight_on_synth"]: 155 | cfg[k] = None if v == "None" else v 156 | elif k == "extend_ratio": 157 | cfg[k] = list(v) 158 | else: 159 | cfg[k] = v 160 | 161 | f.close() 162 | 163 | return cfg -------------------------------------------------------------------------------- /lib/utils/transforms.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 4 | import numpy as np 5 | import torch 6 | 7 | def hnormalized(vector): 8 | hnormalized_vector = (vector / vector[-1])[:-1] 9 | return hnormalized_vector 10 | 11 | def point_projection_from_3d(camera_K, points): 12 | corr = zip(camera_K, points) 13 | projections = [hnormalized(np.matmul(K, loc.T)).T for K,loc in corr] 14 | projections = np.array(projections) 15 | return projections 16 | 17 | def point_projection_from_3d_tensor(camera_K, points): 18 | corr = zip(camera_K, points) 19 | projections = [hnormalized(torch.matmul(K, loc.T)).T for K,loc in corr] 20 | projections = torch.stack(projections) 21 | return projections 22 | 23 | def invert_T(T): 24 | R = T[..., :3, :3] 25 | t = T[..., :3, [-1]] 26 | R_inv = R.transpose(-2, -1) 27 | t_inv = - R_inv @ t 28 | T_inv = T.clone() 29 | T_inv[..., :3, :3] = R_inv 30 | T_inv[..., :3, [-1]] = t_inv 31 | return T_inv 32 | 33 | def uvd_to_xyz(uvd_jts, image_size, intrinsic_matrix_inverse, root_trans, depth_factor, return_relative=False): 34 | 35 | """ 36 | Adapted from https://github.com/Jeff-sjtu/HybrIK/tree/main/hybrik/models 37 | """ 38 | 39 | # intrinsic_param is of the inverse version (inv=True) 40 | assert uvd_jts.dim() == 3 and uvd_jts.shape[2] == 3, uvd_jts.shape 41 | uvd_jts_new = uvd_jts.clone() 42 | assert torch.sum(torch.isnan(uvd_jts)) == 0, ('uvd_jts', uvd_jts) 43 | 44 
| # remap uv coordinate to input (256x256) space 45 | uvd_jts_new[:, :, 0] = (uvd_jts[:, :, 0] + 0.5) * image_size 46 | uvd_jts_new[:, :, 1] = (uvd_jts[:, :, 1] + 0.5) * image_size 47 | # remap d to m (depth_factor unit: m) 48 | uvd_jts_new[:, :, 2] = uvd_jts[:, :, 2] * depth_factor 49 | assert torch.sum(torch.isnan(uvd_jts_new)) == 0, ('uvd_jts_new', uvd_jts_new) 50 | 51 | dz = uvd_jts_new[:, :, 2].cuda() 52 | 53 | # transform uv coordinate to x/z y/z coordinate 54 | uv_homo_jts = torch.cat((uvd_jts_new[:, :, :2], torch.ones_like(uvd_jts_new)[:, :, 2:]), dim=2).cuda() 55 | device = intrinsic_matrix_inverse.device 56 | uv_homo_jts = uv_homo_jts.to(device) 57 | # batch-wise matrix multipy : (B,1,3,3) * (B,K,3,1) -> (B,K,3,1) 58 | xyz_jts = torch.matmul(intrinsic_matrix_inverse.unsqueeze(1), uv_homo_jts.unsqueeze(-1)) 59 | xyz_jts = xyz_jts.squeeze(dim=3).cuda() 60 | 61 | # recover absolute z : (B,K) + (B,1) 62 | abs_z = dz + root_trans[:, 2].unsqueeze(-1).cuda() 63 | # multipy absolute z : (B,K,3) * (B,K,1) 64 | xyz_jts = xyz_jts * abs_z.unsqueeze(-1) 65 | 66 | if return_relative: 67 | # (B,K,3) - (B,1,3) 68 | xyz_jts = xyz_jts - root_trans.unsqueeze(1).cuda() 69 | 70 | # xyz_jts = xyz_jts / depth_factor.unsqueeze(-1) 71 | # output xyz unit: m 72 | 73 | return xyz_jts.cuda() 74 | 75 | 76 | def xyz_to_uvd(xyz_jts, image_size, intrinsic_matrix, root_trans, depth_factor, return_relative=False): 77 | 78 | """ 79 | Adapted from https://github.com/Jeff-sjtu/HybrIK/tree/main/hybrik/models 80 | """ 81 | 82 | assert xyz_jts.dim() == 3 and xyz_jts.shape[2] == 3, xyz_jts.shape 83 | xyz_jts = xyz_jts.cuda() 84 | intrinsic_matrix = intrinsic_matrix.cuda() 85 | root_trans = root_trans.cuda() 86 | uvd_jts = torch.empty_like(xyz_jts).cuda() 87 | if return_relative: 88 | # (B,K,3) - (B,1,3) 89 | xyz_jts = xyz_jts + root_trans.unsqueeze(1) 90 | assert torch.sum(torch.isnan(xyz_jts)) == 0, ('xyz_jts', xyz_jts) 91 | 92 | # batch-wise matrix multipy : (B,1,3,3) * (B,K,3,1) -> (B,K,3,1) 93 | uvz_jts = torch.matmul(intrinsic_matrix.unsqueeze(1), xyz_jts.unsqueeze(-1)) 94 | uvz_jts = uvz_jts.squeeze(dim=3) 95 | 96 | uv_homo = uvz_jts / uvz_jts[:, :, 2].unsqueeze(-1) 97 | 98 | abs_z = xyz_jts[:, :, 2] 99 | dz = abs_z - root_trans[:, 2].unsqueeze(-1) 100 | 101 | uvd_jts[:, :, 2] = dz / depth_factor 102 | uvd_jts[:, :, 0] = uv_homo[:, :, 0] / float(image_size) - 0.5 103 | uvd_jts[:, :, 1] = uv_homo[:, :, 1] / float(image_size) - 0.5 104 | 105 | assert torch.sum(torch.isnan(uvd_jts)) == 0, ('uvd_jts', uvd_jts) 106 | 107 | return uvd_jts 108 | 109 | 110 | def xyz_to_uvd_from_gt2d(xyz_jts, gt_uv_2d, image_size, root_trans, depth_factor, return_relative=False): 111 | 112 | assert xyz_jts.dim() == 3 and xyz_jts.shape[2] == 3, xyz_jts.shape 113 | assert gt_uv_2d.dim() == 3 and gt_uv_2d.shape[2] == 2, gt_uv_2d.shape 114 | xyz_jts = xyz_jts.cuda() 115 | root_trans = root_trans.cuda() 116 | uvd_jts = torch.empty_like(xyz_jts).cuda() 117 | if return_relative: 118 | # (B,K,3) - (B,1,3) 119 | xyz_jts = xyz_jts + root_trans.unsqueeze(1) 120 | assert torch.sum(torch.isnan(xyz_jts)) == 0, ('xyz_jts', xyz_jts) 121 | 122 | abs_z = xyz_jts[:, :, 2] 123 | dz = abs_z - root_trans[:, 2].unsqueeze(-1) 124 | 125 | uvd_jts[:, :, 2] = dz / depth_factor 126 | uvd_jts[:, :, 0] = gt_uv_2d[:, :, 0] / float(image_size) - 0.5 127 | uvd_jts[:, :, 1] = gt_uv_2d[:, :, 1] / float(image_size) - 0.5 128 | 129 | assert torch.sum(torch.isnan(uvd_jts)) == 0, ('uvd_jts', uvd_jts) 130 | 131 | return uvd_jts 132 | 133 | def uvz2xyz_singlepoint(uv, z, K): 134 
| batch_size = uv.shape[0] 135 | assert uv.shape == (batch_size, 2) and z.shape == (batch_size,1) and K.shape == (batch_size,3,3), (uv.shape, z.shape, K.shape) 136 | inv_k = get_intrinsic_matrix_batch((K[:,0,0],K[:,1,1]), (K[:,0,2],K[:,1,2]), bsz=batch_size, inv=True) 137 | device = inv_k.device 138 | xy_unnormalized = uv * z 139 | xyz_transformed = torch.cat([xy_unnormalized, z], dim=1) 140 | xyz_transformed = xyz_transformed.to(device) 141 | assert xyz_transformed.shape == (batch_size, 3) and inv_k.shape == (batch_size, 3, 3) 142 | xyz = torch.matmul(inv_k, xyz_transformed.unsqueeze(-1)).squeeze(-1).cuda() 143 | return xyz 144 | 145 | def get_intrinsic_matrix_batch(f, c, bsz, inv=False): 146 | 147 | intrinsic_matrix = torch.zeros((bsz, 3, 3)).to(torch.float) 148 | 149 | if inv: 150 | intrinsic_matrix[:, 0, 0] = 1.0 / f[0].to(float) 151 | intrinsic_matrix[:, 0, 2] = - c[0].to(float) / f[0].to(float) 152 | intrinsic_matrix[:, 1, 1] = 1.0 / f[1].to(float) 153 | intrinsic_matrix[:, 1, 2] = - c[1].to(float) / f[1].to(float) 154 | intrinsic_matrix[:, 2, 2] = 1 155 | else: 156 | intrinsic_matrix[:, 0, 0] = f[0] 157 | intrinsic_matrix[:, 0, 2] = c[0] 158 | intrinsic_matrix[:, 1, 1] = f[1] 159 | intrinsic_matrix[:, 1, 2] = c[1] 160 | intrinsic_matrix[:, 2, 2] = 1 161 | 162 | return intrinsic_matrix.cuda(device=0) -------------------------------------------------------------------------------- /scripts/train_full.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 4 | import numpy as np 5 | import torch 6 | from lib.core.function import farward_loss, validate 7 | from lib.dataset.const import INITIAL_JOINT_ANGLE 8 | from lib.models.full_net import get_rootNetwithRegInt_model 9 | from lib.utils.urdf_robot import URDFRobot 10 | from lib.utils.utils import set_random_seed, create_logger, get_dataloaders, get_scheduler, resume_run, save_checkpoint 11 | from torchnet.meter import AverageValueMeter 12 | from tqdm import tqdm 13 | 14 | 15 | def train_full(args): 16 | 17 | torch.autograd.set_detect_anomaly(True) 18 | set_random_seed(808) 19 | 20 | save_folder, ckpt_folder, log_folder, writer = create_logger(args) 21 | 22 | urdf_robot_name = args.urdf_robot_name 23 | robot = URDFRobot(urdf_robot_name) 24 | 25 | device_id = args.device_id 26 | device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") 27 | 28 | ds_iter_train, test_loader_dict = get_dataloaders(args) 29 | 30 | init_param_dict = { 31 | "robot_type" : urdf_robot_name, 32 | "pose_params": INITIAL_JOINT_ANGLE, 33 | "cam_params": np.eye(4,dtype=float), 34 | "init_pose_from_mean": True 35 | } 36 | if args.use_rootnet_with_reg_int_shared_backbone: 37 | print("regression and integral shared backbone, with rootnet 2 backbones in total") 38 | model = get_rootNetwithRegInt_model(init_param_dict, args) 39 | else: 40 | assert 0 41 | 42 | optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) 43 | 44 | curr_max_auc = 0.0 45 | curr_max_auc_4real = { "azure": 0.0, "kinect": 0.0, "realsense": 0.0, "orb": 0.0 } 46 | start_epoch, last_epoch, end_epoch = 0, -1, args.n_epochs 47 | if args.resume_run: 48 | start_epoch, last_epoch, curr_max_auc, curr_max_auc_4real = resume_run(args, model, optimizer, device) 49 | 50 | lr_scheduler = get_scheduler(args, optimizer, last_epoch) 51 | 52 | 53 | for epoch in range(start_epoch, end_epoch + 1): 54 | print('In 
epoch {}, script: full network training (JointNet/RotationNet/KeypoinNet/DepthNet)'.format(epoch + 1)) 55 | model.train() 56 | iterator = tqdm(ds_iter_train, dynamic_ncols=True) 57 | losses = AverageValueMeter() 58 | losses_pose, losses_rot, losses_trans, losses_uv, losses_depth, losses_error2d, losses_error3d, losses_error2d_int, losses_error3d_int, losses_error3d_align = \ 59 | AverageValueMeter(),AverageValueMeter(),AverageValueMeter(),AverageValueMeter(),AverageValueMeter(),AverageValueMeter(),AverageValueMeter(),AverageValueMeter(),AverageValueMeter(),AverageValueMeter() 60 | for batchid, sample in enumerate(iterator): 61 | optimizer.zero_grad() 62 | loss, loss_dict = farward_loss(args=args, input_batch=sample, model=model, robot=robot, device=device, device_id=device_id, train=True) 63 | loss.backward() 64 | if args.clip_gradient is not None: 65 | clipping_value = args.clip_gradient 66 | torch.nn.utils.clip_grad_norm_(model.parameters(), clipping_value) 67 | optimizer.step() 68 | losses.add(loss.detach().cpu().numpy()) 69 | losses_pose.add(loss_dict["loss_joint"].detach().cpu().numpy()) 70 | losses_rot.add(loss_dict["loss_rot"].detach().cpu().numpy()) 71 | losses_trans.add(loss_dict["loss_trans"].detach().cpu().numpy()) 72 | losses_uv.add(loss_dict["loss_uv"].detach().cpu().numpy()) 73 | losses_depth.add(loss_dict["loss_depth"].detach().cpu().numpy()) 74 | losses_error2d.add(loss_dict["loss_error2d"].detach().cpu().numpy()) 75 | losses_error3d.add(loss_dict["loss_error3d"].detach().cpu().numpy()) 76 | losses_error2d_int.add(loss_dict["loss_error2d_int"].detach().cpu().numpy()) 77 | losses_error3d_int.add(loss_dict["loss_error3d_int"].detach().cpu().numpy()) 78 | losses_error3d_align.add(loss_dict["loss_error3d_align"].detach().cpu().numpy()) 79 | 80 | if (batchid+1) % 100 == 0: # Every 100 mini-batches/iterations 81 | writer.add_scalar('Train/loss', losses.mean , epoch * len(ds_iter_train) + batchid + 1) 82 | writer.add_scalar('Train/pose_loss', losses_pose.mean , epoch * len(ds_iter_train) + batchid + 1) 83 | writer.add_scalar('Train/rot_loss', losses_rot.mean , epoch * len(ds_iter_train) + batchid + 1) 84 | writer.add_scalar('Train/trans_loss', losses_trans.mean , epoch * len(ds_iter_train) + batchid + 1) 85 | writer.add_scalar('Train/uv_loss', losses_uv.mean , epoch * len(ds_iter_train) + batchid + 1) 86 | writer.add_scalar('Train/depth_loss', losses_depth.mean , epoch * len(ds_iter_train) + batchid + 1) 87 | writer.add_scalar('Train/error2d_loss', losses_error2d.mean, epoch * len(ds_iter_train) + batchid + 1) 88 | writer.add_scalar('Train/error3d_loss', losses_error3d.mean, epoch * len(ds_iter_train) + batchid + 1) 89 | writer.add_scalar('Train/error2d_int_loss', losses_error2d_int.mean, epoch * len(ds_iter_train) + batchid + 1) 90 | writer.add_scalar('Train/error3d_int_loss', losses_error3d_int.mean, epoch * len(ds_iter_train) + batchid + 1) 91 | writer.add_scalar('Train/error3d_align_loss', losses_error3d_align.mean, epoch * len(ds_iter_train) + batchid + 1) 92 | losses.reset() 93 | losses_pose.reset() 94 | losses_rot.reset() 95 | losses_trans.reset() 96 | losses_uv.reset() 97 | losses_depth.reset() 98 | losses_error2d.reset() 99 | losses_error3d.reset() 100 | losses_error2d_int.reset() 101 | losses_error3d_int.reset() 102 | losses_error3d_align.reset() 103 | writer.add_scalar('LR/learning_rate_opti', optimizer.param_groups[0]['lr'], epoch * len(ds_iter_train) + batchid + 1) 104 | if len(optimizer.param_groups) > 1: 105 | for pgid in range(1,len(optimizer.param_groups)): 106 | 
writer.add_scalar(f'LR/learning_rate_opti_{pgid}', optimizer.param_groups[pgid]['lr'], epoch * len(ds_iter_train) + batchid + 1) 107 | if args.use_schedule: 108 | lr_scheduler.step() 109 | 110 | auc_adds = {} 111 | for dsname, loader in test_loader_dict.items(): 112 | auc_add = validate(args=args, epoch=epoch, dsname=dsname, loader=loader, model=model, 113 | robot=robot, writer=writer, device=device, device_id=device_id) 114 | auc_adds[dsname] = auc_add 115 | 116 | save_checkpoint(args=args, auc_adds=auc_adds, 117 | model=model, optimizer=optimizer, 118 | ckpt_folder=ckpt_folder, 119 | epoch=epoch, lr_scheduler=lr_scheduler, 120 | curr_max_auc=curr_max_auc, 121 | curr_max_auc_4real=curr_max_auc_4real) 122 | 123 | print("Training Finished !") 124 | writer.flush() 125 | -------------------------------------------------------------------------------- /lib/models/depth_net.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 4 | import torch 5 | import torch.nn as nn 6 | from .backbones.HRnet import get_hrnet 7 | from .backbones.Resnet import get_resnet 8 | from torch.nn import functional as F 9 | 10 | 11 | class RootNet(nn.Module): 12 | 13 | def __init__(self, backbone, pred_xy=False, use_offset=False, add_fc=False, input_shape=(256,256), **kwargs): 14 | 15 | super(RootNet, self).__init__() 16 | self.backbone_name = backbone 17 | if backbone in ["resnet34", "resnet50", "resnet"]: 18 | self.backbone = get_resnet(backbone) 19 | self.inplanes = self.backbone.block.expansion * 512 20 | elif backbone in ["hrnet", "hrnet32"]: 21 | self.backbone = get_hrnet(type_name=32, num_joints=7, depth_dim=1, 22 | pretrain=True, generate_feat=True, generate_hm=False) 23 | self.inplanes = 2048 24 | else: 25 | raise NotImplementedError 26 | 27 | self.pred_xy = pred_xy 28 | self.add_fc = add_fc 29 | self.use_offset = use_offset 30 | self.input_shape = input_shape 31 | self.output_shape = (input_shape[0]//4, input_shape[1]//4) 32 | self.outplanes = 256 33 | 34 | if self.pred_xy: 35 | self.deconv_layers = self._make_deconv_layer(3) 36 | self.xy_layer = nn.Conv2d( 37 | in_channels=self.outplanes, 38 | out_channels=1, 39 | kernel_size=1, 40 | stride=1, 41 | padding=0 42 | ) 43 | 44 | if self.add_fc: 45 | self.depth_relu = nn.ReLU() 46 | self.depth_fc1 = nn.Linear(self.inplanes, self.inplanes//2) 47 | self.depth_bn1 = nn.BatchNorm1d(self.inplanes//2) 48 | self.depth_fc2 = nn.Linear(self.inplanes//2, self.inplanes//4) 49 | self.depth_bn2 = nn.BatchNorm1d(self.inplanes//4) 50 | self.depth_fc3 = nn.Linear(self.inplanes//4, self.inplanes//4) 51 | self.depth_bn3 = nn.BatchNorm1d(self.inplanes//4) 52 | self.depth_fc4 = nn.Linear(self.inplanes//4, self.inplanes//2) 53 | self.depth_bn4 = nn.BatchNorm1d(self.inplanes//2) 54 | self.depth_fc5 = nn.Linear(self.inplanes//2, self.inplanes) 55 | 56 | self.depth_layer = nn.Conv2d( 57 | in_channels=self.inplanes, 58 | out_channels=1, 59 | kernel_size=1, 60 | stride=1, 61 | padding=0 62 | ) 63 | if self.use_offset: 64 | self.offset_layer = nn.Conv2d( 65 | in_channels=self.inplanes, 66 | out_channels=1, 67 | kernel_size=1, 68 | stride=1, 69 | padding=0 70 | ) 71 | 72 | def _make_deconv_layer(self, num_layers): 73 | layers = [] 74 | inplanes = self.inplanes 75 | outplanes = self.outplanes 76 | for i in range(num_layers): 77 | layers.append( 78 | nn.ConvTranspose2d( 79 | in_channels=inplanes, 80 | out_channels=outplanes, 81 | kernel_size=4, 82 | 
stride=2, 83 | padding=1, 84 | output_padding=0, 85 | bias=False)) 86 | layers.append(nn.BatchNorm2d(outplanes)) 87 | layers.append(nn.ReLU(inplace=True)) 88 | inplanes = outplanes 89 | 90 | return nn.Sequential(*layers) 91 | 92 | def forward(self, x, k_value): 93 | if self.backbone_name in ["resnet34", "resnet50", "resnet"]: 94 | fm = self.backbone(x) 95 | img_feat = torch.mean(fm.view(fm.size(0), fm.size(1), fm.size(2)*fm.size(3)), dim=2) # global average pooling 96 | elif self.backbone_name in ["hrnet", "hrnet32"]: 97 | img_feat = self.backbone(x) 98 | 99 | # x,y 100 | if self.pred_xy: 101 | xy = self.deconv_layers(fm) 102 | xy = self.xy_layer(xy) 103 | xy = xy.view(-1,1,self.output_shape[0]*self.output_shape[1]) 104 | xy = F.softmax(xy,2) 105 | xy = xy.view(-1,1,self.output_shape[0],self.output_shape[1]) 106 | hm_x = xy.sum(dim=(2)) 107 | hm_y = xy.sum(dim=(3)) 108 | coord_x = hm_x * torch.arange(self.output_shape[1]).float().cuda() 109 | coord_y = hm_y * torch.arange(self.output_shape[0]).float().cuda() 110 | coord_x = coord_x.sum(dim=2) 111 | coord_y = coord_y.sum(dim=2) 112 | 113 | # z 114 | if self.add_fc: 115 | img_feat1 = self.depth_relu(self.depth_bn1(self.depth_fc1(img_feat))) 116 | img_feat2 = self.depth_relu(self.depth_bn2(self.depth_fc2(img_feat1))) 117 | img_feat3 = self.depth_relu(self.depth_bn3(self.depth_fc3(img_feat2))) 118 | img_feat4 = self.depth_relu(self.depth_bn4(self.depth_fc4(img_feat3))) 119 | img_feat5 = self.depth_fc5(img_feat4) 120 | img_feat = img_feat + img_feat5 121 | img_feat = torch.unsqueeze(img_feat,2) 122 | img_feat = torch.unsqueeze(img_feat,3) 123 | gamma = self.depth_layer(img_feat) 124 | gamma = gamma.view(-1,1) 125 | depth = gamma * k_value.view(-1,1) 126 | 127 | if self.use_offset: 128 | offset = self.offset_layer(img_feat) 129 | offset = offset.view(-1,1) # unit: m 130 | offset *= 1000.0 131 | depth += offset 132 | 133 | if self.pred_xy: 134 | coord = torch.cat((coord_x, coord_y, depth), dim=1) 135 | return coord 136 | else: 137 | return depth 138 | 139 | def init_weights(self): 140 | if self.pred_xy: 141 | for name, m in self.deconv_layers.named_modules(): 142 | if isinstance(m, nn.ConvTranspose2d): 143 | nn.init.normal_(m.weight, std=0.001) 144 | elif isinstance(m, nn.BatchNorm2d): 145 | nn.init.constant_(m.weight, 1) 146 | nn.init.constant_(m.bias, 0) 147 | for m in self.xy_layer.modules(): 148 | if isinstance(m, nn.Conv2d): 149 | nn.init.normal_(m.weight, std=0.001) 150 | nn.init.constant_(m.bias, 0) 151 | print("Initialized deconv and xy layer of RootNet.") 152 | for m in self.depth_layer.modules(): 153 | if isinstance(m, nn.Conv2d): 154 | nn.init.normal_(m.weight, std=0.001) 155 | nn.init.constant_(m.bias, 0) 156 | print("Initialized depth layer of RootNet.") 157 | if self.use_offset: 158 | for m in self.offset_layer.modules(): 159 | if isinstance(m, nn.Conv2d): 160 | nn.init.normal_(m.weight, std=0.001) 161 | nn.init.constant_(m.bias, 0) 162 | print("Initialized offset layer of RootNet.") 163 | 164 | 165 | def get_rootnet(backbone, pred_xy=False, use_offset=False, add_fc=False, input_shape=(256,256), **kwargs): 166 | model = RootNet(backbone, pred_xy, use_offset, add_fc, input_shape=(256,256), **kwargs) 167 | model.init_weights() 168 | return model 169 | 170 | 171 | -------------------------------------------------------------------------------- /lib/models/backbones/Resnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class ResNet(nn.Module): 6 
| 7 | def __init__(self, resnet_type): 8 | 9 | resnet_spec = {"resnet18": (BasicBlock, [2, 2, 2, 2], [64, 64, 128, 256, 512]), 10 | "resnet34": (BasicBlock, [3, 4, 6, 3], [64, 64, 128, 256, 512]), 11 | "resnet50": (Bottleneck, [3, 4, 6, 3], [64, 256, 512, 1024, 2048]), 12 | "resnet101": (Bottleneck, [3, 4, 23, 3], [64, 256, 512, 1024, 2048]), 13 | "resnet152": (Bottleneck, [3, 8, 36, 3], [64, 256, 512, 1024, 2048])} 14 | block, layers, channels = resnet_spec[resnet_type] 15 | 16 | self.block = block 17 | 18 | self.name = resnet_type 19 | self.inplanes = 64 20 | super(ResNet, self).__init__() 21 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 22 | bias=False) 23 | self.bn1 = nn.BatchNorm2d(64) 24 | self.relu = nn.ReLU(inplace=True) 25 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 26 | self.layer1 = self._make_layer(block, 64, layers[0]) 27 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 28 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 29 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 30 | 31 | for m in self.modules(): 32 | if isinstance(m, nn.Conv2d): 33 | # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 34 | nn.init.normal_(m.weight, mean=0, std=0.001) 35 | elif isinstance(m, nn.BatchNorm2d): 36 | nn.init.constant_(m.weight, 1) 37 | nn.init.constant_(m.bias, 0) 38 | 39 | def _make_layer(self, block, planes, blocks, stride=1): 40 | downsample = None 41 | if stride != 1 or self.inplanes != planes * block.expansion: 42 | downsample = nn.Sequential( 43 | nn.Conv2d(self.inplanes, planes * block.expansion, 44 | kernel_size=1, stride=stride, bias=False), 45 | nn.BatchNorm2d(planes * block.expansion), 46 | ) 47 | 48 | layers = [] 49 | layers.append(block(self.inplanes, planes, stride, downsample)) 50 | self.inplanes = planes * block.expansion 51 | for i in range(1, blocks): 52 | layers.append(block(self.inplanes, planes)) 53 | 54 | return nn.Sequential(*layers) 55 | 56 | def forward(self, x): 57 | x = self.conv1(x) 58 | x = self.bn1(x) 59 | x = self.relu(x) 60 | x = self.maxpool(x) 61 | 62 | x = self.layer1(x) 63 | x = self.layer2(x) 64 | x = self.layer3(x) 65 | x = self.layer4(x) 66 | 67 | return x 68 | 69 | def init_weights(self, backbone_name): 70 | # org_resnet = torch.utils.model_zoo.load_url(model_urls[self.name]) 71 | # drop orginal resnet fc layer, add 'None' in case of no fc layer, that will raise error 72 | 73 | import torchvision.models.resnet as resnet_ 74 | if backbone_name == "resnet34": 75 | resnet_imagenet = resnet_.resnet34(pretrained=True) 76 | org_resnet = resnet_imagenet.state_dict() 77 | elif backbone_name in ["resnet", "resnet50"]: 78 | backbone_name = "resnet50" 79 | resnet_imagenet = resnet_.resnet50(pretrained=True) 80 | org_resnet = resnet_imagenet.state_dict() 81 | elif backbone_name == "resnet101": 82 | resnet_imagenet = resnet_.resnet101(pretrained=True) 83 | org_resnet = resnet_imagenet.state_dict() 84 | else: 85 | raise NotImplementedError 86 | 87 | org_resnet.pop('fc.weight', None) 88 | org_resnet.pop('fc.bias', None) 89 | 90 | self.load_state_dict(org_resnet, strict=True) 91 | 92 | print(f"Initialized {backbone_name} from model zoo") 93 | 94 | 95 | 96 | class Bottleneck(nn.Module): 97 | """ Redefinition of Bottleneck residual block 98 | Adapted from the official PyTorch implementation 99 | """ 100 | expansion = 4 101 | 102 | def __init__(self, inplanes, planes, stride=1, downsample=None): 103 | super(Bottleneck, self).__init__() 104 | self.conv1 
= nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 105 | self.bn1 = nn.BatchNorm2d(planes) 106 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 107 | padding=1, bias=False) 108 | self.bn2 = nn.BatchNorm2d(planes) 109 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 110 | self.bn3 = nn.BatchNorm2d(planes * 4) 111 | self.relu = nn.ReLU(inplace=True) 112 | self.downsample = downsample 113 | self.stride = stride 114 | 115 | def forward(self, x): 116 | residual = x 117 | 118 | out = self.conv1(x) 119 | out = self.bn1(out) 120 | out = self.relu(out) 121 | 122 | out = self.conv2(out) 123 | out = self.bn2(out) 124 | out = self.relu(out) 125 | 126 | out = self.conv3(out) 127 | out = self.bn3(out) 128 | 129 | if self.downsample is not None: 130 | residual = self.downsample(x) 131 | 132 | out += residual 133 | out = self.relu(out) 134 | 135 | return out 136 | 137 | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): 138 | """3x3 convolution with padding""" 139 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 140 | padding=dilation, groups=groups, bias=False, dilation=dilation) 141 | 142 | class BasicBlock(nn.Module): 143 | expansion = 1 144 | 145 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, 146 | base_width=64, dilation=1, norm_layer=None, dcn=None): 147 | super(BasicBlock, self).__init__() 148 | if norm_layer is None: 149 | norm_layer = nn.BatchNorm2d 150 | if groups != 1 or base_width != 64: 151 | raise ValueError('BasicBlock only supports groups=1 and base_width=64') 152 | if dilation > 1: 153 | raise NotImplementedError("Dilation > 1 not supported in BasicBlock") 154 | # Both self.conv1 and self.downsample layers downsample the input when stride != 1 155 | self.conv1 = conv3x3(inplanes, planes, stride) 156 | self.bn1 = norm_layer(planes) 157 | self.relu = nn.ReLU(inplace=True) 158 | self.conv2 = conv3x3(planes, planes) 159 | self.bn2 = norm_layer(planes) 160 | self.downsample = downsample 161 | self.stride = stride 162 | 163 | def forward(self, x): 164 | identity = x 165 | 166 | out = self.conv1(x) 167 | out = self.bn1(out) 168 | out = self.relu(out) 169 | 170 | out = self.conv2(out) 171 | out = self.bn2(out) 172 | 173 | if self.downsample is not None: 174 | identity = self.downsample(x) 175 | 176 | out += identity 177 | out = self.relu(out) 178 | 179 | return out 180 | 181 | 182 | 183 | def get_resnet(backbone_name, pretrain=True): 184 | 185 | if backbone_name == "resnet": 186 | backbone = "resnet50" 187 | else: 188 | backbone = backbone_name 189 | 190 | model = ResNet(backbone) 191 | 192 | if pretrain: 193 | model.init_weights(backbone_name) 194 | return model -------------------------------------------------------------------------------- /lib/models/ctrnet/keypoint_seg_resnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | import numpy as np 6 | import torchvision.models as models 7 | 8 | 9 | 10 | class KeypointUpSample(nn.Module): 11 | def __init__(self, in_channels, num_keypoints): 12 | super().__init__() 13 | input_features = in_channels 14 | deconv_kernel = 4 15 | self.kps_score_lowres = nn.ConvTranspose2d( 16 | input_features, 17 | num_keypoints, 18 | deconv_kernel, 19 | stride=2, 20 | padding=deconv_kernel // 2 - 1, 21 | ) 22 | nn.init.kaiming_normal_(self.kps_score_lowres.weight, mode="fan_out", 
nonlinearity="relu") 23 | nn.init.constant_(self.kps_score_lowres.bias, 0) 24 | #nn.init.uniform_(self.kps_score_lowres.weight) 25 | #nn.init.uniform_(self.kps_score_lowres.bias) 26 | self.up_scale = 1 27 | self.out_channels = num_keypoints 28 | 29 | def forward(self, x): 30 | x = self.kps_score_lowres(x) 31 | return torch.nn.functional.interpolate( 32 | x, scale_factor=float(self.up_scale), mode="bilinear", align_corners=False, recompute_scale_factor=False 33 | ) 34 | 35 | 36 | 37 | 38 | class SpatialSoftArgmax(nn.Module): 39 | """ 40 | The spatial softmax of each feature 41 | map is used to compute a weighted mean of the pixel 42 | locations, effectively performing a soft arg-max 43 | over the feature dimension. 44 | 45 | """ 46 | 47 | def __init__(self, normalize=True): 48 | """Constructor. 49 | Args: 50 | normalize (bool): Whether to use normalized 51 | image coordinates, i.e. coordinates in 52 | the range `[-1, 1]`. 53 | """ 54 | super().__init__() 55 | 56 | self.normalize = normalize 57 | 58 | def _coord_grid(self, h, w, device): 59 | if self.normalize: 60 | return torch.stack( 61 | torch.meshgrid( 62 | torch.linspace(-1, 1, h, device=device), 63 | torch.linspace(-1, 1, w, device=device), 64 | indexing='ij', 65 | ) 66 | ) 67 | return torch.stack( 68 | torch.meshgrid( 69 | torch.arange(0, h, device=device), 70 | torch.arange(0, w, device=device), 71 | indexing='ij', 72 | ) 73 | ) 74 | 75 | def forward(self, x): 76 | assert x.ndim == 4, "Expecting a tensor of shape (B, C, H, W)." 77 | 78 | # compute a spatial softmax over the input: 79 | # given an input of shape (B, C, H, W), 80 | # reshape it to (B*C, H*W) then apply 81 | # the softmax operator over the last dimension 82 | b, c, h, w = x.shape 83 | softmax = F.softmax(x.view(-1, h * w), dim=-1) 84 | 85 | # create a meshgrid of pixel coordinates 86 | # both in the x and y axes 87 | yc, xc = self._coord_grid(h, w, x.device) 88 | 89 | # element-wise multiply the x and y coordinates 90 | # with the softmax, then sum over the h*w dimension 91 | # this effectively computes the weighted mean of x 92 | # and y locations 93 | x_mean = (softmax * xc.flatten()).sum(dim=1, keepdims=True) 94 | y_mean = (softmax * yc.flatten()).sum(dim=1, keepdims=True) 95 | 96 | # concatenate and reshape the result 97 | # to (B, C, 2) where for every feature 98 | # we have the expected x and y pixel 99 | # locations 100 | return torch.cat([x_mean, y_mean], dim=1).view(-1, c, 2) 101 | 102 | 103 | class KeyPointSegNet(nn.Module): 104 | def __init__(self, args, lim=[-1., 1., -1., 1.], use_gpu=True): 105 | super(KeyPointSegNet, self).__init__() 106 | 107 | self.args = args 108 | self.lim = lim 109 | 110 | k = args.n_kp 111 | 112 | if use_gpu: 113 | self.device = "cuda" 114 | else: 115 | self.device = "cpu" 116 | 117 | 118 | deeplabv3_resnet50 = models.segmentation.deeplabv3_resnet50(pretrained=True) 119 | deeplabv3_resnet50.classifier[4] = torch.nn.Conv2d(256, 1, kernel_size=(1, 1), stride=(1, 1)) # Change final layer to 2 classes 120 | 121 | self.backbone = torch.nn.Sequential(list(deeplabv3_resnet50.children())[0]) 122 | 123 | self.read_out = KeypointUpSample(2048, k) 124 | 125 | self.spatialsoftargmax = SpatialSoftArgmax() 126 | 127 | self.classifer = torch.nn.Sequential((list(deeplabv3_resnet50.children())[1])) 128 | 129 | 130 | 131 | def forward(self, img): 132 | input_shape = img.shape[-2:] 133 | 134 | resnet_out = self.backbone(img)['out'] # (B, 2048, H//8, W//8) 135 | 136 | # keypoint prediction branch 137 | heatmap = self.read_out(resnet_out) # (B, k, H//4, 
W//4) 138 | keypoints = self.spatialsoftargmax(heatmap) 139 | # mapping back to original resolution from [-1,1] 140 | offset = torch.tensor([self.lim[0], self.lim[2]], device = resnet_out.device) 141 | scale = torch.tensor([self.args.width // 2, self.args.height // 2], device = resnet_out.device) 142 | keypoints = keypoints - offset 143 | keypoints = keypoints * scale 144 | 145 | # segmentation branch 146 | x = self.classifer(resnet_out) 147 | segout = F.interpolate(x, size=input_shape, mode='bilinear', align_corners=False) 148 | 149 | return keypoints, segout 150 | 151 | 152 | class KeyPointSegNet_x(nn.Module): 153 | def __init__(self, args=None, lim=[-1., 1., -1., 1.], use_gpu=True): 154 | super(KeyPointSegNet_x, self).__init__() 155 | 156 | self.args = args 157 | self.lim = lim 158 | 159 | k = 7 160 | self.width = 640 161 | self.height = 480 162 | 163 | if use_gpu: 164 | self.device = "cuda" 165 | else: 166 | self.device = "cpu" 167 | 168 | 169 | deeplabv3_resnet50 = models.segmentation.deeplabv3_resnet50(pretrained=True) 170 | deeplabv3_resnet50.classifier[4] = torch.nn.Conv2d(256, 1, kernel_size=(1, 1), stride=(1, 1)) # Change final layer to 2 classes 171 | 172 | self.backbone = torch.nn.Sequential(list(deeplabv3_resnet50.children())[0]) 173 | 174 | self.read_out = KeypointUpSample(2048, k) 175 | 176 | self.spatialsoftargmax = SpatialSoftArgmax() 177 | 178 | self.classifer = torch.nn.Sequential((list(deeplabv3_resnet50.children())[1])) 179 | 180 | 181 | 182 | def forward(self, img): 183 | input_shape = img.shape[-2:] 184 | 185 | resnet_out = self.backbone(img)['out'] # (B, 2048, H//8, W//8) 186 | 187 | # keypoint prediction branch 188 | heatmap = self.read_out(resnet_out) # (B, k, H//4, W//4) 189 | keypoints = self.spatialsoftargmax(heatmap) 190 | # mapping back to original resolution from [-1,1] 191 | offset = torch.tensor([self.lim[0], self.lim[2]], device = resnet_out.device) 192 | scale = torch.tensor([self.width // 2, self.height // 2], device = resnet_out.device) 193 | keypoints = keypoints - offset 194 | keypoints = keypoints * scale 195 | 196 | # segmentation branch 197 | x = self.classifer(resnet_out) 198 | segout = F.interpolate(x, size=input_shape, mode='bilinear', align_corners=False) 199 | 200 | return keypoints, segout 201 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
15 |
16 | This is the official PyTorch implementation of the paper "Real-time Holistic Robot Pose Estimation with Unknown States". It provides an efficient framework for real-time robot pose estimation from RGB images without requiring known robot states.
17 |
18 | ## Installation
19 | This project's dependencies include Python 3.9, PyTorch 1.13, PyTorch3D 0.7.4, and CUDA 11.7.
20 | The code is developed and tested on Ubuntu 20.04.
21 |
22 | ```bash
23 | pip install torch==1.13.1+cu117 torchvision==0.14.1+cu117
24 | pip install -r requirements.txt
25 | conda install pytorch3d=0.7.4 # from https://anaconda.org/pytorch3d/pytorch3d/files
26 | ```
27 |
28 | ## Data and Model Preparation
29 |
30 | In our work, we use the following data and pretrained models:
31 | * The [DREAM datasets](https://drive.google.com/drive/folders/1uNK2n9wU4tRE07sM_r640wDhwmOwuxx6) consisting of both real and synthetic subsets, placed under `${ROOT}/data/dream/$`.
32 | * The [URDF](https://drive.google.com/drive/folders/17KNhy28pypheYfDCxgOjJf4IyUnOI3gW?) (Unified Robot Description Format) models of the Panda, Kuka and Baxter robots, placed under `${ROOT}/data/deps/$`.
33 | * The [pretrained HRNet backbone](https://drive.google.com/file/d/1eqIftq1T_oIGhmCfkVYSM245Wj5xZaUo/view?) for pose estimation, placed under `${ROOT}/models/$`.
34 | * The openly available [foreground segmentation models](https://drive.google.com/drive/folders/1PpXe3p5dJt9EOM-fwvJ9TNStTWTQFDNK?) for the 4 real Panda datasets from [CtRNet](https://github.com/ucsdarclab/CtRNet-robot-pose-estimation), placed under `${ROOT}/models/panda_segmentation/$`.
35 |
36 | You can download the data and models through the links provided above.
37 | When finished, the directory tree should look like this:
38 | ```
39 | ${ROOT}
40 | |-- data
41 | |   |-- dream
42 | |   |   |-- real
43 | |   |   |   |-- panda-3cam_azure
44 | |   |   |   |-- panda-3cam_kinect360
45 | |   |   |   |-- panda-3cam_realsense
46 | |   |   |   |-- panda-orb
47 | |   |   |-- synthetic
48 | |   |   |   |-- baxter_synth_test_dr
49 | |   |   |   |-- baxter_synth_train_dr
50 | |   |   |   |-- kuka_synth_test_dr
51 | |   |   |   |-- kuka_synth_test_photo
52 | |   |   |   |-- kuka_synth_train_dr
53 | |   |   |   |-- panda_synth_test_dr
54 | |   |   |   |-- panda_synth_test_photo
55 | |   |   |   |-- panda_synth_train_dr
56 | |   |-- deps
57 | |   |   |-- baxter-description
58 | |   |   |-- kuka-description
59 | |   |   |-- panda-description
60 | |-- models
61 | |   |-- panda_segmentation
62 | |   |   |-- azure.pth
63 | |   |   |-- kinect.pth
64 | |   |   |-- orb.pth
65 | |   |   |-- realsense.pth
66 | |   |-- hrnet_w32-36af842e_roc.pth
67 | ```
68 |
69 | ## Train
70 | We train our final model in a multi-stage fashion. All models are trained on a single NVIDIA V100 GPU with 32 GB of memory. Distributed training is also supported.
71 |
72 | We use config files in `configs/` to specify the training process. We recommend filling in the `exp_name` field in each config file with a unique name, as the checkpoints and event logs produced during training will be saved under `experiments/{exp_name}`. The corresponding config file will be automatically copied into this directory.
73 |
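As a quick orientation, each config is a flat YAML mapping whose keys mirror the defaults in `lib/core/config.py`. The sketch below is illustrative only; the values are placeholders, not our released settings:
```yaml
# illustrative sketch -- see configs/{robot}/*.yaml for the actual settings
exp_name: panda_full_example        # outputs go to experiments/panda_full_example
urdf_robot_name: panda
train_ds_names: dream/synthetic/panda_synth_train_dr
batch_size: 64
n_epochs: 100
lr: 1e-4
```
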
74 | ### Synthetic Datasets
75 |
76 | Firstly, pretrain the depthnet (root depth estimator) for 100 epochs for each robot arm:
77 | ```bash
78 | python scripts/train.py --config configs/panda/depthnet.yaml
79 | python scripts/train.py --config configs/kuka/depthnet.yaml
80 | python scripts/train.py --config configs/baxter/depthnet.yaml
81 | ```
82 |
83 | With depthnet pretrained, we can train the full network for 100 epochs:
84 | ```bash
85 | python scripts/train.py --config configs/panda/full.yaml
86 | python scripts/train.py --config configs/kuka/full.yaml
87 | python scripts/train.py --config configs/baxter/full.yaml
88 | ```
89 | To save time when reproducing the results of our paper, we provide pretrained [depthnet model weights](https://drive.google.com/drive/folders/1rWC2bbA3U0IiZ7oDoKIVsWK_m4JkVarA?) for full-network training. To use them, modify the `configs/{robot}/full.yaml` file by filling in the `pretrained_rootnet` field with the path to the downloaded `.pk` file.
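
For example, the field is a single path entry in the YAML file (the path below is a placeholder for wherever you saved the downloaded weights):
```yaml
# configs/panda/full.yaml (illustrative snippet)
pretrained_rootnet: models/panda_depthnet_pretrained.pk
```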
90 |
91 | ### Real Datasets of Panda
92 |
93 | We employ self-supervised training for the 4 real datasets of Panda.
94 |
95 | Firstly, train the model on the synthetic dataset using `configs/panda/self_supervised/synth.yaml` for 100 epochs. Be sure to fill in the `pretrained_rootnet` field with the path to the pretrained Panda depthnet weights in advance.
96 |
97 | ```bash
98 | python scripts/train.py --config configs/panda/self_supervised/synth.yaml
99 | ```
100 | The training process above saves checkpoints for the 4 real datasets for further self-supervised training (e.g. `experiments/{exp_name}/ckpt/curr_best_auc(add)_azure_model.pk`).
101 |
102 | When training on synthetic data is finished, modify the `configs/panda/self_supervised/{real_dataset}.yaml` file by filling in the `pretrained_weight_on_synth` field with the path to the corresponding checkpoint (see the example after the commands below). Then start self-supervised training with:
103 |
104 | ```bash
105 | python scripts/train.py --config configs/panda/self_supervised/azure.yaml
106 | python scripts/train.py --config configs/panda/self_supervised/kinect.yaml
107 | python scripts/train.py --config configs/panda/self_supervised/realsense.yaml
108 | python scripts/train.py --config configs/panda/self_supervised/orb.yaml
109 | ```
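
As referenced above, `pretrained_weight_on_synth` is a plain path entry. A sketch for the Azure dataset, assuming you replace `{exp_name}` with the experiment name used for the synthetic run, could look like:
```yaml
# configs/panda/self_supervised/azure.yaml (illustrative snippet)
pretrained_weight_on_synth: experiments/{exp_name}/ckpt/curr_best_auc(add)_azure_model.pk
```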
110 |
111 | ## Test
112 | To evaluate models, simply run:
113 | ```bash
114 | python scripts/test.py --exp_path {path of the experiment folder} --dataset {dataset name}
115 | # e.g. python scripts/test.py -e experiments/panda_full --dataset panda_synth_test_dr
116 | # You can add '--vis_skeleton' to visualize the robot keypoint skeleton
117 | ```
118 | Note that each model is stored in an experiment folder containing `ckpt/`, `log/` and `config.yaml`. After running the test script, a `result/` folder will be generated inside it.
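
For reference, an experiment folder that has been evaluated is expected to look roughly like this (the folder name is a placeholder):
```
experiments/panda_full
|-- ckpt
|-- log
|-- config.yaml
|-- result   # generated by the test script
```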
119 |
120 | ## Model Zoo
121 | You can download our final models from [Google Drive](https://drive.google.com/drive/folders/10Gz0NP39YyuvAlrhTa-XssWTDlyh9v80?usp=sharing) and evaluate them yourself.
122 |
123 |
124 | ## Citation
125 | If you use our code or models in your research, please cite:
126 | ```bibtex
127 | @inproceedings{holisticrobotpose,
128 | author={Ban, Shikun and Fan, Juling and Ma, Xiaoxuan and Zhu, Wentao and Qiao, Yu and Wang, Yizhou},
129 | title={Real-time Holistic Robot Pose Estimation with Unknown States},
130 | booktitle = {European Conference on Computer Vision (ECCV)},
131 | year = {2024}
132 | }
133 | ```
134 |
135 | ## Acknowledgment
136 | This repo is built on the excellent work of [RoboPose](https://github.com/ylabbe/robopose) and [CtRNet](https://github.com/ucsdarclab/CtRNet-robot-pose-estimation). We thank the authors for releasing their code.
137 |
--------------------------------------------------------------------------------
/lib/utils/mesh_renderer.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 |
4 | # io utils
5 | from pytorch3d.io import load_obj
6 |
7 | # datastructures
8 | from pytorch3d.structures import Meshes
9 |
10 | # rendering components
11 | from pytorch3d.renderer import (
12 | RasterizationSettings, MeshRenderer, MeshRasterizer, BlendParams,
13 | SoftSilhouetteShader, HardPhongShader, PointLights, TexturesVertex,
14 | PerspectiveCameras,Textures
15 | )
16 |
17 | from os.path import exists
18 | from roboticstoolbox.robot.ERobot import ERobot
19 |
20 |
21 | class PandaArm():
22 | def __init__(self, urdf_file):
23 |
24 | self.robot = self.Panda(urdf_file)
25 |
26 | def get_joint_RT(self, joint_angle):
27 |
28 | assert joint_angle.shape[0] == 7
29 |
30 | link_idx_list = [0,1,2,3,4,5,6,7,9]
31 | # link 0,1,2,3,4,5,6,7, and hand
32 | R_list = []
33 | t_list = []
34 |
35 | for i in range(len(link_idx_list)):
36 | link_idx = link_idx_list[i]
37 | T = self.robot.fkine(joint_angle, end = self.robot.links[link_idx], start = self.robot.links[0])
38 | R_list.append(T.R)
39 | t_list.append(T.t)
40 |
41 | return np.array(R_list),np.array(t_list)
42 |
43 | class Panda(ERobot):
44 | """
45 | Class that imports a URDF model
46 | """
47 |
48 | def __init__(self, urdf_file):
49 |
50 | links, name, urdf_string, urdf_filepath = self.URDF_read(urdf_file)
51 |
52 | super().__init__(
53 | links,
54 | name=name,
55 | manufacturer="Franka",
56 | urdf_string=urdf_string,
57 | urdf_filepath=urdf_filepath,
58 | )
59 |
60 |
61 | class RobotMeshRenderer():
62 | """
63 | Class that render robot mesh with differentiable renderer
64 | """
65 | def __init__(self, focal_length, principal_point, image_size, robot, mesh_files, device):
66 |
67 | self.focal_length = focal_length
68 | self.principal_point = principal_point
69 | self.image_size = image_size
70 | self.device = device
71 | self.robot = robot
72 | self.mesh_files = mesh_files
73 | self.preload_verts = []
74 | self.preload_faces = []
75 |
76 |
77 | # preload the mesh to save loading time
78 | for m_file in mesh_files:
79 | assert exists(m_file)
80 | preload_verts_i, preload_faces_idx_i, _ = load_obj(m_file)
81 | preload_faces_i = preload_faces_idx_i.verts_idx
82 | self.preload_verts.append(preload_verts_i)
83 | self.preload_faces.append(preload_faces_i)
84 |
85 |
86 | # set up differentiable renderer with given camera parameters
87 | self.cameras = PerspectiveCameras(
88 | focal_length = [focal_length],
89 | principal_point = [principal_point],
90 | device=device,
91 | in_ndc=False, image_size = [image_size]
92 | ) # (height, width) !!!!!
93 |
94 | blend_params = BlendParams(sigma=1e-8, gamma=1e-8)
95 | raster_settings = RasterizationSettings(
96 | image_size=image_size,
97 | blur_radius=np.log(1. / 1e-4 - 1.) * blend_params.sigma,
98 | faces_per_pixel=100,
99 | max_faces_per_bin=100000, # max_faces_per_bin=1000000,
100 | )
101 |
102 | # Create a silhouette mesh renderer by composing a rasterizer and a shader.
103 | self.silhouette_renderer = MeshRenderer(
104 | rasterizer=MeshRasterizer(
105 | cameras=self.cameras,
106 | raster_settings=raster_settings
107 | ),
108 | shader=SoftSilhouetteShader(blend_params=blend_params)
109 | )
110 |
111 |
112 | # We will also create a Phong renderer. This is simpler and only needs to render one face per pixel.
113 | raster_settings = RasterizationSettings(
114 | image_size=image_size,
115 | blur_radius=0.0,
116 | faces_per_pixel=1,
117 | max_faces_per_bin=100000,
118 | )
119 | # We can add a point light in front of the object.
120 | lights = PointLights(device=device, location=((2.0, 2.0, -2.0),))
121 | self.phong_renderer = MeshRenderer(
122 | rasterizer=MeshRasterizer(
123 | cameras=self.cameras,
124 | raster_settings=raster_settings
125 | ),
126 | shader=HardPhongShader(device=device, cameras=self.cameras, lights=lights)
127 | )
128 |
129 | def get_robot_mesh(self, joint_angle):
130 |
131 | R_list, t_list = self.robot.get_joint_RT(joint_angle)
132 | assert len(self.mesh_files) == R_list.shape[0] and len(self.mesh_files) == t_list.shape[0]
133 |
134 | verts_list = []
135 | faces_list = []
136 | verts_rgb_list = []
137 | verts_count = 0
138 | for i in range(len(self.mesh_files)):
139 | verts_i = self.preload_verts[i]
140 | faces_i = self.preload_faces[i]
141 |
142 | R = torch.tensor(R_list[i],dtype=torch.float32)
143 | t = torch.tensor(t_list[i],dtype=torch.float32)
144 | verts_i = verts_i @ R.T + t
145 | #verts_i = (R @ verts_i.T).T + t
146 | faces_i = faces_i + verts_count
147 |
148 | verts_count+=verts_i.shape[0]
149 |
150 | verts_list.append(verts_i.to(self.device))
151 | faces_list.append(faces_i.to(self.device))
152 |
153 | # Assign a single random color to all vertices of this link's mesh.
154 | color = torch.rand(3)
155 | verts_rgb_i = torch.ones_like(verts_i) * color # (V, 3)
156 | verts_rgb_list.append(verts_rgb_i.to(self.device))
157 |
158 |
159 |
160 | verts = torch.concat(verts_list, dim=0)
161 | faces = torch.concat(faces_list, dim=0)
162 |
163 | verts_rgb = torch.concat(verts_rgb_list,dim=0)[None]
164 | textures = Textures(verts_rgb=verts_rgb)
165 |
166 | # Create a Meshes object
167 | robot_mesh = Meshes(
168 | verts=[verts.to(self.device)],
169 | faces=[faces.to(self.device)],
170 | textures=textures
171 | )
172 |
173 | return robot_mesh
174 |
175 |
176 | def get_robot_verts_and_faces(self, joint_angle):
177 |
178 | R_list, t_list = self.robot.get_joint_RT(joint_angle)
179 | assert len(self.mesh_files) == R_list.shape[0] and len(self.mesh_files) == t_list.shape[0]
180 |
181 | verts_list = []
182 | faces_list = []
183 | verts_rgb_list = []
184 | verts_count = 0
185 | for i in range(len(self.mesh_files)):
186 | verts_i = self.preload_verts[i]
187 | faces_i = self.preload_faces[i]
188 |
189 | R = torch.tensor(R_list[i],dtype=torch.float32)
190 | t = torch.tensor(t_list[i],dtype=torch.float32)
191 | verts_i = verts_i @ R.T + t
192 | #verts_i = (R @ verts_i.T).T + t
193 | faces_i = faces_i + verts_count
194 |
195 | verts_count+=verts_i.shape[0]
196 |
197 | verts_list.append(verts_i.to(self.device))
198 | faces_list.append(faces_i.to(self.device))
199 |
200 | # Initialize each vertex to be white in color.
201 | #color = torch.rand(3)
202 | #verts_rgb_i = torch.ones_like(verts_i) * color # (V, 3)
203 | #verts_rgb_list.append(verts_rgb_i.to(self.device))
204 |
205 | verts = torch.concat(verts_list, dim=0)
206 | faces = torch.concat(faces_list, dim=0)
207 |
208 |
209 | return verts, faces
--------------------------------------------------------------------------------
/lib/models/ctrnet/CtRNet.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 | import kornia
4 | import numpy as np
5 |
6 | from .keypoint_seg_resnet import KeyPointSegNet
7 | from utils.BPnP import BPnP, BPnP_m3d, batch_project
8 |
9 |
10 | class CtRNet(torch.nn.Module):
11 | def __init__(self, args):
12 | super(CtRNet, self).__init__()
13 |
14 | self.args = args
15 |
16 | if args.use_gpu:
17 | self.device = "cuda"
18 | else:
19 | self.device = "cpu"
20 |
21 | # load keypoint segmentation model
22 | self.keypoint_seg_predictor = KeyPointSegNet(args, use_gpu=args.use_gpu)
23 |
24 | if args.use_gpu:
25 | self.keypoint_seg_predictor = self.keypoint_seg_predictor.cuda()
26 |
27 | self.keypoint_seg_predictor = torch.nn.DataParallel(self.keypoint_seg_predictor, device_ids=[0])
28 |
29 | if args.keypoint_seg_model_path is not None:
30 | print("Loading keypoint segmentation model from {}".format(args.keypoint_seg_model_path))
31 | self.keypoint_seg_predictor.load_state_dict(torch.load(args.keypoint_seg_model_path))
32 |
33 | self.keypoint_seg_predictor.eval()
34 |
35 | # load BPnP
36 | self.bpnp = BPnP.apply
37 | self.bpnp_m3d = BPnP_m3d.apply
38 |
39 | # set up camera intrinsics
40 |
41 | self.intrinsics = np.array([[ args.fx, 0. , args.px ],
42 | [ 0. , args.fy, args.py ],
43 | [ 0. , 0. , 1. ]])
44 | print("Camera intrinsics: {}".format(self.intrinsics))
45 |
46 | self.K = torch.tensor(self.intrinsics, device=self.device, dtype=torch.float)
47 |
48 |
49 | def inference_single_image(self, img, joint_angles):
50 | # img: (3, H, W)
51 | # joint_angles: (7)
52 | # robot: robot model
53 |
54 | # detect 2d keypoints and segmentation masks
55 | points_2d, segmentation = self.keypoint_seg_predictor(img[None])
56 | foreground_mask = torch.sigmoid(segmentation)
57 | _,t_list = self.robot.get_joint_RT(joint_angles)
58 | points_3d = torch.from_numpy(np.array(t_list)).float().to(self.device)
59 | if self.args.robot_name == "Panda":
60 | points_3d = points_3d[[0,2,3,4,6,7,8]] # remove 1 and 5 links as they are overlapping with 2 and 6
61 |
62 | #init_pose = torch.tensor([[ 1.5497, 0.5420, -0.3909, -0.4698, -0.0211, 1.3243]])
63 | #cTr = bpnp(points_2d_pred, points_3d, K, init_pose)
64 | cTr = self.bpnp(points_2d, points_3d, self.K)
65 |
66 | return cTr, points_2d, foreground_mask
67 |
68 | def inference_batch_images(self, img, joint_angles):
69 | # img: (B, 3, H, W)
70 | # joint_angles: (B, 7)
71 | # robot: robot model
72 |
73 | # detect 2d keypoints and segmentation masks
74 | points_2d, segmentation = self.keypoint_seg_predictor(img)
75 | foreground_mask = torch.sigmoid(segmentation)
76 |
77 | points_3d_batch = []
78 | for b in range(joint_angles.shape[0]):
79 | _,t_list = self.robot.get_joint_RT(joint_angles[b])
80 | points_3d = torch.from_numpy(np.array(t_list)).float().to(self.device)
81 | if self.args.robot_name == "Panda":
82 | points_3d = points_3d[:,[0,2,3,4,6,7,8]]
83 | points_3d_batch.append(points_3d[None])
84 |
85 | points_3d_batch = torch.cat(points_3d_batch, dim=0)
86 |
87 | cTr = self.bpnp_m3d(points_2d, points_3d_batch, self.K)
88 |
89 | return cTr, points_2d, foreground_mask
90 |
91 | def inference_batch_images_seg_kp(self, img):
92 | # img: (B, 3, H, W)
93 | # returns: 2d keypoints and foreground mask (sigmoid of the segmentation logits)
94 | # (joint angles / robot model are not needed for this inference path)
95 |
96 | # detect 2d keypoints and segmentation masks
97 | points_2d, segmentation = self.keypoint_seg_predictor(img)
98 | foreground_mask = torch.sigmoid(segmentation)
99 |
100 | return points_2d, foreground_mask
101 |
102 | def inference_batch_images_onlyseg(self, img):
103 | # img: (B, 3, H, W)
104 | # returns: foreground mask only (sigmoid of the segmentation logits)
105 | # (joint angles / robot model are not needed for this inference path)
106 |
107 | # detect 2d keypoints and segmentation masks
108 | points_2d, segmentation = self.keypoint_seg_predictor(img)
109 | foreground_mask = torch.sigmoid(segmentation)
110 |
111 | return foreground_mask
112 |
113 |
114 | def cTr_to_pose_matrix(self, cTr):
115 | """
116 | cTr: (batch_size, 6)
117 | pose_matrix: (batch_size, 4, 4)
118 | """
119 | batch_size = cTr.shape[0]
120 | pose_matrix = torch.zeros((batch_size, 4, 4), device=self.device)
121 | pose_matrix[:, :3, :3] = kornia.geometry.conversions.axis_angle_to_rotation_matrix(cTr[:, :3])
122 | pose_matrix[:, :3, 3] = cTr[:, 3:]
123 | pose_matrix[:, 3, 3] = 1
124 | return pose_matrix
125 |
126 | def to_valid_R_batch(self, R):
127 | # R is a batch of 3x3 rotation matrices
128 | U, S, V = torch.svd(R)
129 | return torch.bmm(U, V.transpose(1,2))
130 |
131 | def render_single_robot_mask(self, cTr, robot_mesh, robot_renderer):
132 | # cTr: (6)
133 | # img: (1, H, W)
134 |
135 | R = kornia.geometry.conversions.angle_axis_to_rotation_matrix(cTr[:3][None]) # (1, 3, 3)
136 | R = torch.transpose(R,1,2)
137 | #R = to_valid_R_batch(R)
138 | T = cTr[3:][None] # (1, 3)
139 |
140 | if T[0,-1] < 0:
141 | rendered_image = robot_renderer.silhouette_renderer(meshes_world=robot_mesh, R = -R, T = -T)
142 | else:
143 | rendered_image = robot_renderer.silhouette_renderer(meshes_world=robot_mesh, R = R, T = T)
144 |
145 | if torch.isnan(rendered_image).any():
146 | rendered_image = torch.nan_to_num(rendered_image)
147 |
148 | return rendered_image[..., 3]
149 |
150 |
151 | def train_on_batch(self, img, joint_angles, robot_renderer, criterions, phase='train'):
152 | # img: (B, 3, H, W)
153 | # joint_angles: (B, 7)
154 | with torch.set_grad_enabled(phase == 'train'):
155 | # detect 2d keypoints
156 | points_2d, segmentation = self.keypoint_seg_predictor(img)
157 |
158 | mask_list = list()
159 | seg_weight_list = list()
160 |
161 | for b in range(img.shape[0]):
162 | # get 3d points
163 | _,t_list = self.robot.get_joint_RT(joint_angles[b])
164 | points_3d = torch.from_numpy(np.array(t_list)).float().to(self.device)
165 | if self.args.robot_name == "Panda":
166 | points_3d = points_3d[:,[0,2,3,4,6,7,8]]
167 |
168 | # get camera pose
169 | cTr = self.bpnp(points_2d[b][None], points_3d, self.K)
170 |
171 | # config robot mesh
172 | robot_mesh = robot_renderer.get_robot_mesh(joint_angles[b])
173 |
174 | # render robot mask
175 | rendered_image = self.render_single_robot_mask(cTr.squeeze(), robot_mesh, robot_renderer)
176 |
177 | mask_list.append(rendered_image)
178 | points_2d_proj = batch_project(cTr, points_3d, self.K)
179 | reproject_error = criterions["mse_mean"](points_2d[b], points_2d_proj.squeeze())
180 | seg_weight = torch.exp(-reproject_error * self.args.reproj_err_scale)
181 | seg_weight_list.append(seg_weight)
182 |
183 | mask_batch = torch.cat(mask_list,0)
184 |
185 | loss_bce = 0
186 | for b in range(segmentation.shape[0]):
187 | loss_bce = loss_bce + seg_weight_list[b] * criterions["bce"](segmentation[b].squeeze(), mask_batch[b].detach())
188 |
189 | img_ref = torch.sigmoid(segmentation).detach()
190 | #loss_reproj = 0.0005 * criterionMSE_mean(points_2d, points_2d_proj_batch)
191 | loss_mse = 0.001 * criterions["mse_sum"](mask_batch, img_ref.squeeze())
192 | loss = loss_mse + loss_bce
193 |
194 | return loss
195 |
196 |
197 |
198 |
199 |
200 |
--------------------------------------------------------------------------------
/lib/utils/urdfpytorch/utils.py:
--------------------------------------------------------------------------------
1 | """Utilities for URDF parsing.
2 | """
3 | import os
4 | from pathlib import Path
5 |
6 | from lxml import etree as ET
7 | import numpy as np
8 | import trimesh
9 |
10 |
11 | def resolve_package_path(urdf_path, mesh_path):
12 | urdf_path = Path(urdf_path)
13 | search_dir = urdf_path
14 | relative_path = Path(str(mesh_path).replace('package://', ''))
15 | while True:
16 | absolute_path = (search_dir / relative_path)
17 | if absolute_path.exists():
18 | return absolute_path
19 | search_dir = search_dir.parent
20 |
21 |
22 | def rpy_to_matrix(coords):
23 | """Convert roll-pitch-yaw coordinates to a 3x3 homogenous rotation matrix.
24 |
25 | The roll-pitch-yaw axes in a typical URDF are defined as a
26 | rotation of ``r`` radians around the x-axis followed by a rotation of
27 | ``p`` radians around the y-axis followed by a rotation of ``y`` radians
28 | around the z-axis. These are the Z1-Y2-X3 Tait-Bryan angles. See
29 | Wikipedia_ for more information.
30 |
31 | .. _Wikipedia: https://en.wikipedia.org/wiki/Euler_angles#Rotation_matrix
32 |
33 | Parameters
34 | ----------
35 | coords : (3,) float
36 | The roll-pitch-yaw coordinates in order (x-rot, y-rot, z-rot).
37 |
38 | Returns
39 | -------
40 | R : (3,3) float
41 | The corresponding homogenous 3x3 rotation matrix.
42 | """
43 | coords = np.asanyarray(coords, dtype=np.float64)
44 | c3, c2, c1 = np.cos(coords)
45 | s3, s2, s1 = np.sin(coords)
46 |
47 | return np.array([
48 | [c1 * c2, (c1 * s2 * s3) - (c3 * s1), (s1 * s3) + (c1 * c3 * s2)],
49 | [c2 * s1, (c1 * c3) + (s1 * s2 * s3), (c3 * s1 * s2) - (c1 * s3)],
50 | [-s2, c2 * s3, c2 * c3]
51 | ], dtype=np.float64)
52 |
53 |
54 | def matrix_to_rpy(R, solution=1):
55 | """Convert a 3x3 transform matrix to roll-pitch-yaw coordinates.
56 |
57 | The roll-pitch-yaw axes in a typical URDF are defined as a
58 | rotation of ``r`` radians around the x-axis followed by a rotation of
59 | ``p`` radians around the y-axis followed by a rotation of ``y`` radians
60 | around the z-axis. These are the Z1-Y2-X3 Tait-Bryan angles. See
61 | Wikipedia_ for more information.
62 |
63 | .. _Wikipedia: https://en.wikipedia.org/wiki/Euler_angles#Rotation_matrix
64 |
65 | There are typically two possible roll-pitch-yaw coordinates that could have
66 | created a given rotation matrix. Specify ``solution=1`` for the first one
67 | and ``solution=2`` for the second one.
68 |
69 | Parameters
70 | ----------
71 | R : (3,3) float
72 | A 3x3 homogenous rotation matrix.
73 | solution : int
74 | Either 1 or 2, indicating which solution to return.
75 |
76 | Returns
77 | -------
78 | coords : (3,) float
79 | The roll-pitch-yaw coordinates in order (x-rot, y-rot, z-rot).
80 | """
81 | R = np.asanyarray(R, dtype=np.float64)
82 | r = 0.0
83 | p = 0.0
84 | y = 0.0
85 |
86 | if np.abs(R[2,0]) >= 1.0 - 1e-12:
87 | y = 0.0
88 | if R[2,0] < 0:
89 | p = np.pi / 2
90 | r = np.arctan2(R[0,1], R[0,2])
91 | else:
92 | p = -np.pi / 2
93 | r = np.arctan2(-R[0,1], -R[0,2])
94 | else:
95 | if solution == 1:
96 | p = -np.arcsin(R[2,0])
97 | else:
98 | p = np.pi + np.arcsin(R[2,0])
99 | r = np.arctan2(R[2,1] / np.cos(p), R[2,2] / np.cos(p))
100 | y = np.arctan2(R[1,0] / np.cos(p), R[0,0] / np.cos(p))
101 |
102 | return np.array([r, p, y], dtype=np.float64)
103 |
104 |
105 | def matrix_to_xyz_rpy(matrix):
106 | """Convert a 4x4 homogenous matrix to xyzrpy coordinates.
107 |
108 | Parameters
109 | ----------
110 | matrix : (4,4) float
111 | The homogenous transform matrix.
112 |
113 | Returns
114 | -------
115 | xyz_rpy : (6,) float
116 | The xyz_rpy vector.
117 | """
118 | xyz = matrix[:3,3]
119 | rpy = matrix_to_rpy(matrix[:3,:3])
120 | return np.hstack((xyz, rpy))
121 |
122 |
123 | def xyz_rpy_to_matrix(xyz_rpy):
124 | """Convert xyz_rpy coordinates to a 4x4 homogenous matrix.
125 |
126 | Parameters
127 | ----------
128 | xyz_rpy : (6,) float
129 | The xyz_rpy vector.
130 |
131 | Returns
132 | -------
133 | matrix : (4,4) float
134 | The homogenous transform matrix.
135 | """
136 | matrix = np.eye(4, dtype=np.float64)
137 | matrix[:3,3] = xyz_rpy[:3]
138 | matrix[:3,:3] = rpy_to_matrix(xyz_rpy[3:])
139 | return matrix
140 |
141 |
142 | def parse_origin(node):
143 | """Find the ``origin`` subelement of an XML node and convert it
144 | into a 4x4 homogenous transformation matrix.
145 |
146 | Parameters
147 | ----------
148 | node : :class`lxml.etree.Element`
149 | An XML node which (optionally) has a child node with the ``origin``
150 | tag.
151 |
152 | Returns
153 | -------
154 | matrix : (4,4) float
155 |         The 4x4 homogeneous transform matrix that corresponds to this node's
156 | ``origin`` child. Defaults to the identity matrix if no ``origin``
157 | child was found.
158 | """
159 | matrix = np.eye(4, dtype=np.float64)
160 | origin_node = node.find('origin')
161 | if origin_node is not None:
162 | if 'xyz' in origin_node.attrib:
163 | matrix[:3,3] = np.fromstring(origin_node.attrib['xyz'], sep=' ')
164 | if 'rpy' in origin_node.attrib:
165 | rpy = np.fromstring(origin_node.attrib['rpy'], sep=' ')
166 | matrix[:3,:3] = rpy_to_matrix(rpy)
167 | return matrix
168 |
169 |
170 | def unparse_origin(matrix):
171 | """Turn a 4x4 homogenous matrix into an ``origin`` XML node.
172 |
173 | Parameters
174 | ----------
175 | matrix : (4,4) float
176 |         The 4x4 homogeneous transform matrix to convert into an ``origin``
177 | XML node.
178 |
179 | Returns
180 | -------
181 | node : :class`lxml.etree.Element`
182 | An XML node whose tag is ``origin``. The node has two attributes:
183 |
184 | - ``xyz`` - A string with three space-delimited floats representing
185 | the translation of the origin.
186 | - ``rpy`` - A string with three space-delimited floats representing
187 | the rotation of the origin.
188 | """
189 | node = ET.Element('origin')
190 | node.attrib['xyz'] = '{} {} {}'.format(*matrix[:3,3])
191 | node.attrib['rpy'] = '{} {} {}'.format(*matrix_to_rpy(matrix[:3,:3]))
192 | return node
193 |
194 |
195 | def get_filename(base_path, file_path, makedirs=False):
196 | """Formats a file path correctly for URDF loading.
197 |
198 | Parameters
199 | ----------
200 | base_path : str
201 | The base path to the URDF's folder.
202 | file_path : str
203 | The path to the file.
204 | makedirs : bool, optional
205 | If ``True``, the directories leading to the file will be created
206 | if needed.
207 |
208 | Returns
209 | -------
210 | resolved : str
211 | The resolved filepath -- just the normal ``file_path`` if it was an
212 | absolute path, otherwise that path joined to ``base_path``.
213 | """
214 | # print(base_path)
215 | # print(file_path)
216 | fn = file_path
217 | if not os.path.isabs(file_path):
218 | fn = os.path.join(base_path, file_path)
219 | if makedirs:
220 | d, _ = os.path.split(fn)
221 | if not os.path.exists(d):
222 | os.makedirs(d)
223 | if not Path(fn).exists():
224 | fn = str(resolve_package_path(base_path, file_path))
225 | return fn
226 |
227 |
228 | def load_meshes(filename):
229 | """Loads triangular meshes from a file.
230 |
231 | Parameters
232 | ----------
233 | filename : str
234 | Path to the mesh file.
235 |
236 | Returns
237 | -------
238 | meshes : list of :class:`~trimesh.base.Trimesh`
239 | The meshes loaded from the file.
240 | """
241 | meshes = trimesh.load(filename)
242 |
243 | # If we got a scene, dump the meshes
244 | if isinstance(meshes, trimesh.Scene):
245 | meshes = list(meshes.dump())
246 | meshes = [g for g in meshes if isinstance(g, trimesh.Trimesh)]
247 |
248 | if isinstance(meshes, (list, tuple, set)):
249 | meshes = list(meshes)
250 | if len(meshes) == 0:
251 |             raise ValueError('At least one mesh must be present in file')
252 | for r in meshes:
253 | if not isinstance(r, trimesh.Trimesh):
254 | raise TypeError('Could not load meshes from file')
255 | elif isinstance(meshes, trimesh.Trimesh):
256 | meshes = [meshes]
257 | else:
258 | raise ValueError('Unable to load mesh from file')
259 |
260 | return meshes
261 |
262 |
263 | def configure_origin(value):
264 | """Convert a value into a 4x4 transform matrix.
265 |
266 | Parameters
267 | ----------
268 | value : None, (6,) float, or (4,4) float
269 | The value to turn into the matrix.
270 | If (6,), interpreted as xyzrpy coordinates.
271 |
272 | Returns
273 | -------
274 | matrix : (4,4) float or None
275 | The created matrix.
276 | """
277 | if value is None:
278 | value = np.eye(4, dtype=np.float64)
279 | elif isinstance(value, (list, tuple, np.ndarray)):
280 | value = np.asanyarray(value, dtype=np.float64)
281 | if value.shape == (6,):
282 | value = xyz_rpy_to_matrix(value)
283 | elif value.shape != (4,4):
284 | raise ValueError('Origin must be specified as a 4x4 '
285 | 'homogenous transformation matrix')
286 | else:
287 | raise TypeError('Invalid type for origin, expect 4x4 matrix')
288 | return value
289 |
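
Two quick usage sketches for the helpers above (illustrative only; they assume lib/ is on PYTHONPATH so the module imports as utils.urdfpytorch.utils): an xyz_rpy round trip and an ``origin`` element round trip.

import numpy as np
from lxml import etree as ET
from utils.urdfpytorch.utils import (xyz_rpy_to_matrix, matrix_to_xyz_rpy,
                                     parse_origin, unparse_origin)

# xyz_rpy -> 4x4 transform -> xyz_rpy should round-trip (for pitch away from +-pi/2)
xyz_rpy = np.array([0.1, -0.2, 0.3, 0.4, 0.5, 0.6])
T = xyz_rpy_to_matrix(xyz_rpy)
assert np.allclose(matrix_to_xyz_rpy(T), xyz_rpy)

# <origin> parsing: translation from the xyz attribute, rotation from rpy
joint = ET.fromstring('<joint><origin xyz="0.1 0 0.2" rpy="0 1.2 0"/></joint>')
T_origin = parse_origin(joint)
print(T_origin[:3, 3])                        # [0.1 0.  0.2]
print(ET.tostring(unparse_origin(T_origin)))  # back to an <origin> element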
--------------------------------------------------------------------------------
/lib/dataset/const.py:
--------------------------------------------------------------------------------
1 | from .augmentations import (CropResizeToAspectAugmentation, PillowBlur,
2 | PillowBrightness, PillowColor, PillowContrast,
3 | PillowSharpness, occlusion_aug, to_torch_uint8)
4 |
5 | rgb_augmentations=[
6 | PillowSharpness(p=0.3, factor_interval=(0., 50.)),
7 | PillowContrast(p=0.3, factor_interval=(0.7, 1.8)),
8 | PillowBrightness(p=0.3, factor_interval=(0.7, 1.8)),
9 | PillowColor(p=0.3, factor_interval=(0., 4.))
10 | ]
11 |
12 | KEYPOINT_NAMES={
13 | 'panda' : [
14 | 'panda_link0', 'panda_link2', 'panda_link3',
15 | 'panda_link4', 'panda_link6', 'panda_link7',
16 | 'panda_hand'
17 | ],
18 | 'baxter': [
19 | 'torso_t0', 'right_s0','left_s0', 'right_s1', 'left_s1',
20 | 'right_e0','left_e0', 'right_e1','left_e1','right_w0', 'left_w0',
21 | 'right_w1','left_w1','right_w2', 'left_w2','right_hand','left_hand'
22 | ],
23 | 'kuka' : [
24 | 'iiwa7_link_0', 'iiwa7_link_1',
25 | 'iiwa7_link_2', 'iiwa7_link_3',
26 | 'iiwa7_link_4', 'iiwa7_link_5',
27 | 'iiwa7_link_6', 'iiwa7_link_7'
28 | ],
29 | 'owi535' :[
30 | 'Rotation', 'Base', 'Elbow', 'Wrist'
31 | ]
32 | }
33 |
34 | KEYPOINT_NAMES_TO_LINK_NAMES = {
35 | "panda" : dict(zip(KEYPOINT_NAMES['panda'],KEYPOINT_NAMES['panda'])),
36 | "kuka" : {
37 | 'iiwa7_link_0':'iiwa_link_0', 'iiwa7_link_1':'iiwa_link_1',
38 | 'iiwa7_link_2':'iiwa_link_2', 'iiwa7_link_3':'iiwa_link_3',
39 | 'iiwa7_link_4':'iiwa_link_4', 'iiwa7_link_5':'iiwa_link_5',
40 | 'iiwa7_link_6':'iiwa_link_6', 'iiwa7_link_7':'iiwa_link_7'
41 | },
42 | "baxter" : {
43 | 'torso_t0':'torso',
44 | 'right_s0':'right_upper_shoulder', 'left_s0':'left_upper_shoulder',
45 | 'right_s1':'right_lower_shoulder', 'left_s1':'left_lower_shoulder',
46 | 'right_e0':'right_upper_elbow','left_e0':'left_upper_elbow',
47 | 'right_e1':'right_lower_elbow','left_e1':'left_lower_elbow',
48 | 'right_w0':'right_upper_forearm', 'left_w0':'left_upper_forearm',
49 | 'right_w1':'right_lower_forearm', 'left_w1':'left_lower_forearm',
50 | 'right_w2':'right_wrist', 'left_w2':'left_wrist',
51 | 'right_hand':'right_hand','left_hand':'left_hand'
52 | },
53 | "owi535" : {
54 | 'Rotation':'Rotation', 'Base':'Base', 'Elbow':'Elbow', 'Wrist':'Wrist'
55 | }
56 | }
57 |
58 | LINK_NAMES = {
59 | 'panda': ['panda_link0', 'panda_link2', 'panda_link3', 'panda_link4',
60 | 'panda_link6', 'panda_link7', 'panda_hand'],
61 | 'kuka': ['iiwa_link_0', 'iiwa_link_1', 'iiwa_link_2', 'iiwa_link_3',
62 | 'iiwa_link_4', 'iiwa_link_5', 'iiwa_link_6', 'iiwa_link_7'],
63 | 'baxter': ['torso', 'right_upper_shoulder', 'left_upper_shoulder', 'right_lower_shoulder',
64 | 'left_lower_shoulder', 'right_upper_elbow', 'left_upper_elbow', 'right_lower_elbow',
65 | 'left_lower_elbow', 'right_upper_forearm', 'left_upper_forearm', 'right_lower_forearm',
66 | 'left_lower_forearm', 'right_wrist', 'left_wrist', 'right_hand', 'left_hand'],
67 | #'owi535': ["Base","Elbow","Wrist","Model","Model","Model","Model","Base","Base","Base","Base","Elbow","Elbow","Elbow","Elbow","Wrist","Wrist"],
68 | 'owi535' :[
69 | 'Rotation', 'Base', 'Elbow', 'Wrist'
70 | ]
71 | }
72 |
73 | JOINT_NAMES={
74 | 'panda': ['panda_joint1', 'panda_joint2', 'panda_joint3', 'panda_joint4',
75 | 'panda_joint5', 'panda_joint6', 'panda_joint7', 'panda_finger_joint1'],
76 | 'kuka': ['iiwa_joint_1', 'iiwa_joint_2', 'iiwa_joint_3', 'iiwa_joint_4',
77 | 'iiwa_joint_5', 'iiwa_joint_6', 'iiwa_joint_7'],
78 | 'baxter': ['head_pan', 'right_s0', 'left_s0', 'right_s1', 'left_s1',
79 | 'right_e0', 'left_e0', 'right_e1', 'left_e1', 'right_w0',
80 | 'left_w0', 'right_w1', 'left_w1', 'right_w2', 'left_w2'],
81 | 'owi535' :[
82 | 'Rotation', 'Base', 'Elbow', 'Wrist'
83 | ]
84 | }
85 |
86 | JOINT_TO_KP = {
87 | 'panda': [1, 1, 2, 3, 4, 4, 5, 6],
88 | 'kuka':[1,2,3,4,5,6,7],
89 | 'baxter':[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],
90 | 'owi535':[0,1,2,3]
91 | }
92 |
93 | # flip_pairs=[
94 | # ["right_s0","left_s0"],["right_s1","left_s1"],["right_e0","left_e0"],
95 | # ["right_e1","left_e1"],["right_w0","left_w0"],["right_w1","left_w1"],
96 | # ["right_w2","left_w2"],["right_hand","left_hand"]
97 | # ]
98 | flip_pairs = [ [1,2],[3,4],[5,6],[7,8],[9,10],[11,12],[13,14],[15,16] ]
99 |
100 | PANDA_LIMB_LENGTH ={
101 | "link0-link2" : 0.3330,
102 | "link2-link3" : 0.3160,
103 | "link3-link4" : 0.0825,
104 | "link4-link6" : 0.39276,
105 | "link6-link7" : 0.0880,
106 | "link7-hand" : 0.1070
107 | }
108 | KUKA_LIMB_LENGTH ={
109 | "link0-link1" : 0.1500,
110 | "link1-link2" : 0.1900,
111 | "link2-link3" : 0.2100,
112 | "link3-link4" : 0.1900,
113 | "link4-link5" : 0.2100,
114 | "link5-link6" : 0.19946,
115 | "link6-link7" : 0.10122
116 | }
117 |
118 | LIMB_LENGTH = {
119 | "panda": list(PANDA_LIMB_LENGTH.values()),
120 | "kuka": list(KUKA_LIMB_LENGTH.values())
121 | }
122 |
123 | INITIAL_JOINT_ANGLE = {
124 | "zero": {
125 | "panda": {
126 | "panda_joint1": 0.0,
127 | "panda_joint2": 0.0,
128 | "panda_joint3": 0.0,
129 | "panda_joint4": 0.0,
130 | "panda_joint5": 0.0,
131 | "panda_joint6": 0.0,
132 | "panda_joint7": 0.0,
133 | "panda_finger_joint1": 0.0
134 | },
135 | "kuka": {
136 | "iiwa_joint_1": 0.0,
137 | "iiwa_joint_2": 0.0,
138 | "iiwa_joint_3": 0.0,
139 | "iiwa_joint_4": 0.0,
140 | "iiwa_joint_5": 0.0,
141 | "iiwa_joint_6": 0.0,
142 | "iiwa_joint_7": 0.0
143 | },
144 | "baxter": {
145 | "head_pan": 0.0,
146 | "right_s0": 0.0,
147 | "left_s0": 0.0,
148 | "right_s1": 0.0,
149 | "left_s1": 0.0,
150 | "right_e0": 0.0,
151 | "left_e0": 0.0,
152 | "right_e1": 0.0,
153 | "left_e1": 0.0,
154 | "right_w0": 0.0,
155 | "left_w0": 0.0,
156 | "right_w1": 0.0,
157 | "left_w1": 0.0,
158 | "right_w2": 0.0,
159 | "left_w2": 0.0
160 | },
161 | "owi535":{
162 | "Rotation":0.0,
163 | "Base":0.0,
164 | "Elbow":0.0,
165 | "Wrist":0.0
166 | }
167 | },
168 | "mean": {
169 | "panda": {
170 | "panda_joint1": 0.0,
171 | "panda_joint2": 0.0,
172 | "panda_joint3": 0.0,
173 | "panda_joint4": -1.52715,
174 | "panda_joint5": 0.0,
175 | "panda_joint6": 1.8675,
176 | "panda_joint7": 0.0,
177 | "panda_finger_joint1": 0.02
178 | },
179 | "kuka": {
180 | "iiwa_joint_1": 0.0,
181 | "iiwa_joint_2": 0.0,
182 | "iiwa_joint_3": 0.0,
183 | "iiwa_joint_4": 0.0,
184 | "iiwa_joint_5": 0.0,
185 | "iiwa_joint_6": 0.0,
186 | "iiwa_joint_7": 0.0
187 | },
188 | "baxter": {
189 | "head_pan": 0.0,
190 | "right_s0": 0.0,
191 | "left_s0": 0.0,
192 | "right_s1": -0.5499999999999999,
193 | "left_s1": -0.5499999999999999,
194 | "right_e0": 0.0,
195 | "left_e0": 0.0,
196 | "right_e1": 1.284,
197 | "left_e1": 1.284,
198 | "right_w0": 0.0,
199 | "left_w0": 0.0,
200 | "right_w1": 0.2616018366049999,
201 | "left_w1": 0.2616018366049999,
202 | "right_w2": 0.0,
203 | "left_w2": 0.0
204 | },
205 | "owi535":{
206 | "Rotation":0.0,
207 | "Base":-0.523598,
208 | "Elbow":0.523598,
209 | "Wrist":0.0
210 | }
211 | }
212 | }
213 |
214 | JOINT_BOUNDS = {
215 | "panda": [[-2.9671, 2.9671],
216 | [-1.8326, 1.8326],
217 | [-2.9671, 2.9671],
218 | [-3.1416, 0.0873],
219 | [-2.9671, 2.9671],
220 | [-0.0873, 3.8223],
221 | [-2.9671, 2.9671],
222 | [ 0.0000, 0.0400]],
223 |
224 | "kuka": [[-2.9671, 2.9671],
225 | [-2.0944, 2.0944],
226 | [-2.9671, 2.9671],
227 | [-2.0944, 2.0944],
228 | [-2.9671, 2.9671],
229 | [-2.0944, 2.0944],
230 | [-3.0543, 3.0543]],
231 |
232 | "baxter": [[-1.5708, 1.5708],
233 | [-1.7017, 1.7017],
234 | [-1.7017, 1.7017],
235 | [-2.1470, 1.0470],
236 | [-2.1470, 1.0470],
237 | [-3.0542, 3.0542],
238 | [-3.0542, 3.0542],
239 | [-0.0500, 2.6180],
240 | [-0.0500, 2.6180],
241 | [-3.0590, 3.0590],
242 | [-3.0590, 3.0590],
243 | [-1.5708, 2.0940],
244 | [-1.5708, 2.0940],
245 | [-3.0590, 3.0590],
246 | [-3.0590, 3.0590]],
247 | "owi535":[
248 | [-2.268928,2.268928],
249 | [-1.570796,1.047198],
250 | [-1.047198, 1.570796],
251 | [-0.785398,0.785398]
252 | ]
253 | }
254 |
255 |
256 | INTRINSICS_DICT = {
257 | "azure": (399.6578776041667, 399.4959309895833, 319.8955891927083, 244.0602823893229),
258 | "kinect": (525.0, 525.0, 319.5, 239.5),
259 | "realsense": (615.52392578125, 615.2191772460938, 328.2606506347656, 251.7917022705078),
260 | "orb": (615.52392578125, 615.2191772460938, 328.2606506347656, 251.7917022705078),
261 |
262 | }
263 |
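
INTRINSICS_DICT stores each real camera's intrinsics as an (fx, fy, cx, cy) tuple and JOINT_BOUNDS the per-joint limits in radians. A small illustrative sketch of how they can be consumed (make_K is hypothetical, not defined in this repository; assumes lib/ is on PYTHONPATH):

import numpy as np
from dataset.const import INTRINSICS_DICT, JOINT_BOUNDS

def make_K(camera_name):
    # build the 3x3 pinhole matrix from the stored (fx, fy, cx, cy) tuple
    fx, fy, cx, cy = INTRINSICS_DICT[camera_name]
    return np.array([[fx, 0.0, cx],
                     [0.0, fy, cy],
                     [0.0, 0.0, 1.0]])

K = make_K("realsense")

# clamp a (possibly out-of-range) Panda joint prediction to its limits
bounds = np.array(JOINT_BOUNDS["panda"])                 # (8, 2): [lower, upper] per joint
pred = np.random.uniform(-4.0, 4.0, size=len(bounds))
clamped = np.clip(pred, bounds[:, 0], bounds[:, 1])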
--------------------------------------------------------------------------------
/lib/dataset/roboutils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torchvision
3 | import numpy as np
4 | import torchvision.transforms as transforms
5 | from PIL import Image
6 | import sys
7 | sys.path.append("..")
8 | from utils.geometries import get_K_crop_resize
9 | import random
10 |
11 | def hnormalized(vector):
12 | hnormalized_vector = (vector / vector[-1])[:-1]
13 | return hnormalized_vector
14 |
15 | def crop_to_aspect_ratio(images, box, masks=None, K=None):
16 | assert images.dim() == 4
17 | bsz, _, h, w = images.shape
18 | assert box.dim() == 1
19 | assert box.shape[0] == 4
20 | w_output, h_output = box[[2, 3]] - box[[0, 1]]
21 | boxes = torch.cat(
22 | (torch.arange(bsz).unsqueeze(1).to(box.device).float(), box.unsqueeze(0).repeat(bsz, 1).float()),
23 | dim=1).to(images.device)
24 | images = torchvision.ops.roi_pool(images, boxes, output_size=(h_output, w_output))
25 | if masks is not None:
26 | assert masks.dim() == 4
27 | masks = torchvision.ops.roi_pool(masks, boxes, output_size=(h_output, w_output))
28 | if K is not None:
29 | assert K.dim() == 3
30 | assert K.shape[0] == bsz
31 | K = get_K_crop_resize(K, boxes[:, 1:], orig_size=(h, w), crop_resize=(h_output, w_output))
32 | return images, masks, K
33 |
34 |
35 | def make_detections_from_segmentation(masks):
36 | detections = []
37 | if masks.dim() == 4:
38 | assert masks.shape[0] == 1
39 | masks = masks.squeeze(0)
40 |
41 | for mask_n in masks:
42 | dets_n = dict()
43 | for uniq in torch.unique(mask_n, sorted=True):
44 | ids = np.where((mask_n == uniq).cpu().numpy())
45 | x1, y1, x2, y2 = np.min(ids[1]), np.min(ids[0]), np.max(ids[1]), np.max(ids[0])
46 | dets_n[int(uniq.item())] = torch.tensor([x1, y1, x2, y2]).to(mask_n.device)
47 | detections.append(dets_n)
48 | return detections
49 |
50 |
51 | def make_masks_from_det(detections, h, w):
52 | n_ids = len(detections)
53 | detections = torch.as_tensor(detections)
54 | masks = torch.zeros((n_ids, h, w)).byte()
55 | for mask_n, det_n in zip(masks, detections):
56 | x1, y1, x2, y2 = det_n.cpu().int().tolist()
57 | mask_n[y1:y2, x1:x2] = True
58 | return masks
59 |
60 | def get_bbox(bbox,w,h, strict=True):
61 | assert len(bbox)==4
62 | wmin, hmin, wmax, hmax = bbox
63 | if wmax<0 or hmax <0 or wmin > w or hmin > h:
64 | print("wmax",wmax,"hmax",hmax,"wmin",wmin,"hmin",hmin)
65 | wmin,hmin,wmax,hmax=max(0,wmin),max(0,hmin),min(w,wmax),min(h,hmax)
66 | wnew=wmax-wmin
67 | hnew=hmax-hmin
68 | wmin=int(max(0,wmin-0.3*wnew))
69 | wmax=int(min(w,wmax+0.3*wnew))
70 | hmin=int(max(0,hmin-0.3*hnew))
71 | hmax=int(min(h,hmax+0.3*hnew))
72 | wnew=wmax-wmin
73 | hnew=hmax-hmin
74 |
75 | if not strict:
76 | randomw = (random.random()-0.2)/2
77 | randomh = (random.random()-0.2)/2
78 |
79 | dwnew=randomw*wnew
80 | wmax+=dwnew/2
81 | wmin-=dwnew/2
82 |
83 | dhnew=randomh*hnew
84 | hmax+=dhnew/2
85 | hmin-=dhnew/2
86 |
87 | wmin=int(max(0,wmin))
88 | wmax=int(min(w,wmax))
89 | hmin=int(max(0,hmin))
90 | hmax=int(min(h,hmax))
91 | wnew=wmax-wmin
92 | hnew=hmax-hmin
93 |
94 | if wnew < 150:
95 | wmax+=75
96 | wmin-=75
97 | if hnew < 120:
98 | hmax+=60
99 | hmin-=60
100 |
101 | wmin,hmin,wmax,hmax=max(0,wmin),max(0,hmin),min(w,wmax),min(h,hmax)
102 | wmin,hmin,wmax,hmax=min(w,wmin),min(h,hmin),max(0,wmax),max(0,hmax)
103 | new_bbox = np.array([wmin,hmin,wmax,hmax])
104 | return new_bbox
105 |
106 | def get_bbox_raw(bbox):
107 | assert len(bbox)==4
108 | wmin, hmin, wmax, hmax = bbox
109 | wnew=wmax-wmin
110 | hnew=hmax-hmin
111 | wmin=int(wmin-0.3*wnew)
112 | wmax=int(wmax+0.3*wnew)
113 | hmin=int(hmin-0.3*hnew)
114 | hmax=int(hmax+0.3*hnew)
115 | wnew=wmax-wmin
116 | hnew=hmax-hmin
117 |
118 | if wnew < 150:
119 | wmax+=75
120 | wmin-=75
121 | if hnew < 120:
122 | hmax+=60
123 | hmin-=60
124 |
125 | new_bbox = np.array([wmin,hmin,wmax,hmax])
126 | return new_bbox
127 |
128 | def resize_image(image, bbox, mask, state, bbox_strict_bounded=None):
129 | #image as np.array
130 | wmin, hmin, wmax, hmax = bbox
131 | square_size =int(max(wmax - wmin, hmax - hmin))
132 | square_image = np.zeros((square_size, square_size, 3), dtype=np.uint8)
133 |
134 | x_offset = int((square_size - (wmax-wmin)) // 2)
135 | y_offset = int((square_size- (hmax-hmin)) // 2)
136 |
137 | square_image[y_offset:y_offset+(hmax-hmin), x_offset:x_offset+(wmax-wmin)] = image[hmin:hmax, wmin:wmax]
138 |
139 | keypoints=state['objects'][0]['keypoints_2d']
140 |
141 | for k in keypoints:
142 | k[1]-=hmin
143 | k[1]+=y_offset
144 | k[0]+=x_offset
145 | k[0]-=wmin
146 | if bbox_strict_bounded is not None:
147 | bbox_strict_bounded_new = bbox_strict_bounded[0]-wmin+x_offset, bbox_strict_bounded[1]-hmin+y_offset, \
148 | bbox_strict_bounded[2]-wmin+x_offset, bbox_strict_bounded[3]-hmin+y_offset
149 |
150 | K = state['camera']['K']
151 | K[0, 2] -= (wmin-x_offset)
152 | K[1, 2] -= (hmin-y_offset)
153 | if bbox_strict_bounded is None:
154 | return square_image, mask, state
155 | else:
156 | return square_image, mask, state, bbox_strict_bounded_new
157 |
158 | def tensor_to_image(tensor):
159 | image = tensor.cpu().clone().detach().numpy()
160 | image = Image.fromarray(image)
161 | return image
162 |
163 | def process_truncation(image, bbox, mask, state, max_pad=[120, 120, 120, 120]):
164 | #image as np.array
165 | wmin, hmin, wmax, hmax = bbox
166 | if wmin > 0 and hmin > 0 and hmax<480 and wmax <640:
167 | return image, bbox, mask, state
168 | d_wmin, d_hmin, d_wmax, d_hmax = int(-wmin), int(-hmin), int(wmax-640), int(hmax-480)
169 | d_wmin, d_hmin, d_wmax, d_hmax = int(max(0,d_wmin)), int(max(0,d_hmin)), int(max(0,d_wmax)), int(max(0,d_hmax))
170 | #print(d_wmin, d_hmin, d_wmax, d_hmax)
171 | d_wmin, d_hmin, d_wmax, d_hmax = min(max_pad[0],d_wmin), min(max_pad[1],d_hmin),min(max_pad[2],d_wmax),min(max_pad[3],d_hmax)
172 | wmax, hmax = 640 + d_wmax, 480+ d_hmax
173 | wnew, hnew = 640+d_wmax+d_wmin,480+d_hmax+d_hmin
174 |
175 | #print(wnew,hnew)
176 | new_image = np.zeros((hnew, wnew, 3), dtype=np.uint8)
177 |
178 | #print("d_hmin:",d_hmin,d_hmax, d_wmin, d_wmax,wnew, hnew,"hmax:",hmax)
179 | new_image[d_hmin:d_hmin+480, d_wmin:d_wmin+640] = image[0:480, 0:640]
180 |
181 |
182 | keypoints=state['objects'][0]['keypoints_2d']
183 |
184 | for k in keypoints:
185 | k[1]+=d_hmin
186 | k[0]+=d_wmin
187 |
188 | K = state['camera']['K']
189 | K[0, 2] += (d_wmin)
190 | K[1, 2] += (d_hmin)
191 |
192 | # new_bbox = np.array([max(0,int(wmin + d_wmin)),max(0,int(hmin + d_hmin)),int(wmax + d_wmin),int(hmax + d_hmin)])
193 | bbox_raw = np.concatenate([np.min(keypoints, axis=0)[0:2], np.max(keypoints, axis=0)[0:2]])
194 | new_bbox = get_bbox(bbox_raw,wnew,hnew)
195 | return new_image, new_bbox, mask, state
196 |
197 | def process_padding(image, bbox, mask, state, padding_pixel=25):
198 | #image as np.array
199 | keypoints=state['objects'][0]['keypoints_2d']
200 | # in_frame = 0
201 | # for k in keypoints:
202 | # if k[0]>0 and k[0]<256 and k[1]>0 and k[1]<256:
203 | # in_frame +=1
204 | # if in_frame ==7:
205 | # return image, bbox, mask, state
206 | # d_pad = 30 - 3*in_frame
207 | d_pad = padding_pixel
208 | d_wmin, d_hmin, d_wmax, d_hmax = d_pad,d_pad,d_pad,d_pad
209 |
210 | wnew, hnew = 320+d_wmax+d_wmin,320+d_hmax+d_hmin
211 |
212 | #print(wnew,hnew)
213 | new_image = np.zeros((hnew, wnew, 3), dtype=np.uint8)
214 |
215 | #print("d_hmin:",d_hmin,d_hmax, d_wmin, d_wmax,wnew, hnew,"hmax:",hmax)
216 | new_image[d_hmin:d_hmin+320, d_wmin:d_wmin+320] = image[0:320, 0:320]
217 |
218 | for k in keypoints:
219 | k[1]+=d_hmin
220 | k[0]+=d_wmin
221 |
222 | K = state['camera']['K']
223 | K[0, 2] += (d_wmin)
224 | K[1, 2] += (d_hmin)
225 |
226 | # new_bbox = np.array([max(0,int(wmin + d_wmin)),max(0,int(hmin + d_hmin)),int(wmax + d_wmin),int(hmax + d_hmin)])
227 | bbox_raw = np.concatenate([np.min(keypoints, axis=0)[0:2], np.max(keypoints, axis=0)[0:2]])
228 | new_bbox = get_bbox(bbox_raw,wnew,hnew)
229 | return new_image, new_bbox, mask, state
230 |
231 | def bbox_transform(bbox, K_original_inv, K, resize_hw):
232 | wmin, hmin, wmax, hmax = bbox
233 | corners = np.array([[wmin, hmin, 1.0],
234 | [wmax, hmin, 1.0],
235 | [wmax, hmax, 1.0],
236 | [wmin, hmax, 1.0]])
237 | corners3d_ill = np.matmul(K_original_inv, corners.T)
238 | new_corners = np.matmul(K, corners3d_ill).T
239 | assert all(new_corners[:,2] == 1.0), new_corners
240 | new_bbox = np.array([
241 | np.clip(new_corners[0,0], 0, resize_hw[0]),
242 | np.clip(new_corners[0,1], 0, resize_hw[1]),
243 | np.clip(new_corners[1,0], 0, resize_hw[0]),
244 | np.clip(new_corners[2,1], 0, resize_hw[1]),
245 | ])
246 | return new_bbox
247 |
248 | def get_extended_bbox(bbox, dwmin, dhmin, dwmax, dhmax, bounded=True, image_size=None):
249 | wmin, hmin, wmax, hmax = bbox
250 | extended_bbox = np.array([wmin-dwmin, hmin-dhmin, wmax+dwmax, hmax+dhmax])
251 | wmin, hmin, wmax, hmax = extended_bbox
252 | if bounded:
253 | assert image_size
254 | extended_bbox = np.array([max(0,wmin),max(0,hmin),min(image_size[0],wmax),min(image_size[1],hmax)])
255 | else:
256 | pass
257 | return extended_bbox
258 |
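
get_bbox pads a raw keypoint bounding box by 30% per side, enforces a minimum size of roughly 150x120 px, and clips to the image; get_extended_bbox applies a fixed margin. A toy call with made-up numbers (assumes lib/ is on PYTHONPATH):

from dataset.roboutils import get_bbox, get_extended_bbox

w, h = 640, 480
raw_bbox = [200, 150, 320, 260]              # wmin, hmin, wmax, hmax from keypoint extremes
bbox = get_bbox(raw_bbox, w, h)              # strict=True: deterministic 30% padding + clipping
print(bbox)                                  # array([164, 117, 356, 293])

padded = get_extended_bbox(bbox, 10, 10, 10, 10, bounded=True, image_size=(w, h))
print(padded)                                # array([154, 107, 366, 303])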
--------------------------------------------------------------------------------
/lib/utils/metrics.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from utils.transforms import point_projection_from_3d
4 | import matplotlib.pyplot as plt
5 | import seaborn as sns
6 | import os
7 |
8 | def compute_metrics_batch(robot,gt_keypoints3d,gt_keypoints2d,K_original,gt_joint,**pred_kwargs):
9 |
10 | # compute 3d keypoints locations
11 | # output shape: (batch_size, keypoints_num, 3)
12 | pred_joint = pred_kwargs["pred_joint"]
13 | pred_rot = pred_kwargs["pred_rot"]
14 | pred_trans = pred_kwargs["pred_trans"]
15 | if "pred_xy" in pred_kwargs and "pred_depth" in pred_kwargs and pred_kwargs["pred_xy"] is not None and pred_kwargs["pred_depth"] is not None:
16 | pred_xy = pred_kwargs["pred_xy"]
17 | pred_depth = pred_kwargs["pred_depth"]
18 | pred_trans = torch.cat((pred_xy,pred_depth),dim=-1)
19 | pred_xyz_integral = pred_kwargs["pred_xyz_integral"]
20 | reference_keypoint_id = pred_kwargs["reference_keypoint_id"]
21 |
22 | if pred_joint is None or pred_rot is None or pred_trans is None:
23 | assert pred_xyz_integral is not None
24 | pred_keypoints3d = pred_xyz_integral
25 | batch_size = pred_xyz_integral.shape[0]
26 | else:
27 | if reference_keypoint_id == 0:
28 | pred_keypoints3d = robot.get_keypoints(pred_joint,pred_rot,pred_trans)
29 | batch_size = pred_joint.shape[0]
30 | pred_joint = pred_joint.detach().cpu().numpy()
31 | else:
32 | pred_keypoints3d = robot.get_keypoints_root(pred_joint,pred_rot,pred_trans,root=reference_keypoint_id)
33 | batch_size = pred_joint.shape[0]
34 | pred_joint = pred_joint.detach().cpu().numpy()
35 |
36 | keypoints_num = len(robot.link_names)
37 | dof = robot.dof
38 | pred_keypoints3d = pred_keypoints3d.detach().cpu().numpy()
39 | gt_keypoints3d = gt_keypoints3d.detach().cpu().numpy()
40 | gt_keypoints2d = gt_keypoints2d.detach().cpu().numpy()
41 | K_original = K_original.detach().cpu().numpy()
42 | gt_joint = gt_joint.detach().cpu().numpy()
43 | pred_keypoints2d = point_projection_from_3d(K_original,pred_keypoints3d)
44 | assert(pred_keypoints3d.shape == (batch_size,keypoints_num,3)),f"{pred_keypoints3d.shape}"
45 | assert(gt_keypoints3d.shape == (batch_size,keypoints_num,3)),f"{gt_keypoints3d.shape}"
46 | assert(pred_keypoints2d.shape == (batch_size,keypoints_num,2)),f"{pred_keypoints2d.shape}"
47 | assert(gt_keypoints2d.shape == (batch_size,keypoints_num,2)),f"{gt_keypoints2d.shape}"
48 |
49 |
50 | # Thresholds (ADD:mm, PCK:pixel)
51 | add_thresholds = [1,5,10,20,40,60,80,100]
52 | pck_thresholds = [2.5,5.0,7.5,10.0,12.5,15.0,17.5,20.0]
53 |
54 |     # ADD: average 3D distance (m) between predicted and ground-truth keypoints
55 | error3d_batch = np.linalg.norm(pred_keypoints3d - gt_keypoints3d, ord = 2, axis = 2)
56 | assert(error3d_batch.shape == (batch_size,keypoints_num))
57 | error3d = np.mean(error3d_batch, axis = 1)
58 | # pcts3d = [len(np.where(error3d < th_mm/1000.0)[0])/float(error3d.shape[0]*error3d.shape[1]) for th_mm in add_thresholds]
59 |
60 | # PCK percentage of correct keypoints (only keypoints within the camera frame)
61 | error2d_batch = np.linalg.norm(pred_keypoints2d - gt_keypoints2d, ord = 2, axis = 2)
62 | assert(error2d_batch.shape == (batch_size,keypoints_num))
63 | valid = (gt_keypoints2d[:,:,0] <= 640.0) & (gt_keypoints2d[:,:,0] >= 0) & (gt_keypoints2d[:,:,1] <= 480.0) & (gt_keypoints2d[:,:,1] >= 0)
64 | error2d_all = error2d_batch * valid
65 | error2d_sum = np.sum(error2d_all, axis = 1)
66 | valid_sum = np.sum(valid, axis = 1)
67 | error2d = error2d_sum / valid_sum
68 | # pcts2d = [len(np.where(error2d < th_p)[0])/float(error2d.shape[0]*error2d.shape[1]) for th_p in pck_thresholds]
69 |
70 | # 3D/2D mean distance with gt of each keypoints
71 | dis3d = list(np.mean(error3d_batch, axis = 0))
72 | error2d_sum_batch = np.sum(error2d_all, axis = 0)
73 | valid_sum_batch = np.sum(valid, axis = 0)
74 | dis2d = error2d_sum_batch / valid_sum_batch
75 | # dis2d = list(np.mean(error2d_batch, axis = 0))
76 |
77 | # mean joint angle L1 error (per joint)
78 | # mean joint angle L1 error (per image)
79 | if pred_joint is not None:
80 | # pred_joint = pred_joint.detach().cpu().numpy()
81 | assert(gt_joint.shape == pred_joint.shape and gt_joint.shape == (batch_size, dof)), f"{pred_joint.shape},{gt_joint.shape}"
82 | error_joint = np.abs(gt_joint - pred_joint)
83 | l1_jointerror = list(np.mean(error_joint, axis = 0))
84 | if robot.robot_type == "panda":
85 | mean_jointerror = list(np.mean(error_joint[:,:-1], axis = 1))
86 | else:
87 | mean_jointerror = list(np.mean(error_joint, axis = 1))
88 | assert(len(mean_jointerror) == batch_size), len(mean_jointerror)
89 | else:
90 | l1_jointerror = [0] * dof
91 | mean_jointerror = [0] * batch_size
92 |
93 | # depth l1 error
94 | reference_keypoint_id = pred_kwargs["reference_keypoint_id"]
95 | error_depth = np.abs(pred_keypoints3d[:,reference_keypoint_id,2] - gt_keypoints3d[:,reference_keypoint_id,2])
96 |
97 | # root relative error
98 | pred_relatives = pred_keypoints3d[:,:,2] - pred_keypoints3d[:,reference_keypoint_id:reference_keypoint_id+1,2]
99 | gt_relatives = gt_keypoints3d[:,:,2] - gt_keypoints3d[:,reference_keypoint_id:reference_keypoint_id+1,2]
100 | error_relative = np.abs(pred_relatives - gt_relatives)
101 | batch_error_relative = np.mean(error_relative, axis=1)
102 |
103 | # root relative auc
104 | pred_keypoints3d_relative = pred_keypoints3d.copy()
105 | pred_keypoints3d_relative[:,:,2] = pred_relatives
106 | gt_keypoints3d_relative = gt_keypoints3d.copy()
107 | gt_keypoints3d_relative[:,:,2] = gt_relatives
108 | error3d_relative_batch = np.linalg.norm(pred_keypoints3d_relative - gt_keypoints3d_relative, ord = 2, axis = 2)
109 | assert(error3d_relative_batch.shape == (batch_size,keypoints_num))
110 | error3d_relative = np.mean(error3d_relative_batch, axis = 1)
111 |
112 |
113 |
114 | return error3d, error2d, dis3d, dis2d, l1_jointerror, mean_jointerror, error_depth, batch_error_relative, error3d_relative
115 |
116 |
117 | def summary_add_pck(alldis):
118 |
119 | dis3d = np.array(alldis['dis3d'])
120 | dis2d = np.array(alldis['dis2d'])
121 | assert(dis3d.shape[0] == dis2d.shape[0])
122 |
123 | add_threshold_ontb = [1,5,10,20,40,60,80,100]
124 | pck_threshold_ontb = [2.5,5.0,7.5,10.0,12.5,15.0,17.5,20.0]
125 |
126 | # for ADD
127 | auc_threshold = 0.1
128 | delta_threshold = 0.00001
129 | add_threshold_values = np.arange(0.0, auc_threshold, delta_threshold)
130 | counts_3d = []
131 | for value in add_threshold_values:
132 | under_threshold = (
133 | np.mean(dis3d <= value)
134 | )
135 | counts_3d.append(under_threshold)
136 | auc_add = np.trapz(counts_3d, dx=delta_threshold) / auc_threshold
137 |
138 | # for PCK
139 | auc_pixel_threshold = 20.0
140 | delta_pixel = 0.01
141 | pck_threshold_values = np.arange(0, auc_pixel_threshold, delta_pixel)
142 | counts_2d = []
143 | for value in pck_threshold_values:
144 | under_threshold = (
145 | np.mean(dis2d <= value)
146 | )
147 | counts_2d.append(under_threshold)
148 | auc_pck = np.trapz(counts_2d, dx=delta_pixel) / auc_pixel_threshold
149 |
150 | summary = {
151 | 'ADD/mean': np.mean(dis3d),
152 | 'ADD/median': np.median(dis3d),
153 | 'ADD/AUC': auc_add.item(),
154 | 'ADD_2D/mean': np.mean(dis2d),
155 | 'ADD_2D/median': np.median(dis2d),
156 | 'PCK/AUC': auc_pck.item()
157 | }
158 | for th_mm in add_threshold_ontb:
159 | summary[f'ADD_{th_mm}_mm'] = np.mean(dis3d <= th_mm * 1e-3)
160 | for th_p in pck_threshold_ontb:
161 | summary[f'PCK_{th_p}_pixel'] = np.mean(dis2d <= th_p)
162 | return summary
163 |
164 |
165 | def draw_add_curve(alldis, savename, testdsname, auc):
166 |
167 | dis3d = np.array(alldis['dis3d'])
168 | auc_threshold = 0.1
169 | delta_threshold = 0.00001
170 | add_threshold_values = np.arange(0.0, auc_threshold, delta_threshold)
171 | counts_3d = []
172 | for value in add_threshold_values:
173 | under_threshold = (
174 | np.mean(dis3d <= value)
175 | )
176 | counts_3d.append(under_threshold)
177 | plt.figure(figsize=(25,18))
178 | grid = plt.GridSpec(2,2, wspace=0.1, hspace=0.2)
179 | plt.subplot(grid[0,0])
180 | plt.grid()
181 | plt.plot(add_threshold_values, counts_3d)
182 | plt.xlim(0,auc_threshold)
183 | plt.ylim(0,1.0)
184 | plt.xlabel("add threshold values (unit: m)")
185 | plt.ylabel("percentages")
186 | plt.axvline(x=np.mean(dis3d), color='red', linestyle='--', label='mean distance')
187 | plt.axvline(x=np.median(dis3d), color='green', linestyle='--', label='median distance')
188 | plt.title("ADD curve")
189 | plt.text(x=0.001, y=0.9, s="auc="+str(round(auc*100, ndigits=2)))
190 | plt.legend()
191 |
192 | plt.subplot(grid[0,1])
193 | sns.histplot(dis3d, kde=True)
194 | plt.title("3d distance distribution, whole range")
195 |
196 | plt.subplot(grid[1,0])
197 | sns.histplot(dis3d, kde=True)
198 | plt.xlim(0, 0.5)
199 | plt.title("3d distance distribution, range: 0~0.5m")
200 |
201 | plt.subplot(grid[1,1])
202 | sns.histplot(dis3d, kde=True)
203 | plt.xlim(0, 0.1)
204 | plt.xticks(np.arange(0.0,0.101,0.01))
205 | plt.title("3d distance distribution, range: 0~0.1m")
206 | plt.axvline(x=np.mean(dis3d), color='red', linestyle='--', label='mean distance')
207 | plt.axvline(x=np.median(dis3d), color='green', linestyle='--', label='median distance')
208 |
209 | dataset_name = testdsname.split("/")[-1]
210 |
211 | plt.savefig(os.path.join(savename, f"add_distribution_curve_{dataset_name}.jpg"))
212 | print("drawn add curve in folder vis")
213 | plt.close()
214 |
215 |
216 | def draw_depth_figure(alldis, savename, testdsname):
217 | if "dr" in testdsname.split("/")[-1]:
218 | ds = "dr"
219 | elif "photo" in testdsname.split("/")[-1]:
220 | ds = "photo"
221 | else:
222 | ds = testdsname.split("/")[-1]
223 | assert len(alldis["deptherror"]) == len(alldis["gt_root_depth"]), (len(alldis["deptherror"]), len(alldis["gt_root_depth"]))
224 | deptherror = np.array(alldis["deptherror"])
225 | gtrootdepth = np.array(alldis["gt_root_depth"])
226 | plt.figure(figsize=(15,15))
227 | plt.scatter(gtrootdepth, deptherror)
228 | plt.xlim(0, 2.0)
229 | plt.ylim(0, 0.2)
230 | plt.title("root depth error -- gt root depth scatterplot")
231 | plt.savefig("unit_test/depth_curve/"+savename+"_"+ds+".jpg")
232 | plt.close()
233 |
234 | plt.close()
235 |
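
summary_add_pck consumes per-image mean distances ('dis3d' in metres, 'dis2d' in pixels) accumulated from compute_metrics_batch and reports ADD/PCK statistics plus their AUCs. A synthetic-input sketch, just to show the expected keys (random numbers only; assumes lib/ is on PYTHONPATH):

import numpy as np
from utils.metrics import summary_add_pck

rng = np.random.default_rng(0)
alldis = {
    "dis3d": rng.uniform(0.0, 0.05, size=1000).tolist(),   # per-image mean 3D error (m)
    "dis2d": rng.uniform(0.0, 15.0, size=1000).tolist(),   # per-image mean 2D error (px)
}
summary = summary_add_pck(alldis)
print(summary["ADD/AUC"], summary["PCK/AUC"], summary["ADD_10_mm"], summary["PCK_5.0_pixel"])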
--------------------------------------------------------------------------------
/lib/utils/integral.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.nn import functional as F
4 | from utils.transforms import uvd_to_xyz
5 |
6 |
7 | def flip(x):
8 | assert (x.dim() == 3 or x.dim() == 4)
9 | dim = x.dim() - 1
10 |
11 | return x.flip(dims=(dim,))
12 |
13 | def norm_heatmap_hrnet(norm_type, heatmap, tau=5, sample_num=1):
14 | # Input tensor shape: [N,C,...]
15 | shape = heatmap.shape
16 | if norm_type == 'softmax':
17 | heatmap = heatmap.reshape(*shape[:2], -1)
18 | # global soft max
19 | heatmap = F.softmax(heatmap, 2)
20 | return heatmap.reshape(*shape)
21 | elif norm_type == 'sampling':
22 | heatmap = heatmap.reshape(*shape[:2], -1)
23 |
24 | eps = torch.rand_like(heatmap)
25 | log_eps = torch.log(-torch.log(eps))
26 | gumbel_heatmap = heatmap - log_eps / tau
27 |
28 | gumbel_heatmap = F.softmax(gumbel_heatmap, 2)
29 | return gumbel_heatmap.reshape(*shape)
30 | elif norm_type == 'multiple_sampling':
31 |
32 | heatmap = heatmap.reshape(*shape[:2], 1, -1)
33 |
34 | eps = torch.rand(*heatmap.shape[:2], sample_num, heatmap.shape[3], device=heatmap.device)
35 | log_eps = torch.log(-torch.log(eps))
36 | gumbel_heatmap = heatmap - log_eps / tau
37 | gumbel_heatmap = F.softmax(gumbel_heatmap, 3)
38 | gumbel_heatmap = gumbel_heatmap.reshape(shape[0], shape[1], sample_num, shape[2])
39 |
40 | # [B, S, K, -1]
41 | return gumbel_heatmap.transpose(1, 2)
42 | else:
43 | raise NotImplementedError
44 |
45 | def norm_heatmap_resnet(norm_type, heatmap):
46 | # Input tensor shape: [N,C,...]
47 | shape = heatmap.shape
48 | if norm_type == 'softmax':
49 | heatmap = heatmap.reshape(*shape[:2], -1)
50 | # global soft max
51 | heatmap = F.softmax(heatmap, 2)
52 | return heatmap.reshape(*shape)
53 | else:
54 | raise NotImplementedError
55 |
56 | def get_intrinsic_matrix_batch(f, c, bsz, inv=False):
57 |
58 | intrinsic_matrix = torch.zeros((bsz, 3, 3)).to(torch.float)
59 |
60 | if inv:
61 | intrinsic_matrix[:, 0, 0] = 1.0 / f[0].to(float)
62 | intrinsic_matrix[:, 0, 2] = - c[0].to(float) / f[0].to(float)
63 | intrinsic_matrix[:, 1, 1] = 1.0 / f[1].to(float)
64 | intrinsic_matrix[:, 1, 2] = - c[1].to(float) / f[1].to(float)
65 | intrinsic_matrix[:, 2, 2] = 1
66 | else:
67 | intrinsic_matrix[:, 0, 0] = f[0]
68 | intrinsic_matrix[:, 0, 2] = c[0]
69 | intrinsic_matrix[:, 1, 1] = f[1]
70 | intrinsic_matrix[:, 1, 2] = c[1]
71 | intrinsic_matrix[:, 2, 2] = 1
72 |
73 | return intrinsic_matrix.cuda(device=0)
74 |
75 | class HeatmapIntegralPose(nn.Module):
76 | """
77 |     This module takes in heatmap output and performs a soft-argmax (integral) operation.
78 | """
79 | def __init__(self, backbone, **kwargs):
80 | super(HeatmapIntegralPose, self).__init__()
81 | self.backbone_name = backbone
82 | self.norm_type = kwargs["norm_type"]
83 | self.num_joints = kwargs["num_joints"]
84 | self.depth_dim = kwargs["depth_dim"]
85 | self.height_dim = kwargs["height_dim"]
86 | self.width_dim = kwargs["width_dim"]
87 | self.rootid = kwargs["rootid"] if "rootid" in kwargs else 0
88 | self.fixroot = kwargs["fixroot"] if "fixroot" in kwargs else False
89 |
90 | # self.focal_length = kwargs['FOCAL_LENGTH'] if 'FOCAL_LENGTH' in kwargs else 320
91 | bbox_3d_shape = kwargs['bbox_3d_shape'] if 'bbox_3d_shape' in kwargs else (2300, 2300, 2300)
92 | self.bbox_3d_shape = torch.tensor(bbox_3d_shape).float()
93 | self.depth_factor = self.bbox_3d_shape[2] * 1e-3
94 | self.image_size = kwargs["image_size"]
95 |
96 |
97 | def forward(self, out, flip_test=False, **kwargs):
98 | """
99 | Adapted from https://github.com/Jeff-sjtu/HybrIK/tree/main/hybrik/models
100 | """
101 |
102 | K = kwargs["K"]
103 | root_trans = kwargs["root_trans"]
104 | batch_size = out.shape[0]
105 | inv_k = get_intrinsic_matrix_batch((K[:,0,0],K[:,1,1]), (K[:,0,2],K[:,1,2]), bsz=batch_size, inv=True)
106 |
107 | if self.backbone_name in ["resnet", "resnet34", "resnet50"]:
108 | # out = out.reshape(batch_size, self.num_joints, self.depth_dim, self.height_dim, self.width_dim)
109 | out = out.reshape((out.shape[0], self.num_joints, -1))
110 | out = norm_heatmap_resnet(self.norm_type, out)
111 | assert out.dim() == 3, out.shape
112 | heatmaps = out / out.sum(dim=2, keepdim=True)
113 | heatmaps = heatmaps.reshape((heatmaps.shape[0], self.num_joints, self.depth_dim, self.height_dim, self.width_dim))
114 | hm_x0 = heatmaps.sum((2, 3)) # (B, K, W)
115 | hm_y0 = heatmaps.sum((2, 4)) # (B, K, H)
116 | hm_z0 = heatmaps.sum((3, 4)) # (B, K, D)
117 |
118 | range_tensor = torch.arange(hm_x0.shape[-1], dtype=torch.float32, device=hm_x0.device)
119 |
120 | hm_x = hm_x0 * range_tensor
121 | hm_y = hm_y0 * range_tensor
122 | hm_z = hm_z0 * range_tensor
123 |
124 | coord_x = hm_x.sum(dim=2, keepdim=True)
125 | coord_y = hm_y.sum(dim=2, keepdim=True)
126 | coord_z = hm_z.sum(dim=2, keepdim=True)
127 |
128 | coord_x = coord_x / float(self.width_dim) - 0.5
129 | coord_y = coord_y / float(self.height_dim) - 0.5
130 | coord_z = coord_z / float(self.depth_dim) - 0.5
131 |
132 | # -0.5 ~ 0.5
133 | pred_uvd_jts = torch.cat((coord_x, coord_y, coord_z), dim=2)
134 | if self.fixroot:
135 | pred_uvd_jts[:,self.rootid,2] = 0.0
136 | pred_uvd_jts_flat = pred_uvd_jts.reshape(batch_size, -1)
137 |
138 | pred_xyz_jts = uvd_to_xyz(uvd_jts=pred_uvd_jts, image_size=self.image_size, intrinsic_matrix_inverse=inv_k,
139 | root_trans=root_trans, depth_factor=self.depth_factor, return_relative=False)
140 |
141 | # pred_uvd_jts_back = xyz_to_uvd(xyz_jts=pred_xyz_jts, image_size=self.image_size, intrinsic_matrix=K,
142 | # root_trans=root_trans, depth_factor=self.depth_factor, return_relative=False)
143 | # print("(pred_uvd_jts-pred_uvd_jts_back).sum()",(pred_uvd_jts.cuda()-pred_uvd_jts_back.cuda()).sum())
144 |
145 | return pred_uvd_jts, pred_xyz_jts
146 |
147 | elif self.backbone_name == "hrnet" or self.backbone_name == "hrnet32" or self.backbone_name == "hrnet48":
148 | out = out.reshape((out.shape[0], self.num_joints, -1))
149 | heatmaps = norm_heatmap_hrnet(self.norm_type, out)
150 | assert heatmaps.dim() == 3, heatmaps.shape
151 | heatmaps = heatmaps.reshape((heatmaps.shape[0], self.num_joints, self.depth_dim, self.height_dim, self.width_dim))
152 |
153 | hm_x0 = heatmaps.sum((2, 3)) # (B, K, W)
154 | hm_y0 = heatmaps.sum((2, 4)) # (B, K, H)
155 | hm_z0 = heatmaps.sum((3, 4)) # (B, K, D)
156 |
157 | range_tensor = torch.arange(hm_x0.shape[-1], dtype=torch.float32, device=hm_x0.device).unsqueeze(-1)
158 | # hm_x = hm_x0 * range_tensor
159 | # hm_y = hm_y0 * range_tensor
160 | # hm_z = hm_z0 * range_tensor
161 |
162 | # coord_x = hm_x.sum(dim=2, keepdim=True)
163 | # coord_y = hm_y.sum(dim=2, keepdim=True)
164 | # coord_z = hm_z.sum(dim=2, keepdim=True)
165 | coord_x = hm_x0.matmul(range_tensor)
166 | coord_y = hm_y0.matmul(range_tensor)
167 | coord_z = hm_z0.matmul(range_tensor)
168 |
169 | coord_x = coord_x / float(self.width_dim) - 0.5
170 | coord_y = coord_y / float(self.height_dim) - 0.5
171 | coord_z = coord_z / float(self.depth_dim) - 0.5
172 |
173 | # -0.5 ~ 0.5
174 | pred_uvd_jts = torch.cat((coord_x, coord_y, coord_z), dim=2)
175 | if self.fixroot:
176 | pred_uvd_jts[:,self.rootid,2] = 0.0
177 | pred_uvd_jts_flat = pred_uvd_jts.reshape(batch_size, -1)
178 |
179 | pred_xyz_jts = uvd_to_xyz(uvd_jts=pred_uvd_jts, image_size=self.image_size, intrinsic_matrix_inverse=inv_k,
180 | root_trans=root_trans, depth_factor=self.depth_factor, return_relative=False)
181 |
182 | # pred_uvd_jts_back = xyz_to_uvd(xyz_jts=pred_xyz_jts, image_size=self.image_size, intrinsic_matrix=K,
183 | # root_trans=root_trans, depth_factor=self.depth_factor, return_relative=False)
184 | # print("(pred_uvd_jts-pred_uvd_jts_back).sum()",(pred_uvd_jts.cuda()-pred_uvd_jts_back.cuda()).sum())
185 |
186 | return pred_uvd_jts, pred_xyz_jts
187 |
188 | else:
189 | raise(NotImplementedError)
190 |
191 |
192 | class HeatmapIntegralJoint(nn.Module):
193 | """
194 |     This module takes in heatmap output and performs a soft-argmax (integral) operation.
195 | """
196 | def __init__(self, backbone, **kwargs):
197 | super(HeatmapIntegralJoint, self).__init__()
198 | self.backbone_name = backbone
199 | self.norm_type = kwargs["norm_type"]
200 | self.dof = kwargs["dof"]
201 | self.joint_bounds = kwargs["joint_bounds"]
202 | assert self.joint_bounds.shape == (self.dof, 2), self.joint_bounds.shape
203 |
204 |
205 | def forward(self, out, **kwargs):
206 | """
207 | Adapted from https://github.com/Jeff-sjtu/HybrIK/tree/main/hybrik/models
208 | """
209 |
210 | batch_size = out.shape[0]
211 |
212 | if self.backbone_name in ["resnet34", "resnet50"]:
213 | out = out.reshape(batch_size, self.dof, -1)
214 | out = norm_heatmap_resnet(self.norm_type, out)
215 | assert out.dim() == 3, out.shape
216 | heatmaps = out / out.sum(dim=2, keepdim=True)
217 | heatmaps = heatmaps.reshape((heatmaps.shape[0], self.dof, -1)) # no depth dimension
218 |
219 | resolution = heatmaps.shape[-1]
220 | range_tensor = torch.arange(resolution, dtype=torch.float32, device=heatmaps.device).reshape(1,1,resolution)
221 | hm_int = heatmaps * range_tensor
222 | coord_joint_raw = hm_int.sum(dim=2, keepdim=True)
223 | coord_joint = coord_joint_raw / float(resolution) # 0~1
224 |
225 | bounds = self.joint_bounds.reshape(1,self.dof,2).cuda()
226 | jointrange = bounds[:,:,[1]] - bounds[:,:,[0]]
227 | joints = coord_joint * jointrange + bounds[:,:,[0]]
228 |
229 | return joints.squeeze(-1)
230 |
231 | else:
232 | raise(NotImplementedError)
233 |
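
The soft-argmax used by HeatmapIntegralPose and HeatmapIntegralJoint reduces, per joint and per axis, to "softmax the heatmap, then take the expectation of the coordinate grid". A minimal standalone 1D sketch of that pattern (normalised to [-0.5, 0.5) as in HeatmapIntegralPose; not this module's API):

import torch
import torch.nn.functional as F

def soft_argmax_1d(logits):
    """Differentiable argmax: expected index under the softmax distribution."""
    probs = F.softmax(logits, dim=-1)                       # (B, K, W), sums to 1 over W
    grid = torch.arange(logits.shape[-1], dtype=torch.float32)
    coord = (probs * grid).sum(dim=-1)                      # expectation of the index
    return coord / logits.shape[-1] - 0.5                   # normalise to [-0.5, 0.5)

logits = torch.randn(2, 7, 64, requires_grad=True)          # batch of 7-keypoint 1D heatmaps
coords = soft_argmax_1d(logits)
coords.sum().backward()                                     # gradients flow through the argmax
print(coords.shape)                                         # torch.Size([2, 7])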
--------------------------------------------------------------------------------
/lib/utils/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
4 | import random
5 | import shutil
6 | from collections import OrderedDict, defaultdict
7 | from lib.dataset.multiepoch_dataloader import MultiEpochDataLoader
8 | from lib.dataset.samplers import PartialSampler
9 | from pathlib import Path
10 | import numpy as np
11 | import torch
12 | from lib.dataset.dream import DreamDataset
13 | from torch.utils.data import DataLoader
14 | from torch.utils.tensorboard import SummaryWriter
15 | from torchnet.meter import AverageValueMeter
16 | from tqdm import tqdm
17 |
18 | def cast(obj, device, dtype=None):
19 |
20 | if isinstance(obj, (dict, OrderedDict)):
21 | for k, v in obj.items():
22 | if v is None:
23 | continue
24 | obj[k] = cast(torch.as_tensor(v),device)
25 | if dtype is not None:
26 | obj[k] = obj[k].to(dtype)
27 | return obj
28 | else:
29 | return obj.to(device)
30 |
31 |
32 | def set_random_seed(seed):
33 |
34 | random.seed(seed)
35 | np.random.seed(seed)
36 | torch.manual_seed(seed)
37 | torch.cuda.manual_seed(seed)
38 |
39 |
40 | def copy_and_rename(src_path, dest_path, new_filename):
41 |
42 | src_path = Path(src_path)
43 | dest_path = Path(dest_path)
44 | shutil.copy(src_path, dest_path)
45 | src_filename = src_path.name
46 | dest_filepath = dest_path / new_filename
47 | (dest_path / src_filename).replace(dest_filepath)
48 |
49 |
50 | def create_logger(args):
51 |
52 | save_folder = os.path.join('experiments', args.exp_name)
53 | ckpt_folder = os.path.join(save_folder, 'ckpt')
54 | log_folder = os.path.join(save_folder, 'log')
55 | os.makedirs(ckpt_folder, exist_ok=True)
56 | os.makedirs(log_folder, exist_ok=True)
57 | writer = SummaryWriter(log_dir=log_folder)
58 | copy_and_rename(args.config_path, save_folder, "config.yaml")
59 |
60 | return save_folder, ckpt_folder, log_folder, writer
61 |
62 |
63 | def get_dataloaders(args):
64 |
65 | urdf_robot_name = args.urdf_robot_name
66 | train_ds_names = args.train_ds_names
67 | test_ds_name_dr = train_ds_names.replace("train_dr","test_dr")
68 | if urdf_robot_name != "baxter":
69 | test_ds_name_photo = train_ds_names.replace("train_dr","test_photo")
70 | if urdf_robot_name == "panda":
71 |
72 | test_ds_name_real = [train_ds_names.replace("synthetic/panda_synth_train_dr","real/panda-3cam_azure"),
73 | train_ds_names.replace("synthetic/panda_synth_train_dr","real/panda-3cam_kinect360"),
74 | train_ds_names.replace("synthetic/panda_synth_train_dr","real/panda-3cam_realsense"),
75 | train_ds_names.replace("synthetic/panda_synth_train_dr","real/panda-orb")]
76 |
77 | rootnet_hw = (int(args.rootnet_image_size),int(args.rootnet_image_size))
78 | other_hw = (int(args.other_image_size),int(args.other_image_size))
79 | ds_train = DreamDataset(train_ds_names,
80 | rootnet_resize_hw=rootnet_hw,
81 | other_resize_hw=other_hw,
82 | color_jitter=args.jitter, rgb_augmentation=args.other_aug,
83 | occlusion_augmentation=args.occlusion, occlu_p=args.occlu_p)
84 | ds_test_dr = DreamDataset(test_ds_name_dr,
85 | rootnet_resize_hw=rootnet_hw,
86 | other_resize_hw=other_hw,
87 | color_jitter=False, rgb_augmentation=False, occlusion_augmentation=False)
88 | if urdf_robot_name != "baxter":
89 | ds_test_photo = DreamDataset(test_ds_name_photo,
90 | rootnet_resize_hw=rootnet_hw,
91 | other_resize_hw=other_hw,
92 | color_jitter=False, rgb_augmentation=False, occlusion_augmentation=False)
93 |
94 | train_sampler = PartialSampler(ds_train, epoch_size=args.epoch_size)
95 | ds_iter_train = DataLoader(
96 | ds_train,
97 | sampler=train_sampler,
98 | batch_size=args.batch_size,
99 | num_workers=args.n_dataloader_workers,
100 | drop_last=False,
101 | pin_memory=True
102 | )
103 | ds_iter_train = MultiEpochDataLoader(ds_iter_train)
104 |
105 | test_loader_dict = {}
106 | ds_iter_test_dr = DataLoader(
107 | ds_test_dr,
108 | batch_size=args.batch_size,
109 | num_workers=args.n_dataloader_workers
110 | )
111 | test_loader_dict["dr"] = ds_iter_test_dr
112 |
113 | if urdf_robot_name != "baxter":
114 | ds_iter_test_photo = DataLoader(
115 | ds_test_photo,
116 | batch_size=args.batch_size,
117 | num_workers=args.n_dataloader_workers
118 | )
119 | test_loader_dict["photo"] = ds_iter_test_photo
120 |
121 | if urdf_robot_name == "panda":
122 | ds_shorts = ["azure", "kinect", "realsense", "orb"]
123 | for ds_name, ds_short in zip(test_ds_name_real, ds_shorts):
124 | ds_test_real = DreamDataset(ds_name,
125 | rootnet_resize_hw=rootnet_hw,
126 | other_resize_hw=other_hw,
127 | color_jitter=False, rgb_augmentation=False, occlusion_augmentation=False,
128 | process_truncation=args.fix_truncation)
129 | ds_iter_test_real = DataLoader(
130 | ds_test_real,
131 | batch_size=args.batch_size,
132 | num_workers=args.n_dataloader_workers
133 | )
134 | test_loader_dict[ds_short] = ds_iter_test_real
135 |
136 | print("len(ds_iter_train): ", len(ds_iter_train))
137 | print("len(ds_iter_test_dr): ", len(ds_iter_test_dr))
138 | if urdf_robot_name != "baxter":
139 | print("len(ds_iter_test_photo): ", len(ds_iter_test_photo))
140 | if urdf_robot_name == "panda":
141 | for ds_short in ds_shorts:
142 | print(f"len(ds_iter_test_{ds_short}): ", len(test_loader_dict[ds_short]))
143 |
144 | return ds_iter_train, test_loader_dict
145 |
146 |
147 | def get_scheduler(args, optimizer, last_epoch):
148 |
149 | def lr_lambda_linear(epoch):
150 | if epoch < args.n_epochs_warmup:
151 | ratio = float(epoch+1)/float(args.n_epochs_warmup)
152 | elif epoch <= args.start_decay:
153 | ratio = 1.0
154 | elif epoch <= args.end_decay:
155 | ratio = (float(args.end_decay - args.final_decay * args.start_decay) - (float(1-args.final_decay) * epoch)) / float(args.end_decay - args.start_decay)
156 | else:
157 | ratio = args.final_decay
158 | return ratio
159 |
160 | def lr_lambda_exponential(epoch):
161 | base_ratio = 1.0
162 | ratio = base_ratio
163 | if epoch < args.n_epochs_warmup:
164 | ratio = float(epoch+1)/float(args.n_epochs_warmup)
165 | elif epoch <= args.start_decay:
166 | ratio = base_ratio
167 | elif epoch <= args.end_decay:
168 | ratio = (args.exponent)**(epoch-args.start_decay)
169 | else:
170 | ratio = (args.exponent)**(args.end_decay-args.start_decay)
171 | return ratio
172 |
173 | def lr_lambda_everyXepoch(epoch):
174 | ratio = (args.step_decay)**(epoch // args.step)
175 | if epoch >= args.end_decay:
176 | ratio = (args.step_decay)**(args.end_decay // args.step)
177 | return ratio
178 |
179 | if args.use_schedule:
180 | if args.schedule_type == "linear":
181 | lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer=optimizer, lr_lambda=lr_lambda_linear, last_epoch=last_epoch)
182 | elif args.schedule_type == "exponential":
183 | lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer=optimizer, lr_lambda=lr_lambda_exponential, last_epoch=last_epoch)
184 | elif args.schedule_type == "everyXepoch":
185 | lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer=optimizer, lr_lambda=lr_lambda_everyXepoch, last_epoch=last_epoch)
186 | else:
187 | lr_scheduler = None
188 |
189 | return lr_scheduler
190 |
191 |
192 | def resume_run(args, model, optimizer, device):
193 |
194 | curr_max_auc_4real = { "azure": 0.0, "kinect": 0.0, "realsense": 0.0, "orb": 0.0 }
195 | template = 'ckpt/curr_best_auc(add)_DATASET_model.pk'
196 | ckpt_paths = [template.replace("DATASET", name) for name in curr_max_auc_4real.keys()]
197 |
198 | resume_dir = os.path.join("experiments" , args.resume_experiment_name)
199 | path = os.path.join(resume_dir, 'ckpt/curr_best_auc(add)_model.pk')
200 | checkpoint = torch.load(path)
201 | state_dict = checkpoint['model_state_dict']
202 | model.load_state_dict(state_dict)
203 | model.to(device)
204 | optimizer_dict = checkpoint['optimizer_state_dict']
205 | optimizer.load_state_dict(optimizer_dict)
206 | for state in optimizer.state.values():
207 | for k, v in state.items():
208 | if isinstance(v, torch.Tensor):
209 | state[k] = v.to(device)
210 |
211 | start_epoch = checkpoint['epoch']+1
212 | last_epoch = checkpoint['lr_scheduler_last_epoch']
213 | curr_max_auc = checkpoint["auc_add"]
214 |
215 | for postfix, dsname in zip(ckpt_paths, curr_max_auc_4real.keys()):
216 | model_path = os.path.join(resume_dir, postfix)
217 | ckpt = torch.load(model_path)
218 | curr_max_auc_onreal = ckpt["auc_add"]
219 | curr_max_auc_4real[dsname] = curr_max_auc_onreal
220 |
221 | return start_epoch, last_epoch, curr_max_auc, curr_max_auc_4real
222 |
223 |
224 | def save_checkpoint(args, auc_adds, model, optimizer, ckpt_folder, epoch, lr_scheduler, curr_max_auc, curr_max_auc_4real):
225 |
226 | save_path_dr = os.path.join(ckpt_folder, 'curr_best_auc(add)_model.pk')
227 | save_path_azure = os.path.join(ckpt_folder, 'curr_best_auc(add)_azure_model.pk')
228 | save_path_kinect = os.path.join(ckpt_folder, 'curr_best_auc(add)_kinect_model.pk')
229 | save_path_realsense = os.path.join(ckpt_folder, 'curr_best_auc(add)_realsense_model.pk')
230 | save_path_orb = os.path.join(ckpt_folder, 'curr_best_auc(add)_orb_model.pk')
231 | save_path = {"azure":save_path_azure, "kinect":save_path_kinect, "realsense":save_path_realsense, "orb":save_path_orb}
232 | saves = {"dr":True, "azure":True, "kinect":True, "realsense":True, "orb":True }
233 | if os.path.exists(save_path_dr):
234 | ckpt = torch.load(save_path_dr)
235 |         if epoch <= ckpt["epoch"]:  # prevent a better checkpoint from being overwritten after a cluster reboot
236 | saves["dr"] = False
237 | for real_name in ["azure", "kinect", "realsense", "orb"]:
238 | if os.path.exists(save_path[real_name]):
239 | ckpt_real = torch.load(save_path[real_name])
240 |             if epoch <= ckpt_real["epoch"]:  # prevent a better checkpoint from being overwritten after a cluster reboot
241 | saves[real_name] = False
242 |
243 | if saves["dr"]:
244 | if auc_adds["dr"] > curr_max_auc:
245 | curr_max_auc = auc_adds["dr"]
246 | last_epoch = lr_scheduler.last_epoch if args.use_schedule else -1
247 | torch.save({
248 | 'epoch': epoch,
249 | 'auc_add': curr_max_auc,
250 | 'model_state_dict': model.state_dict(),
251 | 'optimizer_state_dict': optimizer.state_dict(),
252 | 'lr_scheduler_last_epoch':last_epoch,
253 | }, save_path_dr)
254 |
255 | if args.urdf_robot_name == "panda":
256 | for real_name in ["azure", "kinect", "realsense", "orb"]:
257 | if saves[real_name]:
258 | if auc_adds[real_name] > curr_max_auc_4real[real_name]:
259 | curr_max_auc_4real[real_name] = auc_adds[real_name]
260 | last_epoch = lr_scheduler.last_epoch if args.use_schedule else -1
261 | torch.save({
262 | 'epoch': epoch,
263 | 'auc_add': curr_max_auc_4real[real_name],
264 | 'model_state_dict': model.state_dict(),
265 | 'optimizer_state_dict': optimizer.state_dict(),
266 | 'lr_scheduler_last_epoch':last_epoch,
267 | }, save_path[real_name])
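
The schedules returned by get_scheduler are plain LambdaLR multipliers on the base learning rate; the "linear" variant warms up over n_epochs_warmup epochs, holds at 1.0 until start_decay, then decays linearly to final_decay at end_decay. A self-contained sketch with hypothetical hyperparameters (the SimpleNamespace below stands in for the parsed config, which this repository defines elsewhere):

from types import SimpleNamespace

args = SimpleNamespace(n_epochs_warmup=5, start_decay=50, end_decay=100, final_decay=0.1)

def lr_lambda_linear(epoch):
    # same shape as the schedule defined in get_scheduler above
    if epoch < args.n_epochs_warmup:
        return float(epoch + 1) / float(args.n_epochs_warmup)
    elif epoch <= args.start_decay:
        return 1.0
    elif epoch <= args.end_decay:
        return (float(args.end_decay - args.final_decay * args.start_decay)
                - float(1 - args.final_decay) * epoch) / float(args.end_decay - args.start_decay)
    else:
        return args.final_decay

for epoch in (0, 4, 25, 75, 100, 120):
    print(epoch, round(lr_lambda_linear(epoch), 3))   # 0.2, 1.0, 1.0, 0.55, 0.1, 0.1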
--------------------------------------------------------------------------------
/lib/dataset/augmentations.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
4 | import math
5 | import random
6 | from copy import deepcopy
7 | import numpy as np
8 | import PIL
9 | import torch
10 | import torch.nn.functional as F
11 | from dataset.roboutils import hnormalized, make_detections_from_segmentation
12 | from PIL import ImageEnhance, ImageFilter
13 | from utils.geometries import get_K_crop_resize
14 |
15 |
16 | def to_pil(im):
17 | if isinstance(im, PIL.Image.Image):
18 | return im
19 | elif isinstance(im, torch.Tensor):
20 | return PIL.Image.fromarray(np.asarray(im))
21 | elif isinstance(im, np.ndarray):
22 | return PIL.Image.fromarray(im)
23 | else:
24 | raise ValueError('Type not supported', type(im))
25 |
26 |
27 | def to_torch_uint8(im):
28 | if isinstance(im, PIL.Image.Image):
29 | im = torch.as_tensor(np.asarray(im).astype(np.uint8))
30 | elif isinstance(im, torch.Tensor):
31 | assert im.dtype == torch.uint8
32 | elif isinstance(im, np.ndarray):
33 | assert im.dtype == np.uint8
34 | im = torch.as_tensor(im)
35 | else:
36 | raise ValueError('Type not supported', type(im))
37 | if im.dim() == 3:
38 | assert im.shape[-1] in {1, 3},f"{im.shape}"
39 | return im
40 |
41 | def occlusion_aug(bbox, img_shape, min_area=0.0, max_area=0.3, max_try_times=5):
42 | xmin, ymin, _, _ = bbox
43 | xmax = bbox[2]
44 | ymax = bbox[3]
45 | imght, imgwidth = img_shape
46 | counter = 0
47 | while True:
48 | # force to break if no suitable occlusion
49 | if counter > max_try_times: # 5
50 | return 0, 0, 0, 0
51 | counter += 1
52 |
53 | area_min = min_area # 0.0
54 | area_max = max_area # 0.3
55 | synth_area = (random.random() * (area_max - area_min) + area_min) * (xmax - xmin) * (ymax - ymin)
56 |
57 | ratio_min = 0.5
58 | ratio_max = 1 / 0.5
59 | synth_ratio = (random.random() * (ratio_max - ratio_min) + ratio_min)
60 |
61 | if(synth_ratio*synth_area<=0):
62 | print(synth_area,xmax,xmin,ymax,ymin)
63 | print(synth_ratio,ratio_max,ratio_min)
64 | synth_h = math.sqrt(synth_area * synth_ratio)
65 | synth_w = math.sqrt(synth_area / synth_ratio)
66 | synth_xmin = random.random() * ((xmax - xmin) - synth_w - 1) + xmin
67 | synth_ymin = random.random() * ((ymax - ymin) - synth_h - 1) + ymin
68 |
69 | if synth_xmin >= 0 and synth_ymin >= 0 and synth_xmin + synth_w < imgwidth and synth_ymin + synth_h < imght:
70 | synth_xmin = int(synth_xmin)
71 | synth_ymin = int(synth_ymin)
72 | synth_w = int(synth_w)
73 | synth_h = int(synth_h)
74 | break
75 | return synth_ymin, synth_h, synth_xmin, synth_w
76 |
77 | class PillowBlur:
78 | def __init__(self, p=0.4, factor_interval=(1, 3)):
79 | self.p = p
80 | self.factor_interval = factor_interval
81 |
82 | def __call__(self, im, mask, obs):
83 | im = to_pil(im)
84 | k = random.randint(*self.factor_interval)
85 | im = im.filter(ImageFilter.GaussianBlur(k))
86 | return im, mask, obs
87 |
88 |
89 | class PillowRGBAugmentation:
90 | def __init__(self, pillow_fn, p, factor_interval):
91 | self._pillow_fn = pillow_fn
92 | self.p = p
93 | self.factor_interval = factor_interval
94 |
95 | def __call__(self, im, mask, obs):
96 | im = to_pil(im)
97 | if random.random() <= self.p:
98 | im = self._pillow_fn(im).enhance(factor=random.uniform(*self.factor_interval))
99 | #im.save('./BRIGHT.png')
100 | return im, mask, obs
101 |
102 |
103 | class PillowSharpness(PillowRGBAugmentation):
104 | def __init__(self, p=0.3, factor_interval=(0., 50.)):
105 | super().__init__(pillow_fn=ImageEnhance.Sharpness,
106 | p=p,
107 | factor_interval=factor_interval)
108 |
109 |
110 | class PillowContrast(PillowRGBAugmentation):
111 | def __init__(self, p=0.3, factor_interval=(0.2, 50.)):
112 | super().__init__(pillow_fn=ImageEnhance.Contrast,
113 | p=p,
114 | factor_interval=factor_interval)
115 |
116 |
117 | class PillowBrightness(PillowRGBAugmentation):
118 | def __init__(self, p=0.5, factor_interval=(0.1, 6.0)):
119 | super().__init__(pillow_fn=ImageEnhance.Brightness,
120 | p=p,
121 | factor_interval=factor_interval)
122 |
123 |
124 | class PillowColor(PillowRGBAugmentation):
125 | def __init__(self, p=0.3, factor_interval=(0.0, 20.0)):
126 | super().__init__(pillow_fn=ImageEnhance.Color,
127 | p=p,
128 | factor_interval=factor_interval)
129 |
130 |
131 | class GrayScale(PillowRGBAugmentation):
132 | def __init__(self, p=0.3):
133 | self.p = p
134 |
135 | def __call__(self, im, mask, obs):
136 | im = to_pil(im)
137 | if random.random() <= self.p:
138 | im = to_torch_uint8(im).float()
139 | gray = 0.2989 * im[..., 0] + 0.5870 * im[..., 1] + 0.1140 * im[..., 2]
140 | gray = gray.to(torch.uint8)
141 | im = gray.unsqueeze(-1).repeat(1, 1, 3)
142 | return im, mask, obs
143 |
144 |
145 | class BackgroundAugmentation:
146 | def __init__(self, image_dataset, p):
147 | self.image_dataset = image_dataset
148 | self.p = p
149 |
150 | def get_bg_image(self, idx):
151 | return self.image_dataset[idx]
152 |
153 | def __call__(self, im, mask, obs):
154 | if random.random() <= self.p:
155 | im = to_torch_uint8(im)
156 | mask = to_torch_uint8(mask)
157 | h, w, c = im.shape
158 | im_bg = self.get_bg_image(random.randint(0, len(self.image_dataset) - 1))
159 | im_bg = to_pil(im_bg)
160 | im_bg = torch.as_tensor(np.asarray(im_bg.resize((w, h))))
161 | mask_bg = mask == 0
162 | im[mask_bg] = im_bg[mask_bg]
163 | return im, mask, obs
164 |
165 | class CropResizeToAspectAugmentation:
166 | def __init__(self, resize=(640, 480)):
167 | self.resize = (min(resize), max(resize))
168 | self.aspect = max(resize) / min(resize)
169 |
170 | def __call__(self, im, mask, obs, use_3d=True):
171 | im = to_torch_uint8(im)
172 | mask = to_torch_uint8(mask)
173 | obs['orig_camera'] = deepcopy(obs['camera'])
174 | assert im.shape[-1] == 3
175 | h, w = im.shape[:2]
176 | if (h, w) == self.resize:
177 | obs['orig_camera']['crop_resize_bbox'] = (0, 0, w-1, h-1)
178 | return im, mask, obs
179 |
180 | ratio = float(self.resize[0])/h
181 |
182 | images = (torch.as_tensor(im).float() / 255).unsqueeze(0).permute(0, 3, 1, 2)
183 | masks = torch.as_tensor(mask).unsqueeze(0).unsqueeze(0).float()
184 | K = torch.tensor(obs['camera']['K']).unsqueeze(0)
185 |
186 | # Match the width on input image with an image of target aspect ratio.
187 | # if not np.isclose(w/h, self.aspect):
188 | # x0, y0 = images.shape[-1] / 2, images.shape[-2] / 2
189 | # w = images.shape[-1]
190 | # r = self.aspect
191 | # h = w * 1/r
192 | # box_size = (h, w)
193 | # h, w = min(box_size), max(box_size)
194 | # x1, y1, x2, y2 = x0-w/2, y0-h/2, x0+w/2, y0+h/2
195 | # box = torch.tensor([x1, y1, x2, y2])
196 | # images, masks, K = crop_to_aspect_ratio(images, box, masks=masks, K=K)
197 |
198 | # Resize to target size
199 | x0, y0 = images.shape[-1] / 2, images.shape[-2] / 2
200 | h_input, w_input = images.shape[-2], images.shape[-1]
201 | h_output, w_output = min(self.resize), max(self.resize)
202 | box_size = (h_input, w_input)
203 | h, w = min(box_size), max(box_size)
204 | x1, y1, x2, y2 = x0-w/2, y0-h/2, x0+w/2, y0+h/2
205 | box = torch.tensor([x1, y1, x2, y2])
206 | images = F.interpolate(images, size=(h_output, w_output), mode='bilinear', align_corners=False)
207 | masks = F.interpolate(masks, size=(h_output, w_output), mode='nearest')
208 | obs['orig_camera']['crop_resize_bbox'] = tuple(box.tolist())
209 | K = get_K_crop_resize(K, box.unsqueeze(0), orig_size=(h_input, w_input), crop_resize=(h_output, w_output))
210 | # Update the bounding box annotations
211 | keypoints=[]
212 | keypoints_3d=obs['objects'][0]['TCO_keypoints_3d']
213 | K_tmp=K.cpu().clone().detach().numpy()[0]
214 |
215 | if use_3d:
216 | for location3d in keypoints_3d:
217 | location3d.reshape(3,1) # note: no-op, the reshaped result is not assigned; the matmul below uses the flat (3,) vector
218 | p_unflattened = np.matmul(K_tmp, location3d)
219 | #print(p_unflattened)
220 | projection = hnormalized(p_unflattened)
221 | #print(p_unflattened)
222 | #print(projection)
223 | keypoints.append(list(projection))
224 | obs['objects'][0]['keypoints_2d']=keypoints
225 | else:
226 | obs['objects'][0]['keypoints_2d']=obs['objects'][0]['keypoints_2d']*ratio
227 |
228 | dets_gt = make_detections_from_segmentation(masks)[0]
229 | for n, obj in enumerate(obs['objects']):
230 | if 'bbox' in obj:
231 | #assert 'id_in_segm' in obj
232 | #print(dets_gt)
233 | try:
234 | obj['bbox'] = dets_gt[1]
235 | except:
236 | print("bbox",obj['bbox'],"dets_gt",dets_gt)
237 |
238 | im = (images[0].permute(1, 2, 0) * 255).to(torch.uint8)
239 | mask = masks[0, 0].to(torch.uint8)
240 | obs['camera']['K'] = K.squeeze(0).numpy()
241 | obs['camera']['resolution'] = (w_output, h_output)
242 | return im, mask, obs
243 |
244 | def flip_img(x):
245 | assert (x.ndim == 3)
246 | dim = 1
247 | return np.flip(x,axis=dim).copy()
248 |
249 | def flip_joints_2d(joints_2d, width, flip_pairs):
250 | joints = joints_2d.copy()
251 | joints[:, 0] = width - joints[:, 0] - 1 # flip horizontally
252 |
253 | if flip_pairs is not None: # change left-right parts
254 | for lr in flip_pairs:
255 | joints[lr[0]], joints[lr[1]] = joints[lr[1]].copy(), joints[lr[0]].copy()
256 | return joints
257 |
258 | def flip_xyz_joints_3d(joints_3d, flip_pairs):
259 | assert joints_3d.ndim in (2, 3)
260 | joints = joints_3d.copy()
261 | # flip horizontally
262 | joints[:, 0] = -1 * joints[:, 0]
263 | # change left-right parts
264 | if flip_pairs is not None:
265 | print(joints)
266 | for pair in flip_pairs:
267 | print(pair)
268 | print(joints[pair[0]], joints[pair[1]])
269 | joints[pair[0]], joints[pair[1]] = joints[pair[1]], joints[pair[0]].copy()
270 | return joints
271 |
272 | def flip_joints_3d(joints_3d, width, flip_pairs):
273 | joints = joints_3d.copy()
274 | # flip horizontally
275 | joints[:, 0, 0] = width - joints[:, 0, 0] - 1
276 | # change left-right parts
277 | if flip_pairs is not None:
278 | for pair in flip_pairs:
279 | joints[pair[0], :, 0], joints[pair[1], :, 0] = \
280 | joints[pair[1], :, 0], joints[pair[0], :, 0].copy()
281 | joints[pair[0], :, 1], joints[pair[1], :, 1] = \
282 | joints[pair[1], :, 1], joints[pair[0], :, 1].copy()
283 | joints[:, :, 0] *= joints[:, :, 1]
284 | return joints
285 |
286 | class FlipAugmentation:
287 | def __init__(self, p, flip_pairs=None):
288 | self.p = p
289 | self.flip_pairs = flip_pairs
290 |
291 | def __call__(self, im, mask, obs):
292 | if random.random() <= self.p:
293 | im = flip_img(im.numpy())
294 | # mask = flip_img(mask)
295 | obs['objects'][0]['keypoints_2d'] = flip_joints_2d(np.array(obs['objects'][0]['keypoints_2d']), im.shape[1], self.flip_pairs)
296 | obs['camera']['K'][0,0] = - obs['camera']['K'][0,0]
297 | obs['camera']['K'][0,2] = im.shape[1] - 1 - obs['camera']['K'][0,2]
298 | return im, mask, obs
299 |
300 | def rotate_joints_2d(joints_2d, width):
301 | joints = joints_2d.copy()
302 | joints[:, 1] = joints_2d[:, 0]
303 | joints[:, 0] = width - joints_2d[:, 1] + 1
304 | return joints
305 |
306 | class RotationAugmentation:
307 | def __init__(self, p):
308 | self.p = p
309 |
310 | def __call__(self, im, mask, obs):
311 | if random.random() <= self.p:
312 | h,w = im.shape[0],im.shape[1]
313 | im_copy = np.zeros((w,h,3), dtype=np.uint8)
314 | for i in range(h):
315 | for j in range(w):
316 | im_copy[j][h-i-1]=im[i][j].astype(np.uint8)
317 | rgb = PIL.Image.fromarray(im_copy)
318 | obs['objects'][0]['keypoints_2d'] = rotate_joints_2d(obs['objects'][0]['keypoints_2d'], im_copy.shape[1])
319 | kp3d = obs['objects'][0]['TCO_keypoints_3d']
320 | K = obs['camera']['K']
321 | # original_fx,original_fy,original_cx,original_cy = K[0][0],K[1][1],K[0][2],K[1][2]
322 | # angle (in radians) of the rotation: 90 degrees clockwise
323 | angle = np.pi / 2 # 90 degree
324 | K[0][2],K[1][2] = K[1][2],K[0][2]
325 | # set up rotation matrix
326 | rotation_matrix = np.array([[np.cos(angle), -np.sin(angle), 0],
327 | [np.sin(angle), np.cos(angle), 0],
328 | [0, 0, 1]])
329 | # new camera intrinsic matrix
330 | # obs['camera']['K'] = new_intrinsic_matrix
331 | for i in range(kp3d.shape[0]):
332 | kp3d[i] = np.dot(rotation_matrix, kp3d[i])
333 |
334 | return rgb, mask, obs
335 | else:
336 | return im, mask, obs
--------------------------------------------------------------------------------
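The augmentations above all share the (im, mask, obs) calling convention and can be chained directly. A minimal usage sketch, assuming lib/ is on sys.path (as the repo's own modules arrange via sys.path.append) and using dummy inputs; the variable names below are illustrative, not from the repo.

import numpy as np
from dataset.augmentations import (PillowBlur, PillowSharpness, PillowContrast,
                                   PillowBrightness, PillowColor, to_torch_uint8)

im = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)  # dummy RGB frame
mask = np.zeros((480, 640), dtype=np.uint8)                    # dummy segmentation mask
obs = {}                                                       # per-sample metadata (untouched by these transforms)

augmentations = [
    PillowBlur(p=0.4),
    PillowSharpness(p=0.3),
    PillowContrast(p=0.3),
    PillowBrightness(p=0.5),
    PillowColor(p=0.3),
]
for aug in augmentations:
    im, mask, obs = aug(im, mask, obs)   # each transform returns the same triple

im = to_torch_uint8(im)                  # back to an HxWx3 uint8 tensor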
/lib/utils/geometries.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from torch.nn import functional as F
4 |
5 | def batch_rodrigues(theta):
6 | """Convert axis-angle representation to rotation matrix.
7 | Args:
8 | theta: size = [B, 3]
9 | Returns:
10 | Rotation matrix corresponding to the axis-angle input -- size = [B, 3, 3]
11 | """
12 | l1norm = torch.norm(theta + 1e-8, p = 2, dim = 1)
13 | angle = torch.unsqueeze(l1norm, -1)
14 | normalized = torch.div(theta, angle)
15 | angle = angle * 0.5
16 | v_cos = torch.cos(angle)
17 | v_sin = torch.sin(angle)
18 | quat = torch.cat([v_cos, v_sin * normalized], dim = 1)
19 | return quat_to_rotmat(quat)
20 |
21 | def quat_to_rotmat(quat):
22 | """Convert quaternion coefficients to rotation matrix.
23 | Args:
24 | quat: size = [B, 4] 4 <===>(w, x, y, z)
25 | Returns:
26 | Rotation matrix corresponding to the quaternion -- size = [B, 3, 3]
27 | """
28 | norm_quat = quat
29 | norm_quat = norm_quat/(norm_quat.norm(p=2, dim=1, keepdim=True)+1e-9)
30 | w, x, y, z = norm_quat[:,0], norm_quat[:,1], norm_quat[:,2], norm_quat[:,3]
31 |
32 | B = quat.size(0)
33 |
34 | w2, x2, y2, z2 = w.pow(2), x.pow(2), y.pow(2), z.pow(2)
35 | wx, wy, wz = w*x, w*y, w*z
36 | xy, xz, yz = x*y, x*z, y*z
37 |
38 | rotMat = torch.stack([w2 + x2 - y2 - z2, 2*xy - 2*wz, 2*wy + 2*xz,
39 | 2*wz + 2*xy, w2 - x2 + y2 - z2, 2*yz - 2*wx,
40 | 2*xz - 2*wy, 2*wx + 2*yz, w2 - x2 - y2 + z2], dim=1).view(B, 3, 3)
41 | return rotMat
42 |
43 | def quat_to_rotmat_np(quat):
44 | """Convert quaternion coefficients to rotation matrix.
45 | Without batch, in numpy.
46 | Args:
47 | quat: size = [4] <===>(w, x, y, z)
48 | Returns:
49 | Rotation matrix corresponding to the quaternion -- size = [3, 3]
50 | """
51 | norm_quat = quat
52 | norm_quat = norm_quat / np.linalg.norm(norm_quat, ord=2, axis=0, keepdims=True)
53 | w, x, y, z = norm_quat[0], norm_quat[1], norm_quat[2], norm_quat[3]
54 | w2, x2, y2, z2 = w*w, x*x, y*y, z*z
55 | wx, wy, wz = w*x, w*y, w*z
56 | xy, xz, yz = x*y, x*z, y*z
57 |
58 | rotMat = np.array([[w2 - x2 - y2 + z2, -2*yz + 2*wx, 2*wy + 2*xz],
59 | [2*wx + 2*yz, -(w2 - x2 + y2 - z2), 2*xy - 2*wz],
60 | [-2*xz + 2*wy, 2*wz + 2*xy, -(w2 + x2 - y2 - z2)]])
61 | return rotMat
62 |
63 | def rotmat_to_quat(matrices):
64 | batch = matrices.shape[0]
65 | this_device = matrices.device
66 | w = torch.sqrt(torch.max(1.0 + matrices[:,0,0] + matrices[:,1,1] + matrices[:,2,2], torch.zeros(1).to(this_device))) / 2.0
67 | w = torch.max (w , torch.autograd.Variable(torch.zeros(batch)).to(this_device) + 1e-8) #batch
68 | w4 = 4.0 * w
69 | x = (matrices[:,2,1] - matrices[:,1,2]) / w4
70 | y = (matrices[:,0,2] - matrices[:,2,0]) / w4
71 | z = (matrices[:,1,0] - matrices[:,0,1]) / w4
72 | quats = torch.cat( (w.view(batch,1), x.view(batch, 1),y.view(batch, 1), z.view(batch, 1) ), 1 )
73 | quats = normalize_vector(quats)
74 | return quats
75 |
76 | def normalize_vector(v):
77 | batch = v.shape[0]
78 | v_mag = torch.sqrt(v.pow(2).sum(1))# batch
79 | v_mag = torch.max(v_mag, torch.autograd.Variable(torch.FloatTensor([1e-8]).to(v.device)))
80 | v_mag = v_mag.view(batch,1).expand(batch,v.shape[1])
81 | v = v/v_mag
82 | return v
83 |
84 | # def rot6d_to_rotmat(x):
85 | # """Convert 6D rotation representation to 3x3 rotation matrix.
86 | # Based on Zhou et al., "On the Continuity of Rotation Representations in Neural Networks", CVPR 2019
87 | # Input:
88 | # (B,6) Batch of 6-D rotation representations
89 | # Output:
90 | # (B,3,3) Batch of corresponding rotation matrices
91 | # """
92 | # x = x.view(-1,3,2)
93 | # a1 = x[:, :, 0]
94 | # a2 = x[:, :, 1]
95 | # b1 = F.normalize(a1)
96 | # b2 = F.normalize(a2 - torch.einsum('bi,bi->b', b1, a2).unsqueeze(-1) * b1)
97 | # b3 = torch.cross(b1, b2)
98 | # return torch.stack((b1, b2, b3), dim=-1)
99 |
100 | def rot6d_to_rotmat(poses):
101 | """
102 | Code from https://github.com/papagina/RotationContinuity
103 | On the Continuity of Rotation Representations in Neural Networks
104 | Zhou et al. CVPR19
105 | https://zhouyisjtu.github.io/project_rotation/rotation.html
106 | """
107 | assert poses.shape[-1] == 6
108 | x_raw = poses[..., 0:3]
109 | y_raw = poses[..., 3:6]
110 | x = x_raw / torch.norm(x_raw, p=2, dim=-1, keepdim=True)
111 | z = torch.cross(x, y_raw, dim=-1)
112 | z = z / torch.norm(z, p=2, dim=-1, keepdim=True)
113 | y = torch.cross(z, x, dim=-1)
114 | matrix = torch.stack((x, y, z), -1)
115 | return torch.transpose(matrix,dim0=-2,dim1=-1)
116 |
117 | def rotmat_to_rot6d(matrix):
118 | """
119 | Converts rotation matrices to 6D rotation representation by Zhou et al. [1]
120 | by dropping the last row. Note that 6D representation is not unique.
121 | Args:
122 | matrix: batch of rotation matrices of size (*, 3, 3)
123 |
124 | Returns:
125 | 6D rotation representation, of size (*, 6)
126 |
127 | [1] Zhou, Y., Barnes, C., Lu, J., Yang, J., & Li, H.
128 | On the Continuity of Rotation Representations in Neural Networks.
129 | IEEE Conference on Computer Vision and Pattern Recognition, 2019.
130 | Retrieved from http://arxiv.org/abs/1812.07035
131 | """
132 | return matrix[..., :2, :].clone().reshape(*matrix.size()[:-2], 6)
133 |
134 | def rot9d_to_rotmat(x):
135 | """
136 | Maps 9D input vectors onto SO(3) via symmetric orthogonalization.
137 | x: should have size [batch_size, 9]
138 | Output has size [batch_size, 3, 3], where each inner 3x3 matrix is in SO(3).
139 | """
140 | m = x.view(-1, 3, 3)
141 | d = m.device
142 | u, s, v = torch.svd(m.cpu())
143 | u, v = u.to(d), v.to(d)
144 | vt = torch.transpose(v, 1, 2)
145 | det = torch.det(torch.bmm(u, vt))
146 | det = det.view(-1, 1, 1)
147 | vt = torch.cat((vt[:, :2, :], vt[:, -1:, :] * det), 1)
148 | r = torch.bmm(u, vt)
149 | return r.cuda()
150 |
151 | # matrices: batch*3*3
152 | # both inputs are orthogonal rotation matrices
153 | # output: theta in [0, 180] degrees, one value per batch element
154 | def compute_geodesic_distance_from_two_matrices(m1, m2):
155 | batch=m1.shape[0]
156 | m = torch.bmm(m1, m2.transpose(1,2)) #batch*3*3
157 | cos = ( m[:,0,0] + m[:,1,1] + m[:,2,2] - 1 )/2
158 | cos = torch.min(cos, torch.autograd.Variable(torch.ones(batch).cuda()) )
159 | cos = torch.max(cos, torch.autograd.Variable(torch.ones(batch).cuda())*-1 )
160 | theta = torch.acos(cos)
161 | # theta = torch.min(theta, 2*np.pi - theta)
162 | return theta
163 |
164 | def angle_axis_to_rotation_matrix(angle_axis):
165 | """Convert 3d vector of axis-angle rotation to 4x4 rotation matrix
166 |
167 | Args:
168 | angle_axis (Tensor): tensor of 3d vector of axis-angle rotations.
169 |
170 | Returns:
171 | Tensor: tensor of 4x4 rotation matrices.
172 |
173 | Shape:
174 | - Input: :math:`(N, 3)`
175 | - Output: :math:`(N, 4, 4)`
176 |
177 | Example:
178 | >>> input = torch.rand(1, 3) # Nx3
179 | >>> output = tgm.angle_axis_to_rotation_matrix(input) # Nx4x4
180 | """
181 | def _compute_rotation_matrix(angle_axis, theta2, eps=1e-6):
182 | # We want to be careful to only evaluate the square root if the
183 | # norm of the angle_axis vector is greater than zero. Otherwise
184 | # we get a division by zero.
185 | k_one = 1.0
186 | theta = torch.sqrt(theta2)
187 | wxyz = angle_axis / (theta + eps)
188 | wx, wy, wz = torch.chunk(wxyz, 3, dim=1)
189 | cos_theta = torch.cos(theta)
190 | sin_theta = torch.sin(theta)
191 |
192 | r00 = cos_theta + wx * wx * (k_one - cos_theta)
193 | r10 = wz * sin_theta + wx * wy * (k_one - cos_theta)
194 | r20 = -wy * sin_theta + wx * wz * (k_one - cos_theta)
195 | r01 = wx * wy * (k_one - cos_theta) - wz * sin_theta
196 | r11 = cos_theta + wy * wy * (k_one - cos_theta)
197 | r21 = wx * sin_theta + wy * wz * (k_one - cos_theta)
198 | r02 = wy * sin_theta + wx * wz * (k_one - cos_theta)
199 | r12 = -wx * sin_theta + wy * wz * (k_one - cos_theta)
200 | r22 = cos_theta + wz * wz * (k_one - cos_theta)
201 | rotation_matrix = torch.cat(
202 | [r00, r01, r02, r10, r11, r12, r20, r21, r22], dim=1)
203 | return rotation_matrix.view(-1, 3, 3)
204 |
205 | def _compute_rotation_matrix_taylor(angle_axis):
206 | rx, ry, rz = torch.chunk(angle_axis, 3, dim=1)
207 | k_one = torch.ones_like(rx)
208 | rotation_matrix = torch.cat(
209 | [k_one, -rz, ry, rz, k_one, -rx, -ry, rx, k_one], dim=1)
210 | return rotation_matrix.view(-1, 3, 3)
211 |
212 | # stolen from ceres/rotation.h
213 |
214 | _angle_axis = torch.unsqueeze(angle_axis, dim=1)
215 | theta2 = torch.matmul(_angle_axis, _angle_axis.transpose(1, 2))
216 | theta2 = torch.squeeze(theta2, dim=1)
217 |
218 | # compute rotation matrices
219 | rotation_matrix_normal = _compute_rotation_matrix(angle_axis, theta2)
220 | rotation_matrix_taylor = _compute_rotation_matrix_taylor(angle_axis)
221 |
222 | # create mask to handle both cases
223 | eps = 1e-6
224 | mask = (theta2 > eps).view(-1, 1, 1).to(theta2.device)
225 | mask_pos = (mask).type_as(theta2)
226 | mask_neg = (mask == False).type_as(theta2) # noqa
227 |
228 | # create output pose matrix
229 | batch_size = angle_axis.shape[0]
230 | rotation_matrix = torch.eye(4).to(angle_axis.device).type_as(angle_axis)
231 | rotation_matrix = rotation_matrix.view(1, 4, 4).repeat(batch_size, 1, 1)
232 | # fill output matrix with masked values
233 | rotation_matrix[..., :3, :3] = \
234 | mask_pos * rotation_matrix_normal + mask_neg * rotation_matrix_taylor
235 | return rotation_matrix # Nx4x4
236 |
237 |
238 | def perspective_projection(points, rotation, translation,
239 | focal_length, camera_center):
240 | """
241 | This function computes the perspective projection of a set of points.
242 | Input:
243 | points (bs, N, 3): 3D points
244 | rotation (bs, 3, 3): Camera rotation
245 | translation (bs, 3): Camera translation
246 | focal_length (bs,) or scalar: Focal length
247 | camera_center (bs, 2): Camera center
248 | """
249 | batch_size = points.shape[0]
250 | K = torch.zeros([batch_size, 3, 3], device=points.device)
251 | K[:,0,0] = focal_length
252 | K[:,1,1] = focal_length
253 | K[:,2,2] = 1.
254 | K[:,:-1, -1] = camera_center
255 |
256 | # Transform points
257 | points = torch.einsum('bij,bkj->bki', rotation, points)
258 | points = points + translation.unsqueeze(1)
259 |
260 | # Apply perspective distortion
261 | projected_points = points / points[:,:,-1].unsqueeze(-1)
262 |
263 | # Apply camera intrinsics
264 | projected_points = torch.einsum('bij,bkj->bki', K, projected_points)
265 |
266 | return projected_points[:, :, :-1]
267 |
268 |
269 | def estimate_translation_np(S, joints_2d, joints_conf, focal_length=5000, img_size=224):
270 | """Find camera translation that brings 3D joints S closest to 2D the corresponding joints_2d.
271 | Input:
272 | S: (25, 3) 3D joint locations
273 | joints: (25, 3) 2D joint locations and confidence
274 | Returns:
275 | (3,) camera translation vector
276 | """
277 |
278 | num_joints = S.shape[0]
279 | # focal length
280 | f = np.array([focal_length,focal_length])
281 | # optical center
282 | center = np.array([img_size/2., img_size/2.])
283 |
284 | # transformations
285 | Z = np.reshape(np.tile(S[:,2],(2,1)).T,-1)
286 | XY = np.reshape(S[:,0:2],-1)
287 | O = np.tile(center,num_joints)
288 | F = np.tile(f,num_joints)
289 | weight2 = np.reshape(np.tile(np.sqrt(joints_conf),(2,1)).T,-1)
290 |
291 | # least squares
292 | Q = np.array([F*np.tile(np.array([1,0]),num_joints), F*np.tile(np.array([0,1]),num_joints), O-np.reshape(joints_2d,-1)]).T
293 | c = (np.reshape(joints_2d,-1)-O)*Z - F*XY
294 |
295 | # weighted least squares
296 | W = np.diagflat(weight2)
297 | Q = np.dot(W,Q)
298 | c = np.dot(W,c)
299 |
300 | # square matrix
301 | A = np.dot(Q.T,Q)
302 | b = np.dot(Q.T,c)
303 |
304 | # solution
305 | trans = np.linalg.solve(A, b)
306 |
307 | return trans
308 |
309 |
310 | def estimate_translation(S, joints_2d, focal_length=5000., img_size=224.):
311 | """Find camera translation that brings 3D joints S closest to 2D the corresponding joints_2d.
312 | Input:
313 | S: (B, 49, 3) 3D joint locations
314 | joints: (B, 49, 3) 2D joint locations and confidence
315 | Returns:
316 | (B, 3) camera translation vectors
317 | """
318 |
319 | device = S.device
320 | # Use only joints 25:49 (GT joints)
321 | S = S[:, 25:, :].cpu().numpy()
322 | joints_2d = joints_2d[:, 25:, :].cpu().numpy()
323 | joints_conf = joints_2d[:, :, -1]
324 | joints_2d = joints_2d[:, :, :-1]
325 | trans = np.zeros((S.shape[0], 3), dtype=np.float32)
326 | # Find the translation for each example in the batch
327 | for i in range(S.shape[0]):
328 | S_i = S[i]
329 | joints_i = joints_2d[i]
330 | conf_i = joints_conf[i]
331 | trans[i] = estimate_translation_np(S_i, joints_i, conf_i, focal_length=focal_length, img_size=img_size)
332 | return torch.from_numpy(trans).to(device)
333 |
334 | # input: batch*4*4 or batch*3*3 rotation matrices
335 | # output: torch batch*3 (x, y, z) in radians
336 | # the rotation is applied in the sequence x, y, z
337 | def compute_euler_angles_from_rotation_matrices(rotation_matrices):
338 | batch=rotation_matrices.shape[0]
339 | R=rotation_matrices
340 | sy = torch.sqrt(R[:,0,0]*R[:,0,0]+R[:,1,0]*R[:,1,0])
341 | singular= sy<1e-6
342 | singular=singular.float()
343 |
344 | x=torch.atan2(R[:,2,1], R[:,2,2])
345 | y=torch.atan2(-R[:,2,0], sy)
346 | z=torch.atan2(R[:,1,0],R[:,0,0])
347 |
348 | xs=torch.atan2(-R[:,1,2], R[:,1,1])
349 | ys=torch.atan2(-R[:,2,0], sy)
350 | zs=R[:,1,0]*0
351 |
352 | out_euler=torch.autograd.Variable(torch.zeros(batch,3).cuda())
353 | out_euler[:,0]=x*(1-singular)+xs*singular
354 | out_euler[:,1]=y*(1-singular)+ys*singular
355 | out_euler[:,2]=z*(1-singular)+zs*singular
356 |
357 | return out_euler
358 |
359 |
360 | def get_K_crop_resize(K, boxes, orig_size, crop_resize):
361 | """
362 | Adapted from https://github.com/BerkeleyAutomation/perception/blob/master/perception/camera_intrinsics.py
363 | Skew is not handled !
364 | """
365 | assert K.shape[1:] == (3, 3)
366 | assert boxes.shape[1:] == (4, )
367 | K = K.float()
368 | boxes = boxes.float()
369 | new_K = K.clone()
370 |
371 | orig_size = torch.tensor(orig_size, dtype=torch.float)
372 | crop_resize = torch.tensor(crop_resize, dtype=torch.float)
373 |
374 | final_width, final_height = max(crop_resize), min(crop_resize)
375 | crop_width = boxes[:, 2] - boxes[:, 0]
376 | crop_height = boxes[:, 3] - boxes[:, 1]
377 | crop_cj = (boxes[:, 0] + boxes[:, 2]) / 2
378 | crop_ci = (boxes[:, 1] + boxes[:, 3]) / 2
379 |
380 | # Crop
381 | cx = K[:, 0, 2] + (crop_width - 1) / 2 - crop_cj
382 | cy = K[:, 1, 2] + (crop_height - 1) / 2 - crop_ci
383 |
384 | # # Resize (upsample)
385 | center_x = (crop_width - 1) / 2
386 | center_y = (crop_height - 1) / 2
387 | orig_cx_diff = cx - center_x
388 | orig_cy_diff = cy - center_y
389 | scale_x = final_width / crop_width
390 | scale_y = final_height / crop_height
391 | scaled_center_x = (final_width - 1) / 2
392 | scaled_center_y = (final_height - 1) / 2
393 | fx = scale_x * K[:, 0, 0]
394 | fy = scale_y * K[:, 1, 1]
395 | cx = scaled_center_x + scale_x * orig_cx_diff
396 | cy = scaled_center_y + scale_y * orig_cy_diff
397 |
398 | new_K[:, 0, 0] = fx
399 | new_K[:, 1, 1] = fy
400 | new_K[:, 0, 2] = cx
401 | new_K[:, 1, 2] = cy
402 | return new_K
403 |
404 |
405 | def cropresize_backtransform_points2d(input_wh, boxes_2d_crop,
406 | output_wh, points_2d_in_output):
407 | bsz = input_wh.shape[0]
408 | assert output_wh.shape == (bsz, 2)
409 | assert input_wh.shape == (bsz, 2)
410 | assert points_2d_in_output.dim() == 3
411 |
412 | points_2d_normalized = points_2d_in_output / output_wh.unsqueeze(1)
413 | points_2d = boxes_2d_crop[:, [0, 1]].unsqueeze(1) + points_2d_normalized * input_wh.unsqueeze(1)
414 | return points_2d
415 |
--------------------------------------------------------------------------------
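Because rotmat_to_rot6d keeps the first two rows of a rotation matrix and rot6d_to_rotmat re-orthonormalizes them, the two functions round-trip cleanly on valid rotations. A small self-check sketch, assuming lib/ is on sys.path; the batch size and tolerance are illustrative.

import torch
from utils.geometries import batch_rodrigues, rotmat_to_rot6d, rot6d_to_rotmat

axis_angle = torch.randn(4, 3)              # batch of axis-angle vectors
R = batch_rodrigues(axis_angle)             # (4, 3, 3) rotation matrices
rep6d = rotmat_to_rot6d(R)                  # (4, 6): first two rows of each matrix
R_rec = rot6d_to_rotmat(rep6d)              # (4, 3, 3) reconstructed rotations
print(torch.allclose(R, R_rec, atol=1e-4))  # expected: True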
/lib/utils/urdf_robot.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
4 | import platform
5 | import numpy as np
6 | import pandas as pd
7 | import pyrender
8 | import torch
9 | from config import (BAXTER_DESCRIPTION_PATH, KUKA_DESCRIPTION_PATH,
10 | OWI_DESCRIPTION, OWI_KEYPOINTS_PATH,
11 | PANDA_DESCRIPTION_PATH, PANDA_DESCRIPTION_PATH_VISUAL)
12 | from dataset.const import JOINT_NAMES, LINK_NAMES
13 | from PIL import Image
14 | from utils.geometries import (quat_to_rotmat, rot6d_to_rotmat, rot9d_to_rotmat,
15 | rotmat_to_quat, rotmat_to_rot6d)
16 | from utils.mesh_renderer import RobotMeshRenderer, PandaArm
17 | from utils.urdfpytorch import URDF
18 |
19 | if platform.system() == "Linux":
20 | os.environ['PYOPENGL_PLATFORM'] = 'egl'
21 |
22 | class URDFRobot:
23 | def __init__(self,robot_type):
24 | self.robot_type = robot_type
25 | if self.robot_type == "panda":
26 | self.urdf_path = PANDA_DESCRIPTION_PATH
27 | self.urdf_path_visual = PANDA_DESCRIPTION_PATH_VISUAL
28 | self.dof = 8
29 | self.robot_for_render = PandaArm(self.urdf_path)
30 | elif self.robot_type == "kuka":
31 | self.urdf_path = KUKA_DESCRIPTION_PATH
32 | self.urdf_path_visual = KUKA_DESCRIPTION_PATH
33 | self.dof = 7
34 | self.robot_for_render = PandaArm(self.urdf_path)
35 | elif self.robot_type == "baxter":
36 | self.urdf_path = BAXTER_DESCRIPTION_PATH
37 | self.urdf_path_visual = BAXTER_DESCRIPTION_PATH
38 | self.dof = 15
39 | self.robot_for_render = PandaArm(self.urdf_path)
40 | elif self.robot_type == "owi":
41 | self.urdf_path = OWI_DESCRIPTION
42 | self.urdf_path_visual = OWI_DESCRIPTION
43 | self.dof = 4
44 | self.robot_for_render = None
45 | self.robot = URDF.load(self.urdf_path)
46 | self.robot_visual = URDF.load(self.urdf_path_visual)
47 | self.actuated_joint_names = JOINT_NAMES[self.robot_type]
48 | self.global_scale = 1.0
49 | self.device = None
50 | self.link_names, self.offsets = self.get_link_names_and_offsets()
51 |
52 | def get_link_names_and_offsets(self):
53 | if self.robot_type == "panda" or self.robot_type == "kuka":
54 | kp_offsets = torch.zeros((len(LINK_NAMES[self.robot_type]),3),dtype=torch.float).unsqueeze(0).unsqueeze(-1) * self.global_scale
55 | kp_offsets = kp_offsets.to(torch.float)
56 | return LINK_NAMES[self.robot_type], kp_offsets
57 | elif self.robot_type == "baxter":
58 | joint_name_to_joint = {joint.name: joint for joint in self.robot.joints}
59 | offsets = []
60 | link_names = []
61 | joint_names_for_links = [
62 | 'torso_t0', 'right_s0','left_s0', 'right_s1', 'left_s1',
63 | 'right_e0','left_e0', 'right_e1','left_e1','right_w0', 'left_w0',
64 | 'right_w1','left_w1','right_w2', 'left_w2','right_hand','left_hand'
65 | ]
66 | for joint_name in joint_names_for_links:
67 | joint = joint_name_to_joint[joint_name]
68 | offset = joint.origin[:3, -1]
69 | link_name = joint.parent
70 | link_names.append(link_name)
71 | offsets.append(offset)
72 | kp_offsets = torch.as_tensor(np.stack(offsets)).unsqueeze(0).unsqueeze(-1) * self.global_scale
73 | kp_offsets = kp_offsets.to(torch.float)
74 | return link_names, kp_offsets
75 | elif self.robot_type == "owi":
76 | keypoint_infos = pd.read_json(OWI_KEYPOINTS_PATH)
77 | kp_offsets = torch.as_tensor(np.stack(keypoint_infos['offset'])).unsqueeze(0).unsqueeze(-1).to(torch.float)
78 | return LINK_NAMES[self.robot_type], kp_offsets
79 | else:
80 | raise NotImplementedError
81 |
82 | def get_keypoints(self, jointcfgs, b2c_rot, b2c_trans):
83 |
84 | # jointcfgs, b2c_rot and b2c_trans all come batched (as model outputs)
85 | # b2c means base to camera
86 |
87 | batch_size = b2c_rot.shape[0]
88 | if b2c_rot.shape[1] == 6:
89 | rotmat = rot6d_to_rotmat(b2c_rot)
90 | elif b2c_rot.shape[1] == 4:
91 | rotmat = quat_to_rotmat(b2c_rot)
92 | elif b2c_rot.shape[1] == 9:
93 | rotmat = rot9d_to_rotmat(b2c_rot)
94 | else:
95 | raise NotImplementedError
96 | trans = b2c_trans.unsqueeze(dim=2)
97 | pad = torch.zeros((batch_size,1,4),dtype=torch.float).cuda()
98 | base2cam = torch.cat([rotmat,trans],dim=2).cuda()
99 | base2cam = torch.cat([base2cam,pad],dim=1).cuda()
100 | base2cam[:,3,3] = 1.0
101 | base2cam = base2cam.unsqueeze(1)
102 | TWL_base = self.get_TWL(jointcfgs).cuda()
103 | TWL = base2cam @ TWL_base
104 | pts = TWL[:, :, :3, :3] @ self.offsets.cuda() + TWL[:, :, :3, [-1]]
105 | return pts.squeeze(-1)
106 |
107 | def get_TWL(self, cfgs):
108 | fk = self.robot.link_fk_batch(cfgs, use_names=True)
109 | TWL = torch.stack([fk[link] for link in self.link_names]).permute(1, 0, 2, 3)
110 | TWL[..., :3, -1] *= self.global_scale
111 | return TWL
112 |
113 | def get_rotation_at_specific_root(self, jointcfgs, b2c_rot, b2c_trans, root = 0):
114 | if root == 0:
115 | return b2c_rot
116 | batch_size = b2c_rot.shape[0]
117 | if b2c_rot.shape[1] == 6:
118 | rotmat = rot6d_to_rotmat(b2c_rot)
119 | elif b2c_rot.shape[1] == 4:
120 | rotmat = quat_to_rotmat(b2c_rot)
121 | elif b2c_rot.shape[1] == 9:
122 | rotmat = rot9d_to_rotmat(b2c_rot)
123 | else:
124 | raise NotImplementedError
125 | trans = b2c_trans.unsqueeze(dim=2)
126 | pad = torch.zeros((batch_size,1,4),dtype=torch.float).cuda()
127 | base2cam = torch.cat([rotmat,trans],dim=2).cuda()
128 | base2cam = torch.cat([base2cam,pad],dim=1).cuda()
129 | base2cam[:,3,3] = 1.0
130 | base2cam = base2cam.unsqueeze(1)
131 | TWL_base = self.get_TWL(jointcfgs).cuda()
132 | TWL = base2cam @ TWL_base
133 | assert root < TWL.shape[1], (root, TWL.shape[1])
134 | if b2c_rot.shape[1] == 6:
135 | rotation = rotmat_to_rot6d(TWL[:, root, :3, :3]).cuda()
136 | elif b2c_rot.shape[1] == 4:
137 | rotation = rotmat_to_quat(TWL[:, root, :3, :3]).cuda()
138 | return rotation
139 |
140 |
141 | def get_keypoints_only_fk(self, jointcfgs):
142 |
143 | # forward kinematics using only the joint angles
144 | # assumes the world frame sits at the robot base, i.e. identity rotation and zero translation
145 | # the resulting keypoints are consumed by the PnP stage
146 |
147 | TWL = self.get_TWL(jointcfgs).cuda()
148 | pts = TWL[:, :, :3, :3] @ self.offsets.cuda() + TWL[:, :, :3, [-1]]
149 | return pts.squeeze(-1)
150 |
151 | def get_keypoints_only_fk_at_specific_root(self, jointcfgs, root=0):
152 |
153 | # forward kinematics using only the joint angles
154 | # assumes the world frame sits at the robot base, i.e. identity rotation and zero translation
155 | # the resulting keypoints are consumed by the PnP stage
156 |
157 | if root == 0:
158 | return self.get_keypoints_only_fk(jointcfgs)
159 | else:
160 | assert root > 0 and root < len(self.link_names)
161 |
162 | TWL_base = self.get_TWL(jointcfgs).cuda()
163 | TWL_root_inv = torch.linalg.inv(TWL_base[:,root:root+1,:,:])
164 | TWL = TWL_root_inv @ TWL_base
165 | pts = TWL[:, :, :3, :3] @ self.offsets.cuda() + TWL[:, :, :3, [-1]]
166 | return pts.squeeze(-1)
167 |
168 |
169 | def get_keypoints_root(self, jointcfgs, b2c_rot, b2c_trans, root = 0):
170 |
171 | # jointcfgs, b2c_rot and b2c_trans all come batched (as model outputs)
172 | # b2c here means *** root *** to camera
173 |
174 | if root == 0:
175 | return self.get_keypoints(jointcfgs, b2c_rot, b2c_trans)
176 | else:
177 | assert root > 0 and root < len(self.link_names)
178 |
179 | batch_size = b2c_rot.shape[0]
180 | if b2c_rot.shape[1] == 6:
181 | rotmat = rot6d_to_rotmat(b2c_rot)
182 | elif b2c_rot.shape[1] == 4:
183 | rotmat = quat_to_rotmat(b2c_rot)
184 | elif b2c_rot.shape[1] == 9:
185 | rotmat = rot9d_to_rotmat(b2c_rot)
186 | else:
187 | raise NotImplementedError
188 | trans = b2c_trans.unsqueeze(dim=2)
189 | pad = torch.zeros((batch_size,1,4),dtype=torch.float).cuda()
190 | base2cam = torch.cat([rotmat,trans],dim=2).cuda()
191 | base2cam = torch.cat([base2cam,pad],dim=1).cuda()
192 | base2cam[:,3,3] = 1.0
193 | base2cam = base2cam.unsqueeze(1)
194 | TWL_base = self.get_TWL(jointcfgs).cuda()
195 | TWL_root_inv = torch.linalg.inv(TWL_base[:,root:root+1,:,:])
196 | TWL_base = TWL_root_inv @ TWL_base
197 | TWL = base2cam @ TWL_base
198 | pts = TWL[:, :, :3, :3] @ self.offsets.cuda() + TWL[:, :, :3, [-1]]
199 | return pts.squeeze(-1)
200 |
201 | def set_robot_renderer(self, K_original, original_image_size=(480, 640), scale=0.5, device="cpu"):
202 |
203 | fx, fy, cx, cy = K_original[0,0]*scale, K_original[1,1]*scale, K_original[0,2]*scale, K_original[1,2]*scale
204 | image_size = (int(original_image_size[0]*scale), int(original_image_size[1]*scale))
205 |
206 | base_dir = os.path.dirname(self.urdf_path)
207 |
208 | mesh_files = [
209 | base_dir + "/meshes/visual/link0/link0.obj",
210 | base_dir + "/meshes/visual/link1/link1.obj",
211 | base_dir + "/meshes/visual/link2/link2.obj",
212 | base_dir + "/meshes/visual/link3/link3.obj",
213 | base_dir + "/meshes/visual/link4/link4.obj",
214 | base_dir + "/meshes/visual/link5/link5.obj",
215 | base_dir + "/meshes/visual/link6/link6.obj",
216 | base_dir + "/meshes/visual/link7/link7.obj",
217 | base_dir + "/meshes/visual/hand/hand.obj",
218 | ]
219 |
220 | focal_length = [-fx,-fy]
221 | principal_point = [cx, cy]
222 |
223 | robot_renderer = RobotMeshRenderer(
224 | focal_length=focal_length, principal_point=principal_point, image_size=image_size,
225 | robot=self.robot_for_render, mesh_files=mesh_files, device=device)
226 |
227 | return robot_renderer
228 |
229 | def get_robot_mesh_list(self, joint_angles, renderer):
230 |
231 | robot_meshes = []
232 | for joint_angle in joint_angles:
233 | if self.robot_type == "panda":
234 | joints = joint_angle[:-1].detach().cpu()
235 | else:
236 | joints = joint_angle.detach().cpu()
237 | robot_mesh = renderer.get_robot_mesh(joints).cuda()
238 | robot_meshes.append(robot_mesh)
239 |
240 | return robot_meshes
241 |
242 | def get_rendered_mask_single_image(self, rot, trans, robot_mesh, robot_renderer_gpu):
243 |
244 | R = rot6d_to_rotmat(rot).reshape(1,3,3)
245 | R = torch.transpose(R,1,2).cuda()
246 |
247 | T = trans.reshape(1,3).cuda()
248 |
249 | if T[0,-1] < 0:
250 | rendered_image = robot_renderer_gpu.silhouette_renderer(meshes_world=robot_mesh, R = -R, T = -T)
251 | else:
252 | rendered_image = robot_renderer_gpu.silhouette_renderer(meshes_world=robot_mesh, R = R, T = T)
253 |
254 | if torch.isnan(rendered_image).any():
255 | rendered_image = torch.nan_to_num(rendered_image)
256 |
257 | return rendered_image[..., 3]
258 |
259 | def get_rendered_mask_single_image_at_specific_root(self, joint_angles, rot, trans, robot_mesh, robot_renderer_gpu, root=0):
260 |
261 | if root == 0:
262 | return self.get_rendered_mask_single_image(rot, trans, robot_mesh, robot_renderer_gpu)
263 | else:
264 | rotmat = rot6d_to_rotmat(rot).cuda()
265 | trans = trans.unsqueeze(dim=1).cuda()
266 | pad = torch.zeros((1,4),dtype=torch.float).cuda()
267 | base2cam = torch.cat([rotmat,trans],dim=1).cuda()
268 | base2cam = torch.cat([base2cam,pad],dim=0).cuda()
269 | base2cam[3,3] = 1.0
270 | TWL_base = self.get_TWL(joint_angles.unsqueeze(0)).cuda().detach() # detach the joint-angle FK so gradients flow only through rot/trans
271 | TWL_root_inv = torch.linalg.inv(TWL_base[:,root:root+1,:,:]).squeeze()
272 | new_base2cam = base2cam @ TWL_root_inv
273 | new_rot = rotmat_to_rot6d(new_base2cam[:3,:3])
274 | new_trans = new_base2cam[:3,3]
275 | return self.get_rendered_mask_single_image(new_rot, new_trans, robot_mesh, robot_renderer_gpu)
276 |
277 | def get_textured_rendering(self, joint, rot, trans, intrinsics=(320, 320, 320, 240), save_path=(None,None,None), original_image=None, root=0):
278 |
279 | if root != 0:
280 | rotmat = rot6d_to_rotmat(rot)
281 | trans = trans.unsqueeze(dim=1)
282 | pad = torch.zeros((1,4),dtype=torch.float)
283 | base2cam = torch.cat([rotmat,trans],dim=1)
284 | base2cam = torch.cat([base2cam,pad],dim=0)
285 | base2cam[3,3] = 1.0
286 | TWL_base = self.get_TWL(joint.unsqueeze(0))
287 | TWL_root_inv = torch.linalg.inv(TWL_base[:,root:root+1,:,:]).squeeze()
288 | new_base2cam = base2cam @ TWL_root_inv
289 | rot = rotmat_to_rot6d(new_base2cam[:3,:3])
290 | trans = new_base2cam[:3,3]
291 |
292 | save_path1, save_path2, save_path3 = save_path
293 | rotmat = rot6d_to_rotmat(rot)
294 | trans = trans.unsqueeze(dim=1)
295 | pad = torch.zeros((1,4),dtype=torch.float)
296 | camera_pose = torch.cat([rotmat,trans],dim=1)
297 | camera_pose = torch.cat([camera_pose,pad],dim=0)
298 | camera_pose[3,3] = 1.0
299 | joint = joint.numpy()
300 | camera_pose = camera_pose.numpy()
301 | rotation = np.array([[1,0,0,0],
302 | [0,-1,0,0],
303 | [0,0,-1,0],
304 | [0,0,0,1]])
305 | camera_pose = np.matmul(rotation,camera_pose)
306 | camera_pose = np.linalg.inv(camera_pose)
307 | fk = self.robot_visual.visual_trimesh_fk(cfg=joint)
308 | scene = pyrender.Scene()
309 | camera = pyrender.IntrinsicsCamera(*intrinsics)
310 | light = pyrender.DirectionalLight(color=[1.0, 1.0, 1.0], intensity=3)
311 | light_pose = np.eye(4)
312 | light_pose[:3, 3] = np.array([0, -1, 1])
313 | scene.add(light, pose=light_pose)
314 | light_pose[:3, 3] = np.array([0, 1, 1])
315 | scene.add(light, pose=light_pose)
316 | light_pose[:3, 3] = np.array([1, 1, 2])
317 | scene.add(light, pose=light_pose)
318 | for tm in fk:
319 | pose = fk[tm]
320 | mesh = pyrender.Mesh.from_trimesh(tm, smooth=False)
321 | scene.add(mesh, pose=pose)
322 | scene.add(camera, pose=camera_pose)
323 | scene.add(light, pose=camera_pose)
324 | renderer = pyrender.OffscreenRenderer(viewport_width=640, viewport_height=480)
325 | color, depth = renderer.render(scene)
326 | rendered_img = Image.fromarray(np.uint8(color)).convert("RGBA")
327 | rendered_img.save(save_path1)
328 | original_img = Image.fromarray(np.transpose(np.uint8(original_image),(1,2,0))).convert("RGBA")
329 | original_img.save(save_path2)
330 | blend_ratio = 0.7
331 | blended_image = Image.blend(original_img, rendered_img, blend_ratio)
332 | blended_image.save(save_path3)
333 |
334 | def get_textured_rendering_individual(self, joint, rot, trans, intrinsics=(320, 320, 320, 240), root=0):
335 |
336 | if root != 0:
337 | rotmat = rot6d_to_rotmat(rot)
338 | trans = trans.unsqueeze(dim=1)
339 | pad = torch.zeros((1,4),dtype=torch.float)
340 | base2cam = torch.cat([rotmat,trans],dim=1)
341 | base2cam = torch.cat([base2cam,pad],dim=0)
342 | base2cam[3,3] = 1.0
343 | TWL_base = self.get_TWL(joint.unsqueeze(0))
344 | TWL_root_inv = torch.linalg.inv(TWL_base[:,root:root+1,:,:]).squeeze()
345 | new_base2cam = base2cam @ TWL_root_inv
346 | rot = rotmat_to_rot6d(new_base2cam[:3,:3])
347 | trans = new_base2cam[:3,3]
348 |
349 | rotmat = rot6d_to_rotmat(rot)
350 | trans = trans.unsqueeze(dim=1)
351 | pad = torch.zeros((1,4),dtype=torch.float)
352 | camera_pose = torch.cat([rotmat,trans],dim=1)
353 | camera_pose = torch.cat([camera_pose,pad],dim=0)
354 | camera_pose[3,3] = 1.0
355 | joint = joint.numpy()
356 | camera_pose = camera_pose.numpy()
357 | rotation = np.array([[1,0,0,0],
358 | [0,-1,0,0],
359 | [0,0,-1,0],
360 | [0,0,0,1]])
361 | camera_pose = np.matmul(rotation,camera_pose)
362 | camera_pose = np.linalg.inv(camera_pose)
363 | fk = self.robot_visual.visual_trimesh_fk(cfg=joint)
364 | scene = pyrender.Scene()
365 | camera = pyrender.IntrinsicsCamera(*intrinsics)
366 | # azure
367 | light = pyrender.PointLight(color=[1.5, 1.5, 1.5], intensity=2.6)
368 | # realsense, kinect
369 | # light = pyrender.PointLight(color=[1.4, 1.4, 1.4], intensity=2.4)
370 | # light_pose = np.eye(4)
371 | # light_pose[:3, 3] = np.array([0, 1, 0])
372 | # scene.add(light, pose=light_pose)
373 | # orb
374 | # light = pyrender.PointLight(color=[1.4, 1.4, 1.4], intensity=2.4)
375 | # light_pose = np.eye(4)
376 | # light_pose[:3, 3] = np.array([0, -1, 0])
377 | # scene.add(light, pose=light_pose)
378 |
379 | for tm in fk:
380 | pose = fk[tm]
381 | mesh = pyrender.Mesh.from_trimesh(tm, smooth=False)
382 | scene.add(mesh, pose=pose)
383 | scene.add(camera, pose=camera_pose)
384 | scene.add(light, pose=camera_pose)
385 | renderer = pyrender.OffscreenRenderer(viewport_width=640, viewport_height=480)
386 | color, depth = renderer.render(scene)
387 | rendered_img = Image.fromarray(np.uint8(color))
388 | return rendered_img
--------------------------------------------------------------------------------
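A minimal usage sketch for URDFRobot.get_keypoints (illustrative, not from the repo): it assumes the Panda description paths in lib/config.py resolve, lib/ is on sys.path, and a CUDA device is available, since the method hard-codes .cuda(); the joint angles and pose values below are placeholders.

import torch
from utils.urdf_robot import URDFRobot

robot = URDFRobot("panda")                    # dof = 8, keypoints taken at LINK_NAMES["panda"]
B = 2
jointcfgs = torch.zeros(B, robot.dof)         # placeholder joint angles
b2c_rot = torch.tensor([1., 0., 0., 0., 1., 0.]).repeat(B, 1)  # identity rotation in 6D form
b2c_trans = torch.tensor([0., 0., 1.0]).repeat(B, 1)           # base 1 m in front of the camera
keypoints_3d = robot.get_keypoints(jointcfgs, b2c_rot, b2c_trans)
print(keypoints_3d.shape)                     # (B, n_links, 3), keypoints in the camera frame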