├── .gitignore ├── assets └── poster.png ├── conformal_calibration.py ├── conformal_prediction.py ├── get_calibration_ids.py ├── keypoint ├── README.md ├── bop_dataset.py ├── bop_toolkit_lib │ ├── __init__.py │ ├── colors.json │ ├── config.py │ ├── dataset_params.py │ ├── droid_sans_mono.ttf │ ├── droid_sans_mono_license.txt │ ├── inout.py │ ├── misc.py │ ├── pose_error.py │ ├── pose_matching.py │ ├── renderer.py │ ├── renderer_cpp.py │ ├── renderer_py.py │ ├── score.py │ ├── transform.py │ ├── view_sampler.py │ ├── visibility.py │ └── visualization.py ├── demo_data.ipynb ├── demo_pipeline.ipynb ├── est_6dof.py ├── eval │ ├── results_lmo-test.csv │ ├── results_tudl-test.csv │ └── results_ycbv-test.csv ├── kpts3d.json ├── misc │ ├── __init__.py │ ├── loss.py │ ├── pose2d_eval.py │ └── segmentation.py ├── models │ ├── __init__.py │ ├── fasterRCNN.py │ ├── hourglass.py │ ├── layers.py │ ├── mask_rcnn.py │ └── patched.py ├── scripts │ ├── _init_paths.py │ ├── calc_gt_distribution.py │ ├── calc_gt_info.py │ ├── calc_gt_masks.py │ ├── calc_model_info.py │ ├── check_results_bop19.py │ ├── eval_bop19.py │ ├── eval_calc_errors.py │ ├── eval_calc_scores.py │ ├── meshlab_scripts │ │ ├── remesh_for_eval_cell=0.25.mlx │ │ └── remesh_for_eval_cell=0.5.mlx │ ├── remesh_models_for_eval.py │ ├── render_train_imgs.py │ ├── show_performance_bop19.py │ ├── vis_est_poses.py │ ├── vis_gt_poses.py │ └── vis_object_symmetries.py ├── train │ ├── base_options.py │ ├── base_trainer.py │ ├── detection_trainer.py │ ├── keypoint_trainer.py │ ├── train.py │ ├── train_options.py │ └── transforms.py └── utils │ ├── __init__.py │ ├── data_loader.py │ ├── img_utils.py │ ├── saver.py │ └── trimesh_renderer.py ├── readme.md └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | ## Python ## 2 | *__pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | .vscode 6 | ## Jupyter Notebook ## 7 | .ipynb_checkpoints 8 | ## Ignore Mac DS_Store files ## 9 | .DS_Store 10 | ## Large Files ## 11 | keypoint/data 12 | *.pkl -------------------------------------------------------------------------------- /assets/poster.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/ConformalKeypoint/427d71bd7fb40686b345c1e780250728266f2944/assets/poster.png -------------------------------------------------------------------------------- /conformal_calibration.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torchvision import transforms as T 4 | import tqdm 5 | import pickle 6 | import argparse 7 | 8 | from keypoint.models import FRCNN, StackedHourglass, fasterrcnn_backbone 9 | from keypoint.bop_dataset import BOPDataset 10 | from keypoint.train.transforms import ToTensor, Normalize, AffineCrop 11 | 12 | from utils import conformity_score, one_each 13 | 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--score_type', action='store', type=str) 16 | parser.add_argument('--do_frcnn', action='store_true') 17 | args = parser.parse_args() 18 | 19 | score_type = args.score_type 20 | do_frcnn = args.do_frcnn 21 | 22 | # Load dataset 23 | dataset_name = 'lmo' # this the lmo calibration dataset containing 200 images 24 | root = './keypoint/data/bop' 25 | num_classes = {'lmo':8, 'lmo-org':8} 26 | device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') 27 | dataset = BOPDataset(root, dataset_name, split='test', return_coco=True) 28 
| dataset._set_kpts_info() 29 | 30 | if do_frcnn: 31 | # Load Faster-RCNN detector 32 | detector_trainsform = T.ToTensor() 33 | state_dict = torch.load('data/detect_checkpoints/d{}.pt'.format(dataset_name), map_location=device)['frcnn'] 34 | detector = fasterrcnn_backbone('resnet101', num_classes=1+num_classes[dataset_name]).to(device) 35 | detector.eval() 36 | detector.load_state_dict(state_dict) 37 | 38 | # Load keypoints detector 39 | transform_list = [] 40 | transform_list.append(AffineCrop(out_size=256, scale_factor=0, rotation_factor=0, dialation=0.25)) 41 | transform_list.append(ToTensor()) 42 | transform_list.append(Normalize()) 43 | kpts_transform = T.Compose(transform_list) 44 | 45 | state_dict = torch.load('keypoint/data/kpts_checkpoints/{}.pt'.format(dataset_name), map_location=device)['stacked_hg'] 46 | kpts_detector = StackedHourglass(dataset.n_kpts).to(device) 47 | kpts_detector.eval() 48 | kpts_detector.load_state_dict(state_dict) 49 | 50 | # useful info about dataset 51 | n_kpts = dataset.n_kpts 52 | n_smps = len(dataset) 53 | obj2idx = dataset.obj2idx 54 | idx2obj = {v:k for k,v in obj2idx.items()} 55 | lab2obj = {v+1:k for k,v in obj2idx.items()} 56 | n_objs = len(idx2obj) 57 | 58 | # Prepare to store obj scores 59 | obj_scores = [[] for i in range(n_objs)] 60 | print(f'nonconformity function: {score_type}.') 61 | 62 | # Compute conformity score on calibration dataset 63 | for i in tqdm.tqdm(range(n_smps)): 64 | sample = dataset[i] 65 | meta = dataset.db[i] 66 | 67 | image = sample['image'] 68 | gt_boxes = sample['boxes'] 69 | gt_objs = [lab2obj[l] for l in sample['labels']] 70 | gt_kpts = meta['keypoints'] 71 | 72 | if do_frcnn: 73 | # Object detection 74 | with torch.no_grad(): 75 | img = detector_trainsform(image).to(device) 76 | pred = detector([img])[0] 77 | pred = {k:v.cpu() for k,v in pred.items()} 78 | pd_boxes, pd_labels = one_each(pred, thresh=0.0) 79 | pd_objs = [lab2obj[i] for i in pd_labels.tolist()] 80 | pd_boxes = pd_boxes.tolist() 81 | 82 | _, comm1, comm2 = np.intersect1d(np.array(pd_objs), np.array(gt_objs), return_indices=True) 83 | comm1 = comm1.tolist() 84 | comm2 = comm2.tolist() 85 | 86 | pd_objs_true = [pd_objs[i] for i in comm1] 87 | pd_boxes_true = [pd_boxes[i] for i in comm1] 88 | gt_kpts_pd = [gt_kpts[i] for i in comm2] 89 | gt_objs = pd_objs_true 90 | gt_boxes = pd_boxes_true 91 | gt_kpts = gt_kpts_pd 92 | 93 | for obj, box, gt_kpt in zip(gt_objs, gt_boxes, gt_kpts): 94 | box = [box[0], box[1], box[2]-box[0], box[3]-box[1]] 95 | gt_kpt_homo = np.concatenate( 96 | (gt_kpt,np.ones((gt_kpt.shape[0],1))),axis=1) 97 | input_crop = {'image':image, 'bb':box, 'keypoints':gt_kpt_homo} 98 | input_crop = kpts_transform(input_crop) 99 | gt_kpt_crop = input_crop['keypoints'][:,:2].numpy() * (64/256) # the heatmap is size (64,64), rescale from 256 to 64 100 | 101 | with torch.no_grad(): 102 | batch = input_crop['image'][None].to(device) 103 | output = kpts_detector(batch) 104 | output = output[-1].cpu() 105 | 106 | kpt_start = dataset.obj2kptid[obj][0] 107 | kpt_end = dataset.obj2kptid[obj][1] 108 | heatmaps_pred = torch.squeeze( 109 | output[[0], kpt_start:kpt_end, :, :]) 110 | 111 | scores = [] 112 | for j in np.arange(kpt_start,kpt_end): 113 | score = conformity_score( 114 | np.squeeze(gt_kpt_crop[j-kpt_start,:]), 115 | torch.squeeze(heatmaps_pred[j-kpt_start,:]).numpy(), 116 | type=score_type) 117 | scores.append(score) 118 | # @Apoorva: here is the place to quickly implement the windowed nonconformity score 119 | max_score = np.max(np.stack(scores)) 
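        # Calibrating on the per-object max makes the guarantee simultaneous over keypoints:
        # under the usual exchangeability assumption, if q is the conformal quantile of these
        # max scores computed later in conformal_prediction.py (the floor((n+1)*eps)-th largest
        # calibration score), then with probability >= 1 - eps every keypoint of a fresh test
        # object has nonconformity score <= q, so all of its per-keypoint prediction sets cover.
        #
        # Minimal, hypothetical sketch of the "windowed" variant mentioned in the TODO above
        # (its exact definition is not given in this repo, so this is only one possible reading):
        # aggregate the per-keypoint scores over a sliding window instead of one global max,
        #
        #   win = 3  # hypothetical window size
        #   max_score = max(np.mean(scores[k:k + win])
        #                   for k in range(len(scores) - win + 1))
        #
        # Any such aggregate is a valid calibration statistic as long as the identical
        # aggregate is compared against the calibrated quantile at prediction time.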
120 | obj_scores[obj2idx[obj]].append(max_score) 121 | 122 | obj_scores_np = [] 123 | for i in range(n_objs): 124 | obj_scores_np.append(np.array(obj_scores[i])) 125 | fname = f'calibration_scores_{score_type}_{dataset_name}.pkl' 126 | if do_frcnn: 127 | fname = f'calibration_scores_{score_type}_{dataset_name}_frcnn.pkl' 128 | with open(fname, 'wb') as f: 129 | pickle.dump(obj_scores_np, f) -------------------------------------------------------------------------------- /conformal_prediction.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import matplotlib.pyplot as plt 4 | import torch 5 | from torchvision import transforms as T 6 | import tqdm 7 | import pickle, argparse 8 | import os 9 | 10 | from keypoint.models import FRCNN, StackedHourglass, fasterrcnn_backbone 11 | from keypoint.bop_dataset import BOPDataset 12 | from keypoint.train.transforms import ToTensor, Normalize, AffineCrop 13 | from keypoint.misc.pose2d_eval import Pose2DEval 14 | 15 | from utils import icp, draw_icp_ball, draw_icp_ellipse 16 | 17 | def heatmap2org(kpts,lams,T): 18 | ''' 19 | The heatmap is on the cropped image, this function converts the prediction sets on the cropped image to the original image (which will be used for pose estimation) 20 | ''' 21 | A = T[:,:2] 22 | b = T[:,2] 23 | kpts_new = np.linalg.solve(A,kpts*4 - b[:,np.newaxis]) 24 | lam_new = [] 25 | for lam in lams: 26 | lam_new.append( (A.T @ lam @ A)/16 ) 27 | return kpts_new, np.stack(lam_new) 28 | 29 | parser = argparse.ArgumentParser() 30 | parser.add_argument('--score_type', action='store', type=str) 31 | parser.add_argument('--eps', type=int) 32 | parser.add_argument('--do_frcnn', action='store_true') 33 | parser.add_argument('--save_fig', action='store_true') 34 | 35 | args = parser.parse_args() 36 | 37 | score_type = args.score_type 38 | eps = args.eps 39 | eps = eps / 100.0 40 | save_fig = args.save_fig 41 | do_frcnn = args.do_frcnn 42 | 43 | print(f'nonconformity function: {score_type}, epsilon: {eps}, save_fig: {save_fig}.') 44 | 45 | # Load dataset 46 | dataset_name = 'lmo-org' # this is the full lmo dataset containing 1214 images 47 | root = './keypoint/data/bop' 48 | num_classes = {'lmo':8, 'lmo-org':8} 49 | device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') 50 | dataset = BOPDataset(root, dataset_name, split='test', return_coco=True) 51 | dataset._set_kpts_info() 52 | 53 | if do_frcnn: 54 | # Load Faster-RCNN detector 55 | detector_trainsform = T.ToTensor() 56 | state_dict = torch.load('keypoint/data/detect_checkpoints/d{}.pt'.format('lmo'), map_location=device)['frcnn'] 57 | detector = fasterrcnn_backbone('resnet101', num_classes=1+num_classes[dataset_name]).to(device) 58 | detector.eval() 59 | detector.load_state_dict(state_dict) 60 | 61 | # Load keypoints detector 62 | transform_list = [] 63 | transform_list.append(AffineCrop(out_size=256, scale_factor=0, rotation_factor=0, dialation=0.25)) 64 | transform_list.append(ToTensor()) 65 | transform_list.append(Normalize()) 66 | kpts_transform = T.Compose(transform_list) 67 | 68 | state_dict = torch.load('keypoint/data/kpts_checkpoints/{}.pt'.format(dataset_name), map_location=device)['stacked_hg'] 69 | kpts_detector = StackedHourglass(dataset.n_kpts).to(device) 70 | kpts_detector.eval() 71 | kpts_detector.load_state_dict(state_dict) 72 | 73 | # useful info about dataset 74 | n_kpts = dataset.n_kpts 75 | n_smps = len(dataset) 76 | obj2idx = dataset.obj2idx 77 | idx2obj = {v:k for k,v in 
obj2idx.items()} 78 | lab2obj = {v+1:k for k,v in obj2idx.items()} 79 | n_objs = len(idx2obj) 80 | poseEval = Pose2DEval() 81 | 82 | img_result_dir = './keypoint/data/bop/lmo-org/test/000002/icp_results' 83 | 84 | fname = f'calibration_scores_{score_type}_lmo.pkl' 85 | if do_frcnn: 86 | fname = f'calibration_scores_{score_type}_lmo_frcnn.pkl' 87 | # Compute quantiles 88 | with open(fname, 'rb') as f: 89 | obj_scores = pickle.load(f) 90 | obj_qs = [] 91 | for i in range(n_objs): 92 | scores = obj_scores[i] 93 | n = np.size(scores) 94 | idx = np.int64( np.floor( (n+1) * eps ) ) 95 | scores_sort = scores[np.flip(np.argsort(scores))] 96 | obj_qs.append(scores_sort[idx-1]) 97 | obj_qs = np.array(obj_qs) 98 | 99 | # Perform Conformal prediction 100 | obj_kpts = [[] for i in range(n_objs)] 101 | obj_lams = [[] for i in range(n_objs)] 102 | obj_imgs = [[] for i in range(n_objs)] 103 | 104 | for i in tqdm.tqdm(range(n_smps)): 105 | sample = dataset[i] 106 | meta = dataset.db[i] 107 | image = sample['image'] 108 | gt_boxes = sample['boxes'] 109 | gt_objs = [lab2obj[l] for l in sample['labels']] 110 | gt_kpts = meta['keypoints'] 111 | 112 | if do_frcnn: 113 | # Object detection 114 | with torch.no_grad(): 115 | img = detector_trainsform(image).to(device) 116 | pred = detector([img])[0] 117 | pred = {k:v.cpu() for k,v in pred.items()} 118 | pd_boxes, pd_labels = one_each(pred, thresh=0) 119 | pd_objs = [lab2obj[i] for i in pd_labels.tolist()] 120 | pd_boxes = pd_boxes.tolist() 121 | 122 | _, comm1, comm2 = np.intersect1d(np.array(pd_objs), np.array(gt_objs), return_indices=True) 123 | comm1 = comm1.tolist() 124 | comm2 = comm2.tolist() 125 | 126 | pd_objs_true = [pd_objs[i] for i in comm1] 127 | pd_boxes_true = [pd_boxes[i] for i in comm1] 128 | gt_kpts_pd = [gt_kpts[i] for i in comm2] 129 | gt_objs = pd_objs_true 130 | gt_boxes = pd_boxes_true 131 | gt_kpts = gt_kpts_pd 132 | 133 | 134 | for obj, box, gt_kpt in zip(gt_objs, gt_boxes, gt_kpts): 135 | box = [box[0], box[1], box[2]-box[0], box[3]-box[1]] 136 | gt_kpt_homo = np.concatenate((gt_kpt,np.ones((gt_kpt.shape[0],1))),axis=1) 137 | input_crop = {'image':image, 'bb':box, 'keypoints':gt_kpt_homo} 138 | input_crop = kpts_transform(input_crop) 139 | gt_kpt_crop = input_crop['keypoints'][:,:2].numpy() * (64/256) # the heatmap is size (64,64), rescale from 256 to 64 140 | # affine transformation between original kpt loc and that in heatmap 141 | affineT = transform_list[0].crop_augment(box) 142 | 143 | with torch.no_grad(): 144 | batch = input_crop['image'][None].to(device) 145 | output = kpts_detector(batch) 146 | output = output[-1].cpu() 147 | 148 | kpt_start = dataset.obj2kptid[obj][0] 149 | kpt_end = dataset.obj2kptid[obj][1] 150 | heatmaps_pred = torch.squeeze(output[[0], kpt_start:kpt_end, :, :]) 151 | 152 | # output inductive conformal prediction set 153 | kpts = [] 154 | lams = [] 155 | icp_sets = [] 156 | for j in range(kpt_start,kpt_end): 157 | if score_type == "ball": 158 | center, radius = icp( 159 | torch.squeeze(heatmaps_pred[j-kpt_start,:]).numpy(), 160 | obj_qs[obj2idx[obj]], 161 | type=score_type) 162 | 163 | lam = np.eye(2) / (radius**2) 164 | kpts.append(center) # center 165 | lams.append(lam) # information matrix 166 | icp_sets.append((center,radius)) 167 | 168 | elif score_type == "ellipse": 169 | center, lam = icp( 170 | torch.squeeze(heatmaps_pred[j-kpt_start,:]).numpy(), 171 | obj_qs[obj2idx[obj]],type=score_type) 172 | kpts.append(center) 173 | lams.append(lam) 174 | icp_sets.append((center,lam)) 175 | 176 | else: 177 | raise 
RuntimeError('Unknown score type for ICP.') 178 | 179 | if save_fig: 180 | dir_path = "{:s}/{:.2f}/{:s}".format(img_result_dir,eps,score_type) 181 | os.makedirs(dir_path,exist_ok=True) 182 | fname = "{:s}/{:06d}_{:06d}_{:02d}.pdf".format(dir_path,i,meta['im_id'],obj) 183 | if do_frcnn: 184 | fname = "{:s}/{:06d}_{:06d}_{:02d}_frcnn.pdf".format(dir_path,i,meta['im_id'],obj) 185 | # plot 186 | img_disp = cv2.resize((input_crop['image'].permute(1, 2, 0).numpy()) / 2.0 + 0.5,(64,64)) 187 | if score_type == "ball": 188 | fig = draw_icp_ball(img_disp,heatmaps_pred.numpy(),gt_kpt_crop,icp_sets,fname=fname,show=False) 189 | elif score_type == "ellipse": 190 | fig = draw_icp_ellipse(img_disp,heatmaps_pred.numpy(),gt_kpt_crop,icp_sets,fname=fname,show=False) 191 | plt.close(fig) 192 | 193 | kpts = np.stack(kpts,axis=1) 194 | # convert the keypoints coordinates to the original image space and save 195 | kpts_new, lams_new = heatmap2org(kpts,lams,affineT) 196 | obj_kpts[obj2idx[obj]].append(kpts_new) 197 | obj_lams[obj2idx[obj]].append(lams_new) 198 | obj_imgs[obj2idx[obj]].append(i) 199 | 200 | # save the keypoint prediction sets 201 | data = {"kpts": obj_kpts, 202 | "lams": obj_lams, 203 | "imgs": obj_imgs} 204 | fname = "icp_sets_{:s}_{:.2f}.pkl".format(score_type,eps) 205 | if do_frcnn: 206 | fname = "icp_sets_{:s}_{:.2f}_frcnn.pkl".format(score_type,eps) 207 | with open(fname, 'wb') as f: 208 | pickle.dump(data, f) -------------------------------------------------------------------------------- /get_calibration_ids.py: -------------------------------------------------------------------------------- 1 | import json 2 | import cv2 3 | import numpy as np 4 | from glob import glob 5 | import matplotlib.pyplot as plt 6 | import torch 7 | from torchvision import transforms as T 8 | import tqdm 9 | import pickle 10 | 11 | from bop_dataset import BOPDataset 12 | 13 | # Load dataset 14 | dataset_name = 'lmo' 15 | root = './data/bop' 16 | num_classes = {'lmo':8, 'lmo-org':8} 17 | device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') 18 | dataset = BOPDataset(root, dataset_name, split='test', return_coco=True) 19 | dataset._set_kpts_info() 20 | 21 | n_smps = len(dataset) 22 | ids = [] 23 | 24 | for i in tqdm.tqdm(range(n_smps)): 25 | meta = dataset.db[i] 26 | path = meta['imgpath'] 27 | words = path.split('/') 28 | names = words[-1].split('.') 29 | id = int(names[0]) 30 | ids.append(id) 31 | 32 | ids = np.array(ids) 33 | print(ids) 34 | 35 | fname = 'calibration_imgs.npy' 36 | np.save(fname,ids) 37 | -------------------------------------------------------------------------------- /keypoint/README.md: -------------------------------------------------------------------------------- 1 | # 6D_Pose 2 | Python implementation for the BOP benchmark section of the paper: \ 3 | **Semantic keypoint-based pose estimation from single RGB frames** 4 | Field Robotics \ 5 | [[Paper](https://arxiv.org/abs/2204.05864)] 6 | ![cover](data/cover.png) 7 | 8 | ## Data 9 | You can download the pretrained models for [detection](https://drive.google.com/drive/folders/1Jzg-9sU4nEGawTREsMFblmBEZouPMOjM?usp=sharing) and [keypoint detection](https://drive.google.com/drive/folders/1i9Y5lFm3jc2t8qtxoB-qQJEDLc0urZao?usp=sharing). Please place the models as follows. We also put the test images for the LMO dataset in this repo for convenience. 
10 | ``` 11 | - data 12 | -- detect_checkpoints 13 | -- kpts_checkpoints 14 | ``` 15 | 16 | ## Demo 17 | Our method uses additional 3D keypoint annotation on the CAD models, which is included in **kpts_3d.json**. We provide two demo. To explore the 3D annotation, please use **demo_data.ipynb**. To explore the inference pipeline, please use **demo_pipeline.ipynb**. 18 | 19 | 20 | ## Reference 21 | @article{schmeckpeper2022semantic, 22 | Title = {Semantic keypoint-based pose estimation from single RGB frames}, 23 | Author = {Schmeckpeper, Karl and Osteen, Philip R and Wang, Yufu and Pavlakos, Georgios and Chaney, Kenneth and Jordan, Wyatt and Zhou, Xiaowei and Derpanis, Konstantinos G and Daniilidis, Kostas}, 24 | Booktitle = {Field Robotics}, 25 | Year = {2022} 26 | } 27 | -------------------------------------------------------------------------------- /keypoint/bop_toolkit_lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/ConformalKeypoint/427d71bd7fb40686b345c1e780250728266f2944/keypoint/bop_toolkit_lib/__init__.py -------------------------------------------------------------------------------- /keypoint/bop_toolkit_lib/colors.json: -------------------------------------------------------------------------------- 1 | [ 2 | [0.89, 0.28, 0.13], 3 | [0.45, 0.38, 0.92], 4 | [0.35, 0.73, 0.63], 5 | [0.62, 0.28, 0.91], 6 | [0.65, 0.71, 0.22], 7 | [0.8, 0.29, 0.89], 8 | [0.27, 0.55, 0.22], 9 | [0.37, 0.46, 0.84], 10 | [0.84, 0.63, 0.22], 11 | [0.68, 0.29, 0.71], 12 | [0.48, 0.75, 0.48], 13 | [0.88, 0.27, 0.75], 14 | [0.82, 0.45, 0.2], 15 | [0.86, 0.27, 0.27], 16 | [0.52, 0.49, 0.18], 17 | [0.33, 0.67, 0.25], 18 | [0.67, 0.42, 0.29], 19 | [0.67, 0.46, 0.86], 20 | [0.36, 0.72, 0.84], 21 | [0.85, 0.29, 0.4], 22 | [0.24, 0.53, 0.55], 23 | [0.85, 0.55, 0.8], 24 | [0.4, 0.51, 0.33], 25 | [0.56, 0.38, 0.63], 26 | [0.78, 0.66, 0.46], 27 | [0.33, 0.5, 0.72], 28 | [0.83, 0.31, 0.56], 29 | [0.56, 0.61, 0.85], 30 | [0.89, 0.58, 0.57], 31 | [0.67, 0.4, 0.49] 32 | ] -------------------------------------------------------------------------------- /keypoint/bop_toolkit_lib/config.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """Configuration of the BOP Toolkit.""" 5 | 6 | import os 7 | 8 | 9 | ######## Basic ######## 10 | 11 | # Folder with the BOP datasets. 12 | if 'BOP_PATH' in os.environ: 13 | datasets_path = os.environ['BOP_PATH'] 14 | else: 15 | datasets_path = r'/Users/Yufu/Desktop/bop_public/data/bop' 16 | 17 | # Folder with pose results to be evaluated. 18 | results_path = r'/Users/Yufu/Desktop/bop_public' 19 | 20 | # Folder for the calculated pose errors and performance scores. 21 | eval_path = r'/Users/Yufu/Desktop/bop_public' 22 | 23 | ######## Extended ######## 24 | 25 | # Folder for outputs (e.g. visualizations). 26 | output_path = r'/path/to/output/folder' 27 | 28 | # For offscreen C++ rendering: Path to the build folder of bop_renderer (github.com/thodan/bop_renderer). 29 | bop_renderer_path = r'/path/to/bop_renderer/build' 30 | 31 | # Executable of the MeshLab server. 
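# (Presumably only needed when remeshing models for evaluation, e.g. by
# scripts/remesh_models_for_eval.py together with the .mlx filters under
# scripts/meshlab_scripts/.)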
32 | meshlab_server_path = r'/path/to/meshlabserver.exe' 33 | -------------------------------------------------------------------------------- /keypoint/bop_toolkit_lib/droid_sans_mono.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/ConformalKeypoint/427d71bd7fb40686b345c1e780250728266f2944/keypoint/bop_toolkit_lib/droid_sans_mono.ttf -------------------------------------------------------------------------------- /keypoint/bop_toolkit_lib/pose_matching.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """Matching of estimated poses to the ground-truth poses.""" 5 | 6 | import numpy as np 7 | 8 | 9 | def match_poses(errs, error_ths, max_ests_count=0, gt_valid_mask=None): 10 | """Matches the estimated poses to the ground-truth poses. 11 | 12 | The estimated poses are greedily matched to the ground truth poses in the 13 | order of decreasing score of the estimates. An estimated pose is matched to a 14 | ground-truth pose if the error w.r.t. the ground-truth pose is below the 15 | specified threshold. Each estimated pose is matched to up to one ground-truth 16 | pose and each ground-truth pose is matched to up to one estimated pose. 17 | 18 | :param errs: List of dictionaries, where each dictionary holds the following 19 | info about one pose estimate: 20 | - 'est_id': ID of the pose estimate. 21 | - 'score': Confidence score of the pose estimate. 22 | - 'errors': Dictionary mapping ground-truth ID's to errors of the pose 23 | estimate w.r.t. the ground-truth poses. 24 | :param error_ths: Thresholds of correctness. The pose error can be given 25 | by more than one element (e.g. translational + rotational error), in which 26 | case there is one threshold for each element. 27 | :param max_ests_count: Top k pose estimates to consider (0 = all). 28 | :param gt_valid_mask: Mask of ground-truth poses which can be considered. 29 | :return: List of dictionaries, where each dictionary holds info for one pose 30 | estimate (the estimates are ordered as in errs) about the matching 31 | ground-truth pose: 32 | - 'est_id': ID of the pose estimate. 33 | - 'gt_id': ID of the matched ground-truth pose (-1 means there is no 34 | matching ground-truth pose). 35 | - 'score': Confidence score of the pose estimate. 36 | - 'error': Error of the pose estimate w.r.t. the matched ground-truth pose. 37 | - 'error_norm': Error normalized by the threshold value. 38 | """ 39 | # Sort the estimated poses by decreasing confidence score. 40 | errs_sorted = sorted(errs, key=lambda e: e['score'], reverse=True) 41 | 42 | # Keep only the required number of poses with the highest confidence score. 43 | # 0 = all pose estimates are considered. 44 | if max_ests_count > 0: 45 | errs_sorted = errs_sorted[:max_ests_count] 46 | 47 | # Number of values defining the error (e.g. 1 for "ADD", 2 for "5deg 5cm"). 48 | error_num_elems = len(list(error_ths)) 49 | 50 | # Greedily match the estimated poses to the ground truth poses in the order of 51 | # decreasing score of the estimates. 52 | matches = [] 53 | gt_matched = [] 54 | for e in errs_sorted: 55 | 56 | best_gt_id = -1 57 | best_error = list(error_ths) 58 | for gt_id, error in e['errors'].items(): 59 | 60 | # If the mask of valid GT poses is not provided, consider all valid. 
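      # (`not gt_valid_mask` is True when the mask is None, so the per-pose lookup on
      # the right-hand side is only evaluated when a mask was actually supplied.)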
61 | is_valid = not gt_valid_mask or gt_valid_mask[gt_id] 62 | 63 | # Only valid GT poses that have not been matched yet are considered. 64 | if is_valid and gt_id not in gt_matched: 65 | 66 | # The current pose estimate is considered the best so far if all error 67 | # elements are the lowest so far. 68 | if np.all([error[i] < best_error[i] for i in range(error_num_elems)]): 69 | best_gt_id = gt_id 70 | best_error = error 71 | 72 | if best_gt_id >= 0: 73 | 74 | # Mark the GT pose as matched. 75 | gt_matched.append(best_gt_id) 76 | 77 | # Error normalized by the threshold. 78 | best_errors_normed = [best_error[i] / float(error_ths[i]) 79 | for i in range(error_num_elems)] 80 | 81 | # Save info about the match. 82 | matches.append({ 83 | 'est_id': e['est_id'], 84 | 'gt_id': best_gt_id, 85 | 'score': e['score'], 86 | 'error': best_error, 87 | 'error_norm': best_errors_normed 88 | }) 89 | 90 | return matches 91 | 92 | 93 | def match_poses_scene(scene_id, scene_gt, scene_gt_valid, scene_errs, 94 | correct_th, n_top): 95 | """Matches the estimated poses to the ground-truth poses in one scene. 96 | 97 | :param scene_id: Scene ID. 98 | :param scene_gt: Dictionary mapping image ID's to lists of dictionaries with: 99 | - 'obj_id': Object ID of the ground-truth pose. 100 | :param scene_gt_valid: Dictionary mapping image ID's to lists of boolean 101 | values indicating which ground-truth poses should be considered. 102 | :param scene_errs: List of dictionaries with: 103 | - 'im_id': Image ID. 104 | - 'obj_id': Object ID. 105 | - 'est_id': ID of the pose estimate. 106 | - 'score': Confidence score of the pose estimate. 107 | - 'errors': Dictionary mapping ground-truth ID's to errors of the pose 108 | estimate w.r.t. the ground-truth poses. 109 | :param error_obj_threshs: Dictionary mapping object ID's to values of the 110 | threshold of correctness. 111 | :param n_top: Top N pose estimates (with the highest score) to be evaluated 112 | for each object class in each image. 113 | :return: 114 | """ 115 | # Organize the errors by image ID and object ID (for faster query). 116 | scene_errs_org = {} 117 | for e in scene_errs: 118 | scene_errs_org.setdefault( 119 | e['im_id'], {}).setdefault(e['obj_id'], []).append(e) 120 | 121 | # Matching of poses in individual images. 122 | scene_matches = [] 123 | for im_id, im_gts in scene_gt.items(): 124 | im_matches = [] 125 | 126 | for gt_id, gt in enumerate(im_gts): 127 | im_matches.append({ 128 | 'scene_id': scene_id, 129 | 'im_id': im_id, 130 | 'obj_id': gt['obj_id'], 131 | 'gt_id': gt_id, 132 | 'est_id': -1, 133 | 'score': -1, 134 | 'error': -1, 135 | 'error_norm': -1, 136 | 'valid': scene_gt_valid[im_id][gt_id], 137 | }) 138 | 139 | # Treat estimates of each object separately. 140 | im_obj_ids = set([gt['obj_id'] for gt in im_gts]) 141 | for obj_id in im_obj_ids: 142 | if im_id in scene_errs_org.keys()\ 143 | and obj_id in scene_errs_org[im_id].keys(): 144 | 145 | # Greedily match the estimated poses to the ground truth poses. 146 | errs_im_obj = scene_errs_org[im_id][obj_id] 147 | ms = match_poses( 148 | errs_im_obj, correct_th, n_top, scene_gt_valid[im_id]) 149 | 150 | # Update info about the matched GT poses. 
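        # (im_matches holds one entry per GT pose in gt_id order, so it can be
        # indexed directly with the m['gt_id'] returned by match_poses.)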
151 | for m in ms: 152 | g = im_matches[m['gt_id']] 153 | g['est_id'] = m['est_id'] 154 | g['score'] = m['score'] 155 | g['error'] = m['error'] 156 | g['error_norm'] = m['error_norm'] 157 | 158 | scene_matches += im_matches 159 | 160 | return scene_matches 161 | -------------------------------------------------------------------------------- /keypoint/bop_toolkit_lib/renderer.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """Abstract class of a renderer and a factory function to create a renderer. 5 | 6 | The renderer produces an RGB/depth image of a 3D mesh model in a specified pose 7 | for given camera parameters and illumination settings. 8 | """ 9 | 10 | 11 | class Renderer(object): 12 | """Abstract class of a renderer.""" 13 | 14 | def __init__(self, width, height): 15 | """Constructor. 16 | 17 | :param width: Width of the rendered image. 18 | :param height: Height of the rendered image. 19 | """ 20 | self.width = width 21 | self.height = height 22 | 23 | # 3D location of a point light (in the camera coordinates). 24 | self.light_cam_pos = (0, 0, 0) 25 | 26 | # Set light color and weights. 27 | self.light_color = (1.0, 1.0, 1.0) # Used only in C++ renderer. 28 | self.light_ambient_weight = 0.5 29 | self.light_diffuse_weight = 1.0 # Used only in C++ renderer. 30 | self.light_specular_weight = 0.0 # Used only in C++ renderer. 31 | self.light_specular_shininess = 0.0 # Used only in C++ renderer. 32 | 33 | def set_light_cam_pos(self, light_cam_pos): 34 | """Sets the 3D location of a point light. 35 | 36 | :param light_cam_pos: [X, Y, Z]. 37 | """ 38 | self.light_cam_pos = light_cam_pos 39 | 40 | def set_light_ambient_weight(self, light_ambient_weight): 41 | """Sets weight of the ambient light. 42 | 43 | :param light_ambient_weight: Scalar from 0 to 1. 44 | """ 45 | self.light_ambient_weight = light_ambient_weight 46 | 47 | def add_object(self, obj_id, model_path, **kwargs): 48 | """Loads an object model. 49 | 50 | :param obj_id: Object identifier. 51 | :param model_path: Path to the object model file. 52 | """ 53 | raise NotImplementedError 54 | 55 | def remove_object(self, obj_id): 56 | """Removes an object model. 57 | 58 | :param obj_id: Identifier of the object to remove. 59 | """ 60 | raise NotImplementedError 61 | 62 | def render_object(self, obj_id, R, t, fx, fy, cx, cy): 63 | """Renders an object model in the specified pose. 64 | 65 | :param obj_id: Object identifier. 66 | :param R: 3x3 ndarray with a rotation matrix. 67 | :param t: 3x1 ndarray with a translation vector. 68 | :param fx: Focal length (X axis). 69 | :param fy: Focal length (Y axis). 70 | :param cx: The X coordinate of the principal point. 71 | :param cy: The Y coordinate of the principal point. 72 | :return: Returns a dictionary with rendered images. 73 | """ 74 | raise NotImplementedError 75 | 76 | 77 | def create_renderer(width, height, renderer_type='cpp', mode='rgb+depth', 78 | shading='phong', bg_color=(0.0, 0.0, 0.0, 0.0)): 79 | """A factory to create a renderer. 80 | 81 | Note: Parameters mode, shading and bg_color are currently supported only by 82 | the Python renderer (renderer_type='python'). 83 | 84 | :param width: Width of the rendered image. 85 | :param height: Height of the rendered image. 86 | :param renderer_type: Type of renderer (options: 'cpp', 'python'). 87 | :param mode: Rendering mode ('rgb+depth', 'rgb', 'depth'). 
88 | :param shading: Type of shading ('flat', 'phong'). 89 | :param bg_color: Color of the background (R, G, B, A). 90 | :return: Instance of a renderer of the specified type. 91 | """ 92 | if renderer_type == 'python': 93 | from . import renderer_py 94 | return renderer_py.RendererPython(width, height, mode, shading, bg_color) 95 | 96 | elif renderer_type == 'cpp': 97 | from . import renderer_cpp 98 | return renderer_cpp.RendererCpp(width, height) 99 | 100 | else: 101 | raise ValueError('Unknown renderer type.') 102 | -------------------------------------------------------------------------------- /keypoint/bop_toolkit_lib/renderer_cpp.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """An interface to the C++ based renderer (bop_renderer).""" 5 | 6 | import sys 7 | import numpy as np 8 | 9 | from bop_toolkit_lib import config 10 | from bop_toolkit_lib import renderer 11 | 12 | # C++ renderer (https://github.com/thodan/bop_renderer) 13 | sys.path.append(config.bop_renderer_path) 14 | import bop_renderer 15 | 16 | 17 | class RendererCpp(renderer.Renderer): 18 | """An interface to the C++ based renderer.""" 19 | 20 | def __init__(self, width, height): 21 | """See base class.""" 22 | super(RendererCpp, self).__init__(width, height) 23 | self.renderer = bop_renderer.Renderer() 24 | self.renderer.init(width, height) 25 | self._set_light() 26 | 27 | def _set_light(self): 28 | self.renderer.set_light( 29 | list(self.light_cam_pos), list(self.light_color), 30 | self.light_ambient_weight, self.light_diffuse_weight, 31 | self.light_specular_weight, self.light_specular_shininess) 32 | 33 | def set_light_cam_pos(self, light_cam_pos): 34 | """See base class.""" 35 | super(RendererCpp, self).set_light_cam_pos(light_cam_pos) 36 | self._set_light() 37 | 38 | def set_light_ambient_weight(self, light_ambient_weight): 39 | """See base class.""" 40 | super(RendererCpp, self).set_light_ambient_weight(light_ambient_weight) 41 | self._set_light() 42 | 43 | def add_object(self, obj_id, model_path, **kwargs): 44 | """See base class. 45 | 46 | NEEDS TO BE CALLED RIGHT AFTER CREATING THE RENDERER (this is due to some 47 | memory issues in the C++ renderer which need to be fixed). 48 | """ 49 | self.renderer.add_object(obj_id, model_path) 50 | 51 | def remove_object(self, obj_id): 52 | """See base class.""" 53 | self.renderer.remove_object(obj_id) 54 | 55 | def render_object(self, obj_id, R, t, fx, fy, cx, cy): 56 | """See base class.""" 57 | R_l = R.astype(np.float32).flatten().tolist() 58 | t_l = t.astype(np.float32).flatten().tolist() 59 | self.renderer.render_object(obj_id, R_l, t_l, fx, fy, cx, cy) 60 | rgb = self.renderer.get_color_image(obj_id) 61 | depth = self.renderer.get_depth_image(obj_id).astype(np.float32) 62 | return {'rgb': rgb, 'depth': depth} 63 | -------------------------------------------------------------------------------- /keypoint/bop_toolkit_lib/score.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """Calculation of performance scores.""" 5 | 6 | import numpy as np 7 | from collections import defaultdict 8 | 9 | from bop_toolkit_lib import misc 10 | 11 | 12 | def calc_ap(rec, pre): 13 | """Calculates Average Precision (AP). 
14 | 15 | Calculated in the PASCAL VOC challenge from 2010 onwards [1]: 16 | 1) Compute a version of the measured precision/recall curve with precision 17 | monotonically decreasing, by setting the precision for recall r to the 18 | maximum precision obtained for any recall r' >= r. 19 | 2) Compute the AP as the area under this curve by numerical integration. 20 | No approximation is involved since the curve is piecewise constant. 21 | 22 | NOTE: The used AP formula is different from the one in [2] where the 23 | formula from VLFeat [3] was presented - although it was mistakenly 24 | introduced as a formula used in PASCAL. 25 | 26 | References: 27 | [1] http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/devkit_doc.html#SECTION00044000000000000000 28 | [2] Hodan et al., "On Evaluation of 6D Object Pose Estimation", ECCVW 2016 29 | [3] http://www.vlfeat.org/matlab/vl_pr.html 30 | 31 | :param rec: A list (or 1D ndarray) of recall rates. 32 | :param pre: A list (or 1D ndarray) of precision rates. 33 | :return: Average Precision - the area under the monotonically decreasing 34 | version of the precision/recall curve given by rec and pre. 35 | """ 36 | # Sorts the precision/recall points by increasing recall. 37 | i = np.argsort(rec) 38 | 39 | mrec = np.concatenate(([0], np.array(rec)[i], [1])) 40 | mpre = np.concatenate(([0], np.array(pre)[i], [0])) 41 | assert (mrec.shape == mpre.shape) 42 | for i in range(mpre.size - 3, -1, -1): 43 | mpre[i] = max(mpre[i], mpre[i + 1]) 44 | i = np.nonzero(mrec[1:] != mrec[:-1])[0] + 1 45 | ap = np.sum((mrec[i] - mrec[i - 1]) * mpre[i]) 46 | return ap 47 | 48 | 49 | def calc_recall(tp_count, targets_count): 50 | """Calculates recall. 51 | 52 | :param tp_count: Number of true positives. 53 | :param targets_count: Number of targets. 54 | :return: The recall rate. 55 | """ 56 | if targets_count == 0: 57 | return 0.0 58 | else: 59 | return tp_count / float(targets_count) 60 | 61 | 62 | def calc_localization_scores(scene_ids, obj_ids, matches, n_top, do_print=True): 63 | """Calculates performance scores for the 6D object localization task. 64 | 65 | References: 66 | Hodan et al., BOP: Benchmark for 6D Object Pose Estimation, ECCV'18. 67 | Hodan et al., On Evaluation of 6D Object Pose Estimation, ECCVW'16. 68 | 69 | :param scene_ids: ID's of considered scenes. 70 | :param obj_ids: ID's of considered objects. 71 | :param matches: Info about matching pose estimates to ground-truth poses 72 | (see pose_matching.py for details). 73 | :param n_top: Number of top pose estimates to consider per test target. 74 | :param do_print: Whether to print the scores to the standard output. 75 | :return: Dictionary with the evaluation scores. 76 | """ 77 | # Count the number of visible object instances in each image. 78 | insts = {i: {j: defaultdict(lambda: 0) for j in scene_ids} for i in obj_ids} 79 | for m in matches: 80 | if m['valid']: 81 | insts[m['obj_id']][m['scene_id']][m['im_id']] += 1 82 | 83 | # Count the number of targets = object instances to be found. 84 | # For SiSo, there is either zero or one target in each image - there is just 85 | # one even if there are more instances of the object of interest. 86 | tars = 0 # Total number of targets. 87 | obj_tars = {i: 0 for i in obj_ids} # Targets per object. 88 | scene_tars = {i: 0 for i in scene_ids} # Targets per scene. 89 | for obj_id, obj_insts in insts.items(): 90 | for scene_id, scene_insts in obj_insts.items(): 91 | 92 | # Count the number of targets for the current object in the current scene. 
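      # e.g. (hypothetical numbers) with n_top = 1 and scene_insts = {42: 3, 43: 1},
      # count = min(1, 3) + min(1, 1) = 2: at most n_top instances per image count as
      # targets, whereas n_top = 0 would count all 4.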
93 | if n_top > 0: 94 | count = sum(np.minimum(n_top, list(scene_insts.values()))) 95 | else: 96 | count = sum(list(scene_insts.values())) 97 | 98 | tars += count 99 | obj_tars[obj_id] += count 100 | scene_tars[scene_id] += count 101 | 102 | # Count the number of true positives. 103 | tps = 0 # Total number of true positives. 104 | obj_tps = {i: 0 for i in obj_ids} # True positives per object. 105 | scene_tps = {i: 0 for i in scene_ids} # True positives per scene. 106 | for m in matches: 107 | if m['valid'] and m['est_id'] != -1: 108 | tps += 1 109 | obj_tps[m['obj_id']] += 1 110 | scene_tps[m['scene_id']] += 1 111 | 112 | # Total recall. 113 | recall = calc_recall(tps, tars) 114 | 115 | # Recall per object. 116 | obj_recalls = {} 117 | for i in obj_ids: 118 | obj_recalls[i] = calc_recall(obj_tps[i], obj_tars[i]) 119 | mean_obj_recall = float(np.mean(list(obj_recalls.values())).squeeze()) 120 | 121 | # Recall per scene. 122 | scene_recalls = {} 123 | for i in scene_ids: 124 | scene_recalls[i] = float(calc_recall(scene_tps[i], scene_tars[i])) 125 | mean_scene_recall = float(np.mean(list(scene_recalls.values())).squeeze()) 126 | 127 | # Final scores. 128 | scores = { 129 | 'recall': float(recall), 130 | 'obj_recalls': obj_recalls, 131 | 'mean_obj_recall': float(mean_obj_recall), 132 | 'scene_recalls': scene_recalls, 133 | 'mean_scene_recall': float(mean_scene_recall), 134 | 'gt_count': len(matches), 135 | 'targets_count': int(tars), 136 | 'tp_count': int(tps), 137 | } 138 | 139 | if do_print: 140 | obj_recalls_str = ', '.join( 141 | ['{}: {:.3f}'.format(i, s) for i, s in scores['obj_recalls'].items()]) 142 | 143 | scene_recalls_str = ', '.join( 144 | ['{}: {:.3f}'.format(i, s) for i, s in scores['scene_recalls'].items()]) 145 | 146 | misc.log('') 147 | misc.log('GT count: {:d}'.format(scores['gt_count'])) 148 | misc.log('Target count: {:d}'.format(scores['targets_count'])) 149 | misc.log('TP count: {:d}'.format(scores['tp_count'])) 150 | misc.log('Recall: {:.4f}'.format(scores['recall'])) 151 | misc.log('Mean object recall: {:.4f}'.format(scores['mean_obj_recall'])) 152 | misc.log('Mean scene recall: {:.4f}'.format(scores['mean_scene_recall'])) 153 | misc.log('Object recalls:\n{}'.format(obj_recalls_str)) 154 | misc.log('Scene recalls:\n{}'.format(scene_recalls_str)) 155 | misc.log('') 156 | 157 | return scores 158 | 159 | 160 | if __name__ == '__main__': 161 | 162 | # AP test. 163 | tp = np.array([False, True, True, False, True, False]) 164 | fp = np.logical_not(tp) 165 | tp_c = np.cumsum(tp).astype(np.float) 166 | fp_c = np.cumsum(fp).astype(np.float) 167 | rec = tp_c / tp.size 168 | pre = tp_c / (fp_c + tp_c) 169 | misc.log('Average Precision: ' + str(calc_ap(rec, pre))) 170 | -------------------------------------------------------------------------------- /keypoint/bop_toolkit_lib/visibility.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """Estimation of the visible object surface from depth images.""" 5 | 6 | import numpy as np 7 | 8 | 9 | def _estimate_visib_mask(d_test, d_model, delta, visib_mode='bop19'): 10 | """Estimates a mask of the visible object surface. 11 | 12 | :param d_test: Distance image of a scene in which the visibility is estimated. 13 | :param d_model: Rendered distance image of the object model. 14 | :param delta: Tolerance used in the visibility test. 
15 | :param visib_mode: Visibility mode: 16 | 1) 'bop18' - Object is considered NOT VISIBLE at pixels with missing depth. 17 | 2) 'bop19' - Object is considered VISIBLE at pixels with missing depth. This 18 | allows to use the VSD pose error function also on shiny objects, which 19 | are typically not captured well by the depth sensors. A possible problem 20 | with this mode is that some invisible parts can be considered visible. 21 | However, the shadows of missing depth measurements, where this problem is 22 | expected to appear and which are often present at depth discontinuities, 23 | are typically relatively narrow and therefore this problem is less 24 | significant. 25 | :return: Visibility mask. 26 | """ 27 | assert (d_test.shape == d_model.shape) 28 | 29 | if visib_mode == 'bop18': 30 | mask_valid = np.logical_and(d_test > 0, d_model > 0) 31 | d_diff = d_model.astype(np.float32) - d_test.astype(np.float32) 32 | visib_mask = np.logical_and(d_diff <= delta, mask_valid) 33 | 34 | elif visib_mode == 'bop19': 35 | d_diff = d_model.astype(np.float32) - d_test.astype(np.float32) 36 | visib_mask = np.logical_and( 37 | np.logical_or(d_diff <= delta, d_test == 0), d_model > 0) 38 | 39 | else: 40 | raise ValueError('Unknown visibility mode.') 41 | 42 | return visib_mask 43 | 44 | 45 | def estimate_visib_mask_gt(d_test, d_gt, delta, visib_mode='bop19'): 46 | """Estimates a mask of the visible object surface in the ground-truth pose. 47 | 48 | :param d_test: Distance image of a scene in which the visibility is estimated. 49 | :param d_gt: Rendered distance image of the object model in the GT pose. 50 | :param delta: Tolerance used in the visibility test. 51 | :param visib_mode: See _estimate_visib_mask. 52 | :return: Visibility mask. 53 | """ 54 | visib_gt = _estimate_visib_mask(d_test, d_gt, delta, visib_mode) 55 | return visib_gt 56 | 57 | 58 | def estimate_visib_mask_est(d_test, d_est, visib_gt, delta, visib_mode='bop19'): 59 | """Estimates a mask of the visible object surface in the estimated pose. 60 | 61 | For an explanation of why the visibility mask is calculated differently for 62 | the estimated and the ground-truth pose, see equation (14) and related text in 63 | Hodan et al., On Evaluation of 6D Object Pose Estimation, ECCVW'16. 64 | 65 | :param d_test: Distance image of a scene in which the visibility is estimated. 66 | :param d_est: Rendered distance image of the object model in the est. pose. 67 | :param visib_gt: Visibility mask of the object model in the GT pose (from 68 | function estimate_visib_mask_gt). 69 | :param delta: Tolerance used in the visibility test. 70 | :param visib_mode: See _estimate_visib_mask. 71 | :return: Visibility mask. 72 | """ 73 | visib_est = _estimate_visib_mask(d_test, d_est, delta, visib_mode) 74 | visib_est = np.logical_or(visib_est, np.logical_and(visib_gt, d_est > 0)) 75 | return visib_est 76 | -------------------------------------------------------------------------------- /keypoint/bop_toolkit_lib/visualization.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """Visualization utilities.""" 5 | 6 | import os 7 | # import cv2 8 | import numpy as np 9 | from PIL import Image, ImageDraw, ImageFont 10 | 11 | from bop_toolkit_lib import inout 12 | from bop_toolkit_lib import misc 13 | 14 | 15 | def draw_rect(im, rect, color=(1.0, 1.0, 1.0)): 16 | """Draws a rectangle on an image. 
17 | 18 | :param im: ndarray (uint8) on which the rectangle will be drawn. 19 | :param rect: Rectangle defined as [x, y, width, height], where [x, y] is the 20 | top-left corner. 21 | :param color: Color of the rectangle. 22 | :return: Image with drawn rectangle. 23 | """ 24 | if im.dtype != np.uint8: 25 | raise ValueError('The image must be of type uint8.') 26 | 27 | im_pil = Image.fromarray(im) 28 | draw = ImageDraw.Draw(im_pil) 29 | draw.rectangle((rect[0], rect[1], rect[0] + rect[2], rect[1] + rect[3]), 30 | outline=tuple([int(c * 255) for c in color]), fill=None) 31 | del draw 32 | return np.asarray(im_pil) 33 | 34 | 35 | def write_text_on_image(im, txt_list, loc=(3, 0), color=(1.0, 1.0, 1.0), 36 | size=20): 37 | """Writes text info on an image. 38 | 39 | :param im: ndarray on which the text info will be written. 40 | :param txt_list: List of dictionaries, each describing one info line: 41 | - 'name': Entry name. 42 | - 'val': Entry value. 43 | - 'fmt': String format for the value. 44 | :param loc: Location of the top left corner of the text box. 45 | :param color: Font color. 46 | :param size: Font size. 47 | :return: Image with written text info. 48 | """ 49 | im_pil = Image.fromarray(im) 50 | 51 | # Load font. 52 | try: 53 | font_path = os.path.join(os.path.dirname(__file__), 'droid_sans_mono.ttf') 54 | font = ImageFont.truetype(font_path, size) 55 | except IOError: 56 | misc.log('Warning: Loading a fallback font.') 57 | font = ImageFont.load_default() 58 | 59 | draw = ImageDraw.Draw(im_pil) 60 | for info in txt_list: 61 | if info['name'] != '': 62 | txt_tpl = '{}:{' + info['fmt'] + '}' 63 | else: 64 | txt_tpl = '{}{' + info['fmt'] + '}' 65 | txt = txt_tpl.format(info['name'], info['val']) 66 | draw.text(loc, txt, fill=tuple([int(c * 255) for c in color]), font=font) 67 | text_width, text_height = font.getsize(txt) 68 | loc = (loc[0], loc[1] + text_height) 69 | del draw 70 | 71 | return np.array(im_pil) 72 | 73 | 74 | def depth_for_vis(depth, valid_start=0.2, valid_end=1.0): 75 | """Transforms depth values from the specified range to [0, 255]. 76 | 77 | :param depth: ndarray with a depth image (1 channel). 78 | :param valid_start: The beginning of the depth range. 79 | :param valid_end: The end of the depth range. 80 | :return: Transformed depth image. 81 | """ 82 | mask = depth > 0 83 | depth_n = depth.astype(np.float) 84 | depth_n[mask] -= depth_n[mask].min() 85 | depth_n[mask] /= depth_n[mask].max() / (valid_end - valid_start) 86 | depth_n[mask] += valid_start 87 | return depth_n 88 | 89 | 90 | def vis_object_poses( 91 | poses, K, renderer, rgb=None, depth=None, vis_rgb_path=None, 92 | vis_depth_diff_path=None, vis_rgb_resolve_visib=False): 93 | """Visualizes 3D object models in specified poses in a single image. 94 | 95 | Two visualizations are created: 96 | 1. An RGB visualization (if vis_rgb_path is not None). 97 | 2. A Depth-difference visualization (if vis_depth_diff_path is not None). 98 | 99 | :param poses: List of dictionaries, each with info about one pose: 100 | - 'obj_id': Object ID. 101 | - 'R': 3x3 ndarray with a rotation matrix. 102 | - 't': 3x1 ndarray with a translation vector. 103 | - 'text_info': Info to write at the object (see write_text_on_image). 104 | :param K: 3x3 ndarray with an intrinsic camera matrix. 105 | :param renderer: Instance of the Renderer class (see renderer.py). 106 | :param rgb: ndarray with the RGB input image. 107 | :param depth: ndarray with the depth input image. 108 | :param vis_rgb_path: Path to the output RGB visualization. 
109 | :param vis_depth_diff_path: Path to the output depth-difference visualization. 110 | :param vis_rgb_resolve_visib: Whether to resolve visibility of the objects 111 | (i.e. only the closest object is visualized at each pixel). 112 | """ 113 | fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2] 114 | 115 | # Indicators of visualization types. 116 | vis_rgb = vis_rgb_path is not None 117 | vis_depth_diff = vis_depth_diff_path is not None 118 | 119 | if vis_rgb and rgb is None: 120 | raise ValueError('RGB visualization triggered but RGB image not provided.') 121 | 122 | if (vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib)) and depth is None: 123 | raise ValueError('Depth visualization triggered but D image not provided.') 124 | 125 | # Prepare images for rendering. 126 | im_size = None 127 | ren_rgb = None 128 | ren_rgb_info = None 129 | ren_depth = None 130 | 131 | if vis_rgb: 132 | im_size = (rgb.shape[1], rgb.shape[0]) 133 | ren_rgb = np.zeros(rgb.shape, np.uint8) 134 | ren_rgb_info = np.zeros(rgb.shape, np.uint8) 135 | 136 | if vis_depth_diff: 137 | if im_size and im_size != (depth.shape[1], depth.shape[0]): 138 | raise ValueError('The RGB and D images must have the same size.') 139 | else: 140 | im_size = (depth.shape[1], depth.shape[0]) 141 | 142 | if vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib): 143 | ren_depth = np.zeros((im_size[1], im_size[0]), np.float32) 144 | 145 | # Render the pose estimates one by one. 146 | for pose in poses: 147 | 148 | # Rendering. 149 | ren_out = renderer.render_object( 150 | pose['obj_id'], pose['R'], pose['t'], fx, fy, cx, cy) 151 | 152 | m_rgb = None 153 | if vis_rgb: 154 | m_rgb = ren_out['rgb'] 155 | 156 | m_mask = None 157 | if vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib): 158 | m_depth = ren_out['depth'] 159 | 160 | # Get mask of the surface parts that are closer than the 161 | # surfaces rendered before. 162 | visible_mask = np.logical_or(ren_depth == 0, m_depth < ren_depth) 163 | m_mask = np.logical_and(m_depth != 0, visible_mask) 164 | 165 | ren_depth[m_mask] = m_depth[m_mask].astype(ren_depth.dtype) 166 | 167 | # Combine the RGB renderings. 168 | if vis_rgb: 169 | if vis_rgb_resolve_visib: 170 | ren_rgb[m_mask] = m_rgb[m_mask].astype(ren_rgb.dtype) 171 | else: 172 | ren_rgb_f = ren_rgb.astype(np.float32) + m_rgb.astype(np.float32) 173 | ren_rgb_f[ren_rgb_f > 255] = 255 174 | ren_rgb = ren_rgb_f.astype(np.uint8) 175 | 176 | # Draw 2D bounding box and write text info. 177 | obj_mask = np.sum(m_rgb > 0, axis=2) 178 | ys, xs = obj_mask.nonzero() 179 | if len(ys): 180 | # bbox_color = model_color 181 | # text_color = model_color 182 | bbox_color = (0.3, 0.3, 0.3) 183 | text_color = (1.0, 1.0, 1.0) 184 | text_size = 11 185 | 186 | bbox = misc.calc_2d_bbox(xs, ys, im_size) 187 | im_size = (obj_mask.shape[1], obj_mask.shape[0]) 188 | ren_rgb_info = draw_rect(ren_rgb_info, bbox, bbox_color) 189 | 190 | if 'text_info' in pose: 191 | text_loc = (bbox[0] + 2, bbox[1]) 192 | ren_rgb_info = write_text_on_image( 193 | ren_rgb_info, pose['text_info'], text_loc, color=text_color, 194 | size=text_size) 195 | 196 | # Blend and save the RGB visualization. 197 | if vis_rgb: 198 | misc.ensure_dir(os.path.dirname(vis_rgb_path)) 199 | 200 | vis_im_rgb = 0.5 * rgb.astype(np.float32) + \ 201 | 0.5 * ren_rgb.astype(np.float32) + \ 202 | 1.0 * ren_rgb_info.astype(np.float32) 203 | vis_im_rgb[vis_im_rgb > 255] = 255 204 | inout.save_im(vis_rgb_path, vis_im_rgb.astype(np.uint8), jpg_quality=95) 205 | 206 | # Save the image of depth differences. 
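  # The image written below stacks [below-delta mask, normalized diff, normalized diff]
  # as its three channels, so pixels whose rendered-minus-measured depth difference is
  # under the hard-coded delta (15 below) light up in the first channel, and the
  # min/max/mean of the valid differences are printed onto the image.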
207 | if vis_depth_diff: 208 | misc.ensure_dir(os.path.dirname(vis_depth_diff_path)) 209 | 210 | # Calculate the depth difference at pixels where both depth maps are valid. 211 | valid_mask = (depth > 0) * (ren_depth > 0) 212 | depth_diff = valid_mask * (ren_depth.astype(np.float32) - depth) 213 | 214 | delta = 15 215 | below_delta = valid_mask * (depth_diff < delta) 216 | below_delta_vis = (255 * below_delta).astype(np.uint8) 217 | 218 | depth_diff_vis = 255 * depth_for_vis(depth_diff - depth_diff.min()) 219 | depth_diff_vis = np.dstack( 220 | [below_delta_vis, depth_diff_vis, depth_diff_vis]).astype(np.uint8) 221 | depth_diff_vis[np.logical_not(valid_mask)] = 0 222 | depth_diff_valid = depth_diff[valid_mask] 223 | depth_info = [ 224 | {'name': 'min diff', 'fmt': ':.3f', 'val': np.min(depth_diff_valid)}, 225 | {'name': 'max diff', 'fmt': ':.3f', 'val': np.max(depth_diff_valid)}, 226 | {'name': 'mean diff', 'fmt': ':.3f', 'val': np.mean(depth_diff_valid)}, 227 | ] 228 | depth_diff_vis = write_text_on_image(depth_diff_vis, depth_info) 229 | inout.save_im(vis_depth_diff_path, depth_diff_vis) 230 | -------------------------------------------------------------------------------- /keypoint/est_6dof.py: -------------------------------------------------------------------------------- 1 | import json 2 | import cv2 3 | import argparse 4 | from tqdm import tqdm 5 | import numpy as np 6 | 7 | import torch 8 | from torchvision import transforms as T 9 | from models import FRCNN, StackedHourglass, fasterrcnn_backbone 10 | from bop_dataset import BOPDataset 11 | from poseOpt import pose_coordinate_descend 12 | 13 | from train.transforms import ToTensor, Normalize, AffineCrop, Normalize_imgnet 14 | from misc.pose2d_eval import Pose2DEval 15 | from bop_toolkit_lib.inout import save_bop_results 16 | 17 | def one_each(pred, thresh=0.0): 18 | # Postprocess frcnn: get at most one instance per class 19 | # Return: boxes and labels 20 | conf = pred['scores'] > thresh 21 | 22 | conf_scores = pred['scores'][conf] 23 | conf_boxes = pred['boxes'][conf].int() 24 | conf_labels = pred['labels'][conf].int() 25 | 26 | valid = torch.zeros_like(conf_labels).bool() 27 | unique_labels = torch.unique(conf_labels) 28 | for uni in unique_labels: 29 | p = (conf_labels==uni).nonzero(as_tuple=False).reshape(-1) 30 | valid[p[0]] = True 31 | 32 | pd_scores = conf_scores[valid] 33 | pd_boxes = conf_boxes[valid] 34 | pd_labels = conf_labels[valid] 35 | 36 | return pd_boxes, pd_labels 37 | 38 | #********************************************************* 39 | # Provide dataset name 40 | #********************************************************* 41 | parser = argparse.ArgumentParser() 42 | parser.add_argument('--dataset', type=str, default=None, help='dataset name') 43 | args = parser.parse_args() 44 | dataset_name = args.dataset 45 | 46 | 47 | #********************************************************* 48 | # Keypoint-based 6DOF estimation 49 | #********************************************************* 50 | root = './data/bop' 51 | device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') 52 | 53 | print('Running on:', dataset_name) 54 | print('Device:', device) 55 | 56 | # Load dataset meta 57 | dataset = BOPDataset(root, dataset_name, split='test', 58 | return_keypoints=False, return_coco=True) 59 | dataset._set_kpts_info() 60 | 61 | num_classes = {'lmo':8, 'ycbv': 21, 'tudl': 3} 62 | 63 | # Load Faster-RCNN detector 64 | detector_trainsform = T.ToTensor() 65 | state_dict = 
torch.load('data/detect_checkpoints/d{}.pt'.format(dataset_name), map_location=device)['frcnn'] 66 | 67 | #detector = FRCNN(num_classes = 1+num_classes[dataset_name]).to(device) 68 | detector = fasterrcnn_backbone('resnet101', num_classes=1+num_classes[dataset_name]).to(device) 69 | detector.eval() 70 | detector.load_state_dict(state_dict) 71 | 72 | 73 | # Load keypoint detector: stacked hourglass 74 | transform_list = [] 75 | transform_list.append(AffineCrop(out_size=256, scale_factor=0, rotation_factor=0, dialation=0.25)) 76 | transform_list.append(ToTensor()) 77 | transform_list.append(Normalize()) 78 | kpts_transform = T.Compose(transform_list) 79 | state_dict = torch.load('data/kpts_checkpoints/{}.pt'.format(dataset_name), map_location=device)['stacked_hg'] 80 | 81 | kpts_detector = StackedHourglass(dataset.n_kpts).to(device) 82 | kpts_detector.eval() 83 | kpts_detector.load_state_dict(state_dict) 84 | 85 | # Run keypoint-base 6DOF 86 | db = dataset.db 87 | num_imgs = len(db) 88 | poseEval = Pose2DEval() 89 | 90 | obj2idx = dataset.obj2idx 91 | idx2obj = {v:k for k,v in obj2idx.items()} 92 | lab2obj = {v+1:k for k,v in obj2idx.items()} 93 | 94 | with open('kpts3d.json', 'r') as infile: 95 | kpts3d = json.load(infile)[dataset.dataset_name] 96 | 97 | results = [] 98 | for i in tqdm(range(num_imgs)): 99 | imgpath = db[i]['imgpath'] 100 | image = dataset.load_img(imgpath) 101 | 102 | scene_id = db[i]['scene_id'] 103 | im_id = db[i]['im_id'] 104 | K = db[i]['K'] 105 | gt_objs = [lab2obj[l] for l in db[i]['labels']] 106 | 107 | # Object detection 108 | with torch.no_grad(): 109 | img = detector_trainsform(image).to(device) 110 | pred = detector([img])[0] 111 | pred = {k:v.cpu() for k,v in pred.items()} 112 | 113 | pd_boxes, pd_labels = one_each(pred, thresh=0) 114 | pd_objs = [lab2obj[i] for i in pd_labels.tolist()] 115 | pd_objs = torch.tensor(pd_objs) 116 | 117 | # Keypoint-base 6DOF estimation 118 | for obj in gt_objs: 119 | ### If Object is not detected 120 | if obj not in pd_objs: 121 | res = {'scene_id': scene_id, 122 | 'im_id': im_id, 123 | 'obj_id': obj, 124 | 'score': 0, 125 | 'R': np.eye(3), 126 | 't': np.zeros([3]), 127 | 'time': -1 128 | } 129 | results.append(res) 130 | continue 131 | 132 | ### If Object is detected 133 | box = pd_boxes[pd_objs == obj].squeeze().tolist() 134 | box = [box[0], box[1], box[2]-box[0], box[3]-box[1]] 135 | input_crop = {'image':image, 'bb':box} 136 | input_crop = kpts_transform(input_crop) 137 | 138 | with torch.no_grad(): 139 | batch = input_crop['image'][None].to(device) 140 | output = kpts_detector(batch) 141 | output = output[-1].cpu() 142 | 143 | kpt_start = dataset.obj2kptid[obj][0] 144 | kpt_end = dataset.obj2kptid[obj][1] 145 | heatmaps_pred = output[[0], kpt_start:kpt_end, :, :] 146 | 147 | kpts_pred, confs = poseEval.heatmaps_to_locs(heatmaps_pred, return_vals=True) 148 | confs = confs[0] 149 | kpts_pred = kpts_pred[0] 150 | 151 | crop_kpts = kpts_pred * (256/64) 152 | view_kpts = poseEval.get_view_kpts(box, crop_kpts) 153 | view_kpts = view_kpts.numpy() 154 | kpts_h = np.hstack([view_kpts, np.ones([view_kpts.shape[0], 1])]).astype(np.double) 155 | 156 | D = confs.numpy().astype(np.double) 157 | kpts3d_obj = kpts3d[str(obj)] 158 | 159 | R_, t_, Z_, res = pose_coordinate_descend(K, kpts_h, kpts3d_obj, D, 160 | max_iters=10000, thresh=1e-6, pnp_int=True) 161 | 162 | 163 | res = {'scene_id': scene_id, 164 | 'im_id': im_id, 165 | 'obj_id': obj, 166 | 'score': np.mean(D), 167 | 'R': R_, 168 | 't': t_, 169 | 'time': -1 170 | } 171 | 172 | 
        results.append(res)
173 | 
174 | 
175 | save_bop_results('results_{}-test.csv'.format(dataset_name), results)
176 | 
177 | 
178 | 
179 | 
--------------------------------------------------------------------------------
/keypoint/misc/__init__.py:
--------------------------------------------------------------------------------
1 | from .pose2d_eval import Pose2DEval
2 | from .loss import KptsMSELoss
3 | 
--------------------------------------------------------------------------------
/keypoint/misc/loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.nn import functional as F
4 | 
5 | class CrossEntropy(nn.Module):
6 |     def __init__(self, ignore_label=-1, weight=None):
7 |         super(CrossEntropy, self).__init__()
8 |         self.ignore_label = ignore_label
9 |         self.criterion = nn.CrossEntropyLoss(weight=weight,
10 |                                              ignore_index=ignore_label)
11 | 
12 |     def forward(self, score, target):
13 |         ph, pw = score.size(2), score.size(3)
14 |         h, w = target.size(1), target.size(2)
15 |         if ph != h or pw != w:
16 |             score = F.upsample(
17 |                 input=score, size=(h, w), mode='bilinear')
18 | 
19 |         loss = self.criterion(score, target)
20 | 
21 |         return loss
22 | 
23 | 
24 | class KptsMSELoss(nn.Module):
25 |     def __init__(self, use_vis=False):
26 |         super(KptsMSELoss, self).__init__()
27 |         self.criterion = nn.MSELoss(reduction='mean')
28 |         self.use_vis = use_vis
29 | 
30 |     def forward(self, output, target, vis):
31 |         '''
32 |         output: (BN, K, w, h)
33 |         target: (BN, K, w, h)
34 |         vis: (BN, K)
35 |         '''
36 |         batch_size = output.size(0)
37 |         num_kpts = output.size(1)
38 |         heatmaps_pred = output.reshape((batch_size, num_kpts, -1))
39 |         heatmaps_gt = target.reshape((batch_size, num_kpts, -1))
40 |         vis = vis.reshape((batch_size, num_kpts, 1))
41 | 
42 |         if self.use_vis:
43 |             loss = self.criterion(
44 |                 heatmaps_pred.mul(vis),
45 |                 heatmaps_gt.mul(vis)
46 |             )
47 |         else:
48 |             loss = self.criterion(heatmaps_pred, heatmaps_gt)
49 | 
50 |         return loss
51 | 
52 | 
53 | 
54 | 
55 | 
56 | 
--------------------------------------------------------------------------------
/keypoint/misc/pose2d_eval.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from skimage.draw import disk
4 | 
5 | class Pose2DEval:
6 | 
7 |     def __init__(self, detection_thresh=0.1, dist_thresh=10):
8 |         self.detection_thresh = detection_thresh
9 |         self.dist_thresh = dist_thresh
10 | 
11 |     def heatmaps_to_locs(self, heatmaps, no_thresh=False, return_vals=False):
12 |         vals, uv = torch.max(heatmaps.view(heatmaps.shape[0],
13 |                                            heatmaps.shape[1],
14 |                                            heatmaps.shape[2]*heatmaps.shape[3]), 2)
15 |         # zero out entries below the detection threshold
16 |         thresh = self.detection_thresh
17 |         if no_thresh:
18 |             thresh = 0
19 |         uv *= (vals > thresh).type(torch.long)
20 |         rows = uv // heatmaps.shape[3]  # floor division: uv is a flattened index
21 |         cols = uv % heatmaps.shape[3]
22 | 
23 |         locs = torch.stack([cols, rows], 2).cpu().type(torch.float)
24 |         vals[vals < thresh] = 0
25 | 
26 |         if return_vals:
27 |             return locs, vals
28 |         else:
29 |             return locs
30 | 
31 |     def pck(self, gt_heatmaps, pred_heatmaps):
32 |         gt_locs = self.heatmaps_to_locs(gt_heatmaps)
33 |         pred_locs = self.heatmaps_to_locs(pred_heatmaps)
34 |         visible_keypoints = (gt_locs.sum(dim=-1) > 0)
35 |         return 100 * torch.mean((torch.sqrt(torch.sum((gt_locs - pred_locs) ** 2, dim=-1))[visible_keypoints] < self.dist_thresh).type(torch.float))
36 | 
37 |     def get_view_kpts(self, bbox, crop_kpts, crop_size=256, crop_dialation=0.25):
38 |         if type(bbox) is not torch.Tensor:
39 |             bbox = torch.tensor(bbox)
40 | 
41 |         x,y,w,h = bbox
42 |         center = torch.tensor([x+w/2, y+h/2])
43 |         scale = torch.max(w,h) * (1+crop_dialation)
44 |         rescale = scale /crop_size
45 |         ul = center - scale/2
46 | 
47 |         if crop_kpts.shape[1] == 2:
48 |             view_kpts = crop_kpts * rescale + torch.tensor([ul[0],ul[1]])
49 |         elif crop_kpts.shape[1] == 3:
50 |             view_kpts = crop_kpts * rescale + torch.tensor([ul[0],ul[1],0])
51 | 
52 |         return view_kpts
53 | 
54 |     def draw_keypoints_with_labels(self, images, gt_heatmaps, pred_heatmaps):
55 |         gt_images, pred_images = images.clone(), images.clone()
56 |         rescale = images.shape[2]/gt_heatmaps.shape[2]
57 |         gt_keypoints = self.heatmaps_to_locs(gt_heatmaps)*rescale
58 |         pred_keypoints = self.heatmaps_to_locs(pred_heatmaps)*rescale
59 |         for i in range(images.shape[0]):
60 |             for gt_keypoint, pred_keypoint in zip(gt_keypoints[i,:,:], pred_keypoints[i,:,:]):
61 |                 if gt_keypoint[0] != 0 and gt_keypoint[1] != 0:
62 |                     r,c = disk((gt_keypoint[1], gt_keypoint[0]), 3, shape=images.shape[-2:])
63 |                     # blue color for the ground truth keypoints
64 |                     gt_images[i,0,r,c] = 0
65 |                     gt_images[i,1,r,c] = 0
66 |                     gt_images[i,2,r,c] = 1
67 |                 if pred_keypoint[0] != 0 and pred_keypoint[1] != 0:
68 |                     r,c = disk((pred_keypoint[1], pred_keypoint[0]), 3, shape=images.shape[-2:])
69 |                     correct_prediction = torch.sqrt(torch.sum((gt_keypoint - pred_keypoint) ** 2)) < self.dist_thresh
70 |                     # blue color if predicted keypoint is within the margin, else red
71 |                     val = [0,0,1] if correct_prediction else [1,0,0]
72 |                     pred_images[i,0,r,c] = val[0]
73 |                     pred_images[i,1,r,c] = val[1]
74 |                     pred_images[i,2,r,c] = val[2]
75 |         return gt_images, pred_images
76 | 
77 |     def draw_keypoints_unlabeled(self, images, pred_heatmaps):
78 |         pred_images = images.clone()
79 |         rescale = images.shape[2]/pred_heatmaps.shape[2]
80 |         pred_keypoints = self.heatmaps_to_locs(pred_heatmaps)*rescale
81 |         for i in range(images.shape[0]):
82 |             for pred_keypoint in pred_keypoints[i,:,:]:
83 |                 if pred_keypoint[0] != 0 and pred_keypoint[1] != 0:
84 |                     r,c = disk((pred_keypoint[1], pred_keypoint[0]), 3, shape=images.shape[-2:])
85 |                     # blue color for the predicted keypoints
86 |                     pred_images[i,0,r,c] = 0
87 |                     pred_images[i,1,r,c] = 0
88 |                     pred_images[i,2,r,c] = 1
89 |         return pred_images
90 | 
--------------------------------------------------------------------------------
/keypoint/misc/segmentation.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 | 
4 | def iou(gt_masks, pred_masks):
5 |     pred_masks_thresh = (pred_masks > 0.5).type(torch.int)
6 |     gt_masks = (gt_masks > 0.5).type(torch.int)
7 |     return torch.mean((pred_masks_thresh & gt_masks).type(torch.float))\
8 |         / torch.mean((pred_masks_thresh | gt_masks).type(torch.float))
9 | 
10 | def visualize(images, masks):
11 |     scale = int(images.shape[2] / masks.shape[2])
12 |     masks_thresh = (F.upsample(masks, scale_factor=scale, mode='bilinear') > 0.5).type(torch.int)
13 |     segmented_images = images.clone()
14 |     segmented_images[masks_thresh.repeat(1,3,1,1) == 0] = 0
15 |     return segmented_images
16 | 
--------------------------------------------------------------------------------
/keypoint/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .hourglass import StackedHourglass
2 | from .fasterRCNN import FRCNN, fasterrcnn_backbone
3 | 
--------------------------------------------------------------------------------
/keypoint/models/fasterRCNN.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torchvision
3 | from torchvision.models.detection.faster_rcnn import FastRCNNPredictor, FasterRCNN
4 | from torchvision.models.detection.backbone_utils
import resnet_fpn_backbone 5 | 6 | from .patched import rpn_forward, roi_forward 7 | import types 8 | 9 | @torch.jit.unused 10 | def eager_outputs(self, losses, detections): 11 | if self.training or self.always_return_loss: 12 | return losses 13 | 14 | return detections 15 | 16 | 17 | def FRCNN(num_classes): 18 | # load a model pre-trained pre-trained on COCO 19 | model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True) 20 | 21 | # get number of input features for the classifier 22 | in_features = model.roi_heads.box_predictor.cls_score.in_features 23 | 24 | # replace the pre-trained head with a new one 25 | model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) 26 | 27 | # override ouput functions 28 | model.always_return_loss = False 29 | 30 | model.rpn.forward = types.MethodType(rpn_forward, model.rpn) 31 | model.roi_heads.forward = types.MethodType(roi_forward, model.roi_heads) 32 | model.eager_outputs = types.MethodType(eager_outputs, model) 33 | 34 | return model 35 | 36 | 37 | def fasterrcnn_backbone(backbone_name='resnet50', 38 | num_classes=91, pretrained_backbone=True, trainable_backbone_layers=3, **kwargs): 39 | ''' 40 | Input: 41 | backbone_name (string): resnet architecture. Possible values are 'ResNet', 'resnet18', 'resnet34', 'resnet50', 42 | 'resnet101', 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d', 'wide_resnet50_2', 'wide_resnet101_2' 43 | ''' 44 | 45 | # load a model pre-trained pre-trained on COCO 46 | model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True) 47 | in_features = model.roi_heads.box_predictor.cls_score.in_features 48 | model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) 49 | 50 | # switch backbone 51 | backbone = resnet_fpn_backbone(backbone_name, pretrained_backbone, trainable_layers=trainable_backbone_layers) 52 | model.backbone = backbone 53 | 54 | 55 | # override ouput functions 56 | model.always_return_loss = False 57 | model.rpn.forward = types.MethodType(rpn_forward, model.rpn) 58 | model.roi_heads.forward = types.MethodType(roi_forward, model.roi_heads) 59 | model.eager_outputs = types.MethodType(eager_outputs, model) 60 | 61 | return model 62 | 63 | 64 | -------------------------------------------------------------------------------- /keypoint/models/hourglass.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import numpy as np 4 | # from .layers import ConvBlock, ResBlock 5 | from .layers import Residual 6 | 7 | class Hourglass(nn.Module): 8 | def __init__(self, n, in_channels, out_channels): 9 | super(Hourglass, self).__init__() 10 | self.up1 = Residual(in_channels, 256) 11 | self.up2 = Residual(256, 256) 12 | self.up4 = Residual(256, out_channels) 13 | 14 | self.pool = nn.MaxPool2d(kernel_size=2, stride=2) 15 | self.low1 = Residual(in_channels, 256) 16 | self.low2 = Residual(256, 256) 17 | self.low5 = Residual(256, 256) 18 | if n > 1: 19 | self.low6 = Hourglass(n-1, 256, out_channels) 20 | else: 21 | self.low6 = Residual(256, out_channels) 22 | self.low7 = Residual(out_channels, out_channels) 23 | # self.up5 = nn.Upsample(scale_factor=2) 24 | 25 | def forward(self, x): 26 | up = self.up1(x) 27 | up = self.up2(up) 28 | up = self.up4(up) 29 | 30 | low = self.pool(x) 31 | low = self.low1(low) 32 | low = self.low2(low) 33 | low = self.low5(low) 34 | low = self.low6(low) 35 | low = self.low7(low) 36 | # low = self.up5(low) 37 | low = nn.functional.interpolate(low, 
scale_factor=2) 38 | 39 | return up + low 40 | 41 | class Lin(nn.Module): 42 | def __init__(self, in_channels, out_channels): 43 | super(Lin, self).__init__() 44 | self.layer = nn.Sequential( 45 | nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0), 46 | nn.BatchNorm2d(out_channels), 47 | nn.ReLU(True) 48 | ) 49 | 50 | def forward(self, x): 51 | return self.layer(x) 52 | 53 | class StackedHourglass(nn.Module): 54 | def __init__(self, out_channels): 55 | super(StackedHourglass, self).__init__() 56 | self.conv1 = nn.Sequential( 57 | nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3), 58 | nn.BatchNorm2d(64), 59 | nn.ReLU() 60 | ) 61 | self.r1 = Residual(64, 128) 62 | self.pool = nn.MaxPool2d(2, 2) 63 | self.r4 = Residual(128, 128) 64 | self.r5 = Residual(128, 128) 65 | self.r6 = Residual(128, 256) 66 | 67 | self.hg1 = Hourglass(4, 256, 512) 68 | 69 | self.l1 = Lin(512, 512) 70 | self.l2 = Lin(512, 256) 71 | 72 | self.out1 = nn.Conv2d(256, out_channels, kernel_size=1, stride=1, padding=0) 73 | 74 | self.out_return = nn.Conv2d(out_channels, 256+128, kernel_size=1, stride=1, padding=0) 75 | 76 | self.cat_conv = nn.Conv2d(256+128, 256+128, kernel_size=1, stride=1, padding=0) 77 | 78 | self.hg2 = Hourglass(4, 256+128, 512) 79 | 80 | self.l3 = Lin(512, 512) 81 | self.l4 = Lin(512, 512) 82 | 83 | self.out2 = nn.Conv2d(512, out_channels, 1, 1, padding=0) 84 | 85 | def forward(self, x): 86 | x = self.conv1(x) 87 | x = self.r1(x) 88 | pooled = self.pool(x) 89 | x = self.r4(pooled) 90 | x = self.r5(x) 91 | x = self.r6(x) 92 | 93 | # First hourglass 94 | x = self.hg1(x) 95 | 96 | # Linear layers to produce first set of predictions 97 | x = self.l1(x) 98 | x = self.l2(x) 99 | 100 | # First predicted heatmaps 101 | out1 = self.out1(x) 102 | out1_ = self.out_return(out1) 103 | 104 | joined = torch.cat([x, pooled], 1) 105 | joined = self.cat_conv(joined) 106 | int1 = joined + out1_ 107 | 108 | hg2 = self.hg2(int1) 109 | 110 | l3 = self.l3(hg2) 111 | l4 = self.l4(l3) 112 | 113 | out2 = self.out2(l4) 114 | 115 | return out1, out2 116 | 117 | 118 | def num_trainable_parameters(self): 119 | trainable_parameters = filter(lambda p: p.requires_grad, self.parameters()) 120 | return sum([np.prod(p.size()) for p in trainable_parameters]) 121 | 122 | 123 | -------------------------------------------------------------------------------- /keypoint/models/layers.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | # Wrapper around Conv2d 4 | class ConvBlock(nn.Module): 5 | def __init__(self, in_channels, out_channels): 6 | super(ConvBlock, self).__init__() 7 | self.block = nn.Sequential( 8 | nn.BatchNorm2d(in_channels), 9 | nn.ReLU(True), 10 | nn.Conv2d(in_channels, out_channels//2, kernel_size=1), 11 | nn.BatchNorm2d(out_channels//2), 12 | nn.ReLU(True), 13 | nn.Conv2d(out_channels//2, out_channels//2, kernel_size=3, stride=1, padding=1), 14 | nn.BatchNorm2d(out_channels//2), 15 | nn.ReLU(True), 16 | nn.Conv2d(out_channels//2, out_channels, kernel_size=1) 17 | ) 18 | 19 | def forward(self, x): 20 | return self.block(x) 21 | 22 | class SkipLayer(nn.Module): 23 | def __init__(self, in_channels, out_channels): 24 | super(SkipLayer, self).__init__() 25 | if in_channels != out_channels: 26 | self.layer = nn.Conv2d(in_channels, out_channels, kernel_size=1) 27 | else: 28 | self.layer = None 29 | 30 | def forward(self, x): 31 | if self.layer is None: 32 | return x 33 | else: 34 | return self.layer(x) 35 | 36 | class Residual(nn.Module): 37 
| def __init__(self, in_channels, out_channels): 38 | super(Residual, self).__init__() 39 | self.conv = ConvBlock(in_channels, out_channels) 40 | self.skip = SkipLayer(in_channels, out_channels) 41 | 42 | def forward(self, x): 43 | return self.conv(x) + self.skip(x) 44 | -------------------------------------------------------------------------------- /keypoint/models/mask_rcnn.py: -------------------------------------------------------------------------------- 1 | from torchvision.models.detection.mask_rcnn import MaskRCNN 2 | from torchvision.models.detection.backbone_utils import resnet_fpn_backbone 3 | from torchvision.models.detection.rpn import AnchorGenerator 4 | 5 | 6 | class DetectorMaskRCNN(MaskRCNN): 7 | def __init__(self, input_resize=(240, 320), n_classes=2, 8 | backbone_str='resnet50-fpn', 9 | anchor_sizes=((32, ), (64, ), (128, ), (256, ), (512, ))): 10 | 11 | assert backbone_str == 'resnet50-fpn' 12 | backbone = resnet_fpn_backbone('resnet50', pretrained=False) 13 | 14 | aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes) 15 | rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios) 16 | 17 | super().__init__(backbone=backbone, num_classes=n_classes, 18 | rpn_anchor_generator=rpn_anchor_generator, 19 | max_size=max(input_resize), min_size=min(input_resize)) 20 | -------------------------------------------------------------------------------- /keypoint/models/patched.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision.models.detection.rpn import concat_box_prediction_layers 3 | from torchvision.models.detection.roi_heads import fastrcnn_loss 4 | 5 | #************************************************************************ 6 | # Patch RPN forward function to return loss during eval() 7 | # when "targets" is provided 8 | #************************************************************************ 9 | def rpn_forward(self, 10 | images, # type: ImageList 11 | features, # type: Dict[str, Tensor] 12 | targets=None # type: Optional[List[Dict[str, Tensor]]] 13 | ): 14 | # type: (...) -> Tuple[List[Tensor], Dict[str, Tensor]] 15 | """ 16 | Args: 17 | images (ImageList): images for which we want to compute the predictions 18 | features (OrderedDict[Tensor]): features computed from the images that are 19 | used for computing the predictions. Each tensor in the list 20 | correspond to different feature levels 21 | targets (List[Dict[Tensor]]): ground-truth boxes present in the image (optional). 22 | If provided, each element in the dict should contain a field `boxes`, 23 | with the locations of the ground-truth boxes. 24 | Returns: 25 | boxes (List[Tensor]): the predicted boxes from the RPN, one Tensor per 26 | image. 27 | losses (Dict[Tensor]): the losses for the model during training. During 28 | testing, it is an empty dict. 
29 | """ 30 | # RPN uses all feature maps that are available 31 | features = list(features.values()) 32 | objectness, pred_bbox_deltas = self.head(features) 33 | anchors = self.anchor_generator(images, features) 34 | 35 | num_images = len(anchors) 36 | num_anchors_per_level_shape_tensors = [o[0].shape for o in objectness] 37 | num_anchors_per_level = [s[0] * s[1] * s[2] for s in num_anchors_per_level_shape_tensors] 38 | objectness, pred_bbox_deltas = \ 39 | concat_box_prediction_layers(objectness, pred_bbox_deltas) 40 | # apply pred_bbox_deltas to anchors to obtain the decoded proposals 41 | # note that we detach the deltas because Faster R-CNN do not backprop through 42 | # the proposals 43 | proposals = self.box_coder.decode(pred_bbox_deltas.detach(), anchors) 44 | proposals = proposals.view(num_images, -1, 4) 45 | boxes, scores = self.filter_proposals(proposals, objectness, images.image_sizes, num_anchors_per_level) 46 | 47 | losses = {} 48 | if self.training: 49 | assert targets is not None 50 | labels, matched_gt_boxes = self.assign_targets_to_anchors(anchors, targets) 51 | regression_targets = self.box_coder.encode(matched_gt_boxes, anchors) 52 | loss_objectness, loss_rpn_box_reg = self.compute_loss( 53 | objectness, pred_bbox_deltas, labels, regression_targets) 54 | losses = { 55 | "loss_objectness": loss_objectness, 56 | "loss_rpn_box_reg": loss_rpn_box_reg, 57 | } 58 | #************************************ 59 | # Patch start 60 | #************************************ 61 | elif targets is not None: 62 | assert targets is not None 63 | labels, matched_gt_boxes = self.assign_targets_to_anchors(anchors, targets) 64 | regression_targets = self.box_coder.encode(matched_gt_boxes, anchors) 65 | loss_objectness, loss_rpn_box_reg = self.compute_loss( 66 | objectness, pred_bbox_deltas, labels, regression_targets) 67 | losses = { 68 | "loss_objectness": loss_objectness, 69 | "loss_rpn_box_reg": loss_rpn_box_reg, 70 | } 71 | #************************************ 72 | # Patch end 73 | #************************************ 74 | 75 | return boxes, losses 76 | 77 | 78 | 79 | 80 | #************************************************************************ 81 | # Patch ROIHeads forward function to return loss during eval() 82 | # when "targets" is provided 83 | # This function is reduced to only work for detection task (eg. frcnn) 84 | #************************************************************************ 85 | def roi_forward(self, 86 | features, # type: Dict[str, Tensor] 87 | proposals, # type: List[Tensor] 88 | image_shapes, # type: List[Tuple[int, int]] 89 | targets=None # type: Optional[List[Dict[str, Tensor]]] 90 | ): 91 | # type: (...) 
-> Tuple[List[Dict[str, Tensor]], Dict[str, Tensor]] 92 | """ 93 | Args: 94 | features (List[Tensor]) 95 | proposals (List[Tensor[N, 4]]) 96 | image_shapes (List[Tuple[H, W]]) 97 | targets (List[Dict]) 98 | """ 99 | if targets is not None: 100 | for t in targets: 101 | # TODO: https://github.com/pytorch/pytorch/issues/26731 102 | floating_point_types = (torch.float, torch.double, torch.half) 103 | assert t["boxes"].dtype in floating_point_types, 'target boxes must of float type' 104 | assert t["labels"].dtype == torch.int64, 'target labels must of int64 type' 105 | if self.has_keypoint(): 106 | assert t["keypoints"].dtype == torch.float32, 'target keypoints must of float type' 107 | 108 | if self.training: 109 | proposals, matched_idxs, labels, regression_targets = self.select_training_samples(proposals, targets) 110 | #************************************ 111 | # Patch start 112 | #************************************ 113 | elif targets is not None: 114 | proposals, matched_idxs, labels, regression_targets = self.select_training_samples(proposals, targets) 115 | #************************************ 116 | # Patch end 117 | #************************************ 118 | else: 119 | labels = None 120 | regression_targets = None 121 | matched_idxs = None 122 | 123 | box_features = self.box_roi_pool(features, proposals, image_shapes) 124 | box_features = self.box_head(box_features) 125 | class_logits, box_regression = self.box_predictor(box_features) 126 | 127 | result: List[Dict[str, torch.Tensor]] = [] 128 | losses = {} 129 | if self.training: 130 | assert labels is not None and regression_targets is not None 131 | loss_classifier, loss_box_reg = fastrcnn_loss( 132 | class_logits, box_regression, labels, regression_targets) 133 | losses = { 134 | "loss_classifier": loss_classifier, 135 | "loss_box_reg": loss_box_reg 136 | } 137 | else: 138 | #************************************ 139 | # Patch start 140 | #************************************ 141 | if labels is not None and regression_targets is not None: 142 | loss_classifier, loss_box_reg = fastrcnn_loss( 143 | class_logits, box_regression, labels, regression_targets) 144 | losses = { 145 | "loss_classifier": loss_classifier, 146 | "loss_box_reg": loss_box_reg 147 | } 148 | #************************************ 149 | # Patch end 150 | #************************************ 151 | 152 | boxes, scores, labels = self.postprocess_detections(class_logits, box_regression, proposals, image_shapes) 153 | num_images = len(boxes) 154 | for i in range(num_images): 155 | result.append( 156 | { 157 | "boxes": boxes[i], 158 | "labels": labels[i], 159 | "scores": scores[i], 160 | } 161 | ) 162 | 163 | return result, losses 164 | 165 | -------------------------------------------------------------------------------- /keypoint/scripts/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | def add_path(path): 5 | if path not in sys.path: 6 | sys.path.insert(0, path) 7 | 8 | this_dir = os.path.dirname(__file__) 9 | lib_path = os.path.join(this_dir, '..') 10 | add_path(lib_path) 11 | -------------------------------------------------------------------------------- /keypoint/scripts/calc_gt_distribution.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """Calculates distribution of GT poses.""" 5 | 6 | import math 7 | 
import numpy as np 8 | import matplotlib.pyplot as plt 9 | 10 | from bop_toolkit_lib import config 11 | from bop_toolkit_lib import dataset_params 12 | from bop_toolkit_lib import inout 13 | from bop_toolkit_lib import misc 14 | 15 | 16 | # PARAMETERS. 17 | ################################################################################ 18 | p = { 19 | # See dataset_params.py for options. 20 | 'dataset': 'lm', 21 | 22 | # Dataset split. Options: 'train', 'val', 'test'. 23 | 'dataset_split': 'test', 24 | 25 | # Dataset split type. None = default. See dataset_params.py for options. 26 | 'dataset_split_type': None, 27 | 28 | # Folder containing the BOP datasets. 29 | 'datasets_path': config.datasets_path, 30 | } 31 | ################################################################################ 32 | 33 | 34 | # Load dataset parameters. 35 | dp_split = dataset_params.get_split_params( 36 | p['datasets_path'], p['dataset'], p['dataset_split'], p['dataset_split_type']) 37 | 38 | scene_ids = dp_split['scene_ids'] 39 | dists = [] 40 | azimuths = [] 41 | elevs = [] 42 | visib_fracts = [] 43 | ims_count = 0 44 | for scene_id in scene_ids: 45 | misc.log('Processing - dataset: {} ({}, {}), scene: {}'.format( 46 | p['dataset'], p['dataset_split'], p['dataset_split_type'], scene_id)) 47 | 48 | # Load GT poses. 49 | scene_gt = inout.load_scene_gt( 50 | dp_split['scene_gt_tpath'].format(scene_id=scene_id)) 51 | 52 | # Load info about the GT poses. 53 | scene_gt_info = inout.load_json( 54 | dp_split['scene_gt_info_tpath'].format(scene_id=scene_id), keys_to_int=True) 55 | 56 | ims_count += len(scene_gt) 57 | 58 | for im_id in scene_gt.keys(): 59 | for gt_id, im_gt in enumerate(scene_gt[im_id]): 60 | 61 | # Object distance. 62 | dist = np.linalg.norm(im_gt['cam_t_m2c']) 63 | dists.append(dist) 64 | 65 | # Camera origin in the model coordinate system. 66 | cam_orig_m = -np.linalg.inv(im_gt['cam_R_m2c']).dot( 67 | im_gt['cam_t_m2c']) 68 | 69 | # Azimuth from [0, 360]. 70 | azimuth = math.atan2(cam_orig_m[1, 0], cam_orig_m[0, 0]) 71 | if azimuth < 0: 72 | azimuth += 2.0 * math.pi 73 | azimuths.append((180.0 / math.pi) * azimuth) 74 | 75 | # Elevation from [-90, 90]. 76 | a = np.linalg.norm(cam_orig_m) 77 | b = np.linalg.norm([cam_orig_m[0, 0], cam_orig_m[1, 0], 0]) 78 | elev = math.acos(b / a) 79 | if cam_orig_m[2, 0] < 0: 80 | elev = -elev 81 | elevs.append((180.0 / math.pi) * elev) 82 | 83 | # Visibility fraction. 84 | visib_fracts.append(scene_gt_info[im_id][gt_id]['visib_fract']) 85 | 86 | # Print stats. 87 | misc.log('Stats of the GT poses in dataset {} {}:'.format( 88 | p['dataset'], p['dataset_split'])) 89 | misc.log('Number of images: ' + str(ims_count)) 90 | 91 | misc.log('Min dist: {}'.format(np.min(dists))) 92 | misc.log('Max dist: {}'.format(np.max(dists))) 93 | misc.log('Mean dist: {}'.format(np.mean(dists))) 94 | 95 | misc.log('Min azimuth: {}'.format(np.min(azimuths))) 96 | misc.log('Max azimuth: {}'.format(np.max(azimuths))) 97 | misc.log('Mean azimuth: {}'.format(np.mean(azimuths))) 98 | 99 | misc.log('Min elev: {}'.format(np.min(elevs))) 100 | misc.log('Max elev: {}'.format(np.max(elevs))) 101 | misc.log('Mean elev: {}'.format(np.mean(elevs))) 102 | 103 | misc.log('Min visib fract: {}'.format(np.min(visib_fracts))) 104 | misc.log('Max visib fract: {}'.format(np.max(visib_fracts))) 105 | misc.log('Mean visib fract: {}'.format(np.mean(visib_fracts))) 106 | 107 | # Visualize distributions. 
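# plt.show() below blocks until the figure windows are closed; when running
# headless, each histogram can instead be saved with plt.savefig(), e.g.
#   plt.savefig('gt_distribution_{}.png'.format(p['dataset']))  # illustrative filename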
108 | plt.figure() 109 | plt.hist(dists, bins=100) 110 | plt.title('Object distance') 111 | 112 | plt.figure() 113 | plt.hist(azimuths, bins=100) 114 | plt.title('Azimuth') 115 | 116 | plt.figure() 117 | plt.hist(elevs, bins=100) 118 | plt.title('Elevation') 119 | 120 | plt.figure() 121 | plt.hist(visib_fracts, bins=100) 122 | plt.title('Visibility fraction') 123 | 124 | plt.show() 125 | -------------------------------------------------------------------------------- /keypoint/scripts/calc_gt_info.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """Calculates visibility, 2D bounding boxes etc. for the ground-truth poses. 5 | 6 | See docs/bop_datasets_format.md for documentation of the calculated info. 7 | 8 | The info is saved in folder "{train,val,test}_gt_info" in the main folder of the 9 | selected dataset. 10 | """ 11 | 12 | import os 13 | import glob 14 | import numpy as np 15 | 16 | from bop_toolkit_lib import config 17 | from bop_toolkit_lib import dataset_params 18 | from bop_toolkit_lib import inout 19 | from bop_toolkit_lib import misc 20 | from bop_toolkit_lib import renderer 21 | from bop_toolkit_lib import visibility 22 | 23 | 24 | # PARAMETERS. 25 | ################################################################################ 26 | p = { 27 | # See dataset_params.py for options. 28 | 'dataset': 'lm', 29 | 30 | # Dataset split. Options: 'train', 'val', 'test'. 31 | 'dataset_split': 'test', 32 | 33 | # Dataset split type. None = default. See dataset_params.py for options. 34 | 'dataset_split_type': None, 35 | 36 | # Whether to save visualizations of visibility masks. 37 | 'vis_visibility_masks': False, 38 | 39 | # Tolerance used in the visibility test [mm]. 40 | 'delta': 15, 41 | 42 | # Type of the renderer. 43 | 'renderer_type': 'python', # Options: 'cpp', 'python'. 44 | 45 | # Folder containing the BOP datasets. 46 | 'datasets_path': config.datasets_path, 47 | 48 | # Path template for output images with object masks. 49 | 'vis_mask_visib_tpath': os.path.join( 50 | config.output_path, 'vis_gt_visib_delta={delta}', 51 | 'vis_gt_visib_delta={delta}', '{dataset}', '{split}', '{scene_id:06d}', 52 | '{im_id:06d}_{gt_id:06d}.jpg'), 53 | } 54 | ################################################################################ 55 | 56 | 57 | if p['vis_visibility_masks']: 58 | from bop_toolkit_lib import visualization 59 | 60 | # Load dataset parameters. 61 | dp_split = dataset_params.get_split_params( 62 | p['datasets_path'], p['dataset'], p['dataset_split'], p['dataset_split_type']) 63 | 64 | model_type = None 65 | if p['dataset'] == 'tless': 66 | model_type = 'cad' 67 | dp_model = dataset_params.get_model_params( 68 | p['datasets_path'], p['dataset'], model_type) 69 | 70 | # Initialize a renderer. 71 | misc.log('Initializing renderer...') 72 | 73 | # The renderer has a larger canvas for generation of masks of truncated objects. 
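# The canvas is 3x the image size and the principal point is shifted by
# (im_width, im_height), so the real image corresponds to the central third of
# the render; the GT depth is cropped back out of that region below.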
74 | im_width, im_height = dp_split['im_size'] 75 | ren_width, ren_height = 3 * im_width, 3 * im_height 76 | ren_cx_offset, ren_cy_offset = im_width, im_height 77 | ren = renderer.create_renderer( 78 | ren_width, ren_height, p['renderer_type'], mode='depth') 79 | 80 | for obj_id in dp_model['obj_ids']: 81 | model_fpath = dp_model['model_tpath'].format(obj_id=obj_id) 82 | ren.add_object(obj_id, model_fpath) 83 | 84 | scene_ids = dataset_params.get_present_scene_ids(dp_split) 85 | for scene_id in scene_ids: 86 | 87 | # Load scene info and ground-truth poses. 88 | scene_camera = inout.load_scene_camera( 89 | dp_split['scene_camera_tpath'].format(scene_id=scene_id)) 90 | scene_gt = inout.load_scene_gt( 91 | dp_split['scene_gt_tpath'].format(scene_id=scene_id)) 92 | 93 | scene_gt_info = {} 94 | im_ids = sorted(scene_gt.keys()) 95 | for im_counter, im_id in enumerate(im_ids): 96 | if im_counter % 100 == 0: 97 | misc.log( 98 | 'Calculating GT info - dataset: {} ({}, {}), scene: {}, im: {}'.format( 99 | p['dataset'], p['dataset_split'], p['dataset_split_type'], scene_id, 100 | im_id)) 101 | 102 | # Load depth image. 103 | depth_fpath = dp_split['depth_tpath'].format(scene_id=scene_id, im_id=im_id) 104 | if not os.path.exists(depth_fpath): 105 | depth_fpath = depth_fpath.replace('.tif', '.png') 106 | depth = inout.load_depth(depth_fpath) 107 | depth *= scene_camera[im_id]['depth_scale'] # Convert to [mm]. 108 | 109 | K = scene_camera[im_id]['cam_K'] 110 | fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2] 111 | im_size = (depth.shape[1], depth.shape[0]) 112 | 113 | scene_gt_info[im_id] = [] 114 | for gt_id, gt in enumerate(scene_gt[im_id]): 115 | 116 | # Render depth image of the object model in the ground-truth pose. 117 | depth_gt_large = ren.render_object( 118 | gt['obj_id'], gt['cam_R_m2c'], gt['cam_t_m2c'], 119 | fx, fy, cx + ren_cx_offset, cy + ren_cy_offset)['depth'] 120 | depth_gt = depth_gt_large[ 121 | ren_cy_offset:(ren_cy_offset + im_height), 122 | ren_cx_offset:(ren_cx_offset + im_width)] 123 | 124 | # Convert depth images to distance images. 125 | dist_gt = misc.depth_im_to_dist_im(depth_gt, K) 126 | dist_im = misc.depth_im_to_dist_im(depth, K) 127 | 128 | # Estimation of the visibility mask. 129 | visib_gt = visibility.estimate_visib_mask_gt( 130 | dist_im, dist_gt, p['delta'], visib_mode='bop19') 131 | 132 | # Mask of the object in the GT pose. 133 | obj_mask_gt_large = depth_gt_large > 0 134 | obj_mask_gt = dist_gt > 0 135 | 136 | # Number of pixels in the whole object silhouette 137 | # (even in the truncated part). 138 | px_count_all = np.sum(obj_mask_gt_large) 139 | 140 | # Number of pixels in the object silhouette with a valid depth measurement 141 | # (i.e. with a non-zero value in the depth image). 142 | px_count_valid = np.sum(dist_im[obj_mask_gt] > 0) 143 | 144 | # Number of pixels in the visible part of the object silhouette. 145 | px_count_visib = visib_gt.sum() 146 | 147 | # Visible surface fraction. 148 | if px_count_all > 0: 149 | visib_fract = px_count_visib / float(px_count_all) 150 | else: 151 | visib_fract = 0.0 152 | 153 | # Bounding box of the whole object silhouette 154 | # (including the truncated part). 155 | bbox = [-1, -1, -1, -1] 156 | if px_count_visib > 0: 157 | ys, xs = obj_mask_gt_large.nonzero() 158 | ys -= ren_cy_offset 159 | xs -= ren_cx_offset 160 | bbox = misc.calc_2d_bbox(xs, ys, im_size) 161 | 162 | # Bounding box of the visible surface part. 
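# As with 'bbox' above, [-1, -1, -1, -1] is kept when no pixel of the object
# is visible (px_count_visib == 0).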
163 | bbox_visib = [-1, -1, -1, -1] 164 | if px_count_visib > 0: 165 | ys, xs = visib_gt.nonzero() 166 | bbox_visib = misc.calc_2d_bbox(xs, ys, im_size) 167 | 168 | # Store the calculated info. 169 | scene_gt_info[im_id].append({ 170 | 'px_count_all': int(px_count_all), 171 | 'px_count_valid': int(px_count_valid), 172 | 'px_count_visib': int(px_count_visib), 173 | 'visib_fract': float(visib_fract), 174 | 'bbox_obj': [int(e) for e in bbox], 175 | 'bbox_visib': [int(e) for e in bbox_visib] 176 | }) 177 | 178 | # Visualization of the visibility mask. 179 | if p['vis_visibility_masks']: 180 | 181 | depth_im_vis = visualization.depth_for_vis(depth, 0.2, 1.0) 182 | depth_im_vis = np.dstack([depth_im_vis] * 3) 183 | 184 | visib_gt_vis = visib_gt.astype(np.float) 185 | zero_ch = np.zeros(visib_gt_vis.shape) 186 | visib_gt_vis = np.dstack([zero_ch, visib_gt_vis, zero_ch]) 187 | 188 | vis = 0.5 * depth_im_vis + 0.5 * visib_gt_vis 189 | vis[vis > 1] = 1 190 | 191 | vis_path = p['vis_mask_visib_tpath'].format( 192 | delta=p['delta'], dataset=p['dataset'], split=p['dataset_split'], 193 | scene_id=scene_id, im_id=im_id, gt_id=gt_id) 194 | misc.ensure_dir(os.path.dirname(vis_path)) 195 | inout.save_im(vis_path, vis) 196 | 197 | # Save the info for the current scene. 198 | scene_gt_info_path = dp_split['scene_gt_info_tpath'].format(scene_id=scene_id) 199 | misc.ensure_dir(os.path.dirname(scene_gt_info_path)) 200 | inout.save_json(scene_gt_info_path, scene_gt_info) 201 | -------------------------------------------------------------------------------- /keypoint/scripts/calc_gt_masks.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """Calculates masks of object models in the ground-truth poses.""" 5 | 6 | import os 7 | import numpy as np 8 | 9 | from bop_toolkit_lib import config 10 | from bop_toolkit_lib import dataset_params 11 | from bop_toolkit_lib import inout 12 | from bop_toolkit_lib import misc 13 | from bop_toolkit_lib import renderer 14 | from bop_toolkit_lib import visibility 15 | 16 | 17 | # PARAMETERS. 18 | ################################################################################ 19 | p = { 20 | # See dataset_params.py for options. 21 | 'dataset': 'lm', 22 | 23 | # Dataset split. Options: 'train', 'val', 'test'. 24 | 'dataset_split': 'test', 25 | 26 | # Dataset split type. None = default. See dataset_params.py for options. 27 | 'dataset_split_type': None, 28 | 29 | # Tolerance used in the visibility test [mm]. 30 | 'delta': 15, # 5 for ITODD, 15 for the other datasets. 31 | 32 | # Type of the renderer. 33 | 'renderer_type': 'python', # Options: 'cpp', 'python'. 34 | 35 | # Folder containing the BOP datasets. 36 | 'datasets_path': config.datasets_path, 37 | } 38 | ################################################################################ 39 | 40 | 41 | # Load dataset parameters. 42 | dp_split = dataset_params.get_split_params( 43 | p['datasets_path'], p['dataset'], p['dataset_split'], p['dataset_split_type']) 44 | 45 | model_type = None 46 | if p['dataset'] == 'tless': 47 | model_type = 'cad' 48 | dp_model = dataset_params.get_model_params( 49 | p['datasets_path'], p['dataset'], model_type) 50 | 51 | scene_ids = dataset_params.get_present_scene_ids(dp_split) 52 | for scene_id in scene_ids: 53 | 54 | # Load scene GT. 
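# scene_gt.json maps each image id to a list of GT annotations (obj_id,
# cam_R_m2c, cam_t_m2c); these poses are rendered below to produce the masks.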
55 | scene_gt_path = dp_split['scene_gt_tpath'].format( 56 | scene_id=scene_id) 57 | scene_gt = inout.load_scene_gt(scene_gt_path) 58 | 59 | # Load scene camera. 60 | scene_camera_path = dp_split['scene_camera_tpath'].format( 61 | scene_id=scene_id) 62 | scene_camera = inout.load_scene_camera(scene_camera_path) 63 | 64 | # Create folders for the output masks (if they do not exist yet). 65 | mask_dir_path = os.path.dirname( 66 | dp_split['mask_tpath'].format( 67 | scene_id=scene_id, im_id=0, gt_id=0)) 68 | misc.ensure_dir(mask_dir_path) 69 | 70 | mask_visib_dir_path = os.path.dirname( 71 | dp_split['mask_visib_tpath'].format( 72 | scene_id=scene_id, im_id=0, gt_id=0)) 73 | misc.ensure_dir(mask_visib_dir_path) 74 | 75 | # Initialize a renderer. 76 | misc.log('Initializing renderer...') 77 | width, height = dp_split['im_size'] 78 | ren = renderer.create_renderer( 79 | width, height, renderer_type=p['renderer_type'], mode='depth') 80 | 81 | # Add object models. 82 | for obj_id in dp_model['obj_ids']: 83 | ren.add_object(obj_id, dp_model['model_tpath'].format(obj_id=obj_id)) 84 | 85 | im_ids = sorted(scene_gt.keys()) 86 | for im_id in im_ids: 87 | 88 | if im_id % 100 == 0: 89 | misc.log( 90 | 'Calculating masks - dataset: {} ({}, {}), scene: {}, im: {}'.format( 91 | p['dataset'], p['dataset_split'], p['dataset_split_type'], scene_id, 92 | im_id)) 93 | 94 | K = scene_camera[im_id]['cam_K'] 95 | fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2] 96 | 97 | # Load depth image. 98 | depth_path = dp_split['depth_tpath'].format( 99 | scene_id=scene_id, im_id=im_id) 100 | depth_im = inout.load_depth(depth_path) 101 | depth_im *= scene_camera[im_id]['depth_scale'] # to [mm] 102 | dist_im = misc.depth_im_to_dist_im(depth_im, K) 103 | 104 | for gt_id, gt in enumerate(scene_gt[im_id]): 105 | 106 | # Render the depth image. 107 | depth_gt = ren.render_object( 108 | gt['obj_id'], gt['cam_R_m2c'], gt['cam_t_m2c'], fx, fy, cx, cy)['depth'] 109 | 110 | # Convert depth image to distance image. 111 | dist_gt = misc.depth_im_to_dist_im(depth_gt, K) 112 | 113 | # Mask of the full object silhouette. 114 | mask = dist_gt > 0 115 | 116 | # Mask of the visible part of the object silhouette. 117 | mask_visib = visibility.estimate_visib_mask_gt( 118 | dist_im, dist_gt, p['delta'], visib_mode='bop19') 119 | 120 | # Save the calculated masks. 121 | mask_path = dp_split['mask_tpath'].format( 122 | scene_id=scene_id, im_id=im_id, gt_id=gt_id) 123 | inout.save_im(mask_path, 255 * mask.astype(np.uint8)) 124 | 125 | mask_visib_path = dp_split['mask_visib_tpath'].format( 126 | scene_id=scene_id, im_id=im_id, gt_id=gt_id) 127 | inout.save_im(mask_visib_path, 255 * mask_visib.astype(np.uint8)) 128 | -------------------------------------------------------------------------------- /keypoint/scripts/calc_model_info.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """Calculates the 3D bounding box and the diameter of 3D object models.""" 5 | 6 | from bop_toolkit_lib import config 7 | from bop_toolkit_lib import dataset_params 8 | from bop_toolkit_lib import inout 9 | from bop_toolkit_lib import misc 10 | 11 | 12 | # PARAMETERS. 13 | ################################################################################ 14 | p = { 15 | # See dataset_params.py for options. 16 | 'dataset': 'lm', 17 | 18 | # Type of input object models. 
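# None = default model type (see dataset_params.py for options).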
19 |   'model_type': None,
20 | 
21 |   # Folder containing the BOP datasets.
22 |   'datasets_path': config.datasets_path,
23 | }
24 | ################################################################################
25 | 
26 | 
27 | # Load dataset parameters.
28 | dp_model = dataset_params.get_model_params(
29 |   p['datasets_path'], p['dataset'], p['model_type'])
30 | 
31 | models_info = {}
32 | for obj_id in dp_model['obj_ids']:
33 |   misc.log('Processing model of object {}...'.format(obj_id))
34 | 
35 |   model = inout.load_ply(dp_model['model_tpath'].format(obj_id=obj_id))
36 | 
37 |   # Calculate 3D bounding box.
38 |   ref_pt = list(map(float, model['pts'].min(axis=0).flatten()))
39 |   size = list(map(float, (model['pts'].max(axis=0) - ref_pt).flatten()))
40 | 
41 |   # Calculate the diameter.
42 |   diameter = misc.calc_pts_diameter(model['pts'])
43 | 
44 |   models_info[obj_id] = {
45 |     'min_x': ref_pt[0], 'min_y': ref_pt[1], 'min_z': ref_pt[2],
46 |     'size_x': size[0], 'size_y': size[1], 'size_z': size[2],
47 |     'diameter': diameter
48 |   }
49 | 
50 | # Save the calculated info about the object models.
51 | inout.save_json(dp_model['models_info_path'], models_info)
52 | 
--------------------------------------------------------------------------------
/keypoint/scripts/check_results_bop19.py:
--------------------------------------------------------------------------------
1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz)
2 | # Center for Machine Perception, Czech Technical University in Prague
3 | 
4 | """Checks the format of result files for the BOP Challenge 2019/2020."""
5 | 
6 | import os
7 | import argparse
8 | 
9 | from bop_toolkit_lib import config
10 | from bop_toolkit_lib import inout
11 | from bop_toolkit_lib import misc
12 | 
13 | 
14 | # PARAMETERS (some can be overwritten by the command line arguments below).
15 | ################################################################################
16 | p = {
17 |   # Names of files with results whose format should be checked (assumed to be
18 |   # stored in folder config.results_path). See docs/bop_challenge_2019.md for a
19 |   # description of the format. Example results can be found at:
20 |   # http://ptak.felk.cvut.cz/6DB/public/bop_sample_results/bop_challenge_2019/
21 |   'result_filenames': [
22 |     '/path/to/csv/with/results',
23 |   ],
24 | }
25 | ################################################################################
26 | 
27 | 
28 | # Command line arguments.
29 | # ------------------------------------------------------------------------------ 30 | parser = argparse.ArgumentParser() 31 | parser.add_argument('--result_filenames', 32 | default=','.join(p['result_filenames']), 33 | help='Comma-separated names of files with results.') 34 | args = parser.parse_args() 35 | 36 | p['result_filenames'] = args.result_filenames.split(',') 37 | 38 | 39 | if __name__ == '__main__': 40 | 41 | for result_filename in p['result_filenames']: 42 | result_path = os.path.join(config.results_path, result_filename) 43 | check_passed, check_msg = inout.check_bop_results( 44 | result_path, version='bop19') 45 | 46 | misc.log('Check msg: {}'.format(check_msg)) 47 | -------------------------------------------------------------------------------- /keypoint/scripts/eval_bop19.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """Evaluation script for the BOP Challenge 2019/2020.""" 5 | 6 | import os 7 | import time 8 | import argparse 9 | import subprocess 10 | import numpy as np 11 | 12 | import _init_paths 13 | from bop_toolkit_lib import config 14 | from bop_toolkit_lib import inout 15 | from bop_toolkit_lib import misc 16 | 17 | 18 | # PARAMETERS (some can be overwritten by the command line arguments below). 19 | ################################################################################ 20 | 21 | p = { 22 | # Errors to calculate. 23 | 'errors': [ 24 | { 25 | 'n_top': -1, 26 | 'type': 'vsd', 27 | 'vsd_deltas': { 28 | 'hb': 15, 29 | 'icbin': 15, 30 | 'icmi': 15, 31 | 'itodd': 5, 32 | 'lm': 15, 33 | 'lmo': 15, 34 | 'ruapc': 15, 35 | 'tless': 15, 36 | 'tudl': 15, 37 | 'tyol': 15, 38 | 'ycbv': 15, 39 | }, 40 | 'vsd_taus': list(np.arange(0.05, 0.51, 0.05)), 41 | 'vsd_normalized_by_diameter': True, 42 | 'correct_th': [[th] for th in np.arange(0.05, 0.51, 0.05)] 43 | }, 44 | { 45 | 'n_top': -1, 46 | 'type': 'mssd', 47 | 'correct_th': [[th] for th in np.arange(0.05, 0.51, 0.05)] 48 | }, 49 | { 50 | 'n_top': -1, 51 | 'type': 'mspd', 52 | 'correct_th': [[th] for th in np.arange(5, 51, 5)] 53 | }, 54 | ], 55 | 56 | # Minimum visible surface fraction of a valid GT pose. 57 | # -1 == k most visible GT poses will be considered, where k is given by 58 | # the "inst_count" item loaded from "targets_filename". 59 | 'visib_gt_min': -1, 60 | 61 | # See misc.get_symmetry_transformations(). 62 | 'max_sym_disc_step': 0.01, 63 | 64 | # Type of the renderer (used for the VSD pose error function). 65 | 'renderer_type': 'python', # Options: 'cpp', 'python'. 66 | 67 | # Names of files with results for which to calculate the errors (assumed to be 68 | # stored in folder p['results_path']). See docs/bop_challenge_2019.md for a 69 | # description of the format. Example results can be found at: 70 | # http://ptak.felk.cvut.cz/6DB/public/bop_sample_results/bop_challenge_2019/ 71 | 'result_filenames': [ 72 | '/relative/path/to/csv/with/results', 73 | ], 74 | 75 | # Folder with results to be evaluated. 76 | 'results_path': config.results_path, 77 | 78 | # Folder for the calculated pose errors and performance scores. 79 | 'eval_path': config.eval_path, 80 | 81 | # File with a list of estimation targets to consider. The file is assumed to 82 | # be stored in the dataset folder. 
83 | 'targets_filename': 'test_targets_bop19.json', 84 | } 85 | ################################################################################ 86 | 87 | 88 | # Command line arguments. 89 | # ------------------------------------------------------------------------------ 90 | parser = argparse.ArgumentParser() 91 | parser.add_argument('--renderer_type', default=p['renderer_type']) 92 | parser.add_argument('--result_filenames', 93 | default=','.join(p['result_filenames']), 94 | help='Comma-separated names of files with results.') 95 | parser.add_argument('--results_path', default=p['results_path']) 96 | parser.add_argument('--eval_path', default=p['eval_path']) 97 | parser.add_argument('--targets_filename', default=p['targets_filename']) 98 | args = parser.parse_args() 99 | 100 | p['renderer_type'] = str(args.renderer_type) 101 | p['result_filenames'] = args.result_filenames.split(',') 102 | p['results_path'] = str(args.results_path) 103 | p['eval_path'] = str(args.eval_path) 104 | p['targets_filename'] = str(args.targets_filename) 105 | 106 | # Evaluation. 107 | # ------------------------------------------------------------------------------ 108 | for result_filename in p['result_filenames']: 109 | 110 | misc.log('===========') 111 | misc.log('EVALUATING: {}'.format(result_filename)) 112 | misc.log('===========') 113 | 114 | time_start = time.time() 115 | 116 | # Volume under recall surface (VSD) / area under recall curve (MSSD, MSPD). 117 | average_recalls = {} 118 | 119 | # Name of the result and the dataset. 120 | result_name = os.path.splitext(os.path.basename(result_filename))[0] 121 | dataset = str(result_name.split('_')[1].split('-')[0]) 122 | 123 | # Calculate the average estimation time per image. 124 | ests = inout.load_bop_results( 125 | os.path.join(p['results_path'], result_filename), version='bop19') 126 | times = {} 127 | times_available = True 128 | for est in ests: 129 | result_key = '{:06d}_{:06d}'.format(est['scene_id'], est['im_id']) 130 | if est['time'] < 0: 131 | # All estimation times must be provided. 132 | times_available = False 133 | break 134 | elif result_key in times: 135 | if abs(times[result_key] - est['time']) > 0.001: 136 | raise ValueError( 137 | 'The running time for scene {} and image {} is not the same for ' 138 | 'all estimates.'.format(est['scene_id'], est['im_id'])) 139 | else: 140 | times[result_key] = est['time'] 141 | 142 | if times_available: 143 | average_time_per_image = np.mean(list(times.values())) 144 | else: 145 | average_time_per_image = -1.0 146 | 147 | # Evaluate the pose estimates. 148 | for error in p['errors']: 149 | 150 | # Calculate error of the pose estimates. 
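# Each error type is computed by running eval_calc_errors.py in a separate
# process; a non-zero exit code aborts the evaluation (RuntimeError below).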
151 | calc_errors_cmd = [ 152 | 'python', 153 | os.path.join('scripts', 'eval_calc_errors.py'), 154 | '--n_top={}'.format(error['n_top']), 155 | '--error_type={}'.format(error['type']), 156 | '--result_filenames={}'.format(result_filename), 157 | '--renderer_type={}'.format(p['renderer_type']), 158 | '--results_path={}'.format(p['results_path']), 159 | '--eval_path={}'.format(p['eval_path']), 160 | '--targets_filename={}'.format(p['targets_filename']), 161 | '--max_sym_disc_step={}'.format(p['max_sym_disc_step']), 162 | '--skip_missing=1', 163 | ] 164 | if error['type'] == 'vsd': 165 | vsd_deltas_str = \ 166 | ','.join(['{}:{}'.format(k, v) for k, v in error['vsd_deltas'].items()]) 167 | calc_errors_cmd += [ 168 | '--vsd_deltas={}'.format(vsd_deltas_str), 169 | '--vsd_taus={}'.format(','.join(map(str, error['vsd_taus']))), 170 | '--vsd_normalized_by_diameter={}'.format( 171 | error['vsd_normalized_by_diameter']) 172 | ] 173 | 174 | misc.log('Running: ' + ' '.join(calc_errors_cmd)) 175 | if subprocess.call(calc_errors_cmd) != 0: 176 | raise RuntimeError('Calculation of pose errors failed.') 177 | 178 | # Paths (rel. to p['eval_path']) to folders with calculated pose errors. 179 | # For VSD, there is one path for each setting of tau. For the other pose 180 | # error functions, there is only one path. 181 | error_dir_paths = {} 182 | if error['type'] == 'vsd': 183 | for vsd_tau in error['vsd_taus']: 184 | error_sign = misc.get_error_signature( 185 | error['type'], error['n_top'], vsd_delta=error['vsd_deltas'][dataset], 186 | vsd_tau=vsd_tau) 187 | error_dir_paths[error_sign] = os.path.join(result_name, error_sign) 188 | else: 189 | error_sign = misc.get_error_signature(error['type'], error['n_top']) 190 | error_dir_paths[error_sign] = os.path.join(result_name, error_sign) 191 | 192 | # Recall scores for all settings of the threshold of correctness (and also 193 | # of the misalignment tolerance tau in the case of VSD). 194 | recalls = [] 195 | 196 | # Calculate performance scores. 197 | for error_sign, error_dir_path in error_dir_paths.items(): 198 | for correct_th in error['correct_th']: 199 | 200 | calc_scores_cmd = [ 201 | 'python', 202 | os.path.join('scripts', 'eval_calc_scores.py'), 203 | '--error_dir_paths={}'.format(error_dir_path), 204 | '--eval_path={}'.format(p['eval_path']), 205 | '--targets_filename={}'.format(p['targets_filename']), 206 | '--visib_gt_min={}'.format(p['visib_gt_min']) 207 | ] 208 | 209 | calc_scores_cmd += ['--correct_th_{}={}'.format( 210 | error['type'], ','.join(map(str, correct_th)))] 211 | 212 | misc.log('Running: ' + ' '.join(calc_scores_cmd)) 213 | if subprocess.call(calc_scores_cmd) != 0: 214 | raise RuntimeError('Calculation of scores failed.') 215 | 216 | # Path to file with calculated scores. 217 | score_sign = misc.get_score_signature(correct_th, p['visib_gt_min']) 218 | 219 | scores_filename = 'scores_{}.json'.format(score_sign) 220 | scores_path = os.path.join( 221 | p['eval_path'], result_name, error_sign, scores_filename) 222 | 223 | # Load the scores. 
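# scores_<signature>.json is written by the eval_calc_scores.py call above;
# only its 'recall' entry is aggregated here.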
224 | misc.log('Loading calculated scores from: {}'.format(scores_path)) 225 | scores = inout.load_json(scores_path) 226 | recalls.append(scores['recall']) 227 | 228 | average_recalls[error['type']] = np.mean(recalls) 229 | 230 | misc.log('Recall scores: {}'.format(' '.join(map(str, recalls)))) 231 | misc.log('Average recall: {}'.format(average_recalls[error['type']])) 232 | 233 | time_total = time.time() - time_start 234 | misc.log('Evaluation of {} took {}s.'.format(result_filename, time_total)) 235 | 236 | # Calculate the final scores. 237 | final_scores = {} 238 | for error in p['errors']: 239 | final_scores['bop19_average_recall_{}'.format(error['type'])] =\ 240 | average_recalls[error['type']] 241 | 242 | # Final score for the given dataset. 243 | final_scores['bop19_average_recall'] = np.mean([ 244 | average_recalls['vsd'], average_recalls['mssd'], average_recalls['mspd']]) 245 | 246 | # Average estimation time per image. 247 | final_scores['bop19_average_time_per_image'] = average_time_per_image 248 | 249 | # Save the final scores. 250 | final_scores_path = os.path.join( 251 | p['eval_path'], result_name, 'scores_bop19.json') 252 | inout.save_json(final_scores_path, final_scores) 253 | 254 | # Print the final scores. 255 | misc.log('FINAL SCORES:') 256 | for score_name, score_value in final_scores.items(): 257 | misc.log('- {}: {}'.format(score_name, score_value)) 258 | 259 | misc.log('Done.') 260 | -------------------------------------------------------------------------------- /keypoint/scripts/meshlab_scripts/remesh_for_eval_cell=0.25.mlx: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /keypoint/scripts/meshlab_scripts/remesh_for_eval_cell=0.5.mlx: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /keypoint/scripts/remesh_models_for_eval.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """'Uniformly' resamples and decimates 3D object models for evaluation. 5 | 6 | Note: Models of some T-LESS objects were processed by Blender (using the Remesh 7 | modifier). 8 | """ 9 | 10 | import os 11 | 12 | from bop_toolkit_lib import config 13 | from bop_toolkit_lib import dataset_params 14 | from bop_toolkit_lib import misc 15 | 16 | 17 | # PARAMETERS. 18 | ################################################################################ 19 | p = { 20 | # See dataset_params.py for options. 21 | 'dataset': 'lm', 22 | 23 | # Type of input object models. 24 | # None = default model type. 25 | 'model_in_type': None, 26 | 27 | # Type of output object models. 28 | 'model_out_type': 'eval', 29 | 30 | # Folder containing the BOP datasets. 31 | 'datasets_path': config.datasets_path, 32 | 33 | # Path to meshlabserver.exe (tested version: 1.3.3). 
34 | # On Windows: C:\Program Files\VCG\MeshLab133\meshlabserver.exe 35 | 'meshlab_server_path': config.meshlab_server_path, 36 | 37 | # Path to scripts/meshlab_scripts/remesh_for_eval.mlx. 38 | 'meshlab_script_path': os.path.join( 39 | os.path.dirname(os.path.realpath(__file__)), 'meshlab_scripts', 40 | r'remesh_for_eval_cell=0.25.mlx'), 41 | } 42 | ################################################################################ 43 | 44 | 45 | # Load dataset parameters. 46 | dp_model_in = dataset_params.get_model_params( 47 | p['datasets_path'], p['dataset'], p['model_in_type']) 48 | 49 | dp_model_out = dataset_params.get_model_params( 50 | p['datasets_path'], p['dataset'], p['model_out_type']) 51 | 52 | # Attributes to save for the output models. 53 | attrs_to_save = [] 54 | 55 | # Process models of all objects in the selected dataset. 56 | for obj_id in dp_model_in['obj_ids']: 57 | misc.log('\n\n\nProcessing model of object {}...\n'.format(obj_id)) 58 | 59 | model_in_path = dp_model_in['model_tpath'].format(obj_id=obj_id) 60 | model_out_path = dp_model_out['model_tpath'].format(obj_id=obj_id) 61 | 62 | misc.ensure_dir(os.path.dirname(model_out_path)) 63 | 64 | misc.run_meshlab_script(p['meshlab_server_path'], p['meshlab_script_path'], 65 | model_in_path, model_out_path, attrs_to_save) 66 | 67 | misc.log('Done.') 68 | -------------------------------------------------------------------------------- /keypoint/scripts/render_train_imgs.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """Renders RGB-D images of an object model.""" 5 | 6 | import os 7 | import cv2 8 | 9 | from bop_toolkit_lib import config 10 | from bop_toolkit_lib import dataset_params 11 | from bop_toolkit_lib import inout 12 | from bop_toolkit_lib import misc 13 | from bop_toolkit_lib import renderer 14 | from bop_toolkit_lib import view_sampler 15 | 16 | 17 | # PARAMETERS. 18 | ################################################################################ 19 | # See dataset_params.py for options. 20 | dataset = 'tyol' 21 | 22 | # Radii of view spheres from which to render the objects. 23 | if dataset == 'lm': 24 | radii = [400] # There are only 3 occurrences under 400 mm. 25 | elif dataset == 'tless': 26 | radii = [650] 27 | elif dataset == 'tudl': 28 | radii = [850] 29 | elif dataset == 'tyol': 30 | radii = [500] 31 | elif dataset == 'ruapc': 32 | radii = [590] 33 | elif dataset == 'icmi': 34 | radii = [500] 35 | elif dataset == 'icbin': 36 | radii = [450] 37 | else: 38 | raise ValueError('Unknown dataset.') 39 | 40 | # Type of object models and camera. 41 | model_type = None 42 | cam_type = None 43 | if dataset == 'tless': 44 | model_type = 'reconst' 45 | cam_type = 'primesense' 46 | 47 | # Objects to render ([] = all objects from the specified dataset). 48 | obj_ids = [] 49 | 50 | # Minimum required number of views on the whole view sphere. The final number of 51 | # views depends on the sampling method. 52 | min_n_views = 1000 53 | 54 | # Rendering parameters. 55 | ambient_weight = 0.5 # Weight of ambient light [0, 1] 56 | shading = 'phong' # 'flat', 'phong' 57 | 58 | # Type of the renderer. Options: 'cpp', 'python'. 59 | renderer_type = 'python' 60 | 61 | # Super-sampling anti-aliasing (SSAA) - the RGB image is rendered at ssaa_fact 62 | # times higher resolution and then down-sampled to the required resolution. 
63 | # Ref: https://github.com/vispy/vispy/wiki/Tech.-Antialiasing 64 | ssaa_fact = 4 65 | 66 | # Folder containing the BOP datasets. 67 | datasets_path = config.datasets_path 68 | 69 | # Folder for the rendered images. 70 | out_tpath = os.path.join(config.output_path, 'render_{dataset}') 71 | 72 | # Output path templates. 73 | out_rgb_tpath =\ 74 | os.path.join('{out_path}', '{obj_id:06d}', 'rgb', '{im_id:06d}.png') 75 | out_depth_tpath =\ 76 | os.path.join('{out_path}', '{obj_id:06d}', 'depth', '{im_id:06d}.png') 77 | out_scene_camera_tpath =\ 78 | os.path.join('{out_path}', '{obj_id:06d}', 'scene_camera.json') 79 | out_scene_gt_tpath =\ 80 | os.path.join('{out_path}', '{obj_id:06d}', 'scene_gt.json') 81 | out_views_vis_tpath =\ 82 | os.path.join('{out_path}', '{obj_id:06d}', 'views_radius={radius}.ply') 83 | ################################################################################ 84 | 85 | 86 | out_path = out_tpath.format(dataset=dataset) 87 | misc.ensure_dir(out_path) 88 | 89 | # Load dataset parameters. 90 | dp_split_test = dataset_params.get_split_params(datasets_path, dataset, 'test') 91 | dp_model = dataset_params.get_model_params(datasets_path, dataset, model_type) 92 | dp_camera = dataset_params.get_camera_params(datasets_path, dataset, cam_type) 93 | 94 | if not obj_ids: 95 | obj_ids = dp_model['obj_ids'] 96 | 97 | # Image size and K for the RGB image (potentially with SSAA). 98 | im_size_rgb = [int(round(x * float(ssaa_fact))) for x in dp_camera['im_size']] 99 | K_rgb = dp_camera['K'] * ssaa_fact 100 | 101 | # Intrinsic parameters for RGB rendering. 102 | fx_rgb, fy_rgb, cx_rgb, cy_rgb =\ 103 | K_rgb[0, 0], K_rgb[1, 1], K_rgb[0, 2], K_rgb[1, 2] 104 | 105 | # Intrinsic parameters for depth rendering. 106 | K = dp_camera['K'] 107 | fx_d, fy_d, cx_d, cy_d = K[0, 0], K[1, 1], K[0, 2], K[1, 2] 108 | 109 | # Create the RGB renderer. 110 | width_rgb, height_rgb = im_size_rgb[0], im_size_rgb[1] 111 | ren_rgb = renderer.create_renderer( 112 | width_rgb, height_rgb, renderer_type, mode='rgb', shading=shading) 113 | ren_rgb.set_light_ambient_weight(ambient_weight) 114 | 115 | # Add object models to the RGB renderer. 116 | for obj_id in obj_ids: 117 | ren_rgb.add_object(obj_id, dp_model['model_tpath'].format(obj_id=obj_id)) 118 | 119 | # Create the depth renderer. 120 | width_depth, height_depth, = dp_camera['im_size'][0], dp_camera['im_size'][1] 121 | ren_depth = renderer.create_renderer( 122 | width_depth, height_depth, renderer_type, mode='depth') 123 | 124 | # Add object models to the depth renderer. 125 | for obj_id in obj_ids: 126 | ren_depth.add_object(obj_id, dp_model['model_tpath'].format(obj_id=obj_id)) 127 | 128 | # Render training images for all object models. 129 | for obj_id in obj_ids: 130 | 131 | # Prepare output folders. 132 | misc.ensure_dir(os.path.dirname(out_rgb_tpath.format( 133 | out_path=out_path, obj_id=obj_id, im_id=0))) 134 | misc.ensure_dir(os.path.dirname(out_depth_tpath.format( 135 | out_path=out_path, obj_id=obj_id, im_id=0))) 136 | 137 | # Load model. 138 | model_path = dp_model['model_tpath'].format(obj_id=obj_id) 139 | model = inout.load_ply(model_path) 140 | 141 | # Load model texture. 142 | if 'texture_file' in model: 143 | model_texture_path =\ 144 | os.path.join(os.path.dirname(model_path), model['texture_file']) 145 | model_texture = inout.load_im(model_texture_path) 146 | else: 147 | model_texture = None 148 | 149 | scene_camera = {} 150 | scene_gt = {} 151 | im_id = 0 152 | for radius in radii: 153 | # Sample viewpoints. 
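    # Side note on the SSAA set-up configured above (a minimal sketch; the
    # intrinsics and the 3D point below are made-up illustration values):
    # scaling fx, fy, cx, cy by ssaa_fact scales the projected pixel
    # coordinates by the same factor, so rendering at ssaa_fact-times the
    # resolution and resizing back with cv2.resize (further below) keeps the
    # geometry consistent while reducing aliasing.
    #   fx, fy, cx, cy = 572.4, 573.6, 325.3, 242.0
    #   X, Y, Z = 0.1, -0.05, 0.8
    #   u, v = fx * X / Z + cx, fy * Y / Z + cy
    #   s = 4  # ssaa_fact
    #   u_ss, v_ss = s * fx * X / Z + s * cx, s * fy * Y / Z + s * cy
    #   assert abs(u_ss - s * u) < 1e-9 and abs(v_ss - s * v) < 1e-9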
154 | view_sampler_mode = 'hinterstoisser' # 'hinterstoisser' or 'fibonacci'. 155 | views, views_level = view_sampler.sample_views( 156 | min_n_views, radius, dp_split_test['azimuth_range'], 157 | dp_split_test['elev_range'], view_sampler_mode) 158 | 159 | misc.log('Sampled views: ' + str(len(views))) 160 | # out_views_vis_path = out_views_vis_tpath.format( 161 | # out_path=out_path, obj_id=obj_id, radius=radius) 162 | # view_sampler.save_vis(out_views_vis_path, views, views_level) 163 | 164 | # Render the object model from all views. 165 | for view_id, view in enumerate(views): 166 | if view_id % 10 == 0: 167 | misc.log('Rendering - obj: {}, radius: {}, view: {}/{}'.format( 168 | obj_id, radius, view_id, len(views))) 169 | 170 | # Rendering. 171 | rgb = ren_rgb.render_object( 172 | obj_id, view['R'], view['t'], fx_rgb, fy_rgb, cx_rgb, cy_rgb)['rgb'] 173 | depth = ren_depth.render_object( 174 | obj_id, view['R'], view['t'], fx_d, fy_d, cx_d, cy_d)['depth'] 175 | 176 | # Convert depth so it is in the same units as other images in the dataset. 177 | depth /= float(dp_camera['depth_scale']) 178 | 179 | # The OpenCV function was used for rendering of the training images 180 | # provided for the SIXD Challenge 2017. 181 | rgb = cv2.resize(rgb, dp_camera['im_size'], interpolation=cv2.INTER_AREA) 182 | # rgb = scipy.misc.imresize(rgb, par['cam']['im_size'][::-1], 'bicubic') 183 | 184 | # Save the rendered images. 185 | out_rgb_path = out_rgb_tpath.format( 186 | out_path=out_path, obj_id=obj_id, im_id=im_id) 187 | inout.save_im(out_rgb_path, rgb) 188 | out_depth_path = out_depth_tpath.format( 189 | out_path=out_path, obj_id=obj_id, im_id=im_id) 190 | inout.save_depth(out_depth_path, depth) 191 | 192 | # Get 2D bounding box of the object model at the ground truth pose. 193 | # ys, xs = np.nonzero(depth > 0) 194 | # obj_bb = misc.calc_2d_bbox(xs, ys, dp_camera['im_size']) 195 | 196 | scene_camera[im_id] = { 197 | 'cam_K': dp_camera['K'], 198 | 'depth_scale': dp_camera['depth_scale'], 199 | 'view_level': int(views_level[view_id]) 200 | } 201 | 202 | scene_gt[im_id] = [{ 203 | 'cam_R_m2c': view['R'], 204 | 'cam_t_m2c': view['t'], 205 | 'obj_id': int(obj_id) 206 | }] 207 | 208 | im_id += 1 209 | 210 | # Save metadata. 211 | inout.save_scene_camera(out_scene_camera_tpath.format( 212 | out_path=out_path, obj_id=obj_id), scene_camera) 213 | inout.save_scene_gt(out_scene_gt_tpath.format( 214 | out_path=out_path, obj_id=obj_id), scene_gt) 215 | -------------------------------------------------------------------------------- /keypoint/scripts/show_performance_bop19.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """Shows BOP19 metrics and plots recall curves after running eval_bop19.py""" 5 | 6 | import os 7 | import time 8 | import argparse 9 | import subprocess 10 | import numpy as np 11 | 12 | from bop_toolkit_lib import config 13 | from bop_toolkit_lib import inout 14 | from bop_toolkit_lib import misc 15 | 16 | 17 | # PARAMETERS (some can be overwritten by the command line arguments below). 18 | ################################################################################ 19 | p = { 20 | # Errors to calculate. 
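  # Brief, hedged note on the three BOP19 pose-error functions configured
  # below: 'vsd' is the Visible Surface Discrepancy (evaluated over the range
  # of misalignment tolerances in 'vsd_taus'), 'mssd' the Maximum
  # Symmetry-Aware Surface Distance, and 'mspd' the Maximum Symmetry-Aware
  # Projection Distance. Their 'correct_th' grids give the recall thresholds:
  # 0.05..0.50 for VSD/MSSD and 5..50 pixels for MSPD (cf. the 'thres @ r px'
  # axis label in plot_recall_curves further below).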
21 | 'errors': [ 22 | { 23 | 'n_top': -1, 24 | 'type': 'vsd', 25 | 'vsd_deltas': { 26 | 'hb': 15, 27 | 'icbin': 15, 28 | 'icmi': 15, 29 | 'itodd': 5, 30 | 'lm': 15, 31 | 'lmo': 15, 32 | 'ruapc': 15, 33 | 'tless': 15, 34 | 'tudl': 15, 35 | 'tyol': 15, 36 | }, 37 | 'vsd_taus': list(np.arange(0.05, 0.51, 0.05)), 38 | 'correct_th': [[th] for th in np.arange(0.05, 0.51, 0.05)] 39 | }, 40 | { 41 | 'n_top': -1, 42 | 'type': 'mssd', 43 | 'correct_th': [[th] for th in np.arange(0.05, 0.51, 0.05)] 44 | }, 45 | { 46 | 'n_top': -1, 47 | 'type': 'mspd', 48 | 'correct_th': [[th] for th in np.arange(5, 51, 5)] 49 | }, 50 | ], 51 | 52 | # Minimum visible surface fraction of a valid GT pose. 53 | 'visib_gt_min': 0.1, 54 | 55 | # Plot Recall curves 56 | 'plot_recall_curves': True, 57 | 58 | # Names of files with results for which to calculate the errors (assumed to be 59 | # stored in folder config.eval_path). See docs/bop_challenge_2019.md for a 60 | # description of the format. Example results can be found at: 61 | # http://ptak.felk.cvut.cz/6DB/public/bop_sample_results/bop_challenge_2019/ 62 | 'result_filenames': [ 63 | '/path/to/csv/with/results', 64 | ], 65 | } 66 | ################################################################################ 67 | 68 | 69 | # Command line arguments. 70 | # ------------------------------------------------------------------------------ 71 | parser = argparse.ArgumentParser() 72 | parser.add_argument('--visib_gt_min', default=p['visib_gt_min']) 73 | parser.add_argument('--result_filenames', 74 | default=','.join(p['result_filenames']), 75 | help='Comma-separated names of files with results.') 76 | args = parser.parse_args() 77 | 78 | p['visib_gt_min'] = float(args.visib_gt_min) 79 | p['result_filenames'] = args.result_filenames.split(',') 80 | 81 | # Evaluation. 82 | # ------------------------------------------------------------------------------ 83 | def plot_recall_curves(recall_dict, p): 84 | """Plots recall curves and displays BOP19 metrics 85 | 86 | :param recall_dict: dictionary containing bop19 recall results 87 | :param p: parameters from show_performance_bop19.py 88 | """ 89 | 90 | for i, error in enumerate(p['errors']): 91 | if error['type'] == 'mspd': 92 | corr_thres = ['{}'.format(e) for sl in error['correct_th'] for e in sl] 93 | else: 94 | corr_thres = ['{:.2f}'.format(e) for sl in error['correct_th'] for e in 95 | sl] 96 | 97 | recalls = recall_dict[error['type']] 98 | all_recalls = [] 99 | plt.figure() 100 | 101 | for key in sorted(recalls): 102 | threshold = key.split('=')[-1] 103 | if 'vsd' in key: 104 | plt.plot(recalls[key], label='tau: ' + threshold) 105 | else: 106 | plt.plot(recalls[key]) 107 | all_recalls += recalls[key] 108 | 109 | plt.legend() 110 | 111 | plt.xticks(np.arange(len(corr_thres)), corr_thres) 112 | plt.ylim([0, 1]) 113 | plt.ylabel('recall') 114 | if error['type'] == 'mspd': 115 | plt.xlabel('thres @ r px') 116 | else: 117 | plt.xlabel('thres @ object diameter') 118 | 119 | plt.title(error['type'] + ' - ' + 'average recall: ' 120 | + '{:.4f}'.format(np.mean(all_recalls))) 121 | 122 | plt.show() 123 | 124 | 125 | for result_filename in p['result_filenames']: 126 | 127 | misc.log('===========') 128 | misc.log('SHOWING: {}'.format(result_filename)) 129 | misc.log('===========') 130 | 131 | time_start = time.time() 132 | aur = {} 133 | 134 | recall_dict = {e['type']:{} for e in p['errors']} 135 | 136 | for error in p['errors']: 137 | 138 | # Name of the result and the dataset. 
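    # Aggregation sketch (illustrative only, toy numbers): for each error type
    # the per-threshold (and, for VSD, per-tau) recalls are averaged into the
    # area under the recall curve/surface, and the final BOP19 score is the
    # mean over the three error types:
    #   recalls_vsd  = [0.70, 0.68, 0.66]   # one value per (tau, threshold)
    #   recalls_mssd = [0.61, 0.60]         # one value per threshold
    #   recalls_mspd = [0.72, 0.71]
    #   ar = {k: sum(v) / len(v) for k, v in {'vsd': recalls_vsd,
    #         'mssd': recalls_mssd, 'mspd': recalls_mspd}.items()}
    #   bop19_score = (ar['vsd'] + ar['mssd'] + ar['mspd']) / 3.0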
139 | result_name = os.path.splitext(os.path.basename(result_filename))[0] 140 | dataset = str(result_name.split('_')[1].split('-')[0]) 141 | 142 | # Paths (rel. to config.eval_path) to folders with calculated pose errors. 143 | # For VSD, there is one path for each setting of tau. For the other pose 144 | # error functions, there is only one path. 145 | error_dir_paths = {} 146 | if error['type'] == 'vsd': 147 | for vsd_tau in error['vsd_taus']: 148 | error_sign = misc.get_error_signature( 149 | error['type'], error['n_top'], vsd_delta=error['vsd_deltas'][dataset], 150 | vsd_tau=vsd_tau) 151 | error_dir_paths[error_sign] = os.path.join(result_name, error_sign) 152 | else: 153 | error_sign = misc.get_error_signature(error['type'], error['n_top']) 154 | error_dir_paths[error_sign] = os.path.join(result_name, error_sign) 155 | 156 | # Recall scores for all settings of the threshold of correctness (and also 157 | # of the misalignment tolerance tau in the case of VSD). 158 | recalls = [] 159 | 160 | # Calculate performance scores. 161 | for error_sign, error_dir_path in error_dir_paths.items(): 162 | recall_dict[error['type']][error_sign] = [] 163 | for correct_th in error['correct_th']: 164 | 165 | # Path to file with calculated scores. 166 | score_sign = misc.get_score_signature(correct_th, p['visib_gt_min']) 167 | 168 | scores_filename = 'scores_{}.json'.format(score_sign) 169 | scores_path = os.path.join( 170 | config.eval_path, result_name, error_sign, scores_filename) 171 | 172 | # Load the scores. 173 | misc.log('Loading calculated scores from: {}'.format(scores_path)) 174 | scores = inout.load_json(scores_path) 175 | recalls.append(scores['total_recall']) 176 | recall_dict[error['type']][error_sign].append(scores['total_recall']) 177 | 178 | # Area under the recall surface/curve. 179 | aur[error['type']] = np.mean(recalls) 180 | 181 | time_total = time.time() - time_start 182 | 183 | # output final scores and plot recall curves 184 | err_types = [e['type'] for e in p['errors']] 185 | for err_type in err_types: 186 | misc.log('Average Recall {}: {}'.format(err_type, 187 | aur[err_type])) 188 | 189 | if set(['vsd', 'mssd', 'mspd']).issubset(err_types): 190 | test_set = os.path.basename(result_filename) 191 | mean_error = np.mean([aur[err_type] for err_type in err_types]) 192 | misc.log('Average BOP score on {}: {}'.format(test_set, mean_error)) 193 | 194 | if p['plot_recall_curves']: 195 | plot_recall_curves(recall_dict, p) 196 | 197 | misc.log('Done.') 198 | -------------------------------------------------------------------------------- /keypoint/scripts/vis_est_poses.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """Visualizes object models in pose estimates saved in the BOP format.""" 5 | 6 | import os 7 | import numpy as np 8 | import itertools 9 | 10 | from bop_toolkit_lib import config 11 | from bop_toolkit_lib import dataset_params 12 | from bop_toolkit_lib import inout 13 | from bop_toolkit_lib import misc 14 | from bop_toolkit_lib import renderer 15 | from bop_toolkit_lib import visualization 16 | 17 | 18 | # PARAMETERS. 19 | ################################################################################ 20 | p = { 21 | # Top N pose estimates (with the highest score) to be visualized for each 22 | # object in each image. 23 | 'n_top': 1, # 0 = all estimates, -1 = given by the number of GT poses. 
24 | 25 | # True = one visualization for each (im_id, obj_id), False = one per im_id. 26 | 'vis_per_obj_id': True, 27 | 28 | # Indicates whether to render RGB image. 29 | 'vis_rgb': True, 30 | 31 | # Indicates whether to resolve visibility in the rendered RGB images (using 32 | # depth renderings). If True, only the part of object surface, which is not 33 | # occluded by any other modeled object, is visible. If False, RGB renderings 34 | # of individual objects are blended together. 35 | 'vis_rgb_resolve_visib': True, 36 | 37 | # Indicates whether to render depth image. 38 | 'vis_depth_diff': False, 39 | 40 | # If to use the original model color. 41 | 'vis_orig_color': False, 42 | 43 | # Type of the renderer (used for the VSD pose error function). 44 | 'renderer_type': 'python', # Options: 'cpp', 'python'. 45 | 46 | # Names of files with pose estimates to visualize (assumed to be stored in 47 | # folder config.eval_path). See docs/bop_challenge_2019.md for a description 48 | # of the format. Example results can be found at: 49 | # http://ptak.felk.cvut.cz/6DB/public/bop_sample_results/bop_challenge_2019/ 50 | 'result_filenames': [ 51 | '/path/to/csv/with/results', 52 | ], 53 | 54 | # Folder containing the BOP datasets. 55 | 'datasets_path': config.datasets_path, 56 | 57 | # Folder for output visualisations. 58 | 'vis_path': os.path.join(config.output_path, 'vis_est_poses'), 59 | 60 | # Path templates for output images. 61 | 'vis_rgb_tpath': os.path.join( 62 | '{vis_path}', '{result_name}', '{scene_id:06d}', '{vis_name}.jpg'), 63 | 'vis_depth_diff_tpath': os.path.join( 64 | '{vis_path}', '{result_name}', '{scene_id:06d}', 65 | '{vis_name}_depth_diff.jpg'), 66 | } 67 | ################################################################################ 68 | 69 | 70 | # Load colors. 71 | colors_path = os.path.join( 72 | os.path.dirname(visualization.__file__), 'colors.json') 73 | colors = inout.load_json(colors_path) 74 | 75 | for result_fname in p['result_filenames']: 76 | misc.log('Processing: ' + result_fname) 77 | 78 | # Parse info about the method and the dataset from the filename. 79 | result_name = os.path.splitext(os.path.basename(result_fname))[0] 80 | result_info = result_name.split('_') 81 | method = result_info[0] 82 | dataset_info = result_info[1].split('-') 83 | dataset = dataset_info[0] 84 | split = dataset_info[1] 85 | split_type = dataset_info[2] if len(dataset_info) > 2 else None 86 | 87 | # Load dataset parameters. 88 | dp_split = dataset_params.get_split_params( 89 | p['datasets_path'], dataset, split, split_type) 90 | 91 | model_type = 'eval' 92 | dp_model = dataset_params.get_model_params( 93 | p['datasets_path'], dataset, model_type) 94 | 95 | # Rendering mode. 96 | renderer_modalities = [] 97 | if p['vis_rgb']: 98 | renderer_modalities.append('rgb') 99 | if p['vis_depth_diff'] or (p['vis_rgb'] and p['vis_rgb_resolve_visib']): 100 | renderer_modalities.append('depth') 101 | renderer_mode = '+'.join(renderer_modalities) 102 | 103 | # Create a renderer. 104 | width, height = dp_split['im_size'] 105 | ren = renderer.create_renderer( 106 | width, height, p['renderer_type'], mode=renderer_mode) 107 | 108 | # Load object models. 
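  # Illustrative example of the result-filename convention parsed at the top
  # of this loop (the name itself is hypothetical): a file called
  # 'mymethod_lmo-test.csv' yields method='mymethod', dataset='lmo',
  # split='test' and split_type=None; the pose estimates are then loaded from
  # it just below.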
109 | models = {} 110 | for obj_id in dp_model['obj_ids']: 111 | misc.log('Loading 3D model of object {}...'.format(obj_id)) 112 | model_path = dp_model['model_tpath'].format(obj_id=obj_id) 113 | model_color = None 114 | if not p['vis_orig_color']: 115 | model_color = tuple(colors[(obj_id - 1) % len(colors)]) 116 | ren.add_object(obj_id, model_path, surf_color=model_color) 117 | 118 | # Load pose estimates. 119 | misc.log('Loading pose estimates...') 120 | ests = inout.load_bop_results( 121 | os.path.join(config.results_path, result_fname)) 122 | 123 | # Organize the pose estimates by scene, image and object. 124 | misc.log('Organizing pose estimates...') 125 | ests_org = {} 126 | for est in ests: 127 | ests_org.setdefault(est['scene_id'], {}).setdefault( 128 | est['im_id'], {}).setdefault(est['obj_id'], []).append(est) 129 | 130 | for scene_id, scene_ests in ests_org.items(): 131 | 132 | # Load info and ground-truth poses for the current scene. 133 | scene_camera = inout.load_scene_camera( 134 | dp_split['scene_camera_tpath'].format(scene_id=scene_id)) 135 | scene_gt = inout.load_scene_gt( 136 | dp_split['scene_gt_tpath'].format(scene_id=scene_id)) 137 | 138 | for im_ind, (im_id, im_ests) in enumerate(scene_ests.items()): 139 | 140 | if im_ind % 10 == 0: 141 | split_type_str = ' - ' + split_type if split_type is not None else '' 142 | misc.log( 143 | 'Visualizing pose estimates - method: {}, dataset: {}{}, scene: {}, ' 144 | 'im: {}'.format(method, dataset, split_type_str, scene_id, im_id)) 145 | 146 | # Intrinsic camera matrix. 147 | K = scene_camera[im_id]['cam_K'] 148 | 149 | im_ests_vis = [] 150 | im_ests_vis_obj_ids = [] 151 | for obj_id, obj_ests in im_ests.items(): 152 | 153 | # Sort the estimates by score (in descending order). 154 | obj_ests_sorted = sorted( 155 | obj_ests, key=lambda est: est['score'], reverse=True) 156 | 157 | # Select the number of top estimated poses to visualize. 158 | if p['n_top'] == 0: # All estimates are considered. 159 | n_top_curr = None 160 | elif p['n_top'] == -1: # Given by the number of GT poses. 161 | n_gt = sum([gt['obj_id'] == obj_id for gt in scene_gt[im_id]]) 162 | n_top_curr = n_gt 163 | else: # Specified by the parameter n_top. 164 | n_top_curr = p['n_top'] 165 | obj_ests_sorted = obj_ests_sorted[slice(0, n_top_curr)] 166 | 167 | # Get list of poses to visualize. 168 | for est in obj_ests_sorted: 169 | est['obj_id'] = obj_id 170 | 171 | # Text info to write on the image at the pose estimate. 172 | if p['vis_per_obj_id']: 173 | est['text_info'] = [ 174 | {'name': '', 'val': est['score'], 'fmt': ':.2f'} 175 | ] 176 | else: 177 | val = '{}:{:.2f}'.format(obj_id, est['score']) 178 | est['text_info'] = [{'name': '', 'val': val, 'fmt': ''}] 179 | 180 | im_ests_vis.append(obj_ests_sorted) 181 | im_ests_vis_obj_ids.append(obj_id) 182 | 183 | # Join the per-object estimates if only one visualization is to be made. 184 | if not p['vis_per_obj_id']: 185 | im_ests_vis = [list(itertools.chain.from_iterable(im_ests_vis))] 186 | 187 | for ests_vis_id, ests_vis in enumerate(im_ests_vis): 188 | 189 | # Load the color and depth images and prepare images for rendering. 
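        # Recap of the data layout used below: ests_org has the form
        # {scene_id: {im_id: {obj_id: [est, ...]}}}, where each est is one row
        # of the loaded BOP results (sorted by 'score' above, with 'obj_id' and
        # 'text_info' attached), and ests_vis is the list of estimates rendered
        # for the current image (one list per object when p['vis_per_obj_id']
        # is True, otherwise a single merged list).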
190 | rgb = None 191 | if p['vis_rgb']: 192 | if 'rgb' in dp_split['im_modalities']: 193 | rgb = inout.load_im(dp_split['rgb_tpath'].format( 194 | scene_id=scene_id, im_id=im_id))[:, :, :3] 195 | elif 'gray' in dp_split['im_modalities']: 196 | gray = inout.load_im(dp_split['gray_tpath'].format( 197 | scene_id=scene_id, im_id=im_id)) 198 | rgb = np.dstack([gray, gray, gray]) 199 | else: 200 | raise ValueError('RGB nor gray images are available.') 201 | 202 | depth = None 203 | if p['vis_depth_diff'] or (p['vis_rgb'] and p['vis_rgb_resolve_visib']): 204 | depth = inout.load_depth(dp_split['depth_tpath'].format( 205 | scene_id=scene_id, im_id=im_id)) 206 | depth *= scene_camera[im_id]['depth_scale'] # Convert to [mm]. 207 | 208 | # Visualization name. 209 | if p['vis_per_obj_id']: 210 | vis_name = '{im_id:06d}_{obj_id:06d}'.format( 211 | im_id=im_id, obj_id=im_ests_vis_obj_ids[ests_vis_id]) 212 | else: 213 | vis_name = '{im_id:06d}'.format(im_id=im_id) 214 | 215 | # Path to the output RGB visualization. 216 | vis_rgb_path = None 217 | if p['vis_rgb']: 218 | vis_rgb_path = p['vis_rgb_tpath'].format( 219 | vis_path=p['vis_path'], result_name=result_name, scene_id=scene_id, 220 | vis_name=vis_name) 221 | 222 | # Path to the output depth difference visualization. 223 | vis_depth_diff_path = None 224 | if p['vis_depth_diff']: 225 | vis_depth_diff_path = p['vis_depth_diff_tpath'].format( 226 | vis_path=p['vis_path'], result_name=result_name, scene_id=scene_id, 227 | vis_name=vis_name) 228 | 229 | # Visualization. 230 | visualization.vis_object_poses( 231 | poses=ests_vis, K=K, renderer=ren, rgb=rgb, depth=depth, 232 | vis_rgb_path=vis_rgb_path, vis_depth_diff_path=vis_depth_diff_path, 233 | vis_rgb_resolve_visib=p['vis_rgb_resolve_visib']) 234 | 235 | misc.log('Done.') 236 | -------------------------------------------------------------------------------- /keypoint/scripts/vis_gt_poses.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """Visualizes object models in the ground-truth poses.""" 5 | 6 | import os 7 | import numpy as np 8 | 9 | from bop_toolkit_lib import config 10 | from bop_toolkit_lib import dataset_params 11 | from bop_toolkit_lib import inout 12 | from bop_toolkit_lib import misc 13 | from bop_toolkit_lib import renderer 14 | from bop_toolkit_lib import visualization 15 | 16 | 17 | # PARAMETERS. 18 | ################################################################################ 19 | p = { 20 | # See dataset_params.py for options. 21 | 'dataset': 'lm', 22 | 23 | # Dataset split. Options: 'train', 'val', 'test'. 24 | 'dataset_split': 'test', 25 | 26 | # Dataset split type. None = default. See dataset_params.py for options. 27 | 'dataset_split_type': None, 28 | 29 | # File with a list of estimation targets used to determine the set of images 30 | # for which the GT poses will be visualized. The file is assumed to be stored 31 | # in the dataset folder. None = all images. 32 | # 'targets_filename': 'test_targets_bop19.json', 33 | 'targets_filename': None, 34 | 35 | # Select ID's of scenes, images and GT poses to be processed. 36 | # Empty list [] means that all ID's will be used. 37 | 'scene_ids': [], 38 | 'im_ids': [], 39 | 'gt_ids': [], 40 | 41 | # Indicates whether to render RGB images. 
42 | 'vis_rgb': True, 43 | 44 | # Indicates whether to resolve visibility in the rendered RGB images (using 45 | # depth renderings). If True, only the part of object surface, which is not 46 | # occluded by any other modeled object, is visible. If False, RGB renderings 47 | # of individual objects are blended together. 48 | 'vis_rgb_resolve_visib': True, 49 | 50 | # Indicates whether to save images of depth differences. 51 | 'vis_depth_diff': False, 52 | 53 | # Whether to use the original model color. 54 | 'vis_orig_color': False, 55 | 56 | # Type of the renderer (used for the VSD pose error function). 57 | 'renderer_type': 'python', # Options: 'cpp', 'python'. 58 | 59 | # Folder containing the BOP datasets. 60 | 'datasets_path': config.datasets_path, 61 | 62 | # Folder for output visualisations. 63 | 'vis_path': os.path.join(config.output_path, 'vis_gt_poses'), 64 | 65 | # Path templates for output images. 66 | 'vis_rgb_tpath': os.path.join( 67 | '{vis_path}', '{dataset}', '{split}', '{scene_id:06d}', '{im_id:06d}.jpg'), 68 | 'vis_depth_diff_tpath': os.path.join( 69 | '{vis_path}', '{dataset}', '{split}', '{scene_id:06d}', 70 | '{im_id:06d}_depth_diff.jpg'), 71 | } 72 | ################################################################################ 73 | 74 | 75 | # Load dataset parameters. 76 | dp_split = dataset_params.get_split_params( 77 | p['datasets_path'], p['dataset'], p['dataset_split'], p['dataset_split_type']) 78 | 79 | model_type = 'eval' # None = default. 80 | dp_model = dataset_params.get_model_params( 81 | p['datasets_path'], p['dataset'], model_type) 82 | 83 | # Load colors. 84 | colors_path = os.path.join( 85 | os.path.dirname(visualization.__file__), 'colors.json') 86 | colors = inout.load_json(colors_path) 87 | 88 | # Subset of images for which the ground-truth poses will be rendered. 89 | if p['targets_filename'] is not None: 90 | targets = inout.load_json( 91 | os.path.join(dp_split['base_path'], p['targets_filename'])) 92 | scene_im_ids = {} 93 | for target in targets: 94 | scene_im_ids.setdefault( 95 | target['scene_id'], set()).add(target['im_id']) 96 | else: 97 | scene_im_ids = None 98 | 99 | # List of considered scenes. 100 | scene_ids_curr = dp_split['scene_ids'] 101 | if p['scene_ids']: 102 | scene_ids_curr = set(scene_ids_curr).intersection(p['scene_ids']) 103 | 104 | # Rendering mode. 105 | renderer_modalities = [] 106 | if p['vis_rgb']: 107 | renderer_modalities.append('rgb') 108 | if p['vis_depth_diff'] or (p['vis_rgb'] and p['vis_rgb_resolve_visib']): 109 | renderer_modalities.append('depth') 110 | renderer_mode = '+'.join(renderer_modalities) 111 | 112 | # Create a renderer. 113 | width, height = dp_split['im_size'] 114 | ren = renderer.create_renderer( 115 | width, height, p['renderer_type'], mode=renderer_mode, shading='flat') 116 | 117 | # Load object models. 118 | models = {} 119 | for obj_id in dp_model['obj_ids']: 120 | misc.log('Loading 3D model of object {}...'.format(obj_id)) 121 | model_path = dp_model['model_tpath'].format(obj_id=obj_id) 122 | model_color = None 123 | if not p['vis_orig_color']: 124 | model_color = tuple(colors[(obj_id - 1) % len(colors)]) 125 | ren.add_object(obj_id, model_path, surf_color=model_color) 126 | 127 | for scene_id in scene_ids_curr: 128 | 129 | # Load scene info and ground-truth poses. 
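  # Expected structure of the two files loaded below (a sketch mirroring what
  # render_train_imgs.py writes): scene_camera maps im_id to a dict with at
  # least 'cam_K' (3x3 intrinsics) and 'depth_scale', and scene_gt maps im_id
  # to a list of annotations, each with 'obj_id', 'cam_R_m2c' (3x3 rotation)
  # and 'cam_t_m2c' (3x1 translation, in mm for the BOP datasets).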
130 | scene_camera = inout.load_scene_camera( 131 | dp_split['scene_camera_tpath'].format(scene_id=scene_id)) 132 | scene_gt = inout.load_scene_gt( 133 | dp_split['scene_gt_tpath'].format(scene_id=scene_id)) 134 | 135 | # List of considered images. 136 | if scene_im_ids is not None: 137 | im_ids = scene_im_ids[scene_id] 138 | else: 139 | im_ids = sorted(scene_gt.keys()) 140 | if p['im_ids']: 141 | im_ids = set(im_ids).intersection(p['im_ids']) 142 | 143 | # Render the object models in the ground-truth poses in the selected images. 144 | for im_counter, im_id in enumerate(im_ids): 145 | if im_counter % 10 == 0: 146 | misc.log( 147 | 'Visualizing GT poses - dataset: {}, scene: {}, im: {}/{}'.format( 148 | p['dataset'], scene_id, im_counter, len(im_ids))) 149 | 150 | K = scene_camera[im_id]['cam_K'] 151 | 152 | # List of considered ground-truth poses. 153 | gt_ids_curr = range(len(scene_gt[im_id])) 154 | if p['gt_ids']: 155 | gt_ids_curr = set(gt_ids_curr).intersection(p['gt_ids']) 156 | 157 | # Collect the ground-truth poses. 158 | gt_poses = [] 159 | for gt_id in gt_ids_curr: 160 | gt = scene_gt[im_id][gt_id] 161 | gt_poses.append({ 162 | 'obj_id': gt['obj_id'], 163 | 'R': gt['cam_R_m2c'], 164 | 't': gt['cam_t_m2c'], 165 | 'text_info': [ 166 | {'name': '', 'val': '{}:{}'.format(gt['obj_id'], gt_id), 'fmt': ''} 167 | ] 168 | }) 169 | 170 | # Load the color and depth images and prepare images for rendering. 171 | rgb = None 172 | if p['vis_rgb']: 173 | if 'rgb' in dp_split['im_modalities']: 174 | rgb = inout.load_im(dp_split['rgb_tpath'].format( 175 | scene_id=scene_id, im_id=im_id))[:, :, :3] 176 | elif 'gray' in dp_split['im_modalities']: 177 | gray = inout.load_im(dp_split['gray_tpath'].format( 178 | scene_id=scene_id, im_id=im_id)) 179 | rgb = np.dstack([gray, gray, gray]) 180 | else: 181 | raise ValueError('RGB nor gray images are available.') 182 | 183 | depth = None 184 | if p['vis_depth_diff'] or (p['vis_rgb'] and p['vis_rgb_resolve_visib']): 185 | depth = inout.load_depth(dp_split['depth_tpath'].format( 186 | scene_id=scene_id, im_id=im_id)) 187 | depth *= scene_camera[im_id]['depth_scale'] # Convert to [mm]. 188 | 189 | # Path to the output RGB visualization. 190 | vis_rgb_path = None 191 | if p['vis_rgb']: 192 | vis_rgb_path = p['vis_rgb_tpath'].format( 193 | vis_path=p['vis_path'], dataset=p['dataset'], split=p['dataset_split'], 194 | scene_id=scene_id, im_id=im_id) 195 | 196 | # Path to the output depth difference visualization. 197 | vis_depth_diff_path = None 198 | if p['vis_depth_diff']: 199 | vis_depth_diff_path = p['vis_depth_diff_tpath'].format( 200 | vis_path=p['vis_path'], dataset=p['dataset'], split=p['dataset_split'], 201 | scene_id=scene_id, im_id=im_id) 202 | 203 | # Visualization. 
204 | visualization.vis_object_poses( 205 | poses=gt_poses, K=K, renderer=ren, rgb=rgb, depth=depth, 206 | vis_rgb_path=vis_rgb_path, vis_depth_diff_path=vis_depth_diff_path, 207 | vis_rgb_resolve_visib=p['vis_rgb_resolve_visib']) 208 | 209 | misc.log('Done.') 210 | -------------------------------------------------------------------------------- /keypoint/scripts/vis_object_symmetries.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """Visualizes object models under all identified symmetry transformations.""" 5 | 6 | import os 7 | import numpy as np 8 | 9 | from bop_toolkit_lib import config 10 | from bop_toolkit_lib import dataset_params 11 | from bop_toolkit_lib import inout 12 | from bop_toolkit_lib import misc 13 | from bop_toolkit_lib import renderer 14 | from bop_toolkit_lib import transform as tr 15 | 16 | 17 | # PARAMETERS. 18 | ################################################################################ 19 | p = { 20 | # See dataset_params.py for options. 21 | 'dataset': 'itodd', 22 | 23 | # Type of the renderer (used for the VSD pose error function). 24 | 'renderer_type': 'python', # Options: 'cpp', 'python'. 25 | 26 | # See misc.get_symmetry_transformations(). 27 | 'max_sym_disc_step': 0.01, 28 | 29 | 'views': [ 30 | { 31 | 'R': tr.rotation_matrix(0.5 * np.pi, [1, 0, 0]).dot( 32 | tr.rotation_matrix(-0.5 * np.pi, [0, 0, 1])).dot( 33 | tr.rotation_matrix(0.1 * np.pi, [0, 1, 0]))[:3, :3], 34 | 't': np.array([[0, 0, 500]]).T 35 | } 36 | ], 37 | 38 | # Folder containing the BOP datasets. 39 | 'datasets_path': config.datasets_path, 40 | 41 | # Folder for output visualisations. 42 | 'vis_path': os.path.join(config.output_path, 'vis_object_symmetries'), 43 | 44 | # Path templates for output images. 45 | 'vis_rgb_tpath': os.path.join( 46 | '{vis_path}', '{dataset}', '{obj_id:06d}', 47 | '{view_id:06d}_{pose_id:06d}.jpg'), 48 | } 49 | ################################################################################ 50 | 51 | 52 | # Load dataset parameters. 53 | model_type = None # None = default. 54 | if p['dataset'] == 'tless': 55 | model_type = 'cad' 56 | dp_model = dataset_params.get_model_params( 57 | p['datasets_path'], p['dataset'], model_type) 58 | dp_camera = dataset_params.get_camera_params( 59 | p['datasets_path'], p['dataset']) 60 | 61 | K = dp_camera['K'] 62 | fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2] 63 | 64 | # Create a renderer. 65 | width, height = dp_camera['im_size'] 66 | ren = renderer.create_renderer( 67 | width, height, p['renderer_type'], mode='rgb', shading='flat') 68 | 69 | # Load meta info about the models (including symmetries). 70 | models_info = inout.load_json(dp_model['models_info_path'], keys_to_int=True) 71 | 72 | 73 | for obj_id in dp_model['obj_ids']: 74 | 75 | # Load object model. 76 | misc.log('Loading 3D model of object {}...'.format(obj_id)) 77 | model_path = dp_model['model_tpath'].format(obj_id=obj_id) 78 | ren.add_object(obj_id, model_path) 79 | 80 | poses = misc.get_symmetry_transformations( 81 | models_info[obj_id], p['max_sym_disc_step']) 82 | 83 | for pose_id, pose in enumerate(poses): 84 | 85 | for view_id, view in enumerate(p['views']): 86 | 87 | R = view['R'].dot(pose['R']) 88 | t = view['R'].dot(pose['t']) + view['t'] 89 | 90 | vis_rgb = ren.render_object(obj_id, R, t, fx, fy, cx, cy)['rgb'] 91 | 92 | # Path to the output RGB visualization. 
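      # Note on the pose composition a few lines above (standard rigid-body
      # algebra): applying the symmetry first and the viewing pose second maps
      # a model point x to
      #   x_cam = R_view @ (R_sym @ x + t_sym) + t_view
      #         = (R_view @ R_sym) @ x + (R_view @ t_sym + t_view),
      # which is exactly R = view['R'].dot(pose['R']) and
      # t = view['R'].dot(pose['t']) + view['t'].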
93 | vis_rgb_path = p['vis_rgb_tpath'].format( 94 | vis_path=p['vis_path'], dataset=p['dataset'], obj_id=obj_id, 95 | view_id=view_id, pose_id=pose_id) 96 | misc.ensure_dir(os.path.dirname(vis_rgb_path)) 97 | inout.save_im(vis_rgb_path, vis_rgb) 98 | 99 | misc.log('Done.') 100 | -------------------------------------------------------------------------------- /keypoint/train/base_options.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | from collections import namedtuple 4 | 5 | class BaseTrainOptions(): 6 | 7 | def parse_args(self, arg_list=None): 8 | if arg_list is None: 9 | self.args = self.parser.parse_args() 10 | else: 11 | self.args = self.parser.parse_args(arg_list) 12 | 13 | if self.args.from_json is not None: 14 | path_to_json = os.path.abspath(self.args.from_json) 15 | with open(path_to_json, "r") as f: 16 | json_args = json.load(f) 17 | json_args = namedtuple("json_args", json_args.keys())(**json_args) 18 | return json_args 19 | else: 20 | self.args.log_dir = os.path.join(os.path.abspath(self.args.log_dir), self.args.name) 21 | self.args.summary_dir = os.path.join(self.args.log_dir, 'tensorboard') 22 | if not os.path.exists(self.args.log_dir): 23 | os.makedirs(self.args.log_dir) 24 | self.args.checkpoint_dir = os.path.join(self.args.log_dir, 'checkpoints') 25 | if not os.path.exists(self.args.checkpoint_dir): 26 | os.makedirs(self.args.checkpoint_dir) 27 | self._save_dump() 28 | return self.args 29 | 30 | def _save_dump(self): 31 | if not os.path.exists(self.args.log_dir): 32 | os.makedirs(self.args.log_dir) 33 | with open(os.path.join(self.args.log_dir, "config.json"), "w") as f: 34 | json.dump(vars(self.args), f, indent=4) 35 | return 36 | 37 | class BaseTestOptions(): 38 | 39 | def parse_args(self): 40 | args = self.parser.parse_args() 41 | path_to_json = os.path.abspath(args.json) 42 | with open(path_to_json, "r") as f: 43 | train_args = json.load(f) 44 | train_args = namedtuple("train_args", train_args.keys())(**train_args) 45 | if not os.path.exists(args.out_dir): 46 | os.makedirs(args.out_dir) 47 | return args, train_args 48 | -------------------------------------------------------------------------------- /keypoint/train/base_trainer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import time 3 | import sys 4 | import math 5 | from tqdm import tqdm 6 | tqdm.monitor_interval = 0 7 | from tensorboardX import SummaryWriter 8 | from utils import CheckpointDataLoader, CheckpointSaver 9 | 10 | class BaseTrainer: 11 | 12 | def __init__(self, options): 13 | self.options = options 14 | self.collate_fn = None 15 | self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') 16 | 17 | self._init_fn() # define your model, optimizers etc. 
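        # Contract expected from subclasses (cf. detection_trainer.py and
        # keypoint_trainer.py): _init_fn() must create self.train_ds,
        # self.test_ds, the model(s) and self.optimizer, and register them in
        # self.models_dict / self.optimizers_dict so that the CheckpointSaver
        # and the learning-rate schedulers set up below can find them; it may
        # also replace self.collate_fn.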
18 | self.saver = CheckpointSaver(save_dir=options.checkpoint_dir) 19 | self.summary_writer = SummaryWriter(self.options.summary_dir) 20 | 21 | self.checkpoint = None 22 | if self.options.resume and self.saver.exists_checkpoint(): 23 | self.checkpoint = self.saver.load_checkpoint(self.models_dict, self.optimizers_dict, checkpoint_file=self.options.checkpoint) 24 | 25 | if self.checkpoint is None: 26 | self.epoch_count = 0 27 | self.step_count = 0 28 | else: 29 | self.epoch_count = self.checkpoint['epoch'] 30 | self.step_count = self.checkpoint['total_step_count'] 31 | 32 | self.lr_scheduler = None 33 | self.exponential_scheduler = None 34 | if self.options.lr_decay < 1.0: 35 | self.exponential_scheduler = torch.optim.lr_scheduler.ExponentialLR( 36 | optimizer = self.optimizer,, 37 | gamma = self.options.lr_decay, 38 | last_epoch = self.step_count-1) 39 | print('lr_decay/epoch:', self.options.lr_decay) 40 | 41 | if self.options.lr_schedule is not None: 42 | self.lr_scheduler = torch.optim.lr_scheduler.MultiStepLR( 43 | optimizer = self.optimizer, 44 | milestones = self.options.lr_schedule, 45 | gamma = self.options.lr_gamma, 46 | last_epoch = self.step_count-1) 47 | 48 | print('lr_schedule:', self.options.lr_schedule) 49 | 50 | def _init_fn(self): 51 | raise NotImplementedError('You need to provide an _init_fn method') 52 | 53 | # @profile 54 | def train(self): 55 | 56 | self.endtime = time.time() + self.options.time_to_run 57 | for epoch in tqdm(range(self.epoch_count, self.options.num_epochs), total=self.options.num_epochs, initial=self.epoch_count): 58 | train_data_loader = CheckpointDataLoader(self.train_ds,checkpoint=self.checkpoint, 59 | batch_size=self.options.batch_size, 60 | num_workers=self.options.num_workers, 61 | pin_memory=self.options.pin_memory, 62 | shuffle=self.options.shuffle_train, 63 | collate_fn=self.collate_fn) 64 | warmup_scheduler = None 65 | warmup_steps = self.options.warmup_steps 66 | if epoch == 0 and self.step_count == 0 and self.checkpoint is None: 67 | warmup_iters = warmup_steps 68 | warmup_factor = 1./warmup_steps 69 | warmup_scheduler = warmup_lr_scheduler(self.optimizer, warmup_iters, warmup_factor) 70 | 71 | for step, batch in enumerate(tqdm(train_data_loader, desc='Epoch '+str(epoch), 72 | total=math.ceil(len(self.train_ds)/self.options.batch_size), 73 | initial=train_data_loader.checkpoint_batch_idx), 74 | train_data_loader.checkpoint_batch_idx): 75 | 76 | if time.time() < self.endtime: 77 | out = self._train_step(batch) 78 | 79 | self.step_count += 1 80 | 81 | if self.step_count % self.options.summary_steps == 0: 82 | self._train_summaries(batch, *out) 83 | 84 | if self.step_count % self.options.test_steps == 0: 85 | val_loss = self.test() 86 | 87 | if self.step_count % self.options.checkpoint_steps == 0: 88 | self.saver.save_checkpoint(self.models_dict, self.optimizers_dict, epoch, step+1, self.options.batch_size, train_data_loader.sampler.dataset_perm, self.step_count) 89 | tqdm.write('Checkpoint saved') 90 | 91 | else: 92 | tqdm.write('Timeout reached') 93 | self.saver.save_checkpoint(self.models_dict, self.optimizers_dict, epoch, step, self.options.batch_size, train_data_loader.sampler.dataset_perm, self.step_count) 94 | tqdm.write('Checkpoint saved') 95 | sys.exit(0) 96 | 97 | 98 | if warmup_scheduler is not None: 99 | warmup_scheduler.step() 100 | if self.step_count > warmup_steps: 101 | print('Setting warmup scheduler to none') 102 | warmup_scheduler = None 103 | 104 | if self.lr_scheduler is not None: 105 | self.lr_scheduler.step() 106 | 107 | 
if self.exponential_scheduler is not None: 108 | self.exponential_scheduler.step() 109 | 110 | # load a checkpoint only on startup, for the next epochs 111 | # just iterate over the dataset as usual 112 | self.checkpoint=None 113 | 114 | # save checkpoint after each epoch 115 | self.saver.save_checkpoint(self.models_dict, self.optimizers_dict, epoch+1, 0, self.options.batch_size, None, self.step_count) 116 | 117 | return 118 | 119 | def _get_lr(self): 120 | return next(iter(self.optimizers_dict.values())).param_groups[0]['lr'] 121 | 122 | def _train_step(self, input_batch): 123 | raise NotImplementedError('You need to provide a _train_step method') 124 | 125 | def _train_summaries(self, input_batch): 126 | raise NotImplementedError('You need to provide a _save_summaries method') 127 | 128 | def test(self, input_batch): 129 | raise NotImplementedError('You need to provide a _test_step method') 130 | 131 | 132 | def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor): 133 | 134 | def f(x): 135 | if x >= warmup_iters: 136 | return 1 137 | alpha = float(x) / warmup_iters 138 | return warmup_factor * (1 - alpha) + alpha 139 | 140 | return torch.optim.lr_scheduler.LambdaLR(optimizer, f) 141 | -------------------------------------------------------------------------------- /keypoint/train/detection_trainer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.utils.data import DataLoader 4 | from torchvision import transforms 5 | from torchvision.utils import make_grid 6 | 7 | import time 8 | from tqdm import tqdm 9 | tqdm.monitor_interval = 0 10 | 11 | from bop_dataset import BOPDataset 12 | from base_trainer import BaseTrainer 13 | from transforms import ColorJitter, ToTensor, \ 14 | RandomHorizontalFlip, RandomGaussianBlur, RandomGrayscale 15 | 16 | from models import FRCNN 17 | 18 | class DetectionTrainer(BaseTrainer): 19 | 20 | def _init_fn(self): 21 | transform_list = [] 22 | transform_list.append(ColorJitter(brightness=self.options.jitter, contrast=self.options.jitter, saturation=self.options.jitter, hue=self.options.jitter/4)) 23 | transform_list.append(RandomGrayscale(0.2)) 24 | transform_list.append(RandomGaussianBlur(kernel_size=7)) 25 | transform_list.append(ToTensor()) 26 | transform_list.append(RandomHorizontalFlip(0.5)) 27 | 28 | test_transform_list = [] 29 | test_transform_list.append(ToTensor()) 30 | 31 | self.train_ds = BOPDataset(self.options.dataset_dir, self.options.dataset, split='train', 32 | valid_objid = self.options.objid, 33 | return_keypoints=False, return_coco=True, 34 | transform=transforms.Compose(transform_list)) 35 | 36 | self.test_ds = BOPDataset(self.options.dataset_dir, self.options.dataset, split='test', 37 | valid_objid = self.options.objid, 38 | return_keypoints=False, return_coco=True, 39 | transform=transforms.Compose(test_transform_list)) 40 | 41 | self.collate_fn = lambda batch: tuple(batch) 42 | 43 | 44 | num_classes = len(self.train_ds.obj2idx) + 1 45 | self.model = FRCNN(num_classes).to(self.device) 46 | 47 | params = [p for p in self.model.parameters() if p.requires_grad] 48 | self.optimizer = torch.optim.Adam(params=self.model.parameters(), lr=self.options.lr) 49 | 50 | # pack all models and optimizers in dictionaries to interact with the checkpoint saver 51 | self.models_dict = {'frcnn': self.model} 52 | self.optimizers_dict = {'optimizer': self.optimizer} 53 | 54 | # meter to track moving average 55 | self.loss_box_meter = AverageMeter() 56 | 
self.loss_class_meter = AverageMeter() 57 | 58 | print('Using device:', self.device) 59 | print('Using optimizer:', self.options.optimizer) 60 | print('Total number of classes:', num_classes) 61 | 62 | def _train_step(self, input_batch): 63 | # Force optimizer to use initial/reset learning rate, if specified 64 | if self.options.new_lr is True: 65 | for g in self.optimizer.param_groups: 66 | g['lr'] = self.options.lr 67 | self.options.new_lr = False 68 | 69 | # structure input_batch for torchvision detection module 70 | images = [s['image'].to(self.device) for s in input_batch] 71 | targets = [{k: v.to(self.device) for k, v in s.items() if k!='image'} for s in input_batch] 72 | 73 | # train step 74 | self.model.train() 75 | loss_dict = self.model(images, targets) 76 | losses = sum(loss for loss in loss_dict.values()) 77 | 78 | self.optimizer.zero_grad() 79 | losses.backward() 80 | self.optimizer.step() 81 | 82 | # value 83 | loss_box = loss_dict['loss_box_reg'].cpu().item() 84 | loss_class = loss_dict['loss_classifier'].cpu().item() 85 | 86 | ## reset every 25k steps 87 | if self.step_count % 100 == 0: 88 | self.loss_box_meter.reset() 89 | self.loss_class_meter.reset() 90 | 91 | self.loss_box_meter.update(loss_box) 92 | self.loss_class_meter.update(loss_class) 93 | 94 | return self.loss_box_meter.avg, self.loss_class_meter.avg 95 | 96 | 97 | def _train_summaries(self, batch, loss_box, loss_class): 98 | self._get_summaries(batch, loss_box, loss_class, is_train=True) 99 | 100 | 101 | def test(self): 102 | test_data_loader = DataLoader(self.test_ds, batch_size=self.options.test_batch_size, 103 | num_workers=self.options.num_workers, 104 | pin_memory=self.options.pin_memory, 105 | shuffle=self.options.shuffle_test, 106 | collate_fn=self.collate_fn) 107 | 108 | self.model.always_return_loss = True 109 | 110 | test_loss_box = torch.tensor(0.0, device=self.device) 111 | test_loss_class = torch.tensor(0.0, device=self.device) 112 | for tstep, batch in enumerate(tqdm(test_data_loader, desc='Testing')): 113 | if time.time() < self.endtime: 114 | 115 | loss_box, loss_class = self._test_step(batch) 116 | 117 | test_loss_box += loss_box 118 | test_loss_class += loss_class 119 | else: 120 | tqdm.write('Testing interrupted at step ' + str(tstep)) 121 | break 122 | 123 | test_loss_box /= (tstep+1) 124 | test_loss_class /= (tstep+1) 125 | 126 | self.model.always_return_loss = False 127 | self._get_summaries(batch, test_loss_box, test_loss_class, is_train=False) 128 | 129 | 130 | return 131 | 132 | def _test_step(self, input_batch): 133 | 134 | images = [s['image'].to(self.device) for s in input_batch] 135 | targets = [{k: v.to(self.device) for k, v in s.items() if k!='image'} for s in input_batch] 136 | 137 | self.model.eval() 138 | with torch.no_grad(): 139 | loss_dict = self.model(images, targets) 140 | 141 | loss_box = loss_dict['loss_box_reg'].cpu().item() 142 | loss_class = loss_dict['loss_classifier'].cpu().item() 143 | 144 | return loss_box, loss_class 145 | 146 | 147 | def _get_summaries(self, batch, loss_box, loss_class, is_train): 148 | images = [s['image'].to(self.device) for s in batch] 149 | targets = [{k: v.to(self.device) for k, v in s.items() if k!='image'} for s in batch] 150 | 151 | image = images[0] 152 | target = targets[0] 153 | 154 | self.model.eval() 155 | self.model.always_return_loss = False 156 | with torch.no_grad(): 157 | pred = self.model([image])[0] 158 | 159 | # ground truth 160 | gt_boxes = target['boxes'].int() 161 | gt_labels = target['labels'].int() 162 | gt_labels = 
[str(l) for l in gt_labels.tolist()] 163 | 164 | 165 | # prediction 166 | thresh = 0.80 167 | conf = pred['scores'] > thresh 168 | 169 | conf_scores = pred['scores'][conf] 170 | conf_boxes = pred['boxes'][conf].int() 171 | conf_labels = pred['labels'][conf].int() 172 | 173 | valid = torch.zeros_like(conf_labels).bool() 174 | unique_labels = torch.unique(conf_labels) 175 | for uni in unique_labels: 176 | p = (conf_labels==uni).nonzero(as_tuple=False).reshape(-1) 177 | valid[p[0]] = True 178 | 179 | pd_boxes = conf_boxes[valid] 180 | pd_labels = conf_labels[valid] 181 | pd_labels = [str(l) for l in pd_labels.tolist()] 182 | 183 | self._save_summaries(image, gt_boxes, gt_labels, pd_boxes, pd_labels, 184 | loss_box, loss_class, self.step_count, is_train=is_train) 185 | 186 | 187 | def _save_summaries(self, image, gt_boxes, gt_labels, pd_boxes, pd_labels, 188 | loss_box, loss_class, step, is_train=True): 189 | 190 | prefix = 'train/' if is_train else 'test/' 191 | 192 | self.summary_writer.add_scalar(prefix + 'loss_box', loss_box, step) 193 | self.summary_writer.add_scalar(prefix + 'loss_class', loss_class, step) 194 | 195 | self.summary_writer.add_image_with_boxes(prefix + 'gt_boxes', image, gt_boxes, 196 | step, labels=gt_labels, dataformats='CHW') 197 | self.summary_writer.add_image_with_boxes(prefix + 'pd_boxes', image, pd_boxes, 198 | step, labels=pd_labels, dataformats='CHW') 199 | 200 | if is_train: 201 | self.summary_writer.add_scalar('lr', self._get_lr(), step) 202 | return 203 | 204 | 205 | 206 | class AverageMeter(object): 207 | """Computes and stores the average and current value""" 208 | def __init__(self): 209 | self.reset() 210 | 211 | def reset(self): 212 | self.val = 0 213 | self.avg = 0 214 | self.sum = 0 215 | self.count = 0 216 | 217 | def update(self, val, n=1): 218 | self.val = val 219 | self.sum += val * n 220 | self.count += n 221 | self.avg = self.sum / self.count if self.count != 0 else 0 222 | -------------------------------------------------------------------------------- /keypoint/train/keypoint_trainer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.utils.data import DataLoader 4 | from torchvision import transforms 5 | from torchvision.utils import make_grid 6 | 7 | import time 8 | from tqdm import tqdm 9 | tqdm.monitor_interval = 0 10 | 11 | from bop_dataset import BOPDataset 12 | from base_trainer import BaseTrainer 13 | from transforms import RandomFlipLR, RandomRescaleBB, RandomGrayscale, RandomRotation,\ 14 | RandomBlur, ColorJitter, CropAndResize, LocsToHeatmaps,\ 15 | ToTensor, Normalize, Denormalize, Select, AffineCrop 16 | from models import StackedHourglass 17 | from misc import Pose2DEval, KptsMSELoss 18 | 19 | class KeypointTrainer(BaseTrainer): 20 | 21 | def _init_fn(self): 22 | transform_list = [] 23 | transform_list.append(ColorJitter(brightness=self.options.jitter, contrast=self.options.jitter, saturation=self.options.jitter, hue=self.options.jitter/4)) 24 | transform_list.append(AffineCrop(out_size=self.options.crop_size, scale_factor=0.15, rotation_factor=45, dialation=0.25)) 25 | transform_list.append(LocsToHeatmaps(out_size=(self.options.heatmap_size, self.options.heatmap_size))) 26 | transform_list.append(ToTensor()) 27 | transform_list.append(Normalize()) 28 | 29 | test_transform_list = [] 30 | test_transform_list.append(AffineCrop(out_size=self.options.crop_size, scale_factor=0, rotation_factor=0, dialation=0.25)) 31 | 
test_transform_list.append(LocsToHeatmaps(out_size=(self.options.heatmap_size, self.options.heatmap_size))) 32 | test_transform_list.append(ToTensor()) 33 | test_transform_list.append(Normalize()) 34 | 35 | self.train_ds = BOPDataset(self.options.dataset_dir, self.options.dataset, split='train', 36 | valid_objid = self.options.objid, 37 | return_keypoints=True, 38 | transform=transforms.Compose(transform_list)) 39 | 40 | self.test_ds = BOPDataset(self.options.dataset_dir, self.options.dataset, split='test', 41 | valid_objid = self.options.objid, 42 | return_keypoints=True, 43 | transform=transforms.Compose(test_transform_list)) 44 | self.collate_fn = None 45 | 46 | self.options.num_keypoints = self.train_ds.n_kpts 47 | 48 | self.model = StackedHourglass(self.options.num_keypoints).to(self.device) 49 | 50 | if self.options.optimizer is 'adam': 51 | self.optimizer = torch.optim.Adam(params=self.model.parameters(), lr=self.options.lr, 52 | betas=(0.9, 0.999), eps=1e-08) 53 | print('Using ADAM.') 54 | else: 55 | self.optimizer = torch.optim.RMSprop(params=self.model.parameters(), lr=self.options.lr, 56 | momentum=0, weight_decay=self.options.wd) 57 | 58 | 59 | # pack all models and optimizers in dictionaries to interact with the checkpoint saver 60 | self.models_dict = {'stacked_hg': self.model} 61 | self.optimizers_dict = {'optimizer': self.optimizer} 62 | 63 | self.criterion = KptsMSELoss(use_vis=self.options.use_vis).to(self.device) 64 | self.pose = Pose2DEval(detection_thresh=self.options.detection_thresh, dist_thresh=self.options.dist_thresh) 65 | 66 | print('Total number of model parameters:', self.model.num_trainable_parameters()) 67 | print('Using device:', self.device) 68 | print('Using optimizer:', self.options.optimizer) 69 | 70 | def _train_step(self, input_batch): 71 | # Force optimizer to use initial/reset learning rate, if specified 72 | if self.options.new_lr is True: 73 | for g in self.optimizer.param_groups: 74 | g['lr'] = self.options.lr 75 | self.options.new_lr = False 76 | 77 | input_batch = {k: v.to(self.device) for k,v in input_batch.items()} 78 | 79 | self.model.train() 80 | images = input_batch['image'] 81 | gt_keypoints = input_batch['keypoint_heatmaps'] 82 | vis = input_batch['visible_keypoints'] 83 | 84 | pred_keypoints = self.model(images) 85 | loss = torch.tensor(0.0, device=self.device) 86 | for i in range(len(pred_keypoints)): 87 | loss += self.criterion(pred_keypoints[i], gt_keypoints, vis) 88 | self.optimizer.zero_grad() 89 | loss.backward() 90 | self.optimizer.step() 91 | return [pk.detach() for pk in pred_keypoints], loss.detach() 92 | 93 | def _train_summaries(self, batch, pred_keypoints, loss): 94 | batch = {k: v.to(self.device) for k,v in batch.items()} 95 | 96 | pck = self.pose.pck(batch['keypoint_heatmaps'], pred_keypoints[-1]) 97 | self._save_summaries(batch, pred_keypoints, loss, pck, self.step_count, is_train=True) 98 | 99 | def test(self): 100 | test_data_loader = DataLoader(self.test_ds, batch_size=self.options.test_batch_size, 101 | num_workers=self.options.num_workers, 102 | pin_memory=self.options.pin_memory, 103 | shuffle=self.options.shuffle_test) 104 | test_loss = torch.tensor(0.0, device=self.device) 105 | mean_pck = 0.0 106 | for tstep, batch in enumerate(tqdm(test_data_loader, desc='Testing')): 107 | if time.time() < self.endtime: 108 | batch = {k: v.to(self.device) for k,v in batch.items()} 109 | pred_keypoints, loss = self._test_step(batch) 110 | test_loss += loss.data 111 | mean_pck += self.pose.pck(batch['keypoint_heatmaps'], 
pred_keypoints[-1]) 112 | else: 113 | tqdm.write('Testing interrupted at step ' + str(tstep)) 114 | break 115 | test_loss /= (tstep+1) 116 | mean_pck /= (tstep+1) 117 | self._save_summaries(batch, pred_keypoints, test_loss, mean_pck, self.step_count, is_train=False) 118 | return test_loss 119 | 120 | def _test_step(self, input_batch): 121 | self.model.eval() 122 | images = input_batch['image'] 123 | gt_keypoints = input_batch['keypoint_heatmaps'] 124 | vis = input_batch['visible_keypoints'] 125 | with torch.no_grad(): 126 | pred_keypoints = self.model(images) 127 | loss = torch.tensor(0.0, device=self.device) 128 | for i in range(len(pred_keypoints)): 129 | loss += self.criterion(pred_keypoints[i], gt_keypoints, vis) 130 | return pred_keypoints, loss 131 | 132 | def _save_summaries(self, input_batch, pred_keypoints, loss, pck, step, is_train=True): 133 | prefix = 'train/' if is_train else 'test/' 134 | input_batch = Denormalize()(input_batch) 135 | images = input_batch['image'] 136 | gt_keypoints = input_batch['keypoint_heatmaps'] 137 | 138 | gt_image_keypoints = [] 139 | pred_image_keypoints = [] 140 | gt_image_keypoints, pred_image_keypoints = self.pose.draw_keypoints_with_labels(images, gt_keypoints, pred_keypoints[-1]) 141 | 142 | gt_image_keypoints_grid = make_grid(gt_image_keypoints, pad_value=1, nrow=3) 143 | pred_image_keypoints_grid = make_grid(pred_image_keypoints, pad_value=1, nrow=3) 144 | 145 | pred_heatmaps_grid = make_grid(pred_keypoints[-1][0,:,:,:].unsqueeze(0).transpose(0,1), pad_value=1, nrow=5) 146 | pred_heatmaps_grid[pred_heatmaps_grid > 1] = 1 147 | pred_heatmaps_grid[pred_heatmaps_grid < 0] = 0 148 | 149 | self.summary_writer.add_scalar(prefix + 'loss', loss, step) 150 | self.summary_writer.add_scalar(prefix + 'PCK', pck, step) 151 | self.summary_writer.add_image(prefix + 'gt_image_keypoints', gt_image_keypoints_grid, step) 152 | self.summary_writer.add_image(prefix + 'pred_image_keypoints', pred_image_keypoints_grid, step) 153 | self.summary_writer.add_image(prefix + 'pred_heatmaps_image1', pred_heatmaps_grid, step) 154 | if is_train: 155 | self.summary_writer.add_scalar('lr', self._get_lr(), step) 156 | return 157 | -------------------------------------------------------------------------------- /keypoint/train/train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # this is a hack to make it work in the cluster because 4 | #import matplotlib 5 | #matplotlib.use('Agg') 6 | 7 | import torch 8 | import numpy as np 9 | from train_options import TrainOptions 10 | from keypoint_trainer import KeypointTrainer 11 | from detection_trainer import DetectionTrainer 12 | 13 | if __name__ == '__main__': 14 | 15 | # reproducibility 16 | np.random.seed(0) 17 | torch.manual_seed(0) 18 | 19 | # training code 20 | options = TrainOptions().parse_args() 21 | if options.task == 'keypoints': 22 | trainer = KeypointTrainer(options) 23 | elif options.task == 'detection': 24 | trainer = DetectionTrainer(options) 25 | else: 26 | print("The requested option is not supported on this dataset") 27 | exit() 28 | 29 | trainer.train() 30 | -------------------------------------------------------------------------------- /keypoint/train/train_options.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from .base_options import BaseTrainOptions 3 | 4 | class TrainOptions(BaseTrainOptions): 5 | 6 | def __init__(self): 7 | self.parser = argparse.ArgumentParser() 8 | 9 | bop = 
self.parser.add_argument_group('BOP') 10 | bop.add_argument('--dataset', default=None) 11 | bop.add_argument('--objid', nargs='+', type=int) 12 | bop.add_argument('--use_vis', default=False, action='store_true', 13 | help='ignore invisible keypoints during backprop; \ 14 | default false: the network will learn to detect keypoints only on the centered object') 15 | bop.add_argument('--new_lr', default=False, action='store_true') 16 | bop.add_argument('--lr_schedule', nargs='+', type=int) 17 | bop.add_argument('--lr_gamma', type=float, default=0.1) 18 | bop.add_argument("--lr_decay", type=float, default=1.00, help="Exponential decay rate") 19 | 20 | bop.add_argument('--warmup_steps', type=int, default=1000) 21 | bop.add_argument('--hr_w', type=int, default=19) 22 | 23 | req = self.parser.add_argument_group('Required') 24 | req.add_argument('--name', required=True, help='Name of the experiment') 25 | task = req.add_mutually_exclusive_group(required=True) 26 | task.add_argument('--detection', dest='task', action='store_const', const='detection') 27 | task.add_argument('--segmentation', dest='task', action='store_const', const='segmentation') 28 | task.add_argument('--keypoints', dest='task', action='store_const', const='keypoints') 29 | task.add_argument('--keypoints_hr', dest='task', action='store_const', const='keypoints_hr') 30 | 31 | task.add_argument('--joint', dest='task', action='store_const', const='joint') 32 | task.add_argument('--joint_gan', dest='task', action='store_const', const='joint_gan') 33 | task.add_argument('--joint_ref', dest='task', action='store_const', const='joint_ref') 34 | task.add_argument('--autoencoder', dest='task', action='store_const', const='autoencoder') 35 | task.add_argument('--k2m', dest='task', action='store_const', const='k2m') 36 | task.add_argument('--k2m_gan', dest='task', action='store_const', const='k2m_gan') 37 | req.set_defaults(task='keypoints') 38 | 39 | gen = self.parser.add_argument_group('General') 40 | gen.add_argument('--time_to_run', type=int, default=82800, help='Total time to run in seconds') 41 | gen.add_argument('--resume', dest='resume', default=False, action='store_true', help='Resume from checkpoint (use latest checkpoint by default)') 42 | gen.add_argument('--num_workers', type=int, default=4, help='Number of processes used for data loading') 43 | pin = gen.add_mutually_exclusive_group() 44 | pin.add_argument('--pin_memory', dest='pin_memory', action='store_true', help='Pin memory during data loading') 45 | pin.add_argument('--no_pin_memory', dest='pin_memory', action='store_false', help='Do not pin memory during data loading') 46 | gen.set_defaults(pin_memory=True) 47 | 48 | io = self.parser.add_argument_group('io') 49 | io.add_argument('--dataset_dir', default='/scratch/yufu/bop', help='Path to the desired dataset') 50 | io.add_argument('--log_dir', default='../logs', help='Directory to store logs') 51 | io.add_argument('--checkpoint', default=None, help='Path to checkpoint') 52 | io.add_argument('--from_json', default=None, help='Load options from json file instead of the command line') 53 | 54 | data_proc = self.parser.add_argument_group('Data Preprocessing') 55 | data_proc.add_argument('--degrees', type=float, default=0, help='Random rotation angle in the range [-degrees, degrees]') 56 | data_proc.add_argument('--max_scale', type=float, default=1.0) 57 | data_proc.add_argument('--crop_size', type=int, default=256, help='Size of cropped image to feed to the network') 58 | fliplr = 
data_proc.add_mutually_exclusive_group() 59 | fliplr.add_argument('--flip_lr', dest='flip_lr', action='store_true', help='Flip training images') 60 | fliplr.add_argument('--no_flip_lr', dest='flip_lr', action='store_false', help='Do not flip training images') 61 | apriltag = data_proc.add_mutually_exclusive_group() 62 | apriltag.add_argument('--apriltag', dest='apriltag', action='store_true', help='Enable apriltag-related data preprocessing') 63 | apriltag.add_argument('--no_apriltag', dest='apriltag', action='store_false', help='Disable apriltag-related data preprocessing') 64 | rr = data_proc.add_mutually_exclusive_group() 65 | rr.add_argument('--random_rescale', dest='random_rescale', action='store_true', help='Randomly rescale bounding boxes') 66 | rr.add_argument('--no_random_rescale', dest='random_rescale', action='store_false', help='Do not rescale bounding boxes') 67 | data_proc.add_argument('--heatmap_size', type=int, default=64, help='Size of output heatmaps') 68 | data_proc.add_argument('--detection_thresh', type=float, default=1e-1, help='Confidence threshold for keypoint detection') 69 | data_proc.add_argument('--dist_thresh', type=float, default=10, help='Distance threshold (in pixels) for PCK evaluation') 70 | data_proc.add_argument('--jitter', type=float, default=0.25, help='Amount of image jitter to apply [0, 1]') 71 | data_proc.set_defaults(flip_lr=True, random_rescale=True, apriltag=True) 72 | arch_hg = self.parser.add_argument_group('Hourglass Architecture') 73 | arch_hg.add_argument('--hg_channels', type=int, default=256, help='Number of channels for the Hourglass') 74 | arch_hg.add_argument('--num_hg', type=int, default=2, help='Number of stacked Hourglasses') 75 | arch_hg.add_argument('--num_resblocks', type=int, default=1, help='Number of stacked residual blocks') 76 | 77 | arch_unet = self.parser.add_argument_group('UNet Architecture') 78 | arch_unet.add_argument('--num_filters', type=int, default=64, help='Number of filters in conv1') 79 | arch_unet.add_argument('--num_blocks', type=int, default=5, help='Number of blocks') 80 | arch_unet.add_argument('--unet_type', default='v2', help='UNet variant') 81 | arch_unet.add_argument('--mask_only', dest='mask_only', default=False, action='store_true', help='Output masks only') 82 | 83 | train = self.parser.add_argument_group('Training Options') 84 | train.add_argument('--num_keypoints', type=int, default=76, help='Number of distinct keypoint classes') 85 | train.add_argument('--num_epochs', type=int, default=30, help='Total number of training epochs') 86 | train.add_argument('--batch_size', type=int, default=16, help='Batch size') 87 | train.add_argument('--test_batch_size', type=int, default=8, help='Test batch size') 88 | shuffle_train = train.add_mutually_exclusive_group() 89 | shuffle_train.add_argument('--shuffle_train', dest='shuffle_train', action='store_true', help='Shuffle training data') 90 | shuffle_train.add_argument('--no_shuffle_train', dest='shuffle_train', action='store_false', help='Don\'t shuffle training data') 91 | shuffle_test = train.add_mutually_exclusive_group() 92 | shuffle_test.add_argument('--shuffle_test', dest='shuffle_test', action='store_true', help='Shuffle testing data') 93 | shuffle_test.add_argument('--no_shuffle_test', dest='shuffle_test', action='store_false', help='Don\'t shuffle testing data') 94 | train.set_defaults(shuffle_train=True, shuffle_test=True) 95 | train.add_argument('--summary_steps', type=int, default=100, help='Summary saving frequency') 96 | train.add_argument('--checkpoint_steps', type=int, default=10000, help='Checkpoint saving frequency') 97 | 
train.add_argument('--test_steps', type=int, default=1000, help='Testing frequency') 98 | train.add_argument('--test_iters', type=int, default=200, help='Number of testing iterations') 99 | 100 | 101 | optim = self.parser.add_argument_group('Optimization') 102 | optim_type = optim.add_mutually_exclusive_group() 103 | optim_type.add_argument('--use_sgd', dest='optimizer', action='store_const', const='sgd',help='Use SGD (default Adam)') 104 | optim_type.add_argument('--use_rmsprop', dest='optimizer', action='store_const', const='rmsprop',help='Use (default Adam)') 105 | optim_type.add_argument('--use_adam', dest='optimizer', action='store_const', const='adam',help='Use SGD (default Adam)') 106 | optim.add_argument('--adam_beta1', type=float, default=0.9, help='Value for Adam Beta 1') 107 | optim.add_argument('--sgd_momentum', type=float, default=0.0, help='Momentum for SGD') 108 | optim.add_argument("--lr", type=float, default=2.5e-4, help="Learning rate") 109 | optim.add_argument("--wd", type=float, default=0, help="Weight decay weight") 110 | optim.add_argument('--keypoint_lw', type=float, default=100, help='Keypoint loss weight') 111 | optim.add_argument('--gan_mask_lw', type=float, default=10, help='Gan mask loss weight') 112 | 113 | optim.set_defaults(optimizer='rmsprop') 114 | 115 | return 116 | -------------------------------------------------------------------------------- /keypoint/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .data_loader import CheckpointDataLoader 2 | from .saver import CheckpointSaver 3 | -------------------------------------------------------------------------------- /keypoint/utils/data_loader.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import DataLoader 3 | from torch.utils.data.sampler import Sampler 4 | 5 | class RandomSampler(Sampler): 6 | 7 | def __init__(self, data_source, checkpoint): 8 | self.data_source = data_source 9 | if checkpoint is not None and checkpoint['dataset_perm'] is not None: 10 | self.dataset_perm = checkpoint['dataset_perm'] 11 | self.perm = self.dataset_perm[checkpoint['batch_size']*checkpoint['batch_idx']:] 12 | else: 13 | self.dataset_perm = torch.randperm(len(self.data_source)).tolist() 14 | self.perm = self.dataset_perm 15 | 16 | def __iter__(self): 17 | return iter(self.perm) 18 | 19 | def __len__(self): 20 | return len(self.perm) 21 | 22 | class SequentialSampler(Sampler): 23 | 24 | def __init__(self, data_source, checkpoint): 25 | self.data_source = data_source 26 | if checkpoint is not None and checkpoint['dataset_perm'] is not None: 27 | self.dataset_perm = checkpoint['dataset_perm'] 28 | self.perm = self.dataset_perm[checkpoint['batch_size']*checkpoint['batch_idx']:] 29 | else: 30 | self.dataset_perm = list(range(len(self.data_source))) 31 | self.perm = self.dataset_perm 32 | 33 | def __iter__(self): 34 | return iter(self.perm) 35 | 36 | def __len__(self): 37 | return len(self.perm) 38 | 39 | class CheckpointDataLoader(DataLoader): 40 | 41 | def __init__(self, dataset, checkpoint=None, batch_size=1, 42 | shuffle=False, num_workers=0, pin_memory=False, drop_last=False, 43 | timeout=0, worker_init_fn=None, collate_fn=None): 44 | 45 | if shuffle: 46 | sampler = RandomSampler(dataset, checkpoint) 47 | else: 48 | sampler = SequentialSampler(dataset, checkpoint) 49 | if checkpoint is not None: 50 | self.checkpoint_batch_idx = checkpoint['batch_idx'] 51 | else: 52 | 
self.checkpoint_batch_idx = 0 53 | 54 | super(CheckpointDataLoader, self).__init__(dataset, sampler=sampler, shuffle=False, batch_size=batch_size, 55 | pin_memory=pin_memory, timeout=timeout, worker_init_fn=None, 56 | collate_fn=collate_fn) 57 | -------------------------------------------------------------------------------- /keypoint/utils/img_utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import torch 3 | import numpy as np 4 | from plyfile import PlyData, PlyElement 5 | 6 | def draw_kpts(img, kpts, r=5, thickness=5, color=(255,0,0)): 7 | if isinstance(img, np.ndarray): 8 | img = img.copy().astype(np.uint8) 9 | if isinstance(img, torch.Tensor): 10 | img = img.numpy() 11 | img = img.copy().astype(np.uint8) 12 | 13 | for kpt in kpts: 14 | if len(kpt)>2: 15 | x, y, c = kpt 16 | else: 17 | x, y = kpt 18 | c = 1 19 | 20 | if c > 0: 21 | cv2.circle(img, (int(x), int(y)), r, color, thickness) 22 | 23 | return img 24 | 25 | 26 | 27 | ### Save for visualization 28 | def save_ply(vert, face=None, filename='file.ply'): 29 | # Vertices 30 | if isinstance(vert, np.ndarray): 31 | vert = vert.tolist() 32 | vert = [tuple(v) for v in vert] 33 | vert = np.array(vert, dtype=[('x', 'f4'), 34 | ('y', 'f4'), 35 | ('z', 'f4')]) 36 | vert = PlyElement.describe(vert, 'vertex') 37 | 38 | # Faces 39 | if face is not None: 40 | if isinstance(face, np.ndarray): 41 | face = face.tolist() 42 | face = [(face[i], 255, 255, 255) for i in range(len(face))] 43 | face = np.array(face, dtype=[('vertex_indices', 'i4', (3,)), 44 | ('red', 'u1'), 45 | ('green', 'u1'), 46 | ('blue', 'u1')]) 47 | face = PlyElement.describe(face, 'face') 48 | 49 | # Save 50 | if face is not None: 51 | with open(filename, 'wb') as f: 52 | PlyData([vert, face]).write(f) 53 | else: 54 | with open(filename, 'wb') as f: 55 | PlyData([vert]).write(f) 56 | 57 | 58 | def read_ply(plyfile): 59 | plydata = PlyData.read(plyfile) 60 | v = plydata['vertex'].data 61 | v = [list(i) for i in v] 62 | v = np.array(v) 63 | f = plydata['face'].data 64 | f = [list(i) for i in f] 65 | f = np.array(f).squeeze() 66 | return v, f 67 | 68 | 69 | -------------------------------------------------------------------------------- /keypoint/utils/saver.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import datetime 3 | import os 4 | 5 | class CheckpointSaver: 6 | 7 | def __init__(self, save_dir, save_steps=1000): 8 | self.save_dir = os.path.abspath(save_dir) 9 | self.save_steps = save_steps 10 | if not os.path.exists(self.save_dir): 11 | os.makedirs(self.save_dir) 12 | self._get_latest_checkpoint() 13 | return 14 | 15 | # check if a checkpoint exists in the current directory 16 | def exists_checkpoint(self, checkpoint_file=None): 17 | if checkpoint_file is None: 18 | return False if self.latest_checkpoint is None else True 19 | else: 20 | return os.path.isfile(checkpoint_file) 21 | 22 | # save checkpoint 23 | def save_checkpoint(self, models, optimizers, epoch, batch_idx, batch_size, dataset_perm, total_step_count): 24 | timestamp = datetime.datetime.now() 25 | checkpoint_filename = os.path.abspath(os.path.join(self.save_dir, timestamp.strftime('%Y_%m_%d-%H_%M_%S') + '.pt')) 26 | checkpoint = {} 27 | for model in models: 28 | checkpoint[model] = models[model].state_dict() 29 | for optimizer in optimizers: 30 | checkpoint[optimizer] = optimizers[optimizer].state_dict() 31 | checkpoint['epoch'] = epoch 32 | checkpoint['batch_idx'] = batch_idx 33 | 
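# The entries below record the data-loading state: together with 'epoch' and 'batch_idx' above,
# 'batch_size' and 'dataset_perm' let the samplers in data_loader.py resume an interrupted epoch
# from the exact position in the saved permutation.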
checkpoint['batch_size'] = batch_size 34 | checkpoint['dataset_perm'] = dataset_perm 35 | checkpoint['total_step_count'] = total_step_count 36 | print(timestamp, 'Epoch:', epoch, 'Iteration:', batch_idx) 37 | print('Saving checkpoint file [' + checkpoint_filename + ']') 38 | torch.save(checkpoint, checkpoint_filename) 39 | return 40 | 41 | # load a checkpoint 42 | def load_checkpoint(self, models, optimizers, checkpoint_file=None): 43 | if checkpoint_file is None: 44 | print('Loading latest checkpoint [' + self.latest_checkpoint + ']') 45 | checkpoint_file = self.latest_checkpoint 46 | checkpoint = torch.load(checkpoint_file) 47 | for model in models: 48 | models[model].load_state_dict(checkpoint[model]) 49 | for optimizer in optimizers: 50 | optimizers[optimizer].load_state_dict(checkpoint[optimizer]) 51 | return {'epoch': checkpoint['epoch'], 52 | 'batch_idx': checkpoint['batch_idx'], 53 | 'batch_size': checkpoint['batch_size'], 54 | 'dataset_perm': checkpoint['dataset_perm'], 55 | 'total_step_count': checkpoint['total_step_count']} 56 | 57 | # get filename of latest checkpoint if it exists 58 | def _get_latest_checkpoint(self): 59 | checkpoint_list = [] 60 | for dirpath, dirnames, filenames in os.walk(self.save_dir): 61 | for filename in filenames: 62 | if filename.endswith('.pt'): 63 | checkpoint_list.append(os.path.abspath(os.path.join(dirpath, filename))) 64 | checkpoint_list = sorted(checkpoint_list) 65 | self.latest_checkpoint = None if (len(checkpoint_list) is 0) else checkpoint_list[-1] 66 | return 67 | 68 | -------------------------------------------------------------------------------- /keypoint/utils/trimesh_renderer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import trimesh 3 | import pyrender 4 | from PIL import Image, ImageEnhance 5 | 6 | class trimesh_renderer(): 7 | def __init__(self, img_w, img_h): 8 | self.img_w = img_w 9 | self.img_h = img_h 10 | self.default_focal = 500 11 | self.renderer = pyrender.OffscreenRenderer(viewport_width=img_w, 12 | viewport_height=img_h, 13 | point_size=1.0) 14 | 15 | def __call__(self, fuze_trimesh, rot=None, t=None, image=None, 16 | fx=None, fy=None, cx=None, cy=None, mask_over=False): 17 | 18 | # Camera parameter 19 | if fx is None or fy is None: 20 | fx = self.default_focal 21 | fy = self.default_focal 22 | 23 | if cx is None or cy is None: 24 | cx = self.img_w / 2 25 | cy = self.img_h / 2 26 | 27 | # 6DoF object pose in camera coordinate 28 | # You can skip this and apply directly on the input mesh 29 | if rot is None: 30 | rot = np.eye(3) 31 | if t is None: 32 | t = np.zeros(3) 33 | transform = np.zeros([4,4]) 34 | transform[:3, :3] = rot 35 | transform[:3, -1] = t 36 | fuze_trimesh.apply_transform(transform) 37 | 38 | 39 | # OpenGL convension 40 | transform = trimesh.transformations.rotation_matrix( 41 | np.radians(180), [1, 0, 0]) 42 | fuze_trimesh.apply_transform(transform) 43 | 44 | 45 | mesh = pyrender.Mesh.from_trimesh(fuze_trimesh) 46 | scene = pyrender.Scene(ambient_light=(0.5, 0.5, 0.5)) 47 | scene.add(mesh) 48 | 49 | camera = pyrender.IntrinsicsCamera(fx=fx, fy=fy, cx=cx, cy=cy, zfar=3000) 50 | camera_pose = np.eye(4) 51 | scene.add(camera, pose=camera_pose) 52 | 53 | # Render 54 | color, rend_depth = self.renderer.render(scene, flags=pyrender.RenderFlags.RGBA) 55 | color = color.astype(np.uint8) 56 | 57 | if image is None: 58 | return color 59 | 60 | 61 | valid_mask = (rend_depth>0)[:,:,None] 62 | output_img = (color[:, :, :3] * valid_mask + 63 | (1 - 
valid_mask) * image) 64 | 65 | if mask_over: 66 | mask = np.zeros([self.img_h, self.img_w, 3]) 67 | mask[:,:,1] = 250 68 | alpha = 0.3 69 | mask = alpha * mask + (1-alpha) * image 70 | overlay = mask * valid_mask + image * (1-valid_mask) 71 | overlay = overlay.astype(np.uint8) 72 | return output_img, overlay 73 | 74 | else: 75 | return output_img 76 | 77 | def render_scene(self, meshes, Rs, ts, image=None, fx=None, fy=None, cx=None, cy=None): 78 | 79 | # Camera parameter 80 | if fx is None or fy is None: 81 | fx = self.default_focal 82 | fy = self.default_focal 83 | 84 | if cx is None or cy is None: 85 | cx = self.img_w / 2 86 | cy = self.img_h / 2 87 | 88 | color = (0.2, 0.4, 0.2, 1.0) 89 | material = pyrender.MetallicRoughnessMaterial( 90 | metallicFactor=0.1, 91 | alphaMode='OPAQUE', 92 | baseColorFactor=color) 93 | 94 | # 6DoF object pose in camera coordinate 95 | # You can skip this and apply directly on the input mesh 96 | for i, mesh in enumerate(meshes): 97 | transform = np.zeros([4,4]) 98 | transform[:3, :3] = Rs[i] 99 | transform[:3, -1] = ts[i] 100 | mesh.apply_transform(transform) 101 | 102 | 103 | # OpenGL convension 104 | transform = trimesh.transformations.rotation_matrix( 105 | np.radians(180), [1, 0, 0]) 106 | for mesh in meshes: 107 | mesh.apply_transform(transform) 108 | 109 | 110 | scene = pyrender.Scene(ambient_light=(0.8, 0.8, 0.8, 1.0)) 111 | for fuze_trimesh in meshes: 112 | mesh = pyrender.Mesh.from_trimesh(fuze_trimesh) 113 | scene.add(mesh) 114 | 115 | camera = pyrender.IntrinsicsCamera(fx=fx, fy=fy, cx=cx, cy=cy, zfar=3000) 116 | camera_pose = np.eye(4) 117 | scene.add(camera, pose=camera_pose) 118 | 119 | light = pyrender.DirectionalLight(color=[1.0, 1.0, 1.0], intensity=1.0) 120 | light_pose = np.eye(4) 121 | 122 | light_pose[:3, 3] = np.array([0, -1, 1]) 123 | scene.add(light, pose=light_pose) 124 | 125 | light_pose[:3, 3] = np.array([0, 1, 1]) 126 | scene.add(light, pose=light_pose) 127 | 128 | light_pose[:3, 3] = np.array([1, 1, 2]) 129 | scene.add(light, pose=light_pose) 130 | 131 | # Render 132 | color, rend_depth = self.renderer.render(scene, flags=pyrender.RenderFlags.RGBA) 133 | color = color.astype(np.uint8) 134 | 135 | enhancer = ImageEnhance.Contrast(Image.fromarray(color)) 136 | factor = 1.2 #increase contrast 137 | color = enhancer.enhance(factor) 138 | color = np.array(color, dtype=np.uint8) 139 | 140 | if image is None: 141 | return color 142 | 143 | 144 | valid_mask = (rend_depth>0)[:,:,None] 145 | output_img = (color[:, :, :3] * valid_mask + 146 | (1 - valid_mask) * image) 147 | 148 | return output_img 149 | 150 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # [CVPR 2023 Highlight] Object Pose Estimation with Statistical Guarantees: Conformal Keypoint Detection and Geometric Uncertainty Propagation 2 | [Paper](https://arxiv.org/abs/2303.12246) | [Short Presentation](https://youtu.be/NWUf4hd571E) | [Long Presentation](https://youtu.be/JPvoObEYCAo) 3 | 4 | ## Motivation 5 | Endow any estimated pose with **provably correct** performance guarantees, i.e., **a worst-case error bound** from the groundtruth pose 6 | 7 | ## Abstract 8 | The two-stage object pose estimation paradigm first detects semantic keypoints on the image and then estimates the 6D pose by minimizing reprojection errors. 
Despite performing well on standard benchmarks, existing techniques offer no provable guarantees on the quality and uncertainty of the estimation. In this paper, we inject two fundamental changes, namely **conformal keypoint detection** and **geometric uncertainty propagation**, into the two-stage paradigm and propose the first pose estimator that endows an estimation with provable and computable worst-case error bounds. On one hand, conformal keypoint detection applies the statistical machinery of _inductive conformal prediction_ to convert heuristic keypoint detections into circular or elliptical prediction sets that cover the groundtruth keypoints with a user-specified marginal probability (e.g., 90%). Geometric uncertainty propagation, on the other, propagates the geometric constraints on the keypoints to the 6D object pose, leading to a **Pose UnceRtainty SEt (PURSE)** that guarantees coverage of the groundtruth pose with the same probability. The PURSE, however, is a nonconvex set that does not directly lead to estimated poses and uncertainties. Therefore, we develop RANdom SAmple averaGing (RANSAG) to compute an average pose and apply semidefinite relaxation to upper bound the worst-case errors between the average pose and the groundtruth. On the LineMOD Occlusion dataset we demonstrate: (i) the PURSE covers the groundtruth with valid probabilities; (ii) the worst-case error bounds provide correct uncertainty quantification; and (iii) the average pose achieves better or similar accuracy as representative methods based on sparse keypoints. 9 | 10 | ![](assets/poster.png) 11 | 12 | ## Quick start 13 | 14 | ### Prepare data 15 | - Download `data.zip` from this google drive [link](https://drive.google.com/file/d/1UGek7S3-4wwvgMlGvfBxJQPGW3Q2MfaR/view?usp=sharing) 16 | - Unzip the data and put it into the `keypoint` folder (then you should have a folder `keypoint/data`) 17 | 18 | ### Conformal calibration 19 | 20 | ```python 21 | python conformal_calibration.py --score_type ball 22 | ``` 23 | 24 | You can change `--score_type` to `ellipse` to use a different nonconformity function. 25 | You can also add `--do_frcnn` to use FRCNN to detect object bounding boxes. 26 | 27 | The calibration scores will be saved into a pickle file. 28 | 29 | ### Conformal prediction 30 | 31 | ```python 32 | python conformal_prediction.py --score_type ball --epsilon 10 --save_fig 33 | ``` 34 | will write a set of pdf files drawing the conformal prediction sets (balls) into `keypoint/data/bop/lmo-org/icp_results`. You can change the results folder in `conformal_prediction.py`. 35 | 36 | ## Acknowledgement 37 | The source code in the `keypoint` folder are adapted from the git repo https://github.com/yufu-wang/6D_Pose. We would like to thank Yufu Wang for helping us run the code. 
38 | 39 | ## Citation 40 | If you find this paper and implementation useful, please cite 41 | ```bibtex 42 | @inproceedings{yang23cvpr-purse, 43 | title={Object pose estimation with statistical guarantees: Conformal keypoint detection and geometric uncertainty propagation}, 44 | author={Yang, Heng and Pavone, Marco}, 45 | booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, 46 | pages={8947--8958}, 47 | year={2023} 48 | } 49 | ``` 50 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.cm as cm 3 | import matplotlib.pyplot as plt 4 | import matplotlib 5 | import torch 6 | 7 | K = 100 8 | alpha = 0.8 9 | 10 | def one_each(pred, thresh=0.0): 11 | # Postprocess frcnn: get at most one instance per class 12 | # Return: boxes and labels 13 | conf = pred['scores'] > thresh 14 | 15 | conf_scores = pred['scores'][conf] 16 | conf_boxes = pred['boxes'][conf].int() 17 | conf_labels = pred['labels'][conf].int() 18 | 19 | valid = torch.zeros_like(conf_labels).bool() 20 | unique_labels = torch.unique(conf_labels) 21 | for uni in unique_labels: 22 | p = (conf_labels==uni).nonzero(as_tuple=False).reshape(-1) 23 | valid[p[0]] = True 24 | 25 | pd_scores = conf_scores[valid] 26 | pd_boxes = conf_boxes[valid] 27 | pd_labels = conf_labels[valid] 28 | 29 | return pd_boxes, pd_labels 30 | 31 | 32 | def clean_heatmap(heatmap,mode=1): 33 | ''' 34 | Normalize raw heatmap such that 35 | - the entries are all nonnegative 36 | - the entries sum up to 1.0 37 | ''' 38 | if mode == 1: 39 | min_val = np.min(heatmap) 40 | heatmap = heatmap - min_val # make sure heatmap is always positive 41 | med_val = np.median(heatmap) # take median 42 | heatmap[heatmap < med_val] = 0 # get rid of all values below median 43 | 44 | elif mode == 2: 45 | min_val = np.min(heatmap) 46 | if min_val < 0: 47 | heatmap = heatmap - min_val 48 | 49 | else: 50 | raise RuntimeError('Unknown mode for cleaning heatmap') 51 | heatmap = heatmap / np.sum(heatmap) 52 | return heatmap 53 | 54 | def topk_points(heatmap,k): 55 | ''' 56 | Return the top k most likely keypoint detections in the heatmap 57 | xy: xy coordinates of the keypoints 58 | vk: values of the top k probabilities (re-normalized to sum up to 1.0) 59 | ''' 60 | r, c = np.unravel_index( 61 | np.flip(np.argsort(heatmap.ravel())), heatmap.shape) 62 | v = heatmap[r,c] 63 | rk = r[:k] 64 | ck = c[:k] 65 | vk = v[:k] 66 | vk = vk / np.sum(vk) 67 | # offset the coordinates to the center 68 | # For example (0,0) pixel has coordinates (0.5,0.5) 69 | ck = ck + 0.5 70 | rk = rk + 0.5 71 | xy = np.stack((ck,rk),axis=1) 72 | return xy, vk 73 | 74 | 75 | def conformity_score(kpt,heatmap,type="ball"): 76 | ''' 77 | Given a keypoint location on a 2D image, and 78 | a heatmap prediction of the keypoint location, 79 | compute the nonconformility score 80 | :param 81 | kpt: (2,) numpy array 82 | heatmap: (H,W) numpy array 83 | type: choice of the conformity function 84 | :return 85 | conformity score 86 | ''' 87 | 88 | heatmap = clean_heatmap(heatmap,mode=1) 89 | 90 | if type == "ball": 91 | r, c = np.unravel_index( 92 | np.argmax(heatmap.ravel()),heatmap.shape) 93 | maxp = heatmap[r,c] 94 | # note here kpt loc (x,y), x corresponds to column, y corresponds to row!!! 
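# offset the argmax from integer indices to pixel centers (same convention as topk_points), then
# score by the distance from the keypoint to the heatmap mode scaled by the peak probability maxp:
# for the same pixel error, a confident (peaky) heatmap yields a larger nonconformity score,
# which icp() inverts by returning a ball of radius q / maxp around the mode.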
95 | r += 0.5 96 | c += 0.5 97 | dist = np.linalg.norm( kpt - np.array([c,r]) ) 98 | return dist * maxp 99 | 100 | elif type == "ellipse": 101 | xy, v = topk_points(heatmap,K) 102 | wkpt = v @ xy 103 | diff = xy - wkpt 104 | sigma = diff.T @ np.diag(v) @ diff 105 | sigmainv = np.linalg.inv(sigma) 106 | return (kpt - wkpt) @ sigmainv @ (kpt-wkpt) 107 | 108 | else: 109 | raise RuntimeError('Unknown score type.') 110 | 111 | 112 | def icp(heatmap,q,type="ball"): 113 | ''' 114 | Given a heatmap and a quantile, output the inductive prediction set 115 | :param 116 | heatmap: numpy array H x W 117 | q: scalar quantitle 118 | type: choice of conformity function 119 | ''' 120 | 121 | heatmap = clean_heatmap(heatmap,mode=1) 122 | 123 | if type == "ball": 124 | r, c = np.unravel_index( 125 | np.argmax(heatmap.ravel()),heatmap.shape) 126 | maxp = heatmap[r,c] 127 | c += 0.5 128 | r += 0.5 129 | return np.array([c,r]), q / maxp # return center and radius 130 | 131 | elif type == "ellipse": 132 | xy, v = topk_points(heatmap,K) 133 | wkpt = v @ xy 134 | diff = xy - wkpt 135 | sigma = diff.T @ np.diag(v) @ diff 136 | sigmainv = np.linalg.inv(sigma) 137 | return wkpt, sigmainv / q # return center and information matrix 138 | 139 | else: 140 | raise RuntimeError('Unknown score type.') 141 | 142 | 143 | def draw_icp_ball(img,heatmaps,kpt_gt,pred_set,fname=None,show=False,heatmaponly=False): 144 | linewidth = 2 145 | pointsize = 2 146 | height = 20 147 | subplot_gap = 0.05 148 | num_kpts = len(pred_set) 149 | colors = cm.Set2(np.linspace(0, 1, num_kpts)) 150 | 151 | fig, axes = plt.subplots(1,num_kpts+1,figsize=(2*height,2*height)) 152 | fig.subplots_adjust(wspace=subplot_gap) 153 | 154 | for i in range(num_kpts): 155 | heatmap = np.squeeze(heatmaps[i,:,:]) 156 | heatmap = clean_heatmap(heatmap) 157 | 158 | axes[i].imshow(img) 159 | axes[i].imshow(heatmap,alpha=alpha) 160 | if not heatmaponly: 161 | center, radius = pred_set[i] 162 | circ = plt.Circle(center,radius,color=colors[i],fill=True,linewidth=linewidth,alpha=0.5) 163 | axes[i].add_patch(circ) 164 | circ_b = plt.Circle(center,radius,color=colors[i],fill=False,linewidth=linewidth) 165 | axes[i].add_patch(circ_b) 166 | # point = plt.Circle((kpt_gt[i,0],kpt_gt[i,1]),pointsize,color=colors[i]) 167 | point = plt.Rectangle([kpt_gt[i,0]-pointsize/2,kpt_gt[i,1]-pointsize/2],pointsize,pointsize,color=colors[i]) 168 | axes[i].add_patch(point) 169 | axes[i].xaxis.set_visible(False) 170 | axes[i].yaxis.set_visible(False) 171 | 172 | axes[-1].imshow(img) 173 | for i in range(num_kpts): 174 | center, radius = pred_set[i] 175 | circ = plt.Circle(center,radius,color=colors[i],fill=True,linewidth=linewidth,alpha=0.5) 176 | axes[-1].add_patch(circ) 177 | circ_b = plt.Circle(center,radius,color=colors[i],fill=False,linewidth=linewidth) 178 | axes[-1].add_patch(circ_b) 179 | # point = plt.Circle((kpt_gt[i,0],kpt_gt[i,1]),pointsize,color=colors[i]) 180 | point = plt.Rectangle([kpt_gt[i,0]-pointsize/2,kpt_gt[i,1]-pointsize/2],pointsize,pointsize,color=colors[i]) 181 | axes[-1].add_patch(point) 182 | axes[-1].xaxis.set_visible(False) 183 | axes[-1].yaxis.set_visible(False) 184 | 185 | if fname is not None: 186 | plt.savefig(fname,bbox_inches='tight') 187 | if show: 188 | plt.show() 189 | 190 | return fig 191 | 192 | 193 | def angle_length_ellipse(A): 194 | ''' 195 | Given an ellipse x' * A * x <= 1 196 | return a, b, and angle 197 | angle is the angle rotating from x to y (anti-clockwise) 198 | ''' 199 | v, V = np.linalg.eig(A) 200 | idx = np.argsort(v) 201 | v = v[idx] # 
ascending order v[0] <= ... <= v[-1] 202 | V = V[:,idx] 203 | 204 | ab = np.sqrt(1.0 / v) 205 | a = ab[0] 206 | b = ab[-1] 207 | assert a >= b, "semi-axes lengths wrong." 208 | 209 | Vl = V[:,0] # long axis direction 210 | angle = np.arctan2(Vl[-1],Vl[0]) / np.pi * 180.0 211 | 212 | return a, b, angle 213 | 214 | 215 | def draw_icp_ellipse(img,heatmaps,kpt_gt,pred_set,fname=None,show=False): 216 | linewidth = 2 217 | pointsize = 2 218 | height = 20 219 | subplot_gap = 0.05 220 | num_kpts = len(pred_set) 221 | colors = cm.Set2(np.linspace(0, 1, num_kpts)) 222 | 223 | fig, axes = plt.subplots(1,num_kpts+1,figsize=(2*height,2*height)) 224 | fig.subplots_adjust(wspace=subplot_gap) 225 | 226 | for i in range(num_kpts): 227 | heatmap = np.squeeze(heatmaps[i,:,:]) 228 | heatmap = clean_heatmap(heatmap) 229 | 230 | axes[i].imshow(img) 231 | axes[i].imshow(heatmap,alpha=alpha) 232 | center, lam = pred_set[i] 233 | a, b, angle = angle_length_ellipse(lam) 234 | ellipse = matplotlib.patches.Ellipse(center,2*a,2*b,angle=angle,color=colors[i],fill=True,linewidth=linewidth,alpha=0.5) 235 | axes[i].add_patch(ellipse) 236 | ellipse_b = matplotlib.patches.Ellipse(center,2*a,2*b,angle=angle,color=colors[i],fill=False,linewidth=linewidth) 237 | axes[i].add_patch(ellipse_b) 238 | point = plt.Rectangle([kpt_gt[i,0]-pointsize/2,kpt_gt[i,1]-pointsize/2],pointsize,pointsize,color=colors[i]) 239 | axes[i].add_patch(point) 240 | axes[i].xaxis.set_visible(False) 241 | axes[i].yaxis.set_visible(False) 242 | 243 | axes[-1].imshow(img) 244 | for i in range(num_kpts): 245 | center, lam = pred_set[i] 246 | a, b, angle = angle_length_ellipse(lam) 247 | ellipse = matplotlib.patches.Ellipse(center,2*a,2*b,angle=angle,color=colors[i],fill=True,linewidth=linewidth,alpha=0.5) 248 | axes[-1].add_patch(ellipse) 249 | ellipse_b = matplotlib.patches.Ellipse(center,2*a,2*b,angle=angle,color=colors[i],fill=False,linewidth=linewidth) 250 | axes[-1].add_patch(ellipse_b) 251 | point = plt.Rectangle([kpt_gt[i,0]-pointsize/2,kpt_gt[i,1]-pointsize/2],pointsize,pointsize,color=colors[i]) 252 | axes[-1].add_patch(point) 253 | axes[-1].xaxis.set_visible(False) 254 | axes[-1].yaxis.set_visible(False) 255 | 256 | if fname is not None: 257 | plt.savefig(fname,bbox_inches='tight') 258 | if show: 259 | plt.show() 260 | 261 | return fig 262 | --------------------------------------------------------------------------------