├── .gitignore ├── assets └── poster.png ├── conformal_calibration.py ├── conformal_prediction.py ├── get_calibration_ids.py ├── keypoint ├── README.md ├── bop_dataset.py ├── bop_toolkit_lib │ ├── __init__.py │ ├── colors.json │ ├── config.py │ ├── dataset_params.py │ ├── droid_sans_mono.ttf │ ├── droid_sans_mono_license.txt │ ├── inout.py │ ├── misc.py │ ├── pose_error.py │ ├── pose_matching.py │ ├── renderer.py │ ├── renderer_cpp.py │ ├── renderer_py.py │ ├── score.py │ ├── transform.py │ ├── view_sampler.py │ ├── visibility.py │ └── visualization.py ├── demo_data.ipynb ├── demo_pipeline.ipynb ├── est_6dof.py ├── eval │ ├── results_lmo-test.csv │ ├── results_tudl-test.csv │ └── results_ycbv-test.csv ├── kpts3d.json ├── misc │ ├── __init__.py │ ├── loss.py │ ├── pose2d_eval.py │ └── segmentation.py ├── models │ ├── __init__.py │ ├── fasterRCNN.py │ ├── hourglass.py │ ├── layers.py │ ├── mask_rcnn.py │ └── patched.py ├── scripts │ ├── _init_paths.py │ ├── calc_gt_distribution.py │ ├── calc_gt_info.py │ ├── calc_gt_masks.py │ ├── calc_model_info.py │ ├── check_results_bop19.py │ ├── eval_bop19.py │ ├── eval_calc_errors.py │ ├── eval_calc_scores.py │ ├── meshlab_scripts │ │ ├── remesh_for_eval_cell=0.25.mlx │ │ └── remesh_for_eval_cell=0.5.mlx │ ├── remesh_models_for_eval.py │ ├── render_train_imgs.py │ ├── show_performance_bop19.py │ ├── vis_est_poses.py │ ├── vis_gt_poses.py │ └── vis_object_symmetries.py ├── train │ ├── base_options.py │ ├── base_trainer.py │ ├── detection_trainer.py │ ├── keypoint_trainer.py │ ├── train.py │ ├── train_options.py │ └── transforms.py └── utils │ ├── __init__.py │ ├── data_loader.py │ ├── img_utils.py │ ├── saver.py │ └── trimesh_renderer.py ├── readme.md └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | ## Python ## 2 | *__pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | .vscode 6 | ## Jupyter Notebook ## 7 | .ipynb_checkpoints 8 | ## Ignore Mac DS_Store files ## 9 | .DS_Store 10 | ## Large Files ## 11 | keypoint/data 12 | *.pkl -------------------------------------------------------------------------------- /assets/poster.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/ConformalKeypoint/427d71bd7fb40686b345c1e780250728266f2944/assets/poster.png -------------------------------------------------------------------------------- /conformal_calibration.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torchvision import transforms as T 4 | import tqdm 5 | import pickle 6 | import argparse 7 | 8 | from keypoint.models import FRCNN, StackedHourglass, fasterrcnn_backbone 9 | from keypoint.bop_dataset import BOPDataset 10 | from keypoint.train.transforms import ToTensor, Normalize, AffineCrop 11 | 12 | from utils import conformity_score, one_each 13 | 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--score_type', action='store', type=str) 16 | parser.add_argument('--do_frcnn', action='store_true') 17 | args = parser.parse_args() 18 | 19 | score_type = args.score_type 20 | do_frcnn = args.do_frcnn 21 | 22 | # Load dataset 23 | dataset_name = 'lmo' # this the lmo calibration dataset containing 200 images 24 | root = './keypoint/data/bop' 25 | num_classes = {'lmo':8, 'lmo-org':8} 26 | device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') 27 | dataset = BOPDataset(root, dataset_name, split='test', return_coco=True) 28 
| dataset._set_kpts_info() 29 | 30 | if do_frcnn: 31 | # Load Faster-RCNN detector 32 | detector_trainsform = T.ToTensor() 33 | state_dict = torch.load('data/detect_checkpoints/d{}.pt'.format(dataset_name), map_location=device)['frcnn'] 34 | detector = fasterrcnn_backbone('resnet101', num_classes=1+num_classes[dataset_name]).to(device) 35 | detector.eval() 36 | detector.load_state_dict(state_dict) 37 | 38 | # Load keypoints detector 39 | transform_list = [] 40 | transform_list.append(AffineCrop(out_size=256, scale_factor=0, rotation_factor=0, dialation=0.25)) 41 | transform_list.append(ToTensor()) 42 | transform_list.append(Normalize()) 43 | kpts_transform = T.Compose(transform_list) 44 | 45 | state_dict = torch.load('keypoint/data/kpts_checkpoints/{}.pt'.format(dataset_name), map_location=device)['stacked_hg'] 46 | kpts_detector = StackedHourglass(dataset.n_kpts).to(device) 47 | kpts_detector.eval() 48 | kpts_detector.load_state_dict(state_dict) 49 | 50 | # useful info about dataset 51 | n_kpts = dataset.n_kpts 52 | n_smps = len(dataset) 53 | obj2idx = dataset.obj2idx 54 | idx2obj = {v:k for k,v in obj2idx.items()} 55 | lab2obj = {v+1:k for k,v in obj2idx.items()} 56 | n_objs = len(idx2obj) 57 | 58 | # Prepare to store obj scores 59 | obj_scores = [[] for i in range(n_objs)] 60 | print(f'nonconformity function: {score_type}.') 61 | 62 | # Compute conformity score on calibration dataset 63 | for i in tqdm.tqdm(range(n_smps)): 64 | sample = dataset[i] 65 | meta = dataset.db[i] 66 | 67 | image = sample['image'] 68 | gt_boxes = sample['boxes'] 69 | gt_objs = [lab2obj[l] for l in sample['labels']] 70 | gt_kpts = meta['keypoints'] 71 | 72 | if do_frcnn: 73 | # Object detection 74 | with torch.no_grad(): 75 | img = detector_trainsform(image).to(device) 76 | pred = detector([img])[0] 77 | pred = {k:v.cpu() for k,v in pred.items()} 78 | pd_boxes, pd_labels = one_each(pred, thresh=0.0) 79 | pd_objs = [lab2obj[i] for i in pd_labels.tolist()] 80 | pd_boxes = pd_boxes.tolist() 81 | 82 | _, comm1, comm2 = np.intersect1d(np.array(pd_objs), np.array(gt_objs), return_indices=True) 83 | comm1 = comm1.tolist() 84 | comm2 = comm2.tolist() 85 | 86 | pd_objs_true = [pd_objs[i] for i in comm1] 87 | pd_boxes_true = [pd_boxes[i] for i in comm1] 88 | gt_kpts_pd = [gt_kpts[i] for i in comm2] 89 | gt_objs = pd_objs_true 90 | gt_boxes = pd_boxes_true 91 | gt_kpts = gt_kpts_pd 92 | 93 | for obj, box, gt_kpt in zip(gt_objs, gt_boxes, gt_kpts): 94 | box = [box[0], box[1], box[2]-box[0], box[3]-box[1]] 95 | gt_kpt_homo = np.concatenate( 96 | (gt_kpt,np.ones((gt_kpt.shape[0],1))),axis=1) 97 | input_crop = {'image':image, 'bb':box, 'keypoints':gt_kpt_homo} 98 | input_crop = kpts_transform(input_crop) 99 | gt_kpt_crop = input_crop['keypoints'][:,:2].numpy() * (64/256) # the heatmap is size (64,64), rescale from 256 to 64 100 | 101 | with torch.no_grad(): 102 | batch = input_crop['image'][None].to(device) 103 | output = kpts_detector(batch) 104 | output = output[-1].cpu() 105 | 106 | kpt_start = dataset.obj2kptid[obj][0] 107 | kpt_end = dataset.obj2kptid[obj][1] 108 | heatmaps_pred = torch.squeeze( 109 | output[[0], kpt_start:kpt_end, :, :]) 110 | 111 | scores = [] 112 | for j in np.arange(kpt_start,kpt_end): 113 | score = conformity_score( 114 | np.squeeze(gt_kpt_crop[j-kpt_start,:]), 115 | torch.squeeze(heatmaps_pred[j-kpt_start,:]).numpy(), 116 | type=score_type) 117 | scores.append(score) 118 | # @Apoorva: here is the place to quickly implement the windowed nonconformity score 119 | max_score = np.max(np.stack(scores)) 
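        # Calibrating on the per-object max makes the guarantee simultaneous over keypoints:
        # under the usual exchangeability assumption, if q is the conformal quantile of these
        # max scores computed later in conformal_prediction.py (the floor((n+1)*eps)-th largest
        # calibration score), then with probability >= 1 - eps every keypoint of a fresh test
        # object has nonconformity score <= q, so all of its per-keypoint prediction sets cover.
        #
        # Minimal, hypothetical sketch of the "windowed" variant mentioned in the TODO above
        # (its exact definition is not given in this repo, so this is only one possible reading):
        # aggregate the per-keypoint scores over a sliding window instead of one global max,
        #
        #   win = 3  # hypothetical window size
        #   max_score = max(np.mean(scores[k:k + win])
        #                   for k in range(len(scores) - win + 1))
        #
        # Any such aggregate is a valid calibration statistic as long as the identical
        # aggregate is compared against the calibrated quantile at prediction time.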
120 | obj_scores[obj2idx[obj]].append(max_score) 121 | 122 | obj_scores_np = [] 123 | for i in range(n_objs): 124 | obj_scores_np.append(np.array(obj_scores[i])) 125 | fname = f'calibration_scores_{score_type}_{dataset_name}.pkl' 126 | if do_frcnn: 127 | fname = f'calibration_scores_{score_type}_{dataset_name}_frcnn.pkl' 128 | with open(fname, 'wb') as f: 129 | pickle.dump(obj_scores_np, f) -------------------------------------------------------------------------------- /conformal_prediction.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import matplotlib.pyplot as plt 4 | import torch 5 | from torchvision import transforms as T 6 | import tqdm 7 | import pickle, argparse 8 | import os 9 | 10 | from keypoint.models import FRCNN, StackedHourglass, fasterrcnn_backbone 11 | from keypoint.bop_dataset import BOPDataset 12 | from keypoint.train.transforms import ToTensor, Normalize, AffineCrop 13 | from keypoint.misc.pose2d_eval import Pose2DEval 14 | 15 | from utils import icp, draw_icp_ball, draw_icp_ellipse 16 | 17 | def heatmap2org(kpts,lams,T): 18 | ''' 19 | The heatmap is on the cropped image, this function converts the prediction sets on the cropped image to the original image (which will be used for pose estimation) 20 | ''' 21 | A = T[:,:2] 22 | b = T[:,2] 23 | kpts_new = np.linalg.solve(A,kpts*4 - b[:,np.newaxis]) 24 | lam_new = [] 25 | for lam in lams: 26 | lam_new.append( (A.T @ lam @ A)/16 ) 27 | return kpts_new, np.stack(lam_new) 28 | 29 | parser = argparse.ArgumentParser() 30 | parser.add_argument('--score_type', action='store', type=str) 31 | parser.add_argument('--eps', type=int) 32 | parser.add_argument('--do_frcnn', action='store_true') 33 | parser.add_argument('--save_fig', action='store_true') 34 | 35 | args = parser.parse_args() 36 | 37 | score_type = args.score_type 38 | eps = args.eps 39 | eps = eps / 100.0 40 | save_fig = args.save_fig 41 | do_frcnn = args.do_frcnn 42 | 43 | print(f'nonconformity function: {score_type}, epsilon: {eps}, save_fig: {save_fig}.') 44 | 45 | # Load dataset 46 | dataset_name = 'lmo-org' # this is the full lmo dataset containing 1214 images 47 | root = './keypoint/data/bop' 48 | num_classes = {'lmo':8, 'lmo-org':8} 49 | device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') 50 | dataset = BOPDataset(root, dataset_name, split='test', return_coco=True) 51 | dataset._set_kpts_info() 52 | 53 | if do_frcnn: 54 | # Load Faster-RCNN detector 55 | detector_trainsform = T.ToTensor() 56 | state_dict = torch.load('keypoint/data/detect_checkpoints/d{}.pt'.format('lmo'), map_location=device)['frcnn'] 57 | detector = fasterrcnn_backbone('resnet101', num_classes=1+num_classes[dataset_name]).to(device) 58 | detector.eval() 59 | detector.load_state_dict(state_dict) 60 | 61 | # Load keypoints detector 62 | transform_list = [] 63 | transform_list.append(AffineCrop(out_size=256, scale_factor=0, rotation_factor=0, dialation=0.25)) 64 | transform_list.append(ToTensor()) 65 | transform_list.append(Normalize()) 66 | kpts_transform = T.Compose(transform_list) 67 | 68 | state_dict = torch.load('keypoint/data/kpts_checkpoints/{}.pt'.format(dataset_name), map_location=device)['stacked_hg'] 69 | kpts_detector = StackedHourglass(dataset.n_kpts).to(device) 70 | kpts_detector.eval() 71 | kpts_detector.load_state_dict(state_dict) 72 | 73 | # useful info about dataset 74 | n_kpts = dataset.n_kpts 75 | n_smps = len(dataset) 76 | obj2idx = dataset.obj2idx 77 | idx2obj = {v:k for k,v in 
obj2idx.items()} 78 | lab2obj = {v+1:k for k,v in obj2idx.items()} 79 | n_objs = len(idx2obj) 80 | poseEval = Pose2DEval() 81 | 82 | img_result_dir = './keypoint/data/bop/lmo-org/test/000002/icp_results' 83 | 84 | fname = f'calibration_scores_{score_type}_lmo.pkl' 85 | if do_frcnn: 86 | fname = f'calibration_scores_{score_type}_lmo_frcnn.pkl' 87 | # Compute quantiles 88 | with open(fname, 'rb') as f: 89 | obj_scores = pickle.load(f) 90 | obj_qs = [] 91 | for i in range(n_objs): 92 | scores = obj_scores[i] 93 | n = np.size(scores) 94 | idx = np.int64( np.floor( (n+1) * eps ) ) 95 | scores_sort = scores[np.flip(np.argsort(scores))] 96 | obj_qs.append(scores_sort[idx-1]) 97 | obj_qs = np.array(obj_qs) 98 | 99 | # Perform Conformal prediction 100 | obj_kpts = [[] for i in range(n_objs)] 101 | obj_lams = [[] for i in range(n_objs)] 102 | obj_imgs = [[] for i in range(n_objs)] 103 | 104 | for i in tqdm.tqdm(range(n_smps)): 105 | sample = dataset[i] 106 | meta = dataset.db[i] 107 | image = sample['image'] 108 | gt_boxes = sample['boxes'] 109 | gt_objs = [lab2obj[l] for l in sample['labels']] 110 | gt_kpts = meta['keypoints'] 111 | 112 | if do_frcnn: 113 | # Object detection 114 | with torch.no_grad(): 115 | img = detector_trainsform(image).to(device) 116 | pred = detector([img])[0] 117 | pred = {k:v.cpu() for k,v in pred.items()} 118 | pd_boxes, pd_labels = one_each(pred, thresh=0) 119 | pd_objs = [lab2obj[i] for i in pd_labels.tolist()] 120 | pd_boxes = pd_boxes.tolist() 121 | 122 | _, comm1, comm2 = np.intersect1d(np.array(pd_objs), np.array(gt_objs), return_indices=True) 123 | comm1 = comm1.tolist() 124 | comm2 = comm2.tolist() 125 | 126 | pd_objs_true = [pd_objs[i] for i in comm1] 127 | pd_boxes_true = [pd_boxes[i] for i in comm1] 128 | gt_kpts_pd = [gt_kpts[i] for i in comm2] 129 | gt_objs = pd_objs_true 130 | gt_boxes = pd_boxes_true 131 | gt_kpts = gt_kpts_pd 132 | 133 | 134 | for obj, box, gt_kpt in zip(gt_objs, gt_boxes, gt_kpts): 135 | box = [box[0], box[1], box[2]-box[0], box[3]-box[1]] 136 | gt_kpt_homo = np.concatenate((gt_kpt,np.ones((gt_kpt.shape[0],1))),axis=1) 137 | input_crop = {'image':image, 'bb':box, 'keypoints':gt_kpt_homo} 138 | input_crop = kpts_transform(input_crop) 139 | gt_kpt_crop = input_crop['keypoints'][:,:2].numpy() * (64/256) # the heatmap is size (64,64), rescale from 256 to 64 140 | # affine transformation between original kpt loc and that in heatmap 141 | affineT = transform_list[0].crop_augment(box) 142 | 143 | with torch.no_grad(): 144 | batch = input_crop['image'][None].to(device) 145 | output = kpts_detector(batch) 146 | output = output[-1].cpu() 147 | 148 | kpt_start = dataset.obj2kptid[obj][0] 149 | kpt_end = dataset.obj2kptid[obj][1] 150 | heatmaps_pred = torch.squeeze(output[[0], kpt_start:kpt_end, :, :]) 151 | 152 | # output inductive conformal prediction set 153 | kpts = [] 154 | lams = [] 155 | icp_sets = [] 156 | for j in range(kpt_start,kpt_end): 157 | if score_type == "ball": 158 | center, radius = icp( 159 | torch.squeeze(heatmaps_pred[j-kpt_start,:]).numpy(), 160 | obj_qs[obj2idx[obj]], 161 | type=score_type) 162 | 163 | lam = np.eye(2) / (radius**2) 164 | kpts.append(center) # center 165 | lams.append(lam) # information matrix 166 | icp_sets.append((center,radius)) 167 | 168 | elif score_type == "ellipse": 169 | center, lam = icp( 170 | torch.squeeze(heatmaps_pred[j-kpt_start,:]).numpy(), 171 | obj_qs[obj2idx[obj]],type=score_type) 172 | kpts.append(center) 173 | lams.append(lam) 174 | icp_sets.append((center,lam)) 175 | 176 | else: 177 | raise 
RuntimeError('Unknown score type for ICP.') 178 | 179 | if save_fig: 180 | dir_path = "{:s}/{:.2f}/{:s}".format(img_result_dir,eps,score_type) 181 | os.makedirs(dir_path,exist_ok=True) 182 | fname = "{:s}/{:06d}_{:06d}_{:02d}.pdf".format(dir_path,i,meta['im_id'],obj) 183 | if do_frcnn: 184 | fname = "{:s}/{:06d}_{:06d}_{:02d}_frcnn.pdf".format(dir_path,i,meta['im_id'],obj) 185 | # plot 186 | img_disp = cv2.resize((input_crop['image'].permute(1, 2, 0).numpy()) / 2.0 + 0.5,(64,64)) 187 | if score_type == "ball": 188 | fig = draw_icp_ball(img_disp,heatmaps_pred.numpy(),gt_kpt_crop,icp_sets,fname=fname,show=False) 189 | elif score_type == "ellipse": 190 | fig = draw_icp_ellipse(img_disp,heatmaps_pred.numpy(),gt_kpt_crop,icp_sets,fname=fname,show=False) 191 | plt.close(fig) 192 | 193 | kpts = np.stack(kpts,axis=1) 194 | # convert the keypoints coordinates to the original image space and save 195 | kpts_new, lams_new = heatmap2org(kpts,lams,affineT) 196 | obj_kpts[obj2idx[obj]].append(kpts_new) 197 | obj_lams[obj2idx[obj]].append(lams_new) 198 | obj_imgs[obj2idx[obj]].append(i) 199 | 200 | # save the keypoint prediction sets 201 | data = {"kpts": obj_kpts, 202 | "lams": obj_lams, 203 | "imgs": obj_imgs} 204 | fname = "icp_sets_{:s}_{:.2f}.pkl".format(score_type,eps) 205 | if do_frcnn: 206 | fname = "icp_sets_{:s}_{:.2f}_frcnn.pkl".format(score_type,eps) 207 | with open(fname, 'wb') as f: 208 | pickle.dump(data, f) -------------------------------------------------------------------------------- /get_calibration_ids.py: -------------------------------------------------------------------------------- 1 | import json 2 | import cv2 3 | import numpy as np 4 | from glob import glob 5 | import matplotlib.pyplot as plt 6 | import torch 7 | from torchvision import transforms as T 8 | import tqdm 9 | import pickle 10 | 11 | from bop_dataset import BOPDataset 12 | 13 | # Load dataset 14 | dataset_name = 'lmo' 15 | root = './data/bop' 16 | num_classes = {'lmo':8, 'lmo-org':8} 17 | device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') 18 | dataset = BOPDataset(root, dataset_name, split='test', return_coco=True) 19 | dataset._set_kpts_info() 20 | 21 | n_smps = len(dataset) 22 | ids = [] 23 | 24 | for i in tqdm.tqdm(range(n_smps)): 25 | meta = dataset.db[i] 26 | path = meta['imgpath'] 27 | words = path.split('/') 28 | names = words[-1].split('.') 29 | id = int(names[0]) 30 | ids.append(id) 31 | 32 | ids = np.array(ids) 33 | print(ids) 34 | 35 | fname = 'calibration_imgs.npy' 36 | np.save(fname,ids) 37 | -------------------------------------------------------------------------------- /keypoint/README.md: -------------------------------------------------------------------------------- 1 | # 6D_Pose 2 | Python implementation for the BOP benchmark section of the paper: \ 3 | **Semantic keypoint-based pose estimation from single RGB frames** 4 | Field Robotics \ 5 | [[Paper](https://arxiv.org/abs/2204.05864)] 6 | ![cover](data/cover.png) 7 | 8 | ## Data 9 | You can download the pretrained models for [detection](https://drive.google.com/drive/folders/1Jzg-9sU4nEGawTREsMFblmBEZouPMOjM?usp=sharing) and [keypoint detection](https://drive.google.com/drive/folders/1i9Y5lFm3jc2t8qtxoB-qQJEDLc0urZao?usp=sharing). Please place the models as follows. We also put the test images for the LMO dataset in this repo for convenience. 
10 | ``` 11 | - data 12 | -- detect_checkpoints 13 | -- kpts_checkpoints 14 | ``` 15 | 16 | ## Demo 17 | Our method uses additional 3D keypoint annotation on the CAD models, which is included in **kpts_3d.json**. We provide two demo. To explore the 3D annotation, please use **demo_data.ipynb**. To explore the inference pipeline, please use **demo_pipeline.ipynb**. 18 | 19 | 20 | ## Reference 21 | @article{schmeckpeper2022semantic, 22 | Title = {Semantic keypoint-based pose estimation from single RGB frames}, 23 | Author = {Schmeckpeper, Karl and Osteen, Philip R and Wang, Yufu and Pavlakos, Georgios and Chaney, Kenneth and Jordan, Wyatt and Zhou, Xiaowei and Derpanis, Konstantinos G and Daniilidis, Kostas}, 24 | Booktitle = {Field Robotics}, 25 | Year = {2022} 26 | } 27 | -------------------------------------------------------------------------------- /keypoint/bop_toolkit_lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/ConformalKeypoint/427d71bd7fb40686b345c1e780250728266f2944/keypoint/bop_toolkit_lib/__init__.py -------------------------------------------------------------------------------- /keypoint/bop_toolkit_lib/colors.json: -------------------------------------------------------------------------------- 1 | [ 2 | [0.89, 0.28, 0.13], 3 | [0.45, 0.38, 0.92], 4 | [0.35, 0.73, 0.63], 5 | [0.62, 0.28, 0.91], 6 | [0.65, 0.71, 0.22], 7 | [0.8, 0.29, 0.89], 8 | [0.27, 0.55, 0.22], 9 | [0.37, 0.46, 0.84], 10 | [0.84, 0.63, 0.22], 11 | [0.68, 0.29, 0.71], 12 | [0.48, 0.75, 0.48], 13 | [0.88, 0.27, 0.75], 14 | [0.82, 0.45, 0.2], 15 | [0.86, 0.27, 0.27], 16 | [0.52, 0.49, 0.18], 17 | [0.33, 0.67, 0.25], 18 | [0.67, 0.42, 0.29], 19 | [0.67, 0.46, 0.86], 20 | [0.36, 0.72, 0.84], 21 | [0.85, 0.29, 0.4], 22 | [0.24, 0.53, 0.55], 23 | [0.85, 0.55, 0.8], 24 | [0.4, 0.51, 0.33], 25 | [0.56, 0.38, 0.63], 26 | [0.78, 0.66, 0.46], 27 | [0.33, 0.5, 0.72], 28 | [0.83, 0.31, 0.56], 29 | [0.56, 0.61, 0.85], 30 | [0.89, 0.58, 0.57], 31 | [0.67, 0.4, 0.49] 32 | ] -------------------------------------------------------------------------------- /keypoint/bop_toolkit_lib/config.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """Configuration of the BOP Toolkit.""" 5 | 6 | import os 7 | 8 | 9 | ######## Basic ######## 10 | 11 | # Folder with the BOP datasets. 12 | if 'BOP_PATH' in os.environ: 13 | datasets_path = os.environ['BOP_PATH'] 14 | else: 15 | datasets_path = r'/Users/Yufu/Desktop/bop_public/data/bop' 16 | 17 | # Folder with pose results to be evaluated. 18 | results_path = r'/Users/Yufu/Desktop/bop_public' 19 | 20 | # Folder for the calculated pose errors and performance scores. 21 | eval_path = r'/Users/Yufu/Desktop/bop_public' 22 | 23 | ######## Extended ######## 24 | 25 | # Folder for outputs (e.g. visualizations). 26 | output_path = r'/path/to/output/folder' 27 | 28 | # For offscreen C++ rendering: Path to the build folder of bop_renderer (github.com/thodan/bop_renderer). 29 | bop_renderer_path = r'/path/to/bop_renderer/build' 30 | 31 | # Executable of the MeshLab server. 
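# (Presumably only needed when remeshing models for evaluation, e.g. by
# scripts/remesh_models_for_eval.py together with the .mlx filters under
# scripts/meshlab_scripts/.)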
32 | meshlab_server_path = r'/path/to/meshlabserver.exe' 33 | -------------------------------------------------------------------------------- /keypoint/bop_toolkit_lib/droid_sans_mono.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/ConformalKeypoint/427d71bd7fb40686b345c1e780250728266f2944/keypoint/bop_toolkit_lib/droid_sans_mono.ttf -------------------------------------------------------------------------------- /keypoint/bop_toolkit_lib/pose_matching.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """Matching of estimated poses to the ground-truth poses.""" 5 | 6 | import numpy as np 7 | 8 | 9 | def match_poses(errs, error_ths, max_ests_count=0, gt_valid_mask=None): 10 | """Matches the estimated poses to the ground-truth poses. 11 | 12 | The estimated poses are greedily matched to the ground truth poses in the 13 | order of decreasing score of the estimates. An estimated pose is matched to a 14 | ground-truth pose if the error w.r.t. the ground-truth pose is below the 15 | specified threshold. Each estimated pose is matched to up to one ground-truth 16 | pose and each ground-truth pose is matched to up to one estimated pose. 17 | 18 | :param errs: List of dictionaries, where each dictionary holds the following 19 | info about one pose estimate: 20 | - 'est_id': ID of the pose estimate. 21 | - 'score': Confidence score of the pose estimate. 22 | - 'errors': Dictionary mapping ground-truth ID's to errors of the pose 23 | estimate w.r.t. the ground-truth poses. 24 | :param error_ths: Thresholds of correctness. The pose error can be given 25 | by more than one element (e.g. translational + rotational error), in which 26 | case there is one threshold for each element. 27 | :param max_ests_count: Top k pose estimates to consider (0 = all). 28 | :param gt_valid_mask: Mask of ground-truth poses which can be considered. 29 | :return: List of dictionaries, where each dictionary holds info for one pose 30 | estimate (the estimates are ordered as in errs) about the matching 31 | ground-truth pose: 32 | - 'est_id': ID of the pose estimate. 33 | - 'gt_id': ID of the matched ground-truth pose (-1 means there is no 34 | matching ground-truth pose). 35 | - 'score': Confidence score of the pose estimate. 36 | - 'error': Error of the pose estimate w.r.t. the matched ground-truth pose. 37 | - 'error_norm': Error normalized by the threshold value. 38 | """ 39 | # Sort the estimated poses by decreasing confidence score. 40 | errs_sorted = sorted(errs, key=lambda e: e['score'], reverse=True) 41 | 42 | # Keep only the required number of poses with the highest confidence score. 43 | # 0 = all pose estimates are considered. 44 | if max_ests_count > 0: 45 | errs_sorted = errs_sorted[:max_ests_count] 46 | 47 | # Number of values defining the error (e.g. 1 for "ADD", 2 for "5deg 5cm"). 48 | error_num_elems = len(list(error_ths)) 49 | 50 | # Greedily match the estimated poses to the ground truth poses in the order of 51 | # decreasing score of the estimates. 52 | matches = [] 53 | gt_matched = [] 54 | for e in errs_sorted: 55 | 56 | best_gt_id = -1 57 | best_error = list(error_ths) 58 | for gt_id, error in e['errors'].items(): 59 | 60 | # If the mask of valid GT poses is not provided, consider all valid. 
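      # (`not gt_valid_mask` is True when the mask is None, so the per-pose lookup on
      # the right-hand side is only evaluated when a mask was actually supplied.)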
61 | is_valid = not gt_valid_mask or gt_valid_mask[gt_id] 62 | 63 | # Only valid GT poses that have not been matched yet are considered. 64 | if is_valid and gt_id not in gt_matched: 65 | 66 | # The current pose estimate is considered the best so far if all error 67 | # elements are the lowest so far. 68 | if np.all([error[i] < best_error[i] for i in range(error_num_elems)]): 69 | best_gt_id = gt_id 70 | best_error = error 71 | 72 | if best_gt_id >= 0: 73 | 74 | # Mark the GT pose as matched. 75 | gt_matched.append(best_gt_id) 76 | 77 | # Error normalized by the threshold. 78 | best_errors_normed = [best_error[i] / float(error_ths[i]) 79 | for i in range(error_num_elems)] 80 | 81 | # Save info about the match. 82 | matches.append({ 83 | 'est_id': e['est_id'], 84 | 'gt_id': best_gt_id, 85 | 'score': e['score'], 86 | 'error': best_error, 87 | 'error_norm': best_errors_normed 88 | }) 89 | 90 | return matches 91 | 92 | 93 | def match_poses_scene(scene_id, scene_gt, scene_gt_valid, scene_errs, 94 | correct_th, n_top): 95 | """Matches the estimated poses to the ground-truth poses in one scene. 96 | 97 | :param scene_id: Scene ID. 98 | :param scene_gt: Dictionary mapping image ID's to lists of dictionaries with: 99 | - 'obj_id': Object ID of the ground-truth pose. 100 | :param scene_gt_valid: Dictionary mapping image ID's to lists of boolean 101 | values indicating which ground-truth poses should be considered. 102 | :param scene_errs: List of dictionaries with: 103 | - 'im_id': Image ID. 104 | - 'obj_id': Object ID. 105 | - 'est_id': ID of the pose estimate. 106 | - 'score': Confidence score of the pose estimate. 107 | - 'errors': Dictionary mapping ground-truth ID's to errors of the pose 108 | estimate w.r.t. the ground-truth poses. 109 | :param error_obj_threshs: Dictionary mapping object ID's to values of the 110 | threshold of correctness. 111 | :param n_top: Top N pose estimates (with the highest score) to be evaluated 112 | for each object class in each image. 113 | :return: 114 | """ 115 | # Organize the errors by image ID and object ID (for faster query). 116 | scene_errs_org = {} 117 | for e in scene_errs: 118 | scene_errs_org.setdefault( 119 | e['im_id'], {}).setdefault(e['obj_id'], []).append(e) 120 | 121 | # Matching of poses in individual images. 122 | scene_matches = [] 123 | for im_id, im_gts in scene_gt.items(): 124 | im_matches = [] 125 | 126 | for gt_id, gt in enumerate(im_gts): 127 | im_matches.append({ 128 | 'scene_id': scene_id, 129 | 'im_id': im_id, 130 | 'obj_id': gt['obj_id'], 131 | 'gt_id': gt_id, 132 | 'est_id': -1, 133 | 'score': -1, 134 | 'error': -1, 135 | 'error_norm': -1, 136 | 'valid': scene_gt_valid[im_id][gt_id], 137 | }) 138 | 139 | # Treat estimates of each object separately. 140 | im_obj_ids = set([gt['obj_id'] for gt in im_gts]) 141 | for obj_id in im_obj_ids: 142 | if im_id in scene_errs_org.keys()\ 143 | and obj_id in scene_errs_org[im_id].keys(): 144 | 145 | # Greedily match the estimated poses to the ground truth poses. 146 | errs_im_obj = scene_errs_org[im_id][obj_id] 147 | ms = match_poses( 148 | errs_im_obj, correct_th, n_top, scene_gt_valid[im_id]) 149 | 150 | # Update info about the matched GT poses. 
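        # (im_matches holds one entry per GT pose in gt_id order, so it can be
        # indexed directly with the m['gt_id'] returned by match_poses.)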
151 | for m in ms: 152 | g = im_matches[m['gt_id']] 153 | g['est_id'] = m['est_id'] 154 | g['score'] = m['score'] 155 | g['error'] = m['error'] 156 | g['error_norm'] = m['error_norm'] 157 | 158 | scene_matches += im_matches 159 | 160 | return scene_matches 161 | -------------------------------------------------------------------------------- /keypoint/bop_toolkit_lib/renderer.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """Abstract class of a renderer and a factory function to create a renderer. 5 | 6 | The renderer produces an RGB/depth image of a 3D mesh model in a specified pose 7 | for given camera parameters and illumination settings. 8 | """ 9 | 10 | 11 | class Renderer(object): 12 | """Abstract class of a renderer.""" 13 | 14 | def __init__(self, width, height): 15 | """Constructor. 16 | 17 | :param width: Width of the rendered image. 18 | :param height: Height of the rendered image. 19 | """ 20 | self.width = width 21 | self.height = height 22 | 23 | # 3D location of a point light (in the camera coordinates). 24 | self.light_cam_pos = (0, 0, 0) 25 | 26 | # Set light color and weights. 27 | self.light_color = (1.0, 1.0, 1.0) # Used only in C++ renderer. 28 | self.light_ambient_weight = 0.5 29 | self.light_diffuse_weight = 1.0 # Used only in C++ renderer. 30 | self.light_specular_weight = 0.0 # Used only in C++ renderer. 31 | self.light_specular_shininess = 0.0 # Used only in C++ renderer. 32 | 33 | def set_light_cam_pos(self, light_cam_pos): 34 | """Sets the 3D location of a point light. 35 | 36 | :param light_cam_pos: [X, Y, Z]. 37 | """ 38 | self.light_cam_pos = light_cam_pos 39 | 40 | def set_light_ambient_weight(self, light_ambient_weight): 41 | """Sets weight of the ambient light. 42 | 43 | :param light_ambient_weight: Scalar from 0 to 1. 44 | """ 45 | self.light_ambient_weight = light_ambient_weight 46 | 47 | def add_object(self, obj_id, model_path, **kwargs): 48 | """Loads an object model. 49 | 50 | :param obj_id: Object identifier. 51 | :param model_path: Path to the object model file. 52 | """ 53 | raise NotImplementedError 54 | 55 | def remove_object(self, obj_id): 56 | """Removes an object model. 57 | 58 | :param obj_id: Identifier of the object to remove. 59 | """ 60 | raise NotImplementedError 61 | 62 | def render_object(self, obj_id, R, t, fx, fy, cx, cy): 63 | """Renders an object model in the specified pose. 64 | 65 | :param obj_id: Object identifier. 66 | :param R: 3x3 ndarray with a rotation matrix. 67 | :param t: 3x1 ndarray with a translation vector. 68 | :param fx: Focal length (X axis). 69 | :param fy: Focal length (Y axis). 70 | :param cx: The X coordinate of the principal point. 71 | :param cy: The Y coordinate of the principal point. 72 | :return: Returns a dictionary with rendered images. 73 | """ 74 | raise NotImplementedError 75 | 76 | 77 | def create_renderer(width, height, renderer_type='cpp', mode='rgb+depth', 78 | shading='phong', bg_color=(0.0, 0.0, 0.0, 0.0)): 79 | """A factory to create a renderer. 80 | 81 | Note: Parameters mode, shading and bg_color are currently supported only by 82 | the Python renderer (renderer_type='python'). 83 | 84 | :param width: Width of the rendered image. 85 | :param height: Height of the rendered image. 86 | :param renderer_type: Type of renderer (options: 'cpp', 'python'). 87 | :param mode: Rendering mode ('rgb+depth', 'rgb', 'depth'). 
88 | :param shading: Type of shading ('flat', 'phong'). 89 | :param bg_color: Color of the background (R, G, B, A). 90 | :return: Instance of a renderer of the specified type. 91 | """ 92 | if renderer_type == 'python': 93 | from . import renderer_py 94 | return renderer_py.RendererPython(width, height, mode, shading, bg_color) 95 | 96 | elif renderer_type == 'cpp': 97 | from . import renderer_cpp 98 | return renderer_cpp.RendererCpp(width, height) 99 | 100 | else: 101 | raise ValueError('Unknown renderer type.') 102 | -------------------------------------------------------------------------------- /keypoint/bop_toolkit_lib/renderer_cpp.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """An interface to the C++ based renderer (bop_renderer).""" 5 | 6 | import sys 7 | import numpy as np 8 | 9 | from bop_toolkit_lib import config 10 | from bop_toolkit_lib import renderer 11 | 12 | # C++ renderer (https://github.com/thodan/bop_renderer) 13 | sys.path.append(config.bop_renderer_path) 14 | import bop_renderer 15 | 16 | 17 | class RendererCpp(renderer.Renderer): 18 | """An interface to the C++ based renderer.""" 19 | 20 | def __init__(self, width, height): 21 | """See base class.""" 22 | super(RendererCpp, self).__init__(width, height) 23 | self.renderer = bop_renderer.Renderer() 24 | self.renderer.init(width, height) 25 | self._set_light() 26 | 27 | def _set_light(self): 28 | self.renderer.set_light( 29 | list(self.light_cam_pos), list(self.light_color), 30 | self.light_ambient_weight, self.light_diffuse_weight, 31 | self.light_specular_weight, self.light_specular_shininess) 32 | 33 | def set_light_cam_pos(self, light_cam_pos): 34 | """See base class.""" 35 | super(RendererCpp, self).set_light_cam_pos(light_cam_pos) 36 | self._set_light() 37 | 38 | def set_light_ambient_weight(self, light_ambient_weight): 39 | """See base class.""" 40 | super(RendererCpp, self).set_light_ambient_weight(light_ambient_weight) 41 | self._set_light() 42 | 43 | def add_object(self, obj_id, model_path, **kwargs): 44 | """See base class. 45 | 46 | NEEDS TO BE CALLED RIGHT AFTER CREATING THE RENDERER (this is due to some 47 | memory issues in the C++ renderer which need to be fixed). 48 | """ 49 | self.renderer.add_object(obj_id, model_path) 50 | 51 | def remove_object(self, obj_id): 52 | """See base class.""" 53 | self.renderer.remove_object(obj_id) 54 | 55 | def render_object(self, obj_id, R, t, fx, fy, cx, cy): 56 | """See base class.""" 57 | R_l = R.astype(np.float32).flatten().tolist() 58 | t_l = t.astype(np.float32).flatten().tolist() 59 | self.renderer.render_object(obj_id, R_l, t_l, fx, fy, cx, cy) 60 | rgb = self.renderer.get_color_image(obj_id) 61 | depth = self.renderer.get_depth_image(obj_id).astype(np.float32) 62 | return {'rgb': rgb, 'depth': depth} 63 | -------------------------------------------------------------------------------- /keypoint/bop_toolkit_lib/score.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """Calculation of performance scores.""" 5 | 6 | import numpy as np 7 | from collections import defaultdict 8 | 9 | from bop_toolkit_lib import misc 10 | 11 | 12 | def calc_ap(rec, pre): 13 | """Calculates Average Precision (AP). 
14 | 15 | Calculated in the PASCAL VOC challenge from 2010 onwards [1]: 16 | 1) Compute a version of the measured precision/recall curve with precision 17 | monotonically decreasing, by setting the precision for recall r to the 18 | maximum precision obtained for any recall r' >= r. 19 | 2) Compute the AP as the area under this curve by numerical integration. 20 | No approximation is involved since the curve is piecewise constant. 21 | 22 | NOTE: The used AP formula is different from the one in [2] where the 23 | formula from VLFeat [3] was presented - although it was mistakenly 24 | introduced as a formula used in PASCAL. 25 | 26 | References: 27 | [1] http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/devkit_doc.html#SECTION00044000000000000000 28 | [2] Hodan et al., "On Evaluation of 6D Object Pose Estimation", ECCVW 2016 29 | [3] http://www.vlfeat.org/matlab/vl_pr.html 30 | 31 | :param rec: A list (or 1D ndarray) of recall rates. 32 | :param pre: A list (or 1D ndarray) of precision rates. 33 | :return: Average Precision - the area under the monotonically decreasing 34 | version of the precision/recall curve given by rec and pre. 35 | """ 36 | # Sorts the precision/recall points by increasing recall. 37 | i = np.argsort(rec) 38 | 39 | mrec = np.concatenate(([0], np.array(rec)[i], [1])) 40 | mpre = np.concatenate(([0], np.array(pre)[i], [0])) 41 | assert (mrec.shape == mpre.shape) 42 | for i in range(mpre.size - 3, -1, -1): 43 | mpre[i] = max(mpre[i], mpre[i + 1]) 44 | i = np.nonzero(mrec[1:] != mrec[:-1])[0] + 1 45 | ap = np.sum((mrec[i] - mrec[i - 1]) * mpre[i]) 46 | return ap 47 | 48 | 49 | def calc_recall(tp_count, targets_count): 50 | """Calculates recall. 51 | 52 | :param tp_count: Number of true positives. 53 | :param targets_count: Number of targets. 54 | :return: The recall rate. 55 | """ 56 | if targets_count == 0: 57 | return 0.0 58 | else: 59 | return tp_count / float(targets_count) 60 | 61 | 62 | def calc_localization_scores(scene_ids, obj_ids, matches, n_top, do_print=True): 63 | """Calculates performance scores for the 6D object localization task. 64 | 65 | References: 66 | Hodan et al., BOP: Benchmark for 6D Object Pose Estimation, ECCV'18. 67 | Hodan et al., On Evaluation of 6D Object Pose Estimation, ECCVW'16. 68 | 69 | :param scene_ids: ID's of considered scenes. 70 | :param obj_ids: ID's of considered objects. 71 | :param matches: Info about matching pose estimates to ground-truth poses 72 | (see pose_matching.py for details). 73 | :param n_top: Number of top pose estimates to consider per test target. 74 | :param do_print: Whether to print the scores to the standard output. 75 | :return: Dictionary with the evaluation scores. 76 | """ 77 | # Count the number of visible object instances in each image. 78 | insts = {i: {j: defaultdict(lambda: 0) for j in scene_ids} for i in obj_ids} 79 | for m in matches: 80 | if m['valid']: 81 | insts[m['obj_id']][m['scene_id']][m['im_id']] += 1 82 | 83 | # Count the number of targets = object instances to be found. 84 | # For SiSo, there is either zero or one target in each image - there is just 85 | # one even if there are more instances of the object of interest. 86 | tars = 0 # Total number of targets. 87 | obj_tars = {i: 0 for i in obj_ids} # Targets per object. 88 | scene_tars = {i: 0 for i in scene_ids} # Targets per scene. 89 | for obj_id, obj_insts in insts.items(): 90 | for scene_id, scene_insts in obj_insts.items(): 91 | 92 | # Count the number of targets for the current object in the current scene. 
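      # e.g. (hypothetical numbers) with n_top = 1 and scene_insts = {42: 3, 43: 1},
      # count = min(1, 3) + min(1, 1) = 2: at most n_top instances per image count as
      # targets, whereas n_top = 0 would count all 4.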
93 | if n_top > 0: 94 | count = sum(np.minimum(n_top, list(scene_insts.values()))) 95 | else: 96 | count = sum(list(scene_insts.values())) 97 | 98 | tars += count 99 | obj_tars[obj_id] += count 100 | scene_tars[scene_id] += count 101 | 102 | # Count the number of true positives. 103 | tps = 0 # Total number of true positives. 104 | obj_tps = {i: 0 for i in obj_ids} # True positives per object. 105 | scene_tps = {i: 0 for i in scene_ids} # True positives per scene. 106 | for m in matches: 107 | if m['valid'] and m['est_id'] != -1: 108 | tps += 1 109 | obj_tps[m['obj_id']] += 1 110 | scene_tps[m['scene_id']] += 1 111 | 112 | # Total recall. 113 | recall = calc_recall(tps, tars) 114 | 115 | # Recall per object. 116 | obj_recalls = {} 117 | for i in obj_ids: 118 | obj_recalls[i] = calc_recall(obj_tps[i], obj_tars[i]) 119 | mean_obj_recall = float(np.mean(list(obj_recalls.values())).squeeze()) 120 | 121 | # Recall per scene. 122 | scene_recalls = {} 123 | for i in scene_ids: 124 | scene_recalls[i] = float(calc_recall(scene_tps[i], scene_tars[i])) 125 | mean_scene_recall = float(np.mean(list(scene_recalls.values())).squeeze()) 126 | 127 | # Final scores. 128 | scores = { 129 | 'recall': float(recall), 130 | 'obj_recalls': obj_recalls, 131 | 'mean_obj_recall': float(mean_obj_recall), 132 | 'scene_recalls': scene_recalls, 133 | 'mean_scene_recall': float(mean_scene_recall), 134 | 'gt_count': len(matches), 135 | 'targets_count': int(tars), 136 | 'tp_count': int(tps), 137 | } 138 | 139 | if do_print: 140 | obj_recalls_str = ', '.join( 141 | ['{}: {:.3f}'.format(i, s) for i, s in scores['obj_recalls'].items()]) 142 | 143 | scene_recalls_str = ', '.join( 144 | ['{}: {:.3f}'.format(i, s) for i, s in scores['scene_recalls'].items()]) 145 | 146 | misc.log('') 147 | misc.log('GT count: {:d}'.format(scores['gt_count'])) 148 | misc.log('Target count: {:d}'.format(scores['targets_count'])) 149 | misc.log('TP count: {:d}'.format(scores['tp_count'])) 150 | misc.log('Recall: {:.4f}'.format(scores['recall'])) 151 | misc.log('Mean object recall: {:.4f}'.format(scores['mean_obj_recall'])) 152 | misc.log('Mean scene recall: {:.4f}'.format(scores['mean_scene_recall'])) 153 | misc.log('Object recalls:\n{}'.format(obj_recalls_str)) 154 | misc.log('Scene recalls:\n{}'.format(scene_recalls_str)) 155 | misc.log('') 156 | 157 | return scores 158 | 159 | 160 | if __name__ == '__main__': 161 | 162 | # AP test. 163 | tp = np.array([False, True, True, False, True, False]) 164 | fp = np.logical_not(tp) 165 | tp_c = np.cumsum(tp).astype(np.float) 166 | fp_c = np.cumsum(fp).astype(np.float) 167 | rec = tp_c / tp.size 168 | pre = tp_c / (fp_c + tp_c) 169 | misc.log('Average Precision: ' + str(calc_ap(rec, pre))) 170 | -------------------------------------------------------------------------------- /keypoint/bop_toolkit_lib/visibility.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """Estimation of the visible object surface from depth images.""" 5 | 6 | import numpy as np 7 | 8 | 9 | def _estimate_visib_mask(d_test, d_model, delta, visib_mode='bop19'): 10 | """Estimates a mask of the visible object surface. 11 | 12 | :param d_test: Distance image of a scene in which the visibility is estimated. 13 | :param d_model: Rendered distance image of the object model. 14 | :param delta: Tolerance used in the visibility test. 
15 | :param visib_mode: Visibility mode: 16 | 1) 'bop18' - Object is considered NOT VISIBLE at pixels with missing depth. 17 | 2) 'bop19' - Object is considered VISIBLE at pixels with missing depth. This 18 | allows to use the VSD pose error function also on shiny objects, which 19 | are typically not captured well by the depth sensors. A possible problem 20 | with this mode is that some invisible parts can be considered visible. 21 | However, the shadows of missing depth measurements, where this problem is 22 | expected to appear and which are often present at depth discontinuities, 23 | are typically relatively narrow and therefore this problem is less 24 | significant. 25 | :return: Visibility mask. 26 | """ 27 | assert (d_test.shape == d_model.shape) 28 | 29 | if visib_mode == 'bop18': 30 | mask_valid = np.logical_and(d_test > 0, d_model > 0) 31 | d_diff = d_model.astype(np.float32) - d_test.astype(np.float32) 32 | visib_mask = np.logical_and(d_diff <= delta, mask_valid) 33 | 34 | elif visib_mode == 'bop19': 35 | d_diff = d_model.astype(np.float32) - d_test.astype(np.float32) 36 | visib_mask = np.logical_and( 37 | np.logical_or(d_diff <= delta, d_test == 0), d_model > 0) 38 | 39 | else: 40 | raise ValueError('Unknown visibility mode.') 41 | 42 | return visib_mask 43 | 44 | 45 | def estimate_visib_mask_gt(d_test, d_gt, delta, visib_mode='bop19'): 46 | """Estimates a mask of the visible object surface in the ground-truth pose. 47 | 48 | :param d_test: Distance image of a scene in which the visibility is estimated. 49 | :param d_gt: Rendered distance image of the object model in the GT pose. 50 | :param delta: Tolerance used in the visibility test. 51 | :param visib_mode: See _estimate_visib_mask. 52 | :return: Visibility mask. 53 | """ 54 | visib_gt = _estimate_visib_mask(d_test, d_gt, delta, visib_mode) 55 | return visib_gt 56 | 57 | 58 | def estimate_visib_mask_est(d_test, d_est, visib_gt, delta, visib_mode='bop19'): 59 | """Estimates a mask of the visible object surface in the estimated pose. 60 | 61 | For an explanation of why the visibility mask is calculated differently for 62 | the estimated and the ground-truth pose, see equation (14) and related text in 63 | Hodan et al., On Evaluation of 6D Object Pose Estimation, ECCVW'16. 64 | 65 | :param d_test: Distance image of a scene in which the visibility is estimated. 66 | :param d_est: Rendered distance image of the object model in the est. pose. 67 | :param visib_gt: Visibility mask of the object model in the GT pose (from 68 | function estimate_visib_mask_gt). 69 | :param delta: Tolerance used in the visibility test. 70 | :param visib_mode: See _estimate_visib_mask. 71 | :return: Visibility mask. 72 | """ 73 | visib_est = _estimate_visib_mask(d_test, d_est, delta, visib_mode) 74 | visib_est = np.logical_or(visib_est, np.logical_and(visib_gt, d_est > 0)) 75 | return visib_est 76 | -------------------------------------------------------------------------------- /keypoint/bop_toolkit_lib/visualization.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """Visualization utilities.""" 5 | 6 | import os 7 | # import cv2 8 | import numpy as np 9 | from PIL import Image, ImageDraw, ImageFont 10 | 11 | from bop_toolkit_lib import inout 12 | from bop_toolkit_lib import misc 13 | 14 | 15 | def draw_rect(im, rect, color=(1.0, 1.0, 1.0)): 16 | """Draws a rectangle on an image. 
17 | 18 | :param im: ndarray (uint8) on which the rectangle will be drawn. 19 | :param rect: Rectangle defined as [x, y, width, height], where [x, y] is the 20 | top-left corner. 21 | :param color: Color of the rectangle. 22 | :return: Image with drawn rectangle. 23 | """ 24 | if im.dtype != np.uint8: 25 | raise ValueError('The image must be of type uint8.') 26 | 27 | im_pil = Image.fromarray(im) 28 | draw = ImageDraw.Draw(im_pil) 29 | draw.rectangle((rect[0], rect[1], rect[0] + rect[2], rect[1] + rect[3]), 30 | outline=tuple([int(c * 255) for c in color]), fill=None) 31 | del draw 32 | return np.asarray(im_pil) 33 | 34 | 35 | def write_text_on_image(im, txt_list, loc=(3, 0), color=(1.0, 1.0, 1.0), 36 | size=20): 37 | """Writes text info on an image. 38 | 39 | :param im: ndarray on which the text info will be written. 40 | :param txt_list: List of dictionaries, each describing one info line: 41 | - 'name': Entry name. 42 | - 'val': Entry value. 43 | - 'fmt': String format for the value. 44 | :param loc: Location of the top left corner of the text box. 45 | :param color: Font color. 46 | :param size: Font size. 47 | :return: Image with written text info. 48 | """ 49 | im_pil = Image.fromarray(im) 50 | 51 | # Load font. 52 | try: 53 | font_path = os.path.join(os.path.dirname(__file__), 'droid_sans_mono.ttf') 54 | font = ImageFont.truetype(font_path, size) 55 | except IOError: 56 | misc.log('Warning: Loading a fallback font.') 57 | font = ImageFont.load_default() 58 | 59 | draw = ImageDraw.Draw(im_pil) 60 | for info in txt_list: 61 | if info['name'] != '': 62 | txt_tpl = '{}:{' + info['fmt'] + '}' 63 | else: 64 | txt_tpl = '{}{' + info['fmt'] + '}' 65 | txt = txt_tpl.format(info['name'], info['val']) 66 | draw.text(loc, txt, fill=tuple([int(c * 255) for c in color]), font=font) 67 | text_width, text_height = font.getsize(txt) 68 | loc = (loc[0], loc[1] + text_height) 69 | del draw 70 | 71 | return np.array(im_pil) 72 | 73 | 74 | def depth_for_vis(depth, valid_start=0.2, valid_end=1.0): 75 | """Transforms depth values from the specified range to [0, 255]. 76 | 77 | :param depth: ndarray with a depth image (1 channel). 78 | :param valid_start: The beginning of the depth range. 79 | :param valid_end: The end of the depth range. 80 | :return: Transformed depth image. 81 | """ 82 | mask = depth > 0 83 | depth_n = depth.astype(np.float) 84 | depth_n[mask] -= depth_n[mask].min() 85 | depth_n[mask] /= depth_n[mask].max() / (valid_end - valid_start) 86 | depth_n[mask] += valid_start 87 | return depth_n 88 | 89 | 90 | def vis_object_poses( 91 | poses, K, renderer, rgb=None, depth=None, vis_rgb_path=None, 92 | vis_depth_diff_path=None, vis_rgb_resolve_visib=False): 93 | """Visualizes 3D object models in specified poses in a single image. 94 | 95 | Two visualizations are created: 96 | 1. An RGB visualization (if vis_rgb_path is not None). 97 | 2. A Depth-difference visualization (if vis_depth_diff_path is not None). 98 | 99 | :param poses: List of dictionaries, each with info about one pose: 100 | - 'obj_id': Object ID. 101 | - 'R': 3x3 ndarray with a rotation matrix. 102 | - 't': 3x1 ndarray with a translation vector. 103 | - 'text_info': Info to write at the object (see write_text_on_image). 104 | :param K: 3x3 ndarray with an intrinsic camera matrix. 105 | :param renderer: Instance of the Renderer class (see renderer.py). 106 | :param rgb: ndarray with the RGB input image. 107 | :param depth: ndarray with the depth input image. 108 | :param vis_rgb_path: Path to the output RGB visualization. 
109 | :param vis_depth_diff_path: Path to the output depth-difference visualization. 110 | :param vis_rgb_resolve_visib: Whether to resolve visibility of the objects 111 | (i.e. only the closest object is visualized at each pixel). 112 | """ 113 | fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2] 114 | 115 | # Indicators of visualization types. 116 | vis_rgb = vis_rgb_path is not None 117 | vis_depth_diff = vis_depth_diff_path is not None 118 | 119 | if vis_rgb and rgb is None: 120 | raise ValueError('RGB visualization triggered but RGB image not provided.') 121 | 122 | if (vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib)) and depth is None: 123 | raise ValueError('Depth visualization triggered but D image not provided.') 124 | 125 | # Prepare images for rendering. 126 | im_size = None 127 | ren_rgb = None 128 | ren_rgb_info = None 129 | ren_depth = None 130 | 131 | if vis_rgb: 132 | im_size = (rgb.shape[1], rgb.shape[0]) 133 | ren_rgb = np.zeros(rgb.shape, np.uint8) 134 | ren_rgb_info = np.zeros(rgb.shape, np.uint8) 135 | 136 | if vis_depth_diff: 137 | if im_size and im_size != (depth.shape[1], depth.shape[0]): 138 | raise ValueError('The RGB and D images must have the same size.') 139 | else: 140 | im_size = (depth.shape[1], depth.shape[0]) 141 | 142 | if vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib): 143 | ren_depth = np.zeros((im_size[1], im_size[0]), np.float32) 144 | 145 | # Render the pose estimates one by one. 146 | for pose in poses: 147 | 148 | # Rendering. 149 | ren_out = renderer.render_object( 150 | pose['obj_id'], pose['R'], pose['t'], fx, fy, cx, cy) 151 | 152 | m_rgb = None 153 | if vis_rgb: 154 | m_rgb = ren_out['rgb'] 155 | 156 | m_mask = None 157 | if vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib): 158 | m_depth = ren_out['depth'] 159 | 160 | # Get mask of the surface parts that are closer than the 161 | # surfaces rendered before. 162 | visible_mask = np.logical_or(ren_depth == 0, m_depth < ren_depth) 163 | m_mask = np.logical_and(m_depth != 0, visible_mask) 164 | 165 | ren_depth[m_mask] = m_depth[m_mask].astype(ren_depth.dtype) 166 | 167 | # Combine the RGB renderings. 168 | if vis_rgb: 169 | if vis_rgb_resolve_visib: 170 | ren_rgb[m_mask] = m_rgb[m_mask].astype(ren_rgb.dtype) 171 | else: 172 | ren_rgb_f = ren_rgb.astype(np.float32) + m_rgb.astype(np.float32) 173 | ren_rgb_f[ren_rgb_f > 255] = 255 174 | ren_rgb = ren_rgb_f.astype(np.uint8) 175 | 176 | # Draw 2D bounding box and write text info. 177 | obj_mask = np.sum(m_rgb > 0, axis=2) 178 | ys, xs = obj_mask.nonzero() 179 | if len(ys): 180 | # bbox_color = model_color 181 | # text_color = model_color 182 | bbox_color = (0.3, 0.3, 0.3) 183 | text_color = (1.0, 1.0, 1.0) 184 | text_size = 11 185 | 186 | bbox = misc.calc_2d_bbox(xs, ys, im_size) 187 | im_size = (obj_mask.shape[1], obj_mask.shape[0]) 188 | ren_rgb_info = draw_rect(ren_rgb_info, bbox, bbox_color) 189 | 190 | if 'text_info' in pose: 191 | text_loc = (bbox[0] + 2, bbox[1]) 192 | ren_rgb_info = write_text_on_image( 193 | ren_rgb_info, pose['text_info'], text_loc, color=text_color, 194 | size=text_size) 195 | 196 | # Blend and save the RGB visualization. 197 | if vis_rgb: 198 | misc.ensure_dir(os.path.dirname(vis_rgb_path)) 199 | 200 | vis_im_rgb = 0.5 * rgb.astype(np.float32) + \ 201 | 0.5 * ren_rgb.astype(np.float32) + \ 202 | 1.0 * ren_rgb_info.astype(np.float32) 203 | vis_im_rgb[vis_im_rgb > 255] = 255 204 | inout.save_im(vis_rgb_path, vis_im_rgb.astype(np.uint8), jpg_quality=95) 205 | 206 | # Save the image of depth differences. 
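  # The image written below stacks [below-delta mask, normalized diff, normalized diff]
  # as its three channels, so pixels whose rendered-minus-measured depth difference is
  # under the hard-coded delta (15 below) light up in the first channel, and the
  # min/max/mean of the valid differences are printed onto the image.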
207 | if vis_depth_diff: 208 | misc.ensure_dir(os.path.dirname(vis_depth_diff_path)) 209 | 210 | # Calculate the depth difference at pixels where both depth maps are valid. 211 | valid_mask = (depth > 0) * (ren_depth > 0) 212 | depth_diff = valid_mask * (ren_depth.astype(np.float32) - depth) 213 | 214 | delta = 15 215 | below_delta = valid_mask * (depth_diff < delta) 216 | below_delta_vis = (255 * below_delta).astype(np.uint8) 217 | 218 | depth_diff_vis = 255 * depth_for_vis(depth_diff - depth_diff.min()) 219 | depth_diff_vis = np.dstack( 220 | [below_delta_vis, depth_diff_vis, depth_diff_vis]).astype(np.uint8) 221 | depth_diff_vis[np.logical_not(valid_mask)] = 0 222 | depth_diff_valid = depth_diff[valid_mask] 223 | depth_info = [ 224 | {'name': 'min diff', 'fmt': ':.3f', 'val': np.min(depth_diff_valid)}, 225 | {'name': 'max diff', 'fmt': ':.3f', 'val': np.max(depth_diff_valid)}, 226 | {'name': 'mean diff', 'fmt': ':.3f', 'val': np.mean(depth_diff_valid)}, 227 | ] 228 | depth_diff_vis = write_text_on_image(depth_diff_vis, depth_info) 229 | inout.save_im(vis_depth_diff_path, depth_diff_vis) 230 | -------------------------------------------------------------------------------- /keypoint/est_6dof.py: -------------------------------------------------------------------------------- 1 | import json 2 | import cv2 3 | import argparse 4 | from tqdm import tqdm 5 | import numpy as np 6 | 7 | import torch 8 | from torchvision import transforms as T 9 | from models import FRCNN, StackedHourglass, fasterrcnn_backbone 10 | from bop_dataset import BOPDataset 11 | from poseOpt import pose_coordinate_descend 12 | 13 | from train.transforms import ToTensor, Normalize, AffineCrop, Normalize_imgnet 14 | from misc.pose2d_eval import Pose2DEval 15 | from bop_toolkit_lib.inout import save_bop_results 16 | 17 | def one_each(pred, thresh=0.0): 18 | # Postprocess frcnn: get at most one instance per class 19 | # Return: boxes and labels 20 | conf = pred['scores'] > thresh 21 | 22 | conf_scores = pred['scores'][conf] 23 | conf_boxes = pred['boxes'][conf].int() 24 | conf_labels = pred['labels'][conf].int() 25 | 26 | valid = torch.zeros_like(conf_labels).bool() 27 | unique_labels = torch.unique(conf_labels) 28 | for uni in unique_labels: 29 | p = (conf_labels==uni).nonzero(as_tuple=False).reshape(-1) 30 | valid[p[0]] = True 31 | 32 | pd_scores = conf_scores[valid] 33 | pd_boxes = conf_boxes[valid] 34 | pd_labels = conf_labels[valid] 35 | 36 | return pd_boxes, pd_labels 37 | 38 | #********************************************************* 39 | # Provide dataset name 40 | #********************************************************* 41 | parser = argparse.ArgumentParser() 42 | parser.add_argument('--dataset', type=str, default=None, help='dataset name') 43 | args = parser.parse_args() 44 | dataset_name = args.dataset 45 | 46 | 47 | #********************************************************* 48 | # Keypoint-based 6DOF estimation 49 | #********************************************************* 50 | root = './data/bop' 51 | device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') 52 | 53 | print('Running on:', dataset_name) 54 | print('Device:', device) 55 | 56 | # Load dataset meta 57 | dataset = BOPDataset(root, dataset_name, split='test', 58 | return_keypoints=False, return_coco=True) 59 | dataset._set_kpts_info() 60 | 61 | num_classes = {'lmo':8, 'ycbv': 21, 'tudl': 3} 62 | 63 | # Load Faster-RCNN detector 64 | detector_trainsform = T.ToTensor() 65 | state_dict = 
torch.load('data/detect_checkpoints/d{}.pt'.format(dataset_name), map_location=device)['frcnn'] 66 | 67 | #detector = FRCNN(num_classes = 1+num_classes[dataset_name]).to(device) 68 | detector = fasterrcnn_backbone('resnet101', num_classes=1+num_classes[dataset_name]).to(device) 69 | detector.eval() 70 | detector.load_state_dict(state_dict) 71 | 72 | 73 | # Load keypoint detector: stacked hourglass 74 | transform_list = [] 75 | transform_list.append(AffineCrop(out_size=256, scale_factor=0, rotation_factor=0, dialation=0.25)) 76 | transform_list.append(ToTensor()) 77 | transform_list.append(Normalize()) 78 | kpts_transform = T.Compose(transform_list) 79 | state_dict = torch.load('data/kpts_checkpoints/{}.pt'.format(dataset_name), map_location=device)['stacked_hg'] 80 | 81 | kpts_detector = StackedHourglass(dataset.n_kpts).to(device) 82 | kpts_detector.eval() 83 | kpts_detector.load_state_dict(state_dict) 84 | 85 | # Run keypoint-base 6DOF 86 | db = dataset.db 87 | num_imgs = len(db) 88 | poseEval = Pose2DEval() 89 | 90 | obj2idx = dataset.obj2idx 91 | idx2obj = {v:k for k,v in obj2idx.items()} 92 | lab2obj = {v+1:k for k,v in obj2idx.items()} 93 | 94 | with open('kpts3d.json', 'r') as infile: 95 | kpts3d = json.load(infile)[dataset.dataset_name] 96 | 97 | results = [] 98 | for i in tqdm(range(num_imgs)): 99 | imgpath = db[i]['imgpath'] 100 | image = dataset.load_img(imgpath) 101 | 102 | scene_id = db[i]['scene_id'] 103 | im_id = db[i]['im_id'] 104 | K = db[i]['K'] 105 | gt_objs = [lab2obj[l] for l in db[i]['labels']] 106 | 107 | # Object detection 108 | with torch.no_grad(): 109 | img = detector_trainsform(image).to(device) 110 | pred = detector([img])[0] 111 | pred = {k:v.cpu() for k,v in pred.items()} 112 | 113 | pd_boxes, pd_labels = one_each(pred, thresh=0) 114 | pd_objs = [lab2obj[i] for i in pd_labels.tolist()] 115 | pd_objs = torch.tensor(pd_objs) 116 | 117 | # Keypoint-base 6DOF estimation 118 | for obj in gt_objs: 119 | ### If Object is not detected 120 | if obj not in pd_objs: 121 | res = {'scene_id': scene_id, 122 | 'im_id': im_id, 123 | 'obj_id': obj, 124 | 'score': 0, 125 | 'R': np.eye(3), 126 | 't': np.zeros([3]), 127 | 'time': -1 128 | } 129 | results.append(res) 130 | continue 131 | 132 | ### If Object is detected 133 | box = pd_boxes[pd_objs == obj].squeeze().tolist() 134 | box = [box[0], box[1], box[2]-box[0], box[3]-box[1]] 135 | input_crop = {'image':image, 'bb':box} 136 | input_crop = kpts_transform(input_crop) 137 | 138 | with torch.no_grad(): 139 | batch = input_crop['image'][None].to(device) 140 | output = kpts_detector(batch) 141 | output = output[-1].cpu() 142 | 143 | kpt_start = dataset.obj2kptid[obj][0] 144 | kpt_end = dataset.obj2kptid[obj][1] 145 | heatmaps_pred = output[[0], kpt_start:kpt_end, :, :] 146 | 147 | kpts_pred, confs = poseEval.heatmaps_to_locs(heatmaps_pred, return_vals=True) 148 | confs = confs[0] 149 | kpts_pred = kpts_pred[0] 150 | 151 | crop_kpts = kpts_pred * (256/64) 152 | view_kpts = poseEval.get_view_kpts(box, crop_kpts) 153 | view_kpts = view_kpts.numpy() 154 | kpts_h = np.hstack([view_kpts, np.ones([view_kpts.shape[0], 1])]).astype(np.double) 155 | 156 | D = confs.numpy().astype(np.double) 157 | kpts3d_obj = kpts3d[str(obj)] 158 | 159 | R_, t_, Z_, res = pose_coordinate_descend(K, kpts_h, kpts3d_obj, D, 160 | max_iters=10000, thresh=1e-6, pnp_int=True) 161 | 162 | 163 | res = {'scene_id': scene_id, 164 | 'im_id': im_id, 165 | 'obj_id': obj, 166 | 'score': np.mean(D), 167 | 'R': R_, 168 | 't': t_, 169 | 'time': -1 170 | } 171 | 172 | 
        results.append(res)
173 | 
174 | 
175 | save_bop_results('results_{}-test.csv'.format(dataset_name), results)
176 | 
177 | 
178 | 
179 | 
--------------------------------------------------------------------------------
/keypoint/misc/__init__.py:
--------------------------------------------------------------------------------
1 | from .pose2d_eval import Pose2DEval
2 | from .loss import KptsMSELoss
3 | 
--------------------------------------------------------------------------------
/keypoint/misc/loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.nn import functional as F
4 | 
5 | class CrossEntropy(nn.Module):
6 |     def __init__(self, ignore_label=-1, weight=None):
7 |         super(CrossEntropy, self).__init__()
8 |         self.ignore_label = ignore_label
9 |         self.criterion = nn.CrossEntropyLoss(weight=weight,
10 |                                              ignore_index=ignore_label)
11 | 
12 |     def forward(self, score, target):
13 |         ph, pw = score.size(2), score.size(3)
14 |         h, w = target.size(1), target.size(2)
15 |         if ph != h or pw != w:
16 |             score = F.upsample(
17 |                 input=score, size=(h, w), mode='bilinear')
18 | 
19 |         loss = self.criterion(score, target)
20 | 
21 |         return loss
22 | 
23 | 
24 | class KptsMSELoss(nn.Module):
25 |     def __init__(self, use_vis=False):
26 |         super(KptsMSELoss, self).__init__()
27 |         self.criterion = nn.MSELoss(reduction='mean')
28 |         self.use_vis = use_vis
29 | 
30 |     def forward(self, output, target, vis):
31 |         '''
32 |         output: (BN, K, w, h)
33 |         target: (BN, K, w, h)
34 |         vis: (BN, K)
35 |         '''
36 |         batch_size = output.size(0)
37 |         num_kpts = output.size(1)
38 |         heatmaps_pred = output.reshape((batch_size, num_kpts, -1))
39 |         heatmaps_gt = target.reshape((batch_size, num_kpts, -1))
40 |         vis = vis.reshape((batch_size, num_kpts, 1))
41 | 
42 |         if self.use_vis:
43 |             loss = self.criterion(
44 |                 heatmaps_pred.mul(vis),
45 |                 heatmaps_gt.mul(vis)
46 |             )
47 |         else:
48 |             loss = self.criterion(heatmaps_pred, heatmaps_gt)
49 | 
50 |         return loss
51 | 
52 | 
53 | 
54 | 
55 | 
56 | 
--------------------------------------------------------------------------------
/keypoint/misc/pose2d_eval.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from skimage.draw import disk
4 | 
5 | class Pose2DEval:
6 | 
7 |     def __init__(self, detection_thresh=0.1, dist_thresh=10):
8 |         self.detection_thresh = detection_thresh
9 |         self.dist_thresh = dist_thresh
10 | 
11 |     def heatmaps_to_locs(self, heatmaps, no_thresh=False, return_vals=False):
12 |         vals, uv = torch.max(heatmaps.view(heatmaps.shape[0],
13 |                                            heatmaps.shape[1],
14 |                                            heatmaps.shape[2]*heatmaps.shape[3]), 2)
15 |         # zero out entries below the detection threshold
16 |         thresh = self.detection_thresh
17 |         if no_thresh:
18 |             thresh = 0
19 |         uv *= (vals > thresh).type(torch.long)
20 |         rows = uv // heatmaps.shape[3]  # floor division: uv is a flattened index
21 |         cols = uv % heatmaps.shape[3]
22 | 
23 |         locs = torch.stack([cols, rows], 2).cpu().type(torch.float)
24 |         vals[vals < thresh] = 0
25 | 
26 |         if return_vals:
27 |             return locs, vals
28 |         else:
29 |             return locs
30 | 
31 |     def pck(self, gt_heatmaps, pred_heatmaps):
32 |         gt_locs = self.heatmaps_to_locs(gt_heatmaps)
33 |         pred_locs = self.heatmaps_to_locs(pred_heatmaps)
34 |         visible_keypoints = (gt_locs.sum(dim=-1) > 0)
35 |         return 100 * torch.mean((torch.sqrt(torch.sum((gt_locs - pred_locs) ** 2, dim=-1))[visible_keypoints] < self.dist_thresh).type(torch.float))
36 | 
37 |     def get_view_kpts(self, bbox, crop_kpts, crop_size=256, crop_dialation=0.25):
38 |         if type(bbox) is not torch.Tensor:
39 |             bbox = torch.tensor(bbox)
40 | 
41 |         x,y,w,h = bbox
42 |         center = torch.tensor([x+w/2, y+h/2])
43 |         scale = torch.max(w,h) * (1+crop_dialation)
44 |         rescale = scale /crop_size
45 |         ul = center - scale/2
46 | 
47 |         if crop_kpts.shape[1] == 2:
48 |             view_kpts = crop_kpts * rescale + torch.tensor([ul[0],ul[1]])
49 |         elif crop_kpts.shape[1] == 3:
50 |             view_kpts = crop_kpts * rescale + torch.tensor([ul[0],ul[1],0])
51 | 
52 |         return view_kpts
53 | 
54 |     def draw_keypoints_with_labels(self, images, gt_heatmaps, pred_heatmaps):
55 |         gt_images, pred_images = images.clone(), images.clone()
56 |         rescale = images.shape[2]/gt_heatmaps.shape[2]
57 |         gt_keypoints = self.heatmaps_to_locs(gt_heatmaps)*rescale
58 |         pred_keypoints = self.heatmaps_to_locs(pred_heatmaps)*rescale
59 |         for i in range(images.shape[0]):
60 |             for gt_keypoint, pred_keypoint in zip(gt_keypoints[i,:,:], pred_keypoints[i,:,:]):
61 |                 if gt_keypoint[0] != 0 and gt_keypoint[1] != 0:
62 |                     r,c = disk((gt_keypoint[1], gt_keypoint[0]), 3, shape=images.shape[-2:])
63 |                     # blue color for the ground truth keypoints
64 |                     gt_images[i,0,r,c] = 0
65 |                     gt_images[i,1,r,c] = 0
66 |                     gt_images[i,2,r,c] = 1
67 |                 if pred_keypoint[0] != 0 and pred_keypoint[1] != 0:
68 |                     r,c = disk((pred_keypoint[1], pred_keypoint[0]), 3, shape=images.shape[-2:])
69 |                     correct_prediction = torch.sqrt(torch.sum((gt_keypoint - pred_keypoint) ** 2)) < self.dist_thresh
70 |                     # blue color if predicted keypoint is within the margin, else red
71 |                     val = [0,0,1] if correct_prediction else [1,0,0]
72 |                     pred_images[i,0,r,c] = val[0]
73 |                     pred_images[i,1,r,c] = val[1]
74 |                     pred_images[i,2,r,c] = val[2]
75 |         return gt_images, pred_images
76 | 
77 |     def draw_keypoints_unlabeled(self, images, pred_heatmaps):
78 |         pred_images = images.clone()
79 |         rescale = images.shape[2]/pred_heatmaps.shape[2]
80 |         pred_keypoints = self.heatmaps_to_locs(pred_heatmaps)*rescale
81 |         for i in range(images.shape[0]):
82 |             for pred_keypoint in pred_keypoints[i,:,:]:
83 |                 if pred_keypoint[0] != 0 and pred_keypoint[1] != 0:
84 |                     r,c = disk((pred_keypoint[1], pred_keypoint[0]), 3, shape=images.shape[-2:])
85 |                     # blue color for the predicted keypoints
86 |                     pred_images[i,0,r,c] = 0
87 |                     pred_images[i,1,r,c] = 0
88 |                     pred_images[i,2,r,c] = 1
89 |         return pred_images
90 | 
--------------------------------------------------------------------------------
/keypoint/misc/segmentation.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 | 
4 | def iou(gt_masks, pred_masks):
5 |     pred_masks_thresh = (pred_masks > 0.5).type(torch.int)
6 |     gt_masks = (gt_masks > 0.5).type(torch.int)
7 |     return torch.mean((pred_masks_thresh & gt_masks).type(torch.float))\
8 |         / torch.mean((pred_masks_thresh | gt_masks).type(torch.float))
9 | 
10 | def visualize(images, masks):
11 |     scale = int(images.shape[2] / masks.shape[2])
12 |     masks_thresh = (F.upsample(masks, scale_factor=scale, mode='bilinear') > 0.5).type(torch.int)
13 |     segmented_images = images.clone()
14 |     segmented_images[masks_thresh.repeat(1,3,1,1) == 0] = 0
15 |     return segmented_images
16 | 
--------------------------------------------------------------------------------
/keypoint/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .hourglass import StackedHourglass
2 | from .fasterRCNN import FRCNN, fasterrcnn_backbone
3 | 
--------------------------------------------------------------------------------
/keypoint/models/fasterRCNN.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torchvision
3 | from torchvision.models.detection.faster_rcnn import FastRCNNPredictor, FasterRCNN
4 | from torchvision.models.detection.backbone_utils
import resnet_fpn_backbone 5 | 6 | from .patched import rpn_forward, roi_forward 7 | import types 8 | 9 | @torch.jit.unused 10 | def eager_outputs(self, losses, detections): 11 | if self.training or self.always_return_loss: 12 | return losses 13 | 14 | return detections 15 | 16 | 17 | def FRCNN(num_classes): 18 | # load a model pre-trained pre-trained on COCO 19 | model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True) 20 | 21 | # get number of input features for the classifier 22 | in_features = model.roi_heads.box_predictor.cls_score.in_features 23 | 24 | # replace the pre-trained head with a new one 25 | model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) 26 | 27 | # override ouput functions 28 | model.always_return_loss = False 29 | 30 | model.rpn.forward = types.MethodType(rpn_forward, model.rpn) 31 | model.roi_heads.forward = types.MethodType(roi_forward, model.roi_heads) 32 | model.eager_outputs = types.MethodType(eager_outputs, model) 33 | 34 | return model 35 | 36 | 37 | def fasterrcnn_backbone(backbone_name='resnet50', 38 | num_classes=91, pretrained_backbone=True, trainable_backbone_layers=3, **kwargs): 39 | ''' 40 | Input: 41 | backbone_name (string): resnet architecture. Possible values are 'ResNet', 'resnet18', 'resnet34', 'resnet50', 42 | 'resnet101', 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d', 'wide_resnet50_2', 'wide_resnet101_2' 43 | ''' 44 | 45 | # load a model pre-trained pre-trained on COCO 46 | model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True) 47 | in_features = model.roi_heads.box_predictor.cls_score.in_features 48 | model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) 49 | 50 | # switch backbone 51 | backbone = resnet_fpn_backbone(backbone_name, pretrained_backbone, trainable_layers=trainable_backbone_layers) 52 | model.backbone = backbone 53 | 54 | 55 | # override ouput functions 56 | model.always_return_loss = False 57 | model.rpn.forward = types.MethodType(rpn_forward, model.rpn) 58 | model.roi_heads.forward = types.MethodType(roi_forward, model.roi_heads) 59 | model.eager_outputs = types.MethodType(eager_outputs, model) 60 | 61 | return model 62 | 63 | 64 | -------------------------------------------------------------------------------- /keypoint/models/hourglass.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import numpy as np 4 | # from .layers import ConvBlock, ResBlock 5 | from .layers import Residual 6 | 7 | class Hourglass(nn.Module): 8 | def __init__(self, n, in_channels, out_channels): 9 | super(Hourglass, self).__init__() 10 | self.up1 = Residual(in_channels, 256) 11 | self.up2 = Residual(256, 256) 12 | self.up4 = Residual(256, out_channels) 13 | 14 | self.pool = nn.MaxPool2d(kernel_size=2, stride=2) 15 | self.low1 = Residual(in_channels, 256) 16 | self.low2 = Residual(256, 256) 17 | self.low5 = Residual(256, 256) 18 | if n > 1: 19 | self.low6 = Hourglass(n-1, 256, out_channels) 20 | else: 21 | self.low6 = Residual(256, out_channels) 22 | self.low7 = Residual(out_channels, out_channels) 23 | # self.up5 = nn.Upsample(scale_factor=2) 24 | 25 | def forward(self, x): 26 | up = self.up1(x) 27 | up = self.up2(up) 28 | up = self.up4(up) 29 | 30 | low = self.pool(x) 31 | low = self.low1(low) 32 | low = self.low2(low) 33 | low = self.low5(low) 34 | low = self.low6(low) 35 | low = self.low7(low) 36 | # low = self.up5(low) 37 | low = nn.functional.interpolate(low, 
scale_factor=2) 38 | 39 | return up + low 40 | 41 | class Lin(nn.Module): 42 | def __init__(self, in_channels, out_channels): 43 | super(Lin, self).__init__() 44 | self.layer = nn.Sequential( 45 | nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0), 46 | nn.BatchNorm2d(out_channels), 47 | nn.ReLU(True) 48 | ) 49 | 50 | def forward(self, x): 51 | return self.layer(x) 52 | 53 | class StackedHourglass(nn.Module): 54 | def __init__(self, out_channels): 55 | super(StackedHourglass, self).__init__() 56 | self.conv1 = nn.Sequential( 57 | nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3), 58 | nn.BatchNorm2d(64), 59 | nn.ReLU() 60 | ) 61 | self.r1 = Residual(64, 128) 62 | self.pool = nn.MaxPool2d(2, 2) 63 | self.r4 = Residual(128, 128) 64 | self.r5 = Residual(128, 128) 65 | self.r6 = Residual(128, 256) 66 | 67 | self.hg1 = Hourglass(4, 256, 512) 68 | 69 | self.l1 = Lin(512, 512) 70 | self.l2 = Lin(512, 256) 71 | 72 | self.out1 = nn.Conv2d(256, out_channels, kernel_size=1, stride=1, padding=0) 73 | 74 | self.out_return = nn.Conv2d(out_channels, 256+128, kernel_size=1, stride=1, padding=0) 75 | 76 | self.cat_conv = nn.Conv2d(256+128, 256+128, kernel_size=1, stride=1, padding=0) 77 | 78 | self.hg2 = Hourglass(4, 256+128, 512) 79 | 80 | self.l3 = Lin(512, 512) 81 | self.l4 = Lin(512, 512) 82 | 83 | self.out2 = nn.Conv2d(512, out_channels, 1, 1, padding=0) 84 | 85 | def forward(self, x): 86 | x = self.conv1(x) 87 | x = self.r1(x) 88 | pooled = self.pool(x) 89 | x = self.r4(pooled) 90 | x = self.r5(x) 91 | x = self.r6(x) 92 | 93 | # First hourglass 94 | x = self.hg1(x) 95 | 96 | # Linear layers to produce first set of predictions 97 | x = self.l1(x) 98 | x = self.l2(x) 99 | 100 | # First predicted heatmaps 101 | out1 = self.out1(x) 102 | out1_ = self.out_return(out1) 103 | 104 | joined = torch.cat([x, pooled], 1) 105 | joined = self.cat_conv(joined) 106 | int1 = joined + out1_ 107 | 108 | hg2 = self.hg2(int1) 109 | 110 | l3 = self.l3(hg2) 111 | l4 = self.l4(l3) 112 | 113 | out2 = self.out2(l4) 114 | 115 | return out1, out2 116 | 117 | 118 | def num_trainable_parameters(self): 119 | trainable_parameters = filter(lambda p: p.requires_grad, self.parameters()) 120 | return sum([np.prod(p.size()) for p in trainable_parameters]) 121 | 122 | 123 | -------------------------------------------------------------------------------- /keypoint/models/layers.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | # Wrapper around Conv2d 4 | class ConvBlock(nn.Module): 5 | def __init__(self, in_channels, out_channels): 6 | super(ConvBlock, self).__init__() 7 | self.block = nn.Sequential( 8 | nn.BatchNorm2d(in_channels), 9 | nn.ReLU(True), 10 | nn.Conv2d(in_channels, out_channels//2, kernel_size=1), 11 | nn.BatchNorm2d(out_channels//2), 12 | nn.ReLU(True), 13 | nn.Conv2d(out_channels//2, out_channels//2, kernel_size=3, stride=1, padding=1), 14 | nn.BatchNorm2d(out_channels//2), 15 | nn.ReLU(True), 16 | nn.Conv2d(out_channels//2, out_channels, kernel_size=1) 17 | ) 18 | 19 | def forward(self, x): 20 | return self.block(x) 21 | 22 | class SkipLayer(nn.Module): 23 | def __init__(self, in_channels, out_channels): 24 | super(SkipLayer, self).__init__() 25 | if in_channels != out_channels: 26 | self.layer = nn.Conv2d(in_channels, out_channels, kernel_size=1) 27 | else: 28 | self.layer = None 29 | 30 | def forward(self, x): 31 | if self.layer is None: 32 | return x 33 | else: 34 | return self.layer(x) 35 | 36 | class Residual(nn.Module): 37 
| def __init__(self, in_channels, out_channels): 38 | super(Residual, self).__init__() 39 | self.conv = ConvBlock(in_channels, out_channels) 40 | self.skip = SkipLayer(in_channels, out_channels) 41 | 42 | def forward(self, x): 43 | return self.conv(x) + self.skip(x) 44 | -------------------------------------------------------------------------------- /keypoint/models/mask_rcnn.py: -------------------------------------------------------------------------------- 1 | from torchvision.models.detection.mask_rcnn import MaskRCNN 2 | from torchvision.models.detection.backbone_utils import resnet_fpn_backbone 3 | from torchvision.models.detection.rpn import AnchorGenerator 4 | 5 | 6 | class DetectorMaskRCNN(MaskRCNN): 7 | def __init__(self, input_resize=(240, 320), n_classes=2, 8 | backbone_str='resnet50-fpn', 9 | anchor_sizes=((32, ), (64, ), (128, ), (256, ), (512, ))): 10 | 11 | assert backbone_str == 'resnet50-fpn' 12 | backbone = resnet_fpn_backbone('resnet50', pretrained=False) 13 | 14 | aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes) 15 | rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios) 16 | 17 | super().__init__(backbone=backbone, num_classes=n_classes, 18 | rpn_anchor_generator=rpn_anchor_generator, 19 | max_size=max(input_resize), min_size=min(input_resize)) 20 | -------------------------------------------------------------------------------- /keypoint/models/patched.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision.models.detection.rpn import concat_box_prediction_layers 3 | from torchvision.models.detection.roi_heads import fastrcnn_loss 4 | 5 | #************************************************************************ 6 | # Patch RPN forward function to return loss during eval() 7 | # when "targets" is provided 8 | #************************************************************************ 9 | def rpn_forward(self, 10 | images, # type: ImageList 11 | features, # type: Dict[str, Tensor] 12 | targets=None # type: Optional[List[Dict[str, Tensor]]] 13 | ): 14 | # type: (...) -> Tuple[List[Tensor], Dict[str, Tensor]] 15 | """ 16 | Args: 17 | images (ImageList): images for which we want to compute the predictions 18 | features (OrderedDict[Tensor]): features computed from the images that are 19 | used for computing the predictions. Each tensor in the list 20 | correspond to different feature levels 21 | targets (List[Dict[Tensor]]): ground-truth boxes present in the image (optional). 22 | If provided, each element in the dict should contain a field `boxes`, 23 | with the locations of the ground-truth boxes. 24 | Returns: 25 | boxes (List[Tensor]): the predicted boxes from the RPN, one Tensor per 26 | image. 27 | losses (Dict[Tensor]): the losses for the model during training. During 28 | testing, it is an empty dict. 
29 | """ 30 | # RPN uses all feature maps that are available 31 | features = list(features.values()) 32 | objectness, pred_bbox_deltas = self.head(features) 33 | anchors = self.anchor_generator(images, features) 34 | 35 | num_images = len(anchors) 36 | num_anchors_per_level_shape_tensors = [o[0].shape for o in objectness] 37 | num_anchors_per_level = [s[0] * s[1] * s[2] for s in num_anchors_per_level_shape_tensors] 38 | objectness, pred_bbox_deltas = \ 39 | concat_box_prediction_layers(objectness, pred_bbox_deltas) 40 | # apply pred_bbox_deltas to anchors to obtain the decoded proposals 41 | # note that we detach the deltas because Faster R-CNN do not backprop through 42 | # the proposals 43 | proposals = self.box_coder.decode(pred_bbox_deltas.detach(), anchors) 44 | proposals = proposals.view(num_images, -1, 4) 45 | boxes, scores = self.filter_proposals(proposals, objectness, images.image_sizes, num_anchors_per_level) 46 | 47 | losses = {} 48 | if self.training: 49 | assert targets is not None 50 | labels, matched_gt_boxes = self.assign_targets_to_anchors(anchors, targets) 51 | regression_targets = self.box_coder.encode(matched_gt_boxes, anchors) 52 | loss_objectness, loss_rpn_box_reg = self.compute_loss( 53 | objectness, pred_bbox_deltas, labels, regression_targets) 54 | losses = { 55 | "loss_objectness": loss_objectness, 56 | "loss_rpn_box_reg": loss_rpn_box_reg, 57 | } 58 | #************************************ 59 | # Patch start 60 | #************************************ 61 | elif targets is not None: 62 | assert targets is not None 63 | labels, matched_gt_boxes = self.assign_targets_to_anchors(anchors, targets) 64 | regression_targets = self.box_coder.encode(matched_gt_boxes, anchors) 65 | loss_objectness, loss_rpn_box_reg = self.compute_loss( 66 | objectness, pred_bbox_deltas, labels, regression_targets) 67 | losses = { 68 | "loss_objectness": loss_objectness, 69 | "loss_rpn_box_reg": loss_rpn_box_reg, 70 | } 71 | #************************************ 72 | # Patch end 73 | #************************************ 74 | 75 | return boxes, losses 76 | 77 | 78 | 79 | 80 | #************************************************************************ 81 | # Patch ROIHeads forward function to return loss during eval() 82 | # when "targets" is provided 83 | # This function is reduced to only work for detection task (eg. frcnn) 84 | #************************************************************************ 85 | def roi_forward(self, 86 | features, # type: Dict[str, Tensor] 87 | proposals, # type: List[Tensor] 88 | image_shapes, # type: List[Tuple[int, int]] 89 | targets=None # type: Optional[List[Dict[str, Tensor]]] 90 | ): 91 | # type: (...) 
-> Tuple[List[Dict[str, Tensor]], Dict[str, Tensor]] 92 | """ 93 | Args: 94 | features (List[Tensor]) 95 | proposals (List[Tensor[N, 4]]) 96 | image_shapes (List[Tuple[H, W]]) 97 | targets (List[Dict]) 98 | """ 99 | if targets is not None: 100 | for t in targets: 101 | # TODO: https://github.com/pytorch/pytorch/issues/26731 102 | floating_point_types = (torch.float, torch.double, torch.half) 103 | assert t["boxes"].dtype in floating_point_types, 'target boxes must of float type' 104 | assert t["labels"].dtype == torch.int64, 'target labels must of int64 type' 105 | if self.has_keypoint(): 106 | assert t["keypoints"].dtype == torch.float32, 'target keypoints must of float type' 107 | 108 | if self.training: 109 | proposals, matched_idxs, labels, regression_targets = self.select_training_samples(proposals, targets) 110 | #************************************ 111 | # Patch start 112 | #************************************ 113 | elif targets is not None: 114 | proposals, matched_idxs, labels, regression_targets = self.select_training_samples(proposals, targets) 115 | #************************************ 116 | # Patch end 117 | #************************************ 118 | else: 119 | labels = None 120 | regression_targets = None 121 | matched_idxs = None 122 | 123 | box_features = self.box_roi_pool(features, proposals, image_shapes) 124 | box_features = self.box_head(box_features) 125 | class_logits, box_regression = self.box_predictor(box_features) 126 | 127 | result: List[Dict[str, torch.Tensor]] = [] 128 | losses = {} 129 | if self.training: 130 | assert labels is not None and regression_targets is not None 131 | loss_classifier, loss_box_reg = fastrcnn_loss( 132 | class_logits, box_regression, labels, regression_targets) 133 | losses = { 134 | "loss_classifier": loss_classifier, 135 | "loss_box_reg": loss_box_reg 136 | } 137 | else: 138 | #************************************ 139 | # Patch start 140 | #************************************ 141 | if labels is not None and regression_targets is not None: 142 | loss_classifier, loss_box_reg = fastrcnn_loss( 143 | class_logits, box_regression, labels, regression_targets) 144 | losses = { 145 | "loss_classifier": loss_classifier, 146 | "loss_box_reg": loss_box_reg 147 | } 148 | #************************************ 149 | # Patch end 150 | #************************************ 151 | 152 | boxes, scores, labels = self.postprocess_detections(class_logits, box_regression, proposals, image_shapes) 153 | num_images = len(boxes) 154 | for i in range(num_images): 155 | result.append( 156 | { 157 | "boxes": boxes[i], 158 | "labels": labels[i], 159 | "scores": scores[i], 160 | } 161 | ) 162 | 163 | return result, losses 164 | 165 | -------------------------------------------------------------------------------- /keypoint/scripts/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | def add_path(path): 5 | if path not in sys.path: 6 | sys.path.insert(0, path) 7 | 8 | this_dir = os.path.dirname(__file__) 9 | lib_path = os.path.join(this_dir, '..') 10 | add_path(lib_path) 11 | -------------------------------------------------------------------------------- /keypoint/scripts/calc_gt_distribution.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """Calculates distribution of GT poses.""" 5 | 6 | import math 7 | 
import numpy as np 8 | import matplotlib.pyplot as plt 9 | 10 | from bop_toolkit_lib import config 11 | from bop_toolkit_lib import dataset_params 12 | from bop_toolkit_lib import inout 13 | from bop_toolkit_lib import misc 14 | 15 | 16 | # PARAMETERS. 17 | ################################################################################ 18 | p = { 19 | # See dataset_params.py for options. 20 | 'dataset': 'lm', 21 | 22 | # Dataset split. Options: 'train', 'val', 'test'. 23 | 'dataset_split': 'test', 24 | 25 | # Dataset split type. None = default. See dataset_params.py for options. 26 | 'dataset_split_type': None, 27 | 28 | # Folder containing the BOP datasets. 29 | 'datasets_path': config.datasets_path, 30 | } 31 | ################################################################################ 32 | 33 | 34 | # Load dataset parameters. 35 | dp_split = dataset_params.get_split_params( 36 | p['datasets_path'], p['dataset'], p['dataset_split'], p['dataset_split_type']) 37 | 38 | scene_ids = dp_split['scene_ids'] 39 | dists = [] 40 | azimuths = [] 41 | elevs = [] 42 | visib_fracts = [] 43 | ims_count = 0 44 | for scene_id in scene_ids: 45 | misc.log('Processing - dataset: {} ({}, {}), scene: {}'.format( 46 | p['dataset'], p['dataset_split'], p['dataset_split_type'], scene_id)) 47 | 48 | # Load GT poses. 49 | scene_gt = inout.load_scene_gt( 50 | dp_split['scene_gt_tpath'].format(scene_id=scene_id)) 51 | 52 | # Load info about the GT poses. 53 | scene_gt_info = inout.load_json( 54 | dp_split['scene_gt_info_tpath'].format(scene_id=scene_id), keys_to_int=True) 55 | 56 | ims_count += len(scene_gt) 57 | 58 | for im_id in scene_gt.keys(): 59 | for gt_id, im_gt in enumerate(scene_gt[im_id]): 60 | 61 | # Object distance. 62 | dist = np.linalg.norm(im_gt['cam_t_m2c']) 63 | dists.append(dist) 64 | 65 | # Camera origin in the model coordinate system. 66 | cam_orig_m = -np.linalg.inv(im_gt['cam_R_m2c']).dot( 67 | im_gt['cam_t_m2c']) 68 | 69 | # Azimuth from [0, 360]. 70 | azimuth = math.atan2(cam_orig_m[1, 0], cam_orig_m[0, 0]) 71 | if azimuth < 0: 72 | azimuth += 2.0 * math.pi 73 | azimuths.append((180.0 / math.pi) * azimuth) 74 | 75 | # Elevation from [-90, 90]. 76 | a = np.linalg.norm(cam_orig_m) 77 | b = np.linalg.norm([cam_orig_m[0, 0], cam_orig_m[1, 0], 0]) 78 | elev = math.acos(b / a) 79 | if cam_orig_m[2, 0] < 0: 80 | elev = -elev 81 | elevs.append((180.0 / math.pi) * elev) 82 | 83 | # Visibility fraction. 84 | visib_fracts.append(scene_gt_info[im_id][gt_id]['visib_fract']) 85 | 86 | # Print stats. 87 | misc.log('Stats of the GT poses in dataset {} {}:'.format( 88 | p['dataset'], p['dataset_split'])) 89 | misc.log('Number of images: ' + str(ims_count)) 90 | 91 | misc.log('Min dist: {}'.format(np.min(dists))) 92 | misc.log('Max dist: {}'.format(np.max(dists))) 93 | misc.log('Mean dist: {}'.format(np.mean(dists))) 94 | 95 | misc.log('Min azimuth: {}'.format(np.min(azimuths))) 96 | misc.log('Max azimuth: {}'.format(np.max(azimuths))) 97 | misc.log('Mean azimuth: {}'.format(np.mean(azimuths))) 98 | 99 | misc.log('Min elev: {}'.format(np.min(elevs))) 100 | misc.log('Max elev: {}'.format(np.max(elevs))) 101 | misc.log('Mean elev: {}'.format(np.mean(elevs))) 102 | 103 | misc.log('Min visib fract: {}'.format(np.min(visib_fracts))) 104 | misc.log('Max visib fract: {}'.format(np.max(visib_fracts))) 105 | misc.log('Mean visib fract: {}'.format(np.mean(visib_fracts))) 106 | 107 | # Visualize distributions. 
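# plt.show() below blocks until the figure windows are closed; when running
# headless, each histogram can instead be saved with plt.savefig(), e.g.
#   plt.savefig('gt_distribution_{}.png'.format(p['dataset']))  # illustrative filename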
108 | plt.figure() 109 | plt.hist(dists, bins=100) 110 | plt.title('Object distance') 111 | 112 | plt.figure() 113 | plt.hist(azimuths, bins=100) 114 | plt.title('Azimuth') 115 | 116 | plt.figure() 117 | plt.hist(elevs, bins=100) 118 | plt.title('Elevation') 119 | 120 | plt.figure() 121 | plt.hist(visib_fracts, bins=100) 122 | plt.title('Visibility fraction') 123 | 124 | plt.show() 125 | -------------------------------------------------------------------------------- /keypoint/scripts/calc_gt_info.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """Calculates visibility, 2D bounding boxes etc. for the ground-truth poses. 5 | 6 | See docs/bop_datasets_format.md for documentation of the calculated info. 7 | 8 | The info is saved in folder "{train,val,test}_gt_info" in the main folder of the 9 | selected dataset. 10 | """ 11 | 12 | import os 13 | import glob 14 | import numpy as np 15 | 16 | from bop_toolkit_lib import config 17 | from bop_toolkit_lib import dataset_params 18 | from bop_toolkit_lib import inout 19 | from bop_toolkit_lib import misc 20 | from bop_toolkit_lib import renderer 21 | from bop_toolkit_lib import visibility 22 | 23 | 24 | # PARAMETERS. 25 | ################################################################################ 26 | p = { 27 | # See dataset_params.py for options. 28 | 'dataset': 'lm', 29 | 30 | # Dataset split. Options: 'train', 'val', 'test'. 31 | 'dataset_split': 'test', 32 | 33 | # Dataset split type. None = default. See dataset_params.py for options. 34 | 'dataset_split_type': None, 35 | 36 | # Whether to save visualizations of visibility masks. 37 | 'vis_visibility_masks': False, 38 | 39 | # Tolerance used in the visibility test [mm]. 40 | 'delta': 15, 41 | 42 | # Type of the renderer. 43 | 'renderer_type': 'python', # Options: 'cpp', 'python'. 44 | 45 | # Folder containing the BOP datasets. 46 | 'datasets_path': config.datasets_path, 47 | 48 | # Path template for output images with object masks. 49 | 'vis_mask_visib_tpath': os.path.join( 50 | config.output_path, 'vis_gt_visib_delta={delta}', 51 | 'vis_gt_visib_delta={delta}', '{dataset}', '{split}', '{scene_id:06d}', 52 | '{im_id:06d}_{gt_id:06d}.jpg'), 53 | } 54 | ################################################################################ 55 | 56 | 57 | if p['vis_visibility_masks']: 58 | from bop_toolkit_lib import visualization 59 | 60 | # Load dataset parameters. 61 | dp_split = dataset_params.get_split_params( 62 | p['datasets_path'], p['dataset'], p['dataset_split'], p['dataset_split_type']) 63 | 64 | model_type = None 65 | if p['dataset'] == 'tless': 66 | model_type = 'cad' 67 | dp_model = dataset_params.get_model_params( 68 | p['datasets_path'], p['dataset'], model_type) 69 | 70 | # Initialize a renderer. 71 | misc.log('Initializing renderer...') 72 | 73 | # The renderer has a larger canvas for generation of masks of truncated objects. 
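# The canvas is 3x the image size and the principal point is shifted by
# (im_width, im_height), so the real image corresponds to the central third of
# the render; the GT depth is cropped back out of that region below.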
74 | im_width, im_height = dp_split['im_size'] 75 | ren_width, ren_height = 3 * im_width, 3 * im_height 76 | ren_cx_offset, ren_cy_offset = im_width, im_height 77 | ren = renderer.create_renderer( 78 | ren_width, ren_height, p['renderer_type'], mode='depth') 79 | 80 | for obj_id in dp_model['obj_ids']: 81 | model_fpath = dp_model['model_tpath'].format(obj_id=obj_id) 82 | ren.add_object(obj_id, model_fpath) 83 | 84 | scene_ids = dataset_params.get_present_scene_ids(dp_split) 85 | for scene_id in scene_ids: 86 | 87 | # Load scene info and ground-truth poses. 88 | scene_camera = inout.load_scene_camera( 89 | dp_split['scene_camera_tpath'].format(scene_id=scene_id)) 90 | scene_gt = inout.load_scene_gt( 91 | dp_split['scene_gt_tpath'].format(scene_id=scene_id)) 92 | 93 | scene_gt_info = {} 94 | im_ids = sorted(scene_gt.keys()) 95 | for im_counter, im_id in enumerate(im_ids): 96 | if im_counter % 100 == 0: 97 | misc.log( 98 | 'Calculating GT info - dataset: {} ({}, {}), scene: {}, im: {}'.format( 99 | p['dataset'], p['dataset_split'], p['dataset_split_type'], scene_id, 100 | im_id)) 101 | 102 | # Load depth image. 103 | depth_fpath = dp_split['depth_tpath'].format(scene_id=scene_id, im_id=im_id) 104 | if not os.path.exists(depth_fpath): 105 | depth_fpath = depth_fpath.replace('.tif', '.png') 106 | depth = inout.load_depth(depth_fpath) 107 | depth *= scene_camera[im_id]['depth_scale'] # Convert to [mm]. 108 | 109 | K = scene_camera[im_id]['cam_K'] 110 | fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2] 111 | im_size = (depth.shape[1], depth.shape[0]) 112 | 113 | scene_gt_info[im_id] = [] 114 | for gt_id, gt in enumerate(scene_gt[im_id]): 115 | 116 | # Render depth image of the object model in the ground-truth pose. 117 | depth_gt_large = ren.render_object( 118 | gt['obj_id'], gt['cam_R_m2c'], gt['cam_t_m2c'], 119 | fx, fy, cx + ren_cx_offset, cy + ren_cy_offset)['depth'] 120 | depth_gt = depth_gt_large[ 121 | ren_cy_offset:(ren_cy_offset + im_height), 122 | ren_cx_offset:(ren_cx_offset + im_width)] 123 | 124 | # Convert depth images to distance images. 125 | dist_gt = misc.depth_im_to_dist_im(depth_gt, K) 126 | dist_im = misc.depth_im_to_dist_im(depth, K) 127 | 128 | # Estimation of the visibility mask. 129 | visib_gt = visibility.estimate_visib_mask_gt( 130 | dist_im, dist_gt, p['delta'], visib_mode='bop19') 131 | 132 | # Mask of the object in the GT pose. 133 | obj_mask_gt_large = depth_gt_large > 0 134 | obj_mask_gt = dist_gt > 0 135 | 136 | # Number of pixels in the whole object silhouette 137 | # (even in the truncated part). 138 | px_count_all = np.sum(obj_mask_gt_large) 139 | 140 | # Number of pixels in the object silhouette with a valid depth measurement 141 | # (i.e. with a non-zero value in the depth image). 142 | px_count_valid = np.sum(dist_im[obj_mask_gt] > 0) 143 | 144 | # Number of pixels in the visible part of the object silhouette. 145 | px_count_visib = visib_gt.sum() 146 | 147 | # Visible surface fraction. 148 | if px_count_all > 0: 149 | visib_fract = px_count_visib / float(px_count_all) 150 | else: 151 | visib_fract = 0.0 152 | 153 | # Bounding box of the whole object silhouette 154 | # (including the truncated part). 155 | bbox = [-1, -1, -1, -1] 156 | if px_count_visib > 0: 157 | ys, xs = obj_mask_gt_large.nonzero() 158 | ys -= ren_cy_offset 159 | xs -= ren_cx_offset 160 | bbox = misc.calc_2d_bbox(xs, ys, im_size) 161 | 162 | # Bounding box of the visible surface part. 
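# As with 'bbox' above, [-1, -1, -1, -1] is kept when no pixel of the object
# is visible (px_count_visib == 0).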
163 | bbox_visib = [-1, -1, -1, -1] 164 | if px_count_visib > 0: 165 | ys, xs = visib_gt.nonzero() 166 | bbox_visib = misc.calc_2d_bbox(xs, ys, im_size) 167 | 168 | # Store the calculated info. 169 | scene_gt_info[im_id].append({ 170 | 'px_count_all': int(px_count_all), 171 | 'px_count_valid': int(px_count_valid), 172 | 'px_count_visib': int(px_count_visib), 173 | 'visib_fract': float(visib_fract), 174 | 'bbox_obj': [int(e) for e in bbox], 175 | 'bbox_visib': [int(e) for e in bbox_visib] 176 | }) 177 | 178 | # Visualization of the visibility mask. 179 | if p['vis_visibility_masks']: 180 | 181 | depth_im_vis = visualization.depth_for_vis(depth, 0.2, 1.0) 182 | depth_im_vis = np.dstack([depth_im_vis] * 3) 183 | 184 | visib_gt_vis = visib_gt.astype(np.float) 185 | zero_ch = np.zeros(visib_gt_vis.shape) 186 | visib_gt_vis = np.dstack([zero_ch, visib_gt_vis, zero_ch]) 187 | 188 | vis = 0.5 * depth_im_vis + 0.5 * visib_gt_vis 189 | vis[vis > 1] = 1 190 | 191 | vis_path = p['vis_mask_visib_tpath'].format( 192 | delta=p['delta'], dataset=p['dataset'], split=p['dataset_split'], 193 | scene_id=scene_id, im_id=im_id, gt_id=gt_id) 194 | misc.ensure_dir(os.path.dirname(vis_path)) 195 | inout.save_im(vis_path, vis) 196 | 197 | # Save the info for the current scene. 198 | scene_gt_info_path = dp_split['scene_gt_info_tpath'].format(scene_id=scene_id) 199 | misc.ensure_dir(os.path.dirname(scene_gt_info_path)) 200 | inout.save_json(scene_gt_info_path, scene_gt_info) 201 | -------------------------------------------------------------------------------- /keypoint/scripts/calc_gt_masks.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """Calculates masks of object models in the ground-truth poses.""" 5 | 6 | import os 7 | import numpy as np 8 | 9 | from bop_toolkit_lib import config 10 | from bop_toolkit_lib import dataset_params 11 | from bop_toolkit_lib import inout 12 | from bop_toolkit_lib import misc 13 | from bop_toolkit_lib import renderer 14 | from bop_toolkit_lib import visibility 15 | 16 | 17 | # PARAMETERS. 18 | ################################################################################ 19 | p = { 20 | # See dataset_params.py for options. 21 | 'dataset': 'lm', 22 | 23 | # Dataset split. Options: 'train', 'val', 'test'. 24 | 'dataset_split': 'test', 25 | 26 | # Dataset split type. None = default. See dataset_params.py for options. 27 | 'dataset_split_type': None, 28 | 29 | # Tolerance used in the visibility test [mm]. 30 | 'delta': 15, # 5 for ITODD, 15 for the other datasets. 31 | 32 | # Type of the renderer. 33 | 'renderer_type': 'python', # Options: 'cpp', 'python'. 34 | 35 | # Folder containing the BOP datasets. 36 | 'datasets_path': config.datasets_path, 37 | } 38 | ################################################################################ 39 | 40 | 41 | # Load dataset parameters. 42 | dp_split = dataset_params.get_split_params( 43 | p['datasets_path'], p['dataset'], p['dataset_split'], p['dataset_split_type']) 44 | 45 | model_type = None 46 | if p['dataset'] == 'tless': 47 | model_type = 'cad' 48 | dp_model = dataset_params.get_model_params( 49 | p['datasets_path'], p['dataset'], model_type) 50 | 51 | scene_ids = dataset_params.get_present_scene_ids(dp_split) 52 | for scene_id in scene_ids: 53 | 54 | # Load scene GT. 
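# scene_gt.json maps each image id to a list of GT annotations (obj_id,
# cam_R_m2c, cam_t_m2c); these poses are rendered below to produce the masks.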
55 | scene_gt_path = dp_split['scene_gt_tpath'].format( 56 | scene_id=scene_id) 57 | scene_gt = inout.load_scene_gt(scene_gt_path) 58 | 59 | # Load scene camera. 60 | scene_camera_path = dp_split['scene_camera_tpath'].format( 61 | scene_id=scene_id) 62 | scene_camera = inout.load_scene_camera(scene_camera_path) 63 | 64 | # Create folders for the output masks (if they do not exist yet). 65 | mask_dir_path = os.path.dirname( 66 | dp_split['mask_tpath'].format( 67 | scene_id=scene_id, im_id=0, gt_id=0)) 68 | misc.ensure_dir(mask_dir_path) 69 | 70 | mask_visib_dir_path = os.path.dirname( 71 | dp_split['mask_visib_tpath'].format( 72 | scene_id=scene_id, im_id=0, gt_id=0)) 73 | misc.ensure_dir(mask_visib_dir_path) 74 | 75 | # Initialize a renderer. 76 | misc.log('Initializing renderer...') 77 | width, height = dp_split['im_size'] 78 | ren = renderer.create_renderer( 79 | width, height, renderer_type=p['renderer_type'], mode='depth') 80 | 81 | # Add object models. 82 | for obj_id in dp_model['obj_ids']: 83 | ren.add_object(obj_id, dp_model['model_tpath'].format(obj_id=obj_id)) 84 | 85 | im_ids = sorted(scene_gt.keys()) 86 | for im_id in im_ids: 87 | 88 | if im_id % 100 == 0: 89 | misc.log( 90 | 'Calculating masks - dataset: {} ({}, {}), scene: {}, im: {}'.format( 91 | p['dataset'], p['dataset_split'], p['dataset_split_type'], scene_id, 92 | im_id)) 93 | 94 | K = scene_camera[im_id]['cam_K'] 95 | fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2] 96 | 97 | # Load depth image. 98 | depth_path = dp_split['depth_tpath'].format( 99 | scene_id=scene_id, im_id=im_id) 100 | depth_im = inout.load_depth(depth_path) 101 | depth_im *= scene_camera[im_id]['depth_scale'] # to [mm] 102 | dist_im = misc.depth_im_to_dist_im(depth_im, K) 103 | 104 | for gt_id, gt in enumerate(scene_gt[im_id]): 105 | 106 | # Render the depth image. 107 | depth_gt = ren.render_object( 108 | gt['obj_id'], gt['cam_R_m2c'], gt['cam_t_m2c'], fx, fy, cx, cy)['depth'] 109 | 110 | # Convert depth image to distance image. 111 | dist_gt = misc.depth_im_to_dist_im(depth_gt, K) 112 | 113 | # Mask of the full object silhouette. 114 | mask = dist_gt > 0 115 | 116 | # Mask of the visible part of the object silhouette. 117 | mask_visib = visibility.estimate_visib_mask_gt( 118 | dist_im, dist_gt, p['delta'], visib_mode='bop19') 119 | 120 | # Save the calculated masks. 121 | mask_path = dp_split['mask_tpath'].format( 122 | scene_id=scene_id, im_id=im_id, gt_id=gt_id) 123 | inout.save_im(mask_path, 255 * mask.astype(np.uint8)) 124 | 125 | mask_visib_path = dp_split['mask_visib_tpath'].format( 126 | scene_id=scene_id, im_id=im_id, gt_id=gt_id) 127 | inout.save_im(mask_visib_path, 255 * mask_visib.astype(np.uint8)) 128 | -------------------------------------------------------------------------------- /keypoint/scripts/calc_model_info.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """Calculates the 3D bounding box and the diameter of 3D object models.""" 5 | 6 | from bop_toolkit_lib import config 7 | from bop_toolkit_lib import dataset_params 8 | from bop_toolkit_lib import inout 9 | from bop_toolkit_lib import misc 10 | 11 | 12 | # PARAMETERS. 13 | ################################################################################ 14 | p = { 15 | # See dataset_params.py for options. 16 | 'dataset': 'lm', 17 | 18 | # Type of input object models. 
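# None = default model type (see dataset_params.py for options).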
19 |   'model_type': None,
20 | 
21 |   # Folder containing the BOP datasets.
22 |   'datasets_path': config.datasets_path,
23 | }
24 | ################################################################################
25 | 
26 | 
27 | # Load dataset parameters.
28 | dp_model = dataset_params.get_model_params(
29 |   p['datasets_path'], p['dataset'], p['model_type'])
30 | 
31 | models_info = {}
32 | for obj_id in dp_model['obj_ids']:
33 |   misc.log('Processing model of object {}...'.format(obj_id))
34 | 
35 |   model = inout.load_ply(dp_model['model_tpath'].format(obj_id=obj_id))
36 | 
37 |   # Calculate 3D bounding box.
38 |   ref_pt = list(map(float, model['pts'].min(axis=0).flatten()))
39 |   size = list(map(float, (model['pts'].max(axis=0) - ref_pt).flatten()))
40 | 
41 |   # Calculate the diameter.
42 |   diameter = misc.calc_pts_diameter(model['pts'])
43 | 
44 |   models_info[obj_id] = {
45 |     'min_x': ref_pt[0], 'min_y': ref_pt[1], 'min_z': ref_pt[2],
46 |     'size_x': size[0], 'size_y': size[1], 'size_z': size[2],
47 |     'diameter': diameter
48 |   }
49 | 
50 | # Save the calculated info about the object models.
51 | inout.save_json(dp_model['models_info_path'], models_info)
52 | 
--------------------------------------------------------------------------------
/keypoint/scripts/check_results_bop19.py:
--------------------------------------------------------------------------------
1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz)
2 | # Center for Machine Perception, Czech Technical University in Prague
3 | 
4 | """Checks the format of result files for the BOP Challenge 2019/2020."""
5 | 
6 | import os
7 | import argparse
8 | 
9 | from bop_toolkit_lib import config
10 | from bop_toolkit_lib import inout
11 | from bop_toolkit_lib import misc
12 | 
13 | 
14 | # PARAMETERS (some can be overwritten by the command line arguments below).
15 | ################################################################################
16 | p = {
17 |   # Names of files with results whose format should be checked (assumed to be
18 |   # stored in folder config.results_path). See docs/bop_challenge_2019.md for a
19 |   # description of the format. Example results can be found at:
20 |   # http://ptak.felk.cvut.cz/6DB/public/bop_sample_results/bop_challenge_2019/
21 |   'result_filenames': [
22 |     '/path/to/csv/with/results',
23 |   ],
24 | }
25 | ################################################################################
26 | 
27 | 
28 | # Command line arguments.
29 | # ------------------------------------------------------------------------------ 30 | parser = argparse.ArgumentParser() 31 | parser.add_argument('--result_filenames', 32 | default=','.join(p['result_filenames']), 33 | help='Comma-separated names of files with results.') 34 | args = parser.parse_args() 35 | 36 | p['result_filenames'] = args.result_filenames.split(',') 37 | 38 | 39 | if __name__ == '__main__': 40 | 41 | for result_filename in p['result_filenames']: 42 | result_path = os.path.join(config.results_path, result_filename) 43 | check_passed, check_msg = inout.check_bop_results( 44 | result_path, version='bop19') 45 | 46 | misc.log('Check msg: {}'.format(check_msg)) 47 | -------------------------------------------------------------------------------- /keypoint/scripts/eval_bop19.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """Evaluation script for the BOP Challenge 2019/2020.""" 5 | 6 | import os 7 | import time 8 | import argparse 9 | import subprocess 10 | import numpy as np 11 | 12 | import _init_paths 13 | from bop_toolkit_lib import config 14 | from bop_toolkit_lib import inout 15 | from bop_toolkit_lib import misc 16 | 17 | 18 | # PARAMETERS (some can be overwritten by the command line arguments below). 19 | ################################################################################ 20 | 21 | p = { 22 | # Errors to calculate. 23 | 'errors': [ 24 | { 25 | 'n_top': -1, 26 | 'type': 'vsd', 27 | 'vsd_deltas': { 28 | 'hb': 15, 29 | 'icbin': 15, 30 | 'icmi': 15, 31 | 'itodd': 5, 32 | 'lm': 15, 33 | 'lmo': 15, 34 | 'ruapc': 15, 35 | 'tless': 15, 36 | 'tudl': 15, 37 | 'tyol': 15, 38 | 'ycbv': 15, 39 | }, 40 | 'vsd_taus': list(np.arange(0.05, 0.51, 0.05)), 41 | 'vsd_normalized_by_diameter': True, 42 | 'correct_th': [[th] for th in np.arange(0.05, 0.51, 0.05)] 43 | }, 44 | { 45 | 'n_top': -1, 46 | 'type': 'mssd', 47 | 'correct_th': [[th] for th in np.arange(0.05, 0.51, 0.05)] 48 | }, 49 | { 50 | 'n_top': -1, 51 | 'type': 'mspd', 52 | 'correct_th': [[th] for th in np.arange(5, 51, 5)] 53 | }, 54 | ], 55 | 56 | # Minimum visible surface fraction of a valid GT pose. 57 | # -1 == k most visible GT poses will be considered, where k is given by 58 | # the "inst_count" item loaded from "targets_filename". 59 | 'visib_gt_min': -1, 60 | 61 | # See misc.get_symmetry_transformations(). 62 | 'max_sym_disc_step': 0.01, 63 | 64 | # Type of the renderer (used for the VSD pose error function). 65 | 'renderer_type': 'python', # Options: 'cpp', 'python'. 66 | 67 | # Names of files with results for which to calculate the errors (assumed to be 68 | # stored in folder p['results_path']). See docs/bop_challenge_2019.md for a 69 | # description of the format. Example results can be found at: 70 | # http://ptak.felk.cvut.cz/6DB/public/bop_sample_results/bop_challenge_2019/ 71 | 'result_filenames': [ 72 | '/relative/path/to/csv/with/results', 73 | ], 74 | 75 | # Folder with results to be evaluated. 76 | 'results_path': config.results_path, 77 | 78 | # Folder for the calculated pose errors and performance scores. 79 | 'eval_path': config.eval_path, 80 | 81 | # File with a list of estimation targets to consider. The file is assumed to 82 | # be stored in the dataset folder. 
83 | 'targets_filename': 'test_targets_bop19.json', 84 | } 85 | ################################################################################ 86 | 87 | 88 | # Command line arguments. 89 | # ------------------------------------------------------------------------------ 90 | parser = argparse.ArgumentParser() 91 | parser.add_argument('--renderer_type', default=p['renderer_type']) 92 | parser.add_argument('--result_filenames', 93 | default=','.join(p['result_filenames']), 94 | help='Comma-separated names of files with results.') 95 | parser.add_argument('--results_path', default=p['results_path']) 96 | parser.add_argument('--eval_path', default=p['eval_path']) 97 | parser.add_argument('--targets_filename', default=p['targets_filename']) 98 | args = parser.parse_args() 99 | 100 | p['renderer_type'] = str(args.renderer_type) 101 | p['result_filenames'] = args.result_filenames.split(',') 102 | p['results_path'] = str(args.results_path) 103 | p['eval_path'] = str(args.eval_path) 104 | p['targets_filename'] = str(args.targets_filename) 105 | 106 | # Evaluation. 107 | # ------------------------------------------------------------------------------ 108 | for result_filename in p['result_filenames']: 109 | 110 | misc.log('===========') 111 | misc.log('EVALUATING: {}'.format(result_filename)) 112 | misc.log('===========') 113 | 114 | time_start = time.time() 115 | 116 | # Volume under recall surface (VSD) / area under recall curve (MSSD, MSPD). 117 | average_recalls = {} 118 | 119 | # Name of the result and the dataset. 120 | result_name = os.path.splitext(os.path.basename(result_filename))[0] 121 | dataset = str(result_name.split('_')[1].split('-')[0]) 122 | 123 | # Calculate the average estimation time per image. 124 | ests = inout.load_bop_results( 125 | os.path.join(p['results_path'], result_filename), version='bop19') 126 | times = {} 127 | times_available = True 128 | for est in ests: 129 | result_key = '{:06d}_{:06d}'.format(est['scene_id'], est['im_id']) 130 | if est['time'] < 0: 131 | # All estimation times must be provided. 132 | times_available = False 133 | break 134 | elif result_key in times: 135 | if abs(times[result_key] - est['time']) > 0.001: 136 | raise ValueError( 137 | 'The running time for scene {} and image {} is not the same for ' 138 | 'all estimates.'.format(est['scene_id'], est['im_id'])) 139 | else: 140 | times[result_key] = est['time'] 141 | 142 | if times_available: 143 | average_time_per_image = np.mean(list(times.values())) 144 | else: 145 | average_time_per_image = -1.0 146 | 147 | # Evaluate the pose estimates. 148 | for error in p['errors']: 149 | 150 | # Calculate error of the pose estimates. 
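# Each error type is computed by running eval_calc_errors.py in a separate
# process; a non-zero exit code aborts the evaluation (RuntimeError below).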
151 | calc_errors_cmd = [ 152 | 'python', 153 | os.path.join('scripts', 'eval_calc_errors.py'), 154 | '--n_top={}'.format(error['n_top']), 155 | '--error_type={}'.format(error['type']), 156 | '--result_filenames={}'.format(result_filename), 157 | '--renderer_type={}'.format(p['renderer_type']), 158 | '--results_path={}'.format(p['results_path']), 159 | '--eval_path={}'.format(p['eval_path']), 160 | '--targets_filename={}'.format(p['targets_filename']), 161 | '--max_sym_disc_step={}'.format(p['max_sym_disc_step']), 162 | '--skip_missing=1', 163 | ] 164 | if error['type'] == 'vsd': 165 | vsd_deltas_str = \ 166 | ','.join(['{}:{}'.format(k, v) for k, v in error['vsd_deltas'].items()]) 167 | calc_errors_cmd += [ 168 | '--vsd_deltas={}'.format(vsd_deltas_str), 169 | '--vsd_taus={}'.format(','.join(map(str, error['vsd_taus']))), 170 | '--vsd_normalized_by_diameter={}'.format( 171 | error['vsd_normalized_by_diameter']) 172 | ] 173 | 174 | misc.log('Running: ' + ' '.join(calc_errors_cmd)) 175 | if subprocess.call(calc_errors_cmd) != 0: 176 | raise RuntimeError('Calculation of pose errors failed.') 177 | 178 | # Paths (rel. to p['eval_path']) to folders with calculated pose errors. 179 | # For VSD, there is one path for each setting of tau. For the other pose 180 | # error functions, there is only one path. 181 | error_dir_paths = {} 182 | if error['type'] == 'vsd': 183 | for vsd_tau in error['vsd_taus']: 184 | error_sign = misc.get_error_signature( 185 | error['type'], error['n_top'], vsd_delta=error['vsd_deltas'][dataset], 186 | vsd_tau=vsd_tau) 187 | error_dir_paths[error_sign] = os.path.join(result_name, error_sign) 188 | else: 189 | error_sign = misc.get_error_signature(error['type'], error['n_top']) 190 | error_dir_paths[error_sign] = os.path.join(result_name, error_sign) 191 | 192 | # Recall scores for all settings of the threshold of correctness (and also 193 | # of the misalignment tolerance tau in the case of VSD). 194 | recalls = [] 195 | 196 | # Calculate performance scores. 197 | for error_sign, error_dir_path in error_dir_paths.items(): 198 | for correct_th in error['correct_th']: 199 | 200 | calc_scores_cmd = [ 201 | 'python', 202 | os.path.join('scripts', 'eval_calc_scores.py'), 203 | '--error_dir_paths={}'.format(error_dir_path), 204 | '--eval_path={}'.format(p['eval_path']), 205 | '--targets_filename={}'.format(p['targets_filename']), 206 | '--visib_gt_min={}'.format(p['visib_gt_min']) 207 | ] 208 | 209 | calc_scores_cmd += ['--correct_th_{}={}'.format( 210 | error['type'], ','.join(map(str, correct_th)))] 211 | 212 | misc.log('Running: ' + ' '.join(calc_scores_cmd)) 213 | if subprocess.call(calc_scores_cmd) != 0: 214 | raise RuntimeError('Calculation of scores failed.') 215 | 216 | # Path to file with calculated scores. 217 | score_sign = misc.get_score_signature(correct_th, p['visib_gt_min']) 218 | 219 | scores_filename = 'scores_{}.json'.format(score_sign) 220 | scores_path = os.path.join( 221 | p['eval_path'], result_name, error_sign, scores_filename) 222 | 223 | # Load the scores. 
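# scores_<signature>.json is written by the eval_calc_scores.py call above;
# only its 'recall' entry is aggregated here.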
224 | misc.log('Loading calculated scores from: {}'.format(scores_path)) 225 | scores = inout.load_json(scores_path) 226 | recalls.append(scores['recall']) 227 | 228 | average_recalls[error['type']] = np.mean(recalls) 229 | 230 | misc.log('Recall scores: {}'.format(' '.join(map(str, recalls)))) 231 | misc.log('Average recall: {}'.format(average_recalls[error['type']])) 232 | 233 | time_total = time.time() - time_start 234 | misc.log('Evaluation of {} took {}s.'.format(result_filename, time_total)) 235 | 236 | # Calculate the final scores. 237 | final_scores = {} 238 | for error in p['errors']: 239 | final_scores['bop19_average_recall_{}'.format(error['type'])] =\ 240 | average_recalls[error['type']] 241 | 242 | # Final score for the given dataset. 243 | final_scores['bop19_average_recall'] = np.mean([ 244 | average_recalls['vsd'], average_recalls['mssd'], average_recalls['mspd']]) 245 | 246 | # Average estimation time per image. 247 | final_scores['bop19_average_time_per_image'] = average_time_per_image 248 | 249 | # Save the final scores. 250 | final_scores_path = os.path.join( 251 | p['eval_path'], result_name, 'scores_bop19.json') 252 | inout.save_json(final_scores_path, final_scores) 253 | 254 | # Print the final scores. 255 | misc.log('FINAL SCORES:') 256 | for score_name, score_value in final_scores.items(): 257 | misc.log('- {}: {}'.format(score_name, score_value)) 258 | 259 | misc.log('Done.') 260 | -------------------------------------------------------------------------------- /keypoint/scripts/meshlab_scripts/remesh_for_eval_cell=0.25.mlx: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /keypoint/scripts/meshlab_scripts/remesh_for_eval_cell=0.5.mlx: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /keypoint/scripts/remesh_models_for_eval.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """'Uniformly' resamples and decimates 3D object models for evaluation. 5 | 6 | Note: Models of some T-LESS objects were processed by Blender (using the Remesh 7 | modifier). 8 | """ 9 | 10 | import os 11 | 12 | from bop_toolkit_lib import config 13 | from bop_toolkit_lib import dataset_params 14 | from bop_toolkit_lib import misc 15 | 16 | 17 | # PARAMETERS. 18 | ################################################################################ 19 | p = { 20 | # See dataset_params.py for options. 21 | 'dataset': 'lm', 22 | 23 | # Type of input object models. 24 | # None = default model type. 25 | 'model_in_type': None, 26 | 27 | # Type of output object models. 28 | 'model_out_type': 'eval', 29 | 30 | # Folder containing the BOP datasets. 31 | 'datasets_path': config.datasets_path, 32 | 33 | # Path to meshlabserver.exe (tested version: 1.3.3). 
34 | # On Windows: C:\Program Files\VCG\MeshLab133\meshlabserver.exe 35 | 'meshlab_server_path': config.meshlab_server_path, 36 | 37 | # Path to scripts/meshlab_scripts/remesh_for_eval.mlx. 38 | 'meshlab_script_path': os.path.join( 39 | os.path.dirname(os.path.realpath(__file__)), 'meshlab_scripts', 40 | r'remesh_for_eval_cell=0.25.mlx'), 41 | } 42 | ################################################################################ 43 | 44 | 45 | # Load dataset parameters. 46 | dp_model_in = dataset_params.get_model_params( 47 | p['datasets_path'], p['dataset'], p['model_in_type']) 48 | 49 | dp_model_out = dataset_params.get_model_params( 50 | p['datasets_path'], p['dataset'], p['model_out_type']) 51 | 52 | # Attributes to save for the output models. 53 | attrs_to_save = [] 54 | 55 | # Process models of all objects in the selected dataset. 56 | for obj_id in dp_model_in['obj_ids']: 57 | misc.log('\n\n\nProcessing model of object {}...\n'.format(obj_id)) 58 | 59 | model_in_path = dp_model_in['model_tpath'].format(obj_id=obj_id) 60 | model_out_path = dp_model_out['model_tpath'].format(obj_id=obj_id) 61 | 62 | misc.ensure_dir(os.path.dirname(model_out_path)) 63 | 64 | misc.run_meshlab_script(p['meshlab_server_path'], p['meshlab_script_path'], 65 | model_in_path, model_out_path, attrs_to_save) 66 | 67 | misc.log('Done.') 68 | -------------------------------------------------------------------------------- /keypoint/scripts/render_train_imgs.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """Renders RGB-D images of an object model.""" 5 | 6 | import os 7 | import cv2 8 | 9 | from bop_toolkit_lib import config 10 | from bop_toolkit_lib import dataset_params 11 | from bop_toolkit_lib import inout 12 | from bop_toolkit_lib import misc 13 | from bop_toolkit_lib import renderer 14 | from bop_toolkit_lib import view_sampler 15 | 16 | 17 | # PARAMETERS. 18 | ################################################################################ 19 | # See dataset_params.py for options. 20 | dataset = 'tyol' 21 | 22 | # Radii of view spheres from which to render the objects. 23 | if dataset == 'lm': 24 | radii = [400] # There are only 3 occurrences under 400 mm. 25 | elif dataset == 'tless': 26 | radii = [650] 27 | elif dataset == 'tudl': 28 | radii = [850] 29 | elif dataset == 'tyol': 30 | radii = [500] 31 | elif dataset == 'ruapc': 32 | radii = [590] 33 | elif dataset == 'icmi': 34 | radii = [500] 35 | elif dataset == 'icbin': 36 | radii = [450] 37 | else: 38 | raise ValueError('Unknown dataset.') 39 | 40 | # Type of object models and camera. 41 | model_type = None 42 | cam_type = None 43 | if dataset == 'tless': 44 | model_type = 'reconst' 45 | cam_type = 'primesense' 46 | 47 | # Objects to render ([] = all objects from the specified dataset). 48 | obj_ids = [] 49 | 50 | # Minimum required number of views on the whole view sphere. The final number of 51 | # views depends on the sampling method. 52 | min_n_views = 1000 53 | 54 | # Rendering parameters. 55 | ambient_weight = 0.5 # Weight of ambient light [0, 1] 56 | shading = 'phong' # 'flat', 'phong' 57 | 58 | # Type of the renderer. Options: 'cpp', 'python'. 59 | renderer_type = 'python' 60 | 61 | # Super-sampling anti-aliasing (SSAA) - the RGB image is rendered at ssaa_fact 62 | # times higher resolution and then down-sampled to the required resolution. 
63 | # Ref: https://github.com/vispy/vispy/wiki/Tech.-Antialiasing 64 | ssaa_fact = 4 65 | 66 | # Folder containing the BOP datasets. 67 | datasets_path = config.datasets_path 68 | 69 | # Folder for the rendered images. 70 | out_tpath = os.path.join(config.output_path, 'render_{dataset}') 71 | 72 | # Output path templates. 73 | out_rgb_tpath =\ 74 | os.path.join('{out_path}', '{obj_id:06d}', 'rgb', '{im_id:06d}.png') 75 | out_depth_tpath =\ 76 | os.path.join('{out_path}', '{obj_id:06d}', 'depth', '{im_id:06d}.png') 77 | out_scene_camera_tpath =\ 78 | os.path.join('{out_path}', '{obj_id:06d}', 'scene_camera.json') 79 | out_scene_gt_tpath =\ 80 | os.path.join('{out_path}', '{obj_id:06d}', 'scene_gt.json') 81 | out_views_vis_tpath =\ 82 | os.path.join('{out_path}', '{obj_id:06d}', 'views_radius={radius}.ply') 83 | ################################################################################ 84 | 85 | 86 | out_path = out_tpath.format(dataset=dataset) 87 | misc.ensure_dir(out_path) 88 | 89 | # Load dataset parameters. 90 | dp_split_test = dataset_params.get_split_params(datasets_path, dataset, 'test') 91 | dp_model = dataset_params.get_model_params(datasets_path, dataset, model_type) 92 | dp_camera = dataset_params.get_camera_params(datasets_path, dataset, cam_type) 93 | 94 | if not obj_ids: 95 | obj_ids = dp_model['obj_ids'] 96 | 97 | # Image size and K for the RGB image (potentially with SSAA). 98 | im_size_rgb = [int(round(x * float(ssaa_fact))) for x in dp_camera['im_size']] 99 | K_rgb = dp_camera['K'] * ssaa_fact 100 | 101 | # Intrinsic parameters for RGB rendering. 102 | fx_rgb, fy_rgb, cx_rgb, cy_rgb =\ 103 | K_rgb[0, 0], K_rgb[1, 1], K_rgb[0, 2], K_rgb[1, 2] 104 | 105 | # Intrinsic parameters for depth rendering. 106 | K = dp_camera['K'] 107 | fx_d, fy_d, cx_d, cy_d = K[0, 0], K[1, 1], K[0, 2], K[1, 2] 108 | 109 | # Create the RGB renderer. 110 | width_rgb, height_rgb = im_size_rgb[0], im_size_rgb[1] 111 | ren_rgb = renderer.create_renderer( 112 | width_rgb, height_rgb, renderer_type, mode='rgb', shading=shading) 113 | ren_rgb.set_light_ambient_weight(ambient_weight) 114 | 115 | # Add object models to the RGB renderer. 116 | for obj_id in obj_ids: 117 | ren_rgb.add_object(obj_id, dp_model['model_tpath'].format(obj_id=obj_id)) 118 | 119 | # Create the depth renderer. 120 | width_depth, height_depth, = dp_camera['im_size'][0], dp_camera['im_size'][1] 121 | ren_depth = renderer.create_renderer( 122 | width_depth, height_depth, renderer_type, mode='depth') 123 | 124 | # Add object models to the depth renderer. 125 | for obj_id in obj_ids: 126 | ren_depth.add_object(obj_id, dp_model['model_tpath'].format(obj_id=obj_id)) 127 | 128 | # Render training images for all object models. 129 | for obj_id in obj_ids: 130 | 131 | # Prepare output folders. 132 | misc.ensure_dir(os.path.dirname(out_rgb_tpath.format( 133 | out_path=out_path, obj_id=obj_id, im_id=0))) 134 | misc.ensure_dir(os.path.dirname(out_depth_tpath.format( 135 | out_path=out_path, obj_id=obj_id, im_id=0))) 136 | 137 | # Load model. 138 | model_path = dp_model['model_tpath'].format(obj_id=obj_id) 139 | model = inout.load_ply(model_path) 140 | 141 | # Load model texture. 142 | if 'texture_file' in model: 143 | model_texture_path =\ 144 | os.path.join(os.path.dirname(model_path), model['texture_file']) 145 | model_texture = inout.load_im(model_texture_path) 146 | else: 147 | model_texture = None 148 | 149 | scene_camera = {} 150 | scene_gt = {} 151 | im_id = 0 152 | for radius in radii: 153 | # Sample viewpoints. 
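    # Side note on the SSAA set-up configured above (a minimal sketch; the
    # intrinsics and the 3D point below are made-up illustration values):
    # scaling fx, fy, cx, cy by ssaa_fact scales the projected pixel
    # coordinates by the same factor, so rendering at ssaa_fact-times the
    # resolution and resizing back with cv2.resize (further below) keeps the
    # geometry consistent while reducing aliasing.
    #   fx, fy, cx, cy = 572.4, 573.6, 325.3, 242.0
    #   X, Y, Z = 0.1, -0.05, 0.8
    #   u, v = fx * X / Z + cx, fy * Y / Z + cy
    #   s = 4  # ssaa_fact
    #   u_ss, v_ss = s * fx * X / Z + s * cx, s * fy * Y / Z + s * cy
    #   assert abs(u_ss - s * u) < 1e-9 and abs(v_ss - s * v) < 1e-9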
154 | view_sampler_mode = 'hinterstoisser' # 'hinterstoisser' or 'fibonacci'. 155 | views, views_level = view_sampler.sample_views( 156 | min_n_views, radius, dp_split_test['azimuth_range'], 157 | dp_split_test['elev_range'], view_sampler_mode) 158 | 159 | misc.log('Sampled views: ' + str(len(views))) 160 | # out_views_vis_path = out_views_vis_tpath.format( 161 | # out_path=out_path, obj_id=obj_id, radius=radius) 162 | # view_sampler.save_vis(out_views_vis_path, views, views_level) 163 | 164 | # Render the object model from all views. 165 | for view_id, view in enumerate(views): 166 | if view_id % 10 == 0: 167 | misc.log('Rendering - obj: {}, radius: {}, view: {}/{}'.format( 168 | obj_id, radius, view_id, len(views))) 169 | 170 | # Rendering. 171 | rgb = ren_rgb.render_object( 172 | obj_id, view['R'], view['t'], fx_rgb, fy_rgb, cx_rgb, cy_rgb)['rgb'] 173 | depth = ren_depth.render_object( 174 | obj_id, view['R'], view['t'], fx_d, fy_d, cx_d, cy_d)['depth'] 175 | 176 | # Convert depth so it is in the same units as other images in the dataset. 177 | depth /= float(dp_camera['depth_scale']) 178 | 179 | # The OpenCV function was used for rendering of the training images 180 | # provided for the SIXD Challenge 2017. 181 | rgb = cv2.resize(rgb, dp_camera['im_size'], interpolation=cv2.INTER_AREA) 182 | # rgb = scipy.misc.imresize(rgb, par['cam']['im_size'][::-1], 'bicubic') 183 | 184 | # Save the rendered images. 185 | out_rgb_path = out_rgb_tpath.format( 186 | out_path=out_path, obj_id=obj_id, im_id=im_id) 187 | inout.save_im(out_rgb_path, rgb) 188 | out_depth_path = out_depth_tpath.format( 189 | out_path=out_path, obj_id=obj_id, im_id=im_id) 190 | inout.save_depth(out_depth_path, depth) 191 | 192 | # Get 2D bounding box of the object model at the ground truth pose. 193 | # ys, xs = np.nonzero(depth > 0) 194 | # obj_bb = misc.calc_2d_bbox(xs, ys, dp_camera['im_size']) 195 | 196 | scene_camera[im_id] = { 197 | 'cam_K': dp_camera['K'], 198 | 'depth_scale': dp_camera['depth_scale'], 199 | 'view_level': int(views_level[view_id]) 200 | } 201 | 202 | scene_gt[im_id] = [{ 203 | 'cam_R_m2c': view['R'], 204 | 'cam_t_m2c': view['t'], 205 | 'obj_id': int(obj_id) 206 | }] 207 | 208 | im_id += 1 209 | 210 | # Save metadata. 211 | inout.save_scene_camera(out_scene_camera_tpath.format( 212 | out_path=out_path, obj_id=obj_id), scene_camera) 213 | inout.save_scene_gt(out_scene_gt_tpath.format( 214 | out_path=out_path, obj_id=obj_id), scene_gt) 215 | -------------------------------------------------------------------------------- /keypoint/scripts/show_performance_bop19.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """Shows BOP19 metrics and plots recall curves after running eval_bop19.py""" 5 | 6 | import os 7 | import time 8 | import argparse 9 | import subprocess 10 | import numpy as np 11 | 12 | from bop_toolkit_lib import config 13 | from bop_toolkit_lib import inout 14 | from bop_toolkit_lib import misc 15 | 16 | 17 | # PARAMETERS (some can be overwritten by the command line arguments below). 18 | ################################################################################ 19 | p = { 20 | # Errors to calculate. 
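  # Brief, hedged note on the three BOP19 pose-error functions configured
  # below: 'vsd' is the Visible Surface Discrepancy (evaluated over the range
  # of misalignment tolerances in 'vsd_taus'), 'mssd' the Maximum
  # Symmetry-Aware Surface Distance, and 'mspd' the Maximum Symmetry-Aware
  # Projection Distance. Their 'correct_th' grids give the recall thresholds:
  # 0.05..0.50 for VSD/MSSD and 5..50 pixels for MSPD (cf. the 'thres @ r px'
  # axis label in plot_recall_curves further below).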
21 | 'errors': [ 22 | { 23 | 'n_top': -1, 24 | 'type': 'vsd', 25 | 'vsd_deltas': { 26 | 'hb': 15, 27 | 'icbin': 15, 28 | 'icmi': 15, 29 | 'itodd': 5, 30 | 'lm': 15, 31 | 'lmo': 15, 32 | 'ruapc': 15, 33 | 'tless': 15, 34 | 'tudl': 15, 35 | 'tyol': 15, 36 | }, 37 | 'vsd_taus': list(np.arange(0.05, 0.51, 0.05)), 38 | 'correct_th': [[th] for th in np.arange(0.05, 0.51, 0.05)] 39 | }, 40 | { 41 | 'n_top': -1, 42 | 'type': 'mssd', 43 | 'correct_th': [[th] for th in np.arange(0.05, 0.51, 0.05)] 44 | }, 45 | { 46 | 'n_top': -1, 47 | 'type': 'mspd', 48 | 'correct_th': [[th] for th in np.arange(5, 51, 5)] 49 | }, 50 | ], 51 | 52 | # Minimum visible surface fraction of a valid GT pose. 53 | 'visib_gt_min': 0.1, 54 | 55 | # Plot Recall curves 56 | 'plot_recall_curves': True, 57 | 58 | # Names of files with results for which to calculate the errors (assumed to be 59 | # stored in folder config.eval_path). See docs/bop_challenge_2019.md for a 60 | # description of the format. Example results can be found at: 61 | # http://ptak.felk.cvut.cz/6DB/public/bop_sample_results/bop_challenge_2019/ 62 | 'result_filenames': [ 63 | '/path/to/csv/with/results', 64 | ], 65 | } 66 | ################################################################################ 67 | 68 | 69 | # Command line arguments. 70 | # ------------------------------------------------------------------------------ 71 | parser = argparse.ArgumentParser() 72 | parser.add_argument('--visib_gt_min', default=p['visib_gt_min']) 73 | parser.add_argument('--result_filenames', 74 | default=','.join(p['result_filenames']), 75 | help='Comma-separated names of files with results.') 76 | args = parser.parse_args() 77 | 78 | p['visib_gt_min'] = float(args.visib_gt_min) 79 | p['result_filenames'] = args.result_filenames.split(',') 80 | 81 | # Evaluation. 82 | # ------------------------------------------------------------------------------ 83 | def plot_recall_curves(recall_dict, p): 84 | """Plots recall curves and displays BOP19 metrics 85 | 86 | :param recall_dict: dictionary containing bop19 recall results 87 | :param p: parameters from show_performance_bop19.py 88 | """ 89 | 90 | for i, error in enumerate(p['errors']): 91 | if error['type'] == 'mspd': 92 | corr_thres = ['{}'.format(e) for sl in error['correct_th'] for e in sl] 93 | else: 94 | corr_thres = ['{:.2f}'.format(e) for sl in error['correct_th'] for e in 95 | sl] 96 | 97 | recalls = recall_dict[error['type']] 98 | all_recalls = [] 99 | plt.figure() 100 | 101 | for key in sorted(recalls): 102 | threshold = key.split('=')[-1] 103 | if 'vsd' in key: 104 | plt.plot(recalls[key], label='tau: ' + threshold) 105 | else: 106 | plt.plot(recalls[key]) 107 | all_recalls += recalls[key] 108 | 109 | plt.legend() 110 | 111 | plt.xticks(np.arange(len(corr_thres)), corr_thres) 112 | plt.ylim([0, 1]) 113 | plt.ylabel('recall') 114 | if error['type'] == 'mspd': 115 | plt.xlabel('thres @ r px') 116 | else: 117 | plt.xlabel('thres @ object diameter') 118 | 119 | plt.title(error['type'] + ' - ' + 'average recall: ' 120 | + '{:.4f}'.format(np.mean(all_recalls))) 121 | 122 | plt.show() 123 | 124 | 125 | for result_filename in p['result_filenames']: 126 | 127 | misc.log('===========') 128 | misc.log('SHOWING: {}'.format(result_filename)) 129 | misc.log('===========') 130 | 131 | time_start = time.time() 132 | aur = {} 133 | 134 | recall_dict = {e['type']:{} for e in p['errors']} 135 | 136 | for error in p['errors']: 137 | 138 | # Name of the result and the dataset. 
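    # Aggregation sketch (illustrative only, toy numbers): for each error type
    # the per-threshold (and, for VSD, per-tau) recalls are averaged into the
    # area under the recall curve/surface, and the final BOP19 score is the
    # mean over the three error types:
    #   recalls_vsd  = [0.70, 0.68, 0.66]   # one value per (tau, threshold)
    #   recalls_mssd = [0.61, 0.60]         # one value per threshold
    #   recalls_mspd = [0.72, 0.71]
    #   ar = {k: sum(v) / len(v) for k, v in {'vsd': recalls_vsd,
    #         'mssd': recalls_mssd, 'mspd': recalls_mspd}.items()}
    #   bop19_score = (ar['vsd'] + ar['mssd'] + ar['mspd']) / 3.0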
139 | result_name = os.path.splitext(os.path.basename(result_filename))[0] 140 | dataset = str(result_name.split('_')[1].split('-')[0]) 141 | 142 | # Paths (rel. to config.eval_path) to folders with calculated pose errors. 143 | # For VSD, there is one path for each setting of tau. For the other pose 144 | # error functions, there is only one path. 145 | error_dir_paths = {} 146 | if error['type'] == 'vsd': 147 | for vsd_tau in error['vsd_taus']: 148 | error_sign = misc.get_error_signature( 149 | error['type'], error['n_top'], vsd_delta=error['vsd_deltas'][dataset], 150 | vsd_tau=vsd_tau) 151 | error_dir_paths[error_sign] = os.path.join(result_name, error_sign) 152 | else: 153 | error_sign = misc.get_error_signature(error['type'], error['n_top']) 154 | error_dir_paths[error_sign] = os.path.join(result_name, error_sign) 155 | 156 | # Recall scores for all settings of the threshold of correctness (and also 157 | # of the misalignment tolerance tau in the case of VSD). 158 | recalls = [] 159 | 160 | # Calculate performance scores. 161 | for error_sign, error_dir_path in error_dir_paths.items(): 162 | recall_dict[error['type']][error_sign] = [] 163 | for correct_th in error['correct_th']: 164 | 165 | # Path to file with calculated scores. 166 | score_sign = misc.get_score_signature(correct_th, p['visib_gt_min']) 167 | 168 | scores_filename = 'scores_{}.json'.format(score_sign) 169 | scores_path = os.path.join( 170 | config.eval_path, result_name, error_sign, scores_filename) 171 | 172 | # Load the scores. 173 | misc.log('Loading calculated scores from: {}'.format(scores_path)) 174 | scores = inout.load_json(scores_path) 175 | recalls.append(scores['total_recall']) 176 | recall_dict[error['type']][error_sign].append(scores['total_recall']) 177 | 178 | # Area under the recall surface/curve. 179 | aur[error['type']] = np.mean(recalls) 180 | 181 | time_total = time.time() - time_start 182 | 183 | # output final scores and plot recall curves 184 | err_types = [e['type'] for e in p['errors']] 185 | for err_type in err_types: 186 | misc.log('Average Recall {}: {}'.format(err_type, 187 | aur[err_type])) 188 | 189 | if set(['vsd', 'mssd', 'mspd']).issubset(err_types): 190 | test_set = os.path.basename(result_filename) 191 | mean_error = np.mean([aur[err_type] for err_type in err_types]) 192 | misc.log('Average BOP score on {}: {}'.format(test_set, mean_error)) 193 | 194 | if p['plot_recall_curves']: 195 | plot_recall_curves(recall_dict, p) 196 | 197 | misc.log('Done.') 198 | -------------------------------------------------------------------------------- /keypoint/scripts/vis_est_poses.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """Visualizes object models in pose estimates saved in the BOP format.""" 5 | 6 | import os 7 | import numpy as np 8 | import itertools 9 | 10 | from bop_toolkit_lib import config 11 | from bop_toolkit_lib import dataset_params 12 | from bop_toolkit_lib import inout 13 | from bop_toolkit_lib import misc 14 | from bop_toolkit_lib import renderer 15 | from bop_toolkit_lib import visualization 16 | 17 | 18 | # PARAMETERS. 19 | ################################################################################ 20 | p = { 21 | # Top N pose estimates (with the highest score) to be visualized for each 22 | # object in each image. 23 | 'n_top': 1, # 0 = all estimates, -1 = given by the number of GT poses. 
24 | 25 | # True = one visualization for each (im_id, obj_id), False = one per im_id. 26 | 'vis_per_obj_id': True, 27 | 28 | # Indicates whether to render RGB image. 29 | 'vis_rgb': True, 30 | 31 | # Indicates whether to resolve visibility in the rendered RGB images (using 32 | # depth renderings). If True, only the part of object surface, which is not 33 | # occluded by any other modeled object, is visible. If False, RGB renderings 34 | # of individual objects are blended together. 35 | 'vis_rgb_resolve_visib': True, 36 | 37 | # Indicates whether to render depth image. 38 | 'vis_depth_diff': False, 39 | 40 | # If to use the original model color. 41 | 'vis_orig_color': False, 42 | 43 | # Type of the renderer (used for the VSD pose error function). 44 | 'renderer_type': 'python', # Options: 'cpp', 'python'. 45 | 46 | # Names of files with pose estimates to visualize (assumed to be stored in 47 | # folder config.eval_path). See docs/bop_challenge_2019.md for a description 48 | # of the format. Example results can be found at: 49 | # http://ptak.felk.cvut.cz/6DB/public/bop_sample_results/bop_challenge_2019/ 50 | 'result_filenames': [ 51 | '/path/to/csv/with/results', 52 | ], 53 | 54 | # Folder containing the BOP datasets. 55 | 'datasets_path': config.datasets_path, 56 | 57 | # Folder for output visualisations. 58 | 'vis_path': os.path.join(config.output_path, 'vis_est_poses'), 59 | 60 | # Path templates for output images. 61 | 'vis_rgb_tpath': os.path.join( 62 | '{vis_path}', '{result_name}', '{scene_id:06d}', '{vis_name}.jpg'), 63 | 'vis_depth_diff_tpath': os.path.join( 64 | '{vis_path}', '{result_name}', '{scene_id:06d}', 65 | '{vis_name}_depth_diff.jpg'), 66 | } 67 | ################################################################################ 68 | 69 | 70 | # Load colors. 71 | colors_path = os.path.join( 72 | os.path.dirname(visualization.__file__), 'colors.json') 73 | colors = inout.load_json(colors_path) 74 | 75 | for result_fname in p['result_filenames']: 76 | misc.log('Processing: ' + result_fname) 77 | 78 | # Parse info about the method and the dataset from the filename. 79 | result_name = os.path.splitext(os.path.basename(result_fname))[0] 80 | result_info = result_name.split('_') 81 | method = result_info[0] 82 | dataset_info = result_info[1].split('-') 83 | dataset = dataset_info[0] 84 | split = dataset_info[1] 85 | split_type = dataset_info[2] if len(dataset_info) > 2 else None 86 | 87 | # Load dataset parameters. 88 | dp_split = dataset_params.get_split_params( 89 | p['datasets_path'], dataset, split, split_type) 90 | 91 | model_type = 'eval' 92 | dp_model = dataset_params.get_model_params( 93 | p['datasets_path'], dataset, model_type) 94 | 95 | # Rendering mode. 96 | renderer_modalities = [] 97 | if p['vis_rgb']: 98 | renderer_modalities.append('rgb') 99 | if p['vis_depth_diff'] or (p['vis_rgb'] and p['vis_rgb_resolve_visib']): 100 | renderer_modalities.append('depth') 101 | renderer_mode = '+'.join(renderer_modalities) 102 | 103 | # Create a renderer. 104 | width, height = dp_split['im_size'] 105 | ren = renderer.create_renderer( 106 | width, height, p['renderer_type'], mode=renderer_mode) 107 | 108 | # Load object models. 
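  # Illustrative example of the result-filename convention parsed at the top
  # of this loop (the name itself is hypothetical): a file called
  # 'mymethod_lmo-test.csv' yields method='mymethod', dataset='lmo',
  # split='test' and split_type=None; the pose estimates are then loaded from
  # it just below.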
109 | models = {} 110 | for obj_id in dp_model['obj_ids']: 111 | misc.log('Loading 3D model of object {}...'.format(obj_id)) 112 | model_path = dp_model['model_tpath'].format(obj_id=obj_id) 113 | model_color = None 114 | if not p['vis_orig_color']: 115 | model_color = tuple(colors[(obj_id - 1) % len(colors)]) 116 | ren.add_object(obj_id, model_path, surf_color=model_color) 117 | 118 | # Load pose estimates. 119 | misc.log('Loading pose estimates...') 120 | ests = inout.load_bop_results( 121 | os.path.join(config.results_path, result_fname)) 122 | 123 | # Organize the pose estimates by scene, image and object. 124 | misc.log('Organizing pose estimates...') 125 | ests_org = {} 126 | for est in ests: 127 | ests_org.setdefault(est['scene_id'], {}).setdefault( 128 | est['im_id'], {}).setdefault(est['obj_id'], []).append(est) 129 | 130 | for scene_id, scene_ests in ests_org.items(): 131 | 132 | # Load info and ground-truth poses for the current scene. 133 | scene_camera = inout.load_scene_camera( 134 | dp_split['scene_camera_tpath'].format(scene_id=scene_id)) 135 | scene_gt = inout.load_scene_gt( 136 | dp_split['scene_gt_tpath'].format(scene_id=scene_id)) 137 | 138 | for im_ind, (im_id, im_ests) in enumerate(scene_ests.items()): 139 | 140 | if im_ind % 10 == 0: 141 | split_type_str = ' - ' + split_type if split_type is not None else '' 142 | misc.log( 143 | 'Visualizing pose estimates - method: {}, dataset: {}{}, scene: {}, ' 144 | 'im: {}'.format(method, dataset, split_type_str, scene_id, im_id)) 145 | 146 | # Intrinsic camera matrix. 147 | K = scene_camera[im_id]['cam_K'] 148 | 149 | im_ests_vis = [] 150 | im_ests_vis_obj_ids = [] 151 | for obj_id, obj_ests in im_ests.items(): 152 | 153 | # Sort the estimates by score (in descending order). 154 | obj_ests_sorted = sorted( 155 | obj_ests, key=lambda est: est['score'], reverse=True) 156 | 157 | # Select the number of top estimated poses to visualize. 158 | if p['n_top'] == 0: # All estimates are considered. 159 | n_top_curr = None 160 | elif p['n_top'] == -1: # Given by the number of GT poses. 161 | n_gt = sum([gt['obj_id'] == obj_id for gt in scene_gt[im_id]]) 162 | n_top_curr = n_gt 163 | else: # Specified by the parameter n_top. 164 | n_top_curr = p['n_top'] 165 | obj_ests_sorted = obj_ests_sorted[slice(0, n_top_curr)] 166 | 167 | # Get list of poses to visualize. 168 | for est in obj_ests_sorted: 169 | est['obj_id'] = obj_id 170 | 171 | # Text info to write on the image at the pose estimate. 172 | if p['vis_per_obj_id']: 173 | est['text_info'] = [ 174 | {'name': '', 'val': est['score'], 'fmt': ':.2f'} 175 | ] 176 | else: 177 | val = '{}:{:.2f}'.format(obj_id, est['score']) 178 | est['text_info'] = [{'name': '', 'val': val, 'fmt': ''}] 179 | 180 | im_ests_vis.append(obj_ests_sorted) 181 | im_ests_vis_obj_ids.append(obj_id) 182 | 183 | # Join the per-object estimates if only one visualization is to be made. 184 | if not p['vis_per_obj_id']: 185 | im_ests_vis = [list(itertools.chain.from_iterable(im_ests_vis))] 186 | 187 | for ests_vis_id, ests_vis in enumerate(im_ests_vis): 188 | 189 | # Load the color and depth images and prepare images for rendering. 
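        # Recap of the data layout used below: ests_org has the form
        # {scene_id: {im_id: {obj_id: [est, ...]}}}, where each est is one row
        # of the loaded BOP results (sorted by 'score' above, with 'obj_id' and
        # 'text_info' attached), and ests_vis is the list of estimates rendered
        # for the current image (one list per object when p['vis_per_obj_id']
        # is True, otherwise a single merged list).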
190 | rgb = None 191 | if p['vis_rgb']: 192 | if 'rgb' in dp_split['im_modalities']: 193 | rgb = inout.load_im(dp_split['rgb_tpath'].format( 194 | scene_id=scene_id, im_id=im_id))[:, :, :3] 195 | elif 'gray' in dp_split['im_modalities']: 196 | gray = inout.load_im(dp_split['gray_tpath'].format( 197 | scene_id=scene_id, im_id=im_id)) 198 | rgb = np.dstack([gray, gray, gray]) 199 | else: 200 | raise ValueError('RGB nor gray images are available.') 201 | 202 | depth = None 203 | if p['vis_depth_diff'] or (p['vis_rgb'] and p['vis_rgb_resolve_visib']): 204 | depth = inout.load_depth(dp_split['depth_tpath'].format( 205 | scene_id=scene_id, im_id=im_id)) 206 | depth *= scene_camera[im_id]['depth_scale'] # Convert to [mm]. 207 | 208 | # Visualization name. 209 | if p['vis_per_obj_id']: 210 | vis_name = '{im_id:06d}_{obj_id:06d}'.format( 211 | im_id=im_id, obj_id=im_ests_vis_obj_ids[ests_vis_id]) 212 | else: 213 | vis_name = '{im_id:06d}'.format(im_id=im_id) 214 | 215 | # Path to the output RGB visualization. 216 | vis_rgb_path = None 217 | if p['vis_rgb']: 218 | vis_rgb_path = p['vis_rgb_tpath'].format( 219 | vis_path=p['vis_path'], result_name=result_name, scene_id=scene_id, 220 | vis_name=vis_name) 221 | 222 | # Path to the output depth difference visualization. 223 | vis_depth_diff_path = None 224 | if p['vis_depth_diff']: 225 | vis_depth_diff_path = p['vis_depth_diff_tpath'].format( 226 | vis_path=p['vis_path'], result_name=result_name, scene_id=scene_id, 227 | vis_name=vis_name) 228 | 229 | # Visualization. 230 | visualization.vis_object_poses( 231 | poses=ests_vis, K=K, renderer=ren, rgb=rgb, depth=depth, 232 | vis_rgb_path=vis_rgb_path, vis_depth_diff_path=vis_depth_diff_path, 233 | vis_rgb_resolve_visib=p['vis_rgb_resolve_visib']) 234 | 235 | misc.log('Done.') 236 | -------------------------------------------------------------------------------- /keypoint/scripts/vis_gt_poses.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """Visualizes object models in the ground-truth poses.""" 5 | 6 | import os 7 | import numpy as np 8 | 9 | from bop_toolkit_lib import config 10 | from bop_toolkit_lib import dataset_params 11 | from bop_toolkit_lib import inout 12 | from bop_toolkit_lib import misc 13 | from bop_toolkit_lib import renderer 14 | from bop_toolkit_lib import visualization 15 | 16 | 17 | # PARAMETERS. 18 | ################################################################################ 19 | p = { 20 | # See dataset_params.py for options. 21 | 'dataset': 'lm', 22 | 23 | # Dataset split. Options: 'train', 'val', 'test'. 24 | 'dataset_split': 'test', 25 | 26 | # Dataset split type. None = default. See dataset_params.py for options. 27 | 'dataset_split_type': None, 28 | 29 | # File with a list of estimation targets used to determine the set of images 30 | # for which the GT poses will be visualized. The file is assumed to be stored 31 | # in the dataset folder. None = all images. 32 | # 'targets_filename': 'test_targets_bop19.json', 33 | 'targets_filename': None, 34 | 35 | # Select ID's of scenes, images and GT poses to be processed. 36 | # Empty list [] means that all ID's will be used. 37 | 'scene_ids': [], 38 | 'im_ids': [], 39 | 'gt_ids': [], 40 | 41 | # Indicates whether to render RGB images. 
42 | 'vis_rgb': True, 43 | 44 | # Indicates whether to resolve visibility in the rendered RGB images (using 45 | # depth renderings). If True, only the part of object surface, which is not 46 | # occluded by any other modeled object, is visible. If False, RGB renderings 47 | # of individual objects are blended together. 48 | 'vis_rgb_resolve_visib': True, 49 | 50 | # Indicates whether to save images of depth differences. 51 | 'vis_depth_diff': False, 52 | 53 | # Whether to use the original model color. 54 | 'vis_orig_color': False, 55 | 56 | # Type of the renderer (used for the VSD pose error function). 57 | 'renderer_type': 'python', # Options: 'cpp', 'python'. 58 | 59 | # Folder containing the BOP datasets. 60 | 'datasets_path': config.datasets_path, 61 | 62 | # Folder for output visualisations. 63 | 'vis_path': os.path.join(config.output_path, 'vis_gt_poses'), 64 | 65 | # Path templates for output images. 66 | 'vis_rgb_tpath': os.path.join( 67 | '{vis_path}', '{dataset}', '{split}', '{scene_id:06d}', '{im_id:06d}.jpg'), 68 | 'vis_depth_diff_tpath': os.path.join( 69 | '{vis_path}', '{dataset}', '{split}', '{scene_id:06d}', 70 | '{im_id:06d}_depth_diff.jpg'), 71 | } 72 | ################################################################################ 73 | 74 | 75 | # Load dataset parameters. 76 | dp_split = dataset_params.get_split_params( 77 | p['datasets_path'], p['dataset'], p['dataset_split'], p['dataset_split_type']) 78 | 79 | model_type = 'eval' # None = default. 80 | dp_model = dataset_params.get_model_params( 81 | p['datasets_path'], p['dataset'], model_type) 82 | 83 | # Load colors. 84 | colors_path = os.path.join( 85 | os.path.dirname(visualization.__file__), 'colors.json') 86 | colors = inout.load_json(colors_path) 87 | 88 | # Subset of images for which the ground-truth poses will be rendered. 89 | if p['targets_filename'] is not None: 90 | targets = inout.load_json( 91 | os.path.join(dp_split['base_path'], p['targets_filename'])) 92 | scene_im_ids = {} 93 | for target in targets: 94 | scene_im_ids.setdefault( 95 | target['scene_id'], set()).add(target['im_id']) 96 | else: 97 | scene_im_ids = None 98 | 99 | # List of considered scenes. 100 | scene_ids_curr = dp_split['scene_ids'] 101 | if p['scene_ids']: 102 | scene_ids_curr = set(scene_ids_curr).intersection(p['scene_ids']) 103 | 104 | # Rendering mode. 105 | renderer_modalities = [] 106 | if p['vis_rgb']: 107 | renderer_modalities.append('rgb') 108 | if p['vis_depth_diff'] or (p['vis_rgb'] and p['vis_rgb_resolve_visib']): 109 | renderer_modalities.append('depth') 110 | renderer_mode = '+'.join(renderer_modalities) 111 | 112 | # Create a renderer. 113 | width, height = dp_split['im_size'] 114 | ren = renderer.create_renderer( 115 | width, height, p['renderer_type'], mode=renderer_mode, shading='flat') 116 | 117 | # Load object models. 118 | models = {} 119 | for obj_id in dp_model['obj_ids']: 120 | misc.log('Loading 3D model of object {}...'.format(obj_id)) 121 | model_path = dp_model['model_tpath'].format(obj_id=obj_id) 122 | model_color = None 123 | if not p['vis_orig_color']: 124 | model_color = tuple(colors[(obj_id - 1) % len(colors)]) 125 | ren.add_object(obj_id, model_path, surf_color=model_color) 126 | 127 | for scene_id in scene_ids_curr: 128 | 129 | # Load scene info and ground-truth poses. 
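  # Expected structure of the two files loaded below (a sketch mirroring what
  # render_train_imgs.py writes): scene_camera maps im_id to a dict with at
  # least 'cam_K' (3x3 intrinsics) and 'depth_scale', and scene_gt maps im_id
  # to a list of annotations, each with 'obj_id', 'cam_R_m2c' (3x3 rotation)
  # and 'cam_t_m2c' (3x1 translation, in mm for the BOP datasets).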
130 | scene_camera = inout.load_scene_camera( 131 | dp_split['scene_camera_tpath'].format(scene_id=scene_id)) 132 | scene_gt = inout.load_scene_gt( 133 | dp_split['scene_gt_tpath'].format(scene_id=scene_id)) 134 | 135 | # List of considered images. 136 | if scene_im_ids is not None: 137 | im_ids = scene_im_ids[scene_id] 138 | else: 139 | im_ids = sorted(scene_gt.keys()) 140 | if p['im_ids']: 141 | im_ids = set(im_ids).intersection(p['im_ids']) 142 | 143 | # Render the object models in the ground-truth poses in the selected images. 144 | for im_counter, im_id in enumerate(im_ids): 145 | if im_counter % 10 == 0: 146 | misc.log( 147 | 'Visualizing GT poses - dataset: {}, scene: {}, im: {}/{}'.format( 148 | p['dataset'], scene_id, im_counter, len(im_ids))) 149 | 150 | K = scene_camera[im_id]['cam_K'] 151 | 152 | # List of considered ground-truth poses. 153 | gt_ids_curr = range(len(scene_gt[im_id])) 154 | if p['gt_ids']: 155 | gt_ids_curr = set(gt_ids_curr).intersection(p['gt_ids']) 156 | 157 | # Collect the ground-truth poses. 158 | gt_poses = [] 159 | for gt_id in gt_ids_curr: 160 | gt = scene_gt[im_id][gt_id] 161 | gt_poses.append({ 162 | 'obj_id': gt['obj_id'], 163 | 'R': gt['cam_R_m2c'], 164 | 't': gt['cam_t_m2c'], 165 | 'text_info': [ 166 | {'name': '', 'val': '{}:{}'.format(gt['obj_id'], gt_id), 'fmt': ''} 167 | ] 168 | }) 169 | 170 | # Load the color and depth images and prepare images for rendering. 171 | rgb = None 172 | if p['vis_rgb']: 173 | if 'rgb' in dp_split['im_modalities']: 174 | rgb = inout.load_im(dp_split['rgb_tpath'].format( 175 | scene_id=scene_id, im_id=im_id))[:, :, :3] 176 | elif 'gray' in dp_split['im_modalities']: 177 | gray = inout.load_im(dp_split['gray_tpath'].format( 178 | scene_id=scene_id, im_id=im_id)) 179 | rgb = np.dstack([gray, gray, gray]) 180 | else: 181 | raise ValueError('RGB nor gray images are available.') 182 | 183 | depth = None 184 | if p['vis_depth_diff'] or (p['vis_rgb'] and p['vis_rgb_resolve_visib']): 185 | depth = inout.load_depth(dp_split['depth_tpath'].format( 186 | scene_id=scene_id, im_id=im_id)) 187 | depth *= scene_camera[im_id]['depth_scale'] # Convert to [mm]. 188 | 189 | # Path to the output RGB visualization. 190 | vis_rgb_path = None 191 | if p['vis_rgb']: 192 | vis_rgb_path = p['vis_rgb_tpath'].format( 193 | vis_path=p['vis_path'], dataset=p['dataset'], split=p['dataset_split'], 194 | scene_id=scene_id, im_id=im_id) 195 | 196 | # Path to the output depth difference visualization. 197 | vis_depth_diff_path = None 198 | if p['vis_depth_diff']: 199 | vis_depth_diff_path = p['vis_depth_diff_tpath'].format( 200 | vis_path=p['vis_path'], dataset=p['dataset'], split=p['dataset_split'], 201 | scene_id=scene_id, im_id=im_id) 202 | 203 | # Visualization. 
204 | visualization.vis_object_poses( 205 | poses=gt_poses, K=K, renderer=ren, rgb=rgb, depth=depth, 206 | vis_rgb_path=vis_rgb_path, vis_depth_diff_path=vis_depth_diff_path, 207 | vis_rgb_resolve_visib=p['vis_rgb_resolve_visib']) 208 | 209 | misc.log('Done.') 210 | -------------------------------------------------------------------------------- /keypoint/scripts/vis_object_symmetries.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """Visualizes object models under all identified symmetry transformations.""" 5 | 6 | import os 7 | import numpy as np 8 | 9 | from bop_toolkit_lib import config 10 | from bop_toolkit_lib import dataset_params 11 | from bop_toolkit_lib import inout 12 | from bop_toolkit_lib import misc 13 | from bop_toolkit_lib import renderer 14 | from bop_toolkit_lib import transform as tr 15 | 16 | 17 | # PARAMETERS. 18 | ################################################################################ 19 | p = { 20 | # See dataset_params.py for options. 21 | 'dataset': 'itodd', 22 | 23 | # Type of the renderer (used for the VSD pose error function). 24 | 'renderer_type': 'python', # Options: 'cpp', 'python'. 25 | 26 | # See misc.get_symmetry_transformations(). 27 | 'max_sym_disc_step': 0.01, 28 | 29 | 'views': [ 30 | { 31 | 'R': tr.rotation_matrix(0.5 * np.pi, [1, 0, 0]).dot( 32 | tr.rotation_matrix(-0.5 * np.pi, [0, 0, 1])).dot( 33 | tr.rotation_matrix(0.1 * np.pi, [0, 1, 0]))[:3, :3], 34 | 't': np.array([[0, 0, 500]]).T 35 | } 36 | ], 37 | 38 | # Folder containing the BOP datasets. 39 | 'datasets_path': config.datasets_path, 40 | 41 | # Folder for output visualisations. 42 | 'vis_path': os.path.join(config.output_path, 'vis_object_symmetries'), 43 | 44 | # Path templates for output images. 45 | 'vis_rgb_tpath': os.path.join( 46 | '{vis_path}', '{dataset}', '{obj_id:06d}', 47 | '{view_id:06d}_{pose_id:06d}.jpg'), 48 | } 49 | ################################################################################ 50 | 51 | 52 | # Load dataset parameters. 53 | model_type = None # None = default. 54 | if p['dataset'] == 'tless': 55 | model_type = 'cad' 56 | dp_model = dataset_params.get_model_params( 57 | p['datasets_path'], p['dataset'], model_type) 58 | dp_camera = dataset_params.get_camera_params( 59 | p['datasets_path'], p['dataset']) 60 | 61 | K = dp_camera['K'] 62 | fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2] 63 | 64 | # Create a renderer. 65 | width, height = dp_camera['im_size'] 66 | ren = renderer.create_renderer( 67 | width, height, p['renderer_type'], mode='rgb', shading='flat') 68 | 69 | # Load meta info about the models (including symmetries). 70 | models_info = inout.load_json(dp_model['models_info_path'], keys_to_int=True) 71 | 72 | 73 | for obj_id in dp_model['obj_ids']: 74 | 75 | # Load object model. 76 | misc.log('Loading 3D model of object {}...'.format(obj_id)) 77 | model_path = dp_model['model_tpath'].format(obj_id=obj_id) 78 | ren.add_object(obj_id, model_path) 79 | 80 | poses = misc.get_symmetry_transformations( 81 | models_info[obj_id], p['max_sym_disc_step']) 82 | 83 | for pose_id, pose in enumerate(poses): 84 | 85 | for view_id, view in enumerate(p['views']): 86 | 87 | R = view['R'].dot(pose['R']) 88 | t = view['R'].dot(pose['t']) + view['t'] 89 | 90 | vis_rgb = ren.render_object(obj_id, R, t, fx, fy, cx, cy)['rgb'] 91 | 92 | # Path to the output RGB visualization. 
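      # Note on the pose composition a few lines above (standard rigid-body
      # algebra): applying the symmetry first and the viewing pose second maps
      # a model point x to
      #   x_cam = R_view @ (R_sym @ x + t_sym) + t_view
      #         = (R_view @ R_sym) @ x + (R_view @ t_sym + t_view),
      # which is exactly R = view['R'].dot(pose['R']) and
      # t = view['R'].dot(pose['t']) + view['t'].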
93 | vis_rgb_path = p['vis_rgb_tpath'].format( 94 | vis_path=p['vis_path'], dataset=p['dataset'], obj_id=obj_id, 95 | view_id=view_id, pose_id=pose_id) 96 | misc.ensure_dir(os.path.dirname(vis_rgb_path)) 97 | inout.save_im(vis_rgb_path, vis_rgb) 98 | 99 | misc.log('Done.') 100 | -------------------------------------------------------------------------------- /keypoint/train/base_options.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | from collections import namedtuple 4 | 5 | class BaseTrainOptions(): 6 | 7 | def parse_args(self, arg_list=None): 8 | if arg_list is None: 9 | self.args = self.parser.parse_args() 10 | else: 11 | self.args = self.parser.parse_args(arg_list) 12 | 13 | if self.args.from_json is not None: 14 | path_to_json = os.path.abspath(self.args.from_json) 15 | with open(path_to_json, "r") as f: 16 | json_args = json.load(f) 17 | json_args = namedtuple("json_args", json_args.keys())(**json_args) 18 | return json_args 19 | else: 20 | self.args.log_dir = os.path.join(os.path.abspath(self.args.log_dir), self.args.name) 21 | self.args.summary_dir = os.path.join(self.args.log_dir, 'tensorboard') 22 | if not os.path.exists(self.args.log_dir): 23 | os.makedirs(self.args.log_dir) 24 | self.args.checkpoint_dir = os.path.join(self.args.log_dir, 'checkpoints') 25 | if not os.path.exists(self.args.checkpoint_dir): 26 | os.makedirs(self.args.checkpoint_dir) 27 | self._save_dump() 28 | return self.args 29 | 30 | def _save_dump(self): 31 | if not os.path.exists(self.args.log_dir): 32 | os.makedirs(self.args.log_dir) 33 | with open(os.path.join(self.args.log_dir, "config.json"), "w") as f: 34 | json.dump(vars(self.args), f, indent=4) 35 | return 36 | 37 | class BaseTestOptions(): 38 | 39 | def parse_args(self): 40 | args = self.parser.parse_args() 41 | path_to_json = os.path.abspath(args.json) 42 | with open(path_to_json, "r") as f: 43 | train_args = json.load(f) 44 | train_args = namedtuple("train_args", train_args.keys())(**train_args) 45 | if not os.path.exists(args.out_dir): 46 | os.makedirs(args.out_dir) 47 | return args, train_args 48 | -------------------------------------------------------------------------------- /keypoint/train/base_trainer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import time 3 | import sys 4 | import math 5 | from tqdm import tqdm 6 | tqdm.monitor_interval = 0 7 | from tensorboardX import SummaryWriter 8 | from utils import CheckpointDataLoader, CheckpointSaver 9 | 10 | class BaseTrainer: 11 | 12 | def __init__(self, options): 13 | self.options = options 14 | self.collate_fn = None 15 | self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') 16 | 17 | self._init_fn() # define your model, optimizers etc. 
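        # Contract expected from subclasses (cf. detection_trainer.py and
        # keypoint_trainer.py): _init_fn() must create self.train_ds,
        # self.test_ds, the model(s) and self.optimizer, and register them in
        # self.models_dict / self.optimizers_dict so that the CheckpointSaver
        # and the learning-rate schedulers set up below can find them; it may
        # also replace self.collate_fn.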
18 | self.saver = CheckpointSaver(save_dir=options.checkpoint_dir) 19 | self.summary_writer = SummaryWriter(self.options.summary_dir) 20 | 21 | self.checkpoint = None 22 | if self.options.resume and self.saver.exists_checkpoint(): 23 | self.checkpoint = self.saver.load_checkpoint(self.models_dict, self.optimizers_dict, checkpoint_file=self.options.checkpoint) 24 | 25 | if self.checkpoint is None: 26 | self.epoch_count = 0 27 | self.step_count = 0 28 | else: 29 | self.epoch_count = self.checkpoint['epoch'] 30 | self.step_count = self.checkpoint['total_step_count'] 31 | 32 | self.lr_scheduler = None 33 | self.exponential_scheduler = None 34 | if self.options.lr_decay < 1.0: 35 | self.exponential_scheduler = torch.optim.lr_scheduler.ExponentialLR( 36 | optimizer = self.optimizer,, 37 | gamma = self.options.lr_decay, 38 | last_epoch = self.step_count-1) 39 | print('lr_decay/epoch:', self.options.lr_decay) 40 | 41 | if self.options.lr_schedule is not None: 42 | self.lr_scheduler = torch.optim.lr_scheduler.MultiStepLR( 43 | optimizer = self.optimizer, 44 | milestones = self.options.lr_schedule, 45 | gamma = self.options.lr_gamma, 46 | last_epoch = self.step_count-1) 47 | 48 | print('lr_schedule:', self.options.lr_schedule) 49 | 50 | def _init_fn(self): 51 | raise NotImplementedError('You need to provide an _init_fn method') 52 | 53 | # @profile 54 | def train(self): 55 | 56 | self.endtime = time.time() + self.options.time_to_run 57 | for epoch in tqdm(range(self.epoch_count, self.options.num_epochs), total=self.options.num_epochs, initial=self.epoch_count): 58 | train_data_loader = CheckpointDataLoader(self.train_ds,checkpoint=self.checkpoint, 59 | batch_size=self.options.batch_size, 60 | num_workers=self.options.num_workers, 61 | pin_memory=self.options.pin_memory, 62 | shuffle=self.options.shuffle_train, 63 | collate_fn=self.collate_fn) 64 | warmup_scheduler = None 65 | warmup_steps = self.options.warmup_steps 66 | if epoch == 0 and self.step_count == 0 and self.checkpoint is None: 67 | warmup_iters = warmup_steps 68 | warmup_factor = 1./warmup_steps 69 | warmup_scheduler = warmup_lr_scheduler(self.optimizer, warmup_iters, warmup_factor) 70 | 71 | for step, batch in enumerate(tqdm(train_data_loader, desc='Epoch '+str(epoch), 72 | total=math.ceil(len(self.train_ds)/self.options.batch_size), 73 | initial=train_data_loader.checkpoint_batch_idx), 74 | train_data_loader.checkpoint_batch_idx): 75 | 76 | if time.time() < self.endtime: 77 | out = self._train_step(batch) 78 | 79 | self.step_count += 1 80 | 81 | if self.step_count % self.options.summary_steps == 0: 82 | self._train_summaries(batch, *out) 83 | 84 | if self.step_count % self.options.test_steps == 0: 85 | val_loss = self.test() 86 | 87 | if self.step_count % self.options.checkpoint_steps == 0: 88 | self.saver.save_checkpoint(self.models_dict, self.optimizers_dict, epoch, step+1, self.options.batch_size, train_data_loader.sampler.dataset_perm, self.step_count) 89 | tqdm.write('Checkpoint saved') 90 | 91 | else: 92 | tqdm.write('Timeout reached') 93 | self.saver.save_checkpoint(self.models_dict, self.optimizers_dict, epoch, step, self.options.batch_size, train_data_loader.sampler.dataset_perm, self.step_count) 94 | tqdm.write('Checkpoint saved') 95 | sys.exit(0) 96 | 97 | 98 | if warmup_scheduler is not None: 99 | warmup_scheduler.step() 100 | if self.step_count > warmup_steps: 101 | print('Setting warmup scheduler to none') 102 | warmup_scheduler = None 103 | 104 | if self.lr_scheduler is not None: 105 | self.lr_scheduler.step() 106 | 107 | 
if self.exponential_scheduler is not None: 108 | self.exponential_scheduler.step() 109 | 110 | # load a checkpoint only on startup, for the next epochs 111 | # just iterate over the dataset as usual 112 | self.checkpoint=None 113 | 114 | # save checkpoint after each epoch 115 | self.saver.save_checkpoint(self.models_dict, self.optimizers_dict, epoch+1, 0, self.options.batch_size, None, self.step_count) 116 | 117 | return 118 | 119 | def _get_lr(self): 120 | return next(iter(self.optimizers_dict.values())).param_groups[0]['lr'] 121 | 122 | def _train_step(self, input_batch): 123 | raise NotImplementedError('You need to provide a _train_step method') 124 | 125 | def _train_summaries(self, input_batch): 126 | raise NotImplementedError('You need to provide a _save_summaries method') 127 | 128 | def test(self, input_batch): 129 | raise NotImplementedError('You need to provide a _test_step method') 130 | 131 | 132 | def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor): 133 | 134 | def f(x): 135 | if x >= warmup_iters: 136 | return 1 137 | alpha = float(x) / warmup_iters 138 | return warmup_factor * (1 - alpha) + alpha 139 | 140 | return torch.optim.lr_scheduler.LambdaLR(optimizer, f) 141 | -------------------------------------------------------------------------------- /keypoint/train/detection_trainer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.utils.data import DataLoader 4 | from torchvision import transforms 5 | from torchvision.utils import make_grid 6 | 7 | import time 8 | from tqdm import tqdm 9 | tqdm.monitor_interval = 0 10 | 11 | from bop_dataset import BOPDataset 12 | from base_trainer import BaseTrainer 13 | from transforms import ColorJitter, ToTensor, \ 14 | RandomHorizontalFlip, RandomGaussianBlur, RandomGrayscale 15 | 16 | from models import FRCNN 17 | 18 | class DetectionTrainer(BaseTrainer): 19 | 20 | def _init_fn(self): 21 | transform_list = [] 22 | transform_list.append(ColorJitter(brightness=self.options.jitter, contrast=self.options.jitter, saturation=self.options.jitter, hue=self.options.jitter/4)) 23 | transform_list.append(RandomGrayscale(0.2)) 24 | transform_list.append(RandomGaussianBlur(kernel_size=7)) 25 | transform_list.append(ToTensor()) 26 | transform_list.append(RandomHorizontalFlip(0.5)) 27 | 28 | test_transform_list = [] 29 | test_transform_list.append(ToTensor()) 30 | 31 | self.train_ds = BOPDataset(self.options.dataset_dir, self.options.dataset, split='train', 32 | valid_objid = self.options.objid, 33 | return_keypoints=False, return_coco=True, 34 | transform=transforms.Compose(transform_list)) 35 | 36 | self.test_ds = BOPDataset(self.options.dataset_dir, self.options.dataset, split='test', 37 | valid_objid = self.options.objid, 38 | return_keypoints=False, return_coco=True, 39 | transform=transforms.Compose(test_transform_list)) 40 | 41 | self.collate_fn = lambda batch: tuple(batch) 42 | 43 | 44 | num_classes = len(self.train_ds.obj2idx) + 1 45 | self.model = FRCNN(num_classes).to(self.device) 46 | 47 | params = [p for p in self.model.parameters() if p.requires_grad] 48 | self.optimizer = torch.optim.Adam(params=self.model.parameters(), lr=self.options.lr) 49 | 50 | # pack all models and optimizers in dictionaries to interact with the checkpoint saver 51 | self.models_dict = {'frcnn': self.model} 52 | self.optimizers_dict = {'optimizer': self.optimizer} 53 | 54 | # meter to track moving average 55 | self.loss_box_meter = AverageMeter() 56 | 
self.loss_class_meter = AverageMeter() 57 | 58 | print('Using device:', self.device) 59 | print('Using optimizer:', self.options.optimizer) 60 | print('Total number of classes:', num_classes) 61 | 62 | def _train_step(self, input_batch): 63 | # Force optimizer to use initial/reset learning rate, if specified 64 | if self.options.new_lr is True: 65 | for g in self.optimizer.param_groups: 66 | g['lr'] = self.options.lr 67 | self.options.new_lr = False 68 | 69 | # structure input_batch for torchvision detection module 70 | images = [s['image'].to(self.device) for s in input_batch] 71 | targets = [{k: v.to(self.device) for k, v in s.items() if k!='image'} for s in input_batch] 72 | 73 | # train step 74 | self.model.train() 75 | loss_dict = self.model(images, targets) 76 | losses = sum(loss for loss in loss_dict.values()) 77 | 78 | self.optimizer.zero_grad() 79 | losses.backward() 80 | self.optimizer.step() 81 | 82 | # value 83 | loss_box = loss_dict['loss_box_reg'].cpu().item() 84 | loss_class = loss_dict['loss_classifier'].cpu().item() 85 | 86 | ## reset every 25k steps 87 | if self.step_count % 100 == 0: 88 | self.loss_box_meter.reset() 89 | self.loss_class_meter.reset() 90 | 91 | self.loss_box_meter.update(loss_box) 92 | self.loss_class_meter.update(loss_class) 93 | 94 | return self.loss_box_meter.avg, self.loss_class_meter.avg 95 | 96 | 97 | def _train_summaries(self, batch, loss_box, loss_class): 98 | self._get_summaries(batch, loss_box, loss_class, is_train=True) 99 | 100 | 101 | def test(self): 102 | test_data_loader = DataLoader(self.test_ds, batch_size=self.options.test_batch_size, 103 | num_workers=self.options.num_workers, 104 | pin_memory=self.options.pin_memory, 105 | shuffle=self.options.shuffle_test, 106 | collate_fn=self.collate_fn) 107 | 108 | self.model.always_return_loss = True 109 | 110 | test_loss_box = torch.tensor(0.0, device=self.device) 111 | test_loss_class = torch.tensor(0.0, device=self.device) 112 | for tstep, batch in enumerate(tqdm(test_data_loader, desc='Testing')): 113 | if time.time() < self.endtime: 114 | 115 | loss_box, loss_class = self._test_step(batch) 116 | 117 | test_loss_box += loss_box 118 | test_loss_class += loss_class 119 | else: 120 | tqdm.write('Testing interrupted at step ' + str(tstep)) 121 | break 122 | 123 | test_loss_box /= (tstep+1) 124 | test_loss_class /= (tstep+1) 125 | 126 | self.model.always_return_loss = False 127 | self._get_summaries(batch, test_loss_box, test_loss_class, is_train=False) 128 | 129 | 130 | return 131 | 132 | def _test_step(self, input_batch): 133 | 134 | images = [s['image'].to(self.device) for s in input_batch] 135 | targets = [{k: v.to(self.device) for k, v in s.items() if k!='image'} for s in input_batch] 136 | 137 | self.model.eval() 138 | with torch.no_grad(): 139 | loss_dict = self.model(images, targets) 140 | 141 | loss_box = loss_dict['loss_box_reg'].cpu().item() 142 | loss_class = loss_dict['loss_classifier'].cpu().item() 143 | 144 | return loss_box, loss_class 145 | 146 | 147 | def _get_summaries(self, batch, loss_box, loss_class, is_train): 148 | images = [s['image'].to(self.device) for s in batch] 149 | targets = [{k: v.to(self.device) for k, v in s.items() if k!='image'} for s in batch] 150 | 151 | image = images[0] 152 | target = targets[0] 153 | 154 | self.model.eval() 155 | self.model.always_return_loss = False 156 | with torch.no_grad(): 157 | pred = self.model([image])[0] 158 | 159 | # ground truth 160 | gt_boxes = target['boxes'].int() 161 | gt_labels = target['labels'].int() 162 | gt_labels = 
[str(l) for l in gt_labels.tolist()] 163 | 164 | 165 | # prediction 166 | thresh = 0.80 167 | conf = pred['scores'] > thresh 168 | 169 | conf_scores = pred['scores'][conf] 170 | conf_boxes = pred['boxes'][conf].int() 171 | conf_labels = pred['labels'][conf].int() 172 | 173 | valid = torch.zeros_like(conf_labels).bool() 174 | unique_labels = torch.unique(conf_labels) 175 | for uni in unique_labels: 176 | p = (conf_labels==uni).nonzero(as_tuple=False).reshape(-1) 177 | valid[p[0]] = True 178 | 179 | pd_boxes = conf_boxes[valid] 180 | pd_labels = conf_labels[valid] 181 | pd_labels = [str(l) for l in pd_labels.tolist()] 182 | 183 | self._save_summaries(image, gt_boxes, gt_labels, pd_boxes, pd_labels, 184 | loss_box, loss_class, self.step_count, is_train=is_train) 185 | 186 | 187 | def _save_summaries(self, image, gt_boxes, gt_labels, pd_boxes, pd_labels, 188 | loss_box, loss_class, step, is_train=True): 189 | 190 | prefix = 'train/' if is_train else 'test/' 191 | 192 | self.summary_writer.add_scalar(prefix + 'loss_box', loss_box, step) 193 | self.summary_writer.add_scalar(prefix + 'loss_class', loss_class, step) 194 | 195 | self.summary_writer.add_image_with_boxes(prefix + 'gt_boxes', image, gt_boxes, 196 | step, labels=gt_labels, dataformats='CHW') 197 | self.summary_writer.add_image_with_boxes(prefix + 'pd_boxes', image, pd_boxes, 198 | step, labels=pd_labels, dataformats='CHW') 199 | 200 | if is_train: 201 | self.summary_writer.add_scalar('lr', self._get_lr(), step) 202 | return 203 | 204 | 205 | 206 | class AverageMeter(object): 207 | """Computes and stores the average and current value""" 208 | def __init__(self): 209 | self.reset() 210 | 211 | def reset(self): 212 | self.val = 0 213 | self.avg = 0 214 | self.sum = 0 215 | self.count = 0 216 | 217 | def update(self, val, n=1): 218 | self.val = val 219 | self.sum += val * n 220 | self.count += n 221 | self.avg = self.sum / self.count if self.count != 0 else 0 222 | -------------------------------------------------------------------------------- /keypoint/train/keypoint_trainer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.utils.data import DataLoader 4 | from torchvision import transforms 5 | from torchvision.utils import make_grid 6 | 7 | import time 8 | from tqdm import tqdm 9 | tqdm.monitor_interval = 0 10 | 11 | from bop_dataset import BOPDataset 12 | from base_trainer import BaseTrainer 13 | from transforms import RandomFlipLR, RandomRescaleBB, RandomGrayscale, RandomRotation,\ 14 | RandomBlur, ColorJitter, CropAndResize, LocsToHeatmaps,\ 15 | ToTensor, Normalize, Denormalize, Select, AffineCrop 16 | from models import StackedHourglass 17 | from misc import Pose2DEval, KptsMSELoss 18 | 19 | class KeypointTrainer(BaseTrainer): 20 | 21 | def _init_fn(self): 22 | transform_list = [] 23 | transform_list.append(ColorJitter(brightness=self.options.jitter, contrast=self.options.jitter, saturation=self.options.jitter, hue=self.options.jitter/4)) 24 | transform_list.append(AffineCrop(out_size=self.options.crop_size, scale_factor=0.15, rotation_factor=45, dialation=0.25)) 25 | transform_list.append(LocsToHeatmaps(out_size=(self.options.heatmap_size, self.options.heatmap_size))) 26 | transform_list.append(ToTensor()) 27 | transform_list.append(Normalize()) 28 | 29 | test_transform_list = [] 30 | test_transform_list.append(AffineCrop(out_size=self.options.crop_size, scale_factor=0, rotation_factor=0, dialation=0.25)) 31 | 
test_transform_list.append(LocsToHeatmaps(out_size=(self.options.heatmap_size, self.options.heatmap_size))) 32 | test_transform_list.append(ToTensor()) 33 | test_transform_list.append(Normalize()) 34 | 35 | self.train_ds = BOPDataset(self.options.dataset_dir, self.options.dataset, split='train', 36 | valid_objid = self.options.objid, 37 | return_keypoints=True, 38 | transform=transforms.Compose(transform_list)) 39 | 40 | self.test_ds = BOPDataset(self.options.dataset_dir, self.options.dataset, split='test', 41 | valid_objid = self.options.objid, 42 | return_keypoints=True, 43 | transform=transforms.Compose(test_transform_list)) 44 | self.collate_fn = None 45 | 46 | self.options.num_keypoints = self.train_ds.n_kpts 47 | 48 | self.model = StackedHourglass(self.options.num_keypoints).to(self.device) 49 | 50 | if self.options.optimizer is 'adam': 51 | self.optimizer = torch.optim.Adam(params=self.model.parameters(), lr=self.options.lr, 52 | betas=(0.9, 0.999), eps=1e-08) 53 | print('Using ADAM.') 54 | else: 55 | self.optimizer = torch.optim.RMSprop(params=self.model.parameters(), lr=self.options.lr, 56 | momentum=0, weight_decay=self.options.wd) 57 | 58 | 59 | # pack all models and optimizers in dictionaries to interact with the checkpoint saver 60 | self.models_dict = {'stacked_hg': self.model} 61 | self.optimizers_dict = {'optimizer': self.optimizer} 62 | 63 | self.criterion = KptsMSELoss(use_vis=self.options.use_vis).to(self.device) 64 | self.pose = Pose2DEval(detection_thresh=self.options.detection_thresh, dist_thresh=self.options.dist_thresh) 65 | 66 | print('Total number of model parameters:', self.model.num_trainable_parameters()) 67 | print('Using device:', self.device) 68 | print('Using optimizer:', self.options.optimizer) 69 | 70 | def _train_step(self, input_batch): 71 | # Force optimizer to use initial/reset learning rate, if specified 72 | if self.options.new_lr is True: 73 | for g in self.optimizer.param_groups: 74 | g['lr'] = self.options.lr 75 | self.options.new_lr = False 76 | 77 | input_batch = {k: v.to(self.device) for k,v in input_batch.items()} 78 | 79 | self.model.train() 80 | images = input_batch['image'] 81 | gt_keypoints = input_batch['keypoint_heatmaps'] 82 | vis = input_batch['visible_keypoints'] 83 | 84 | pred_keypoints = self.model(images) 85 | loss = torch.tensor(0.0, device=self.device) 86 | for i in range(len(pred_keypoints)): 87 | loss += self.criterion(pred_keypoints[i], gt_keypoints, vis) 88 | self.optimizer.zero_grad() 89 | loss.backward() 90 | self.optimizer.step() 91 | return [pk.detach() for pk in pred_keypoints], loss.detach() 92 | 93 | def _train_summaries(self, batch, pred_keypoints, loss): 94 | batch = {k: v.to(self.device) for k,v in batch.items()} 95 | 96 | pck = self.pose.pck(batch['keypoint_heatmaps'], pred_keypoints[-1]) 97 | self._save_summaries(batch, pred_keypoints, loss, pck, self.step_count, is_train=True) 98 | 99 | def test(self): 100 | test_data_loader = DataLoader(self.test_ds, batch_size=self.options.test_batch_size, 101 | num_workers=self.options.num_workers, 102 | pin_memory=self.options.pin_memory, 103 | shuffle=self.options.shuffle_test) 104 | test_loss = torch.tensor(0.0, device=self.device) 105 | mean_pck = 0.0 106 | for tstep, batch in enumerate(tqdm(test_data_loader, desc='Testing')): 107 | if time.time() < self.endtime: 108 | batch = {k: v.to(self.device) for k,v in batch.items()} 109 | pred_keypoints, loss = self._test_step(batch) 110 | test_loss += loss.data 111 | mean_pck += self.pose.pck(batch['keypoint_heatmaps'], 
pred_keypoints[-1]) 112 | else: 113 | tqdm.write('Testing interrupted at step ' + str(tstep)) 114 | break 115 | test_loss /= (tstep+1) 116 | mean_pck /= (tstep+1) 117 | self._save_summaries(batch, pred_keypoints, test_loss, mean_pck, self.step_count, is_train=False) 118 | return test_loss 119 | 120 | def _test_step(self, input_batch): 121 | self.model.eval() 122 | images = input_batch['image'] 123 | gt_keypoints = input_batch['keypoint_heatmaps'] 124 | vis = input_batch['visible_keypoints'] 125 | with torch.no_grad(): 126 | pred_keypoints = self.model(images) 127 | loss = torch.tensor(0.0, device=self.device) 128 | for i in range(len(pred_keypoints)): 129 | loss += self.criterion(pred_keypoints[i], gt_keypoints, vis) 130 | return pred_keypoints, loss 131 | 132 | def _save_summaries(self, input_batch, pred_keypoints, loss, pck, step, is_train=True): 133 | prefix = 'train/' if is_train else 'test/' 134 | input_batch = Denormalize()(input_batch) 135 | images = input_batch['image'] 136 | gt_keypoints = input_batch['keypoint_heatmaps'] 137 | 138 | gt_image_keypoints = [] 139 | pred_image_keypoints = [] 140 | gt_image_keypoints, pred_image_keypoints = self.pose.draw_keypoints_with_labels(images, gt_keypoints, pred_keypoints[-1]) 141 | 142 | gt_image_keypoints_grid = make_grid(gt_image_keypoints, pad_value=1, nrow=3) 143 | pred_image_keypoints_grid = make_grid(pred_image_keypoints, pad_value=1, nrow=3) 144 | 145 | pred_heatmaps_grid = make_grid(pred_keypoints[-1][0,:,:,:].unsqueeze(0).transpose(0,1), pad_value=1, nrow=5) 146 | pred_heatmaps_grid[pred_heatmaps_grid > 1] = 1 147 | pred_heatmaps_grid[pred_heatmaps_grid < 0] = 0 148 | 149 | self.summary_writer.add_scalar(prefix + 'loss', loss, step) 150 | self.summary_writer.add_scalar(prefix + 'PCK', pck, step) 151 | self.summary_writer.add_image(prefix + 'gt_image_keypoints', gt_image_keypoints_grid, step) 152 | self.summary_writer.add_image(prefix + 'pred_image_keypoints', pred_image_keypoints_grid, step) 153 | self.summary_writer.add_image(prefix + 'pred_heatmaps_image1', pred_heatmaps_grid, step) 154 | if is_train: 155 | self.summary_writer.add_scalar('lr', self._get_lr(), step) 156 | return 157 | -------------------------------------------------------------------------------- /keypoint/train/train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # this is a hack to make it work in the cluster because 4 | #import matplotlib 5 | #matplotlib.use('Agg') 6 | 7 | import torch 8 | import numpy as np 9 | from train_options import TrainOptions 10 | from keypoint_trainer import KeypointTrainer 11 | from detection_trainer import DetectionTrainer 12 | 13 | if __name__ == '__main__': 14 | 15 | # reproducibility 16 | np.random.seed(0) 17 | torch.manual_seed(0) 18 | 19 | # training code 20 | options = TrainOptions().parse_args() 21 | if options.task == 'keypoints': 22 | trainer = KeypointTrainer(options) 23 | elif options.task == 'detection': 24 | trainer = DetectionTrainer(options) 25 | else: 26 | print("The requested option is not supported on this dataset") 27 | exit() 28 | 29 | trainer.train() 30 | -------------------------------------------------------------------------------- /keypoint/train/train_options.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from .base_options import BaseTrainOptions 3 | 4 | class TrainOptions(BaseTrainOptions): 5 | 6 | def __init__(self): 7 | self.parser = argparse.ArgumentParser() 8 | 9 | bop = 
self.parser.add_argument_group('BOP') 10 | bop.add_argument('--dataset', default=None) 11 | bop.add_argument('--objid', nargs='+', type=int) 12 | bop.add_argument('--use_vis', default=False, action='store_true', 13 | help='ignore invisible keypoints during backprop; \ 14 | default false: the network will learn to detect keypoints only on the centered object') 15 | bop.add_argument('--new_lr', default=False, action='store_true') 16 | bop.add_argument('--lr_schedule', nargs='+', type=int) 17 | bop.add_argument('--lr_gamma', type=float, default=0.1) 18 | bop.add_argument("--lr_decay", type=float, default=1.00, help="Exponential decay rate") 19 | 20 | bop.add_argument('--warmup_steps', type=int, default=1000) 21 | bop.add_argument('--hr_w', type=int, default=19) 22 | 23 | req = self.parser.add_argument_group('Required') 24 | req.add_argument('--name', required=True, help='Name of the experiment') 25 | task = req.add_mutually_exclusive_group(required=True) 26 | task.add_argument('--detection', dest='task', action='store_const', const='detection') 27 | task.add_argument('--segmentation', dest='task', action='store_const', const='segmentation') 28 | task.add_argument('--keypoints', dest='task', action='store_const', const='keypoints') 29 | task.add_argument('--keypoints_hr', dest='task', action='store_const', const='keypoints_hr') 30 | 31 | task.add_argument('--joint', dest='task', action='store_const', const='joint') 32 | task.add_argument('--joint_gan', dest='task', action='store_const', const='joint_gan') 33 | task.add_argument('--joint_ref', dest='task', action='store_const', const='joint_ref') 34 | task.add_argument('--autoencoder', dest='task', action='store_const', const='autoencoder') 35 | task.add_argument('--k2m', dest='task', action='store_const', const='k2m') 36 | task.add_argument('--k2m_gan', dest='task', action='store_const', const='k2m_gan') 37 | req.set_defaults(task='keypoints') 38 | 39 | gen = self.parser.add_argument_group('General') 40 | gen.add_argument('--time_to_run', type=int, default=82800, help='Total time to run in seconds') 41 | gen.add_argument('--resume', dest='resume', default=False, action='store_true', help='Resume from checkpoint (use latest checkpoint by default)') 42 | gen.add_argument('--num_workers', type=int, default=4, help='Number of processes used for data loading') 43 | pin = gen.add_mutually_exclusive_group() 44 | pin.add_argument('--pin_memory', dest='pin_memory', action='store_true', help='Pin memory during data loading') 45 | pin.add_argument('--no_pin_memory', dest='pin_memory', action='store_false', help='Do not pin memory during data loading') 46 | gen.set_defaults(pin_memory=True) 47 | 48 | io = self.parser.add_argument_group('io') 49 | io.add_argument('--dataset_dir', default='/scratch/yufu/bop', help='Path to the desired dataset') 50 | io.add_argument('--log_dir', default='../logs', help='Directory to store logs') 51 | io.add_argument('--checkpoint', default=None, help='Path to checkpoint') 52 | io.add_argument('--from_json', default=None, help='Load options from json file instead of the command line') 53 | 54 | data_proc = self.parser.add_argument_group('Data Preprocessing') 55 | data_proc.add_argument('--degrees', type=float, default=0, help='Random rotation angle in the range [-degrees, degrees]') 56 | data_proc.add_argument('--max_scale', type=float, default=1.0) 57 | data_proc.add_argument('--crop_size', type=int, default=256, help='Size of cropped image to feed to the network') 58 | fliplr = 
data_proc.add_mutually_exclusive_group() 59 | fliplr.add_argument('--flip_lr', dest='flip_lr', action='store_true', help='Flip training images') 60 | fliplr.add_argument('--no_flip_lr', dest='flip_lr', action='store_false', help='Do not flip training images') 61 | apriltag = data_proc.add_mutually_exclusive_group() 62 | apriltag.add_argument('--apriltag', dest='apriltag', action='store_true', help='Enable apriltag-related data preprocessing') 63 | apriltag.add_argument('--no_apriltag', dest='apriltag', action='store_false', help='Disable apriltag-related data preprocessing') 64 | rr = data_proc.add_mutually_exclusive_group() 65 | rr.add_argument('--random_rescale', dest='random_rescale', action='store_true', help='Randomly rescale bounding boxes') 66 | rr.add_argument('--no_random_rescale', dest='random_rescale', action='store_false', help='Do not rescale bounding boxes') 67 | data_proc.add_argument('--heatmap_size', type=int, default=64, help='Size of output heatmaps') 68 | data_proc.add_argument('--detection_thresh', type=float, default=1e-1, help='Confidence threshold for keypoint detection') 69 | data_proc.add_argument('--dist_thresh', type=float, default=10, help='Distance threshold (in pixels) for PCK evaluation') 70 | data_proc.add_argument('--jitter', type=float, default=0.25, help='Amount of image jitter to apply [0, 1]') 71 | data_proc.set_defaults(flip_lr=True, random_rescale=True, apriltag=True) 72 | arch_hg = self.parser.add_argument_group('Hourglass Architecture') 73 | arch_hg.add_argument('--hg_channels', type=int, default=256, help='Number of channels for the Hourglass') 74 | arch_hg.add_argument('--num_hg', type=int, default=2, help='Number of stacked Hourglasses') 75 | arch_hg.add_argument('--num_resblocks', type=int, default=1, help='Number of stacked residual blocks') 76 | 77 | arch_unet = self.parser.add_argument_group('UNet Architecture') 78 | arch_unet.add_argument('--num_filters', type=int, default=64, help='Number of filters in conv1') 79 | arch_unet.add_argument('--num_blocks', type=int, default=5, help='Number of blocks') 80 | arch_unet.add_argument('--unet_type', default='v2', help='UNet variant') 81 | arch_unet.add_argument('--mask_only', dest='mask_only', default=False, action='store_true', help='Output masks only') 82 | 83 | train = self.parser.add_argument_group('Training Options') 84 | train.add_argument('--num_keypoints', type=int, default=76, help='Number of distinct keypoint classes') 85 | train.add_argument('--num_epochs', type=int, default=30, help='Total number of training epochs') 86 | train.add_argument('--batch_size', type=int, default=16, help='Batch size') 87 | train.add_argument('--test_batch_size', type=int, default=8, help='Test batch size') 88 | shuffle_train = train.add_mutually_exclusive_group() 89 | shuffle_train.add_argument('--shuffle_train', dest='shuffle_train', action='store_true', help='Shuffle training data') 90 | shuffle_train.add_argument('--no_shuffle_train', dest='shuffle_train', action='store_false', help='Don\'t shuffle training data') 91 | shuffle_test = train.add_mutually_exclusive_group() 92 | shuffle_test.add_argument('--shuffle_test', dest='shuffle_test', action='store_true', help='Shuffle testing data') 93 | shuffle_test.add_argument('--no_shuffle_test', dest='shuffle_test', action='store_false', help='Don\'t shuffle testing data') 94 | train.set_defaults(shuffle_train=True, shuffle_test=True) 95 | train.add_argument('--summary_steps', type=int, default=100, help='Summary saving frequency') 96 | train.add_argument('--checkpoint_steps', type=int, default=10000, help='Checkpoint saving frequency') 97 | 
train.add_argument('--test_steps', type=int, default=1000, help='Testing frequency') 98 | train.add_argument('--test_iters', type=int, default=200, help='Number of testing iterations') 99 | 100 | 101 | optim = self.parser.add_argument_group('Optimization') 102 | optim_type = optim.add_mutually_exclusive_group() 103 | optim_type.add_argument('--use_sgd', dest='optimizer', action='store_const', const='sgd',help='Use SGD (default Adam)') 104 | optim_type.add_argument('--use_rmsprop', dest='optimizer', action='store_const', const='rmsprop',help='Use (default Adam)') 105 | optim_type.add_argument('--use_adam', dest='optimizer', action='store_const', const='adam',help='Use SGD (default Adam)') 106 | optim.add_argument('--adam_beta1', type=float, default=0.9, help='Value for Adam Beta 1') 107 | optim.add_argument('--sgd_momentum', type=float, default=0.0, help='Momentum for SGD') 108 | optim.add_argument("--lr", type=float, default=2.5e-4, help="Learning rate") 109 | optim.add_argument("--wd", type=float, default=0, help="Weight decay weight") 110 | optim.add_argument('--keypoint_lw', type=float, default=100, help='Keypoint loss weight') 111 | optim.add_argument('--gan_mask_lw', type=float, default=10, help='Gan mask loss weight') 112 | 113 | optim.set_defaults(optimizer='rmsprop') 114 | 115 | return 116 | -------------------------------------------------------------------------------- /keypoint/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .data_loader import CheckpointDataLoader 2 | from .saver import CheckpointSaver 3 | -------------------------------------------------------------------------------- /keypoint/utils/data_loader.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import DataLoader 3 | from torch.utils.data.sampler import Sampler 4 | 5 | class RandomSampler(Sampler): 6 | 7 | def __init__(self, data_source, checkpoint): 8 | self.data_source = data_source 9 | if checkpoint is not None and checkpoint['dataset_perm'] is not None: 10 | self.dataset_perm = checkpoint['dataset_perm'] 11 | self.perm = self.dataset_perm[checkpoint['batch_size']*checkpoint['batch_idx']:] 12 | else: 13 | self.dataset_perm = torch.randperm(len(self.data_source)).tolist() 14 | self.perm = self.dataset_perm 15 | 16 | def __iter__(self): 17 | return iter(self.perm) 18 | 19 | def __len__(self): 20 | return len(self.perm) 21 | 22 | class SequentialSampler(Sampler): 23 | 24 | def __init__(self, data_source, checkpoint): 25 | self.data_source = data_source 26 | if checkpoint is not None and checkpoint['dataset_perm'] is not None: 27 | self.dataset_perm = checkpoint['dataset_perm'] 28 | self.perm = self.dataset_perm[checkpoint['batch_size']*checkpoint['batch_idx']:] 29 | else: 30 | self.dataset_perm = list(range(len(self.data_source))) 31 | self.perm = self.dataset_perm 32 | 33 | def __iter__(self): 34 | return iter(self.perm) 35 | 36 | def __len__(self): 37 | return len(self.perm) 38 | 39 | class CheckpointDataLoader(DataLoader): 40 | 41 | def __init__(self, dataset, checkpoint=None, batch_size=1, 42 | shuffle=False, num_workers=0, pin_memory=False, drop_last=False, 43 | timeout=0, worker_init_fn=None, collate_fn=None): 44 | 45 | if shuffle: 46 | sampler = RandomSampler(dataset, checkpoint) 47 | else: 48 | sampler = SequentialSampler(dataset, checkpoint) 49 | if checkpoint is not None: 50 | self.checkpoint_batch_idx = checkpoint['batch_idx'] 51 | else: 52 | 
self.checkpoint_batch_idx = 0 53 | 54 | super(CheckpointDataLoader, self).__init__(dataset, sampler=sampler, shuffle=False, batch_size=batch_size, 55 | pin_memory=pin_memory, timeout=timeout, worker_init_fn=None, 56 | collate_fn=collate_fn) 57 | -------------------------------------------------------------------------------- /keypoint/utils/img_utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import torch 3 | import numpy as np 4 | from plyfile import PlyData, PlyElement 5 | 6 | def draw_kpts(img, kpts, r=5, thickness=5, color=(255,0,0)): 7 | if isinstance(img, np.ndarray): 8 | img = img.copy().astype(np.uint8) 9 | if isinstance(img, torch.Tensor): 10 | img = img.numpy() 11 | img = img.copy().astype(np.uint8) 12 | 13 | for kpt in kpts: 14 | if len(kpt)>2: 15 | x, y, c = kpt 16 | else: 17 | x, y = kpt 18 | c = 1 19 | 20 | if c > 0: 21 | cv2.circle(img, (int(x), int(y)), r, color, thickness) 22 | 23 | return img 24 | 25 | 26 | 27 | ### Save for visualization 28 | def save_ply(vert, face=None, filename='file.ply'): 29 | # Vertices 30 | if isinstance(vert, np.ndarray): 31 | vert = vert.tolist() 32 | vert = [tuple(v) for v in vert] 33 | vert = np.array(vert, dtype=[('x', 'f4'), 34 | ('y', 'f4'), 35 | ('z', 'f4')]) 36 | vert = PlyElement.describe(vert, 'vertex') 37 | 38 | # Faces 39 | if face is not None: 40 | if isinstance(face, np.ndarray): 41 | face = face.tolist() 42 | face = [(face[i], 255, 255, 255) for i in range(len(face))] 43 | face = np.array(face, dtype=[('vertex_indices', 'i4', (3,)), 44 | ('red', 'u1'), 45 | ('green', 'u1'), 46 | ('blue', 'u1')]) 47 | face = PlyElement.describe(face, 'face') 48 | 49 | # Save 50 | if face is not None: 51 | with open(filename, 'wb') as f: 52 | PlyData([vert, face]).write(f) 53 | else: 54 | with open(filename, 'wb') as f: 55 | PlyData([vert]).write(f) 56 | 57 | 58 | def read_ply(plyfile): 59 | plydata = PlyData.read(plyfile) 60 | v = plydata['vertex'].data 61 | v = [list(i) for i in v] 62 | v = np.array(v) 63 | f = plydata['face'].data 64 | f = [list(i) for i in f] 65 | f = np.array(f).squeeze() 66 | return v, f 67 | 68 | 69 | -------------------------------------------------------------------------------- /keypoint/utils/saver.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import datetime 3 | import os 4 | 5 | class CheckpointSaver: 6 | 7 | def __init__(self, save_dir, save_steps=1000): 8 | self.save_dir = os.path.abspath(save_dir) 9 | self.save_steps = save_steps 10 | if not os.path.exists(self.save_dir): 11 | os.makedirs(self.save_dir) 12 | self._get_latest_checkpoint() 13 | return 14 | 15 | # check if a checkpoint exists in the current directory 16 | def exists_checkpoint(self, checkpoint_file=None): 17 | if checkpoint_file is None: 18 | return False if self.latest_checkpoint is None else True 19 | else: 20 | return os.path.isfile(checkpoint_file) 21 | 22 | # save checkpoint 23 | def save_checkpoint(self, models, optimizers, epoch, batch_idx, batch_size, dataset_perm, total_step_count): 24 | timestamp = datetime.datetime.now() 25 | checkpoint_filename = os.path.abspath(os.path.join(self.save_dir, timestamp.strftime('%Y_%m_%d-%H_%M_%S') + '.pt')) 26 | checkpoint = {} 27 | for model in models: 28 | checkpoint[model] = models[model].state_dict() 29 | for optimizer in optimizers: 30 | checkpoint[optimizer] = optimizers[optimizer].state_dict() 31 | checkpoint['epoch'] = epoch 32 | checkpoint['batch_idx'] = batch_idx 33 | 
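# The entries below record the data-loading state: together with 'epoch' and 'batch_idx' above,
# 'batch_size' and 'dataset_perm' let the samplers in data_loader.py resume an interrupted epoch
# from the exact position in the saved permutation.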
checkpoint['batch_size'] = batch_size 34 | checkpoint['dataset_perm'] = dataset_perm 35 | checkpoint['total_step_count'] = total_step_count 36 | print(timestamp, 'Epoch:', epoch, 'Iteration:', batch_idx) 37 | print('Saving checkpoint file [' + checkpoint_filename + ']') 38 | torch.save(checkpoint, checkpoint_filename) 39 | return 40 | 41 | # load a checkpoint 42 | def load_checkpoint(self, models, optimizers, checkpoint_file=None): 43 | if checkpoint_file is None: 44 | print('Loading latest checkpoint [' + self.latest_checkpoint + ']') 45 | checkpoint_file = self.latest_checkpoint 46 | checkpoint = torch.load(checkpoint_file) 47 | for model in models: 48 | models[model].load_state_dict(checkpoint[model]) 49 | for optimizer in optimizers: 50 | optimizers[optimizer].load_state_dict(checkpoint[optimizer]) 51 | return {'epoch': checkpoint['epoch'], 52 | 'batch_idx': checkpoint['batch_idx'], 53 | 'batch_size': checkpoint['batch_size'], 54 | 'dataset_perm': checkpoint['dataset_perm'], 55 | 'total_step_count': checkpoint['total_step_count']} 56 | 57 | # get filename of latest checkpoint if it exists 58 | def _get_latest_checkpoint(self): 59 | checkpoint_list = [] 60 | for dirpath, dirnames, filenames in os.walk(self.save_dir): 61 | for filename in filenames: 62 | if filename.endswith('.pt'): 63 | checkpoint_list.append(os.path.abspath(os.path.join(dirpath, filename))) 64 | checkpoint_list = sorted(checkpoint_list) 65 | self.latest_checkpoint = None if (len(checkpoint_list) is 0) else checkpoint_list[-1] 66 | return 67 | 68 | -------------------------------------------------------------------------------- /keypoint/utils/trimesh_renderer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import trimesh 3 | import pyrender 4 | from PIL import Image, ImageEnhance 5 | 6 | class trimesh_renderer(): 7 | def __init__(self, img_w, img_h): 8 | self.img_w = img_w 9 | self.img_h = img_h 10 | self.default_focal = 500 11 | self.renderer = pyrender.OffscreenRenderer(viewport_width=img_w, 12 | viewport_height=img_h, 13 | point_size=1.0) 14 | 15 | def __call__(self, fuze_trimesh, rot=None, t=None, image=None, 16 | fx=None, fy=None, cx=None, cy=None, mask_over=False): 17 | 18 | # Camera parameter 19 | if fx is None or fy is None: 20 | fx = self.default_focal 21 | fy = self.default_focal 22 | 23 | if cx is None or cy is None: 24 | cx = self.img_w / 2 25 | cy = self.img_h / 2 26 | 27 | # 6DoF object pose in camera coordinate 28 | # You can skip this and apply directly on the input mesh 29 | if rot is None: 30 | rot = np.eye(3) 31 | if t is None: 32 | t = np.zeros(3) 33 | transform = np.zeros([4,4]) 34 | transform[:3, :3] = rot 35 | transform[:3, -1] = t 36 | fuze_trimesh.apply_transform(transform) 37 | 38 | 39 | # OpenGL convension 40 | transform = trimesh.transformations.rotation_matrix( 41 | np.radians(180), [1, 0, 0]) 42 | fuze_trimesh.apply_transform(transform) 43 | 44 | 45 | mesh = pyrender.Mesh.from_trimesh(fuze_trimesh) 46 | scene = pyrender.Scene(ambient_light=(0.5, 0.5, 0.5)) 47 | scene.add(mesh) 48 | 49 | camera = pyrender.IntrinsicsCamera(fx=fx, fy=fy, cx=cx, cy=cy, zfar=3000) 50 | camera_pose = np.eye(4) 51 | scene.add(camera, pose=camera_pose) 52 | 53 | # Render 54 | color, rend_depth = self.renderer.render(scene, flags=pyrender.RenderFlags.RGBA) 55 | color = color.astype(np.uint8) 56 | 57 | if image is None: 58 | return color 59 | 60 | 61 | valid_mask = (rend_depth>0)[:,:,None] 62 | output_img = (color[:, :, :3] * valid_mask + 63 | (1 - 
valid_mask) * image) 64 | 65 | if mask_over: 66 | mask = np.zeros([self.img_h, self.img_w, 3]) 67 | mask[:,:,1] = 250 68 | alpha = 0.3 69 | mask = alpha * mask + (1-alpha) * image 70 | overlay = mask * valid_mask + image * (1-valid_mask) 71 | overlay = overlay.astype(np.uint8) 72 | return output_img, overlay 73 | 74 | else: 75 | return output_img 76 | 77 | def render_scene(self, meshes, Rs, ts, image=None, fx=None, fy=None, cx=None, cy=None): 78 | 79 | # Camera parameter 80 | if fx is None or fy is None: 81 | fx = self.default_focal 82 | fy = self.default_focal 83 | 84 | if cx is None or cy is None: 85 | cx = self.img_w / 2 86 | cy = self.img_h / 2 87 | 88 | color = (0.2, 0.4, 0.2, 1.0) 89 | material = pyrender.MetallicRoughnessMaterial( 90 | metallicFactor=0.1, 91 | alphaMode='OPAQUE', 92 | baseColorFactor=color) 93 | 94 | # 6DoF object pose in camera coordinate 95 | # You can skip this and apply directly on the input mesh 96 | for i, mesh in enumerate(meshes): 97 | transform = np.zeros([4,4]) 98 | transform[:3, :3] = Rs[i] 99 | transform[:3, -1] = ts[i] 100 | mesh.apply_transform(transform) 101 | 102 | 103 | # OpenGL convension 104 | transform = trimesh.transformations.rotation_matrix( 105 | np.radians(180), [1, 0, 0]) 106 | for mesh in meshes: 107 | mesh.apply_transform(transform) 108 | 109 | 110 | scene = pyrender.Scene(ambient_light=(0.8, 0.8, 0.8, 1.0)) 111 | for fuze_trimesh in meshes: 112 | mesh = pyrender.Mesh.from_trimesh(fuze_trimesh) 113 | scene.add(mesh) 114 | 115 | camera = pyrender.IntrinsicsCamera(fx=fx, fy=fy, cx=cx, cy=cy, zfar=3000) 116 | camera_pose = np.eye(4) 117 | scene.add(camera, pose=camera_pose) 118 | 119 | light = pyrender.DirectionalLight(color=[1.0, 1.0, 1.0], intensity=1.0) 120 | light_pose = np.eye(4) 121 | 122 | light_pose[:3, 3] = np.array([0, -1, 1]) 123 | scene.add(light, pose=light_pose) 124 | 125 | light_pose[:3, 3] = np.array([0, 1, 1]) 126 | scene.add(light, pose=light_pose) 127 | 128 | light_pose[:3, 3] = np.array([1, 1, 2]) 129 | scene.add(light, pose=light_pose) 130 | 131 | # Render 132 | color, rend_depth = self.renderer.render(scene, flags=pyrender.RenderFlags.RGBA) 133 | color = color.astype(np.uint8) 134 | 135 | enhancer = ImageEnhance.Contrast(Image.fromarray(color)) 136 | factor = 1.2 #increase contrast 137 | color = enhancer.enhance(factor) 138 | color = np.array(color, dtype=np.uint8) 139 | 140 | if image is None: 141 | return color 142 | 143 | 144 | valid_mask = (rend_depth>0)[:,:,None] 145 | output_img = (color[:, :, :3] * valid_mask + 146 | (1 - valid_mask) * image) 147 | 148 | return output_img 149 | 150 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # [CVPR 2023 Highlight] Object Pose Estimation with Statistical Guarantees: Conformal Keypoint Detection and Geometric Uncertainty Propagation 2 | [Paper](https://arxiv.org/abs/2303.12246) | [Short Presentation](https://youtu.be/NWUf4hd571E) | [Long Presentation](https://youtu.be/JPvoObEYCAo) 3 | 4 | ## Motivation 5 | Endow any estimated pose with **provably correct** performance guarantees, i.e., **a worst-case error bound** from the groundtruth pose 6 | 7 | ## Abstract 8 | The two-stage object pose estimation paradigm first detects semantic keypoints on the image and then estimates the 6D pose by minimizing reprojection errors. 
Despite performing well on standard benchmarks, existing techniques offer no provable guarantees on the quality and uncertainty of the estimation. In this paper, we inject two fundamental changes, namely **conformal keypoint detection** and **geometric uncertainty propagation**, into the two-stage paradigm and propose the first pose estimator that endows an estimation with provable and computable worst-case error bounds. On one hand, conformal keypoint detection applies the statistical machinery of _inductive conformal prediction_ to convert heuristic keypoint detections into circular or elliptical prediction sets that cover the groundtruth keypoints with a user-specified marginal probability (e.g., 90%). Geometric uncertainty propagation, on the other, propagates the geometric constraints on the keypoints to the 6D object pose, leading to a **Pose UnceRtainty SEt (PURSE)** that guarantees coverage of the groundtruth pose with the same probability. The PURSE, however, is a nonconvex set that does not directly lead to estimated poses and uncertainties. Therefore, we develop RANdom SAmple averaGing (RANSAG) to compute an average pose and apply semidefinite relaxation to upper bound the worst-case errors between the average pose and the groundtruth. On the LineMOD Occlusion dataset we demonstrate: (i) the PURSE covers the groundtruth with valid probabilities; (ii) the worst-case error bounds provide correct uncertainty quantification; and (iii) the average pose achieves better or similar accuracy as representative methods based on sparse keypoints. 9 | 10 | ![](assets/poster.png) 11 | 12 | ## Quick start 13 | 14 | ### Prepare data 15 | - Download `data.zip` from this google drive [link](https://drive.google.com/file/d/1UGek7S3-4wwvgMlGvfBxJQPGW3Q2MfaR/view?usp=sharing) 16 | - Unzip the data and put it into the `keypoint` folder (then you should have a folder `keypoint/data`) 17 | 18 | ### Conformal calibration 19 | 20 | ```python 21 | python conformal_calibration.py --score_type ball 22 | ``` 23 | 24 | You can change `--score_type` to `ellipse` to use a different nonconformity function. 25 | You can also add `--do_frcnn` to use FRCNN to detect object bounding boxes. 26 | 27 | The calibration scores will be saved into a pickle file. 28 | 29 | ### Conformal prediction 30 | 31 | ```python 32 | python conformal_prediction.py --score_type ball --epsilon 10 --save_fig 33 | ``` 34 | will write a set of pdf files drawing the conformal prediction sets (balls) into `keypoint/data/bop/lmo-org/icp_results`. You can change the results folder in `conformal_prediction.py`. 35 | 36 | ## Acknowledgement 37 | The source code in the `keypoint` folder are adapted from the git repo https://github.com/yufu-wang/6D_Pose. We would like to thank Yufu Wang for helping us run the code. 
38 | 39 | ## Citation 40 | If you find this paper and implementation useful, please cite 41 | ```bibtex 42 | @inproceedings{yang23cvpr-purse, 43 | title={Object pose estimation with statistical guarantees: Conformal keypoint detection and geometric uncertainty propagation}, 44 | author={Yang, Heng and Pavone, Marco}, 45 | booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, 46 | pages={8947--8958}, 47 | year={2023} 48 | } 49 | ``` 50 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.cm as cm 3 | import matplotlib.pyplot as plt 4 | import matplotlib 5 | import torch 6 | 7 | K = 100 8 | alpha = 0.8 9 | 10 | def one_each(pred, thresh=0.0): 11 | # Postprocess frcnn: get at most one instance per class 12 | # Return: boxes and labels 13 | conf = pred['scores'] > thresh 14 | 15 | conf_scores = pred['scores'][conf] 16 | conf_boxes = pred['boxes'][conf].int() 17 | conf_labels = pred['labels'][conf].int() 18 | 19 | valid = torch.zeros_like(conf_labels).bool() 20 | unique_labels = torch.unique(conf_labels) 21 | for uni in unique_labels: 22 | p = (conf_labels==uni).nonzero(as_tuple=False).reshape(-1) 23 | valid[p[0]] = True 24 | 25 | pd_scores = conf_scores[valid] 26 | pd_boxes = conf_boxes[valid] 27 | pd_labels = conf_labels[valid] 28 | 29 | return pd_boxes, pd_labels 30 | 31 | 32 | def clean_heatmap(heatmap,mode=1): 33 | ''' 34 | Normalize raw heatmap such that 35 | - the entries are all nonnegative 36 | - the entries sum up to 1.0 37 | ''' 38 | if mode == 1: 39 | min_val = np.min(heatmap) 40 | heatmap = heatmap - min_val # make sure heatmap is always positive 41 | med_val = np.median(heatmap) # take median 42 | heatmap[heatmap < med_val] = 0 # get rid of all values below median 43 | 44 | elif mode == 2: 45 | min_val = np.min(heatmap) 46 | if min_val < 0: 47 | heatmap = heatmap - min_val 48 | 49 | else: 50 | raise RuntimeError('Unknown mode for cleaning heatmap') 51 | heatmap = heatmap / np.sum(heatmap) 52 | return heatmap 53 | 54 | def topk_points(heatmap,k): 55 | ''' 56 | Return the top k most likely keypoint detections in the heatmap 57 | xy: xy coordinates of the keypoints 58 | vk: values of the top k probabilities (re-normalized to sum up to 1.0) 59 | ''' 60 | r, c = np.unravel_index( 61 | np.flip(np.argsort(heatmap.ravel())), heatmap.shape) 62 | v = heatmap[r,c] 63 | rk = r[:k] 64 | ck = c[:k] 65 | vk = v[:k] 66 | vk = vk / np.sum(vk) 67 | # offset the coordinates to the center 68 | # For example (0,0) pixel has coordinates (0.5,0.5) 69 | ck = ck + 0.5 70 | rk = rk + 0.5 71 | xy = np.stack((ck,rk),axis=1) 72 | return xy, vk 73 | 74 | 75 | def conformity_score(kpt,heatmap,type="ball"): 76 | ''' 77 | Given a keypoint location on a 2D image, and 78 | a heatmap prediction of the keypoint location, 79 | compute the nonconformility score 80 | :param 81 | kpt: (2,) numpy array 82 | heatmap: (H,W) numpy array 83 | type: choice of the conformity function 84 | :return 85 | conformity score 86 | ''' 87 | 88 | heatmap = clean_heatmap(heatmap,mode=1) 89 | 90 | if type == "ball": 91 | r, c = np.unravel_index( 92 | np.argmax(heatmap.ravel()),heatmap.shape) 93 | maxp = heatmap[r,c] 94 | # note here kpt loc (x,y), x corresponds to column, y corresponds to row!!! 
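# offset the argmax from integer indices to pixel centers (same convention as topk_points), then
# score by the distance from the keypoint to the heatmap mode scaled by the peak probability maxp:
# for the same pixel error, a confident (peaky) heatmap yields a larger nonconformity score,
# which icp() inverts by returning a ball of radius q / maxp around the mode.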
95 | r += 0.5 96 | c += 0.5 97 | dist = np.linalg.norm( kpt - np.array([c,r]) ) 98 | return dist * maxp 99 | 100 | elif type == "ellipse": 101 | xy, v = topk_points(heatmap,K) 102 | wkpt = v @ xy 103 | diff = xy - wkpt 104 | sigma = diff.T @ np.diag(v) @ diff 105 | sigmainv = np.linalg.inv(sigma) 106 | return (kpt - wkpt) @ sigmainv @ (kpt-wkpt) 107 | 108 | else: 109 | raise RuntimeError('Unknown score type.') 110 | 111 | 112 | def icp(heatmap,q,type="ball"): 113 | ''' 114 | Given a heatmap and a quantile, output the inductive prediction set 115 | :param 116 | heatmap: numpy array H x W 117 | q: scalar quantitle 118 | type: choice of conformity function 119 | ''' 120 | 121 | heatmap = clean_heatmap(heatmap,mode=1) 122 | 123 | if type == "ball": 124 | r, c = np.unravel_index( 125 | np.argmax(heatmap.ravel()),heatmap.shape) 126 | maxp = heatmap[r,c] 127 | c += 0.5 128 | r += 0.5 129 | return np.array([c,r]), q / maxp # return center and radius 130 | 131 | elif type == "ellipse": 132 | xy, v = topk_points(heatmap,K) 133 | wkpt = v @ xy 134 | diff = xy - wkpt 135 | sigma = diff.T @ np.diag(v) @ diff 136 | sigmainv = np.linalg.inv(sigma) 137 | return wkpt, sigmainv / q # return center and information matrix 138 | 139 | else: 140 | raise RuntimeError('Unknown score type.') 141 | 142 | 143 | def draw_icp_ball(img,heatmaps,kpt_gt,pred_set,fname=None,show=False,heatmaponly=False): 144 | linewidth = 2 145 | pointsize = 2 146 | height = 20 147 | subplot_gap = 0.05 148 | num_kpts = len(pred_set) 149 | colors = cm.Set2(np.linspace(0, 1, num_kpts)) 150 | 151 | fig, axes = plt.subplots(1,num_kpts+1,figsize=(2*height,2*height)) 152 | fig.subplots_adjust(wspace=subplot_gap) 153 | 154 | for i in range(num_kpts): 155 | heatmap = np.squeeze(heatmaps[i,:,:]) 156 | heatmap = clean_heatmap(heatmap) 157 | 158 | axes[i].imshow(img) 159 | axes[i].imshow(heatmap,alpha=alpha) 160 | if not heatmaponly: 161 | center, radius = pred_set[i] 162 | circ = plt.Circle(center,radius,color=colors[i],fill=True,linewidth=linewidth,alpha=0.5) 163 | axes[i].add_patch(circ) 164 | circ_b = plt.Circle(center,radius,color=colors[i],fill=False,linewidth=linewidth) 165 | axes[i].add_patch(circ_b) 166 | # point = plt.Circle((kpt_gt[i,0],kpt_gt[i,1]),pointsize,color=colors[i]) 167 | point = plt.Rectangle([kpt_gt[i,0]-pointsize/2,kpt_gt[i,1]-pointsize/2],pointsize,pointsize,color=colors[i]) 168 | axes[i].add_patch(point) 169 | axes[i].xaxis.set_visible(False) 170 | axes[i].yaxis.set_visible(False) 171 | 172 | axes[-1].imshow(img) 173 | for i in range(num_kpts): 174 | center, radius = pred_set[i] 175 | circ = plt.Circle(center,radius,color=colors[i],fill=True,linewidth=linewidth,alpha=0.5) 176 | axes[-1].add_patch(circ) 177 | circ_b = plt.Circle(center,radius,color=colors[i],fill=False,linewidth=linewidth) 178 | axes[-1].add_patch(circ_b) 179 | # point = plt.Circle((kpt_gt[i,0],kpt_gt[i,1]),pointsize,color=colors[i]) 180 | point = plt.Rectangle([kpt_gt[i,0]-pointsize/2,kpt_gt[i,1]-pointsize/2],pointsize,pointsize,color=colors[i]) 181 | axes[-1].add_patch(point) 182 | axes[-1].xaxis.set_visible(False) 183 | axes[-1].yaxis.set_visible(False) 184 | 185 | if fname is not None: 186 | plt.savefig(fname,bbox_inches='tight') 187 | if show: 188 | plt.show() 189 | 190 | return fig 191 | 192 | 193 | def angle_length_ellipse(A): 194 | ''' 195 | Given an ellipse x' * A * x <= 1 196 | return a, b, and angle 197 | angle is the angle rotating from x to y (anti-clockwise) 198 | ''' 199 | v, V = np.linalg.eig(A) 200 | idx = np.argsort(v) 201 | v = v[idx] # 
ascending order v[0] <= ... <= v[-1] 202 | V = V[:,idx] 203 | 204 | ab = np.sqrt(1.0 / v) 205 | a = ab[0] 206 | b = ab[-1] 207 | assert a >= b, "semi-axes lengths wrong." 208 | 209 | Vl = V[:,0] # long axis direction 210 | angle = np.arctan2(Vl[-1],Vl[0]) / np.pi * 180.0 211 | 212 | return a, b, angle 213 | 214 | 215 | def draw_icp_ellipse(img,heatmaps,kpt_gt,pred_set,fname=None,show=False): 216 | linewidth = 2 217 | pointsize = 2 218 | height = 20 219 | subplot_gap = 0.05 220 | num_kpts = len(pred_set) 221 | colors = cm.Set2(np.linspace(0, 1, num_kpts)) 222 | 223 | fig, axes = plt.subplots(1,num_kpts+1,figsize=(2*height,2*height)) 224 | fig.subplots_adjust(wspace=subplot_gap) 225 | 226 | for i in range(num_kpts): 227 | heatmap = np.squeeze(heatmaps[i,:,:]) 228 | heatmap = clean_heatmap(heatmap) 229 | 230 | axes[i].imshow(img) 231 | axes[i].imshow(heatmap,alpha=alpha) 232 | center, lam = pred_set[i] 233 | a, b, angle = angle_length_ellipse(lam) 234 | ellipse = matplotlib.patches.Ellipse(center,2*a,2*b,angle=angle,color=colors[i],fill=True,linewidth=linewidth,alpha=0.5) 235 | axes[i].add_patch(ellipse) 236 | ellipse_b = matplotlib.patches.Ellipse(center,2*a,2*b,angle=angle,color=colors[i],fill=False,linewidth=linewidth) 237 | axes[i].add_patch(ellipse_b) 238 | point = plt.Rectangle([kpt_gt[i,0]-pointsize/2,kpt_gt[i,1]-pointsize/2],pointsize,pointsize,color=colors[i]) 239 | axes[i].add_patch(point) 240 | axes[i].xaxis.set_visible(False) 241 | axes[i].yaxis.set_visible(False) 242 | 243 | axes[-1].imshow(img) 244 | for i in range(num_kpts): 245 | center, lam = pred_set[i] 246 | a, b, angle = angle_length_ellipse(lam) 247 | ellipse = matplotlib.patches.Ellipse(center,2*a,2*b,angle=angle,color=colors[i],fill=True,linewidth=linewidth,alpha=0.5) 248 | axes[-1].add_patch(ellipse) 249 | ellipse_b = matplotlib.patches.Ellipse(center,2*a,2*b,angle=angle,color=colors[i],fill=False,linewidth=linewidth) 250 | axes[-1].add_patch(ellipse_b) 251 | point = plt.Rectangle([kpt_gt[i,0]-pointsize/2,kpt_gt[i,1]-pointsize/2],pointsize,pointsize,color=colors[i]) 252 | axes[-1].add_patch(point) 253 | axes[-1].xaxis.set_visible(False) 254 | axes[-1].yaxis.set_visible(False) 255 | 256 | if fname is not None: 257 | plt.savefig(fname,bbox_inches='tight') 258 | if show: 259 | plt.show() 260 | 261 | return fig 262 | --------------------------------------------------------------------------------