├── .gitignore
├── datasets
│   ├── dataset.py
│   ├── dataset_config
│   │   ├── classes.txt
│   │   ├── test_data_list.txt
│   │   └── train_data_list.txt
│   └── utils.py
├── eval.sh
├── lib
│   ├── extractors.py
│   ├── foldingnet.py
│   ├── models.py
│   ├── network.py
│   ├── pointnet.py
│   ├── pspnet.py
│   ├── transformations.py
│   └── utils.py
├── metrics
│   └── readme.md
├── readme.md
├── tools
│   ├── _init_paths.py
│   ├── eval.py
│   └── utils.py
└── trained_models
    └── placeholder

/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | # C extensions
7 | *.so
8 | 
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib64/
18 | parts/
19 | sdist/
20 | var/
21 | wheels/
22 | pip-wheel-metadata/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 | 
29 | # PyInstaller
30 | #  Usually these files are written by a python script from a template
31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 | 
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 | 
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | 
53 | # Translations
54 | *.mo
55 | *.pot
56 | 
57 | # Django stuff:
58 | *.log
59 | local_settings.py
60 | db.sqlite3
61 | db.sqlite3-journal
62 | 
63 | # Flask stuff:
64 | instance/
65 | .webassets-cache
66 | 
67 | # Scrapy stuff:
68 | .scrapy
69 | 
70 | # Sphinx documentation
71 | docs/_build/
72 | 
73 | # PyBuilder
74 | target/
75 | 
76 | # Jupyter Notebook
77 | .ipynb_checkpoints
78 | 
79 | # IPython
80 | profile_default/
81 | ipython_config.py
82 | 
83 | # pyenv
84 | .python-version
85 | 
86 | # pipenv
87 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
88 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
89 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
90 | #   install all needed dependencies.
91 | #Pipfile.lock
92 | 
93 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
94 | __pypackages__/
95 | 
96 | # Celery stuff
97 | celerybeat-schedule
98 | celerybeat.pid
99 | 
100 | # SageMath parsed files
101 | *.sage.py
102 | 
103 | # Environments
104 | .env
105 | .venv
106 | env/
107 | venv/
108 | ENV/
109 | env.bak/
110 | venv.bak/
111 | 
112 | # Spyder project settings
113 | .spyderproject
114 | .spyproject
115 | 
116 | # Rope project settings
117 | .ropeproject
118 | 
119 | # mkdocs documentation
120 | /site
121 | 
122 | # mypy
123 | .mypy_cache/
124 | .dmypy.json
125 | dmypy.json
126 | 
127 | # Pyre type checker
128 | .pyre/
129 | 
--------------------------------------------------------------------------------
/datasets/dataset.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | import cv2
4 | import numpy as np
5 | import numpy.ma as ma
6 | import torch
7 | import torch.utils.data as data
8 | import torchvision.transforms as transforms
9 | 
10 | import datasets.utils as dutils
11 | from lib.transformations import quaternion_from_matrix
12 | 
13 | 
14 | def align(class_ids, masks, coords, depth, intr):
15 |     num_instances = len(class_ids)
16 |     RTs = np.zeros((num_instances, 4, 4), dtype=np.float32)
17 |     scales = np.ones((num_instances, 3), dtype=np.float32)
18 | 
19 |     for i in range(num_instances):
20 |         mask = ma.getmaskarray(ma.masked_equal(masks, class_ids[i]))
21 |         if np.sum(mask) < 50:
22 |             RTs[i] = np.eye(4)
23 |             continue
24 | 
25 |         pts, idxs = dutils.backproject(depth, intr, mask)
26 |         pts = pts / 1000.0
27 |         if len(pts) < 50:
28 |             RTs[i] = np.eye(4)
29 |             continue
30 |         coord_pts = coords[idxs[0], idxs[1], :] - 0.5
31 | 
32 |         scale, rotation, trans, _ = dutils.estimateSimilarityTransform(
33 |             coord_pts, pts)
34 |         if rotation is None or trans is None or np.any(np.isnan(rotation)) or np.any(np.isnan(trans))\
35 |                 or np.any(np.isinf(trans)) or np.any(np.isinf(rotation)):
36 |             RTs[i] = np.eye(4)
37 |             continue
38 | 
39 |         aligned_RT = np.eye(4)
40 |         aligned_RT[:3, :3] = rotation.T
41 | 
42 |         aligned_RT[:3, 3] = trans
43 |         aligned_RT[3, 3] = 1
44 | 
45 |         RTs[i, :, :] = aligned_RT
46 |         scales[i] = scale
47 | 
48 |     return RTs, scales
49 | 
50 | 
51 | def load_obj(path, ori_path, num_points):
52 |     if os.path.isfile(path):
53 |         return dutils.load_obj(path)
54 |     else:
55 |         vertex = dutils.sample_obj(ori_path, num_points, True)
56 |         dutils.save_obj(vertex, path[:-3]+"ply")
57 |         return np.asarray(vertex)
58 | 
59 | 
60 | class PoseDataset(data.Dataset):
61 |     def __init__(self, mode, num_pt, root):
62 |         if mode == 'train':
63 |             self.path = 'datasets/dataset_config/train_data_list.txt'
64 |         elif mode == 'test':
65 |             self.path = 'datasets/dataset_config/test_data_list.txt'
66 |         self.num_pt = num_pt
67 |         self.root = root
68 | 
69 |         self.list = []
70 |         self.real = []
71 |         self.syn = []
72 |         # read the data list once; real and synthetic samples are tracked separately
73 |         with open(self.path, "r") as input_file:
74 |             while 1:
75 |                 input_line = input_file.readline()
76 |                 if not input_line:
77 |                     break
78 |                 input_line = input_line.replace("\n", "")
79 |                 if input_line.startswith("real"):
80 |                     self.real.append(input_line)
81 |                 else:
82 |                     self.syn.append(input_line)
83 |                 self.list.append(input_line)
84 | 
85 |         self.length = len(self.list)
86 |         self.len_real = len(self.real)
87 |         self.len_syn = len(self.syn)
88 |         # real
89 |         self.cam_cx_1 = 322.525
90 |         self.cam_cy_1 = 244.11084
91 |         self.cam_fx_1 = 591.0125
92 |         self.cam_fy_1 = 590.16775
93 |         # syn
94 |         self.cam_cx_2 = 319.5
95 |         self.cam_cy_2 = 239.5
96 |         self.cam_fx_2 = 577.5
97 |         self.cam_fy_2 = 577.5
98 | 
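        # The two index maps built below are per-pixel row/column coordinates
        # for a 480x640 image; they are gathered later to back-project the
        # masked depth values into a camera-frame point cloud. An equivalent
        # NumPy-only sketch of the same construction:
        #     xmap, ymap = np.indices((480, 640))  # xmap[r, c] == r, ymap[r, c] == c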
self.xmap = np.array([[j for i in range(640)] for j in range(480)]) 100 | self.ymap = np.array([[i for i in range(640)] for j in range(480)]) 101 | 102 | self.minimum_num_pt = 50 103 | 104 | self.norm = transforms.Normalize( 105 | mean=[0.51, 0.47, 0.44], std=[0.29, 0.27, 0.28]) 106 | self.symmetry_obj_idx = [0, 1, 3] 107 | 108 | self.class_names = PoseDataset.get_class_names() 109 | print(len(self.list)) 110 | 111 | @staticmethod 112 | def get_class_names(): 113 | class_names = [] 114 | with open("datasets/dataset_config/classes.txt", "r") as f: 115 | class_names = ["_".join(line.split("_")[1:2]) for line in f] 116 | class_names = [c.replace("\n", "") for c in class_names] 117 | 118 | return class_names 119 | 120 | def __getitem__(self, index): 121 | try: 122 | img = np.array(cv2.imread( 123 | '{0}/{1}_color.png'.format(self.root, self.list[index]))) / 255. 124 | depth = np.array(cv2.imread( 125 | '{0}/{1}_depth.png'.format(self.root, self.list[index]), -1)) 126 | if len(depth.shape) == 3: 127 | depth = np.uint16(depth[:, :, 1] * 256) + \ 128 | np.uint16(depth[:, :, 2]) 129 | label = np.array(cv2.imread( 130 | '{0}/{1}_mask.png'.format(self.root, self.list[index]))[:, :, 2]) 131 | 132 | meta = dict() 133 | with open("{0}/{1}_meta.txt".format(self.root, self.list[index]), "r") as f: 134 | for line in f: 135 | line = line.replace("\n", "") 136 | line = line.split(" ") 137 | if int(line[1]) == 0: # mask out background 138 | continue 139 | d = {"cls_id": line[1], "inst_name": line[2]} 140 | if "real_train" in self.list[index]: 141 | d["inst_dir"] = os.path.join(self.root, "obj_models", "real_train", 142 | line[2]+"_{}.ply".format(self.num_pt)) 143 | d["ori_inst_dir"] = os.path.join(self.root, 144 | "obj_models", "real_train", line[2]+".obj") 145 | elif "real_test" in self.list[index]: 146 | d["inst_dir"] = os.path.join(self.root, "obj_models", "real_test", 147 | line[2]+"_{}.ply".format(self.num_pt)) 148 | d["ori_inst_dir"] = os.path.join( 149 | self.root, "obj_models", "real_test", line[2]+".obj") 150 | else: 151 | d["inst_dir"] = os.path.join(self.root, "obj_models", "train", 152 | *line[2:], "model_{}.ply".format(self.num_pt)) 153 | d["ori_inst_dir"] = os.path.join(self.root, "obj_models", "train", 154 | *line[2:], "model.obj") 155 | meta[int(line[0])] = d 156 | 157 | if not self.list[index].startswith("real"): 158 | cam_cx = self.cam_cx_2 159 | cam_cy = self.cam_cy_2 160 | cam_fx = self.cam_fx_2 161 | cam_fy = self.cam_fy_2 162 | else: 163 | cam_cx = self.cam_cx_1 164 | cam_cy = self.cam_cy_1 165 | cam_fx = self.cam_fx_1 166 | cam_fy = self.cam_fy_1 167 | 168 | obj = list(meta.keys()) 169 | iidx = np.arange(len(obj)) 170 | np.random.shuffle(iidx) 171 | for idx in iidx: 172 | mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0)) 173 | mask_label = ma.getmaskarray(ma.masked_equal(label, obj[idx])) 174 | mask = mask_label * mask_depth 175 | if len(mask.nonzero()[0]) > self.minimum_num_pt: 176 | break 177 | else: 178 | print("Can't find any valid training object in {}".format( 179 | self.list[index])) 180 | raise ValueError 181 | 182 | # A method to load target_r and target_t 183 | if os.path.isfile("{}/gts/{}_poses.txt".format(self.root, self.list[index])) and os.path.isfile("{}/gts/{}_scales.txt".format(self.root, self.list[index])): 184 | meta["poses"] = np.loadtxt( 185 | "{}/gts/{}_poses.txt".format(self.root, self.list[index])).reshape(-1, 4, 4) 186 | meta["scales"] = np.loadtxt( 187 | "{}/gts/{}_scales.txt".format(self.root, self.list[index])).reshape(-1, 3) 188 | else: 189 | 
coord = cv2.imread( 190 | '{0}/{1}_coord.png'.format(self.root, self.list[index]))[:, :, :3][:, :, (2, 1, 0)] 191 | coord = np.array(coord, dtype=np.float32) / 255. 192 | coord[:, :, 2] = 1.0 - coord[:, :, 2] 193 | intr = np.array( 194 | [[cam_fx, 0, cam_cx], [0, cam_fy, cam_cy], [0., 0., 1.]]) 195 | poses, scales = align(obj, label, coord, depth, intr) 196 | os.makedirs(os.path.dirname( 197 | "{}/gts/{}_poses.txt".format(self.root, self.list[index])), exist_ok=True) 198 | np.savetxt("{}/gts/{}_poses.txt".format(self.root, self.list[index]), 199 | poses.reshape(-1, 4)) 200 | np.savetxt("{}/gts/{}_scales.txt".format(self.root, 201 | self.list[index]), scales.reshape(-1, 3)) 202 | meta["poses"] = poses 203 | meta["scales"] = scales 204 | rmin, rmax, cmin, cmax = get_bbox(mask_label) 205 | img_masked = np.transpose(img, (2, 0, 1))[:, rmin:rmax, cmin:cmax] 206 | target_r = meta['poses'][idx][:3, 0:3] 207 | target_t = np.array([meta['poses'][idx][:3, 3:4].flatten()]) 208 | 209 | choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0] 210 | if len(choose) > self.num_pt: 211 | c_mask = np.zeros(len(choose), dtype=int) 212 | c_mask[:self.num_pt] = 1 213 | np.random.shuffle(c_mask) 214 | choose = choose[c_mask.nonzero()] 215 | else: 216 | choose = np.pad(choose, (0, self.num_pt - len(choose)), 'wrap') 217 | 218 | depth_masked = depth[rmin:rmax, cmin:cmax].flatten( 219 | )[choose][:, np.newaxis].astype(np.float32) 220 | xmap_masked = self.xmap[rmin:rmax, cmin:cmax].flatten( 221 | )[choose][:, np.newaxis].astype(np.float32) 222 | ymap_masked = self.ymap[rmin:rmax, cmin:cmax].flatten( 223 | )[choose][:, np.newaxis].astype(np.float32) 224 | choose = np.array([choose]) 225 | 226 | cam_scale = 1000.0 227 | pt2 = depth_masked / cam_scale 228 | pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx 229 | pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy 230 | cloud = np.concatenate((-pt0, -pt1, pt2), axis=1) 231 | 232 | model_points = load_obj( 233 | path=meta[obj[idx]]["inst_dir"], 234 | ori_path=meta[obj[idx]]["ori_inst_dir"], num_points=self.num_pt) 235 | 236 | model_points = model_points * meta["scales"][idx] 237 | 238 | target = np.dot(model_points, target_r.T) 239 | target = np.add(target, target_t) 240 | matrix = np.eye(4) 241 | matrix[:3, :3] = target_r 242 | quat = quaternion_from_matrix(matrix) 243 | 244 | return torch.from_numpy(cloud.astype(np.float32)), \ 245 | torch.LongTensor(choose.astype(np.int32)), \ 246 | self.norm(torch.from_numpy(img_masked.astype(np.float32))), \ 247 | torch.from_numpy(target.astype(np.float32)), \ 248 | torch.from_numpy(model_points.astype(np.float32)), \ 249 | torch.LongTensor([int(meta[obj[idx]]["cls_id"])-1]), \ 250 | torch.from_numpy(quat.astype(np.float32)), \ 251 | torch.from_numpy(target_t.astype(np.float32)) 252 | except: 253 | return self.__getitem__(index//2) 254 | 255 | def __len__(self): 256 | return self.length 257 | 258 | def get_sym_list(self): 259 | return self.symmetry_obj_idx 260 | 261 | def get_num_points_mesh(self): 262 | return self.num_pt 263 | 264 | 265 | border_list = [-1, 40, 80, 120, 160, 200, 240, 280, 266 | 320, 360, 400, 440, 480, 520, 560, 600, 640, 680] 267 | img_width = 480 268 | img_length = 640 269 | 270 | 271 | def get_bbox(label): 272 | rows = np.any(label, axis=1) 273 | cols = np.any(label, axis=0) 274 | rmin, rmax = np.where(rows)[0][[0, -1]] 275 | cmin, cmax = np.where(cols)[0][[0, -1]] 276 | rmax += 1 277 | cmax += 1 278 | r_b = rmax - rmin 279 | for tt in range(len(border_list)): 280 | if r_b > border_list[tt] and r_b < border_list[tt + 1]: 
281 |             r_b = border_list[tt + 1]
282 |             break
283 |     c_b = cmax - cmin
284 |     for tt in range(len(border_list)):
285 |         if c_b > border_list[tt] and c_b < border_list[tt + 1]:
286 |             c_b = border_list[tt + 1]
287 |             break
288 |     center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)]
289 |     rmin = center[0] - int(r_b / 2)
290 |     rmax = center[0] + int(r_b / 2)
291 |     cmin = center[1] - int(c_b / 2)
292 |     cmax = center[1] + int(c_b / 2)
293 |     if rmin < 0:
294 |         delt = -rmin
295 |         rmin = 0
296 |         rmax += delt
297 |     if cmin < 0:
298 |         delt = -cmin
299 |         cmin = 0
300 |         cmax += delt
301 |     if rmax > img_width:
302 |         delt = rmax - img_width
303 |         rmax = img_width
304 |         rmin -= delt
305 |     if cmax > img_length:
306 |         delt = cmax - img_length
307 |         cmax = img_length
308 |         cmin -= delt
309 |     return rmin, rmax, cmin, cmax
310 | 
--------------------------------------------------------------------------------
/datasets/dataset_config/classes.txt:
--------------------------------------------------------------------------------
1 | 1_bottle_02876657
2 | 2_bowl_02880940
3 | 3_camera_02942699
4 | 4_can_02946921
5 | 5_laptop_03642806
6 | 6_mug_03797390
--------------------------------------------------------------------------------
/datasets/utils.py:
--------------------------------------------------------------------------------
1 | import math
2 | import random
3 | 
4 | import numpy as np
5 | import open3d as o3d
6 | import trimesh
7 | 
8 | 
9 | def save_obj(vertex: np.ndarray, path: str):
10 |     """vertex: [N x 3]
11 |     """
12 |     pcd = o3d.geometry.PointCloud()
13 |     pcd.points = o3d.utility.Vector3dVector(vertex)
14 |     o3d.io.write_point_cloud(path, pcd)
15 | 
16 | 
17 | def load_obj(path):
18 |     """Return the point cloud stored at `path` as an np.ndarray.
19 |     """
20 |     pcd_load = o3d.io.read_point_cloud(path)
21 |     return np.asarray(pcd_load.points)
22 | 
23 | 
24 | def estimateSimilarityTransform(source: np.ndarray, target: np.ndarray):
25 |     source_hom = np.transpose(
26 |         np.hstack([source, np.ones([source.shape[0], 1])]))
27 |     target_hom = np.transpose(
28 |         np.hstack([target, np.ones([source.shape[0], 1])]))
29 | 
30 |     # auto-parameter selection based on source-target heuristics
31 |     target_norm = np.mean(np.linalg.norm(target, axis=1))
32 |     source_norm = np.mean(np.linalg.norm(source, axis=1))
33 |     ratio_TS = (target_norm / source_norm)
34 |     ratio_ST = (source_norm / target_norm)
35 | 
36 |     pass_T = ratio_ST if ratio_ST > ratio_TS else ratio_TS
37 |     stop_T = pass_T / 100.
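    # RANSAC thresholds: the inlier (pass) threshold adapts to the unknown
    # relative scale between the two point sets by taking the larger of the
    # two mean-norm ratios, and iteration stops early once the residual
    # falls below 1% of that threshold.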
38 |     n_iter = 100
39 | 
40 |     source_inliers_hom, target_inliers_hom, best_inlier_ratio = getRANSACInliers(
41 |         source_hom, target_hom, max_iterations=n_iter, pass_threshold=pass_T, stop_threshold=stop_T)
42 |     if best_inlier_ratio < 0.1:
43 |         return None, None, None, None
44 | 
45 |     scales, rotation, translation, out_transform = estimateSimilarityUmeyama(
46 |         source_inliers_hom, target_inliers_hom)
47 | 
48 |     return scales, rotation, translation, out_transform
49 | 
50 | 
51 | def getRANSACInliers(source_hom, target_hom, max_iterations=100, pass_threshold=200, stop_threshold=1):
52 |     best_residual = 1e10
53 |     best_inlier_ratio = 0
54 |     best_inlier_idx = np.arange(source_hom.shape[1])
55 |     for _ in range(max_iterations):
56 |         # pick 5 random (but corresponding) points from source and target
57 |         rand_idx = np.random.randint(source_hom.shape[1], size=5)
58 |         _, _, _, out_transform = estimateSimilarityUmeyama(
59 |             source_hom[:, rand_idx], target_hom[:, rand_idx])
60 |         residual, inlier_ratio, inlier_idx = evaluateModel(
61 |             out_transform, source_hom, target_hom, pass_threshold)
62 |         if residual < best_residual:
63 |             best_residual = residual
64 |             best_inlier_ratio = inlier_ratio
65 |             best_inlier_idx = inlier_idx
66 |         if best_residual < stop_threshold:
67 |             break
68 |     return source_hom[:, best_inlier_idx], target_hom[:, best_inlier_idx], best_inlier_ratio
69 | 
70 | 
71 | def evaluateModel(out_transform, source_hom, target_hom, pass_threshold):
72 |     diff = target_hom - np.matmul(out_transform, source_hom)
73 |     residual_vec = np.linalg.norm(diff[:3, :], axis=0)
74 |     residual = np.linalg.norm(residual_vec)
75 |     inlier_idx = np.where(residual_vec < pass_threshold)
76 |     n_inliers = inlier_idx[0].shape[0]  # count inliers directly; np.count_nonzero on the index tuple would miss index 0
77 |     inlier_ratio = n_inliers / source_hom.shape[1]
78 |     return residual, inlier_ratio, inlier_idx[0]
79 | 
80 | 
81 | def estimateSimilarityUmeyama(source_hom, target_hom):
82 |     source_centroid = np.mean(source_hom[:3, :], axis=1)
83 |     target_centroid = np.mean(target_hom[:3, :], axis=1)
84 |     n_points = source_hom.shape[1]
85 | 
86 |     centered_source = source_hom[:3, :] - \
87 |         np.tile(source_centroid, (n_points, 1)).transpose()
88 |     centered_target = target_hom[:3, :] - \
89 |         np.tile(target_centroid, (n_points, 1)).transpose()
90 | 
91 |     cov_matrix = np.matmul(
92 |         centered_target, np.transpose(centered_source)) / n_points
93 | 
94 |     if np.isnan(cov_matrix).any():
95 |         raise RuntimeError("There are NaNs in the input.")
96 | 
97 |     U, D, Vh = np.linalg.svd(cov_matrix, full_matrices=True)
98 |     d = (np.linalg.det(U) * np.linalg.det(Vh)) < 0.0
99 |     if d:
100 |         D[-1] = -D[-1]
101 |         U[:, -1] = -U[:, -1]
102 | 
103 |     rotation = np.matmul(U, Vh).T
104 | 
105 |     var_p = np.var(source_hom[:3, :], axis=1).sum()
106 |     scale_fact = 1 / var_p * np.sum(D)
107 |     scales = np.array([scale_fact, scale_fact, scale_fact])
108 |     scale_matrix = np.diag(scales)
109 | 
110 |     translation = target_hom[:3, :].mean(
111 |         axis=1) - source_hom[:3, :].mean(axis=1).dot(scale_fact * rotation)
112 | 
113 |     out_transform = np.identity(4)
114 |     out_transform[:3, :3] = scale_matrix @ rotation
115 |     out_transform[:3, 3] = translation
116 | 
117 |     return scales, rotation, translation, out_transform
118 | 
119 | 
120 | def backproject(depth, intr, mask):
121 |     intr_inv = np.linalg.inv(intr)
122 | 
123 |     non_zero_mask = depth > 0
124 |     final_instance_mask = np.logical_and(mask, non_zero_mask)
125 | 
126 |     idxs = np.where(final_instance_mask)
127 |     grid = np.array([idxs[1], idxs[0]])
128 | 
129 |     length = grid.shape[1]
130 |     ones = np.ones([1, length])
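    # Build homogeneous pixel coordinates [u, v, 1]^T so that a single
    # product with K^{-1} turns every masked pixel into a normalized camera
    # ray; each ray is then rescaled by its measured depth value below.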
131 |     uv_grid = np.concatenate([grid, ones], axis=0)
132 | 
133 |     xyz = intr_inv @ uv_grid
134 |     xyz = np.transpose(xyz)
135 | 
136 |     z = depth[idxs[0], idxs[1]]
137 | 
138 |     pts = xyz * z[:, np.newaxis] / xyz[:, -1:]
139 |     pts[:, 0] = -pts[:, 0]
140 |     pts[:, 1] = -pts[:, 1]
141 |     return pts, idxs
142 | 
143 | 
144 | def triangle_area(v1, v2, v3):
145 |     a = np.array(v2) - np.array(v1)
146 |     b = np.array(v3) - np.array(v1)
147 |     domain = np.dot(a, a) * np.dot(b, b) - (np.dot(a, b) ** 2)
148 |     domain = domain if domain > 0 else 0.0
149 | 
150 |     return math.sqrt(domain) / 2.0
151 | 
152 | 
153 | def cal_surface_area(mesh):
154 |     areas = []
155 |     if hasattr(mesh, "faces"):
156 |         for face in mesh.faces:
157 |             v1, v2, v3 = face
158 |             v1 = mesh.vertices[v1]
159 |             v2 = mesh.vertices[v2]
160 |             v3 = mesh.vertices[v3]
161 | 
162 |             areas += [triangle_area(v1, v2, v3)]
163 |     else:
164 |         for face in mesh.triangles:
165 |             v1, v2, v3 = face
166 | 
167 |             areas += [triangle_area(v1, v2, v3)]
168 |     return np.array(areas)
169 | 
170 | 
171 | def sample_obj(path, num_points, norm):
172 |     """Sample points uniformly from a .obj mesh file.
173 |     If `norm` is set, the result is normalized as well.
174 |     """
175 |     mesh = trimesh.load(path)
176 |     areas = cal_surface_area(mesh)
177 |     prefix_sum = np.cumsum(areas)
178 | 
179 |     total_area = prefix_sum[-1]
180 |     sample_points = []
181 | 
182 |     for _ in range(num_points):
183 |         prob = random.random()
184 |         sample_pos = prob * total_area
185 | 
186 |         # binary search
187 |         left_bound, right_bound = 0, len(areas) - 1
188 |         while left_bound < right_bound:
189 |             mid = (left_bound + right_bound) // 2
190 |             if sample_pos <= prefix_sum[mid]:
191 |                 right_bound = mid
192 |             else:
193 |                 left_bound = mid + 1
194 | 
195 |         target_surface = right_bound
196 | 
197 |         # sample a point inside the chosen triangle
198 |         if hasattr(mesh, "faces"):
199 |             v1, v2, v3 = mesh.faces[target_surface]
200 | 
201 |             v1, v2, v3 = mesh.vertices[v1], mesh.vertices[v2], mesh.vertices[v3]
202 |         else:
203 |             v1, v2, v3 = mesh.triangles[target_surface]
204 | 
205 |         edge_vec1 = np.array(v2) - np.array(v1)
206 |         edge_vec2 = np.array(v3) - np.array(v1)
207 | 
208 |         prob_vec1, prob_vec2 = random.random(), random.random()
209 |         if prob_vec1 + prob_vec2 > 1:
210 |             prob_vec1 = 1 - prob_vec1
211 |             prob_vec2 = 1 - prob_vec2
212 | 
213 |         target_point = np.array(
214 |             v1) + (edge_vec1 * prob_vec1 + edge_vec2 * prob_vec2)
215 | 
216 |         sample_points.append(target_point)
217 |     sample_points = np.stack(sample_points, axis=0)
218 | 
219 |     if norm:
220 |         min_ = np.min(sample_points, axis=0)
221 |         max_ = np.max(sample_points, axis=0)
222 |         dis_ = max_ - min_
223 | 
224 |         scale = 1 / np.sqrt(np.sum(dis_ * dis_))  # unit bounding-box diagonal
225 | 
226 |         sample_points *= scale
227 | 
228 |     return sample_points
229 | 
--------------------------------------------------------------------------------
/eval.sh:
--------------------------------------------------------------------------------
1 | # echo "EVAL CASS ..."
2 | # python ./tools/eval.py --resume_model cass_best.pth --dataset_dir ../nocs --cuda --save_dir ../predicted_result --eval --mode cass
3 | 
4 | echo "EVAL CASS ..."
5 | python ./tools/eval.py --save_dir ../predicted_result --mode cass
6 | 
7 | 
8 | echo "EVAL NOCS ..."
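# The second pass below re-uses the same --save_dir output; --mode appears to
# select which method's predictions tools/eval.py scores (cass vs. the NOCS
# baseline), matching the echo labels above.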
9 | python ./tools/eval.py --save_dir ../predicted_result --mode nocs 10 | -------------------------------------------------------------------------------- /lib/extractors.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import math 3 | import random 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | def load_weights_sequential(target, source_state): 9 | new_dict = OrderedDict() 10 | for (k1, v1), (k2, v2) in zip(target.state_dict().items(), source_state.items()): 11 | new_dict[k1] = v2 12 | target.load_state_dict(new_dict) 13 | 14 | def conv3x3(in_planes, out_planes, stride=1, dilation=1): 15 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 16 | padding=dilation, dilation=dilation, bias=False) 17 | 18 | class BasicBlock(nn.Module): 19 | expansion = 1 20 | 21 | def __init__(self, inplanes, planes, stride=1, downsample=None, dilation=1): 22 | super(BasicBlock, self).__init__() 23 | self.conv1 = conv3x3(inplanes, planes, stride=stride, dilation=dilation) 24 | self.relu = nn.ReLU(inplace=True) 25 | self.conv2 = conv3x3(planes, planes, stride=1, dilation=dilation) 26 | self.downsample = downsample 27 | self.stride = stride 28 | 29 | def forward(self, x): 30 | residual = x 31 | 32 | out = self.conv1(x) 33 | out = self.relu(out) 34 | 35 | out = self.conv2(out) 36 | 37 | if self.downsample is not None: 38 | residual = self.downsample(x) 39 | 40 | out += residual 41 | out = self.relu(out) 42 | 43 | return out 44 | 45 | 46 | class Bottleneck(nn.Module): 47 | expansion = 4 48 | def __init__(self, inplanes, planes, stride=1, downsample=None, dilation=1): 49 | super(Bottleneck, self).__init__() 50 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 51 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, dilation=dilation, 52 | padding=dilation, bias=False) 53 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 54 | self.relu = nn.ReLU(inplace=True) 55 | self.downsample = downsample 56 | self.stride = stride 57 | 58 | def forward(self, x): 59 | residual = x 60 | 61 | out = self.conv1(x) 62 | out = self.relu(out) 63 | 64 | out = self.conv2(out) 65 | out = self.relu(out) 66 | 67 | out = self.conv3(out) 68 | 69 | if self.downsample is not None: 70 | residual = self.downsample(x) 71 | 72 | out += residual 73 | out = self.relu(out) 74 | 75 | return out 76 | 77 | 78 | class ResNet(nn.Module): 79 | def __init__(self, block, layers=(3, 4, 23, 3)): 80 | self.inplanes = 64 81 | super(ResNet, self).__init__() 82 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 83 | bias=False) 84 | self.relu = nn.ReLU(inplace=True) 85 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 86 | self.layer1 = self._make_layer(block, 64, layers[0]) 87 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 88 | self.layer3 = self._make_layer(block, 256, layers[2], stride=1, dilation=2) 89 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4) 90 | 91 | for m in self.modules(): 92 | if isinstance(m, nn.Conv2d): 93 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 94 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 95 | elif isinstance(m, nn.BatchNorm2d): 96 | m.weight.data.fill_(1) 97 | m.bias.data.zero_() 98 | 99 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1): 100 | downsample = None 101 | if stride != 1 or self.inplanes != planes * block.expansion: 102 | downsample = nn.Sequential( 103 | nn.Conv2d(self.inplanes, planes * block.expansion, 104 | kernel_size=1, stride=stride, bias=False) 105 | ) 106 | 107 | layers = [block(self.inplanes, planes, stride, downsample)] 108 | self.inplanes = planes * block.expansion 109 | for i in range(1, blocks): 110 | layers.append(block(self.inplanes, planes, dilation=dilation)) 111 | 112 | return nn.Sequential(*layers) 113 | 114 | def forward(self, x): 115 | x = self.conv1(x) 116 | x = self.relu(x) 117 | x = self.maxpool(x) 118 | 119 | x = self.layer1(x) 120 | x = self.layer2(x) 121 | x_3 = self.layer3(x) 122 | x = self.layer4(x_3) 123 | 124 | return x, x_3 125 | 126 | 127 | def resnet18(pretrained=False): 128 | model = ResNet(BasicBlock, [2, 2, 2, 2]) 129 | return model 130 | 131 | def resnet34(pretrained=False): 132 | model = ResNet(BasicBlock, [3, 4, 6, 3]) 133 | return model 134 | 135 | def resnet50(pretrained=False): 136 | model = ResNet(Bottleneck, [3, 4, 6, 3]) 137 | return model 138 | 139 | def resnet101(pretrained=False): 140 | model = ResNet(Bottleneck, [3, 4, 23, 3]) 141 | return model 142 | 143 | def resnet152(pretrained=False): 144 | model = ResNet(Bottleneck, [3, 8, 36, 3]) 145 | return model 146 | -------------------------------------------------------------------------------- /lib/foldingnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | 5 | from lib.pointnet import PointNetGlobalMax, get_MLP_layers, PointNetVanilla, PointwiseMLP 6 | from lib.utils import make_box, make_sphere, make_cylinder 7 | 8 | class ChamfersDistance3(nn.Module): 9 | ''' 10 | Extensively search to compute the Chamfersdistance. 
No reference to external implementation Incomplete 11 | ''' 12 | def forward(self, input1, input2): 13 | # input1, input2: BxNxK, BxMxK, K = 3 14 | B, N, K = input1.shape 15 | _, M, _ = input2.shape 16 | 17 | # Repeat (x,y,z) M times in a row 18 | input11 = input1.unsqueeze(2) # BxNx1xK 19 | input11 = input11.expand(B, N, M, K) # BxNxMxK 20 | # Repeat (x,y,z) N times in a column 21 | input22 = input2.unsqueeze(1) # Bx1xMxK 22 | input22 = input22.expand(B, N, M, K) # BxNxMxK 23 | # compute the distance matrix 24 | D = input11 - input22 # BxNxMxK 25 | D = torch.norm( D, p=2, dim=3 ) # BxNxM 26 | 27 | dist0, _ = torch.min( D, dim=1 ) # BxM 28 | dist1, _ = torch.min( D, dim=2 ) # BxN 29 | 30 | loss = torch.mean(dist0, 1) + torch.mean(dist1, 1) # B 31 | loss = torch.mean(loss) # 1 32 | return loss 33 | 34 | 35 | class FoldingNetSingle(nn.Module): 36 | def __init__(self, dims): 37 | super(FoldingNetSingle, self).__init__() 38 | self.mlp = PointwiseMLP(dims, doLastRelu=False) 39 | 40 | def forward(self, X): 41 | return self.mlp.forward(X) 42 | 43 | 44 | class FoldingNetVanilla(nn.Module): # PointNetVanilla or nn.Sequential 45 | def __init__(self, MLP_dims, FC_dims, grid_dims, Folding1_dims, 46 | Folding2_dims, MLP_doLastRelu=False): 47 | assert(MLP_dims[-1]==FC_dims[0]) 48 | super(FoldingNetVanilla, self).__init__() 49 | # Encoder 50 | # PointNet 51 | self.PointNet = PointNetVanilla(MLP_dims, FC_dims, MLP_doLastRelu) 52 | 53 | # Decoder 54 | # Folding 55 | # 2D grid: (grid_dims(0) * grid_dims(1)) x 2 56 | # TODO: normalize the grid to align with the input data 57 | self.N = grid_dims[0] * grid_dims[1] 58 | u = (torch.arange(0, grid_dims[0]) / grid_dims[0] - 0.5).repeat(grid_dims[1]) 59 | v = (torch.arange(0, grid_dims[1]) / grid_dims[1] - 0.5).expand(grid_dims[0], -1).t().reshape(-1) 60 | self.grid = torch.stack((u, v), 1) # Nx2 61 | 62 | # 1st folding 63 | self.Fold1 = FoldingNetSingle(Folding1_dims) 64 | # 2nd folding 65 | self.Fold2 = FoldingNetSingle(Folding2_dims) 66 | 67 | 68 | def forward(self, X): 69 | # encoding 70 | f = self.PointNet.forward(X) # BxK 71 | f = f.unsqueeze(1) # Bx1xK 72 | codeword = f.expand(-1, self.N, -1) # BxNxK 73 | 74 | # cat 2d grid and feature 75 | B = codeword.shape[0] # extract batch size 76 | if not X.is_cuda: 77 | tmpGrid = self.grid # Nx2 78 | else: 79 | tmpGrid = self.grid.cuda() # Nx2 80 | tmpGrid = tmpGrid.unsqueeze(0) 81 | tmpGrid = tmpGrid.expand(B, -1, -1) # BxNx2 82 | 83 | # 1st folding 84 | f = torch.cat((tmpGrid, codeword), 2 ) # BxNx(K+2) 85 | f = self.Fold1.forward(f) # BxNx3 86 | 87 | # 2nd folding 88 | f = torch.cat((f, codeword), 2 ) # BxNx(K+3) 89 | f = self.Fold2.forward(f) # BxNx3 90 | return f 91 | 92 | 93 | class FoldingNetShapes(nn.Module): 94 | ## add 3 shapes to choose and a learnable layer 95 | def __init__(self, MLP_dims, FC_dims, Folding1_dims, 96 | Folding2_dims, MLP_doLastRelu=False): 97 | assert(MLP_dims[-1]==FC_dims[0]) 98 | super(FoldingNetShapes, self).__init__() 99 | # Encoder 100 | # PointNet 101 | self.PointNet = PointNetVanilla(MLP_dims, FC_dims, MLP_doLastRelu) 102 | 103 | # Decoder 104 | # Folding 105 | self.box = make_box() # 18 * 18 * 6 points 106 | self.cylinder = make_cylinder() # same as 1944 107 | self.sphere = make_sphere() # 1944 points 108 | self.grid = torch.Tensor(np.hstack((self.box, self.cylinder, self.sphere))) 109 | 110 | # 1st folding 111 | self.Fold1 = FoldingNetSingle(Folding1_dims) 112 | # 2nd folding 113 | self.Fold2 = FoldingNetSingle(Folding2_dims) 114 | self.N = 1944 # number of points needed to 
replicate codeword later; also points in Grid 115 | self.fc = nn.Linear(9, 9, True) # geometric transformation 116 | 117 | 118 | def forward(self, X): 119 | # encoding 120 | f = self.PointNet.forward(X) # BxK 121 | f = f.unsqueeze(1) # Bx1xK 122 | codeword = f.expand(-1, self.N, -1) # BxNxK 123 | 124 | # cat 2d grid and feature 125 | B = codeword.shape[0] # extract batch size 126 | if not X.is_cuda: 127 | tmpGrid = self.grid # Nx9 128 | else: 129 | tmpGrid = self.grid.cuda() # Nx9 130 | tmpGrid = tmpGrid.unsqueeze(0) 131 | tmpGrid = tmpGrid.expand(B, -1, -1) # BxNx9 132 | tmpGrid = self.fc(tmpGrid) # transform 133 | 134 | 135 | # 1st folding 136 | f = torch.cat((tmpGrid, codeword), 2) # BxNx(K+9) 137 | f = self.Fold1.forward(f) # BxNx3 138 | 139 | # 2nd folding 140 | f = torch.cat((f, codeword), 2 ) # BxNx(K+3) 141 | f = self.Fold2.forward(f) # BxNx3 142 | return f 143 | 144 | 145 | class Recon(nn.Module): 146 | def __init__(self, Folding1_dims, Folding2_dims): 147 | super(Recon, self).__init__() 148 | # Decoder 149 | # Folding 150 | self.box = make_box() # 18 * 18 * 6 points 151 | self.cylinder = make_cylinder() # same as 1944 152 | self.sphere = make_sphere() # 1944 points 153 | self.grid = torch.Tensor(np.hstack((self.box, self.cylinder, self.sphere))) 154 | 155 | # 1st folding 156 | self.Fold1 = FoldingNetSingle(Folding1_dims) 157 | # 2nd folding 158 | self.Fold2 = FoldingNetSingle(Folding2_dims) 159 | self.N = 1944 # number of points needed to replicate codeword later; also points in Grid 160 | self.fc = nn.Linear(9, 9, True) # geometric transformation 161 | 162 | 163 | def forward(self, codeword): 164 | # cat 2d grid and feature 165 | codeword = codeword.transpose(1, 2) 166 | B = codeword.shape[0] # extract batch size 167 | if not codeword.is_cuda: 168 | tmpGrid = self.grid # Nx2 169 | else: 170 | tmpGrid = self.grid.cuda() # Nx2 171 | tmpGrid = tmpGrid.unsqueeze(0) 172 | tmpGrid = tmpGrid.expand(B, -1, -1) # BxNx2 173 | 174 | # 1st folding 175 | f = torch.cat((tmpGrid, codeword), 2 ) # BxNx(K+2) 176 | f = self.Fold1.forward(f) # BxNx3 177 | 178 | # 2nd folding 179 | f = torch.cat((f, codeword), 2 ) # BxNx(K+3) 180 | f = self.Fold2.forward(f) # BxNx3 181 | return f 182 | -------------------------------------------------------------------------------- /lib/models.py: -------------------------------------------------------------------------------- 1 | import lib.network as dlib 2 | import lib.foldingnet as flib 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | class ModifiedEncode(dlib.Encode): 9 | def __init__(self, *args, **kwargs): 10 | super(ModifiedEncode, self).__init__(*args, **kwargs) 11 | 12 | 13 | class ModifiedRecon(flib.Recon): 14 | def __init__(self, num_points, *args, **kwargs): 15 | assert num_points <= 1944 16 | super(ModifiedRecon, self).__init__(*args, **kwargs) 17 | 18 | stride = 1944 // num_points 19 | self.grid = [self.grid[i] for i in range(0, 1944, stride)] 20 | self.grid = torch.stack(self.grid, dim=0)[:num_points] 21 | 22 | self.N = num_points 23 | 24 | self.register_buffer("grid_buf", self.grid) 25 | 26 | self.var = nn.Linear(num_points, 1) 27 | 28 | def forward(self, codeword): 29 | if self.training: 30 | # ADD VAE MODULE HERE 31 | noise = self.var(codeword) 32 | 33 | eps = torch.randn_like(noise) 34 | codeword = (codeword + torch.exp(noise / 2.0) * eps) 35 | kl_loss = torch.mean(0.5 * torch.sum(torch.exp(noise) + codeword ** 2 - 1.0 - noise, 1)) 36 | return super().forward(codeword), kl_loss 37 | else: 38 | return 
super().forward(codeword) 39 | 40 | class ModifiedPose(dlib.Pose): 41 | def __init__(self, *args, **kwargs): 42 | super(ModifiedPose, self).__init__(*args, **kwargs) 43 | 44 | self.conv1_r = torch.nn.Conv1d(1408 * 2, 640, 1) 45 | self.conv1_t = torch.nn.Conv1d(1408 * 2, 640, 1) 46 | self.conv1_c = torch.nn.Conv1d(1408 * 2, 640, 1) 47 | 48 | 49 | class ModifiedFoldingNetShapes(nn.Module): 50 | def __init__(self, num_points, MLP_dims, FC_dims, Folding1_dims, Folding2_dims, MLP_doLastRelu): 51 | super(ModifiedFoldingNetShapes, self).__init__() 52 | 53 | self.encoding = ModifiedEncode(num_points) 54 | 55 | self.reconstructing = ModifiedRecon( 56 | num_points, Folding1_dims, Folding2_dims) 57 | 58 | # self.var = nn.Linear(num_points, 1) 59 | 60 | def encode(self, img, x, choose): 61 | return self.encoding(img, x, choose) 62 | 63 | def recon(self, codeword): 64 | return self.reconstructing(codeword) 65 | # if self.training: 66 | # # ADD VAE MODULE HERE 67 | # noise = self.var(codeword) 68 | 69 | # eps = torch.randn_like(noise) 70 | # codeword = (codeword + torch.exp(noise / 2.0) * eps) 71 | # kl_loss = torch.mean(0.5 * torch.sum(torch.exp(noise) + codeword ** 2 - 1.0 - noise, 1)) 72 | # return self.reconstructing(codeword), kl_loss 73 | # else: 74 | # return self.reconstructing(codeword) 75 | 76 | 77 | class ModifiedPoseNet(nn.Module): 78 | def __init__(self, num_points, num_obj): 79 | super(ModifiedPoseNet, self).__init__() 80 | 81 | self.encoding = ModifiedEncode(num_points) 82 | 83 | self.posing = ModifiedPose(num_points, num_obj) 84 | 85 | def encode(self, img, x, choose): 86 | return self.encoding(img, x, choose) 87 | 88 | def pose(self, codeword, obj): 89 | return self.posing(codeword, obj) 90 | 91 | 92 | class ModifiedPoseRefineNet(dlib.PoseRefineNet): 93 | def __init__(self, *args, **kwargs): 94 | super(ModifiedPoseRefineNet, self).__init__(*args, **kwargs) 95 | 96 | 97 | class CASS(nn.Module): 98 | def __init__(self, opt): 99 | super().__init__() 100 | 101 | MLP_dims = (3, 64, 64, 64, 128, 1024) 102 | FC_dims = (1024, 512, 1408) 103 | Folding1_dims = (1408+9, 512, 512, 3) 104 | Folding2_dims = (1408+3, 512, 512, 3) 105 | MLP_doLastRelu = False 106 | self.opt = opt 107 | self.estimator = ModifiedPoseNet( 108 | num_points=opt.num_points, num_obj=opt.num_objects 109 | ) 110 | self.refiner = ModifiedPoseRefineNet( 111 | num_points=opt.num_points, num_obj=opt.num_objects 112 | ) 113 | self.foldingnet = ModifiedFoldingNetShapes( 114 | opt.num_points, 115 | MLP_dims, FC_dims, Folding1_dims, Folding2_dims, MLP_doLastRelu 116 | ) 117 | -------------------------------------------------------------------------------- /lib/network.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | import torch.nn.functional as F 5 | from lib.pspnet import PSPNet 6 | 7 | psp_models = { 8 | 'resnet18': lambda: PSPNet(sizes=(1, 2, 3, 6), psp_size=512, deep_features_size=256, backend='resnet18'), 9 | 'resnet34': lambda: PSPNet(sizes=(1, 2, 3, 6), psp_size=512, deep_features_size=256, backend='resnet34'), 10 | 'resnet50': lambda: PSPNet(sizes=(1, 2, 3, 6), psp_size=2048, deep_features_size=1024, backend='resnet50'), 11 | 'resnet101': lambda: PSPNet(sizes=(1, 2, 3, 6), psp_size=2048, deep_features_size=1024, backend='resnet101'), 12 | 'resnet152': lambda: PSPNet(sizes=(1, 2, 3, 6), psp_size=2048, deep_features_size=1024, backend='resnet152') 13 | } 14 | 15 | 16 | class ModifiedResnet(nn.Module): 17 | 18 | def 
__init__(self, usegpu=True): 19 | super(ModifiedResnet, self).__init__() 20 | 21 | self.model = psp_models['resnet18'.lower()]() 22 | 23 | def forward(self, x): 24 | x = self.model(x) 25 | return x 26 | 27 | 28 | class PoseNetFeat(nn.Module): 29 | def __init__(self, num_points): 30 | super(PoseNetFeat, self).__init__() 31 | self.conv1 = torch.nn.Conv1d(3, 64, 1) 32 | self.conv2 = torch.nn.Conv1d(64, 128, 1) 33 | 34 | self.e_conv1 = torch.nn.Conv1d(32, 64, 1) 35 | self.e_conv2 = torch.nn.Conv1d(64, 128, 1) 36 | 37 | self.conv5 = torch.nn.Conv1d(256, 512, 1) 38 | self.conv6 = torch.nn.Conv1d(512, 1024, 1) 39 | 40 | self.ap1 = torch.nn.AvgPool1d(num_points) 41 | self.num_points = num_points 42 | 43 | def forward(self, x, emb): 44 | x = F.relu(self.conv1(x)) 45 | emb = F.relu(self.e_conv1(emb)) 46 | pointfeat_1 = torch.cat((x, emb), dim=1) 47 | 48 | x = F.relu(self.conv2(x)) 49 | emb = F.relu(self.e_conv2(emb)) 50 | pointfeat_2 = torch.cat((x, emb), dim=1) 51 | 52 | x = F.relu(self.conv5(pointfeat_2)) 53 | x = F.relu(self.conv6(x)) 54 | 55 | ap_x = self.ap1(x) 56 | 57 | ap_x = ap_x.view(-1, 1024, 1).repeat(1, 1, self.num_points) 58 | # 128 + 256 + 1024 59 | return torch.cat([pointfeat_1, pointfeat_2, ap_x], 1) 60 | 61 | 62 | class PoseNet(nn.Module): 63 | def __init__(self, num_points, num_obj): 64 | super(PoseNet, self).__init__() 65 | self.num_points = num_points 66 | self.cnn = ModifiedResnet() 67 | self.feat = PoseNetFeat(num_points) 68 | 69 | self.conv1_r = torch.nn.Conv1d(1408, 640, 1) 70 | self.conv1_t = torch.nn.Conv1d(1408, 640, 1) 71 | self.conv1_c = torch.nn.Conv1d(1408, 640, 1) 72 | 73 | self.conv2_r = torch.nn.Conv1d(640, 256, 1) 74 | self.conv2_t = torch.nn.Conv1d(640, 256, 1) 75 | self.conv2_c = torch.nn.Conv1d(640, 256, 1) 76 | 77 | self.conv3_r = torch.nn.Conv1d(256, 128, 1) 78 | self.conv3_t = torch.nn.Conv1d(256, 128, 1) 79 | self.conv3_c = torch.nn.Conv1d(256, 128, 1) 80 | 81 | self.conv4_r = torch.nn.Conv1d(128, num_obj*4, 1) # quaternion 82 | self.conv4_t = torch.nn.Conv1d(128, num_obj*3, 1) # translation 83 | self.conv4_c = torch.nn.Conv1d(128, num_obj*1, 1) # confidence 84 | 85 | self.num_obj = num_obj 86 | 87 | def forward(self, img, x, choose, obj): 88 | out_img = self.cnn(img) 89 | 90 | bs, di, _, _ = out_img.size() 91 | 92 | emb = out_img.view(bs, di, -1) 93 | choose = choose.repeat(1, di, 1) 94 | emb = torch.gather(emb, 2, choose).contiguous() 95 | 96 | x = x.transpose(2, 1).contiguous() 97 | ap_x = self.feat(x, emb) 98 | 99 | rx = F.relu(self.conv1_r(ap_x)) 100 | tx = F.relu(self.conv1_t(ap_x)) 101 | cx = F.relu(self.conv1_c(ap_x)) 102 | 103 | rx = F.relu(self.conv2_r(rx)) 104 | tx = F.relu(self.conv2_t(tx)) 105 | cx = F.relu(self.conv2_c(cx)) 106 | 107 | rx = F.relu(self.conv3_r(rx)) 108 | tx = F.relu(self.conv3_t(tx)) 109 | cx = F.relu(self.conv3_c(cx)) 110 | 111 | rx = self.conv4_r(rx).view(bs, self.num_obj, 4, self.num_points) 112 | tx = self.conv4_t(tx).view(bs, self.num_obj, 3, self.num_points) 113 | cx = torch.sigmoid(self.conv4_c(cx)).view( 114 | bs, self.num_obj, 1, self.num_points) 115 | 116 | b = 0 117 | out_rx = torch.index_select(rx[b], 0, obj[b]) 118 | out_tx = torch.index_select(tx[b], 0, obj[b]) 119 | out_cx = torch.index_select(cx[b], 0, obj[b]) 120 | 121 | out_rx = out_rx.contiguous().transpose(2, 1).contiguous() 122 | out_cx = out_cx.contiguous().transpose(2, 1).contiguous() 123 | out_tx = out_tx.contiguous().transpose(2, 1).contiguous() 124 | 125 | return out_rx, out_tx, out_cx, emb.detach() 126 | 127 | 128 | class PoseRefineNetFeat(nn.Module): 
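    # Compresses the 1408-d embedding handed in as `emb` down to 32 channels
    # (pre_conv1-3), fuses it with the transformed point cloud in two stages,
    # and average-pools the 384 -> 512 -> 1024 fusion into a single 1024-d
    # feature vector for the refiner heads.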
129 | def __init__(self, num_points): 130 | super(PoseRefineNetFeat, self).__init__() 131 | self.conv1 = torch.nn.Conv1d(3, 64, 1) 132 | self.conv2 = torch.nn.Conv1d(64, 128, 1) 133 | 134 | self.pre_conv1 = torch.nn.Conv1d(1408, 512, 1) 135 | self.pre_conv2 = torch.nn.Conv1d(512, 256, 1) 136 | self.pre_conv3 = torch.nn.Conv1d(256, 32, 1) 137 | 138 | self.e_conv1 = torch.nn.Conv1d(32, 64, 1) 139 | self.e_conv2 = torch.nn.Conv1d(64, 128, 1) 140 | 141 | self.conv5 = torch.nn.Conv1d(384, 512, 1) 142 | self.conv6 = torch.nn.Conv1d(512, 1024, 1) 143 | 144 | self.ap1 = torch.nn.AvgPool1d(num_points) 145 | self.num_points = num_points 146 | 147 | def forward(self, x, emb): 148 | emb = F.relu(self.pre_conv1(emb)) 149 | emb = F.relu(self.pre_conv2(emb)) 150 | emb = F.relu(self.pre_conv3(emb)) 151 | x = F.relu(self.conv1(x)) 152 | emb = F.relu(self.e_conv1(emb)) 153 | pointfeat_1 = torch.cat([x, emb], dim=1) 154 | 155 | x = F.relu(self.conv2(x)) 156 | emb = F.relu(self.e_conv2(emb)) 157 | pointfeat_2 = torch.cat([x, emb], dim=1) 158 | 159 | pointfeat_3 = torch.cat([pointfeat_1, pointfeat_2], dim=1) 160 | 161 | x = F.relu(self.conv5(pointfeat_3)) 162 | x = F.relu(self.conv6(x)) 163 | 164 | ap_x = self.ap1(x) 165 | 166 | ap_x = ap_x.view(-1, 1024) 167 | return ap_x 168 | 169 | 170 | class PoseRefineNet(nn.Module): 171 | def __init__(self, num_points, num_obj): 172 | super(PoseRefineNet, self).__init__() 173 | self.num_points = num_points 174 | self.feat = PoseRefineNetFeat(num_points) 175 | 176 | self.conv1_r = torch.nn.Linear(1024, 512) 177 | self.conv1_t = torch.nn.Linear(1024, 512) 178 | 179 | self.conv2_r = torch.nn.Linear(512, 128) 180 | self.conv2_t = torch.nn.Linear(512, 128) 181 | 182 | self.conv3_r = torch.nn.Linear(128, num_obj*4) # quaternion 183 | self.conv3_t = torch.nn.Linear(128, num_obj*3) # translation 184 | 185 | self.num_obj = num_obj 186 | 187 | def forward(self, x, emb, obj): 188 | bs = x.size()[0] 189 | 190 | x = x.transpose(2, 1).contiguous() 191 | ap_x = self.feat(x, emb) 192 | 193 | rx = F.relu(self.conv1_r(ap_x)) 194 | tx = F.relu(self.conv1_t(ap_x)) 195 | 196 | rx = F.relu(self.conv2_r(rx)) 197 | tx = F.relu(self.conv2_t(tx)) 198 | 199 | rx = self.conv3_r(rx).view(bs, self.num_obj, 4) 200 | tx = self.conv3_t(tx).view(bs, self.num_obj, 3) 201 | 202 | b = 0 203 | out_rx = torch.index_select(rx[b], 0, obj[b]) 204 | out_tx = torch.index_select(tx[b], 0, obj[b]) 205 | 206 | return out_rx, out_tx 207 | 208 | 209 | class Encode(nn.Module): 210 | def __init__(self, num_points): 211 | super(Encode, self).__init__() 212 | 213 | self.num_points = num_points 214 | self.cnn = ModifiedResnet() 215 | self.feat = PoseNetFeat(num_points) 216 | 217 | def forward(self, img, x, choose): 218 | out_img = self.cnn(img) 219 | 220 | bs, di, _, _ = out_img.size() 221 | 222 | emb = out_img.view(bs, di, -1) 223 | choose = choose.repeat(1, di, 1) 224 | emb = torch.gather(emb, 2, choose).contiguous() 225 | 226 | x = x.transpose(2, 1).contiguous() 227 | ap_x = self.feat(x, emb) 228 | 229 | return ap_x 230 | 231 | 232 | class Pose(nn.Module): 233 | def __init__(self, num_points, num_obj): 234 | super(Pose, self).__init__() 235 | self.conv1_r = torch.nn.Conv1d(1408, 640, 1) 236 | self.conv1_t = torch.nn.Conv1d(1408, 640, 1) 237 | self.conv1_c = torch.nn.Conv1d(1408, 640, 1) 238 | 239 | self.conv2_r = torch.nn.Conv1d(640, 256, 1) 240 | self.conv2_t = torch.nn.Conv1d(640, 256, 1) 241 | self.conv2_c = torch.nn.Conv1d(640, 256, 1) 242 | 243 | self.conv3_r = torch.nn.Conv1d(256, 128, 1) 244 | self.conv3_t = 
torch.nn.Conv1d(256, 128, 1) 245 | self.conv3_c = torch.nn.Conv1d(256, 128, 1) 246 | 247 | self.conv4_r = torch.nn.Conv1d(128, num_obj*4, 1) # quaternion 248 | self.conv4_t = torch.nn.Conv1d(128, num_obj*3, 1) # translation 249 | self.conv4_c = torch.nn.Conv1d(128, num_obj*1, 1) # confidence 250 | 251 | self.num_obj = num_obj 252 | self.num_points = num_points 253 | 254 | def forward(self, codeword, obj): 255 | 256 | bs = codeword.size(0) 257 | 258 | rx = F.relu(self.conv1_r(codeword)) 259 | tx = F.relu(self.conv1_t(codeword)) 260 | cx = F.relu(self.conv1_c(codeword)) 261 | 262 | rx = F.relu(self.conv2_r(rx)) 263 | tx = F.relu(self.conv2_t(tx)) 264 | cx = F.relu(self.conv2_c(cx)) 265 | 266 | rx = F.relu(self.conv3_r(rx)) 267 | tx = F.relu(self.conv3_t(tx)) 268 | cx = F.relu(self.conv3_c(cx)) 269 | 270 | rx = self.conv4_r(rx).view(bs, self.num_obj, 4, self.num_points) 271 | tx = self.conv4_t(tx).view(bs, self.num_obj, 3, self.num_points) 272 | cx = torch.sigmoid(self.conv4_c(cx)).view( 273 | bs, self.num_obj, 1, self.num_points) 274 | 275 | b = 0 276 | out_rx = torch.index_select(rx[b], 0, obj[b]) 277 | out_tx = torch.index_select(tx[b], 0, obj[b]) 278 | out_cx = torch.index_select(cx[b], 0, obj[b]) 279 | 280 | out_rx = out_rx.contiguous().transpose(2, 1).contiguous() 281 | out_cx = out_cx.contiguous().transpose(2, 1).contiguous() 282 | out_tx = out_tx.contiguous().transpose(2, 1).contiguous() 283 | 284 | return out_rx, out_tx, out_cx 285 | -------------------------------------------------------------------------------- /lib/pointnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as Functional 4 | 5 | 6 | def get_and_init_FC_layer(din, dout): 7 | li = nn.Linear(din, dout) 8 | # init weights/bias 9 | nn.init.xavier_uniform_( 10 | li.weight.data, gain=nn.init.calculate_gain('relu')) 11 | li.bias.data.fill_(0.) 
12 | return li 13 | 14 | 15 | def get_MLP_layers(dims, doLastRelu): 16 | layers = [] 17 | for i in range(1, len(dims)): 18 | layers.append(get_and_init_FC_layer(dims[i-1], dims[i])) 19 | if i == len(dims)-1 and not doLastRelu: 20 | continue 21 | layers.append(nn.ReLU()) 22 | return layers 23 | 24 | 25 | class PointwiseMLP(nn.Sequential): 26 | '''Nxdin ->Nxd1->Nxd2->...-> Nxdout''' 27 | 28 | def __init__(self, dims, doLastRelu=False): 29 | layers = get_MLP_layers(dims, doLastRelu) 30 | super(PointwiseMLP, self).__init__(*layers) 31 | 32 | 33 | class GlobalPool(nn.Module): 34 | '''BxNxK -> BxK''' 35 | 36 | def __init__(self, pool_layer): 37 | super(GlobalPool, self).__init__() 38 | self.Pool = pool_layer 39 | 40 | def forward(self, X): 41 | X = X.unsqueeze(-3) # Bx1xNxK 42 | X = self.Pool(X) 43 | X = X.squeeze(-2) 44 | X = X.squeeze(-2) # BxK 45 | return X 46 | 47 | 48 | class PointNetGlobalMax(nn.Sequential): 49 | '''BxNxdims[0] -> Bxdims[-1]''' 50 | 51 | def __init__(self, dims, doLastRelu=False): 52 | layers = [ 53 | PointwiseMLP(dims, doLastRelu=doLastRelu), # BxNxK 54 | GlobalPool(nn.AdaptiveMaxPool2d((1, dims[-1]))), # BxK 55 | ] 56 | super(PointNetGlobalMax, self).__init__(*layers) 57 | 58 | 59 | class PointNetGlobalAvg(nn.Sequential): 60 | '''BxNxdims[0] -> Bxdims[-1]''' 61 | 62 | def __init__(self, dims, doLastRelu=True): 63 | layers = [ 64 | PointwiseMLP(dims, doLastRelu=doLastRelu), # BxNxK 65 | GlobalPool(nn.AdaptiveAvgPool2d((1, dims[-1]))), # BxK 66 | ] 67 | super(PointNetGlobalAvg, self).__init__(*layers) 68 | 69 | 70 | class PointNetVanilla(nn.Sequential): 71 | 72 | def __init__(self, MLP_dims, FC_dims, MLP_doLastRelu=False): 73 | assert(MLP_dims[-1] == FC_dims[0]) 74 | layers = [ 75 | PointNetGlobalMax(MLP_dims, doLastRelu=MLP_doLastRelu), # BxK 76 | ] 77 | layers.extend(get_MLP_layers(FC_dims, False)) 78 | super(PointNetVanilla, self).__init__(*layers) 79 | 80 | 81 | class PointNetTplMatch(nn.Module): 82 | '''this can learn, but no better than PointNetVanilla''' 83 | 84 | def __init__(self, MLP_dims, C_tpls, M_points): 85 | super(PointNetTplMatch, self).__init__() 86 | self.P = nn.Parameter(torch.rand( 87 | C_tpls, M_points, MLP_dims[0])*2-1.0) # CxMx3 88 | self.G = PointNetGlobalMax(MLP_dims) 89 | 90 | def forward(self, X): 91 | Fx = self.G.forward(X) # BxNx3 -> BxK 92 | Fp = self.G.forward(self.P) # CxMx3 -> CxK 93 | S = torch.mm(Fx, Fp.t()) # BxC 94 | return S 95 | 96 | 97 | class PairwiseDistanceMatrix(nn.Module): 98 | 99 | def __init__(self): 100 | super(PairwiseDistanceMatrix, self).__init__() 101 | 102 | def forward(self, X, Y): 103 | X2 = (X**2).sum(1).view(-1, 1) 104 | Y2 = (Y**2).sum(1).view(1, -1) 105 | D = X2 + Y2 - 2.0*torch.mm(X, Y.t()) 106 | return D 107 | 108 | 109 | class PointNetAttentionPool(nn.Module): 110 | 111 | def __init__(self, MLP_dims, Attention_dims, FC_dims, MLP_doLastRelu=False): 112 | assert(MLP_dims[-1]*Attention_dims[-1] == FC_dims[0]) 113 | # assert(Attention_dims[-1]==1) 114 | super(PointNetAttentionPool, self).__init__() 115 | self.add_module( 116 | 'F', 117 | PointwiseMLP(MLP_dims, doLastRelu=MLP_doLastRelu), # BxNxK 118 | ) 119 | self.S = nn.Sequential( 120 | PointwiseMLP(Attention_dims, doLastRelu=False), # BxNxM 121 | nn.Softmax(dim=-2) # BxNxM 122 | ) 123 | self.L = nn.Sequential(*get_MLP_layers(FC_dims, False)) 124 | 125 | def forward(self, X): 126 | F = self.F.forward(X) # BxNxK 127 | S = self.S.forward(X) # BxNxM 128 | S = torch.transpose(S, -1, -2) # BxMxN 129 | G = torch.bmm(S, F) # BxMxK 130 | sz = G.size() 131 | G = G.view(-1, 
sz[-1]*sz[-2]) # BxMK 132 | Y = self.L.forward(G) # BxFC_dims[-1] 133 | return Y 134 | 135 | 136 | class PointNetBilinearPool(nn.Module): 137 | 138 | def __init__(self, MLP1_dims, FC1_dims, MLP2_dims, FC2_dims, FC_dims): 139 | assert(MLP1_dims[-1] == FC1_dims[0]) 140 | assert(MLP2_dims[-1] == FC2_dims[0]) 141 | super(PointNetBilinearPool, self).__init__() 142 | self.F1 = nn.Sequential( 143 | PointNetGlobalMax(MLP1_dims), 144 | *get_MLP_layers(FC1_dims, False) 145 | ) # BxFC1_dims[-1] 146 | self.F2 = nn.Sequential( 147 | PointNetGlobalMax(MLP2_dims), 148 | *get_MLP_layers(FC2_dims, False) 149 | ) # BxFC2_dims[-1] 150 | self.L = nn.Sequential(*get_MLP_layers(FC_dims, False)) 151 | 152 | def forward(self, X): 153 | F1 = self.F1.forward(X) # BxK1 154 | F2 = self.F2.forward(X) # BxK2 155 | F1 = F1.unsqueeze(-1) # BxK1x1 156 | F2 = F2.unsqueeze(-2) # Bx1xK2 157 | G = torch.bmm(F1, F2) # BxK1xK2 158 | 159 | sz = G.size() 160 | G = G.view(-1, sz[-1]*sz[-2]) 161 | Y = self.L.forward(G) 162 | return Y 163 | 164 | 165 | class PointPairNet(nn.Module): 166 | 167 | def __init__(self, dims, FC_dims): 168 | assert(dims[-1] == FC_dims[0]) 169 | super(PointPairNet, self).__init__() 170 | self.L = nn.Sequential(*get_MLP_layers(dims, False)) 171 | self.Pool = nn.AdaptiveMaxPool2d((1, 1)) 172 | self.F = nn.Sequential(*get_MLP_layers(FC_dims, False)) 173 | 174 | def forward(self, X): 175 | sz = X.size() # BxNx3 176 | Xr = X.view(sz[0], 1, sz[1], sz[2]).expand( 177 | sz[0], sz[1], sz[1], sz[2]) # BxNxNx3 178 | Xrc = torch.cat((Xr, Xr.transpose(1, 2)), dim=-1) # BxNxNx6 179 | G = self.L.forward(Xrc).transpose(1, -1) # BxKxNxN 180 | 181 | P = self.Pool.forward(G).squeeze(-1).squeeze(-1) # BxK 182 | Y = self.F.forward(P) 183 | return Y 184 | 185 | 186 | class BoostedPointPairNet(PointPairNet): 187 | 188 | def __init__(self, d, dims, FC_dims, max_pool=True): 189 | super(BoostedPointPairNet, self).__init__(dims, FC_dims) 190 | self.d = d 191 | self.add_module( 192 | 'BoostPool', 193 | nn.AdaptiveMaxPool1d(1) if max_pool else nn.AdaptiveAvgPool1d(1) 194 | ) 195 | 196 | def forward(self, X): 197 | n = X.size()[1] 198 | X = X.transpose(0, 1) # NxBx3 199 | # rid = torch.randperm(n) 200 | # X = X[rid,...] 
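        # Split the N points into d chunks along the point dimension; each
        # chunk is scored by the base PointPairNet and BoostPool then reduces
        # the per-chunk outputs to a single prediction.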
201 | Xs = torch.chunk(X, self.d, dim=0) 202 | Ys = [] 203 | for Xi in Xs: 204 | Xi = Xi.transpose(0, 1).contiguous() # Bxmx3 205 | Yi = super(BoostedPointPairNet, self).forward(Xi) # BxC 206 | Ys.append(Yi.unsqueeze(-1)) 207 | Y = torch.cat(Ys, dim=-1) # BxCxd 208 | Y = self.BoostPool.forward(Y).squeeze(-1) # BxC 209 | return Y 210 | 211 | 212 | class BoostedPointPairNet2(nn.Module): 213 | ''' More efficiently implemented than BoostedPointPairNet ''' 214 | 215 | def __init__(self, boost_factor, dims, FC_dims, sym_pool_max=True, boost_pool_max=True): 216 | assert(dims[-1] == FC_dims[0]) 217 | super(BoostedPointPairNet2, self).__init__() 218 | self.boost_factor = boost_factor 219 | self.L = nn.Sequential(*get_MLP_layers(dims, False)) 220 | self.SymPool = nn.AdaptiveMaxPool3d((1, 1, dims[-1])) if sym_pool_max\ 221 | else nn.AdaptiveAvgPool3d((1, 1, dims[-1])) 222 | self.F = nn.Sequential(*get_MLP_layers(FC_dims, False)) 223 | self.BoostPool = nn.AdaptiveMaxPool2d((1, FC_dims[-1])) if boost_pool_max\ 224 | else nn.AdaptiveAvgPool2d((1, FC_dims[-1])) 225 | 226 | def forward(self, X): 227 | b, n, din = X.size() 228 | d = self.boost_factor 229 | m = n/d 230 | assert(m*d == n) 231 | Xr = X.view(b, d, 1, m, din).expand(b, d, m, m, din) 232 | Xrc = torch.cat((Xr, Xr.transpose(2, 3)), dim=-1) # bxdxmxmx6 233 | G = self.L.forward(Xrc) # bxdxmxmxK 234 | P = self.SymPool.forward(G).squeeze(-2).squeeze(-2) # bxdxK 235 | Y = self.F.forward(P) # bxdxC 236 | Y = self.BoostPool.forward(Y).squeeze(-2) # bxC 237 | return Y 238 | 239 | 240 | class BoostedPointPairNetSuccessivePool(nn.Module): 241 | ''' Change SymPool to successive pool ''' 242 | 243 | def __init__(self, boost_factor, dims, FC_dims, sym_pool_max=True, boost_pool_max=True): 244 | assert(dims[-1] == FC_dims[0]) 245 | super(BoostedPointPairNetSuccessivePool, self).__init__() 246 | self.boost_factor = boost_factor 247 | self.L = nn.Sequential(*get_MLP_layers(dims, False)) 248 | self.dims = dims 249 | self.sym_pool_max = sym_pool_max 250 | self.F = nn.Sequential(*get_MLP_layers(FC_dims, False)) 251 | self.BoostPool = nn.AdaptiveMaxPool2d((1, FC_dims[-1])) if boost_pool_max\ 252 | else nn.AdaptiveAvgPool2d((1, FC_dims[-1])) 253 | 254 | def forward(self, X): 255 | b, n, din = X.size() 256 | d = self.boost_factor 257 | m = n/d 258 | assert(m*d == n) 259 | Xr = X.view(b, d, 1, m, din).expand(b, d, m, m, din) 260 | Xrc = torch.cat((Xr, Xr.transpose(2, 3)), dim=-1) # bxdxmxmx6 261 | G = self.L.forward(Xrc) # bxdxmxmxK 262 | if self.sym_pool_max: # average each point, then max across all points 263 | Pr = Functional.adaptive_avg_pool3d( 264 | G, (m, 1, self.dims[-1])).squeeze(-2) # bxdxmxK 265 | P = Functional.adaptive_max_pool2d( 266 | Pr, (1, self.dims[-1])).squeeze(-2) # bxdxK 267 | else: # max each point, then average over all points 268 | Pr = Functional.adaptive_max_pool3d( 269 | G, (m, 1, self.dims[-1])).squeeze(-2) # bxdxmxK 270 | P = Functional.adaptive_avg_pool2d( 271 | Pr, (1, self.dims[-1])).squeeze(-2) # bxdxK 272 | Y = self.F.forward(P) # bxdxC 273 | Y = self.BoostPool.forward(Y).squeeze(-2) # bxC 274 | return Y 275 | 276 | 277 | class BoostedPointNetVanilla(nn.Module): 278 | 279 | def __init__(self, boost_factor, dims, FC_dims, boost_pool_max=True): 280 | assert(dims[-1] == FC_dims[0]) 281 | super(BoostedPointNetVanilla, self).__init__() 282 | self.boost_factor = boost_factor 283 | self.L = nn.Sequential(*get_MLP_layers(dims, False)) 284 | self.Pool = nn.AdaptiveMaxPool2d((1, dims[-1])) 285 | self.F = nn.Sequential(*get_MLP_layers(FC_dims, 
False)) 286 | self.BoostPool = nn.AdaptiveMaxPool2d((1, FC_dims[-1])) if boost_pool_max\ 287 | else nn.AdaptiveAvgPool2d((1, FC_dims[-1])) 288 | 289 | def forward(self, X): 290 | b, n, din = X.size() 291 | d = self.boost_factor 292 | m = n/d 293 | assert(m*d == n) 294 | Xr = X.view(b, d, m, din) # bxdxmx3 295 | F = self.L.forward(Xr) # bxdxmxK 296 | Fp = self.Pool.forward(F).squeeze(-2) # bxdxK 297 | Yp = self.F.forward(Fp).unsqueeze(0) # 1xbxdxC 298 | Y = self.BoostPool.forward(Yp).squeeze(0).squeeze(-2) # bxC 299 | return Y 300 | -------------------------------------------------------------------------------- /lib/pspnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.nn import functional as F 4 | import lib.extractors as extractors 5 | 6 | 7 | class PSPModule(nn.Module): 8 | def __init__(self, features, out_features=1024, sizes=(1, 2, 3, 6)): 9 | super(PSPModule, self).__init__() 10 | self.stages = [] 11 | self.stages = nn.ModuleList( 12 | [self._make_stage(features, size) for size in sizes]) 13 | self.bottleneck = nn.Conv2d( 14 | features * (len(sizes) + 1), out_features, kernel_size=1) 15 | self.relu = nn.ReLU() 16 | 17 | def _make_stage(self, features, size): 18 | prior = nn.AdaptiveAvgPool2d(output_size=(size, size)) 19 | conv = nn.Conv2d(features, features, kernel_size=1, bias=False) 20 | return nn.Sequential(prior, conv) 21 | 22 | def forward(self, feats): 23 | h, w = feats.size(2), feats.size(3) 24 | priors = [F.upsample(input=stage(feats), size=( 25 | h, w), mode='bilinear') for stage in self.stages] + [feats] 26 | bottle = self.bottleneck(torch.cat(priors, 1)) 27 | return self.relu(bottle) 28 | 29 | 30 | class PSPUpsample(nn.Module): 31 | def __init__(self, in_channels, out_channels): 32 | super(PSPUpsample, self).__init__() 33 | self.conv = nn.Sequential( 34 | nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True), 35 | nn.Conv2d(in_channels, out_channels, 3, padding=1), 36 | nn.PReLU() 37 | ) 38 | 39 | def forward(self, x): 40 | return self.conv(x) 41 | 42 | 43 | class PSPNet(nn.Module): 44 | def __init__(self, n_classes=21, sizes=(1, 2, 3, 6), psp_size=2048, deep_features_size=1024, backend='resnet18', 45 | pretrained=False): 46 | super(PSPNet, self).__init__() 47 | self.feats = getattr(extractors, backend)(pretrained) 48 | self.psp = PSPModule(psp_size, 1024, sizes) 49 | self.drop_1 = nn.Dropout2d(p=0.3) 50 | 51 | self.up_1 = PSPUpsample(1024, 256) 52 | self.up_2 = PSPUpsample(256, 64) 53 | self.up_3 = PSPUpsample(64, 64) 54 | 55 | self.drop_2 = nn.Dropout2d(p=0.15) 56 | self.final = nn.Sequential( 57 | nn.Conv2d(64, 32, kernel_size=1), 58 | nn.LogSoftmax() 59 | ) 60 | 61 | self.classifier = nn.Sequential( 62 | nn.Linear(deep_features_size, 256), 63 | nn.ReLU(), 64 | nn.Linear(256, n_classes) 65 | ) 66 | 67 | def forward(self, x): 68 | f, class_f = self.feats(x) 69 | p = self.psp(f) 70 | p = self.drop_1(p) 71 | 72 | p = self.up_1(p) 73 | p = self.drop_2(p) 74 | 75 | p = self.up_2(p) 76 | p = self.drop_2(p) 77 | 78 | p = self.up_3(p) 79 | 80 | return self.final(p) 81 | -------------------------------------------------------------------------------- /lib/transformations.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # transformations.py 3 | 4 | # Copyright (c) 2006-2018, Christoph Gohlke 5 | # Copyright (c) 2006-2018, The Regents of the University of California 6 | # Produced at the Laboratory for 
Fluorescence Dynamics 7 | # All rights reserved. 8 | # 9 | # Redistribution and use in source and binary forms, with or without 10 | # modification, are permitted provided that the following conditions are met: 11 | # 12 | # * Redistributions of source code must retain the above copyright 13 | # notice, this list of conditions and the following disclaimer. 14 | # * Redistributions in binary form must reproduce the above copyright 15 | # notice, this list of conditions and the following disclaimer in the 16 | # documentation and/or other materials provided with the distribution. 17 | # * Neither the name of the copyright holders nor the names of any 18 | # contributors may be used to endorse or promote products derived 19 | # from this software without specific prior written permission. 20 | # 21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 25 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 | # POSSIBILITY OF SUCH DAMAGE. 32 | 33 | """Homogeneous Transformation Matrices and Quaternions. 34 | 35 | A library for calculating 4x4 matrices for translating, rotating, reflecting, 36 | scaling, shearing, projecting, orthogonalizing, and superimposing arrays of 37 | 3D homogeneous coordinates as well as for converting between rotation matrices, 38 | Euler angles, and quaternions. Also includes an Arcball control object and 39 | functions to decompose transformation matrices. 40 | 41 | :Author: 42 | `Christoph Gohlke `_ 43 | 44 | :Organization: 45 | Laboratory for Fluorescence Dynamics, University of California, Irvine 46 | 47 | :Version: 2018.02.08 48 | 49 | Requirements 50 | ------------ 51 | * `CPython 2.7 or 3.6 `_ 52 | * `Numpy 1.13 `_ 53 | * `Transformations.c 2018.02.08 `_ 54 | (recommended for speedup of some functions) 55 | 56 | Notes 57 | ----- 58 | The API is not stable yet and is expected to change between revisions. 59 | 60 | This Python code is not optimized for speed. Refer to the transformations.c 61 | module for a faster implementation of some functions. 62 | 63 | Documentation in HTML format can be generated with epydoc. 64 | 65 | Matrices (M) can be inverted using numpy.linalg.inv(M), be concatenated using 66 | numpy.dot(M0, M1), or transform homogeneous coordinate arrays (v) using 67 | numpy.dot(M, v) for shape (4, \*) column vectors, respectively 68 | numpy.dot(v, M.T) for shape (\*, 4) row vectors ("array of points"). 69 | 70 | This module follows the "column vectors on the right" and "row major storage" 71 | (C contiguous) conventions. The translation components are in the right column 72 | of the transformation matrix, i.e. M[:3, 3]. 73 | The transpose of the transformation matrices may have to be used to interface 74 | with other graphics systems, e.g. with OpenGL's glMultMatrixd(). See also [16]. 75 | 76 | Calculations are carried out with numpy.float64 precision. 
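As a quick, illustrative sketch of the conventions above (using only
functions defined later in this module):

>>> M = translation_matrix([1, 2, 3])
>>> numpy.allclose(numpy.dot(M, [0, 0, 0, 1]), [1, 2, 3, 1])
True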
77 | 78 | Vector, point, quaternion, and matrix function arguments are expected to be 79 | "array like", i.e. tuple, list, or numpy arrays. 80 | 81 | Return types are numpy arrays unless specified otherwise. 82 | 83 | Angles are in radians unless specified otherwise. 84 | 85 | Quaternions w+ix+jy+kz are represented as [w, x, y, z]. 86 | 87 | A triple of Euler angles can be applied/interpreted in 24 ways, which can 88 | be specified using a 4 character string or encoded 4-tuple: 89 | 90 | *Axes 4-string*: e.g. 'sxyz' or 'ryxy' 91 | 92 | - first character : rotations are applied to 's'tatic or 'r'otating frame 93 | - remaining characters : successive rotation axis 'x', 'y', or 'z' 94 | 95 | *Axes 4-tuple*: e.g. (0, 0, 0, 0) or (1, 1, 1, 1) 96 | 97 | - inner axis: code of axis ('x':0, 'y':1, 'z':2) of rightmost matrix. 98 | - parity : even (0) if inner axis 'x' is followed by 'y', 'y' is followed 99 | by 'z', or 'z' is followed by 'x'. Otherwise odd (1). 100 | - repetition : first and last axis are same (1) or different (0). 101 | - frame : rotations are applied to static (0) or rotating (1) frame. 102 | 103 | Other Python packages and modules for 3D transformations and quaternions: 104 | 105 | * `Transforms3d `_ 106 | includes most code of this module. 107 | * `Blender.mathutils `_ 108 | * `numpy-dtypes `_ 109 | 110 | References 111 | ---------- 112 | (1) Matrices and transformations. Ronald Goldman. 113 | In "Graphics Gems I", pp 472-475. Morgan Kaufmann, 1990. 114 | (2) More matrices and transformations: shear and pseudo-perspective. 115 | Ronald Goldman. In "Graphics Gems II", pp 320-323. Morgan Kaufmann, 1991. 116 | (3) Decomposing a matrix into simple transformations. Spencer Thomas. 117 | In "Graphics Gems II", pp 320-323. Morgan Kaufmann, 1991. 118 | (4) Recovering the data from the transformation matrix. Ronald Goldman. 119 | In "Graphics Gems II", pp 324-331. Morgan Kaufmann, 1991. 120 | (5) Euler angle conversion. Ken Shoemake. 121 | In "Graphics Gems IV", pp 222-229. Morgan Kaufmann, 1994. 122 | (6) Arcball rotation control. Ken Shoemake. 123 | In "Graphics Gems IV", pp 175-192. Morgan Kaufmann, 1994. 124 | (7) Representing attitude: Euler angles, unit quaternions, and rotation 125 | vectors. James Diebel. 2006. 126 | (8) A discussion of the solution for the best rotation to relate two sets 127 | of vectors. W Kabsch. Acta Cryst. 1978. A34, 827-828. 128 | (9) Closed-form solution of absolute orientation using unit quaternions. 129 | BKP Horn. J Opt Soc Am A. 1987. 4(4):629-642. 130 | (10) Quaternions. Ken Shoemake. 131 | http://www.sfu.ca/~jwa3/cmpt461/files/quatut.pdf 132 | (11) From quaternion to matrix and back. JMP van Waveren. 2005. 133 | http://www.intel.com/cd/ids/developer/asmo-na/eng/293748.htm 134 | (12) Uniform random rotations. Ken Shoemake. 135 | In "Graphics Gems III", pp 124-132. Morgan Kaufmann, 1992. 136 | (13) Quaternion in molecular modeling. CFF Karney. 137 | J Mol Graph Mod, 25(5):595-604 138 | (14) New method for extracting the quaternion from a rotation matrix. 139 | Itzhack Y Bar-Itzhack, J Guid Contr Dynam. 2000. 23(6): 1085-1087. 140 | (15) Multiple View Geometry in Computer Vision. Hartley and Zissermann. 141 | Cambridge University Press; 2nd Ed. 2004. Chapter 4, Algorithm 4.7, p 130. 142 | (16) Column Vectors vs. Row Vectors. 
143 | http://steve.hollasch.net/cgindex/math/matrix/column-vec.html 144 | 145 | Examples 146 | -------- 147 | >>> alpha, beta, gamma = 0.123, -1.234, 2.345 148 | >>> origin, xaxis, yaxis, zaxis = [0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1] 149 | >>> I = identity_matrix() 150 | >>> Rx = rotation_matrix(alpha, xaxis) 151 | >>> Ry = rotation_matrix(beta, yaxis) 152 | >>> Rz = rotation_matrix(gamma, zaxis) 153 | >>> R = concatenate_matrices(Rx, Ry, Rz) 154 | >>> euler = euler_from_matrix(R, 'rxyz') 155 | >>> numpy.allclose([alpha, beta, gamma], euler) 156 | True 157 | >>> Re = euler_matrix(alpha, beta, gamma, 'rxyz') 158 | >>> is_same_transform(R, Re) 159 | True 160 | >>> al, be, ga = euler_from_matrix(Re, 'rxyz') 161 | >>> is_same_transform(Re, euler_matrix(al, be, ga, 'rxyz')) 162 | True 163 | >>> qx = quaternion_about_axis(alpha, xaxis) 164 | >>> qy = quaternion_about_axis(beta, yaxis) 165 | >>> qz = quaternion_about_axis(gamma, zaxis) 166 | >>> q = quaternion_multiply(qx, qy) 167 | >>> q = quaternion_multiply(q, qz) 168 | >>> Rq = quaternion_matrix(q) 169 | >>> is_same_transform(R, Rq) 170 | True 171 | >>> S = scale_matrix(1.23, origin) 172 | >>> T = translation_matrix([1, 2, 3]) 173 | >>> Z = shear_matrix(beta, xaxis, origin, zaxis) 174 | >>> R = random_rotation_matrix(numpy.random.rand(3)) 175 | >>> M = concatenate_matrices(T, R, Z, S) 176 | >>> scale, shear, angles, trans, persp = decompose_matrix(M) 177 | >>> numpy.allclose(scale, 1.23) 178 | True 179 | >>> numpy.allclose(trans, [1, 2, 3]) 180 | True 181 | >>> numpy.allclose(shear, [0, math.tan(beta), 0]) 182 | True 183 | >>> is_same_transform(R, euler_matrix(axes='sxyz', *angles)) 184 | True 185 | >>> M1 = compose_matrix(scale, shear, angles, trans, persp) 186 | >>> is_same_transform(M, M1) 187 | True 188 | >>> v0, v1 = random_vector(3), random_vector(3) 189 | >>> M = rotation_matrix(angle_between_vectors(v0, v1), vector_product(v0, v1)) 190 | >>> v2 = numpy.dot(v0, M[:3,:3].T) 191 | >>> numpy.allclose(unit_vector(v1), unit_vector(v2)) 192 | True 193 | 194 | """ 195 | 196 | from __future__ import division, print_function 197 | 198 | import math 199 | 200 | import numpy 201 | 202 | __version__ = '2018.02.08' 203 | __docformat__ = 'restructuredtext en' 204 | __all__ = () 205 | 206 | 207 | def identity_matrix(): 208 | """Return 4x4 identity/unit matrix. 209 | 210 | >>> I = identity_matrix() 211 | >>> numpy.allclose(I, numpy.dot(I, I)) 212 | True 213 | >>> numpy.sum(I), numpy.trace(I) 214 | (4.0, 4.0) 215 | >>> numpy.allclose(I, numpy.identity(4)) 216 | True 217 | 218 | """ 219 | return numpy.identity(4) 220 | 221 | 222 | def translation_matrix(direction): 223 | """Return matrix to translate by direction vector. 224 | 225 | >>> v = numpy.random.random(3) - 0.5 226 | >>> numpy.allclose(v, translation_matrix(v)[:3, 3]) 227 | True 228 | 229 | """ 230 | M = numpy.identity(4) 231 | M[:3, 3] = direction[:3] 232 | return M 233 | 234 | 235 | def translation_from_matrix(matrix): 236 | """Return translation vector from translation matrix. 237 | 238 | >>> v0 = numpy.random.random(3) - 0.5 239 | >>> v1 = translation_from_matrix(translation_matrix(v0)) 240 | >>> numpy.allclose(v0, v1) 241 | True 242 | 243 | """ 244 | return numpy.array(matrix, copy=False)[:3, 3].copy() 245 | 246 | 247 | def reflection_matrix(point, normal): 248 | """Return matrix to mirror at plane defined by point and normal vector. 249 | 250 | >>> v0 = numpy.random.random(4) - 0.5 251 | >>> v0[3] = 1. 
252 | >>> v1 = numpy.random.random(3) - 0.5 253 | >>> R = reflection_matrix(v0, v1) 254 | >>> numpy.allclose(2, numpy.trace(R)) 255 | True 256 | >>> numpy.allclose(v0, numpy.dot(R, v0)) 257 | True 258 | >>> v2 = v0.copy() 259 | >>> v2[:3] += v1 260 | >>> v3 = v0.copy() 261 | >>> v3[:3] -= v1 262 | >>> numpy.allclose(v2, numpy.dot(R, v3)) 263 | True 264 | 265 | """ 266 | normal = unit_vector(normal[:3]) 267 | M = numpy.identity(4) 268 | M[:3, :3] -= 2.0 * numpy.outer(normal, normal) 269 | M[:3, 3] = (2.0 * numpy.dot(point[:3], normal)) * normal 270 | return M 271 | 272 | 273 | def reflection_from_matrix(matrix): 274 | """Return mirror plane point and normal vector from reflection matrix. 275 | 276 | >>> v0 = numpy.random.random(3) - 0.5 277 | >>> v1 = numpy.random.random(3) - 0.5 278 | >>> M0 = reflection_matrix(v0, v1) 279 | >>> point, normal = reflection_from_matrix(M0) 280 | >>> M1 = reflection_matrix(point, normal) 281 | >>> is_same_transform(M0, M1) 282 | True 283 | 284 | """ 285 | M = numpy.array(matrix, dtype=numpy.float64, copy=False) 286 | # normal: unit eigenvector corresponding to eigenvalue -1 287 | w, V = numpy.linalg.eig(M[:3, :3]) 288 | i = numpy.where(abs(numpy.real(w) + 1.0) < 1e-8)[0] 289 | if not len(i): 290 | raise ValueError('no unit eigenvector corresponding to eigenvalue -1') 291 | normal = numpy.real(V[:, i[0]]).squeeze() 292 | # point: any unit eigenvector corresponding to eigenvalue 1 293 | w, V = numpy.linalg.eig(M) 294 | i = numpy.where(abs(numpy.real(w) - 1.0) < 1e-8)[0] 295 | if not len(i): 296 | raise ValueError('no unit eigenvector corresponding to eigenvalue 1') 297 | point = numpy.real(V[:, i[-1]]).squeeze() 298 | point /= point[3] 299 | return point, normal 300 | 301 | 302 | def rotation_matrix(angle, direction, point=None): 303 | """Return matrix to rotate about axis defined by point and direction. 304 | 305 | >>> R = rotation_matrix(math.pi/2, [0, 0, 1], [1, 0, 0]) 306 | >>> numpy.allclose(numpy.dot(R, [0, 0, 0, 1]), [1, -1, 0, 1]) 307 | True 308 | >>> angle = (random.random() - 0.5) * (2*math.pi) 309 | >>> direc = numpy.random.random(3) - 0.5 310 | >>> point = numpy.random.random(3) - 0.5 311 | >>> R0 = rotation_matrix(angle, direc, point) 312 | >>> R1 = rotation_matrix(angle-2*math.pi, direc, point) 313 | >>> is_same_transform(R0, R1) 314 | True 315 | >>> R0 = rotation_matrix(angle, direc, point) 316 | >>> R1 = rotation_matrix(-angle, -direc, point) 317 | >>> is_same_transform(R0, R1) 318 | True 319 | >>> I = numpy.identity(4, numpy.float64) 320 | >>> numpy.allclose(I, rotation_matrix(math.pi*2, direc)) 321 | True 322 | >>> numpy.allclose(2, numpy.trace(rotation_matrix(math.pi/2, 323 | ... direc, point))) 324 | True 325 | 326 | """ 327 | sina = math.sin(angle) 328 | cosa = math.cos(angle) 329 | direction = unit_vector(direction[:3]) 330 | # rotation matrix around unit vector 331 | R = numpy.diag([cosa, cosa, cosa]) 332 | R += numpy.outer(direction, direction) * (1.0 - cosa) 333 | direction *= sina 334 | R += numpy.array([[ 0.0, -direction[2], direction[1]], 335 | [ direction[2], 0.0, -direction[0]], 336 | [-direction[1], direction[0], 0.0]]) 337 | M = numpy.identity(4) 338 | M[:3, :3] = R 339 | if point is not None: 340 | # rotation not around origin 341 | point = numpy.array(point[:3], dtype=numpy.float64, copy=False) 342 | M[:3, 3] = point - numpy.dot(R, point) 343 | return M 344 | 345 | 346 | def rotation_from_matrix(matrix): 347 | """Return rotation angle and axis from rotation matrix. 
348 | 349 | >>> angle = (random.random() - 0.5) * (2*math.pi) 350 | >>> direc = numpy.random.random(3) - 0.5 351 | >>> point = numpy.random.random(3) - 0.5 352 | >>> R0 = rotation_matrix(angle, direc, point) 353 | >>> angle, direc, point = rotation_from_matrix(R0) 354 | >>> R1 = rotation_matrix(angle, direc, point) 355 | >>> is_same_transform(R0, R1) 356 | True 357 | 358 | """ 359 | R = numpy.array(matrix, dtype=numpy.float64, copy=False) 360 | R33 = R[:3, :3] 361 | # direction: unit eigenvector of R33 corresponding to eigenvalue of 1 362 | w, W = numpy.linalg.eig(R33.T) 363 | i = numpy.where(abs(numpy.real(w) - 1.0) < 1e-8)[0] 364 | if not len(i): 365 | raise ValueError('no unit eigenvector corresponding to eigenvalue 1') 366 | direction = numpy.real(W[:, i[-1]]).squeeze() 367 | # point: unit eigenvector of R33 corresponding to eigenvalue of 1 368 | w, Q = numpy.linalg.eig(R) 369 | i = numpy.where(abs(numpy.real(w) - 1.0) < 1e-8)[0] 370 | if not len(i): 371 | raise ValueError('no unit eigenvector corresponding to eigenvalue 1') 372 | point = numpy.real(Q[:, i[-1]]).squeeze() 373 | point /= point[3] 374 | # rotation angle depending on direction 375 | cosa = (numpy.trace(R33) - 1.0) / 2.0 376 | if abs(direction[2]) > 1e-8: 377 | sina = (R[1, 0] + (cosa-1.0)*direction[0]*direction[1]) / direction[2] 378 | elif abs(direction[1]) > 1e-8: 379 | sina = (R[0, 2] + (cosa-1.0)*direction[0]*direction[2]) / direction[1] 380 | else: 381 | sina = (R[2, 1] + (cosa-1.0)*direction[1]*direction[2]) / direction[0] 382 | angle = math.atan2(sina, cosa) 383 | return angle, direction, point 384 | 385 | 386 | def scale_matrix(factor, origin=None, direction=None): 387 | """Return matrix to scale by factor around origin in direction. 388 | 389 | Use factor -1 for point symmetry. 390 | 391 | >>> v = (numpy.random.rand(4, 5) - 0.5) * 20 392 | >>> v[3] = 1 393 | >>> S = scale_matrix(-1.234) 394 | >>> numpy.allclose(numpy.dot(S, v)[:3], -1.234*v[:3]) 395 | True 396 | >>> factor = random.random() * 10 - 5 397 | >>> origin = numpy.random.random(3) - 0.5 398 | >>> direct = numpy.random.random(3) - 0.5 399 | >>> S = scale_matrix(factor, origin) 400 | >>> S = scale_matrix(factor, origin, direct) 401 | 402 | """ 403 | if direction is None: 404 | # uniform scaling 405 | M = numpy.diag([factor, factor, factor, 1.0]) 406 | if origin is not None: 407 | M[:3, 3] = origin[:3] 408 | M[:3, 3] *= 1.0 - factor 409 | else: 410 | # nonuniform scaling 411 | direction = unit_vector(direction[:3]) 412 | factor = 1.0 - factor 413 | M = numpy.identity(4) 414 | M[:3, :3] -= factor * numpy.outer(direction, direction) 415 | if origin is not None: 416 | M[:3, 3] = (factor * numpy.dot(origin[:3], direction)) * direction 417 | return M 418 | 419 | 420 | def scale_from_matrix(matrix): 421 | """Return scaling factor, origin and direction from scaling matrix. 
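For a uniform scaling matrix, the factor is recovered directly and no
direction is reported (an illustrative sketch):

>>> S0 = scale_matrix(2.0)
>>> factor, origin, direction = scale_from_matrix(S0)
>>> numpy.allclose(factor, 2.0)
True
>>> direction is None
True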
422 | 423 | >>> factor = random.random() * 10 - 5 424 | >>> origin = numpy.random.random(3) - 0.5 425 | >>> direct = numpy.random.random(3) - 0.5 426 | >>> S0 = scale_matrix(factor, origin) 427 | >>> factor, origin, direction = scale_from_matrix(S0) 428 | >>> S1 = scale_matrix(factor, origin, direction) 429 | >>> is_same_transform(S0, S1) 430 | True 431 | >>> S0 = scale_matrix(factor, origin, direct) 432 | >>> factor, origin, direction = scale_from_matrix(S0) 433 | >>> S1 = scale_matrix(factor, origin, direction) 434 | >>> is_same_transform(S0, S1) 435 | True 436 | 437 | """ 438 | M = numpy.array(matrix, dtype=numpy.float64, copy=False) 439 | M33 = M[:3, :3] 440 | factor = numpy.trace(M33) - 2.0 441 | try: 442 | # direction: unit eigenvector corresponding to eigenvalue factor 443 | w, V = numpy.linalg.eig(M33) 444 | i = numpy.where(abs(numpy.real(w) - factor) < 1e-8)[0][0] 445 | direction = numpy.real(V[:, i]).squeeze() 446 | direction /= vector_norm(direction) 447 | except IndexError: 448 | # uniform scaling 449 | factor = (factor + 2.0) / 3.0 450 | direction = None 451 | # origin: any eigenvector corresponding to eigenvalue 1 452 | w, V = numpy.linalg.eig(M) 453 | i = numpy.where(abs(numpy.real(w) - 1.0) < 1e-8)[0] 454 | if not len(i): 455 | raise ValueError('no eigenvector corresponding to eigenvalue 1') 456 | origin = numpy.real(V[:, i[-1]]).squeeze() 457 | origin /= origin[3] 458 | return factor, origin, direction 459 | 460 | 461 | def projection_matrix(point, normal, direction=None, 462 | perspective=None, pseudo=False): 463 | """Return matrix to project onto plane defined by point and normal. 464 | 465 | Using either perspective point, projection direction, or none of both. 466 | 467 | If pseudo is True, perspective projections will preserve relative depth 468 | such that Perspective = dot(Orthogonal, PseudoPerspective). 
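For example, an orthogonal projection onto the xy-plane (point at the
origin, normal along z) zeroes the z coordinate (an illustrative sketch):

>>> P = projection_matrix([0, 0, 0], [0, 0, 1])
>>> numpy.allclose(numpy.dot(P, [1, 2, 3, 1]), [1, 2, 0, 1])
True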
469 | 470 | >>> P = projection_matrix([0, 0, 0], [1, 0, 0]) 471 | >>> numpy.allclose(P[1:, 1:], numpy.identity(4)[1:, 1:]) 472 | True 473 | >>> point = numpy.random.random(3) - 0.5 474 | >>> normal = numpy.random.random(3) - 0.5 475 | >>> direct = numpy.random.random(3) - 0.5 476 | >>> persp = numpy.random.random(3) - 0.5 477 | >>> P0 = projection_matrix(point, normal) 478 | >>> P1 = projection_matrix(point, normal, direction=direct) 479 | >>> P2 = projection_matrix(point, normal, perspective=persp) 480 | >>> P3 = projection_matrix(point, normal, perspective=persp, pseudo=True) 481 | >>> is_same_transform(P2, numpy.dot(P0, P3)) 482 | True 483 | >>> P = projection_matrix([3, 0, 0], [1, 1, 0], [1, 0, 0]) 484 | >>> v0 = (numpy.random.rand(4, 5) - 0.5) * 20 485 | >>> v0[3] = 1 486 | >>> v1 = numpy.dot(P, v0) 487 | >>> numpy.allclose(v1[1], v0[1]) 488 | True 489 | >>> numpy.allclose(v1[0], 3-v1[1]) 490 | True 491 | 492 | """ 493 | M = numpy.identity(4) 494 | point = numpy.array(point[:3], dtype=numpy.float64, copy=False) 495 | normal = unit_vector(normal[:3]) 496 | if perspective is not None: 497 | # perspective projection 498 | perspective = numpy.array(perspective[:3], dtype=numpy.float64, 499 | copy=False) 500 | M[0, 0] = M[1, 1] = M[2, 2] = numpy.dot(perspective-point, normal) 501 | M[:3, :3] -= numpy.outer(perspective, normal) 502 | if pseudo: 503 | # preserve relative depth 504 | M[:3, :3] -= numpy.outer(normal, normal) 505 | M[:3, 3] = numpy.dot(point, normal) * (perspective+normal) 506 | else: 507 | M[:3, 3] = numpy.dot(point, normal) * perspective 508 | M[3, :3] = -normal 509 | M[3, 3] = numpy.dot(perspective, normal) 510 | elif direction is not None: 511 | # parallel projection 512 | direction = numpy.array(direction[:3], dtype=numpy.float64, copy=False) 513 | scale = numpy.dot(direction, normal) 514 | M[:3, :3] -= numpy.outer(direction, normal) / scale 515 | M[:3, 3] = direction * (numpy.dot(point, normal) / scale) 516 | else: 517 | # orthogonal projection 518 | M[:3, :3] -= numpy.outer(normal, normal) 519 | M[:3, 3] = numpy.dot(point, normal) * normal 520 | return M 521 | 522 | 523 | def projection_from_matrix(matrix, pseudo=False): 524 | """Return projection plane and perspective point from projection matrix. 525 | 526 | Return values are same as arguments for projection_matrix function: 527 | point, normal, direction, perspective, and pseudo. 
528 | 529 | >>> point = numpy.random.random(3) - 0.5 530 | >>> normal = numpy.random.random(3) - 0.5 531 | >>> direct = numpy.random.random(3) - 0.5 532 | >>> persp = numpy.random.random(3) - 0.5 533 | >>> P0 = projection_matrix(point, normal) 534 | >>> result = projection_from_matrix(P0) 535 | >>> P1 = projection_matrix(*result) 536 | >>> is_same_transform(P0, P1) 537 | True 538 | >>> P0 = projection_matrix(point, normal, direct) 539 | >>> result = projection_from_matrix(P0) 540 | >>> P1 = projection_matrix(*result) 541 | >>> is_same_transform(P0, P1) 542 | True 543 | >>> P0 = projection_matrix(point, normal, perspective=persp, pseudo=False) 544 | >>> result = projection_from_matrix(P0, pseudo=False) 545 | >>> P1 = projection_matrix(*result) 546 | >>> is_same_transform(P0, P1) 547 | True 548 | >>> P0 = projection_matrix(point, normal, perspective=persp, pseudo=True) 549 | >>> result = projection_from_matrix(P0, pseudo=True) 550 | >>> P1 = projection_matrix(*result) 551 | >>> is_same_transform(P0, P1) 552 | True 553 | 554 | """ 555 | M = numpy.array(matrix, dtype=numpy.float64, copy=False) 556 | M33 = M[:3, :3] 557 | w, V = numpy.linalg.eig(M) 558 | i = numpy.where(abs(numpy.real(w) - 1.0) < 1e-8)[0] 559 | if not pseudo and len(i): 560 | # point: any eigenvector corresponding to eigenvalue 1 561 | point = numpy.real(V[:, i[-1]]).squeeze() 562 | point /= point[3] 563 | # direction: unit eigenvector corresponding to eigenvalue 0 564 | w, V = numpy.linalg.eig(M33) 565 | i = numpy.where(abs(numpy.real(w)) < 1e-8)[0] 566 | if not len(i): 567 | raise ValueError('no eigenvector corresponding to eigenvalue 0') 568 | direction = numpy.real(V[:, i[0]]).squeeze() 569 | direction /= vector_norm(direction) 570 | # normal: unit eigenvector of M33.T corresponding to eigenvalue 0 571 | w, V = numpy.linalg.eig(M33.T) 572 | i = numpy.where(abs(numpy.real(w)) < 1e-8)[0] 573 | if len(i): 574 | # parallel projection 575 | normal = numpy.real(V[:, i[0]]).squeeze() 576 | normal /= vector_norm(normal) 577 | return point, normal, direction, None, False 578 | else: 579 | # orthogonal projection, where normal equals direction vector 580 | return point, direction, None, None, False 581 | else: 582 | # perspective projection 583 | i = numpy.where(abs(numpy.real(w)) > 1e-8)[0] 584 | if not len(i): 585 | raise ValueError( 586 | 'no eigenvector not corresponding to eigenvalue 0') 587 | point = numpy.real(V[:, i[-1]]).squeeze() 588 | point /= point[3] 589 | normal = - M[3, :3] 590 | perspective = M[:3, 3] / numpy.dot(point[:3], normal) 591 | if pseudo: 592 | perspective -= normal 593 | return point, normal, None, perspective, pseudo 594 | 595 | 596 | def clip_matrix(left, right, bottom, top, near, far, perspective=False): 597 | """Return matrix to obtain normalized device coordinates from frustum. 598 | 599 | The frustum bounds are axis-aligned along x (left, right), 600 | y (bottom, top) and z (near, far). 601 | 602 | Normalized device coordinates are in range [-1, 1] if coordinates are 603 | inside the frustum. 604 | 605 | If perspective is True the frustum is a truncated pyramid with the 606 | perspective point at origin and direction along z axis, otherwise an 607 | orthographic canonical view volume (a box). 608 | 609 | Homogeneous coordinates transformed by the perspective clip matrix 610 | need to be dehomogenized (divided by w coordinate). 
611 | 612 | >>> frustum = numpy.random.rand(6) 613 | >>> frustum[1] += frustum[0] 614 | >>> frustum[3] += frustum[2] 615 | >>> frustum[5] += frustum[4] 616 | >>> M = clip_matrix(perspective=False, *frustum) 617 | >>> numpy.dot(M, [frustum[0], frustum[2], frustum[4], 1]) 618 | array([-1., -1., -1., 1.]) 619 | >>> numpy.dot(M, [frustum[1], frustum[3], frustum[5], 1]) 620 | array([ 1., 1., 1., 1.]) 621 | >>> M = clip_matrix(perspective=True, *frustum) 622 | >>> v = numpy.dot(M, [frustum[0], frustum[2], frustum[4], 1]) 623 | >>> v / v[3] 624 | array([-1., -1., -1., 1.]) 625 | >>> v = numpy.dot(M, [frustum[1], frustum[3], frustum[4], 1]) 626 | >>> v / v[3] 627 | array([ 1., 1., -1., 1.]) 628 | 629 | """ 630 | if left >= right or bottom >= top or near >= far: 631 | raise ValueError('invalid frustum') 632 | if perspective: 633 | if near <= _EPS: 634 | raise ValueError('invalid frustum: near <= 0') 635 | t = 2.0 * near 636 | M = [[t/(left-right), 0.0, (right+left)/(right-left), 0.0], 637 | [0.0, t/(bottom-top), (top+bottom)/(top-bottom), 0.0], 638 | [0.0, 0.0, (far+near)/(near-far), t*far/(far-near)], 639 | [0.0, 0.0, -1.0, 0.0]] 640 | else: 641 | M = [[2.0/(right-left), 0.0, 0.0, (right+left)/(left-right)], 642 | [0.0, 2.0/(top-bottom), 0.0, (top+bottom)/(bottom-top)], 643 | [0.0, 0.0, 2.0/(far-near), (far+near)/(near-far)], 644 | [0.0, 0.0, 0.0, 1.0]] 645 | return numpy.array(M) 646 | 647 | 648 | def shear_matrix(angle, direction, point, normal): 649 | """Return matrix to shear by angle along direction vector on shear plane. 650 | 651 | The shear plane is defined by a point and normal vector. The direction 652 | vector must be orthogonal to the plane's normal vector. 653 | 654 | A point P is transformed by the shear matrix into P" such that 655 | the vector P-P" is parallel to the direction vector and its extent is 656 | given by the angle of P-P'-P", where P' is the orthogonal projection 657 | of P onto the shear plane. 658 | 659 | >>> angle = (random.random() - 0.5) * 4*math.pi 660 | >>> direct = numpy.random.random(3) - 0.5 661 | >>> point = numpy.random.random(3) - 0.5 662 | >>> normal = numpy.cross(direct, numpy.random.random(3)) 663 | >>> S = shear_matrix(angle, direct, point, normal) 664 | >>> numpy.allclose(1, numpy.linalg.det(S)) 665 | True 666 | 667 | """ 668 | normal = unit_vector(normal[:3]) 669 | direction = unit_vector(direction[:3]) 670 | if abs(numpy.dot(normal, direction)) > 1e-6: 671 | raise ValueError('direction and normal vectors are not orthogonal') 672 | angle = math.tan(angle) 673 | M = numpy.identity(4) 674 | M[:3, :3] += angle * numpy.outer(direction, normal) 675 | M[:3, 3] = -angle * numpy.dot(point[:3], normal) * direction 676 | return M 677 | 678 | 679 | def shear_from_matrix(matrix): 680 | """Return shear angle, direction and plane from shear matrix. 
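For instance, shearing by 45 degrees along x across the plane z=0 moves a
point at unit height by tan(pi/4) = 1 along x (an illustrative sketch):

>>> S = shear_matrix(math.pi/4, [1, 0, 0], [0, 0, 0], [0, 0, 1])
>>> numpy.allclose(numpy.dot(S, [0, 0, 1, 1]), [1, 0, 1, 1])
True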
681 | 682 | >>> angle = (random.random() - 0.5) * 4*math.pi 683 | >>> direct = numpy.random.random(3) - 0.5 684 | >>> point = numpy.random.random(3) - 0.5 685 | >>> normal = numpy.cross(direct, numpy.random.random(3)) 686 | >>> S0 = shear_matrix(angle, direct, point, normal) 687 | >>> angle, direct, point, normal = shear_from_matrix(S0) 688 | >>> S1 = shear_matrix(angle, direct, point, normal) 689 | >>> is_same_transform(S0, S1) 690 | True 691 | 692 | """ 693 | M = numpy.array(matrix, dtype=numpy.float64, copy=False) 694 | M33 = M[:3, :3] 695 | # normal: cross independent eigenvectors corresponding to the eigenvalue 1 696 | w, V = numpy.linalg.eig(M33) 697 | i = numpy.where(abs(numpy.real(w) - 1.0) < 1e-4)[0] 698 | if len(i) < 2: 699 | raise ValueError('no two linear independent eigenvectors found %s' % w) 700 | V = numpy.real(V[:, i]).squeeze().T 701 | lenorm = -1.0 702 | for i0, i1 in ((0, 1), (0, 2), (1, 2)): 703 | n = numpy.cross(V[i0], V[i1]) 704 | w = vector_norm(n) 705 | if w > lenorm: 706 | lenorm = w 707 | normal = n 708 | normal /= lenorm 709 | # direction and angle 710 | direction = numpy.dot(M33 - numpy.identity(3), normal) 711 | angle = vector_norm(direction) 712 | direction /= angle 713 | angle = math.atan(angle) 714 | # point: eigenvector corresponding to eigenvalue 1 715 | w, V = numpy.linalg.eig(M) 716 | i = numpy.where(abs(numpy.real(w) - 1.0) < 1e-8)[0] 717 | if not len(i): 718 | raise ValueError('no eigenvector corresponding to eigenvalue 1') 719 | point = numpy.real(V[:, i[-1]]).squeeze() 720 | point /= point[3] 721 | return angle, direction, point, normal 722 | 723 | 724 | def decompose_matrix(matrix): 725 | """Return sequence of transformations from transformation matrix. 726 | 727 | matrix : array_like 728 | Non-degenerative homogeneous transformation matrix 729 | 730 | Return tuple of: 731 | scale : vector of 3 scaling factors 732 | shear : list of shear factors for x-y, x-z, y-z axes 733 | angles : list of Euler angles about static x, y, z axes 734 | translate : translation vector along x, y, z axes 735 | perspective : perspective partition of matrix 736 | 737 | Raise ValueError if matrix is of wrong type or degenerative. 
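For example, a degenerate (all-zero) matrix is rejected (an illustrative
sketch):

>>> decompose_matrix(numpy.zeros((4, 4)))
Traceback (most recent call last):
    ...
ValueError: M[3, 3] is zero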
738 | 739 | >>> T0 = translation_matrix([1, 2, 3]) 740 | >>> scale, shear, angles, trans, persp = decompose_matrix(T0) 741 | >>> T1 = translation_matrix(trans) 742 | >>> numpy.allclose(T0, T1) 743 | True 744 | >>> S = scale_matrix(0.123) 745 | >>> scale, shear, angles, trans, persp = decompose_matrix(S) 746 | >>> scale[0] 747 | 0.123 748 | >>> R0 = euler_matrix(1, 2, 3) 749 | >>> scale, shear, angles, trans, persp = decompose_matrix(R0) 750 | >>> R1 = euler_matrix(*angles) 751 | >>> numpy.allclose(R0, R1) 752 | True 753 | 754 | """ 755 | M = numpy.array(matrix, dtype=numpy.float64, copy=True).T 756 | if abs(M[3, 3]) < _EPS: 757 | raise ValueError('M[3, 3] is zero') 758 | M /= M[3, 3] 759 | P = M.copy() 760 | P[:, 3] = 0.0, 0.0, 0.0, 1.0 761 | if not numpy.linalg.det(P): 762 | raise ValueError('matrix is singular') 763 | 764 | scale = numpy.zeros((3, )) 765 | shear = [0.0, 0.0, 0.0] 766 | angles = [0.0, 0.0, 0.0] 767 | 768 | if any(abs(M[:3, 3]) > _EPS): 769 | perspective = numpy.dot(M[:, 3], numpy.linalg.inv(P.T)) 770 | M[:, 3] = 0.0, 0.0, 0.0, 1.0 771 | else: 772 | perspective = numpy.array([0.0, 0.0, 0.0, 1.0]) 773 | 774 | translate = M[3, :3].copy() 775 | M[3, :3] = 0.0 776 | 777 | row = M[:3, :3].copy() 778 | scale[0] = vector_norm(row[0]) 779 | row[0] /= scale[0] 780 | shear[0] = numpy.dot(row[0], row[1]) 781 | row[1] -= row[0] * shear[0] 782 | scale[1] = vector_norm(row[1]) 783 | row[1] /= scale[1] 784 | shear[0] /= scale[1] 785 | shear[1] = numpy.dot(row[0], row[2]) 786 | row[2] -= row[0] * shear[1] 787 | shear[2] = numpy.dot(row[1], row[2]) 788 | row[2] -= row[1] * shear[2] 789 | scale[2] = vector_norm(row[2]) 790 | row[2] /= scale[2] 791 | shear[1:] /= scale[2] 792 | 793 | if numpy.dot(row[0], numpy.cross(row[1], row[2])) < 0: 794 | numpy.negative(scale, scale) 795 | numpy.negative(row, row) 796 | 797 | angles[1] = math.asin(-row[0, 2]) 798 | if math.cos(angles[1]): 799 | angles[0] = math.atan2(row[1, 2], row[2, 2]) 800 | angles[2] = math.atan2(row[0, 1], row[0, 0]) 801 | else: 802 | # angles[0] = math.atan2(row[1, 0], row[1, 1]) 803 | angles[0] = math.atan2(-row[2, 1], row[1, 1]) 804 | angles[2] = 0.0 805 | 806 | return scale, shear, angles, translate, perspective 807 | 808 | 809 | def compose_matrix(scale=None, shear=None, angles=None, translate=None, 810 | perspective=None): 811 | """Return transformation matrix from sequence of transformations. 812 | 813 | This is the inverse of the decompose_matrix function. 
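All components are optional; composing only a translation is a useful
minimal sketch:

>>> M = compose_matrix(translate=[1, 2, 3])
>>> numpy.allclose(M[:3, 3], [1, 2, 3])
True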
814 | 815 | Sequence of transformations: 816 | scale : vector of 3 scaling factors 817 | shear : list of shear factors for x-y, x-z, y-z axes 818 | angles : list of Euler angles about static x, y, z axes 819 | translate : translation vector along x, y, z axes 820 | perspective : perspective partition of matrix 821 | 822 | >>> scale = numpy.random.random(3) - 0.5 823 | >>> shear = numpy.random.random(3) - 0.5 824 | >>> angles = (numpy.random.random(3) - 0.5) * (2*math.pi) 825 | >>> trans = numpy.random.random(3) - 0.5 826 | >>> persp = numpy.random.random(4) - 0.5 827 | >>> M0 = compose_matrix(scale, shear, angles, trans, persp) 828 | >>> result = decompose_matrix(M0) 829 | >>> M1 = compose_matrix(*result) 830 | >>> is_same_transform(M0, M1) 831 | True 832 | 833 | """ 834 | M = numpy.identity(4) 835 | if perspective is not None: 836 | P = numpy.identity(4) 837 | P[3, :] = perspective[:4] 838 | M = numpy.dot(M, P) 839 | if translate is not None: 840 | T = numpy.identity(4) 841 | T[:3, 3] = translate[:3] 842 | M = numpy.dot(M, T) 843 | if angles is not None: 844 | R = euler_matrix(angles[0], angles[1], angles[2], 'sxyz') 845 | M = numpy.dot(M, R) 846 | if shear is not None: 847 | Z = numpy.identity(4) 848 | Z[1, 2] = shear[2] 849 | Z[0, 2] = shear[1] 850 | Z[0, 1] = shear[0] 851 | M = numpy.dot(M, Z) 852 | if scale is not None: 853 | S = numpy.identity(4) 854 | S[0, 0] = scale[0] 855 | S[1, 1] = scale[1] 856 | S[2, 2] = scale[2] 857 | M = numpy.dot(M, S) 858 | M /= M[3, 3] 859 | return M 860 | 861 | 862 | def orthogonalization_matrix(lengths, angles): 863 | """Return orthogonalization matrix for crystallographic cell coordinates. 864 | 865 | Angles are expected in degrees. 866 | 867 | The de-orthogonalization matrix is the inverse. 868 | 869 | >>> O = orthogonalization_matrix([10, 10, 10], [90, 90, 90]) 870 | >>> numpy.allclose(O[:3, :3], numpy.identity(3, float) * 10) 871 | True 872 | >>> O = orthogonalization_matrix([9.8, 12.0, 15.5], [87.2, 80.7, 69.7]) 873 | >>> numpy.allclose(numpy.sum(O), 43.063229) 874 | True 875 | 876 | """ 877 | a, b, c = lengths 878 | angles = numpy.radians(angles) 879 | sina, sinb, _ = numpy.sin(angles) 880 | cosa, cosb, cosg = numpy.cos(angles) 881 | co = (cosa * cosb - cosg) / (sina * sinb) 882 | return numpy.array([ 883 | [ a*sinb*math.sqrt(1.0-co*co), 0.0, 0.0, 0.0], 884 | [-a*sinb*co, b*sina, 0.0, 0.0], 885 | [ a*cosb, b*cosa, c, 0.0], 886 | [ 0.0, 0.0, 0.0, 1.0]]) 887 | 888 | 889 | def affine_matrix_from_points(v0, v1, shear=True, scale=True, usesvd=True): 890 | """Return affine transform matrix to register two point sets. 891 | 892 | v0 and v1 are shape (ndims, \*) arrays of at least ndims non-homogeneous 893 | coordinates, where ndims is the dimensionality of the coordinate space. 894 | 895 | If shear is False, a similarity transformation matrix is returned. 896 | If also scale is False, a rigid/Euclidean transformation matrix 897 | is returned. 898 | 899 | By default the algorithm by Hartley and Zissermann [15] is used. 900 | If usesvd is True, similarity and Euclidean transformation matrices 901 | are calculated by minimizing the weighted sum of squared deviations 902 | (RMSD) according to the algorithm by Kabsch [8]. 903 | Otherwise, and if ndims is 3, the quaternion based algorithm by Horn [9] 904 | is used, which is slower when using this Python implementation. 905 | 906 | The returned matrix performs rotation, translation and uniform scaling 907 | (if specified). 
908 | 909 | >>> v0 = [[0, 1031, 1031, 0], [0, 0, 1600, 1600]] 910 | >>> v1 = [[675, 826, 826, 677], [55, 52, 281, 277]] 911 | >>> affine_matrix_from_points(v0, v1) 912 | array([[ 0.14549, 0.00062, 675.50008], 913 | [ 0.00048, 0.14094, 53.24971], 914 | [ 0. , 0. , 1. ]]) 915 | >>> T = translation_matrix(numpy.random.random(3)-0.5) 916 | >>> R = random_rotation_matrix(numpy.random.random(3)) 917 | >>> S = scale_matrix(random.random()) 918 | >>> M = concatenate_matrices(T, R, S) 919 | >>> v0 = (numpy.random.rand(4, 100) - 0.5) * 20 920 | >>> v0[3] = 1 921 | >>> v1 = numpy.dot(M, v0) 922 | >>> v0[:3] += numpy.random.normal(0, 1e-8, 300).reshape(3, -1) 923 | >>> M = affine_matrix_from_points(v0[:3], v1[:3]) 924 | >>> numpy.allclose(v1, numpy.dot(M, v0)) 925 | True 926 | 927 | More examples in superimposition_matrix() 928 | 929 | """ 930 | v0 = numpy.array(v0, dtype=numpy.float64, copy=True) 931 | v1 = numpy.array(v1, dtype=numpy.float64, copy=True) 932 | 933 | ndims = v0.shape[0] 934 | if ndims < 2 or v0.shape[1] < ndims or v0.shape != v1.shape: 935 | raise ValueError('input arrays are of wrong shape or type') 936 | 937 | # move centroids to origin 938 | t0 = -numpy.mean(v0, axis=1) 939 | M0 = numpy.identity(ndims+1) 940 | M0[:ndims, ndims] = t0 941 | v0 += t0.reshape(ndims, 1) 942 | t1 = -numpy.mean(v1, axis=1) 943 | M1 = numpy.identity(ndims+1) 944 | M1[:ndims, ndims] = t1 945 | v1 += t1.reshape(ndims, 1) 946 | 947 | if shear: 948 | # Affine transformation 949 | A = numpy.concatenate((v0, v1), axis=0) 950 | u, s, vh = numpy.linalg.svd(A.T) 951 | vh = vh[:ndims].T 952 | B = vh[:ndims] 953 | C = vh[ndims:2*ndims] 954 | t = numpy.dot(C, numpy.linalg.pinv(B)) 955 | t = numpy.concatenate((t, numpy.zeros((ndims, 1))), axis=1) 956 | M = numpy.vstack((t, ((0.0,)*ndims) + (1.0,))) 957 | elif usesvd or ndims != 3: 958 | # Rigid transformation via SVD of covariance matrix 959 | u, s, vh = numpy.linalg.svd(numpy.dot(v1, v0.T)) 960 | # rotation matrix from SVD orthonormal bases 961 | R = numpy.dot(u, vh) 962 | if numpy.linalg.det(R) < 0.0: 963 | # R does not constitute right handed system 964 | R -= numpy.outer(u[:, ndims-1], vh[ndims-1, :]*2.0) 965 | s[-1] *= -1.0 966 | # homogeneous transformation matrix 967 | M = numpy.identity(ndims+1) 968 | M[:ndims, :ndims] = R 969 | else: 970 | # Rigid transformation matrix via quaternion 971 | # compute symmetric matrix N 972 | xx, yy, zz = numpy.sum(v0 * v1, axis=1) 973 | xy, yz, zx = numpy.sum(v0 * numpy.roll(v1, -1, axis=0), axis=1) 974 | xz, yx, zy = numpy.sum(v0 * numpy.roll(v1, -2, axis=0), axis=1) 975 | N = [[xx+yy+zz, 0.0, 0.0, 0.0], 976 | [yz-zy, xx-yy-zz, 0.0, 0.0], 977 | [zx-xz, xy+yx, yy-xx-zz, 0.0], 978 | [xy-yx, zx+xz, yz+zy, zz-xx-yy]] 979 | # quaternion: eigenvector corresponding to most positive eigenvalue 980 | w, V = numpy.linalg.eigh(N) 981 | q = V[:, numpy.argmax(w)] 982 | q /= vector_norm(q) # unit quaternion 983 | # homogeneous transformation matrix 984 | M = quaternion_matrix(q) 985 | 986 | if scale and not shear: 987 | # Affine transformation; scale is ratio of RMS deviations from centroid 988 | v0 *= v0 989 | v1 *= v1 990 | M[:ndims, :ndims] *= math.sqrt(numpy.sum(v1) / numpy.sum(v0)) 991 | 992 | # move centroids back 993 | M = numpy.dot(numpy.linalg.inv(M1), numpy.dot(M, M0)) 994 | M /= M[ndims, ndims] 995 | return M 996 | 997 | 998 | def superimposition_matrix(v0, v1, scale=False, usesvd=True): 999 | """Return matrix to transform given 3D point set into second point set. 
1000 | 1001 | v0 and v1 are shape (3, \*) or (4, \*) arrays of at least 3 points. 1002 | 1003 | The parameters scale and usesvd are explained in the more general 1004 | affine_matrix_from_points function. 1005 | 1006 | The returned matrix is a similarity or Euclidean transformation matrix. 1007 | This function has a fast C implementation in transformations.c. 1008 | 1009 | >>> v0 = numpy.random.rand(3, 10) 1010 | >>> M = superimposition_matrix(v0, v0) 1011 | >>> numpy.allclose(M, numpy.identity(4)) 1012 | True 1013 | >>> R = random_rotation_matrix(numpy.random.random(3)) 1014 | >>> v0 = [[1,0,0], [0,1,0], [0,0,1], [1,1,1]] 1015 | >>> v1 = numpy.dot(R, v0) 1016 | >>> M = superimposition_matrix(v0, v1) 1017 | >>> numpy.allclose(v1, numpy.dot(M, v0)) 1018 | True 1019 | >>> v0 = (numpy.random.rand(4, 100) - 0.5) * 20 1020 | >>> v0[3] = 1 1021 | >>> v1 = numpy.dot(R, v0) 1022 | >>> M = superimposition_matrix(v0, v1) 1023 | >>> numpy.allclose(v1, numpy.dot(M, v0)) 1024 | True 1025 | >>> S = scale_matrix(random.random()) 1026 | >>> T = translation_matrix(numpy.random.random(3)-0.5) 1027 | >>> M = concatenate_matrices(T, R, S) 1028 | >>> v1 = numpy.dot(M, v0) 1029 | >>> v0[:3] += numpy.random.normal(0, 1e-9, 300).reshape(3, -1) 1030 | >>> M = superimposition_matrix(v0, v1, scale=True) 1031 | >>> numpy.allclose(v1, numpy.dot(M, v0)) 1032 | True 1033 | >>> M = superimposition_matrix(v0, v1, scale=True, usesvd=False) 1034 | >>> numpy.allclose(v1, numpy.dot(M, v0)) 1035 | True 1036 | >>> v = numpy.empty((4, 100, 3)) 1037 | >>> v[:, :, 0] = v0 1038 | >>> M = superimposition_matrix(v0, v1, scale=True, usesvd=False) 1039 | >>> numpy.allclose(v1, numpy.dot(M, v[:, :, 0])) 1040 | True 1041 | 1042 | """ 1043 | v0 = numpy.array(v0, dtype=numpy.float64, copy=False)[:3] 1044 | v1 = numpy.array(v1, dtype=numpy.float64, copy=False)[:3] 1045 | return affine_matrix_from_points(v0, v1, shear=False, 1046 | scale=scale, usesvd=usesvd) 1047 | 1048 | 1049 | def euler_matrix(ai, aj, ak, axes='sxyz'): 1050 | """Return homogeneous rotation matrix from Euler angles and axis sequence. 1051 | 1052 | ai, aj, ak : Euler's roll, pitch and yaw angles 1053 | axes : One of 24 axis sequences as string or encoded tuple 1054 | 1055 | >>> R = euler_matrix(1, 2, 3, 'syxz') 1056 | >>> numpy.allclose(numpy.sum(R[0]), -1.34786452) 1057 | True 1058 | >>> R = euler_matrix(1, 2, 3, (0, 1, 0, 1)) 1059 | >>> numpy.allclose(numpy.sum(R[0]), -0.383436184) 1060 | True 1061 | >>> ai, aj, ak = (4*math.pi) * (numpy.random.random(3) - 0.5) 1062 | >>> for axes in _AXES2TUPLE.keys(): 1063 | ... R = euler_matrix(ai, aj, ak, axes) 1064 | >>> for axes in _TUPLE2AXES.keys(): 1065 | ... 
R = euler_matrix(ai, aj, ak, axes) 1066 | 1067 | """ 1068 | try: 1069 | firstaxis, parity, repetition, frame = _AXES2TUPLE[axes] 1070 | except (AttributeError, KeyError): 1071 | _TUPLE2AXES[axes] # validation 1072 | firstaxis, parity, repetition, frame = axes 1073 | 1074 | i = firstaxis 1075 | j = _NEXT_AXIS[i+parity] 1076 | k = _NEXT_AXIS[i-parity+1] 1077 | 1078 | if frame: 1079 | ai, ak = ak, ai 1080 | if parity: 1081 | ai, aj, ak = -ai, -aj, -ak 1082 | 1083 | si, sj, sk = math.sin(ai), math.sin(aj), math.sin(ak) 1084 | ci, cj, ck = math.cos(ai), math.cos(aj), math.cos(ak) 1085 | cc, cs = ci*ck, ci*sk 1086 | sc, ss = si*ck, si*sk 1087 | 1088 | M = numpy.identity(4) 1089 | if repetition: 1090 | M[i, i] = cj 1091 | M[i, j] = sj*si 1092 | M[i, k] = sj*ci 1093 | M[j, i] = sj*sk 1094 | M[j, j] = -cj*ss+cc 1095 | M[j, k] = -cj*cs-sc 1096 | M[k, i] = -sj*ck 1097 | M[k, j] = cj*sc+cs 1098 | M[k, k] = cj*cc-ss 1099 | else: 1100 | M[i, i] = cj*ck 1101 | M[i, j] = sj*sc-cs 1102 | M[i, k] = sj*cc+ss 1103 | M[j, i] = cj*sk 1104 | M[j, j] = sj*ss+cc 1105 | M[j, k] = sj*cs-sc 1106 | M[k, i] = -sj 1107 | M[k, j] = cj*si 1108 | M[k, k] = cj*ci 1109 | return M 1110 | 1111 | 1112 | def euler_from_matrix(matrix, axes='sxyz'): 1113 | """Return Euler angles from rotation matrix for specified axis sequence. 1114 | 1115 | axes : One of 24 axis sequences as string or encoded tuple 1116 | 1117 | Note that many Euler angle triplets can describe one matrix. 1118 | 1119 | >>> R0 = euler_matrix(1, 2, 3, 'syxz') 1120 | >>> al, be, ga = euler_from_matrix(R0, 'syxz') 1121 | >>> R1 = euler_matrix(al, be, ga, 'syxz') 1122 | >>> numpy.allclose(R0, R1) 1123 | True 1124 | >>> angles = (4*math.pi) * (numpy.random.random(3) - 0.5) 1125 | >>> for axes in _AXES2TUPLE.keys(): 1126 | ... R0 = euler_matrix(axes=axes, *angles) 1127 | ... R1 = euler_matrix(axes=axes, *euler_from_matrix(R0, axes)) 1128 | ... if not numpy.allclose(R0, R1): print(axes, "failed") 1129 | 1130 | """ 1131 | try: 1132 | firstaxis, parity, repetition, frame = _AXES2TUPLE[axes.lower()] 1133 | except (AttributeError, KeyError): 1134 | _TUPLE2AXES[axes] # validation 1135 | firstaxis, parity, repetition, frame = axes 1136 | 1137 | i = firstaxis 1138 | j = _NEXT_AXIS[i+parity] 1139 | k = _NEXT_AXIS[i-parity+1] 1140 | 1141 | M = numpy.array(matrix, dtype=numpy.float64, copy=False)[:3, :3] 1142 | if repetition: 1143 | sy = math.sqrt(M[i, j]*M[i, j] + M[i, k]*M[i, k]) 1144 | if sy > _EPS: 1145 | ax = math.atan2( M[i, j], M[i, k]) 1146 | ay = math.atan2( sy, M[i, i]) 1147 | az = math.atan2( M[j, i], -M[k, i]) 1148 | else: 1149 | ax = math.atan2(-M[j, k], M[j, j]) 1150 | ay = math.atan2( sy, M[i, i]) 1151 | az = 0.0 1152 | else: 1153 | cy = math.sqrt(M[i, i]*M[i, i] + M[j, i]*M[j, i]) 1154 | if cy > _EPS: 1155 | ax = math.atan2( M[k, j], M[k, k]) 1156 | ay = math.atan2(-M[k, i], cy) 1157 | az = math.atan2( M[j, i], M[i, i]) 1158 | else: 1159 | ax = math.atan2(-M[j, k], M[j, j]) 1160 | ay = math.atan2(-M[k, i], cy) 1161 | az = 0.0 1162 | 1163 | if parity: 1164 | ax, ay, az = -ax, -ay, -az 1165 | if frame: 1166 | ax, az = az, ax 1167 | return ax, ay, az 1168 | 1169 | 1170 | def euler_from_quaternion(quaternion, axes='sxyz'): 1171 | """Return Euler angles from quaternion for specified axis sequence. 
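For angles in the principal range this inverts quaternion_from_euler for
the same axis sequence (an illustrative round trip):

>>> q = quaternion_from_euler(0.1, 0.2, 0.3)
>>> numpy.allclose(euler_from_quaternion(q), [0.1, 0.2, 0.3])
True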
1172 | 1173 | >>> angles = euler_from_quaternion([0.99810947, 0.06146124, 0, 0]) 1174 | >>> numpy.allclose(angles, [0.123, 0, 0]) 1175 | True 1176 | 1177 | """ 1178 | return euler_from_matrix(quaternion_matrix(quaternion), axes) 1179 | 1180 | 1181 | def quaternion_from_euler(ai, aj, ak, axes='sxyz'): 1182 | """Return quaternion from Euler angles and axis sequence. 1183 | 1184 | ai, aj, ak : Euler's roll, pitch and yaw angles 1185 | axes : One of 24 axis sequences as string or encoded tuple 1186 | 1187 | >>> q = quaternion_from_euler(1, 2, 3, 'ryxz') 1188 | >>> numpy.allclose(q, [0.435953, 0.310622, -0.718287, 0.444435]) 1189 | True 1190 | 1191 | """ 1192 | try: 1193 | firstaxis, parity, repetition, frame = _AXES2TUPLE[axes.lower()] 1194 | except (AttributeError, KeyError): 1195 | _TUPLE2AXES[axes] # validation 1196 | firstaxis, parity, repetition, frame = axes 1197 | 1198 | i = firstaxis + 1 1199 | j = _NEXT_AXIS[i+parity-1] + 1 1200 | k = _NEXT_AXIS[i-parity] + 1 1201 | 1202 | if frame: 1203 | ai, ak = ak, ai 1204 | if parity: 1205 | aj = -aj 1206 | 1207 | ai /= 2.0 1208 | aj /= 2.0 1209 | ak /= 2.0 1210 | ci = math.cos(ai) 1211 | si = math.sin(ai) 1212 | cj = math.cos(aj) 1213 | sj = math.sin(aj) 1214 | ck = math.cos(ak) 1215 | sk = math.sin(ak) 1216 | cc = ci*ck 1217 | cs = ci*sk 1218 | sc = si*ck 1219 | ss = si*sk 1220 | 1221 | q = numpy.empty((4, )) 1222 | if repetition: 1223 | q[0] = cj*(cc - ss) 1224 | q[i] = cj*(cs + sc) 1225 | q[j] = sj*(cc + ss) 1226 | q[k] = sj*(cs - sc) 1227 | else: 1228 | q[0] = cj*cc + sj*ss 1229 | q[i] = cj*sc - sj*cs 1230 | q[j] = cj*ss + sj*cc 1231 | q[k] = cj*cs - sj*sc 1232 | if parity: 1233 | q[j] *= -1.0 1234 | 1235 | return q 1236 | 1237 | 1238 | def quaternion_about_axis(angle, axis): 1239 | """Return quaternion for rotation about axis. 1240 | 1241 | >>> q = quaternion_about_axis(0.123, [1, 0, 0]) 1242 | >>> numpy.allclose(q, [0.99810947, 0.06146124, 0, 0]) 1243 | True 1244 | 1245 | """ 1246 | q = numpy.array([0.0, axis[0], axis[1], axis[2]]) 1247 | qlen = vector_norm(q) 1248 | if qlen > _EPS: 1249 | q *= math.sin(angle/2.0) / qlen 1250 | q[0] = math.cos(angle/2.0) 1251 | return q 1252 | 1253 | 1254 | def quaternion_matrix(quaternion): 1255 | """Return homogeneous rotation matrix from quaternion. 1256 | 1257 | >>> M = quaternion_matrix([0.99810947, 0.06146124, 0, 0]) 1258 | >>> numpy.allclose(M, rotation_matrix(0.123, [1, 0, 0])) 1259 | True 1260 | >>> M = quaternion_matrix([1, 0, 0, 0]) 1261 | >>> numpy.allclose(M, numpy.identity(4)) 1262 | True 1263 | >>> M = quaternion_matrix([0, 1, 0, 0]) 1264 | >>> numpy.allclose(M, numpy.diag([1, -1, -1, 1])) 1265 | True 1266 | 1267 | """ 1268 | q = numpy.array(quaternion, dtype=numpy.float64, copy=True) 1269 | n = numpy.dot(q, q) 1270 | if n < _EPS: 1271 | return numpy.identity(4) 1272 | q *= math.sqrt(2.0 / n) 1273 | q = numpy.outer(q, q) 1274 | return numpy.array([ 1275 | [1.0-q[2, 2]-q[3, 3], q[1, 2]-q[3, 0], q[1, 3]+q[2, 0], 0.0], 1276 | [ q[1, 2]+q[3, 0], 1.0-q[1, 1]-q[3, 3], q[2, 3]-q[1, 0], 0.0], 1277 | [ q[1, 3]-q[2, 0], q[2, 3]+q[1, 0], 1.0-q[1, 1]-q[2, 2], 0.0], 1278 | [ 0.0, 0.0, 0.0, 1.0]]) 1279 | 1280 | 1281 | def quaternion_from_matrix(matrix, isprecise=False): 1282 | """Return quaternion from rotation matrix. 1283 | 1284 | If isprecise is True, the input matrix is assumed to be a precise rotation 1285 | matrix and a faster algorithm is used. 
1286 | 1287 | >>> q = quaternion_from_matrix(numpy.identity(4), True) 1288 | >>> numpy.allclose(q, [1, 0, 0, 0]) 1289 | True 1290 | >>> q = quaternion_from_matrix(numpy.diag([1, -1, -1, 1])) 1291 | >>> numpy.allclose(q, [0, 1, 0, 0]) or numpy.allclose(q, [0, -1, 0, 0]) 1292 | True 1293 | >>> R = rotation_matrix(0.123, (1, 2, 3)) 1294 | >>> q = quaternion_from_matrix(R, True) 1295 | >>> numpy.allclose(q, [0.9981095, 0.0164262, 0.0328524, 0.0492786]) 1296 | True 1297 | >>> R = [[-0.545, 0.797, 0.260, 0], [0.733, 0.603, -0.313, 0], 1298 | ... [-0.407, 0.021, -0.913, 0], [0, 0, 0, 1]] 1299 | >>> q = quaternion_from_matrix(R) 1300 | >>> numpy.allclose(q, [0.19069, 0.43736, 0.87485, -0.083611]) 1301 | True 1302 | >>> R = [[0.395, 0.362, 0.843, 0], [-0.626, 0.796, -0.056, 0], 1303 | ... [-0.677, -0.498, 0.529, 0], [0, 0, 0, 1]] 1304 | >>> q = quaternion_from_matrix(R) 1305 | >>> numpy.allclose(q, [0.82336615, -0.13610694, 0.46344705, -0.29792603]) 1306 | True 1307 | >>> R = random_rotation_matrix() 1308 | >>> q = quaternion_from_matrix(R) 1309 | >>> is_same_transform(R, quaternion_matrix(q)) 1310 | True 1311 | >>> is_same_quaternion(quaternion_from_matrix(R, isprecise=False), 1312 | ... quaternion_from_matrix(R, isprecise=True)) 1313 | True 1314 | >>> R = euler_matrix(0.0, 0.0, numpy.pi/2.0) 1315 | >>> is_same_quaternion(quaternion_from_matrix(R, isprecise=False), 1316 | ... quaternion_from_matrix(R, isprecise=True)) 1317 | True 1318 | 1319 | """ 1320 | M = numpy.array(matrix, dtype=numpy.float64, copy=False)[:4, :4] 1321 | if isprecise: 1322 | q = numpy.empty((4, )) 1323 | t = numpy.trace(M) 1324 | if t > M[3, 3]: 1325 | q[0] = t 1326 | q[3] = M[1, 0] - M[0, 1] 1327 | q[2] = M[0, 2] - M[2, 0] 1328 | q[1] = M[2, 1] - M[1, 2] 1329 | else: 1330 | i, j, k = 0, 1, 2 1331 | if M[1, 1] > M[0, 0]: 1332 | i, j, k = 1, 2, 0 1333 | if M[2, 2] > M[i, i]: 1334 | i, j, k = 2, 0, 1 1335 | t = M[i, i] - (M[j, j] + M[k, k]) + M[3, 3] 1336 | q[i] = t 1337 | q[j] = M[i, j] + M[j, i] 1338 | q[k] = M[k, i] + M[i, k] 1339 | q[3] = M[k, j] - M[j, k] 1340 | q = q[[3, 0, 1, 2]] 1341 | q *= 0.5 / math.sqrt(t * M[3, 3]) 1342 | else: 1343 | m00 = M[0, 0] 1344 | m01 = M[0, 1] 1345 | m02 = M[0, 2] 1346 | m10 = M[1, 0] 1347 | m11 = M[1, 1] 1348 | m12 = M[1, 2] 1349 | m20 = M[2, 0] 1350 | m21 = M[2, 1] 1351 | m22 = M[2, 2] 1352 | # symmetric matrix K 1353 | K = numpy.array([[m00-m11-m22, 0.0, 0.0, 0.0], 1354 | [m01+m10, m11-m00-m22, 0.0, 0.0], 1355 | [m02+m20, m12+m21, m22-m00-m11, 0.0], 1356 | [m21-m12, m02-m20, m10-m01, m00+m11+m22]]) 1357 | K /= 3.0 1358 | # quaternion is eigenvector of K that corresponds to largest eigenvalue 1359 | w, V = numpy.linalg.eigh(K) 1360 | q = V[[3, 0, 1, 2], numpy.argmax(w)] 1361 | if q[0] < 0.0: 1362 | numpy.negative(q, q) 1363 | return q 1364 | 1365 | 1366 | def quaternion_multiply(quaternion1, quaternion0): 1367 | """Return multiplication of two quaternions. 1368 | 1369 | >>> q = quaternion_multiply([4, 1, -2, 3], [8, -5, 6, 7]) 1370 | >>> numpy.allclose(q, [28, -44, -14, 48]) 1371 | True 1372 | 1373 | """ 1374 | w0, x0, y0, z0 = quaternion0 1375 | w1, x1, y1, z1 = quaternion1 1376 | return numpy.array([ 1377 | -x1*x0 - y1*y0 - z1*z0 + w1*w0, 1378 | x1*w0 + y1*z0 - z1*y0 + w1*x0, 1379 | -x1*z0 + y1*w0 + z1*x0 + w1*y0, 1380 | x1*y0 - y1*x0 + z1*w0 + w1*z0], dtype=numpy.float64) 1381 | 1382 | 1383 | def quaternion_conjugate(quaternion): 1384 | """Return conjugate of quaternion. 
1385 | 1386 | >>> q0 = random_quaternion() 1387 | >>> q1 = quaternion_conjugate(q0) 1388 | >>> q1[0] == q0[0] and all(q1[1:] == -q0[1:]) 1389 | True 1390 | 1391 | """ 1392 | q = numpy.array(quaternion, dtype=numpy.float64, copy=True) 1393 | numpy.negative(q[1:], q[1:]) 1394 | return q 1395 | 1396 | 1397 | def quaternion_inverse(quaternion): 1398 | """Return inverse of quaternion. 1399 | 1400 | >>> q0 = random_quaternion() 1401 | >>> q1 = quaternion_inverse(q0) 1402 | >>> numpy.allclose(quaternion_multiply(q0, q1), [1, 0, 0, 0]) 1403 | True 1404 | 1405 | """ 1406 | q = numpy.array(quaternion, dtype=numpy.float64, copy=True) 1407 | numpy.negative(q[1:], q[1:]) 1408 | return q / numpy.dot(q, q) 1409 | 1410 | 1411 | def quaternion_real(quaternion): 1412 | """Return real part of quaternion. 1413 | 1414 | >>> quaternion_real([3, 0, 1, 2]) 1415 | 3.0 1416 | 1417 | """ 1418 | return float(quaternion[0]) 1419 | 1420 | 1421 | def quaternion_imag(quaternion): 1422 | """Return imaginary part of quaternion. 1423 | 1424 | >>> quaternion_imag([3, 0, 1, 2]) 1425 | array([ 0., 1., 2.]) 1426 | 1427 | """ 1428 | return numpy.array(quaternion[1:4], dtype=numpy.float64, copy=True) 1429 | 1430 | 1431 | def quaternion_slerp(quat0, quat1, fraction, spin=0, shortestpath=True): 1432 | """Return spherical linear interpolation between two quaternions. 1433 | 1434 | >>> q0 = random_quaternion() 1435 | >>> q1 = random_quaternion() 1436 | >>> q = quaternion_slerp(q0, q1, 0) 1437 | >>> numpy.allclose(q, q0) 1438 | True 1439 | >>> q = quaternion_slerp(q0, q1, 1, 1) 1440 | >>> numpy.allclose(q, q1) 1441 | True 1442 | >>> q = quaternion_slerp(q0, q1, 0.5) 1443 | >>> angle = math.acos(numpy.dot(q0, q)) 1444 | >>> numpy.allclose(2, math.acos(numpy.dot(q0, q1)) / angle) or \ 1445 | numpy.allclose(2, math.acos(-numpy.dot(q0, q1)) / angle) 1446 | True 1447 | 1448 | """ 1449 | q0 = unit_vector(quat0[:4]) 1450 | q1 = unit_vector(quat1[:4]) 1451 | if fraction == 0.0: 1452 | return q0 1453 | elif fraction == 1.0: 1454 | return q1 1455 | d = numpy.dot(q0, q1) 1456 | if abs(abs(d) - 1.0) < _EPS: 1457 | return q0 1458 | if shortestpath and d < 0.0: 1459 | # invert rotation 1460 | d = -d 1461 | numpy.negative(q1, q1) 1462 | angle = math.acos(d) + spin * math.pi 1463 | if abs(angle) < _EPS: 1464 | return q0 1465 | isin = 1.0 / math.sin(angle) 1466 | q0 *= math.sin((1.0 - fraction) * angle) * isin 1467 | q1 *= math.sin(fraction * angle) * isin 1468 | q0 += q1 1469 | return q0 1470 | 1471 | 1472 | def random_quaternion(rand=None): 1473 | """Return uniform random unit quaternion. 1474 | 1475 | rand: array like or None 1476 | Three independent random variables that are uniformly distributed 1477 | between 0 and 1. 1478 | 1479 | >>> q = random_quaternion() 1480 | >>> numpy.allclose(1, vector_norm(q)) 1481 | True 1482 | >>> q = random_quaternion(numpy.random.random(3)) 1483 | >>> len(q.shape), q.shape[0]==4 1484 | (1, True) 1485 | 1486 | """ 1487 | if rand is None: 1488 | rand = numpy.random.rand(3) 1489 | else: 1490 | assert len(rand) == 3 1491 | r1 = numpy.sqrt(1.0 - rand[0]) 1492 | r2 = numpy.sqrt(rand[0]) 1493 | pi2 = math.pi * 2.0 1494 | t1 = pi2 * rand[1] 1495 | t2 = pi2 * rand[2] 1496 | return numpy.array([numpy.cos(t2)*r2, numpy.sin(t1)*r1, 1497 | numpy.cos(t1)*r1, numpy.sin(t2)*r2]) 1498 | 1499 | 1500 | def random_rotation_matrix(rand=None): 1501 | """Return uniform random rotation matrix. 
1502 | 1503 | rand: array like 1504 | Three independent random variables that are uniformly distributed 1505 | between 0 and 1 for each returned quaternion. 1506 | 1507 | >>> R = random_rotation_matrix() 1508 | >>> numpy.allclose(numpy.dot(R.T, R), numpy.identity(4)) 1509 | True 1510 | 1511 | """ 1512 | return quaternion_matrix(random_quaternion(rand)) 1513 | 1514 | 1515 | class Arcball(object): 1516 | """Virtual Trackball Control. 1517 | 1518 | >>> ball = Arcball() 1519 | >>> ball = Arcball(initial=numpy.identity(4)) 1520 | >>> ball.place([320, 320], 320) 1521 | >>> ball.down([500, 250]) 1522 | >>> ball.drag([475, 275]) 1523 | >>> R = ball.matrix() 1524 | >>> numpy.allclose(numpy.sum(R), 3.90583455) 1525 | True 1526 | >>> ball = Arcball(initial=[1, 0, 0, 0]) 1527 | >>> ball.place([320, 320], 320) 1528 | >>> ball.setaxes([1, 1, 0], [-1, 1, 0]) 1529 | >>> ball.constrain = True 1530 | >>> ball.down([400, 200]) 1531 | >>> ball.drag([200, 400]) 1532 | >>> R = ball.matrix() 1533 | >>> numpy.allclose(numpy.sum(R), 0.2055924) 1534 | True 1535 | >>> ball.next() 1536 | 1537 | """ 1538 | def __init__(self, initial=None): 1539 | """Initialize virtual trackball control. 1540 | 1541 | initial : quaternion or rotation matrix 1542 | 1543 | """ 1544 | self._axis = None 1545 | self._axes = None 1546 | self._radius = 1.0 1547 | self._center = [0.0, 0.0] 1548 | self._vdown = numpy.array([0.0, 0.0, 1.0]) 1549 | self._constrain = False 1550 | if initial is None: 1551 | self._qdown = numpy.array([1.0, 0.0, 0.0, 0.0]) 1552 | else: 1553 | initial = numpy.array(initial, dtype=numpy.float64) 1554 | if initial.shape == (4, 4): 1555 | self._qdown = quaternion_from_matrix(initial) 1556 | elif initial.shape == (4, ): 1557 | initial /= vector_norm(initial) 1558 | self._qdown = initial 1559 | else: 1560 | raise ValueError("initial not a quaternion or matrix") 1561 | self._qnow = self._qpre = self._qdown 1562 | 1563 | def place(self, center, radius): 1564 | """Place Arcball, e.g. when window size changes. 1565 | 1566 | center : sequence[2] 1567 | Window coordinates of trackball center. 1568 | radius : float 1569 | Radius of trackball in window coordinates. 
1570 | 1571 | """ 1572 | self._radius = float(radius) 1573 | self._center[0] = center[0] 1574 | self._center[1] = center[1] 1575 | 1576 | def setaxes(self, *axes): 1577 | """Set axes to constrain rotations.""" 1578 | if axes is None: 1579 | self._axes = None 1580 | else: 1581 | self._axes = [unit_vector(axis) for axis in axes] 1582 | 1583 | @property 1584 | def constrain(self): 1585 | """Return state of constrain to axis mode.""" 1586 | return self._constrain 1587 | 1588 | @constrain.setter 1589 | def constrain(self, value): 1590 | """Set state of constrain to axis mode.""" 1591 | self._constrain = bool(value) 1592 | 1593 | def down(self, point): 1594 | """Set initial cursor window coordinates and pick constrain-axis.""" 1595 | self._vdown = arcball_map_to_sphere(point, self._center, self._radius) 1596 | self._qdown = self._qpre = self._qnow 1597 | if self._constrain and self._axes is not None: 1598 | self._axis = arcball_nearest_axis(self._vdown, self._axes) 1599 | self._vdown = arcball_constrain_to_axis(self._vdown, self._axis) 1600 | else: 1601 | self._axis = None 1602 | 1603 | def drag(self, point): 1604 | """Update current cursor window coordinates.""" 1605 | vnow = arcball_map_to_sphere(point, self._center, self._radius) 1606 | if self._axis is not None: 1607 | vnow = arcball_constrain_to_axis(vnow, self._axis) 1608 | self._qpre = self._qnow 1609 | t = numpy.cross(self._vdown, vnow) 1610 | if numpy.dot(t, t) < _EPS: 1611 | self._qnow = self._qdown 1612 | else: 1613 | q = [numpy.dot(self._vdown, vnow), t[0], t[1], t[2]] 1614 | self._qnow = quaternion_multiply(q, self._qdown) 1615 | 1616 | def next(self, acceleration=0.0): 1617 | """Continue rotation in direction of last drag.""" 1618 | q = quaternion_slerp(self._qpre, self._qnow, 2.0+acceleration, False) 1619 | self._qpre, self._qnow = self._qnow, q 1620 | 1621 | def matrix(self): 1622 | """Return homogeneous rotation matrix.""" 1623 | return quaternion_matrix(self._qnow) 1624 | 1625 | 1626 | def arcball_map_to_sphere(point, center, radius): 1627 | """Return unit sphere coordinates from window coordinates.""" 1628 | v0 = (point[0] - center[0]) / radius 1629 | v1 = (center[1] - point[1]) / radius 1630 | n = v0*v0 + v1*v1 1631 | if n > 1.0: 1632 | # position outside of sphere 1633 | n = math.sqrt(n) 1634 | return numpy.array([v0/n, v1/n, 0.0]) 1635 | else: 1636 | return numpy.array([v0, v1, math.sqrt(1.0 - n)]) 1637 | 1638 | 1639 | def arcball_constrain_to_axis(point, axis): 1640 | """Return sphere point perpendicular to axis.""" 1641 | v = numpy.array(point, dtype=numpy.float64, copy=True) 1642 | a = numpy.array(axis, dtype=numpy.float64, copy=True) 1643 | v -= a * numpy.dot(a, v) # on plane 1644 | n = vector_norm(v) 1645 | if n > _EPS: 1646 | if v[2] < 0.0: 1647 | numpy.negative(v, v) 1648 | v /= n 1649 | return v 1650 | if a[2] == 1.0: 1651 | return numpy.array([1.0, 0.0, 0.0]) 1652 | return unit_vector([-a[1], a[0], 0.0]) 1653 | 1654 | 1655 | def arcball_nearest_axis(point, axes): 1656 | """Return axis, which arc is nearest to point.""" 1657 | point = numpy.array(point, dtype=numpy.float64, copy=False) 1658 | nearest = None 1659 | mx = -1.0 1660 | for axis in axes: 1661 | t = numpy.dot(arcball_constrain_to_axis(point, axis), point) 1662 | if t > mx: 1663 | nearest = axis 1664 | mx = t 1665 | return nearest 1666 | 1667 | 1668 | # epsilon for testing whether a number is close to zero 1669 | _EPS = numpy.finfo(float).eps * 4.0 1670 | 1671 | # axis sequences for Euler angles 1672 | _NEXT_AXIS = [1, 2, 0, 1] 1673 | 1674 | # map axes 
strings to/from tuples of inner axis, parity, repetition, frame 1675 | _AXES2TUPLE = { 1676 | 'sxyz': (0, 0, 0, 0), 'sxyx': (0, 0, 1, 0), 'sxzy': (0, 1, 0, 0), 1677 | 'sxzx': (0, 1, 1, 0), 'syzx': (1, 0, 0, 0), 'syzy': (1, 0, 1, 0), 1678 | 'syxz': (1, 1, 0, 0), 'syxy': (1, 1, 1, 0), 'szxy': (2, 0, 0, 0), 1679 | 'szxz': (2, 0, 1, 0), 'szyx': (2, 1, 0, 0), 'szyz': (2, 1, 1, 0), 1680 | 'rzyx': (0, 0, 0, 1), 'rxyx': (0, 0, 1, 1), 'ryzx': (0, 1, 0, 1), 1681 | 'rxzx': (0, 1, 1, 1), 'rxzy': (1, 0, 0, 1), 'ryzy': (1, 0, 1, 1), 1682 | 'rzxy': (1, 1, 0, 1), 'ryxy': (1, 1, 1, 1), 'ryxz': (2, 0, 0, 1), 1683 | 'rzxz': (2, 0, 1, 1), 'rxyz': (2, 1, 0, 1), 'rzyz': (2, 1, 1, 1)} 1684 | 1685 | _TUPLE2AXES = dict((v, k) for k, v in _AXES2TUPLE.items()) 1686 | 1687 | 1688 | def vector_norm(data, axis=None, out=None): 1689 | """Return length, i.e. Euclidean norm, of ndarray along axis. 1690 | 1691 | >>> v = numpy.random.random(3) 1692 | >>> n = vector_norm(v) 1693 | >>> numpy.allclose(n, numpy.linalg.norm(v)) 1694 | True 1695 | >>> v = numpy.random.rand(6, 5, 3) 1696 | >>> n = vector_norm(v, axis=-1) 1697 | >>> numpy.allclose(n, numpy.sqrt(numpy.sum(v*v, axis=2))) 1698 | True 1699 | >>> n = vector_norm(v, axis=1) 1700 | >>> numpy.allclose(n, numpy.sqrt(numpy.sum(v*v, axis=1))) 1701 | True 1702 | >>> v = numpy.random.rand(5, 4, 3) 1703 | >>> n = numpy.empty((5, 3)) 1704 | >>> vector_norm(v, axis=1, out=n) 1705 | >>> numpy.allclose(n, numpy.sqrt(numpy.sum(v*v, axis=1))) 1706 | True 1707 | >>> vector_norm([]) 1708 | 0.0 1709 | >>> vector_norm([1]) 1710 | 1.0 1711 | 1712 | """ 1713 | data = numpy.array(data, dtype=numpy.float64, copy=True) 1714 | if out is None: 1715 | if data.ndim == 1: 1716 | return math.sqrt(numpy.dot(data, data)) 1717 | data *= data 1718 | out = numpy.atleast_1d(numpy.sum(data, axis=axis)) 1719 | numpy.sqrt(out, out) 1720 | return out 1721 | else: 1722 | data *= data 1723 | numpy.sum(data, axis=axis, out=out) 1724 | numpy.sqrt(out, out) 1725 | 1726 | 1727 | def unit_vector(data, axis=None, out=None): 1728 | """Return ndarray normalized by length, i.e. Euclidean norm, along axis. 1729 | 1730 | >>> v0 = numpy.random.random(3) 1731 | >>> v1 = unit_vector(v0) 1732 | >>> numpy.allclose(v1, v0 / numpy.linalg.norm(v0)) 1733 | True 1734 | >>> v0 = numpy.random.rand(5, 4, 3) 1735 | >>> v1 = unit_vector(v0, axis=-1) 1736 | >>> v2 = v0 / numpy.expand_dims(numpy.sqrt(numpy.sum(v0*v0, axis=2)), 2) 1737 | >>> numpy.allclose(v1, v2) 1738 | True 1739 | >>> v1 = unit_vector(v0, axis=1) 1740 | >>> v2 = v0 / numpy.expand_dims(numpy.sqrt(numpy.sum(v0*v0, axis=1)), 1) 1741 | >>> numpy.allclose(v1, v2) 1742 | True 1743 | >>> v1 = numpy.empty((5, 4, 3)) 1744 | >>> unit_vector(v0, axis=1, out=v1) 1745 | >>> numpy.allclose(v1, v2) 1746 | True 1747 | >>> list(unit_vector([])) 1748 | [] 1749 | >>> list(unit_vector([1])) 1750 | [1.0] 1751 | 1752 | """ 1753 | if out is None: 1754 | data = numpy.array(data, dtype=numpy.float64, copy=True) 1755 | if data.ndim == 1: 1756 | data /= math.sqrt(numpy.dot(data, data)) 1757 | return data 1758 | else: 1759 | if out is not data: 1760 | out[:] = numpy.array(data, copy=False) 1761 | data = out 1762 | length = numpy.atleast_1d(numpy.sum(data*data, axis)) 1763 | numpy.sqrt(length, length) 1764 | if axis is not None: 1765 | length = numpy.expand_dims(length, axis) 1766 | data /= length 1767 | if out is None: 1768 | return data 1769 | 1770 | 1771 | def random_vector(size): 1772 | """Return array of random doubles in the half-open interval [0.0, 1.0). 
1773 | 1774 | >>> v = random_vector(10000) 1775 | >>> numpy.all(v >= 0) and numpy.all(v < 1) 1776 | True 1777 | >>> v0 = random_vector(10) 1778 | >>> v1 = random_vector(10) 1779 | >>> numpy.any(v0 == v1) 1780 | False 1781 | 1782 | """ 1783 | return numpy.random.random(size) 1784 | 1785 | 1786 | def vector_product(v0, v1, axis=0): 1787 | """Return vector perpendicular to vectors. 1788 | 1789 | >>> v = vector_product([2, 0, 0], [0, 3, 0]) 1790 | >>> numpy.allclose(v, [0, 0, 6]) 1791 | True 1792 | >>> v0 = [[2, 0, 0, 2], [0, 2, 0, 2], [0, 0, 2, 2]] 1793 | >>> v1 = [[3], [0], [0]] 1794 | >>> v = vector_product(v0, v1) 1795 | >>> numpy.allclose(v, [[0, 0, 0, 0], [0, 0, 6, 6], [0, -6, 0, -6]]) 1796 | True 1797 | >>> v0 = [[2, 0, 0], [2, 0, 0], [0, 2, 0], [2, 0, 0]] 1798 | >>> v1 = [[0, 3, 0], [0, 0, 3], [0, 0, 3], [3, 3, 3]] 1799 | >>> v = vector_product(v0, v1, axis=1) 1800 | >>> numpy.allclose(v, [[0, 0, 6], [0, -6, 0], [6, 0, 0], [0, -6, 6]]) 1801 | True 1802 | 1803 | """ 1804 | return numpy.cross(v0, v1, axis=axis) 1805 | 1806 | 1807 | def angle_between_vectors(v0, v1, directed=True, axis=0): 1808 | """Return angle between vectors. 1809 | 1810 | If directed is False, the input vectors are interpreted as undirected axes, 1811 | i.e. the maximum angle is pi/2. 1812 | 1813 | >>> a = angle_between_vectors([1, -2, 3], [-1, 2, -3]) 1814 | >>> numpy.allclose(a, math.pi) 1815 | True 1816 | >>> a = angle_between_vectors([1, -2, 3], [-1, 2, -3], directed=False) 1817 | >>> numpy.allclose(a, 0) 1818 | True 1819 | >>> v0 = [[2, 0, 0, 2], [0, 2, 0, 2], [0, 0, 2, 2]] 1820 | >>> v1 = [[3], [0], [0]] 1821 | >>> a = angle_between_vectors(v0, v1) 1822 | >>> numpy.allclose(a, [0, 1.5708, 1.5708, 0.95532]) 1823 | True 1824 | >>> v0 = [[2, 0, 0], [2, 0, 0], [0, 2, 0], [2, 0, 0]] 1825 | >>> v1 = [[0, 3, 0], [0, 0, 3], [0, 0, 3], [3, 3, 3]] 1826 | >>> a = angle_between_vectors(v0, v1, axis=1) 1827 | >>> numpy.allclose(a, [1.5708, 1.5708, 1.5708, 0.95532]) 1828 | True 1829 | 1830 | """ 1831 | v0 = numpy.array(v0, dtype=numpy.float64, copy=False) 1832 | v1 = numpy.array(v1, dtype=numpy.float64, copy=False) 1833 | dot = numpy.sum(v0 * v1, axis=axis) 1834 | dot /= vector_norm(v0, axis=axis) * vector_norm(v1, axis=axis) 1835 | dot = numpy.clip(dot, -1.0, 1.0) 1836 | return numpy.arccos(dot if directed else numpy.fabs(dot)) 1837 | 1838 | 1839 | def inverse_matrix(matrix): 1840 | """Return inverse of square transformation matrix. 1841 | 1842 | >>> M0 = random_rotation_matrix() 1843 | >>> M1 = inverse_matrix(M0.T) 1844 | >>> numpy.allclose(M1, numpy.linalg.inv(M0.T)) 1845 | True 1846 | >>> for size in range(1, 7): 1847 | ... M0 = numpy.random.rand(size, size) 1848 | ... M1 = inverse_matrix(M0) 1849 | ... if not numpy.allclose(M1, numpy.linalg.inv(M0)): print(size) 1850 | 1851 | """ 1852 | return numpy.linalg.inv(matrix) 1853 | 1854 | 1855 | def concatenate_matrices(*matrices): 1856 | """Return concatenation of series of transformation matrices. 1857 | 1858 | >>> M = numpy.random.rand(16).reshape((4, 4)) - 0.5 1859 | >>> numpy.allclose(M, concatenate_matrices(M)) 1860 | True 1861 | >>> numpy.allclose(numpy.dot(M, M.T), concatenate_matrices(M, M.T)) 1862 | True 1863 | 1864 | """ 1865 | M = numpy.identity(4) 1866 | for i in matrices: 1867 | M = numpy.dot(M, i) 1868 | return M 1869 | 1870 | 1871 | def is_same_transform(matrix0, matrix1): 1872 | """Return True if two matrices perform same transformation. 
1873 |
1874 |     >>> is_same_transform(numpy.identity(4), numpy.identity(4))
1875 |     True
1876 |     >>> is_same_transform(numpy.identity(4), random_rotation_matrix())
1877 |     False
1878 |
1879 |     """
1880 |     matrix0 = numpy.array(matrix0, dtype=numpy.float64, copy=True)
1881 |     matrix0 /= matrix0[3, 3]
1882 |     matrix1 = numpy.array(matrix1, dtype=numpy.float64, copy=True)
1883 |     matrix1 /= matrix1[3, 3]
1884 |     return numpy.allclose(matrix0, matrix1)
1885 |
1886 |
1887 | def is_same_quaternion(q0, q1):
1888 |     """Return True if two quaternions are equal."""
1889 |     q0 = numpy.array(q0)
1890 |     q1 = numpy.array(q1)
1891 |     return numpy.allclose(q0, q1) or numpy.allclose(q0, -q1)
1892 |
1893 |
1894 | def _import_module(name, package=None, warn=True, prefix='_py_', ignore='_'):
1895 |     """Try import all public attributes from module into global namespace.
1896 |
1897 |     Existing attributes with name clashes are renamed with prefix.
1898 |     Attributes starting with underscore are ignored by default.
1899 |
1900 |     Return True on successful import.
1901 |
1902 |     """
1903 |     import warnings
1904 |     from importlib import import_module
1905 |     try:
1906 |         if not package:
1907 |             module = import_module(name)
1908 |         else:
1909 |             module = import_module('.' + name, package=package)
1910 |     except ImportError:
1911 |         if warn:
1912 |             # warnings.warn('failed to import module %s' % name)
1913 |             pass
1914 |     else:
1915 |         for attr in dir(module):
1916 |             if ignore and attr.startswith(ignore):
1917 |                 continue
1918 |             if prefix:
1919 |                 if attr in globals():
1920 |                     globals()[prefix + attr] = globals()[attr]
1921 |                 elif warn:
1922 |                     warnings.warn('no Python implementation of ' + attr)
1923 |             globals()[attr] = getattr(module, attr)
1924 |         return True
1925 |
1926 |
1927 | _import_module('_transformations')
1928 |
1929 | if __name__ == '__main__':
1930 |     import doctest
1931 |     import random  # noqa: used in doctests
1932 |     try:
1933 |         numpy.set_printoptions(suppress=True, precision=5, legacy='1.13')
1934 |     except TypeError:
1935 |         numpy.set_printoptions(suppress=True, precision=5)
1936 |     doctest.testmod()
1937 |
--------------------------------------------------------------------------------
/lib/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import math
3 |
4 |
5 | def make_box():
6 |     """
7 |     function to make a grid of points on the surface of a 3D unit box
8 |     the bounds are fixed to lower = -0.5 and upper = 0.5 on every axis,
9 |     and num = 18 points are placed along each axis, so every face
10 |     carries num**2 points
11 |     returns a 2D numpy array of shape (num**2 * 6, 3), i.e. a point cloud
12 |     """
13 |     lower = -0.5
14 |     upper = 0.5
15 |     num = 18
16 |     a = np.linspace(lower, upper, num)
17 |     b = np.linspace(lower, upper, num)
18 |     grid = np.transpose([np.tile(a, len(b)), np.repeat(b, len(a))])
19 |
20 |     c1 = np.repeat(0.5, len(grid))
21 |     c1 = np.reshape(c1, (len(c1), -1))
22 |     c2 = np.repeat(-0.5, len(grid))
23 |     c2 = np.reshape(c2, (len(c2), -1))
24 |
25 |     up = np.hstack((grid, c1))  # upper face, z == 0.5
26 |     low = np.hstack((grid, c2))  # lower face, z == -0.5
27 |     front = up[:, [0, 2, 1]]  # front face, y == 0.5
28 |     back = low[:, [0, 2, 1]]  # back face, y == -0.5
29 |     right = up[:, [2, 0, 1]]  # right face, x == 0.5
30 |     left = low[:, [2, 0, 1]]  # left face, x == -0.5
31 |
32 |     six_faces = np.vstack((front, back, right, left, up, low))
33 |     return six_faces
34 |
35 |
36 | def make_cylinder():
37 |     """
38 |     function to make a grid from a cylinder centered at (0, 0, 0).
The cylinder's radius is 0.5 and its height is 1.
39 |     Method:
40 |     1) the lateral surface has 4 times the area of each end circle, so we sample 4 times as many points from it
41 |     2) to match the box grid, the total number of points is 1944
42 |     3) on the upper and lower faces, points are sampled at a fixed angular step and at fixed distances along the radius
43 |     4) on the lateral surface, points are sampled along fixed vertical lines
44 |     """
45 |     # make the upper and lower faces, not inclusive of the boundary points
46 |     # the angular step is 10 degrees, i.e. 36 directions per face
47 |     n = 9  # number of points for every radius
48 |     # the outer radius is 0.5 (diameter 1)
49 |     radius_all = np.linspace(0, 0.5, n + 2)[1:10]  # radii of the sub-circles, excluding the center and the boundary
50 |     res = []
51 |     for i, theta in enumerate(range(0, 360, 10)):
52 |         x = math.sin(math.radians(theta))  # theta is in degrees, so convert
53 |         y = math.cos(math.radians(theta))
54 |         for r in radius_all:
55 |             res.append([r * x, r * y])
56 |     # add z axis
57 |     z = np.reshape(np.repeat(0.5, len(res)), (len(res), -1))
58 |     upper = np.hstack((np.array(res), z))  # upper face
59 |     z = np.reshape(np.repeat(-0.5, len(res)), (len(res), -1))
60 |     lower = np.hstack((np.array(res), z))  # lower face
61 |
62 |     # design of the middle layer: the theta step is 5 degrees, and every vertical line holds 18 points including the boundaries
63 |     height = np.linspace(-0.5, 0.5, 18)
64 |     res = []
65 |     for theta in range(0, 360, 5):
66 |         x = 0.5 * math.sin(math.radians(theta))
67 |         y = 0.5 * math.cos(math.radians(theta))
68 |         for z in height:
69 |             res.append([x, y, z])
70 |     middle = np.array(res)
71 |
72 |     cylinder = np.vstack((upper, lower, middle))
73 |     return cylinder
74 |
75 |
76 | def make_sphere():
77 |     """
78 |     function to sample a grid from a sphere of radius 0.5
79 |     """
80 |     theta = np.linspace(0, 360, 36)  # azimuth in degrees, determining x and y
81 |     phi = np.linspace(0, 360, 54)  # polar angle in degrees, determining z
82 |
83 |     res = []
84 |     for p in phi:
85 |         z = math.sin(math.radians(p)) * 0.5
86 |         r0 = math.cos(math.radians(p)) * 0.5
87 |         for t in theta:
88 |             x = math.sin(math.radians(t)) * r0
89 |             y = math.cos(math.radians(t)) * r0
90 |             res.append([x, y, z])
91 |
92 |     sphere = np.array(res)
93 |     return sphere
94 |
--------------------------------------------------------------------------------
/metrics/readme.md:
--------------------------------------------------------------------------------
1 | Download the metrics code from [PointFlow](https://github.com/stevenygd/PointFlow/tree/master/metrics) and compile it in this folder (see the Evaluation Steps in the top-level readme).
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # CASS: Learning Canonical Shape Space for Category-Level 6D Object Pose and Size Estimation
2 |
3 | ## Evaluation Steps
4 | 1. Install the following requirements:
5 |
6 |    ```
7 |    open3d==0.8.0.0
8 |    opencv-python==4.1.1.26
9 |    torch==1.2.0
10 |    torchvision==0.4.0
11 |    tqdm==4.32.1
12 |    trimesh==3.2.20
13 |    ```
14 |
15 | 2. Compile "./metrics" for **re-evaluating** reconstructed models. You can skip this step and delete lines 25-28 in ./tools/eval.py if you have downloaded our results in the next step.
16 |
17 | 3. Download predicted masks and pretrained models.
18 |
19 |    You can download our pretrained models, results and segmentation masks of the real test dataset in [NOCS](https://github.com/hughw19/NOCS_CVPR2019) from [Google Drive](https://drive.google.com/drive/folders/1yvVpvB_0YuqNAaeOzE5YfO5dvwDaoz_n).
20 |
21 |    If you want to **re-calculate** CASS's results, please download the NOCS [real test dataset](http://download.cs.stanford.edu/orion/nocs/real_test.zip) and [3d models](http://download.cs.stanford.edu/orion/nocs/obj_models.zip).
22 |
23 | 4. Evaluate CASS and NOCS
24 |
25 |    1. Unzip the predicted results and specify `--save_dir` in eval.sh. You will get evaluation results for CASS and NOCS at the same time (see the sketch below).
26 |    2. If you want to re-calculate CASS's results, place the segmentation masks of NOCS, which are included in the Google Drive folder, into the real test dataset folder along with their color images. Refer to lines 1-2 of ./eval.sh for how to start the evaluation.
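   For reference, here is a minimal sketch of invocations equivalent to what eval.sh is expected to run, assuming the command-line flags defined in ./tools/eval.py and that you launch from the repository root (all paths below are illustrative placeholders):

   ```
   # score CASS and NOCS from the downloaded predictions (step 4.1)
   python tools/eval.py --mode cass --save_dir /path/to/unzipped_results
   python tools/eval.py --mode nocs --save_dir /path/to/unzipped_results

   # re-calculate CASS's results from the raw real test data (step 4.2)
   python tools/eval.py --mode cass --eval --cuda \
       --dataset_dir /path/to/nocs/real_test --save_dir /path/to/unzipped_results
   ```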
27 |
28 | ## Acknowledgement
29 |
30 | We have referred to parts of the code from [NOCS_CVPR2019](https://github.com/hughw19/NOCS_CVPR2019), [FoldingNet](https://github.com/jtpils/FoldingNet), [DenseFusion](https://github.com/j96w/DenseFusion), [Open3D](https://github.com/intel-isl/Open3D) and [PointFlow](https://github.com/stevenygd/PointFlow/tree/master).
31 |
--------------------------------------------------------------------------------
/tools/_init_paths.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | sys.path.insert(0, os.getcwd())
4 |
--------------------------------------------------------------------------------
/tools/eval.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import glob
3 | import json
4 | import os
5 |
6 | import cv2
7 | import numpy as np
8 | import numpy.ma as ma
9 | import open3d as o3d
10 | import torch
11 | import torch.nn as nn
12 | import torch.nn.functional as F
13 | import torchvision.transforms as transforms
14 | import tqdm
15 | from torch.autograd import Variable
16 | import argparse
17 |
18 | import _init_paths
19 | import utils
20 | from datasets.dataset import get_bbox, load_obj, PoseDataset
21 | from lib.models import CASS
22 | from lib.transformations import (quaternion_from_matrix,
23 |                                  quaternion_matrix)
24 |
25 | try:
26 |     from metrics.evaluation_metrics import EMD_CD
27 | except ImportError:
28 |     raise ImportError("Failed to import the EMD_CD metric. Please compile `metrics` if you want to do reconstruction evaluation; otherwise, just comment out or delete lines 25-28 as described in the readme.")
29 |
30 | parser = argparse.ArgumentParser(description="eval CASS model")
31 | parser.add_argument("--resume_model", type=str, default="cass_best.pth",
32 |                     help="resume model in 'trained_models' folder.")
33 | parser.add_argument("--dataset_dir", type=str, default="",
34 |                     help="dataset root of nocs")
35 | parser.add_argument("--cuda", action="store_true", default=False)
36 | parser.add_argument("--draw", action="store_true", default=False,
37 |                     help="whether to draw the point cloud image during evaluation.")
38 | parser.add_argument("--save_dir", type=str, default="",
39 |                     help="directory to save the evaluation results.")
40 | parser.add_argument("--eval", action="store_true",
41 |                     help="whether to re-calculate the results for cass")
42 | parser.add_argument("--mode", type=str, default="cass",
43 |                     choices=["cass", "nocs"], help="eval cass or nocs")
44 |
45 | opt = parser.parse_args()
46 | opt.intrinsics = np.array(
47 |     [[591.0125, 0, 322.525], [0, 590.16775, 244.11084], [0, 0, 1]])
48 |
49 |
50 | norm = transforms.Normalize(mean=[0.51, 0.47, 0.44], std=[0.29, 0.27, 0.28])
51 | xmap = np.array([[j for i in range(640)] for j in range(480)])
52 | ymap = np.array([[i for i in range(640)] for j in range(480)])
53 | cam_cx = 322.525
54 | cam_cy = 244.11084
55 | cam_fx = 591.0125
56 | cam_fy = 590.16775
57 | cam_scale = 1000.0
58 | num_obj = 6
59 | img_width = 480
60 | img_length = 640
61 | num_points = 500
62 | iteration = 5
63 | bs = 1
64 | symmetric = [0, 1, 3]  # network indices of the symmetric classes (bottle, bowl, can)
65 | # 0 1_bottle_02876657
66 | # 1 2_bowl_02880940
67 | # 2 3_camera_02942699
68 | # 3 4_can_02946921
69 | # 4 5_laptop_03642806
70 | # 5 6_mug_03797390
71 |
72 | opt.num_objects = 6
73 | opt.num_points = 500
74 |
75 |
76 | def to_device(x):
77 |     if opt.cuda:
78 |         return x.cuda()
79 |     else:
80 |         return x.cpu()
81 |
82 |
83 | class Model(nn.Module):
84 |     def __init__(self, opt):
85 |         super().__init__()
86 |         self.opt = opt
87 |
88 |         self.casses = self.load_model()
89 |
90 |     def load_model(self):
91 |         cass = CASS(self.opt)
92 |         resume_path = os.path.join(
93 |             "trained_models", opt.resume_model)
94 |         try:
95 |             cass.load_state_dict(torch.load(resume_path), strict=True)
96 |         except Exception:
97 |             raise FileNotFoundError(resume_path)
98 |
99 |         return cass
100 |
101 |     def get_model(self, cls_idx):
102 |         return self.casses  # a single CASS network is shared across all classes
103 |
104 |
105 | def get_predict_scales(recd):
106 |     abs_coord_pts = np.abs(recd)
107 |     return 2 * np.amax(abs_coord_pts, axis=0)
108 |
109 |
110 | def calculate_emd_cf(point_a, point_b):
111 |     obj = torch.from_numpy(point_a).unsqueeze(dim=0)
112 |     pre_points = torch.from_numpy(
113 |         point_b).unsqueeze(dim=0)
114 |     obj = to_device(obj).float()
115 |     pre_points = to_device(pre_points).float()
116 |
117 |     res = EMD_CD(pre_points, obj, 1, accelerated_cd=True)
118 |     res = {k: (v.cpu().detach().item() if not isinstance(
119 |         v, float) else v) for k, v in res.items()}
120 |
121 |     return res["MMD-CD"], res["MMD-EMD"]
122 |
123 |
124 | def eval_nocs(model, img, depth, masks, cls_ids, cad_model_info, cad_model_scale):
125 |     my_result = np.zeros((len(cls_ids), 7))
126 |     scales = np.zeros((len(cls_ids), 3))
127 |     chamfer_dis_cass = np.zeros((len(cls_ids)))
128 |     emd_dis_cass = np.zeros((len(cls_ids)))
129 |
130 |     for i in range(len(cls_ids)):
131 |         # get model
132 |         # class ids are 1-based (0 is BG), so shift by one to index the network
133 |         cass = model.get_model(cls_ids[i] - 1)
134 |         try:
135 |             mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0))
136 |             mask_label = ma.getmaskarray(ma.masked_equal(
137 |                 masks, i))  # NOCS instance masks start from 1
138 |             mask = mask_label * mask_depth
139 |
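            # from this instance mask, the code below crops the bounding box and
            # samples num_points pixels (padding with 'wrap' if too few are valid)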
140 |             rmin, rmax, cmin, cmax = get_bbox(mask)
141 |
142 |             choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]
143 |             if len(choose) > num_points:
144 |                 c_mask = np.zeros(len(choose), dtype=int)
145 |                 c_mask[:num_points] = 1
146 |                 np.random.shuffle(c_mask)
147 |                 choose = choose[c_mask.nonzero()]
148 |             else:
149 |                 choose = np.pad(choose, (0, num_points - len(choose)), 'wrap')
150 |
151 |             depth_masked = depth[rmin:rmax, cmin:cmax].flatten(
152 |             )[choose][:, np.newaxis].astype(np.float32)
153 |             xmap_masked = xmap[rmin:rmax, cmin:cmax].flatten(
154 |             )[choose][:, np.newaxis].astype(np.float32)
155 |             ymap_masked = ymap[rmin:rmax, cmin:cmax].flatten(
156 |             )[choose][:, np.newaxis].astype(np.float32)
157 |             choose = np.array([choose])
158 |
159 |             pt2 = depth_masked / cam_scale
160 |             pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx
161 |             pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy
162 |             cloud = np.concatenate((-pt0, -pt1, pt2), axis=1)
163 |
164 |             img_masked = np.array(img)[:, :, :3]
165 |             img_masked = np.transpose(img_masked, (2, 0, 1))
166 |             img_masked = img_masked[:, rmin:rmax, cmin:cmax]
167 |
168 |             cloud = torch.from_numpy(cloud.astype(np.float32))
169 |             choose = torch.LongTensor(choose.astype(np.int32))
170 |             img_masked = norm(torch.from_numpy(img_masked.astype(np.float32)))
171 |             index = torch.LongTensor([cls_ids[i] - 1])  # 0 is BG
172 |
173 |             cloud = to_device(Variable(cloud))
174 |             choose = to_device(Variable(choose))
175 |             img_masked = to_device(Variable(img_masked))
176 |             index = to_device(Variable(index))
177 |
178 |             cloud = cloud.view(1, num_points, 3)
179 |             img_masked = img_masked.view(1, 3, img_masked.size()[
180 |                 1], img_masked.size()[2])
181 |
182 |             folding_encode = cass.foldingnet.encode(img_masked, cloud, choose)
183 |             posenet_encode = cass.estimator.encode(img_masked, cloud, choose)
184 |
185 |             pred_r, pred_t, pred_c = cass.estimator.pose(
186 |                 torch.cat([posenet_encode, folding_encode], dim=1),
187 |                 index
188 |             )
189 |             recd = cass.foldingnet.recon(folding_encode)
190 |
191 |             # get pred_scales
192 |             scale = get_predict_scales(recd[0].detach().cpu().numpy())
193 |             scales[i] = scale
194 |             # load model
195 |             for ii, info in enumerate(cad_model_info):
196 |                 if cls_ids[i] == int(info["cls_id"]):
197 |                     model_path = info["model_path"]
198 |                     model_scale = cad_model_scale[ii]
199 |
200 |                     cad_model = load_obj(path=os.path.join(opt.dataset_dir, model_path[:-4]+"_{}.ply".format(
201 |                         num_points)), ori_path=os.path.join(opt.dataset_dir, model_path), num_points=num_points)
202 |                     # change to the real size.
203 |                     cad_model = cad_model * model_scale
204 |
205 |                     cd, emd = calculate_emd_cf(
206 |                         cad_model, recd.detach()[0].cpu().numpy())
207 |                     chamfer_dis_cass[i] = cd
208 |                     emd_dis_cass[i] = emd
209 |                     break
210 |             # if a wrong object was detected, set the distances to 0
211 |             else:
212 |                 emd_dis_cass[i] = 0
213 |                 chamfer_dis_cass[i] = 0
214 |
215 |             pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1)
216 |
217 |             pred_c = pred_c.view(bs, num_points)
218 |             how_max, which_max = torch.max(pred_c, 1)
219 |             pred_t = pred_t.view(bs * num_points, 1, 3)
220 |             points = cloud.view(bs * num_points, 1, 3)
221 |
222 |             my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
223 |             my_t = (points + pred_t)[which_max[0]].view(-1).cpu().data.numpy()
224 |             if cls_ids[i] - 1 not in symmetric:
225 |                 # run iterative refinement for the non-symmetric classes; this improves the pose
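                # each refinement pass canonicalizes the cloud with the current
                # estimate (new_cloud = (cloud - t) @ R), predicts a residual pose
                # with cass.refiner, and composes it: my_mat_final = my_mat @ my_mat_2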
226 |                 for ite in range(0, iteration):
227 |                     T = to_device(Variable(torch.from_numpy(my_t.astype(np.float32))).view(
228 |                         1, 3).repeat(num_points, 1).contiguous().view(1, num_points, 3))
229 |                     my_mat = quaternion_matrix(my_r)
230 |                     R = to_device(Variable(torch.from_numpy(
231 |                         my_mat[:3, :3].astype(np.float32))).view(1, 3, 3))
232 |                     my_mat[0:3, 3] = my_t
233 |
234 |                     new_cloud = torch.bmm((cloud - T), R).contiguous()
235 |                     pred_r, pred_t = cass.refiner(
236 |                         new_cloud, folding_encode, index)
237 |                     pred_r = pred_r.view(1, 1, -1)
238 |                     pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
239 |                     my_r_2 = pred_r.view(-1).cpu().data.numpy()
240 |                     my_t_2 = pred_t.view(-1).cpu().data.numpy()
241 |                     my_mat_2 = quaternion_matrix(my_r_2)
242 |
243 |                     my_mat_2[0:3, 3] = my_t_2
244 |
245 |                     my_mat_final = np.dot(my_mat, my_mat_2)
246 |                     my_r_final = copy.deepcopy(my_mat_final)
247 |                     my_r_final[0:3, 3] = 0
248 |                     my_r_final = quaternion_from_matrix(my_r_final, True)
249 |                     my_t_final = np.array(
250 |                         [my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]])
251 |
252 |                     my_pred = np.append(my_r_final, my_t_final)
253 |                     my_r = my_r_final
254 |                     my_t = my_t_final
255 |             else:
256 |                 my_pred = np.append(my_r, my_t)
257 |
258 |             # Here 'my_pred' is the final pose estimation result after refinement ('my_r': quaternion, 'my_t': translation)
259 |             my_result[i] = my_pred
260 |         except Exception:
261 |             # reached on any failure in the block above, e.g. an empty mask
262 |             print("Empty mask during evaluation, skip.")
263 |             my_result[i] = np.zeros(7)
264 |             scales[i] = np.array([0.1, 0.1, 0.1])
265 |
266 |             emd_dis_cass[i] = 0.0
267 |             chamfer_dis_cass[i] = 0.0
268 |     # convert the quaternion/translation pairs to 4x4 RTs
269 |     my_result_ret = []
270 |     for i in range(len(cls_ids)):
271 |         matrix = quaternion_matrix(my_result[i][:4]).astype(np.float32)
272 |         matrix[:3, 3] = my_result[i][4:]
273 |         my_result_ret.append(matrix)
274 |
275 |     return my_result_ret, scales, chamfer_dis_cass, emd_dis_cass
276 |
277 |
278 | def eval_interface(model, opt, result):
279 |     # load the data for one image here
280 |     # the gt mask stores its values in the last channel, but ours are stored in the first channel
281 |     path = result["image_path"]
282 |     masks = np.array(cv2.imread(os.path.join(
283 |         opt.dataset_dir, path+"_nocs_segmentation.png"))[:, :, 0])
284 |     img = np.array(cv2.imread(os.path.join(
285 |         opt.dataset_dir, path+"_color.png"))) / 255.0
286 |     depth = np.array(cv2.imread(os.path.join(
287 |         opt.dataset_dir, path+"_depth.png"), -1))
288 |
289 |     my_result_ret, scales, chamfer_dis_cass, emd_dis_cass = eval_nocs(
290 |         model, img, depth, masks, result["pred_class_ids"], cad_model_info=result[
291 |             "model_information"], cad_model_scale=result["gt_scales_for_model_in_CASS"]
292 |     )
293 |
294 |     my_result_ret = np.array(my_result_ret)
295 |     scales = np.array(scales)
296 |     chamfer_dis_cass = np.array(chamfer_dis_cass)
297 |     emd_dis_cass = np.array(emd_dis_cass)
298 |
299 |     return my_result_ret.tolist(), scales.tolist(), chamfer_dis_cass.tolist(), emd_dis_cass.tolist()
300 |
301 |
302 | def draw(opt, result):
303 |     """ Load data and draw visualization results.
304 |     """
305 |     path = result["image_path"]
306 |     image = cv2.imread(os.path.join(opt.dataset_dir, path+"_color.png"))
307 |
308 |     # Load GT Models
309 |     models_for_nocs = []
310 |     models_for_cass = []
311 |     for i, mf in enumerate(result["model_information"]):
312 |         model_path = mf["model_path"]
313 |         cad_model = load_obj(path=os.path.join(opt.dataset_dir, model_path[:-4]+"_{}.ply".format(
314 |             num_points)), ori_path=os.path.join(opt.dataset_dir, model_path), num_points=num_points)
315 |
316 |         # For NOCS, the normalized model itself serves as the GT points.
317 |         models_for_nocs.append(copy.deepcopy(cad_model))
318 |
319 |         # For CASS, the normalized model has to be multiplied by the scale to recover the real size.
320 |         models_for_cass.append(copy.deepcopy(
321 |             cad_model * result["gt_scales_for_model_in_CASS"][i]))
322 |
323 |     # Get the correct RTs for the class ids. If a target is missing we use np.eye(4); if multiple targets match, we only keep the first.
324 |     RTs_cass = []
325 |     RTs_nocs = []
326 |     misses = []
327 |     for i, cls in enumerate(result["gt_class_ids"]):
328 |         idx = result["pred_class_ids"] == cls
329 |         rts_nocs = result["pred_RTs"][idx]
330 |
331 |         rts_cass = result["pred_RTs_cass"][idx]
332 |
333 |         miss = False
334 |         if len(rts_cass) <= 0 or len(rts_nocs) <= 0:
335 |             rts_cass = np.eye(4)
336 |             rts_nocs = np.eye(4)
337 |             miss = True
338 |         else:  # keep the first match; this also squeezes the leading axis when there is exactly one
339 |             rts_cass = rts_cass[0]
340 |             rts_nocs = rts_nocs[0]
341 |         misses.append(miss)
342 |         RTs_nocs.append(rts_nocs)
343 |         RTs_cass.append(rts_cass)
344 |
345 |     (h, w) = image.shape[:2]
346 |     center = (w/2, h/2)
347 |
348 |     M = cv2.getRotationMatrix2D(center, 180, 1.0)
349 |     rotated = cv2.warpAffine(image, M, (w, h))
350 |
351 |     utils.draw(rotated, RTs_cass, models_for_cass, class_ids=result["gt_class_ids"], misses=misses, intrinsics=opt.intrinsics, save_path=os.path.join(
352 |         opt.save_dir, "vis", "_".join(path.split("/"))+"_cass.png"))
353 |     utils.draw(image, RTs_nocs, models_for_nocs, class_ids=result["gt_class_ids"], misses=misses, intrinsics=opt.intrinsics, save_path=os.path.join(
354 |         opt.save_dir, "vis", "_".join(path.split("/"))+"_nocs.png"))
355 |
356 |
357 | if __name__ == "__main__":
358 |     opt.class_names = PoseDataset.get_class_names()
359 |
360 |     eval_dir = os.path.join(opt.save_dir, "eval_{}".format(opt.mode))
361 |     os.makedirs(eval_dir, exist_ok=True)
362 |
363 |     if opt.mode == "cass":
364 |
365 |         if opt.eval:
366 |             model = to_device(Model(opt)).eval()
367 |
368 |         result_json_list = glob.glob(
369 |             os.path.join(opt.save_dir, "gt", "*.json"))
370 |         result_json_list = sorted(result_json_list)
371 |
372 |         final_results = []
373 |         for filename in tqdm.tqdm(result_json_list, desc="loading"):
374 |
375 |             if opt.eval:
376 |                 with open(filename, "r") as f:
377 |                     result = json.load(f)
378 |
379 |                 pred_RTs_cass, pred_scales_cass, chamfer_dis_cass, emd_dis_cass = eval_interface(
380 |                     model, opt, result)
381 |
382 |                 result["pred_RTs_cass"] = pred_RTs_cass
383 |                 result["pred_scales_cass"] = pred_scales_cass
384 |
385 |                 result["chamfer_dis_cass"] = chamfer_dis_cass
386 |                 result["emd_dis_cass"] = emd_dis_cass
387 |
388 |                 with open(os.path.join(eval_dir, os.path.basename(filename)), "w") as f:
389 |                     json.dump(result, f, indent=4)
390 |             else:
391 |                 with open(os.path.join(eval_dir, os.path.basename(filename)), "r") as f:
392 |                     result = json.load(f)
393 |
394 |             gt_class_ids = []
395 |             gt_scales_for_CASS = []
396 |             for m in result["model_information"]:
397 |                 gt_class_ids.append(int(m["cls_id"]))
398 |
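                # these metric GT scales are consumed later as "gt_scales" when scoring CASS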
gt_scales_for_CASS.append(m["gt_scales_for_CASS"]) 399 | result["gt_class_ids"] = gt_class_ids 400 | result["gt_handle_visibility"] = [1] * len(gt_class_ids) 401 | result["gt_scales_for_CASS"] = gt_scales_for_CASS 402 | 403 | # convert all label information to np.array if possible 404 | r = {} 405 | for k, v in result.items(): 406 | if isinstance(v, (list, tuple)): 407 | r[k] = np.array(v) 408 | else: 409 | r[k] = v 410 | final_results.append(r) 411 | 412 | if opt.draw: 413 | os.makedirs(os.path.join(opt.save_dir, "vis"), exist_ok=True) 414 | for r in tqdm.tqdm(final_results, desc="draw"): 415 | draw(opt, r) 416 | 417 | synset_names = ["BG"] + opt.class_names 418 | 419 | # eval 420 | eval_results = [] 421 | for i in final_results: 422 | i["pred_scales"] = i["pred_scales_cass"] 423 | i["pred_RTs"] = i["pred_RTs_cass"] 424 | i["pred_class_ids"] = i["pred_class_ids"] 425 | i["gt_scales"] = i["gt_scales_for_CASS"] 426 | i["gt_RTs"] = i["gt_RTs_for_CASS"] 427 | eval_results.append(i) 428 | aps = utils.compute_degree_cm_mAP( 429 | eval_results, synset_names, eval_dir, 430 | degree_thresholds=range(0, 61, 1), 431 | shift_thresholds=np.linspace(0, 1, 31)*15, 432 | iou_3d_thresholds=np.linspace(0, 1, 101), 433 | iou_pose_thres=0.1, 434 | use_matches_for_pose=True, eval_recon=True 435 | ) 436 | elif opt.mode == "nocs": 437 | result_json_list = glob.glob( 438 | os.path.join(opt.save_dir, "gt", "*.json")) 439 | result_json_list = sorted(result_json_list) 440 | 441 | final_results = [] 442 | for filename in tqdm.tqdm(result_json_list, desc="loading"): 443 | with open(os.path.join(filename), "r") as f: 444 | result = json.load(f) 445 | 446 | # convert all label information to np.array if possible 447 | r = {} 448 | for k, v in result.items(): 449 | if isinstance(v, (list, tuple)): 450 | r[k] = np.array(v) 451 | else: 452 | r[k] = v 453 | final_results.append(r) 454 | 455 | synset_names = ["BG"] + opt.class_names 456 | 457 | aps = utils.compute_degree_cm_mAP( 458 | final_results, synset_names, eval_dir, 459 | degree_thresholds=range(0, 61, 1), 460 | shift_thresholds=np.linspace(0, 1, 31)*15, 461 | iou_3d_thresholds=np.linspace(0, 1, 101), 462 | iou_pose_thres=0.1, 463 | use_matches_for_pose=True, eval_recon=False 464 | ) 465 | -------------------------------------------------------------------------------- /tools/utils.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import json 3 | import logging 4 | import math 5 | import os 6 | from ctypes import * 7 | from pprint import pprint 8 | 9 | import cv2 10 | import matplotlib.pyplot as plt 11 | import numpy as np 12 | import scipy.misc 13 | import skimage.color 14 | from tqdm import tqdm 15 | 16 | 17 | def setup_logger(logger_name, log_file, level=logging.INFO): 18 | l = logging.getLogger(logger_name) 19 | formatter = logging.Formatter('%(asctime)s : %(message)s') 20 | fileHandler = logging.FileHandler(log_file, mode='w') 21 | fileHandler.setFormatter(formatter) 22 | 23 | l.setLevel(level) 24 | l.addHandler(fileHandler) 25 | 26 | streamHandler = logging.StreamHandler() 27 | streamHandler.setFormatter(formatter) 28 | l.addHandler(streamHandler) 29 | return l 30 | 31 | 32 | def compute_3d_iou_new(RT_1, RT_2, scales_1, scales_2, handle_visibility, class_name_1, class_name_2): 33 | '''Computes IoU overlaps between two 3d bboxes. 
34 |     bbox_3d_1, bbox_3d_2: [3, 8]
35 |     '''
36 |     # axis-aligned 3D IoU of the two posed boxes
37 |     def asymmetric_3d_iou(RT_1, RT_2, scales_1, scales_2):
38 |         noc_cube_1 = get_3d_bbox(scales_1, 0)
39 |         bbox_3d_1 = transform_coordinates_3d(noc_cube_1, RT_1)
40 |
41 |         noc_cube_2 = get_3d_bbox(scales_2, 0)
42 |         bbox_3d_2 = transform_coordinates_3d(noc_cube_2, RT_2)
43 |
44 |         bbox_1_max = np.amax(bbox_3d_1, axis=1)  # per-axis extremes of the [3, 8] corner array
45 |         bbox_1_min = np.amin(bbox_3d_1, axis=1)
46 |         bbox_2_max = np.amax(bbox_3d_2, axis=1)
47 |         bbox_2_min = np.amin(bbox_3d_2, axis=1)
48 |
49 |         overlap_min = np.maximum(bbox_1_min, bbox_2_min)
50 |         overlap_max = np.minimum(bbox_1_max, bbox_2_max)
51 |
52 |         # intersections and union
53 |         if np.amin(overlap_max - overlap_min) < 0:
54 |             intersections = 0
55 |         else:
56 |             intersections = np.prod(overlap_max - overlap_min)
57 |         union = np.prod(bbox_1_max - bbox_1_min) + \
58 |             np.prod(bbox_2_max - bbox_2_min) - intersections
59 |         overlaps = intersections / union
60 |         return overlaps
61 |
62 |     if RT_1 is None or RT_2 is None:
63 |         return -1
64 |
65 |     symmetry_flag = False
66 |     if (class_name_1 in ['bottle', 'bowl', 'can'] and class_name_1 == class_name_2) or (class_name_1 == 'mug' and class_name_1 == class_name_2 and handle_visibility == 0):
67 |         # print('*'*10)
68 |
69 |         noc_cube_1 = get_3d_bbox(scales_1, 0)
70 |         noc_cube_2 = get_3d_bbox(scales_2, 0)
71 |         bbox_3d_2 = transform_coordinates_3d(noc_cube_2, RT_2)
72 |
73 |         def y_rotation_matrix(theta):
74 |             return np.array([[np.cos(theta), 0, np.sin(theta), 0],
75 |                              [0, 1, 0, 0],
76 |                              [-np.sin(theta), 0, np.cos(theta), 0],
77 |                              [0, 0, 0, 1]])
78 |
79 |         n = 20  # sample 20 rotations about the symmetry (y) axis and keep the best IoU
80 |         max_iou = 0
81 |         for i in range(n):
82 |             rotated_RT_1 = RT_1@y_rotation_matrix(2*math.pi*i/float(n))
83 |             max_iou = max(max_iou,
84 |                           asymmetric_3d_iou(rotated_RT_1, RT_2, scales_1, scales_2))
85 |     else:
86 |         max_iou = asymmetric_3d_iou(RT_1, RT_2, scales_1, scales_2)
87 |
88 |     return max_iou
89 |
90 |
91 | def compute_RT_degree_cm_symmetry(RT_1, RT_2, class_id, handle_visibility, synset_names):
92 |     '''
93 |     :param RT_1: [4, 4]. homogeneous affine transformation
94 |     :param RT_2: [4, 4].
homogeneous affine transformation 95 | :return: theta: angle difference of R in degree, shift: l2 difference of T in centimeter 96 | 97 | 98 | synset_names = ['BG', # 0 99 | 'bottle', # 1 100 | 'bowl', # 2 101 | 'camera', # 3 102 | 'can', # 4 103 | 'cap', # 5 104 | 'phone', # 6 105 | 'monitor', # 7 106 | 'laptop', # 8 107 | 'mug' # 9 108 | ] 109 | 110 | synset_names = ['BG', # 0 111 | 'bottle', # 1 112 | 'bowl', # 2 113 | 'camera', # 3 114 | 'can', # 4 115 | 'laptop', # 5 116 | 'mug' # 6 117 | ] 118 | ''' 119 | 120 | # make sure the last row is [0, 0, 0, 1] 121 | if RT_1 is None or RT_2 is None: 122 | return -1 123 | try: 124 | assert np.array_equal(RT_1[3, :], RT_2[3, :]) 125 | assert np.array_equal(RT_1[3, :], np.array([0, 0, 0, 1])) 126 | except AssertionError: 127 | print(RT_1[3, :], RT_2[3, :]) 128 | exit() 129 | 130 | R1 = RT_1[:3, :3] / np.cbrt(np.linalg.det(RT_1[:3, :3])) 131 | T1 = RT_1[:3, 3] 132 | 133 | R2 = RT_2[:3, :3] / np.cbrt(np.linalg.det(RT_2[:3, :3])) 134 | T2 = RT_2[:3, 3] 135 | 136 | # symmetric when rotating around y-axis 137 | if synset_names[class_id] in ['bottle', 'can', 'bowl']: 138 | y = np.array([0, 1, 0]) 139 | y1 = R1 @ y 140 | y2 = R2 @ y 141 | theta = np.arccos( 142 | y1.dot(y2) / (np.linalg.norm(y1) * np.linalg.norm(y2))) 143 | # symmetric when rotating around y-axis 144 | elif synset_names[class_id] == 'mug' and handle_visibility == 0: 145 | y = np.array([0, 1, 0]) 146 | y1 = R1 @ y 147 | y2 = R2 @ y 148 | theta = np.arccos( 149 | y1.dot(y2) / (np.linalg.norm(y1) * np.linalg.norm(y2))) 150 | elif synset_names[class_id] in ['phone', 'eggbox', 'glue']: 151 | y_180_RT = np.diag([-1.0, 1.0, -1.0]) 152 | R = R1 @ R2.transpose() 153 | R_rot = R1 @ y_180_RT @ R2.transpose() 154 | theta = min(np.arccos((np.trace(R) - 1) / 2), 155 | np.arccos((np.trace(R_rot) - 1) / 2)) 156 | else: 157 | R = R1 @ R2.transpose() 158 | theta = np.arccos((np.trace(R) - 1) / 2) 159 | 160 | theta *= 180 / np.pi 161 | shift = np.linalg.norm(T1 - T2) * 100 162 | result = np.array([theta, shift]) 163 | 164 | return result 165 | 166 | 167 | def get_3d_bbox(scale, shift=0): 168 | """ 169 | Input: 170 | scale: [3] or scalar 171 | shift: [3] or scalar 172 | Return 173 | bbox_3d: [3, N] 174 | 175 | """ 176 | if hasattr(scale, "__iter__"): 177 | bbox_3d = np.array([[scale[0] / 2, +scale[1] / 2, scale[2] / 2], 178 | [scale[0] / 2, +scale[1] / 2, -scale[2] / 2], 179 | [-scale[0] / 2, +scale[1] / 2, scale[2] / 2], 180 | [-scale[0] / 2, +scale[1] / 2, -scale[2] / 2], 181 | [+scale[0] / 2, -scale[1] / 2, scale[2] / 2], 182 | [+scale[0] / 2, -scale[1] / 2, -scale[2] / 2], 183 | [-scale[0] / 2, -scale[1] / 2, scale[2] / 2], 184 | [-scale[0] / 2, -scale[1] / 2, -scale[2] / 2]]) + shift 185 | else: 186 | bbox_3d = np.array([[scale / 2, +scale / 2, scale / 2], 187 | [scale / 2, +scale / 2, -scale / 2], 188 | [-scale / 2, +scale / 2, scale / 2], 189 | [-scale / 2, +scale / 2, -scale / 2], 190 | [+scale / 2, -scale / 2, scale / 2], 191 | [+scale / 2, -scale / 2, -scale / 2], 192 | [-scale / 2, -scale / 2, scale / 2], 193 | [-scale / 2, -scale / 2, -scale / 2]]) + shift 194 | 195 | bbox_3d = bbox_3d.transpose() 196 | return bbox_3d 197 | 198 | 199 | def transform_coordinates_3d(coordinates, RT): 200 | """ 201 | Input: 202 | coordinates: [3, N] 203 | RT: [4, 4] 204 | Return 205 | new_coordinates: [3, N] 206 | 207 | """ 208 | assert coordinates.shape[0] == 3 209 | coordinates = np.vstack([coordinates, np.ones( 210 | (1, coordinates.shape[1]), dtype=np.float32)]) 211 | new_coordinates = RT @ coordinates 212 | 
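    # divide by the homogeneous coordinate to return to Cartesian [3, N]
    # (a no-op for rigid/affine RTs whose last row is [0, 0, 0, 1])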
new_coordinates = new_coordinates[:3, :]/new_coordinates[3, :] 213 | return new_coordinates 214 | 215 | 216 | def calculate_2d_projections(coordinates_3d, intrinsics): 217 | """ 218 | Input: 219 | coordinates: [3, N] 220 | intrinsics: [3, 3] 221 | Return 222 | projected_coordinates: [N, 2] 223 | """ 224 | projected_coordinates = intrinsics @ coordinates_3d 225 | projected_coordinates = projected_coordinates[:2, 226 | :] / projected_coordinates[2, :] 227 | projected_coordinates = projected_coordinates.transpose() 228 | projected_coordinates = np.array(projected_coordinates, dtype=np.int32) 229 | 230 | return projected_coordinates 231 | 232 | 233 | def trim_zeros(x): 234 | """It's common to have tensors larger than the available data and 235 | pad with zeros. This function removes rows that are all zeros. 236 | x: [rows, columns]. 237 | """ 238 | 239 | pre_shape = x.shape 240 | assert len(x.shape) == 2, x.shape 241 | new_x = x[~np.all(x == 0, axis=1)] 242 | post_shape = new_x.shape 243 | assert pre_shape[0] == post_shape[0] 244 | assert pre_shape[1] == post_shape[1] 245 | 246 | return new_x 247 | 248 | 249 | def compute_3d_matches(gt_class_ids, gt_RTs, gt_scales, gt_handle_visibility, synset_names, 250 | pred_boxes, pred_class_ids, pred_scores, pred_RTs, pred_scales, 251 | iou_3d_thresholds, score_threshold=0): 252 | """Finds matches between prediction and ground truth instances. 253 | Returns: 254 | gt_matches: 2-D array. For each GT box it has the index of the matched 255 | predicted box. 256 | pred_matches: 2-D array. For each predicted box, it has the index of 257 | the matched ground truth box. 258 | overlaps: [pred_boxes, gt_boxes] IoU overlaps. 259 | """ 260 | # Trim zero padding 261 | # TODO: cleaner to do zero unpadding upstream 262 | num_pred = len(pred_class_ids) 263 | num_gt = len(gt_class_ids) 264 | indices = np.zeros(0) 265 | 266 | if num_pred: 267 | pred_boxes = trim_zeros(pred_boxes).copy() 268 | pred_scores = pred_scores[:pred_boxes.shape[0]].copy() 269 | 270 | # Sort predictions by score from high to low 271 | indices = np.argsort(pred_scores)[::-1] 272 | 273 | pred_boxes = pred_boxes[indices].copy() 274 | pred_class_ids = pred_class_ids[indices].copy() 275 | pred_scores = pred_scores[indices].copy() 276 | pred_scales = pred_scales[indices].copy() 277 | pred_RTs = pred_RTs[indices].copy() 278 | 279 | # Compute IoU overlaps [pred_bboxs gt_bboxs] 280 | #overlaps = [[0 for j in range(num_gt)] for i in range(num_pred)] 281 | overlaps = np.zeros((num_pred, num_gt), dtype=np.float32) 282 | for i in range(num_pred): 283 | for j in range(num_gt): 284 | # overlaps[i, j] = compute_3d_iou(pred_3d_bboxs[i], gt_3d_bboxs[j], gt_handle_visibility[j], 285 | # synset_names[pred_class_ids[i]], synset_names[gt_class_ids[j]]) 286 | overlaps[i, j] = compute_3d_iou_new(pred_RTs[i], gt_RTs[j], pred_scales[i, :], gt_scales[j], 287 | gt_handle_visibility[j], synset_names[pred_class_ids[i]], synset_names[gt_class_ids[j]]) 288 | 289 | # Loop through predictions and find matching ground truth boxes 290 | num_iou_3d_thres = len(iou_3d_thresholds) 291 | pred_matches = -1 * np.ones([num_iou_3d_thres, num_pred]) 292 | gt_matches = -1 * np.ones([num_iou_3d_thres, num_gt]) 293 | 294 | for s, iou_thres in enumerate(iou_3d_thresholds): 295 | for i in range(len(pred_boxes)): 296 | # Find best matching ground truth box 297 | # 1. Sort matches by score 298 | sorted_ixs = np.argsort(overlaps[i])[::-1] 299 | # 2. 
Remove low scores 300 | low_score_idx = np.where( 301 | overlaps[i, sorted_ixs] < score_threshold)[0] 302 | if low_score_idx.size > 0: 303 | sorted_ixs = sorted_ixs[:low_score_idx[0]] 304 | # 3. Find the match 305 | for j in sorted_ixs: 306 | # If ground truth box is already matched, go to next one 307 | #print('gt_match: ', gt_match[j]) 308 | if gt_matches[s, j] > -1: 309 | continue 310 | # If we reach IoU smaller than the threshold, end the loop 311 | iou = overlaps[i, j] 312 | #print('iou: ', iou) 313 | if iou < iou_thres: 314 | break 315 | # Do we have a match? 316 | if not pred_class_ids[i] == gt_class_ids[j]: 317 | continue 318 | 319 | if iou > iou_thres: 320 | gt_matches[s, j] = i 321 | pred_matches[s, i] = j 322 | break 323 | 324 | return gt_matches, pred_matches, overlaps, indices 325 | 326 | 327 | def compute_ap_from_matches_scores(pred_match, pred_scores, gt_match): 328 | # sort the scores from high to low 329 | # print(pred_match.shape, pred_scores.shape) 330 | assert pred_match.shape[0] == pred_scores.shape[0] 331 | 332 | score_indices = np.argsort(pred_scores)[::-1] 333 | pred_scores = pred_scores[score_indices] 334 | pred_match = pred_match[score_indices] 335 | 336 | precisions = np.cumsum(pred_match > -1) / (np.arange(len(pred_match)) + 1) 337 | recalls = np.cumsum(pred_match > -1).astype(np.float32) / len(gt_match) 338 | 339 | # Pad with start and end values to simplify the math 340 | precisions = np.concatenate([[0], precisions, [0]]) 341 | recalls = np.concatenate([[0], recalls, [1]]) 342 | 343 | # Ensure precision values decrease but don't increase. This way, the 344 | # precision value at each recall threshold is the maximum it can be 345 | # for all following recall thresholds, as specified by the VOC paper. 346 | for i in range(len(precisions) - 2, -1, -1): 347 | precisions[i] = np.maximum(precisions[i], precisions[i + 1]) 348 | 349 | # Compute mean AP over recall range 350 | indices = np.where(recalls[:-1] != recalls[1:])[0] + 1 351 | ap = np.sum((recalls[indices] - recalls[indices - 1]) 352 | * precisions[indices]) 353 | return ap 354 | 355 | 356 | def compute_RT_overlaps(gt_class_ids, gt_RTs, gt_handle_visibility, 357 | pred_class_ids, pred_RTs, 358 | synset_names): 359 | """Finds overlaps between prediction and ground truth instances. 360 | Returns: 361 | overlaps: [pred_boxes, gt_boxes] IoU overlaps. 
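            Unlike the IoU matching above, each overlaps[i, j] here is the
            (rotation error in degrees, translation error in cm) pair from
            compute_RT_degree_cm_symmetry, so the array has shape
            [num_pred, num_gt, 2].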
362 |     """
363 |     # print('num of gt instances: {}, num of pred instances: {}'.format(len(gt_class_ids), len(gt_class_ids)))
364 |     num_pred = len(pred_class_ids)
365 |     num_gt = len(gt_class_ids)
366 |
367 |     # Compute the (degree, cm) pose error for every [pred, gt] pair
368 |     #overlaps = [[0 for j in range(num_gt)] for i in range(num_pred)]
369 |     overlaps = np.zeros((num_pred, num_gt, 2))
370 |
371 |     for i in range(num_pred):
372 |         for j in range(num_gt):
373 |             overlaps[i, j, :] = compute_RT_degree_cm_symmetry(pred_RTs[i],
374 |                                                               gt_RTs[j],
375 |                                                               gt_class_ids[j],
376 |                                                               gt_handle_visibility[j],
377 |                                                               synset_names)
378 |
379 |     return overlaps
380 |
381 |
382 | def compute_match_from_degree_cm(overlaps, pred_class_ids, gt_class_ids, degree_thres_list, shift_thres_list):
383 |     num_degree_thres = len(degree_thres_list)
384 |     num_shift_thres = len(shift_thres_list)
385 |
386 |     num_pred = len(pred_class_ids)
387 |     num_gt = len(gt_class_ids)
388 |
389 |     pred_matches = -1 * np.ones((num_degree_thres, num_shift_thres, num_pred))
390 |     gt_matches = -1 * np.ones((num_degree_thres, num_shift_thres, num_gt))
391 |
392 |     if num_pred == 0 or num_gt == 0:
393 |         return gt_matches, pred_matches
394 |
395 |     assert num_pred == overlaps.shape[0]
396 |     assert num_gt == overlaps.shape[1]
397 |     assert overlaps.shape[2] == 2
398 |
399 |     for d, degree_thres in enumerate(degree_thres_list):
400 |         for s, shift_thres in enumerate(shift_thres_list):
401 |             for i in range(num_pred):
402 |                 # Find best matching ground truth box
403 |                 # 1. Sort candidates by summed degree + cm error, from low to high
404 |                 sum_degree_shift = np.sum(overlaps[i, :, :], axis=-1)
405 |                 sorted_ixs = np.argsort(sum_degree_shift)
406 |                 # 2. Remove low scores
407 |                 # low_score_idx = np.where(sum_degree_shift >= 100)[0]
408 |                 # if low_score_idx.size > 0:
409 |                 #     sorted_ixs = sorted_ixs[:low_score_idx[0]]
410 |                 # 3. Find the match
411 |                 for j in sorted_ixs:
412 |                     # If ground truth box is already matched, go to next one
413 |                     #print(j, len(gt_match), len(pred_class_ids), len(gt_class_ids))
414 |                     if gt_matches[d, s, j] > -1 or pred_class_ids[i] != gt_class_ids[j]:
415 |                         continue
416 |                     # Skip this candidate if its rotation or translation error exceeds the thresholds
417 |                     if overlaps[i, j, 0] > degree_thres or overlaps[i, j, 1] > shift_thres:
418 |                         continue
419 |
420 |                     gt_matches[d, s, j] = i
421 |                     pred_matches[d, s, i] = j
422 |                     break
423 |
424 |     return gt_matches, pred_matches
425 |
426 |
427 | def compute_degree_cm_mAP(final_results, synset_names, log_dir, degree_thresholds=[360], shift_thresholds=[100], iou_3d_thresholds=[0.1], iou_pose_thres=0.1, use_matches_for_pose=False, eval_recon=False):
428 |     """Compute Average Precision over the configured 3D IoU and pose-error thresholds.
429 |     Returns:
430 |         mAP: Mean Average Precision
431 |         precisions: List of precisions at different class score thresholds.
432 |         recalls: List of recall values at different class score thresholds.
433 |         overlaps: [pred_boxes, gt_boxes] IoU overlaps.
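        degree_thresholds and shift_thresholds sweep the rotation (degrees) and
        translation (cm) error thresholds for the pose APs; iou_3d_thresholds
        sweeps the 3D IoU threshold for the detection APs. With
        use_matches_for_pose=True, pose APs are computed only over detections
        matched at iou_pose_thres.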
434 | """ 435 | 436 | num_classes = len(synset_names) 437 | degree_thres_list = list(degree_thresholds) + [360] 438 | num_degree_thres = len(degree_thres_list) 439 | 440 | shift_thres_list = list(shift_thresholds) + [100] 441 | num_shift_thres = len(shift_thres_list) 442 | 443 | iou_thres_list = list(iou_3d_thresholds) 444 | num_iou_thres = len(iou_thres_list) 445 | 446 | if use_matches_for_pose: 447 | assert iou_pose_thres in iou_thres_list 448 | 449 | iou_3d_aps = np.zeros((num_classes + 1, num_iou_thres)) 450 | iou_pred_matches_all = [np.zeros((num_iou_thres, 0)) 451 | for _ in range(num_classes)] 452 | iou_pred_scores_all = [np.zeros((num_iou_thres, 0)) 453 | for _ in range(num_classes)] 454 | iou_gt_matches_all = [np.zeros((num_iou_thres, 0)) 455 | for _ in range(num_classes)] 456 | 457 | pose_aps = np.zeros((num_classes + 1, num_degree_thres, num_shift_thres)) 458 | pose_pred_matches_all = [ 459 | np.zeros((num_degree_thres, num_shift_thres, 0)) for _ in range(num_classes)] 460 | pose_gt_matches_all = [ 461 | np.zeros((num_degree_thres, num_shift_thres, 0)) for _ in range(num_classes)] 462 | pose_pred_scores_all = [ 463 | np.zeros((num_degree_thres, num_shift_thres, 0)) for _ in range(num_classes)] 464 | 465 | # loop over results to gather pred matches and gt matches for iou and pose metrics 466 | progress = tqdm(final_results, desc="eval") 467 | # for progress, result in enumerate(final_results): 468 | for result in progress: 469 | # print(progress, len(final_results)) 470 | gt_class_ids = result['gt_class_ids'].astype(np.int32) 471 | gt_RTs = np.array(result['gt_RTs']) 472 | gt_scales = np.array(result['gt_scales']) 473 | gt_handle_visibility = result['gt_handle_visibility'] 474 | 475 | pred_bboxes = np.array(result['pred_bboxes']) 476 | pred_class_ids = result['pred_class_ids'] 477 | pred_scales = result['pred_scales'] 478 | pred_scores = result['pred_scores'] 479 | pred_RTs = np.array(result['pred_RTs']) 480 | #print(pred_bboxes.shape[0], pred_class_ids.shape[0], pred_scores.shape[0], pred_RTs.shape[0]) 481 | 482 | if len(gt_class_ids) == 0 and len(pred_class_ids) == 0: 483 | continue 484 | 485 | for cls_id in range(1, num_classes): 486 | # get gt and predictions in this class 487 | cls_gt_class_ids = gt_class_ids[gt_class_ids == cls_id] if len( 488 | gt_class_ids) else np.zeros(0) 489 | cls_gt_scales = gt_scales[gt_class_ids == cls_id] if len( 490 | gt_class_ids) else np.zeros((0, 3)) 491 | cls_gt_RTs = gt_RTs[gt_class_ids == cls_id] if len( 492 | gt_class_ids) else np.zeros((0, 4, 4)) 493 | 494 | cls_pred_class_ids = pred_class_ids[pred_class_ids == cls_id] if len( 495 | pred_class_ids) else np.zeros(0) 496 | cls_pred_bboxes = pred_bboxes[pred_class_ids == cls_id, :] if len( 497 | pred_class_ids) else np.zeros((0, 4)) 498 | cls_pred_scores = pred_scores[pred_class_ids == cls_id] if len( 499 | pred_class_ids) else np.zeros(0) 500 | cls_pred_RTs = pred_RTs[pred_class_ids == cls_id] if len( 501 | pred_class_ids) else np.zeros((0, 4, 4)) 502 | cls_pred_scales = pred_scales[pred_class_ids == cls_id] if len( 503 | pred_class_ids) else np.zeros((0, 3)) 504 | 505 | # calculate the overlap between each gt instance and pred instance 506 | if synset_names[cls_id] != 'mug': 507 | cls_gt_handle_visibility = np.ones_like(cls_gt_class_ids) 508 | else: 509 | cls_gt_handle_visibility = gt_handle_visibility[gt_class_ids == cls_id] if len( 510 | gt_class_ids) else np.ones(0) 511 | 512 | iou_cls_gt_match, iou_cls_pred_match, _, iou_pred_indices = compute_3d_matches(cls_gt_class_ids, cls_gt_RTs, 
cls_gt_scales, cls_gt_handle_visibility, synset_names, 513 | cls_pred_bboxes, cls_pred_class_ids, cls_pred_scores, cls_pred_RTs, cls_pred_scales, 514 | iou_thres_list) 515 | if len(iou_pred_indices): 516 | cls_pred_class_ids = cls_pred_class_ids[iou_pred_indices] 517 | cls_pred_RTs = cls_pred_RTs[iou_pred_indices] 518 | cls_pred_scores = cls_pred_scores[iou_pred_indices] 519 | cls_pred_bboxes = cls_pred_bboxes[iou_pred_indices] 520 | 521 | iou_pred_matches_all[cls_id] = np.concatenate( 522 | (iou_pred_matches_all[cls_id], iou_cls_pred_match), axis=-1) 523 | cls_pred_scores_tile = np.tile(cls_pred_scores, (num_iou_thres, 1)) 524 | iou_pred_scores_all[cls_id] = np.concatenate( 525 | (iou_pred_scores_all[cls_id], cls_pred_scores_tile), axis=-1) 526 | assert iou_pred_matches_all[cls_id].shape[1] == iou_pred_scores_all[cls_id].shape[1] 527 | iou_gt_matches_all[cls_id] = np.concatenate( 528 | (iou_gt_matches_all[cls_id], iou_cls_gt_match), axis=-1) 529 | 530 | if use_matches_for_pose: 531 | thres_ind = list(iou_thres_list).index(iou_pose_thres) 532 | 533 | iou_thres_pred_match = iou_cls_pred_match[thres_ind, :] 534 | 535 | cls_pred_class_ids = cls_pred_class_ids[iou_thres_pred_match > -1] if len( 536 | iou_thres_pred_match) > 0 else np.zeros(0) 537 | cls_pred_RTs = cls_pred_RTs[iou_thres_pred_match > -1] if len( 538 | iou_thres_pred_match) > 0 else np.zeros((0, 4, 4)) 539 | cls_pred_scores = cls_pred_scores[iou_thres_pred_match > -1] if len( 540 | iou_thres_pred_match) > 0 else np.zeros(0) 541 | cls_pred_bboxes = cls_pred_bboxes[iou_thres_pred_match > -1] if len( 542 | iou_thres_pred_match) > 0 else np.zeros((0, 4)) 543 | 544 | iou_thres_gt_match = iou_cls_gt_match[thres_ind, :] 545 | cls_gt_class_ids = cls_gt_class_ids[iou_thres_gt_match > -1] if len( 546 | iou_thres_gt_match) > 0 else np.zeros(0) 547 | cls_gt_RTs = cls_gt_RTs[iou_thres_gt_match > -1] if len( 548 | iou_thres_gt_match) > 0 else np.zeros((0, 4, 4)) 549 | cls_gt_handle_visibility = cls_gt_handle_visibility[iou_thres_gt_match > -1] if len( 550 | iou_thres_gt_match) > 0 else np.zeros(0) 551 | 552 | RT_overlaps = compute_RT_overlaps(cls_gt_class_ids, cls_gt_RTs, cls_gt_handle_visibility, 553 | cls_pred_class_ids, cls_pred_RTs, 554 | synset_names) 555 | 556 | pose_cls_gt_match, pose_cls_pred_match = compute_match_from_degree_cm(RT_overlaps, 557 | cls_pred_class_ids, 558 | cls_gt_class_ids, 559 | degree_thres_list, 560 | shift_thres_list) 561 | 562 | pose_pred_matches_all[cls_id] = np.concatenate( 563 | (pose_pred_matches_all[cls_id], pose_cls_pred_match), axis=-1) 564 | 565 | cls_pred_scores_tile = np.tile( 566 | cls_pred_scores, (num_degree_thres, num_shift_thres, 1)) 567 | pose_pred_scores_all[cls_id] = np.concatenate( 568 | (pose_pred_scores_all[cls_id], cls_pred_scores_tile), axis=-1) 569 | assert pose_pred_scores_all[cls_id].shape[2] == pose_pred_matches_all[cls_id].shape[2], '{} vs. {}'.format( 570 | pose_pred_scores_all[cls_id].shape, pose_pred_matches_all[cls_id].shape) 571 | pose_gt_matches_all[cls_id] = np.concatenate( 572 | (pose_gt_matches_all[cls_id], pose_cls_gt_match), axis=-1) 573 | 574 | # draw iou 3d AP vs. 
    # draw 3D IoU AP vs. IoU thresholds
    fig_iou = plt.figure()
    ax_iou = plt.subplot(131)
    plt.ylabel('AP')
    plt.ylim((0, 1))
    plt.xlabel('3D IoU thresholds')

    iou_dict = {}
    iou_dict['thres_list'] = iou_thres_list
    for cls_id in range(1, num_classes):
        class_name = synset_names[cls_id]
        for s, _ in enumerate(iou_thres_list):
            iou_3d_aps[cls_id, s] = compute_ap_from_matches_scores(iou_pred_matches_all[cls_id][s, :],
                                                                   iou_pred_scores_all[cls_id][s, :],
                                                                   iou_gt_matches_all[cls_id][s, :])
        ax_iou.plot(iou_thres_list, iou_3d_aps[cls_id, :], label=class_name)

    iou_3d_aps[-1, :] = np.mean(iou_3d_aps[1:-1, :], axis=0)
    ax_iou.plot(iou_thres_list, iou_3d_aps[-1, :], label='mean')
    iou_dict['aps'] = iou_3d_aps

    # draw pose AP vs. thresholds
    if use_matches_for_pose:
        prefix = 'Pose_Only_'
    else:
        prefix = 'Pose_Detection_'

    pose_dict = {}
    pose_dict['degree_thres'] = degree_thres_list
    pose_dict['shift_thres_list'] = shift_thres_list

    for i, _ in enumerate(degree_thres_list):
        for j, _ in enumerate(shift_thres_list):
            for cls_id in range(1, num_classes):
                cls_pose_pred_matches_all = pose_pred_matches_all[cls_id][i, j, :]
                cls_pose_gt_matches_all = pose_gt_matches_all[cls_id][i, j, :]
                cls_pose_pred_scores_all = pose_pred_scores_all[cls_id][i, j, :]

                pose_aps[cls_id, i, j] = compute_ap_from_matches_scores(cls_pose_pred_matches_all,
                                                                        cls_pose_pred_scores_all,
                                                                        cls_pose_gt_matches_all)

            pose_aps[-1, i, j] = np.mean(pose_aps[1:-1, i, j])

    # AP vs. rotation threshold (at the loosest shift threshold)
    ax_rot = plt.subplot(132)
    plt.ylim((0, 1))
    plt.xlabel('Rotation/degree')
    for cls_id in range(1, num_classes):
        class_name = synset_names[cls_id]
        ax_rot.plot(
            degree_thres_list[:-1], pose_aps[cls_id, :-1, -1], label=class_name)

    ax_rot.plot(degree_thres_list[:-1], pose_aps[-1, :-1, -1], label='mean')
    pose_dict['aps'] = pose_aps

    # AP vs. translation threshold (at the loosest rotation threshold)
    ax_trans = plt.subplot(133)
    plt.ylim((0, 1))
    plt.xlabel('Translation/cm')
    for cls_id in range(1, num_classes):
        class_name = synset_names[cls_id]
        ax_trans.plot(shift_thres_list[:-1],
                      pose_aps[cls_id, -1, :-1], label=class_name)

    ax_trans.plot(shift_thres_list[:-1], pose_aps[-1, -1, :-1], label='mean')
    output_path = os.path.join(
        log_dir, prefix + 'mAP_{}-{}cm.png'.format(shift_thres_list[0], shift_thres_list[-2]))
    ax_trans.legend()

    fig_iou.savefig(output_path)
    plt.close(fig_iou)

    iou_aps = iou_3d_aps
    kind_result = {}
    kind_result["3D IOU at 25"] = "{:.1f}".format(
        iou_aps[-1, iou_thres_list.index(0.25)] * 100)
    kind_result["3D IOU at 50"] = "{:.1f}".format(
        iou_aps[-1, iou_thres_list.index(0.5)] * 100)
    kind_result["5 degree, 5 cm"] = "{:.1f}".format(
        pose_aps[-1, degree_thres_list.index(5), shift_thres_list.index(5)] * 100)
    kind_result["10 degree, 5 cm"] = "{:.1f}".format(
        pose_aps[-1, degree_thres_list.index(10), shift_thres_list.index(5)] * 100)
    kind_result["10 degree, 10 cm"] = "{:.1f}".format(
        pose_aps[-1, degree_thres_list.index(10), shift_thres_list.index(10)] * 100)

    # the following computes the reconstruction metrics
    if eval_recon:
        emd_dis_all = {c: [] for c in synset_names}
        cmf_dis_all = {c: [] for c in synset_names}
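        # "chamfer_dis_cass" / "emd_dis_cass" below are per-instance
        # reconstruction errors computed upstream. For context, the symmetric
        # Chamfer distance between point sets P and Q averages nearest-
        # neighbour distances in both directions; a minimal numpy sketch with
        # a hypothetical helper, not used by this file:
        #
        #   def chamfer(P, Q):  # P: (n, 3), Q: (m, 3)
        #       d = np.linalg.norm(P[:, None, :] - Q[None, :, :], axis=-1)
        #       return d.min(axis=1).mean() + d.min(axis=0).mean()
        #
        # EMD instead scores the cost of an optimal one-to-one assignment
        # between the two point sets.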
        for result in tqdm(final_results, desc="recon"):
            pred_class_ids = result['pred_class_ids']
            if len(pred_class_ids) <= 0:
                continue
            chamfer_dis_cass = result["chamfer_dis_cass"]
            emd_dis_cass = result["emd_dis_cass"]

            for cls_id in range(1, num_classes):
                # gather the per-instance distances for this class
                cmf_dis = chamfer_dis_cass[pred_class_ids == cls_id]
                emd_dis = emd_dis_cass[pred_class_ids == cls_id]
                if len(cmf_dis) <= 0 or len(emd_dis) <= 0:
                    continue
                cmf_dis_all[synset_names[cls_id]] += cmf_dis.tolist()
                emd_dis_all[synset_names[cls_id]] += emd_dis.tolist()

        emd_dis = {}
        for k, v in emd_dis_all.items():
            if k != "BG" and len(v):
                emd_dis[k] = np.mean(np.asarray(v))
        emd_dis["mean"] = np.mean(np.array([v for v in emd_dis.values()]))

        cmf_dis = {}
        for k, v in cmf_dis_all.items():
            if k != "BG" and len(v):
                cmf_dis[k] = np.mean(np.asarray(v))
        cmf_dis["mean"] = np.mean(np.array([v for v in cmf_dis.values()]))

        kind_result["emd"] = emd_dis
        kind_result["cmf"] = cmf_dis

    pprint(kind_result)
    with open(os.path.join(log_dir, "eval_result.json"), "w") as f:
        json.dump(kind_result, f, indent=4)
    return iou_3d_aps, pose_aps


color_map = [
    (255, 0, 0),
    (0, 255, 0),
    (0, 0, 255),
    (255, 0, 255),
    (0, 255, 255),
    (255, 255, 0)
]


def draw(image, RTs, models, class_ids, misses, intrinsics, save_path="bbox.png"):
    draw_image = image.copy()
    RTs = np.array(RTs)
    models = np.array(models)

    for RT, model, cls, miss in zip(RTs, models, class_ids, misses):
        if miss:
            continue
        model = model.transpose(1, 0)
        RT = RT.reshape(4, 4)
        # transform the model points into camera space and project them to 2D
        transformed_pts = transform_coordinates_3d(model, RT)
        projected_pts = calculate_2d_projections(transformed_pts, intrinsics)

        for p in projected_pts:
            cv2.circle(draw_image, center=tuple(p), radius=3,
                       color=color_map[int(cls - 1)], thickness=-1)  # negative thickness: filled

    if "cass" in save_path:
        # rotate the visualization by 180 degrees for CASS results
        (h, w) = draw_image.shape[:2]
        center = (w / 2, h / 2)
        M = cv2.getRotationMatrix2D(center, 180, 1.0)
        draw_image = cv2.warpAffine(draw_image, M, (w, h))

    # reorder RGB to BGR for OpenCV before writing
    cv2.imwrite(save_path, draw_image[:, :, (2, 1, 0)])
--------------------------------------------------------------------------------
/trained_models/placeholder:
--------------------------------------------------------------------------------
Put the downloaded model here.
--------------------------------------------------------------------------------