├── .gitignore
├── datasets
│   ├── dataset.py
│   ├── dataset_config
│   │   ├── classes.txt
│   │   ├── test_data_list.txt
│   │   └── train_data_list.txt
│   └── utils.py
├── eval.sh
├── lib
│   ├── extractors.py
│   ├── foldingnet.py
│   ├── models.py
│   ├── network.py
│   ├── pointnet.py
│   ├── pspnet.py
│   ├── transformations.py
│   └── utils.py
├── metrics
│   └── readme.md
├── readme.md
├── tools
│   ├── _init_paths.py
│   ├── eval.py
│   └── utils.py
└── trained_models
    └── placeholder
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib64/
18 | parts/
19 | sdist/
20 | var/
21 | wheels/
22 | pip-wheel-metadata/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 |
53 | # Translations
54 | *.mo
55 | *.pot
56 |
57 | # Django stuff:
58 | *.log
59 | local_settings.py
60 | db.sqlite3
61 | db.sqlite3-journal
62 |
63 | # Flask stuff:
64 | instance/
65 | .webassets-cache
66 |
67 | # Scrapy stuff:
68 | .scrapy
69 |
70 | # Sphinx documentation
71 | docs/_build/
72 |
73 | # PyBuilder
74 | target/
75 |
76 | # Jupyter Notebook
77 | .ipynb_checkpoints
78 |
79 | # IPython
80 | profile_default/
81 | ipython_config.py
82 |
83 | # pyenv
84 | .python-version
85 |
86 | # pipenv
87 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
88 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
89 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
90 | # install all needed dependencies.
91 | #Pipfile.lock
92 |
93 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
94 | __pypackages__/
95 |
96 | # Celery stuff
97 | celerybeat-schedule
98 | celerybeat.pid
99 |
100 | # SageMath parsed files
101 | *.sage.py
102 |
103 | # Environments
104 | .env
105 | .venv
106 | env/
107 | venv/
108 | ENV/
109 | env.bak/
110 | venv.bak/
111 |
112 | # Spyder project settings
113 | .spyderproject
114 | .spyproject
115 |
116 | # Rope project settings
117 | .ropeproject
118 |
119 | # mkdocs documentation
120 | /site
121 |
122 | # mypy
123 | .mypy_cache/
124 | .dmypy.json
125 | dmypy.json
126 |
127 | # Pyre type checker
128 | .pyre/
129 |
--------------------------------------------------------------------------------
/datasets/dataset.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import cv2
4 | import numpy as np
5 | import numpy.ma as ma
6 | import torch
7 | import torch.utils.data as data
8 | import torchvision.transforms as transforms
9 |
10 | import datasets.utils as dutils
11 | from lib.transformations import quaternion_from_matrix
12 |
13 |
14 | def align(class_ids, masks, coords, depth, intr):
15 | num_instances = len(class_ids)
16 | RTs = np.zeros((num_instances, 4, 4), dtype=np.float32)
17 | scales = np.ones((num_instances, 3), dtype=np.float32)
18 |
19 | for i in range(num_instances):
20 | mask = ma.getmaskarray(ma.masked_equal(masks, class_ids[i]))
21 | if np.sum(mask) < 50:
22 | RTs[i] = np.eye(4)
23 | continue
24 |
25 | pts, idxs = dutils.backproject(depth, intr, mask)
26 | pts = pts / 1000.0
27 | if len(pts) < 50:
28 | RTs[i] = np.eye(4)
29 | continue
30 | coord_pts = coords[idxs[0], idxs[1], :] - 0.5
31 |
32 | scale, rotation, trans, _ = dutils.estimateSimilarityTranform(
33 | coord_pts, pts)
34 | if rotation is None or trans is None or np.any(np.isnan(rotation)) or np.any(np.isnan(trans))\
35 | or np.any(np.isinf(trans)) or np.any(np.isinf(rotation)):
36 | RTs[i] = np.eye(4)
37 | continue
38 |
39 | aligned_RT = np.eye(4)
40 | aligned_RT[:3, :3] = rotation.T
41 |
42 | aligned_RT[:3, 3] = trans
43 | aligned_RT[3, 3] = 1
44 |
45 | RTs[i, :, :] = aligned_RT
46 | scales[i] = scale
47 |
48 | return RTs, scales
49 |
50 |
51 | def load_obj(path, ori_path, num_points):
52 | if os.path.isfile(path):
53 | return dutils.load_obj(path)
54 | else:
55 | vertex = dutils.sample_obj(ori_path, num_points, True)
56 | dutils.save_obj(vertex, path[:-3]+"ply")
57 | return np.asarray(vertex)
58 |
59 |
60 | class PoseDataset(data.Dataset):
61 | def __init__(self, mode, num_pt, root):
62 | if mode == 'train':
63 | self.path = 'datasets/dataset_config/train_data_list.txt'
64 | elif mode == 'test':
65 | self.path = 'datasets/dataset_config/test_data_list.txt'
66 | self.num_pt = num_pt
67 | self.root = root
68 |
69 | self.list = []
70 | self.real = []
71 | self.syn = []
72 |         # read the split file: each non-empty line names one sample;
73 |         # lines prefixed with "real" are real data, the rest synthetic
74 |         with open(self.path, "r") as input_file:
75 |             for input_line in input_file:
76 |                 input_line = input_line.rstrip("\n")
77 |                 if not input_line:
78 |                     continue
79 |                 if input_line.startswith("real"):
80 |                     self.real.append(input_line)
81 |                 else:
82 |                     self.syn.append(input_line)
83 |                 self.list.append(input_line)
84 |
85 | self.length = len(self.list)
86 | self.len_real = len(self.real)
87 | self.len_syn = len(self.syn)
88 | # real
89 | self.cam_cx_1 = 322.525
90 | self.cam_cy_1 = 244.11084
91 | self.cam_fx_1 = 591.0125
92 | self.cam_fy_1 = 590.16775
93 | # syn
94 | self.cam_cx_2 = 319.5
95 | self.cam_cy_2 = 239.5
96 | self.cam_fx_2 = 577.5
97 | self.cam_fy_2 = 577.5
98 |
99 | self.xmap = np.array([[j for i in range(640)] for j in range(480)])
100 | self.ymap = np.array([[i for i in range(640)] for j in range(480)])
101 |
102 | self.minimum_num_pt = 50
103 |
104 | self.norm = transforms.Normalize(
105 | mean=[0.51, 0.47, 0.44], std=[0.29, 0.27, 0.28])
106 | self.symmetry_obj_idx = [0, 1, 3]
107 |
108 | self.class_names = PoseDataset.get_class_names()
109 | print(len(self.list))
110 |
111 | @staticmethod
112 | def get_class_names():
113 | class_names = []
114 | with open("datasets/dataset_config/classes.txt", "r") as f:
115 | class_names = ["_".join(line.split("_")[1:2]) for line in f]
116 | class_names = [c.replace("\n", "") for c in class_names]
117 |
118 | return class_names
119 |
120 | def __getitem__(self, index):
121 | try:
122 | img = np.array(cv2.imread(
123 | '{0}/{1}_color.png'.format(self.root, self.list[index]))) / 255.
124 | depth = np.array(cv2.imread(
125 | '{0}/{1}_depth.png'.format(self.root, self.list[index]), -1))
126 |             if len(depth.shape) == 3:  # 16-bit depth packed into two 8-bit channels
127 | depth = np.uint16(depth[:, :, 1] * 256) + \
128 | np.uint16(depth[:, :, 2])
129 | label = np.array(cv2.imread(
130 | '{0}/{1}_mask.png'.format(self.root, self.list[index]))[:, :, 2])
131 |
132 | meta = dict()
133 | with open("{0}/{1}_meta.txt".format(self.root, self.list[index]), "r") as f:
134 | for line in f:
135 | line = line.replace("\n", "")
136 | line = line.split(" ")
137 | if int(line[1]) == 0: # mask out background
138 | continue
139 | d = {"cls_id": line[1], "inst_name": line[2]}
140 | if "real_train" in self.list[index]:
141 | d["inst_dir"] = os.path.join(self.root, "obj_models", "real_train",
142 | line[2]+"_{}.ply".format(self.num_pt))
143 | d["ori_inst_dir"] = os.path.join(self.root,
144 | "obj_models", "real_train", line[2]+".obj")
145 | elif "real_test" in self.list[index]:
146 | d["inst_dir"] = os.path.join(self.root, "obj_models", "real_test",
147 | line[2]+"_{}.ply".format(self.num_pt))
148 | d["ori_inst_dir"] = os.path.join(
149 | self.root, "obj_models", "real_test", line[2]+".obj")
150 | else:
151 | d["inst_dir"] = os.path.join(self.root, "obj_models", "train",
152 | *line[2:], "model_{}.ply".format(self.num_pt))
153 | d["ori_inst_dir"] = os.path.join(self.root, "obj_models", "train",
154 | *line[2:], "model.obj")
155 | meta[int(line[0])] = d
156 |
157 | if not self.list[index].startswith("real"):
158 | cam_cx = self.cam_cx_2
159 | cam_cy = self.cam_cy_2
160 | cam_fx = self.cam_fx_2
161 | cam_fy = self.cam_fy_2
162 | else:
163 | cam_cx = self.cam_cx_1
164 | cam_cy = self.cam_cy_1
165 | cam_fx = self.cam_fx_1
166 | cam_fy = self.cam_fy_1
167 |
168 | obj = list(meta.keys())
169 | iidx = np.arange(len(obj))
170 | np.random.shuffle(iidx)
171 | for idx in iidx:
172 | mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0))
173 | mask_label = ma.getmaskarray(ma.masked_equal(label, obj[idx]))
174 | mask = mask_label * mask_depth
175 | if len(mask.nonzero()[0]) > self.minimum_num_pt:
176 | break
177 | else:
178 | print("Can't find any valid training object in {}".format(
179 | self.list[index]))
180 | raise ValueError
181 |
182 | # A method to load target_r and target_t
183 | if os.path.isfile("{}/gts/{}_poses.txt".format(self.root, self.list[index])) and os.path.isfile("{}/gts/{}_scales.txt".format(self.root, self.list[index])):
184 | meta["poses"] = np.loadtxt(
185 | "{}/gts/{}_poses.txt".format(self.root, self.list[index])).reshape(-1, 4, 4)
186 | meta["scales"] = np.loadtxt(
187 | "{}/gts/{}_scales.txt".format(self.root, self.list[index])).reshape(-1, 3)
188 | else:
189 | coord = cv2.imread(
190 | '{0}/{1}_coord.png'.format(self.root, self.list[index]))[:, :, :3][:, :, (2, 1, 0)]
191 | coord = np.array(coord, dtype=np.float32) / 255.
192 | coord[:, :, 2] = 1.0 - coord[:, :, 2]
193 | intr = np.array(
194 | [[cam_fx, 0, cam_cx], [0, cam_fy, cam_cy], [0., 0., 1.]])
195 | poses, scales = align(obj, label, coord, depth, intr)
196 | os.makedirs(os.path.dirname(
197 | "{}/gts/{}_poses.txt".format(self.root, self.list[index])), exist_ok=True)
198 | np.savetxt("{}/gts/{}_poses.txt".format(self.root, self.list[index]),
199 | poses.reshape(-1, 4))
200 | np.savetxt("{}/gts/{}_scales.txt".format(self.root,
201 | self.list[index]), scales.reshape(-1, 3))
202 | meta["poses"] = poses
203 | meta["scales"] = scales
204 | rmin, rmax, cmin, cmax = get_bbox(mask_label)
205 | img_masked = np.transpose(img, (2, 0, 1))[:, rmin:rmax, cmin:cmax]
206 | target_r = meta['poses'][idx][:3, 0:3]
207 | target_t = np.array([meta['poses'][idx][:3, 3:4].flatten()])
208 |
209 | choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]
210 | if len(choose) > self.num_pt:
211 | c_mask = np.zeros(len(choose), dtype=int)
212 | c_mask[:self.num_pt] = 1
213 | np.random.shuffle(c_mask)
214 | choose = choose[c_mask.nonzero()]
215 | else:
216 | choose = np.pad(choose, (0, self.num_pt - len(choose)), 'wrap')
217 |
218 | depth_masked = depth[rmin:rmax, cmin:cmax].flatten(
219 | )[choose][:, np.newaxis].astype(np.float32)
220 | xmap_masked = self.xmap[rmin:rmax, cmin:cmax].flatten(
221 | )[choose][:, np.newaxis].astype(np.float32)
222 | ymap_masked = self.ymap[rmin:rmax, cmin:cmax].flatten(
223 | )[choose][:, np.newaxis].astype(np.float32)
224 | choose = np.array([choose])
225 |
226 | cam_scale = 1000.0
227 | pt2 = depth_masked / cam_scale
228 | pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx
229 | pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy
230 | cloud = np.concatenate((-pt0, -pt1, pt2), axis=1)
231 |
232 | model_points = load_obj(
233 | path=meta[obj[idx]]["inst_dir"],
234 | ori_path=meta[obj[idx]]["ori_inst_dir"], num_points=self.num_pt)
235 |
236 | model_points = model_points * meta["scales"][idx]
237 |
238 | target = np.dot(model_points, target_r.T)
239 | target = np.add(target, target_t)
240 | matrix = np.eye(4)
241 | matrix[:3, :3] = target_r
242 | quat = quaternion_from_matrix(matrix)
243 |
244 | return torch.from_numpy(cloud.astype(np.float32)), \
245 | torch.LongTensor(choose.astype(np.int32)), \
246 | self.norm(torch.from_numpy(img_masked.astype(np.float32))), \
247 | torch.from_numpy(target.astype(np.float32)), \
248 | torch.from_numpy(model_points.astype(np.float32)), \
249 | torch.LongTensor([int(meta[obj[idx]]["cls_id"])-1]), \
250 | torch.from_numpy(quat.astype(np.float32)), \
251 | torch.from_numpy(target_t.astype(np.float32))
252 |         except Exception:  # skip corrupted samples, retry another index
253 |             return self.__getitem__(index // 2)
254 |
255 | def __len__(self):
256 | return self.length
257 |
258 | def get_sym_list(self):
259 | return self.symmetry_obj_idx
260 |
261 | def get_num_points_mesh(self):
262 | return self.num_pt
263 |
264 |
265 | border_list = [-1, 40, 80, 120, 160, 200, 240, 280,
266 | 320, 360, 400, 440, 480, 520, 560, 600, 640, 680]
267 | img_width = 480
268 | img_length = 640
269 |
270 |
271 | def get_bbox(label):
272 | rows = np.any(label, axis=1)
273 | cols = np.any(label, axis=0)
274 | rmin, rmax = np.where(rows)[0][[0, -1]]
275 | cmin, cmax = np.where(cols)[0][[0, -1]]
276 | rmax += 1
277 | cmax += 1
278 | r_b = rmax - rmin
279 | for tt in range(len(border_list)):
280 | if r_b > border_list[tt] and r_b < border_list[tt + 1]:
281 | r_b = border_list[tt + 1]
282 | break
283 | c_b = cmax - cmin
284 | for tt in range(len(border_list)):
285 | if c_b > border_list[tt] and c_b < border_list[tt + 1]:
286 | c_b = border_list[tt + 1]
287 | break
288 | center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)]
289 | rmin = center[0] - int(r_b / 2)
290 | rmax = center[0] + int(r_b / 2)
291 | cmin = center[1] - int(c_b / 2)
292 | cmax = center[1] + int(c_b / 2)
293 | if rmin < 0:
294 | delt = -rmin
295 | rmin = 0
296 | rmax += delt
297 | if cmin < 0:
298 | delt = -cmin
299 | cmin = 0
300 | cmax += delt
301 | if rmax > img_width:
302 | delt = rmax - img_width
303 | rmax = img_width
304 | rmin -= delt
305 | if cmax > img_length:
306 | delt = cmax - img_length
307 | cmax = img_length
308 | cmin -= delt
309 | return rmin, rmax, cmin, cmax
310 |
--------------------------------------------------------------------------------
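
A minimal consumption sketch for PoseDataset (not a repository file). The values are assumptions: root="../nocs" mirrors the commented --dataset_dir in eval.sh, and num_pt=500 is illustrative; batch size 1 matches the single-batch assumption in lib/network.py.

import torch.utils.data

from datasets.dataset import PoseDataset

dataset = PoseDataset(mode="train", num_pt=500, root="../nocs")
loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True)
# Each sample: point cloud, chosen pixel indices, normalized RGB crop,
# transformed model points, model points, class id, quaternion, translation.
cloud, choose, img, target, model_points, cls_idx, quat, t = next(iter(loader))
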
/datasets/dataset_config/classes.txt:
--------------------------------------------------------------------------------
1 | 1_bottle_02876657
2 | 2_bowl_02880940
3 | 3_camera_02942699
4 | 4_can_02946921
5 | 5_laptop_03642806
6 | 6_mug_03797390
--------------------------------------------------------------------------------
/datasets/utils.py:
--------------------------------------------------------------------------------
1 | import math
2 | import random
3 |
4 | import numpy as np
5 | import open3d as o3d
6 | import trimesh
7 |
8 |
9 | def save_obj(vertex: np.ndarray, path: str):
10 | """ vertex: [N x 3]
11 | """
12 | pcd = o3d.geometry.PointCloud()
13 | pcd.points = o3d.utility.Vector3dVector(vertex)
14 | o3d.io.write_point_cloud(path, pcd)
15 |
16 |
17 | def load_obj(path):
18 | """return np.array
19 | """
20 | pcd_load = o3d.io.read_point_cloud(path)
21 | return np.asarray(pcd_load.points)
22 |
23 |
24 | def estimateSimilarityTranform(source: np.ndarray, target: np.ndarray):
25 | source_hom = np.transpose(
26 | np.hstack([source, np.ones([source.shape[0], 1])]))
27 | target_hom = np.transpose(
28 |         np.hstack([target, np.ones([target.shape[0], 1])]))
29 |
30 |     # auto-parameter selection based on source-target heuristics
31 | target_norm = np.mean(np.linalg.norm(target, axis=1))
32 | source_norm = np.mean(np.linalg.norm(source, axis=1))
33 | ratio_TS = (target_norm / source_norm)
34 | ratio_ST = (source_norm / target_norm)
35 |
36 | pass_T = ratio_ST if ratio_ST > ratio_TS else ratio_TS
37 | stop_T = pass_T / 100.
38 | n_iter = 100
39 |
40 | source_inliers_hom, target_inliers_hom, best_inlier_ratio = getRANSACInliers(
41 | source_hom, target_hom, max_iterations=n_iter, pass_threshold=pass_T, stop_threshold=stop_T)
42 | if best_inlier_ratio < 0.1:
43 | return None, None, None, None
44 |
45 | scales, rotation, translation, out_transform = estimateSimilarityUmeyama(
46 | source_inliers_hom, target_inliers_hom)
47 |
48 | return scales, rotation, translation, out_transform
49 |
50 |
51 | def getRANSACInliers(source_hom, target_hom, max_iterations=100, pass_threshold=200, stop_threshold=1):
52 | best_residual = 1e10
53 | best_inlier_ratio = 0
54 | best_inlier_idx = np.arange(source_hom.shape[1])
55 | for _ in range(max_iterations):
56 | # pick up 5 random (but corresponding) points from source and target
57 | rand_idx = np.random.randint(source_hom.shape[1], size=5)
58 | _, _, _, out_transform = estimateSimilarityUmeyama(
59 | source_hom[:, rand_idx], target_hom[:, rand_idx])
60 | residual, inlier_ratio, inlier_idx = evaluateModel(
61 | out_transform, source_hom, target_hom, pass_threshold)
62 | if residual < best_residual:
63 | best_residual = residual
64 | best_inlier_ratio = inlier_ratio
65 | best_inlier_idx = inlier_idx
66 | if best_residual < stop_threshold:
67 | break
68 | return source_hom[:, best_inlier_idx], target_hom[:, best_inlier_idx], best_inlier_ratio
69 |
70 |
71 | def evaluateModel(out_transform, source_hom, target_hom, pass_threshold):
72 | diff = target_hom - np.matmul(out_transform, source_hom)
73 | residual_vec = np.linalg.norm(diff[:3, :], axis=0)
74 | residual = np.linalg.norm(residual_vec)
75 | inlier_idx = np.where(residual_vec < pass_threshold)
76 |     n_inliers = inlier_idx[0].shape[0]  # np.where returns a tuple of index arrays
77 |     inlier_ratio = n_inliers / source_hom.shape[1]
78 |     return residual, inlier_ratio, inlier_idx[0]
79 |
80 |
81 | def estimateSimilarityUmeyama(source_hom, target_hom):
82 | source_centroid = np.mean(source_hom[:3, :], axis=1)
83 | target_centroid = np.mean(target_hom[:3, :], axis=1)
84 | n_points = source_hom.shape[1]
85 |
86 | centered_source = source_hom[:3, :] - \
87 | np.tile(source_centroid, (n_points, 1)).transpose()
88 | centered_target = target_hom[:3, :] - \
89 | np.tile(target_centroid, (n_points, 1)).transpose()
90 |
91 | cov_matrix = np.matmul(
92 | centered_target, np.transpose(centered_source)) / n_points
93 |
94 | if np.isnan(cov_matrix).any():
95 | raise RuntimeError("There are NaNs in the input.")
96 |
97 | U, D, Vh = np.linalg.svd(cov_matrix, full_matrices=True)
98 | d = (np.linalg.det(U) * np.linalg.det(Vh)) < 0.0
99 | if d:
100 | D[-1] = -D[-1]
101 | U[:, -1] = -U[:, -1]
102 |
103 | rotation = np.matmul(U, Vh).T
104 |
105 | var_p = np.var(source_hom[:3, :], axis=1).sum()
106 | scale_fact = 1 / var_p * np.sum(D)
107 | scales = np.array([scale_fact, scale_fact, scale_fact])
108 | scale_matrix = np.diag(scales)
109 |
110 | translation = target_hom[:3, :].mean(
111 | axis=1) - source_hom[:3, :].mean(axis=1).dot(scale_fact * rotation)
112 |
113 | out_transform = np.identity(4)
114 | out_transform[:3, :3] = scale_matrix @ rotation
115 | out_transform[:3, 3] = translation
116 |
117 | return scales, rotation, translation, out_transform
118 |
119 |
120 | def backproject(depth, intr, mask):
121 | intr_inv = np.linalg.inv(intr)
122 |
123 | non_zero_mask = depth > 0
124 | final_instance_mask = np.logical_and(mask, non_zero_mask)
125 |
126 | idxs = np.where(final_instance_mask)
127 | grid = np.array([idxs[1], idxs[0]])
128 |
129 | length = grid.shape[1]
130 | ones = np.ones([1, length])
131 | uv_grid = np.concatenate([grid, ones], axis=0)
132 |
133 | xyz = intr_inv @ uv_grid
134 | xyz = np.transpose(xyz)
135 |
136 | z = depth[idxs[0], idxs[1]]
137 |
138 | pts = xyz * z[:, np.newaxis] / xyz[:, -1:]
139 | pts[:, 0] = -pts[:, 0]
140 | pts[:, 1] = -pts[:, 1]
141 | return pts, idxs
142 |
143 |
144 | def triangle_area(v1, v2, v3):
145 | a = np.array(v2) - np.array(v1)
146 | b = np.array(v3) - np.array(v1)
147 | domain = np.dot(a, a) * np.dot(b, b) - (np.dot(a, b) ** 2)
148 | domain = domain if domain > 0 else 0.0
149 |
150 | return math.sqrt(domain) / 2.0
151 |
152 |
153 | def cal_surface_area(mesh):
154 | areas = []
155 | if hasattr(mesh, "faces"):
156 | for face in mesh.faces:
157 | v1, v2, v3 = face
158 | v1 = mesh.vertices[v1]
159 | v2 = mesh.vertices[v2]
160 | v3 = mesh.vertices[v3]
161 |
162 | areas += [triangle_area(v1, v2, v3)]
163 | else:
164 | for face in mesh.triangles:
165 | v1, v2, v3 = face
166 |
167 | areas += [triangle_area(v1, v2, v3)]
168 | return np.array(areas)
169 |
170 |
171 | def sample_obj(path, num_points, norm):
172 | """sample uniform point from .obj mesh file.
173 | if norm, we ill normalize it.
174 | """
175 | mesh = trimesh.load(path)
176 | areas = cal_surface_area(mesh)
177 | prefix_sum = np.cumsum(areas)
178 |
179 | total_area = prefix_sum[-1]
180 | sample_points = []
181 |
182 | for _ in range(num_points):
183 | prob = random.random()
184 | sample_pos = prob * total_area
185 |
186 | # binary search
187 | left_bound, right_bound = 0, len(areas) - 1
188 | while left_bound < right_bound:
189 | mid = (left_bound + right_bound) // 2
190 | if sample_pos <= prefix_sum[mid]:
191 | right_bound = mid
192 | else:
193 | left_bound = mid + 1
194 |
195 | target_surface = right_bound
196 |
197 |         # sample a point on the chosen face
198 | if hasattr(mesh, "faces"):
199 | v1, v2, v3 = mesh.faces[target_surface]
200 |
201 | v1, v2, v3 = mesh.vertices[v1], mesh.vertices[v2], mesh.vertices[v3]
202 | else:
203 | v1, v2, v3 = mesh.triangles[target_surface]
204 |
205 | edge_vec1 = np.array(v2) - np.array(v1)
206 | edge_vec2 = np.array(v3) - np.array(v1)
207 |
208 | prob_vec1, prob_vec2 = random.random(), random.random()
209 | if prob_vec1 + prob_vec2 > 1:
210 | prob_vec1 = 1 - prob_vec1
211 | prob_vec2 = 1 - prob_vec2
212 |
213 | target_point = np.array(
214 | v1) + (edge_vec1 * prob_vec1 + edge_vec2 * prob_vec2)
215 |
216 | sample_points.append(target_point)
217 | sample_points = np.stack(sample_points, axis=0)
218 |
219 | if norm:
220 | min_ = np.min(sample_points, axis=0)
221 | max_ = np.max(sample_points, axis=0)
222 | dis_ = max_ - min_
223 |
224 | scale = 1 / np.sqrt(np.sum(dis_ * dis_))
225 |
226 | sample_points *= scale
227 |
228 | return sample_points
229 |
--------------------------------------------------------------------------------
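
A self-contained sanity check (not a repository file) for the RANSAC + Umeyama pipeline above: synthesize corresponding point sets related by a known similarity transform and confirm the recovered scale and fitted 4x4 transform. Point counts and transform values are arbitrary.

import numpy as np

from datasets.utils import estimateSimilarityTranform

rng = np.random.default_rng(0)
source = rng.uniform(-0.5, 0.5, size=(200, 3))
R, _ = np.linalg.qr(rng.normal(size=(3, 3)))   # random orthonormal basis
R *= np.sign(np.linalg.det(R))                 # force a proper rotation
target = 2.0 * source @ R.T + np.array([0.1, -0.2, 0.3])

scales, rotation, translation, out_transform = estimateSimilarityTranform(
    source, target)
print(scales)  # ~[2. 2. 2.]

# out_transform maps homogeneous source points onto the targets
source_hom = np.vstack([source.T, np.ones((1, len(source)))])
print(np.abs(out_transform[:3] @ source_hom - target.T).max())  # ~0
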
/eval.sh:
--------------------------------------------------------------------------------
1 | # echo "EVAL CASS ..."
2 | # python ./tools/eval.py --resume_model cass_best.pth --dataset_dir ../nocs --cuda --save_dir ../predicted_result --eval --mode cass
3 |
4 | echo "EVAL CASS ..."
5 | python ./tools/eval.py --save_dir ../predicted_result --mode cass
6 |
7 |
8 | echo "EVAL NOCS ..."
9 | python ./tools/eval.py --save_dir ../predicted_result --mode nocs
10 |
--------------------------------------------------------------------------------
/lib/extractors.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 | import math
3 | import random
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 |
8 | def load_weights_sequential(target, source_state):
9 | new_dict = OrderedDict()
10 | for (k1, v1), (k2, v2) in zip(target.state_dict().items(), source_state.items()):
11 |         new_dict[k1] = v2  # copy by position; names in source_state are ignored
12 | target.load_state_dict(new_dict)
13 |
14 | def conv3x3(in_planes, out_planes, stride=1, dilation=1):
15 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
16 | padding=dilation, dilation=dilation, bias=False)
17 |
18 | class BasicBlock(nn.Module):
19 | expansion = 1
20 |
21 | def __init__(self, inplanes, planes, stride=1, downsample=None, dilation=1):
22 | super(BasicBlock, self).__init__()
23 | self.conv1 = conv3x3(inplanes, planes, stride=stride, dilation=dilation)
24 | self.relu = nn.ReLU(inplace=True)
25 | self.conv2 = conv3x3(planes, planes, stride=1, dilation=dilation)
26 | self.downsample = downsample
27 | self.stride = stride
28 |
29 | def forward(self, x):
30 | residual = x
31 |
32 | out = self.conv1(x)
33 | out = self.relu(out)
34 |
35 | out = self.conv2(out)
36 |
37 | if self.downsample is not None:
38 | residual = self.downsample(x)
39 |
40 | out += residual
41 | out = self.relu(out)
42 |
43 | return out
44 |
45 |
46 | class Bottleneck(nn.Module):
47 | expansion = 4
48 | def __init__(self, inplanes, planes, stride=1, downsample=None, dilation=1):
49 | super(Bottleneck, self).__init__()
50 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
51 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, dilation=dilation,
52 | padding=dilation, bias=False)
53 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
54 | self.relu = nn.ReLU(inplace=True)
55 | self.downsample = downsample
56 | self.stride = stride
57 |
58 | def forward(self, x):
59 | residual = x
60 |
61 | out = self.conv1(x)
62 | out = self.relu(out)
63 |
64 | out = self.conv2(out)
65 | out = self.relu(out)
66 |
67 | out = self.conv3(out)
68 |
69 | if self.downsample is not None:
70 | residual = self.downsample(x)
71 |
72 | out += residual
73 | out = self.relu(out)
74 |
75 | return out
76 |
77 |
78 | class ResNet(nn.Module):
79 | def __init__(self, block, layers=(3, 4, 23, 3)):
80 | self.inplanes = 64
81 | super(ResNet, self).__init__()
82 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
83 | bias=False)
84 | self.relu = nn.ReLU(inplace=True)
85 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
86 | self.layer1 = self._make_layer(block, 64, layers[0])
87 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
88 | self.layer3 = self._make_layer(block, 256, layers[2], stride=1, dilation=2)
89 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4)
90 |
91 | for m in self.modules():
92 | if isinstance(m, nn.Conv2d):
93 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
94 | m.weight.data.normal_(0, math.sqrt(2. / n))
95 | elif isinstance(m, nn.BatchNorm2d):
96 | m.weight.data.fill_(1)
97 | m.bias.data.zero_()
98 |
99 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1):
100 | downsample = None
101 | if stride != 1 or self.inplanes != planes * block.expansion:
102 | downsample = nn.Sequential(
103 | nn.Conv2d(self.inplanes, planes * block.expansion,
104 | kernel_size=1, stride=stride, bias=False)
105 | )
106 |
107 | layers = [block(self.inplanes, planes, stride, downsample)]
108 | self.inplanes = planes * block.expansion
109 | for i in range(1, blocks):
110 | layers.append(block(self.inplanes, planes, dilation=dilation))
111 |
112 | return nn.Sequential(*layers)
113 |
114 | def forward(self, x):
115 | x = self.conv1(x)
116 | x = self.relu(x)
117 | x = self.maxpool(x)
118 |
119 | x = self.layer1(x)
120 | x = self.layer2(x)
121 | x_3 = self.layer3(x)
122 | x = self.layer4(x_3)
123 |
124 | return x, x_3
125 |
126 |
127 | def resnet18(pretrained=False):
128 | model = ResNet(BasicBlock, [2, 2, 2, 2])
129 | return model
130 |
131 | def resnet34(pretrained=False):
132 | model = ResNet(BasicBlock, [3, 4, 6, 3])
133 | return model
134 |
135 | def resnet50(pretrained=False):
136 | model = ResNet(Bottleneck, [3, 4, 6, 3])
137 | return model
138 |
139 | def resnet101(pretrained=False):
140 | model = ResNet(Bottleneck, [3, 4, 23, 3])
141 | return model
142 |
143 | def resnet152(pretrained=False):
144 | model = ResNet(Bottleneck, [3, 8, 36, 3])
145 | return model
146 |
--------------------------------------------------------------------------------
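
For reference, layer3/layer4 use dilation with stride 1, so both returned feature maps stay at 1/8 of the input resolution. A quick shape check (not a repository file), with an arbitrary 160x160 input:

import torch

from lib.extractors import resnet18

model = resnet18().eval()
with torch.no_grad():
    x, x_3 = model(torch.rand(1, 3, 160, 160))
print(x.shape, x_3.shape)  # (1, 512, 20, 20) and (1, 256, 20, 20)
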
/lib/foldingnet.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import numpy as np
4 |
5 | from lib.pointnet import PointNetGlobalMax, get_MLP_layers, PointNetVanilla, PointwiseMLP
6 | from lib.utils import make_box, make_sphere, make_cylinder
7 |
8 | class ChamfersDistance3(nn.Module):
9 | '''
10 |     Exhaustive pairwise computation of the Chamfer distance between two
11 |     point clouds; implemented directly, with no external reference.
11 | '''
12 | def forward(self, input1, input2):
13 | # input1, input2: BxNxK, BxMxK, K = 3
14 | B, N, K = input1.shape
15 | _, M, _ = input2.shape
16 |
17 | # Repeat (x,y,z) M times in a row
18 | input11 = input1.unsqueeze(2) # BxNx1xK
19 | input11 = input11.expand(B, N, M, K) # BxNxMxK
20 | # Repeat (x,y,z) N times in a column
21 | input22 = input2.unsqueeze(1) # Bx1xMxK
22 | input22 = input22.expand(B, N, M, K) # BxNxMxK
23 | # compute the distance matrix
24 | D = input11 - input22 # BxNxMxK
25 | D = torch.norm( D, p=2, dim=3 ) # BxNxM
26 |
27 | dist0, _ = torch.min( D, dim=1 ) # BxM
28 | dist1, _ = torch.min( D, dim=2 ) # BxN
29 |
30 | loss = torch.mean(dist0, 1) + torch.mean(dist1, 1) # B
31 | loss = torch.mean(loss) # 1
32 | return loss
33 |
34 |
35 | class FoldingNetSingle(nn.Module):
36 | def __init__(self, dims):
37 | super(FoldingNetSingle, self).__init__()
38 | self.mlp = PointwiseMLP(dims, doLastRelu=False)
39 |
40 | def forward(self, X):
41 | return self.mlp.forward(X)
42 |
43 |
44 | class FoldingNetVanilla(nn.Module): # PointNetVanilla or nn.Sequential
45 | def __init__(self, MLP_dims, FC_dims, grid_dims, Folding1_dims,
46 | Folding2_dims, MLP_doLastRelu=False):
47 | assert(MLP_dims[-1]==FC_dims[0])
48 | super(FoldingNetVanilla, self).__init__()
49 | # Encoder
50 | # PointNet
51 | self.PointNet = PointNetVanilla(MLP_dims, FC_dims, MLP_doLastRelu)
52 |
53 | # Decoder
54 | # Folding
55 | # 2D grid: (grid_dims(0) * grid_dims(1)) x 2
56 | # TODO: normalize the grid to align with the input data
57 | self.N = grid_dims[0] * grid_dims[1]
58 | u = (torch.arange(0, grid_dims[0]) / grid_dims[0] - 0.5).repeat(grid_dims[1])
59 | v = (torch.arange(0, grid_dims[1]) / grid_dims[1] - 0.5).expand(grid_dims[0], -1).t().reshape(-1)
60 | self.grid = torch.stack((u, v), 1) # Nx2
61 |
62 | # 1st folding
63 | self.Fold1 = FoldingNetSingle(Folding1_dims)
64 | # 2nd folding
65 | self.Fold2 = FoldingNetSingle(Folding2_dims)
66 |
67 |
68 | def forward(self, X):
69 | # encoding
70 | f = self.PointNet.forward(X) # BxK
71 | f = f.unsqueeze(1) # Bx1xK
72 | codeword = f.expand(-1, self.N, -1) # BxNxK
73 |
74 | # cat 2d grid and feature
75 | B = codeword.shape[0] # extract batch size
76 | if not X.is_cuda:
77 | tmpGrid = self.grid # Nx2
78 | else:
79 | tmpGrid = self.grid.cuda() # Nx2
80 | tmpGrid = tmpGrid.unsqueeze(0)
81 | tmpGrid = tmpGrid.expand(B, -1, -1) # BxNx2
82 |
83 | # 1st folding
84 | f = torch.cat((tmpGrid, codeword), 2 ) # BxNx(K+2)
85 | f = self.Fold1.forward(f) # BxNx3
86 |
87 | # 2nd folding
88 | f = torch.cat((f, codeword), 2 ) # BxNx(K+3)
89 | f = self.Fold2.forward(f) # BxNx3
90 | return f
91 |
92 |
93 | class FoldingNetShapes(nn.Module):
94 | ## add 3 shapes to choose and a learnable layer
95 | def __init__(self, MLP_dims, FC_dims, Folding1_dims,
96 | Folding2_dims, MLP_doLastRelu=False):
97 | assert(MLP_dims[-1]==FC_dims[0])
98 | super(FoldingNetShapes, self).__init__()
99 | # Encoder
100 | # PointNet
101 | self.PointNet = PointNetVanilla(MLP_dims, FC_dims, MLP_doLastRelu)
102 |
103 | # Decoder
104 | # Folding
105 | self.box = make_box() # 18 * 18 * 6 points
106 | self.cylinder = make_cylinder() # same as 1944
107 | self.sphere = make_sphere() # 1944 points
108 | self.grid = torch.Tensor(np.hstack((self.box, self.cylinder, self.sphere)))
109 |
110 | # 1st folding
111 | self.Fold1 = FoldingNetSingle(Folding1_dims)
112 | # 2nd folding
113 | self.Fold2 = FoldingNetSingle(Folding2_dims)
114 | self.N = 1944 # number of points needed to replicate codeword later; also points in Grid
115 | self.fc = nn.Linear(9, 9, True) # geometric transformation
116 |
117 |
118 | def forward(self, X):
119 | # encoding
120 | f = self.PointNet.forward(X) # BxK
121 | f = f.unsqueeze(1) # Bx1xK
122 | codeword = f.expand(-1, self.N, -1) # BxNxK
123 |
124 |         # cat shape grid (box/cylinder/sphere, Nx9) and feature
125 | B = codeword.shape[0] # extract batch size
126 | if not X.is_cuda:
127 | tmpGrid = self.grid # Nx9
128 | else:
129 | tmpGrid = self.grid.cuda() # Nx9
130 | tmpGrid = tmpGrid.unsqueeze(0)
131 | tmpGrid = tmpGrid.expand(B, -1, -1) # BxNx9
132 | tmpGrid = self.fc(tmpGrid) # transform
133 |
134 |
135 | # 1st folding
136 | f = torch.cat((tmpGrid, codeword), 2) # BxNx(K+9)
137 | f = self.Fold1.forward(f) # BxNx3
138 |
139 | # 2nd folding
140 | f = torch.cat((f, codeword), 2 ) # BxNx(K+3)
141 | f = self.Fold2.forward(f) # BxNx3
142 | return f
143 |
144 |
145 | class Recon(nn.Module):
146 | def __init__(self, Folding1_dims, Folding2_dims):
147 | super(Recon, self).__init__()
148 | # Decoder
149 | # Folding
150 | self.box = make_box() # 18 * 18 * 6 points
151 | self.cylinder = make_cylinder() # same as 1944
152 | self.sphere = make_sphere() # 1944 points
153 | self.grid = torch.Tensor(np.hstack((self.box, self.cylinder, self.sphere)))
154 |
155 | # 1st folding
156 | self.Fold1 = FoldingNetSingle(Folding1_dims)
157 | # 2nd folding
158 | self.Fold2 = FoldingNetSingle(Folding2_dims)
159 | self.N = 1944 # number of points needed to replicate codeword later; also points in Grid
160 | self.fc = nn.Linear(9, 9, True) # geometric transformation
161 |
162 |
163 | def forward(self, codeword):
164 |         # cat shape grid (box/cylinder/sphere, Nx9) and feature
165 |         codeword = codeword.transpose(1, 2)
166 |         B = codeword.shape[0]  # extract batch size
167 |         if not codeword.is_cuda:
168 |             tmpGrid = self.grid  # Nx9
169 |         else:
170 |             tmpGrid = self.grid.cuda()  # Nx9
171 |         tmpGrid = tmpGrid.unsqueeze(0)
172 |         tmpGrid = tmpGrid.expand(B, -1, -1)  # BxNx9
173 |
174 |         # 1st folding
175 |         f = torch.cat((tmpGrid, codeword), 2)  # BxNx(K+9)
176 | f = self.Fold1.forward(f) # BxNx3
177 |
178 | # 2nd folding
179 | f = torch.cat((f, codeword), 2 ) # BxNx(K+3)
180 | f = self.Fold2.forward(f) # BxNx3
181 | return f
182 |
--------------------------------------------------------------------------------
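
A shape sketch for FoldingNetShapes (not a repository file), using the same dimension tuples that lib/models.py passes to its folding decoder; the batch size and input point count are arbitrary. The decoder always emits 1944 points, one per row of the box/cylinder/sphere grid.

import torch

from lib.foldingnet import FoldingNetShapes

net = FoldingNetShapes(
    MLP_dims=(3, 64, 64, 64, 128, 1024),
    FC_dims=(1024, 512, 1408),
    Folding1_dims=(1408 + 9, 512, 512, 3),   # codeword (1408) + 9-D grid
    Folding2_dims=(1408 + 3, 512, 512, 3),   # codeword (1408) + first fold
)
X = torch.rand(2, 500, 3)   # B x N x 3 input cloud
print(net(X).shape)         # torch.Size([2, 1944, 3])
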
/lib/models.py:
--------------------------------------------------------------------------------
1 | import lib.network as dlib
2 | import lib.foldingnet as flib
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 |
7 |
8 | class ModifiedEncode(dlib.Encode):
9 | def __init__(self, *args, **kwargs):
10 | super(ModifiedEncode, self).__init__(*args, **kwargs)
11 |
12 |
13 | class ModifiedRecon(flib.Recon):
14 | def __init__(self, num_points, *args, **kwargs):
15 | assert num_points <= 1944
16 | super(ModifiedRecon, self).__init__(*args, **kwargs)
17 |
18 | stride = 1944 // num_points
19 | self.grid = [self.grid[i] for i in range(0, 1944, stride)]
20 | self.grid = torch.stack(self.grid, dim=0)[:num_points]
21 |
22 | self.N = num_points
23 |
24 | self.register_buffer("grid_buf", self.grid)
25 |
26 | self.var = nn.Linear(num_points, 1)
27 |
28 | def forward(self, codeword):
29 | if self.training:
30 |             # VAE reparameterization: codeword is the mean, self.var predicts the log-variance
31 | noise = self.var(codeword)
32 |
33 | eps = torch.randn_like(noise)
34 | codeword = (codeword + torch.exp(noise / 2.0) * eps)
35 | kl_loss = torch.mean(0.5 * torch.sum(torch.exp(noise) + codeword ** 2 - 1.0 - noise, 1))
36 | return super().forward(codeword), kl_loss
37 | else:
38 | return super().forward(codeword)
39 |
40 | class ModifiedPose(dlib.Pose):
41 | def __init__(self, *args, **kwargs):
42 | super(ModifiedPose, self).__init__(*args, **kwargs)
43 |
44 | self.conv1_r = torch.nn.Conv1d(1408 * 2, 640, 1)
45 | self.conv1_t = torch.nn.Conv1d(1408 * 2, 640, 1)
46 | self.conv1_c = torch.nn.Conv1d(1408 * 2, 640, 1)
47 |
48 |
49 | class ModifiedFoldingNetShapes(nn.Module):
50 | def __init__(self, num_points, MLP_dims, FC_dims, Folding1_dims, Folding2_dims, MLP_doLastRelu):
51 | super(ModifiedFoldingNetShapes, self).__init__()
52 |
53 | self.encoding = ModifiedEncode(num_points)
54 |
55 | self.reconstructing = ModifiedRecon(
56 | num_points, Folding1_dims, Folding2_dims)
57 |
58 | # self.var = nn.Linear(num_points, 1)
59 |
60 | def encode(self, img, x, choose):
61 | return self.encoding(img, x, choose)
62 |
63 | def recon(self, codeword):
64 | return self.reconstructing(codeword)
65 | # if self.training:
66 | # # ADD VAE MODULE HERE
67 | # noise = self.var(codeword)
68 |
69 | # eps = torch.randn_like(noise)
70 | # codeword = (codeword + torch.exp(noise / 2.0) * eps)
71 | # kl_loss = torch.mean(0.5 * torch.sum(torch.exp(noise) + codeword ** 2 - 1.0 - noise, 1))
72 | # return self.reconstructing(codeword), kl_loss
73 | # else:
74 | # return self.reconstructing(codeword)
75 |
76 |
77 | class ModifiedPoseNet(nn.Module):
78 | def __init__(self, num_points, num_obj):
79 | super(ModifiedPoseNet, self).__init__()
80 |
81 | self.encoding = ModifiedEncode(num_points)
82 |
83 | self.posing = ModifiedPose(num_points, num_obj)
84 |
85 | def encode(self, img, x, choose):
86 | return self.encoding(img, x, choose)
87 |
88 | def pose(self, codeword, obj):
89 | return self.posing(codeword, obj)
90 |
91 |
92 | class ModifiedPoseRefineNet(dlib.PoseRefineNet):
93 | def __init__(self, *args, **kwargs):
94 | super(ModifiedPoseRefineNet, self).__init__(*args, **kwargs)
95 |
96 |
97 | class CASS(nn.Module):
98 | def __init__(self, opt):
99 | super().__init__()
100 |
101 | MLP_dims = (3, 64, 64, 64, 128, 1024)
102 | FC_dims = (1024, 512, 1408)
103 | Folding1_dims = (1408+9, 512, 512, 3)
104 | Folding2_dims = (1408+3, 512, 512, 3)
105 | MLP_doLastRelu = False
106 | self.opt = opt
107 | self.estimator = ModifiedPoseNet(
108 | num_points=opt.num_points, num_obj=opt.num_objects
109 | )
110 | self.refiner = ModifiedPoseRefineNet(
111 | num_points=opt.num_points, num_obj=opt.num_objects
112 | )
113 | self.foldingnet = ModifiedFoldingNetShapes(
114 | opt.num_points,
115 | MLP_dims, FC_dims, Folding1_dims, Folding2_dims, MLP_doLastRelu
116 | )
117 |
--------------------------------------------------------------------------------
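
CASS only reads opt.num_points and opt.num_objects, so a SimpleNamespace can stand in for the argparse options that tools/eval.py presumably builds. In this sketch (not a repository file) num_points=500 is an assumption (any value up to the 1944 grid points satisfies ModifiedRecon's assert); num_objects=6 matches classes.txt.

from types import SimpleNamespace

from lib.models import CASS

opt = SimpleNamespace(num_points=500, num_objects=6)
model = CASS(opt)
print(sum(p.numel() for p in model.parameters()))  # total parameter count
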
/lib/network.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import numpy as np
4 | import torch.nn.functional as F
5 | from lib.pspnet import PSPNet
6 |
7 | psp_models = {
8 | 'resnet18': lambda: PSPNet(sizes=(1, 2, 3, 6), psp_size=512, deep_features_size=256, backend='resnet18'),
9 | 'resnet34': lambda: PSPNet(sizes=(1, 2, 3, 6), psp_size=512, deep_features_size=256, backend='resnet34'),
10 | 'resnet50': lambda: PSPNet(sizes=(1, 2, 3, 6), psp_size=2048, deep_features_size=1024, backend='resnet50'),
11 | 'resnet101': lambda: PSPNet(sizes=(1, 2, 3, 6), psp_size=2048, deep_features_size=1024, backend='resnet101'),
12 | 'resnet152': lambda: PSPNet(sizes=(1, 2, 3, 6), psp_size=2048, deep_features_size=1024, backend='resnet152')
13 | }
14 |
15 |
16 | class ModifiedResnet(nn.Module):
17 |
18 | def __init__(self, usegpu=True):
19 | super(ModifiedResnet, self).__init__()
20 |
21 | self.model = psp_models['resnet18'.lower()]()
22 |
23 | def forward(self, x):
24 | x = self.model(x)
25 | return x
26 |
27 |
28 | class PoseNetFeat(nn.Module):
29 | def __init__(self, num_points):
30 | super(PoseNetFeat, self).__init__()
31 | self.conv1 = torch.nn.Conv1d(3, 64, 1)
32 | self.conv2 = torch.nn.Conv1d(64, 128, 1)
33 |
34 | self.e_conv1 = torch.nn.Conv1d(32, 64, 1)
35 | self.e_conv2 = torch.nn.Conv1d(64, 128, 1)
36 |
37 | self.conv5 = torch.nn.Conv1d(256, 512, 1)
38 | self.conv6 = torch.nn.Conv1d(512, 1024, 1)
39 |
40 | self.ap1 = torch.nn.AvgPool1d(num_points)
41 | self.num_points = num_points
42 |
43 | def forward(self, x, emb):
44 | x = F.relu(self.conv1(x))
45 | emb = F.relu(self.e_conv1(emb))
46 | pointfeat_1 = torch.cat((x, emb), dim=1)
47 |
48 | x = F.relu(self.conv2(x))
49 | emb = F.relu(self.e_conv2(emb))
50 | pointfeat_2 = torch.cat((x, emb), dim=1)
51 |
52 | x = F.relu(self.conv5(pointfeat_2))
53 | x = F.relu(self.conv6(x))
54 |
55 | ap_x = self.ap1(x)
56 |
57 | ap_x = ap_x.view(-1, 1024, 1).repeat(1, 1, self.num_points)
58 | # 128 + 256 + 1024
59 | return torch.cat([pointfeat_1, pointfeat_2, ap_x], 1)
60 |
61 |
62 | class PoseNet(nn.Module):
63 | def __init__(self, num_points, num_obj):
64 | super(PoseNet, self).__init__()
65 | self.num_points = num_points
66 | self.cnn = ModifiedResnet()
67 | self.feat = PoseNetFeat(num_points)
68 |
69 | self.conv1_r = torch.nn.Conv1d(1408, 640, 1)
70 | self.conv1_t = torch.nn.Conv1d(1408, 640, 1)
71 | self.conv1_c = torch.nn.Conv1d(1408, 640, 1)
72 |
73 | self.conv2_r = torch.nn.Conv1d(640, 256, 1)
74 | self.conv2_t = torch.nn.Conv1d(640, 256, 1)
75 | self.conv2_c = torch.nn.Conv1d(640, 256, 1)
76 |
77 | self.conv3_r = torch.nn.Conv1d(256, 128, 1)
78 | self.conv3_t = torch.nn.Conv1d(256, 128, 1)
79 | self.conv3_c = torch.nn.Conv1d(256, 128, 1)
80 |
81 | self.conv4_r = torch.nn.Conv1d(128, num_obj*4, 1) # quaternion
82 | self.conv4_t = torch.nn.Conv1d(128, num_obj*3, 1) # translation
83 | self.conv4_c = torch.nn.Conv1d(128, num_obj*1, 1) # confidence
84 |
85 | self.num_obj = num_obj
86 |
87 | def forward(self, img, x, choose, obj):
88 | out_img = self.cnn(img)
89 |
90 | bs, di, _, _ = out_img.size()
91 |
92 | emb = out_img.view(bs, di, -1)
93 | choose = choose.repeat(1, di, 1)
94 | emb = torch.gather(emb, 2, choose).contiguous()
95 |
96 | x = x.transpose(2, 1).contiguous()
97 | ap_x = self.feat(x, emb)
98 |
99 | rx = F.relu(self.conv1_r(ap_x))
100 | tx = F.relu(self.conv1_t(ap_x))
101 | cx = F.relu(self.conv1_c(ap_x))
102 |
103 | rx = F.relu(self.conv2_r(rx))
104 | tx = F.relu(self.conv2_t(tx))
105 | cx = F.relu(self.conv2_c(cx))
106 |
107 | rx = F.relu(self.conv3_r(rx))
108 | tx = F.relu(self.conv3_t(tx))
109 | cx = F.relu(self.conv3_c(cx))
110 |
111 | rx = self.conv4_r(rx).view(bs, self.num_obj, 4, self.num_points)
112 | tx = self.conv4_t(tx).view(bs, self.num_obj, 3, self.num_points)
113 | cx = torch.sigmoid(self.conv4_c(cx)).view(
114 | bs, self.num_obj, 1, self.num_points)
115 |
116 |         b = 0  # only batch size 1 is supported here
117 | out_rx = torch.index_select(rx[b], 0, obj[b])
118 | out_tx = torch.index_select(tx[b], 0, obj[b])
119 | out_cx = torch.index_select(cx[b], 0, obj[b])
120 |
121 | out_rx = out_rx.contiguous().transpose(2, 1).contiguous()
122 | out_cx = out_cx.contiguous().transpose(2, 1).contiguous()
123 | out_tx = out_tx.contiguous().transpose(2, 1).contiguous()
124 |
125 | return out_rx, out_tx, out_cx, emb.detach()
126 |
127 |
128 | class PoseRefineNetFeat(nn.Module):
129 | def __init__(self, num_points):
130 | super(PoseRefineNetFeat, self).__init__()
131 | self.conv1 = torch.nn.Conv1d(3, 64, 1)
132 | self.conv2 = torch.nn.Conv1d(64, 128, 1)
133 |
134 | self.pre_conv1 = torch.nn.Conv1d(1408, 512, 1)
135 | self.pre_conv2 = torch.nn.Conv1d(512, 256, 1)
136 | self.pre_conv3 = torch.nn.Conv1d(256, 32, 1)
137 |
138 | self.e_conv1 = torch.nn.Conv1d(32, 64, 1)
139 | self.e_conv2 = torch.nn.Conv1d(64, 128, 1)
140 |
141 | self.conv5 = torch.nn.Conv1d(384, 512, 1)
142 | self.conv6 = torch.nn.Conv1d(512, 1024, 1)
143 |
144 | self.ap1 = torch.nn.AvgPool1d(num_points)
145 | self.num_points = num_points
146 |
147 | def forward(self, x, emb):
148 | emb = F.relu(self.pre_conv1(emb))
149 | emb = F.relu(self.pre_conv2(emb))
150 | emb = F.relu(self.pre_conv3(emb))
151 | x = F.relu(self.conv1(x))
152 | emb = F.relu(self.e_conv1(emb))
153 | pointfeat_1 = torch.cat([x, emb], dim=1)
154 |
155 | x = F.relu(self.conv2(x))
156 | emb = F.relu(self.e_conv2(emb))
157 | pointfeat_2 = torch.cat([x, emb], dim=1)
158 |
159 | pointfeat_3 = torch.cat([pointfeat_1, pointfeat_2], dim=1)
160 |
161 | x = F.relu(self.conv5(pointfeat_3))
162 | x = F.relu(self.conv6(x))
163 |
164 | ap_x = self.ap1(x)
165 |
166 | ap_x = ap_x.view(-1, 1024)
167 | return ap_x
168 |
169 |
170 | class PoseRefineNet(nn.Module):
171 | def __init__(self, num_points, num_obj):
172 | super(PoseRefineNet, self).__init__()
173 | self.num_points = num_points
174 | self.feat = PoseRefineNetFeat(num_points)
175 |
176 | self.conv1_r = torch.nn.Linear(1024, 512)
177 | self.conv1_t = torch.nn.Linear(1024, 512)
178 |
179 | self.conv2_r = torch.nn.Linear(512, 128)
180 | self.conv2_t = torch.nn.Linear(512, 128)
181 |
182 | self.conv3_r = torch.nn.Linear(128, num_obj*4) # quaternion
183 | self.conv3_t = torch.nn.Linear(128, num_obj*3) # translation
184 |
185 | self.num_obj = num_obj
186 |
187 | def forward(self, x, emb, obj):
188 | bs = x.size()[0]
189 |
190 | x = x.transpose(2, 1).contiguous()
191 | ap_x = self.feat(x, emb)
192 |
193 | rx = F.relu(self.conv1_r(ap_x))
194 | tx = F.relu(self.conv1_t(ap_x))
195 |
196 | rx = F.relu(self.conv2_r(rx))
197 | tx = F.relu(self.conv2_t(tx))
198 |
199 | rx = self.conv3_r(rx).view(bs, self.num_obj, 4)
200 | tx = self.conv3_t(tx).view(bs, self.num_obj, 3)
201 |
202 |         b = 0  # only batch size 1 is supported here
203 | out_rx = torch.index_select(rx[b], 0, obj[b])
204 | out_tx = torch.index_select(tx[b], 0, obj[b])
205 |
206 | return out_rx, out_tx
207 |
208 |
209 | class Encode(nn.Module):
210 | def __init__(self, num_points):
211 | super(Encode, self).__init__()
212 |
213 | self.num_points = num_points
214 | self.cnn = ModifiedResnet()
215 | self.feat = PoseNetFeat(num_points)
216 |
217 | def forward(self, img, x, choose):
218 | out_img = self.cnn(img)
219 |
220 | bs, di, _, _ = out_img.size()
221 |
222 | emb = out_img.view(bs, di, -1)
223 | choose = choose.repeat(1, di, 1)
224 | emb = torch.gather(emb, 2, choose).contiguous()
225 |
226 | x = x.transpose(2, 1).contiguous()
227 | ap_x = self.feat(x, emb)
228 |
229 | return ap_x
230 |
231 |
232 | class Pose(nn.Module):
233 | def __init__(self, num_points, num_obj):
234 | super(Pose, self).__init__()
235 | self.conv1_r = torch.nn.Conv1d(1408, 640, 1)
236 | self.conv1_t = torch.nn.Conv1d(1408, 640, 1)
237 | self.conv1_c = torch.nn.Conv1d(1408, 640, 1)
238 |
239 | self.conv2_r = torch.nn.Conv1d(640, 256, 1)
240 | self.conv2_t = torch.nn.Conv1d(640, 256, 1)
241 | self.conv2_c = torch.nn.Conv1d(640, 256, 1)
242 |
243 | self.conv3_r = torch.nn.Conv1d(256, 128, 1)
244 | self.conv3_t = torch.nn.Conv1d(256, 128, 1)
245 | self.conv3_c = torch.nn.Conv1d(256, 128, 1)
246 |
247 | self.conv4_r = torch.nn.Conv1d(128, num_obj*4, 1) # quaternion
248 | self.conv4_t = torch.nn.Conv1d(128, num_obj*3, 1) # translation
249 | self.conv4_c = torch.nn.Conv1d(128, num_obj*1, 1) # confidence
250 |
251 | self.num_obj = num_obj
252 | self.num_points = num_points
253 |
254 | def forward(self, codeword, obj):
255 |
256 | bs = codeword.size(0)
257 |
258 | rx = F.relu(self.conv1_r(codeword))
259 | tx = F.relu(self.conv1_t(codeword))
260 | cx = F.relu(self.conv1_c(codeword))
261 |
262 | rx = F.relu(self.conv2_r(rx))
263 | tx = F.relu(self.conv2_t(tx))
264 | cx = F.relu(self.conv2_c(cx))
265 |
266 | rx = F.relu(self.conv3_r(rx))
267 | tx = F.relu(self.conv3_t(tx))
268 | cx = F.relu(self.conv3_c(cx))
269 |
270 | rx = self.conv4_r(rx).view(bs, self.num_obj, 4, self.num_points)
271 | tx = self.conv4_t(tx).view(bs, self.num_obj, 3, self.num_points)
272 | cx = torch.sigmoid(self.conv4_c(cx)).view(
273 | bs, self.num_obj, 1, self.num_points)
274 |
275 |         b = 0  # only batch size 1 is supported here
276 | out_rx = torch.index_select(rx[b], 0, obj[b])
277 | out_tx = torch.index_select(tx[b], 0, obj[b])
278 | out_cx = torch.index_select(cx[b], 0, obj[b])
279 |
280 | out_rx = out_rx.contiguous().transpose(2, 1).contiguous()
281 | out_cx = out_cx.contiguous().transpose(2, 1).contiguous()
282 | out_tx = out_tx.contiguous().transpose(2, 1).contiguous()
283 |
284 | return out_rx, out_tx, out_cx
285 |
--------------------------------------------------------------------------------
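
A shape walk-through for PoseNet (not a repository file); the 160x160 crop size and 500 points are illustrative assumptions. Batch size 1 matches the hard-coded b = 0 inside forward, and choose holds flattened pixel indices into the full-resolution embedding.

import torch

from lib.network import PoseNet

net = PoseNet(num_points=500, num_obj=6).eval()
img = torch.rand(1, 3, 160, 160)                   # normalized RGB crop
x = torch.rand(1, 500, 3)                          # back-projected points
choose = torch.randint(0, 160 * 160, (1, 1, 500))  # pixel index per point
obj = torch.LongTensor([[0]])                      # class index (0-based)
with torch.no_grad():
    rx, tx, cx, emb = net(img, x, choose, obj)
print(rx.shape, tx.shape, cx.shape)  # (1, 500, 4), (1, 500, 3), (1, 500, 1)
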
/lib/pointnet.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as Functional
4 |
5 |
6 | def get_and_init_FC_layer(din, dout):
7 | li = nn.Linear(din, dout)
8 | # init weights/bias
9 | nn.init.xavier_uniform_(
10 | li.weight.data, gain=nn.init.calculate_gain('relu'))
11 | li.bias.data.fill_(0.)
12 | return li
13 |
14 |
15 | def get_MLP_layers(dims, doLastRelu):
16 | layers = []
17 | for i in range(1, len(dims)):
18 | layers.append(get_and_init_FC_layer(dims[i-1], dims[i]))
19 | if i == len(dims)-1 and not doLastRelu:
20 | continue
21 | layers.append(nn.ReLU())
22 | return layers
23 |
24 |
25 | class PointwiseMLP(nn.Sequential):
26 | '''Nxdin ->Nxd1->Nxd2->...-> Nxdout'''
27 |
28 | def __init__(self, dims, doLastRelu=False):
29 | layers = get_MLP_layers(dims, doLastRelu)
30 | super(PointwiseMLP, self).__init__(*layers)
31 |
32 |
33 | class GlobalPool(nn.Module):
34 | '''BxNxK -> BxK'''
35 |
36 | def __init__(self, pool_layer):
37 | super(GlobalPool, self).__init__()
38 | self.Pool = pool_layer
39 |
40 | def forward(self, X):
41 | X = X.unsqueeze(-3) # Bx1xNxK
42 | X = self.Pool(X)
43 | X = X.squeeze(-2)
44 | X = X.squeeze(-2) # BxK
45 | return X
46 |
47 |
48 | class PointNetGlobalMax(nn.Sequential):
49 | '''BxNxdims[0] -> Bxdims[-1]'''
50 |
51 | def __init__(self, dims, doLastRelu=False):
52 | layers = [
53 | PointwiseMLP(dims, doLastRelu=doLastRelu), # BxNxK
54 | GlobalPool(nn.AdaptiveMaxPool2d((1, dims[-1]))), # BxK
55 | ]
56 | super(PointNetGlobalMax, self).__init__(*layers)
57 |
58 |
59 | class PointNetGlobalAvg(nn.Sequential):
60 | '''BxNxdims[0] -> Bxdims[-1]'''
61 |
62 | def __init__(self, dims, doLastRelu=True):
63 | layers = [
64 | PointwiseMLP(dims, doLastRelu=doLastRelu), # BxNxK
65 | GlobalPool(nn.AdaptiveAvgPool2d((1, dims[-1]))), # BxK
66 | ]
67 | super(PointNetGlobalAvg, self).__init__(*layers)
68 |
69 |
70 | class PointNetVanilla(nn.Sequential):
71 |
72 | def __init__(self, MLP_dims, FC_dims, MLP_doLastRelu=False):
73 | assert(MLP_dims[-1] == FC_dims[0])
74 | layers = [
75 | PointNetGlobalMax(MLP_dims, doLastRelu=MLP_doLastRelu), # BxK
76 | ]
77 | layers.extend(get_MLP_layers(FC_dims, False))
78 | super(PointNetVanilla, self).__init__(*layers)
79 |
80 |
81 | class PointNetTplMatch(nn.Module):
82 | '''this can learn, but no better than PointNetVanilla'''
83 |
84 | def __init__(self, MLP_dims, C_tpls, M_points):
85 | super(PointNetTplMatch, self).__init__()
86 | self.P = nn.Parameter(torch.rand(
87 | C_tpls, M_points, MLP_dims[0])*2-1.0) # CxMx3
88 | self.G = PointNetGlobalMax(MLP_dims)
89 |
90 | def forward(self, X):
91 | Fx = self.G.forward(X) # BxNx3 -> BxK
92 | Fp = self.G.forward(self.P) # CxMx3 -> CxK
93 | S = torch.mm(Fx, Fp.t()) # BxC
94 | return S
95 |
96 |
97 | class PairwiseDistanceMatrix(nn.Module):
98 |
99 | def __init__(self):
100 | super(PairwiseDistanceMatrix, self).__init__()
101 |
102 | def forward(self, X, Y):
103 | X2 = (X**2).sum(1).view(-1, 1)
104 | Y2 = (Y**2).sum(1).view(1, -1)
105 | D = X2 + Y2 - 2.0*torch.mm(X, Y.t())
106 | return D
107 |
108 |
109 | class PointNetAttentionPool(nn.Module):
110 |
111 | def __init__(self, MLP_dims, Attention_dims, FC_dims, MLP_doLastRelu=False):
112 | assert(MLP_dims[-1]*Attention_dims[-1] == FC_dims[0])
113 | # assert(Attention_dims[-1]==1)
114 | super(PointNetAttentionPool, self).__init__()
115 | self.add_module(
116 | 'F',
117 | PointwiseMLP(MLP_dims, doLastRelu=MLP_doLastRelu), # BxNxK
118 | )
119 | self.S = nn.Sequential(
120 | PointwiseMLP(Attention_dims, doLastRelu=False), # BxNxM
121 | nn.Softmax(dim=-2) # BxNxM
122 | )
123 | self.L = nn.Sequential(*get_MLP_layers(FC_dims, False))
124 |
125 | def forward(self, X):
126 | F = self.F.forward(X) # BxNxK
127 | S = self.S.forward(X) # BxNxM
128 | S = torch.transpose(S, -1, -2) # BxMxN
129 | G = torch.bmm(S, F) # BxMxK
130 | sz = G.size()
131 | G = G.view(-1, sz[-1]*sz[-2]) # BxMK
132 | Y = self.L.forward(G) # BxFC_dims[-1]
133 | return Y
134 |
135 |
136 | class PointNetBilinearPool(nn.Module):
137 |
138 | def __init__(self, MLP1_dims, FC1_dims, MLP2_dims, FC2_dims, FC_dims):
139 | assert(MLP1_dims[-1] == FC1_dims[0])
140 | assert(MLP2_dims[-1] == FC2_dims[0])
141 | super(PointNetBilinearPool, self).__init__()
142 | self.F1 = nn.Sequential(
143 | PointNetGlobalMax(MLP1_dims),
144 | *get_MLP_layers(FC1_dims, False)
145 | ) # BxFC1_dims[-1]
146 | self.F2 = nn.Sequential(
147 | PointNetGlobalMax(MLP2_dims),
148 | *get_MLP_layers(FC2_dims, False)
149 | ) # BxFC2_dims[-1]
150 | self.L = nn.Sequential(*get_MLP_layers(FC_dims, False))
151 |
152 | def forward(self, X):
153 | F1 = self.F1.forward(X) # BxK1
154 | F2 = self.F2.forward(X) # BxK2
155 | F1 = F1.unsqueeze(-1) # BxK1x1
156 | F2 = F2.unsqueeze(-2) # Bx1xK2
157 | G = torch.bmm(F1, F2) # BxK1xK2
158 |
159 | sz = G.size()
160 | G = G.view(-1, sz[-1]*sz[-2])
161 | Y = self.L.forward(G)
162 | return Y
163 |
164 |
165 | class PointPairNet(nn.Module):
166 |
167 | def __init__(self, dims, FC_dims):
168 | assert(dims[-1] == FC_dims[0])
169 | super(PointPairNet, self).__init__()
170 | self.L = nn.Sequential(*get_MLP_layers(dims, False))
171 | self.Pool = nn.AdaptiveMaxPool2d((1, 1))
172 | self.F = nn.Sequential(*get_MLP_layers(FC_dims, False))
173 |
174 | def forward(self, X):
175 | sz = X.size() # BxNx3
176 | Xr = X.view(sz[0], 1, sz[1], sz[2]).expand(
177 | sz[0], sz[1], sz[1], sz[2]) # BxNxNx3
178 | Xrc = torch.cat((Xr, Xr.transpose(1, 2)), dim=-1) # BxNxNx6
179 | G = self.L.forward(Xrc).transpose(1, -1) # BxKxNxN
180 |
181 | P = self.Pool.forward(G).squeeze(-1).squeeze(-1) # BxK
182 | Y = self.F.forward(P)
183 | return Y
184 |
185 |
186 | class BoostedPointPairNet(PointPairNet):
187 |
188 | def __init__(self, d, dims, FC_dims, max_pool=True):
189 | super(BoostedPointPairNet, self).__init__(dims, FC_dims)
190 | self.d = d
191 | self.add_module(
192 | 'BoostPool',
193 | nn.AdaptiveMaxPool1d(1) if max_pool else nn.AdaptiveAvgPool1d(1)
194 | )
195 |
196 | def forward(self, X):
197 | n = X.size()[1]
198 | X = X.transpose(0, 1) # NxBx3
199 | # rid = torch.randperm(n)
200 | # X = X[rid,...]
201 | Xs = torch.chunk(X, self.d, dim=0)
202 | Ys = []
203 | for Xi in Xs:
204 | Xi = Xi.transpose(0, 1).contiguous() # Bxmx3
205 | Yi = super(BoostedPointPairNet, self).forward(Xi) # BxC
206 | Ys.append(Yi.unsqueeze(-1))
207 | Y = torch.cat(Ys, dim=-1) # BxCxd
208 | Y = self.BoostPool.forward(Y).squeeze(-1) # BxC
209 | return Y
210 |
211 |
212 | class BoostedPointPairNet2(nn.Module):
213 | ''' More efficiently implemented than BoostedPointPairNet '''
214 |
215 | def __init__(self, boost_factor, dims, FC_dims, sym_pool_max=True, boost_pool_max=True):
216 | assert(dims[-1] == FC_dims[0])
217 | super(BoostedPointPairNet2, self).__init__()
218 | self.boost_factor = boost_factor
219 | self.L = nn.Sequential(*get_MLP_layers(dims, False))
220 | self.SymPool = nn.AdaptiveMaxPool3d((1, 1, dims[-1])) if sym_pool_max\
221 | else nn.AdaptiveAvgPool3d((1, 1, dims[-1]))
222 | self.F = nn.Sequential(*get_MLP_layers(FC_dims, False))
223 | self.BoostPool = nn.AdaptiveMaxPool2d((1, FC_dims[-1])) if boost_pool_max\
224 | else nn.AdaptiveAvgPool2d((1, FC_dims[-1]))
225 |
226 | def forward(self, X):
227 | b, n, din = X.size()
228 | d = self.boost_factor
229 |         m = n // d  # integer division: view() below needs an int size
230 |         assert m * d == n
231 | Xr = X.view(b, d, 1, m, din).expand(b, d, m, m, din)
232 | Xrc = torch.cat((Xr, Xr.transpose(2, 3)), dim=-1) # bxdxmxmx6
233 | G = self.L.forward(Xrc) # bxdxmxmxK
234 | P = self.SymPool.forward(G).squeeze(-2).squeeze(-2) # bxdxK
235 | Y = self.F.forward(P) # bxdxC
236 | Y = self.BoostPool.forward(Y).squeeze(-2) # bxC
237 | return Y
238 |
239 |
240 | class BoostedPointPairNetSuccessivePool(nn.Module):
241 | ''' Change SymPool to successive pool '''
242 |
243 | def __init__(self, boost_factor, dims, FC_dims, sym_pool_max=True, boost_pool_max=True):
244 | assert(dims[-1] == FC_dims[0])
245 | super(BoostedPointPairNetSuccessivePool, self).__init__()
246 | self.boost_factor = boost_factor
247 | self.L = nn.Sequential(*get_MLP_layers(dims, False))
248 | self.dims = dims
249 | self.sym_pool_max = sym_pool_max
250 | self.F = nn.Sequential(*get_MLP_layers(FC_dims, False))
251 | self.BoostPool = nn.AdaptiveMaxPool2d((1, FC_dims[-1])) if boost_pool_max\
252 | else nn.AdaptiveAvgPool2d((1, FC_dims[-1]))
253 |
254 | def forward(self, X):
255 | b, n, din = X.size()
256 | d = self.boost_factor
257 |         m = n // d  # integer division: view() below needs an int size
258 |         assert m * d == n
259 | Xr = X.view(b, d, 1, m, din).expand(b, d, m, m, din)
260 | Xrc = torch.cat((Xr, Xr.transpose(2, 3)), dim=-1) # bxdxmxmx6
261 | G = self.L.forward(Xrc) # bxdxmxmxK
262 | if self.sym_pool_max: # average each point, then max across all points
263 | Pr = Functional.adaptive_avg_pool3d(
264 | G, (m, 1, self.dims[-1])).squeeze(-2) # bxdxmxK
265 | P = Functional.adaptive_max_pool2d(
266 | Pr, (1, self.dims[-1])).squeeze(-2) # bxdxK
267 | else: # max each point, then average over all points
268 | Pr = Functional.adaptive_max_pool3d(
269 | G, (m, 1, self.dims[-1])).squeeze(-2) # bxdxmxK
270 | P = Functional.adaptive_avg_pool2d(
271 | Pr, (1, self.dims[-1])).squeeze(-2) # bxdxK
272 | Y = self.F.forward(P) # bxdxC
273 | Y = self.BoostPool.forward(Y).squeeze(-2) # bxC
274 | return Y
275 |
276 |
277 | class BoostedPointNetVanilla(nn.Module):
278 |
279 | def __init__(self, boost_factor, dims, FC_dims, boost_pool_max=True):
280 | assert(dims[-1] == FC_dims[0])
281 | super(BoostedPointNetVanilla, self).__init__()
282 | self.boost_factor = boost_factor
283 | self.L = nn.Sequential(*get_MLP_layers(dims, False))
284 | self.Pool = nn.AdaptiveMaxPool2d((1, dims[-1]))
285 | self.F = nn.Sequential(*get_MLP_layers(FC_dims, False))
286 | self.BoostPool = nn.AdaptiveMaxPool2d((1, FC_dims[-1])) if boost_pool_max\
287 | else nn.AdaptiveAvgPool2d((1, FC_dims[-1]))
288 |
289 | def forward(self, X):
290 | b, n, din = X.size()
291 | d = self.boost_factor
292 |         m = n // d  # integer division: view() below needs an int size
293 |         assert m * d == n
294 | Xr = X.view(b, d, m, din) # bxdxmx3
295 | F = self.L.forward(Xr) # bxdxmxK
296 | Fp = self.Pool.forward(F).squeeze(-2) # bxdxK
297 | Yp = self.F.forward(Fp).unsqueeze(0) # 1xbxdxC
298 | Y = self.BoostPool.forward(Yp).squeeze(0).squeeze(-2) # bxC
299 | return Y
300 |
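
Usage sketch for the boosted variants above (the layer widths are illustrative assumptions, not values taken from this repo's configs, and lib/ is assumed to be on the import path, e.g. via tools/_init_paths.py). The only hard constraints are that the point count n is divisible by boost_factor and that dims[-1] == FC_dims[0]:

    import torch
    from lib.pointnet import BoostedPointNetVanilla

    net = BoostedPointNetVanilla(boost_factor=4, dims=[3, 64, 128],
                                 FC_dims=[128, 64, 16])
    X = torch.rand(2, 128, 3)   # 2 clouds of 128 xyz points; 128 % 4 == 0
    Y = net(X)                  # (2, 16) global descriptor after boost pooling
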
--------------------------------------------------------------------------------
/lib/pspnet.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | from torch.nn import functional as F
4 | import lib.extractors as extractors
5 |
6 |
7 | class PSPModule(nn.Module):
8 | def __init__(self, features, out_features=1024, sizes=(1, 2, 3, 6)):
9 | super(PSPModule, self).__init__()
10 |         # one adaptive-pooling stage per pyramid scale
11 |         self.stages = nn.ModuleList(
12 |             [self._make_stage(features, size) for size in sizes])
13 | self.bottleneck = nn.Conv2d(
14 | features * (len(sizes) + 1), out_features, kernel_size=1)
15 | self.relu = nn.ReLU()
16 |
17 | def _make_stage(self, features, size):
18 | prior = nn.AdaptiveAvgPool2d(output_size=(size, size))
19 | conv = nn.Conv2d(features, features, kernel_size=1, bias=False)
20 | return nn.Sequential(prior, conv)
21 |
22 | def forward(self, feats):
23 | h, w = feats.size(2), feats.size(3)
24 |         priors = [F.interpolate(input=stage(feats), size=(
25 |             h, w), mode='bilinear') for stage in self.stages] + [feats]
26 | bottle = self.bottleneck(torch.cat(priors, 1))
27 | return self.relu(bottle)
28 |
29 |
30 | class PSPUpsample(nn.Module):
31 | def __init__(self, in_channels, out_channels):
32 | super(PSPUpsample, self).__init__()
33 | self.conv = nn.Sequential(
34 | nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
35 | nn.Conv2d(in_channels, out_channels, 3, padding=1),
36 | nn.PReLU()
37 | )
38 |
39 | def forward(self, x):
40 | return self.conv(x)
41 |
42 |
43 | class PSPNet(nn.Module):
44 | def __init__(self, n_classes=21, sizes=(1, 2, 3, 6), psp_size=2048, deep_features_size=1024, backend='resnet18',
45 | pretrained=False):
46 | super(PSPNet, self).__init__()
47 | self.feats = getattr(extractors, backend)(pretrained)
48 | self.psp = PSPModule(psp_size, 1024, sizes)
49 | self.drop_1 = nn.Dropout2d(p=0.3)
50 |
51 | self.up_1 = PSPUpsample(1024, 256)
52 | self.up_2 = PSPUpsample(256, 64)
53 | self.up_3 = PSPUpsample(64, 64)
54 |
55 | self.drop_2 = nn.Dropout2d(p=0.15)
56 | self.final = nn.Sequential(
57 | nn.Conv2d(64, 32, kernel_size=1),
58 |             nn.LogSoftmax(dim=1)  # explicit dim avoids the implicit-dim deprecation
59 | )
60 |
61 | self.classifier = nn.Sequential(
62 | nn.Linear(deep_features_size, 256),
63 | nn.ReLU(),
64 | nn.Linear(256, n_classes)
65 | )
66 |
67 | def forward(self, x):
68 | f, class_f = self.feats(x)
69 | p = self.psp(f)
70 | p = self.drop_1(p)
71 |
72 | p = self.up_1(p)
73 | p = self.drop_2(p)
74 |
75 | p = self.up_2(p)
76 | p = self.drop_2(p)
77 |
78 | p = self.up_3(p)
79 |
80 | return self.final(p)
81 |
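
A quick shape check of the PSPNet above (again assuming lib/ is on the import path). Note that psp_size must match the channel width of the chosen extractor backbone, e.g. 512 for a resnet18 backend rather than the 2048 default:

    import torch
    from lib.pspnet import PSPNet

    net = PSPNet(backend='resnet18', psp_size=512, pretrained=False).eval()
    with torch.no_grad():
        out = net(torch.rand(1, 3, 240, 240))   # NCHW RGB input
    # out is a 32-channel log-softmax map; its spatial size depends on
    # the stride of the extractor backbone
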
--------------------------------------------------------------------------------
/lib/transformations.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # transformations.py
3 |
4 | # Copyright (c) 2006-2018, Christoph Gohlke
5 | # Copyright (c) 2006-2018, The Regents of the University of California
6 | # Produced at the Laboratory for Fluorescence Dynamics
7 | # All rights reserved.
8 | #
9 | # Redistribution and use in source and binary forms, with or without
10 | # modification, are permitted provided that the following conditions are met:
11 | #
12 | # * Redistributions of source code must retain the above copyright
13 | # notice, this list of conditions and the following disclaimer.
14 | # * Redistributions in binary form must reproduce the above copyright
15 | # notice, this list of conditions and the following disclaimer in the
16 | # documentation and/or other materials provided with the distribution.
17 | # * Neither the name of the copyright holders nor the names of any
18 | # contributors may be used to endorse or promote products derived
19 | # from this software without specific prior written permission.
20 | #
21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
25 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 | # POSSIBILITY OF SUCH DAMAGE.
32 |
33 | """Homogeneous Transformation Matrices and Quaternions.
34 |
35 | A library for calculating 4x4 matrices for translating, rotating, reflecting,
36 | scaling, shearing, projecting, orthogonalizing, and superimposing arrays of
37 | 3D homogeneous coordinates as well as for converting between rotation matrices,
38 | Euler angles, and quaternions. Also includes an Arcball control object and
39 | functions to decompose transformation matrices.
40 |
41 | :Author:
42 |   `Christoph Gohlke <https://www.lfd.uci.edu/~gohlke/>`_
43 |
44 | :Organization:
45 | Laboratory for Fluorescence Dynamics, University of California, Irvine
46 |
47 | :Version: 2018.02.08
48 |
49 | Requirements
50 | ------------
51 | * `CPython 2.7 or 3.6 <https://www.python.org>`_
52 | * `Numpy 1.13 <http://www.numpy.org>`_
53 | * `Transformations.c 2018.02.08 <https://www.lfd.uci.edu/~gohlke/>`_
54 | (recommended for speedup of some functions)
55 |
56 | Notes
57 | -----
58 | The API is not stable yet and is expected to change between revisions.
59 |
60 | This Python code is not optimized for speed. Refer to the transformations.c
61 | module for a faster implementation of some functions.
62 |
63 | Documentation in HTML format can be generated with epydoc.
64 |
65 | Matrices (M) can be inverted using numpy.linalg.inv(M), be concatenated using
66 | numpy.dot(M0, M1), or transform homogeneous coordinate arrays (v) using
67 | numpy.dot(M, v) for shape (4, \*) column vectors, respectively
68 | numpy.dot(v, M.T) for shape (\*, 4) row vectors ("array of points").
69 |
70 | This module follows the "column vectors on the right" and "row major storage"
71 | (C contiguous) conventions. The translation components are in the right column
72 | of the transformation matrix, i.e. M[:3, 3].
73 | The transpose of the transformation matrices may have to be used to interface
74 | with other graphics systems, e.g. with OpenGL's glMultMatrixd(). See also [16].
75 |
76 | Calculations are carried out with numpy.float64 precision.
77 |
78 | Vector, point, quaternion, and matrix function arguments are expected to be
79 | "array like", i.e. tuple, list, or numpy arrays.
80 |
81 | Return types are numpy arrays unless specified otherwise.
82 |
83 | Angles are in radians unless specified otherwise.
84 |
85 | Quaternions w+ix+jy+kz are represented as [w, x, y, z].
86 |
87 | A triple of Euler angles can be applied/interpreted in 24 ways, which can
88 | be specified using a 4 character string or encoded 4-tuple:
89 |
90 | *Axes 4-string*: e.g. 'sxyz' or 'ryxy'
91 |
92 | - first character : rotations are applied to 's'tatic or 'r'otating frame
93 | - remaining characters : successive rotation axis 'x', 'y', or 'z'
94 |
95 | *Axes 4-tuple*: e.g. (0, 0, 0, 0) or (1, 1, 1, 1)
96 |
97 | - inner axis: code of axis ('x':0, 'y':1, 'z':2) of rightmost matrix.
98 | - parity : even (0) if inner axis 'x' is followed by 'y', 'y' is followed
99 | by 'z', or 'z' is followed by 'x'. Otherwise odd (1).
100 | - repetition : first and last axis are same (1) or different (0).
101 | - frame : rotations are applied to static (0) or rotating (1) frame.
102 |
103 | Other Python packages and modules for 3D transformations and quaternions:
104 |
105 | * `Transforms3d <https://pypi.python.org/pypi/transforms3d>`_
106 |   includes most code of this module.
107 | * `Blender.mathutils <http://www.blender.org/api/blender_python_api>`_
108 | * `numpy-dtypes <https://github.com/numpy/numpy-dtypes>`_
109 |
110 | References
111 | ----------
112 | (1) Matrices and transformations. Ronald Goldman.
113 | In "Graphics Gems I", pp 472-475. Morgan Kaufmann, 1990.
114 | (2) More matrices and transformations: shear and pseudo-perspective.
115 | Ronald Goldman. In "Graphics Gems II", pp 320-323. Morgan Kaufmann, 1991.
116 | (3) Decomposing a matrix into simple transformations. Spencer Thomas.
117 | In "Graphics Gems II", pp 320-323. Morgan Kaufmann, 1991.
118 | (4) Recovering the data from the transformation matrix. Ronald Goldman.
119 | In "Graphics Gems II", pp 324-331. Morgan Kaufmann, 1991.
120 | (5) Euler angle conversion. Ken Shoemake.
121 | In "Graphics Gems IV", pp 222-229. Morgan Kaufmann, 1994.
122 | (6) Arcball rotation control. Ken Shoemake.
123 | In "Graphics Gems IV", pp 175-192. Morgan Kaufmann, 1994.
124 | (7) Representing attitude: Euler angles, unit quaternions, and rotation
125 | vectors. James Diebel. 2006.
126 | (8) A discussion of the solution for the best rotation to relate two sets
127 | of vectors. W Kabsch. Acta Cryst. 1978. A34, 827-828.
128 | (9) Closed-form solution of absolute orientation using unit quaternions.
129 | BKP Horn. J Opt Soc Am A. 1987. 4(4):629-642.
130 | (10) Quaternions. Ken Shoemake.
131 | http://www.sfu.ca/~jwa3/cmpt461/files/quatut.pdf
132 | (11) From quaternion to matrix and back. JMP van Waveren. 2005.
133 | http://www.intel.com/cd/ids/developer/asmo-na/eng/293748.htm
134 | (12) Uniform random rotations. Ken Shoemake.
135 | In "Graphics Gems III", pp 124-132. Morgan Kaufmann, 1992.
136 | (13) Quaternion in molecular modeling. CFF Karney.
137 | J Mol Graph Mod, 25(5):595-604
138 | (14) New method for extracting the quaternion from a rotation matrix.
139 | Itzhack Y Bar-Itzhack, J Guid Contr Dynam. 2000. 23(6): 1085-1087.
140 | (15) Multiple View Geometry in Computer Vision. Hartley and Zissermann.
141 | Cambridge University Press; 2nd Ed. 2004. Chapter 4, Algorithm 4.7, p 130.
142 | (16) Column Vectors vs. Row Vectors.
143 | http://steve.hollasch.net/cgindex/math/matrix/column-vec.html
144 |
145 | Examples
146 | --------
147 | >>> alpha, beta, gamma = 0.123, -1.234, 2.345
148 | >>> origin, xaxis, yaxis, zaxis = [0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]
149 | >>> I = identity_matrix()
150 | >>> Rx = rotation_matrix(alpha, xaxis)
151 | >>> Ry = rotation_matrix(beta, yaxis)
152 | >>> Rz = rotation_matrix(gamma, zaxis)
153 | >>> R = concatenate_matrices(Rx, Ry, Rz)
154 | >>> euler = euler_from_matrix(R, 'rxyz')
155 | >>> numpy.allclose([alpha, beta, gamma], euler)
156 | True
157 | >>> Re = euler_matrix(alpha, beta, gamma, 'rxyz')
158 | >>> is_same_transform(R, Re)
159 | True
160 | >>> al, be, ga = euler_from_matrix(Re, 'rxyz')
161 | >>> is_same_transform(Re, euler_matrix(al, be, ga, 'rxyz'))
162 | True
163 | >>> qx = quaternion_about_axis(alpha, xaxis)
164 | >>> qy = quaternion_about_axis(beta, yaxis)
165 | >>> qz = quaternion_about_axis(gamma, zaxis)
166 | >>> q = quaternion_multiply(qx, qy)
167 | >>> q = quaternion_multiply(q, qz)
168 | >>> Rq = quaternion_matrix(q)
169 | >>> is_same_transform(R, Rq)
170 | True
171 | >>> S = scale_matrix(1.23, origin)
172 | >>> T = translation_matrix([1, 2, 3])
173 | >>> Z = shear_matrix(beta, xaxis, origin, zaxis)
174 | >>> R = random_rotation_matrix(numpy.random.rand(3))
175 | >>> M = concatenate_matrices(T, R, Z, S)
176 | >>> scale, shear, angles, trans, persp = decompose_matrix(M)
177 | >>> numpy.allclose(scale, 1.23)
178 | True
179 | >>> numpy.allclose(trans, [1, 2, 3])
180 | True
181 | >>> numpy.allclose(shear, [0, math.tan(beta), 0])
182 | True
183 | >>> is_same_transform(R, euler_matrix(axes='sxyz', *angles))
184 | True
185 | >>> M1 = compose_matrix(scale, shear, angles, trans, persp)
186 | >>> is_same_transform(M, M1)
187 | True
188 | >>> v0, v1 = random_vector(3), random_vector(3)
189 | >>> M = rotation_matrix(angle_between_vectors(v0, v1), vector_product(v0, v1))
190 | >>> v2 = numpy.dot(v0, M[:3,:3].T)
191 | >>> numpy.allclose(unit_vector(v1), unit_vector(v2))
192 | True
193 |
194 | """
195 |
196 | from __future__ import division, print_function
197 |
198 | import math
199 |
200 | import numpy
201 |
202 | __version__ = '2018.02.08'
203 | __docformat__ = 'restructuredtext en'
204 | __all__ = ()
205 |
206 |
207 | def identity_matrix():
208 | """Return 4x4 identity/unit matrix.
209 |
210 | >>> I = identity_matrix()
211 | >>> numpy.allclose(I, numpy.dot(I, I))
212 | True
213 | >>> numpy.sum(I), numpy.trace(I)
214 | (4.0, 4.0)
215 | >>> numpy.allclose(I, numpy.identity(4))
216 | True
217 |
218 | """
219 | return numpy.identity(4)
220 |
221 |
222 | def translation_matrix(direction):
223 | """Return matrix to translate by direction vector.
224 |
225 | >>> v = numpy.random.random(3) - 0.5
226 | >>> numpy.allclose(v, translation_matrix(v)[:3, 3])
227 | True
228 |
229 | """
230 | M = numpy.identity(4)
231 | M[:3, 3] = direction[:3]
232 | return M
233 |
234 |
235 | def translation_from_matrix(matrix):
236 | """Return translation vector from translation matrix.
237 |
238 | >>> v0 = numpy.random.random(3) - 0.5
239 | >>> v1 = translation_from_matrix(translation_matrix(v0))
240 | >>> numpy.allclose(v0, v1)
241 | True
242 |
243 | """
244 | return numpy.array(matrix, copy=False)[:3, 3].copy()
245 |
246 |
247 | def reflection_matrix(point, normal):
248 | """Return matrix to mirror at plane defined by point and normal vector.
249 |
250 | >>> v0 = numpy.random.random(4) - 0.5
251 | >>> v0[3] = 1.
252 | >>> v1 = numpy.random.random(3) - 0.5
253 | >>> R = reflection_matrix(v0, v1)
254 | >>> numpy.allclose(2, numpy.trace(R))
255 | True
256 | >>> numpy.allclose(v0, numpy.dot(R, v0))
257 | True
258 | >>> v2 = v0.copy()
259 | >>> v2[:3] += v1
260 | >>> v3 = v0.copy()
261 |     >>> v3[:3] -= v1
262 | >>> numpy.allclose(v2, numpy.dot(R, v3))
263 | True
264 |
265 | """
266 | normal = unit_vector(normal[:3])
267 | M = numpy.identity(4)
268 | M[:3, :3] -= 2.0 * numpy.outer(normal, normal)
269 | M[:3, 3] = (2.0 * numpy.dot(point[:3], normal)) * normal
270 | return M
271 |
272 |
273 | def reflection_from_matrix(matrix):
274 | """Return mirror plane point and normal vector from reflection matrix.
275 |
276 | >>> v0 = numpy.random.random(3) - 0.5
277 | >>> v1 = numpy.random.random(3) - 0.5
278 | >>> M0 = reflection_matrix(v0, v1)
279 | >>> point, normal = reflection_from_matrix(M0)
280 | >>> M1 = reflection_matrix(point, normal)
281 | >>> is_same_transform(M0, M1)
282 | True
283 |
284 | """
285 | M = numpy.array(matrix, dtype=numpy.float64, copy=False)
286 | # normal: unit eigenvector corresponding to eigenvalue -1
287 | w, V = numpy.linalg.eig(M[:3, :3])
288 | i = numpy.where(abs(numpy.real(w) + 1.0) < 1e-8)[0]
289 | if not len(i):
290 | raise ValueError('no unit eigenvector corresponding to eigenvalue -1')
291 | normal = numpy.real(V[:, i[0]]).squeeze()
292 | # point: any unit eigenvector corresponding to eigenvalue 1
293 | w, V = numpy.linalg.eig(M)
294 | i = numpy.where(abs(numpy.real(w) - 1.0) < 1e-8)[0]
295 | if not len(i):
296 | raise ValueError('no unit eigenvector corresponding to eigenvalue 1')
297 | point = numpy.real(V[:, i[-1]]).squeeze()
298 | point /= point[3]
299 | return point, normal
300 |
301 |
302 | def rotation_matrix(angle, direction, point=None):
303 | """Return matrix to rotate about axis defined by point and direction.
304 |
305 | >>> R = rotation_matrix(math.pi/2, [0, 0, 1], [1, 0, 0])
306 | >>> numpy.allclose(numpy.dot(R, [0, 0, 0, 1]), [1, -1, 0, 1])
307 | True
308 | >>> angle = (random.random() - 0.5) * (2*math.pi)
309 | >>> direc = numpy.random.random(3) - 0.5
310 | >>> point = numpy.random.random(3) - 0.5
311 | >>> R0 = rotation_matrix(angle, direc, point)
312 | >>> R1 = rotation_matrix(angle-2*math.pi, direc, point)
313 | >>> is_same_transform(R0, R1)
314 | True
315 | >>> R0 = rotation_matrix(angle, direc, point)
316 | >>> R1 = rotation_matrix(-angle, -direc, point)
317 | >>> is_same_transform(R0, R1)
318 | True
319 | >>> I = numpy.identity(4, numpy.float64)
320 | >>> numpy.allclose(I, rotation_matrix(math.pi*2, direc))
321 | True
322 | >>> numpy.allclose(2, numpy.trace(rotation_matrix(math.pi/2,
323 | ... direc, point)))
324 | True
325 |
326 | """
327 | sina = math.sin(angle)
328 | cosa = math.cos(angle)
329 | direction = unit_vector(direction[:3])
330 | # rotation matrix around unit vector
331 | R = numpy.diag([cosa, cosa, cosa])
332 | R += numpy.outer(direction, direction) * (1.0 - cosa)
333 | direction *= sina
334 | R += numpy.array([[ 0.0, -direction[2], direction[1]],
335 | [ direction[2], 0.0, -direction[0]],
336 | [-direction[1], direction[0], 0.0]])
337 | M = numpy.identity(4)
338 | M[:3, :3] = R
339 | if point is not None:
340 | # rotation not around origin
341 | point = numpy.array(point[:3], dtype=numpy.float64, copy=False)
342 | M[:3, 3] = point - numpy.dot(R, point)
343 | return M
344 |
345 |
346 | def rotation_from_matrix(matrix):
347 | """Return rotation angle and axis from rotation matrix.
348 |
349 | >>> angle = (random.random() - 0.5) * (2*math.pi)
350 | >>> direc = numpy.random.random(3) - 0.5
351 | >>> point = numpy.random.random(3) - 0.5
352 | >>> R0 = rotation_matrix(angle, direc, point)
353 | >>> angle, direc, point = rotation_from_matrix(R0)
354 | >>> R1 = rotation_matrix(angle, direc, point)
355 | >>> is_same_transform(R0, R1)
356 | True
357 |
358 | """
359 | R = numpy.array(matrix, dtype=numpy.float64, copy=False)
360 | R33 = R[:3, :3]
361 | # direction: unit eigenvector of R33 corresponding to eigenvalue of 1
362 | w, W = numpy.linalg.eig(R33.T)
363 | i = numpy.where(abs(numpy.real(w) - 1.0) < 1e-8)[0]
364 | if not len(i):
365 | raise ValueError('no unit eigenvector corresponding to eigenvalue 1')
366 | direction = numpy.real(W[:, i[-1]]).squeeze()
367 | # point: unit eigenvector of R33 corresponding to eigenvalue of 1
368 | w, Q = numpy.linalg.eig(R)
369 | i = numpy.where(abs(numpy.real(w) - 1.0) < 1e-8)[0]
370 | if not len(i):
371 | raise ValueError('no unit eigenvector corresponding to eigenvalue 1')
372 | point = numpy.real(Q[:, i[-1]]).squeeze()
373 | point /= point[3]
374 | # rotation angle depending on direction
375 | cosa = (numpy.trace(R33) - 1.0) / 2.0
376 | if abs(direction[2]) > 1e-8:
377 | sina = (R[1, 0] + (cosa-1.0)*direction[0]*direction[1]) / direction[2]
378 | elif abs(direction[1]) > 1e-8:
379 | sina = (R[0, 2] + (cosa-1.0)*direction[0]*direction[2]) / direction[1]
380 | else:
381 | sina = (R[2, 1] + (cosa-1.0)*direction[1]*direction[2]) / direction[0]
382 | angle = math.atan2(sina, cosa)
383 | return angle, direction, point
384 |
385 |
386 | def scale_matrix(factor, origin=None, direction=None):
387 | """Return matrix to scale by factor around origin in direction.
388 |
389 | Use factor -1 for point symmetry.
390 |
391 | >>> v = (numpy.random.rand(4, 5) - 0.5) * 20
392 | >>> v[3] = 1
393 | >>> S = scale_matrix(-1.234)
394 | >>> numpy.allclose(numpy.dot(S, v)[:3], -1.234*v[:3])
395 | True
396 | >>> factor = random.random() * 10 - 5
397 | >>> origin = numpy.random.random(3) - 0.5
398 | >>> direct = numpy.random.random(3) - 0.5
399 | >>> S = scale_matrix(factor, origin)
400 | >>> S = scale_matrix(factor, origin, direct)
401 |
402 | """
403 | if direction is None:
404 | # uniform scaling
405 | M = numpy.diag([factor, factor, factor, 1.0])
406 | if origin is not None:
407 | M[:3, 3] = origin[:3]
408 | M[:3, 3] *= 1.0 - factor
409 | else:
410 | # nonuniform scaling
411 | direction = unit_vector(direction[:3])
412 | factor = 1.0 - factor
413 | M = numpy.identity(4)
414 | M[:3, :3] -= factor * numpy.outer(direction, direction)
415 | if origin is not None:
416 | M[:3, 3] = (factor * numpy.dot(origin[:3], direction)) * direction
417 | return M
418 |
419 |
420 | def scale_from_matrix(matrix):
421 | """Return scaling factor, origin and direction from scaling matrix.
422 |
423 | >>> factor = random.random() * 10 - 5
424 | >>> origin = numpy.random.random(3) - 0.5
425 | >>> direct = numpy.random.random(3) - 0.5
426 | >>> S0 = scale_matrix(factor, origin)
427 | >>> factor, origin, direction = scale_from_matrix(S0)
428 | >>> S1 = scale_matrix(factor, origin, direction)
429 | >>> is_same_transform(S0, S1)
430 | True
431 | >>> S0 = scale_matrix(factor, origin, direct)
432 | >>> factor, origin, direction = scale_from_matrix(S0)
433 | >>> S1 = scale_matrix(factor, origin, direction)
434 | >>> is_same_transform(S0, S1)
435 | True
436 |
437 | """
438 | M = numpy.array(matrix, dtype=numpy.float64, copy=False)
439 | M33 = M[:3, :3]
440 | factor = numpy.trace(M33) - 2.0
441 | try:
442 | # direction: unit eigenvector corresponding to eigenvalue factor
443 | w, V = numpy.linalg.eig(M33)
444 | i = numpy.where(abs(numpy.real(w) - factor) < 1e-8)[0][0]
445 | direction = numpy.real(V[:, i]).squeeze()
446 | direction /= vector_norm(direction)
447 | except IndexError:
448 | # uniform scaling
449 | factor = (factor + 2.0) / 3.0
450 | direction = None
451 | # origin: any eigenvector corresponding to eigenvalue 1
452 | w, V = numpy.linalg.eig(M)
453 | i = numpy.where(abs(numpy.real(w) - 1.0) < 1e-8)[0]
454 | if not len(i):
455 | raise ValueError('no eigenvector corresponding to eigenvalue 1')
456 | origin = numpy.real(V[:, i[-1]]).squeeze()
457 | origin /= origin[3]
458 | return factor, origin, direction
459 |
460 |
461 | def projection_matrix(point, normal, direction=None,
462 | perspective=None, pseudo=False):
463 | """Return matrix to project onto plane defined by point and normal.
464 |
465 | Using either perspective point, projection direction, or none of both.
466 |
467 | If pseudo is True, perspective projections will preserve relative depth
468 | such that Perspective = dot(Orthogonal, PseudoPerspective).
469 |
470 | >>> P = projection_matrix([0, 0, 0], [1, 0, 0])
471 | >>> numpy.allclose(P[1:, 1:], numpy.identity(4)[1:, 1:])
472 | True
473 | >>> point = numpy.random.random(3) - 0.5
474 | >>> normal = numpy.random.random(3) - 0.5
475 | >>> direct = numpy.random.random(3) - 0.5
476 | >>> persp = numpy.random.random(3) - 0.5
477 | >>> P0 = projection_matrix(point, normal)
478 | >>> P1 = projection_matrix(point, normal, direction=direct)
479 | >>> P2 = projection_matrix(point, normal, perspective=persp)
480 | >>> P3 = projection_matrix(point, normal, perspective=persp, pseudo=True)
481 | >>> is_same_transform(P2, numpy.dot(P0, P3))
482 | True
483 | >>> P = projection_matrix([3, 0, 0], [1, 1, 0], [1, 0, 0])
484 | >>> v0 = (numpy.random.rand(4, 5) - 0.5) * 20
485 | >>> v0[3] = 1
486 | >>> v1 = numpy.dot(P, v0)
487 | >>> numpy.allclose(v1[1], v0[1])
488 | True
489 | >>> numpy.allclose(v1[0], 3-v1[1])
490 | True
491 |
492 | """
493 | M = numpy.identity(4)
494 | point = numpy.array(point[:3], dtype=numpy.float64, copy=False)
495 | normal = unit_vector(normal[:3])
496 | if perspective is not None:
497 | # perspective projection
498 | perspective = numpy.array(perspective[:3], dtype=numpy.float64,
499 | copy=False)
500 | M[0, 0] = M[1, 1] = M[2, 2] = numpy.dot(perspective-point, normal)
501 | M[:3, :3] -= numpy.outer(perspective, normal)
502 | if pseudo:
503 | # preserve relative depth
504 | M[:3, :3] -= numpy.outer(normal, normal)
505 | M[:3, 3] = numpy.dot(point, normal) * (perspective+normal)
506 | else:
507 | M[:3, 3] = numpy.dot(point, normal) * perspective
508 | M[3, :3] = -normal
509 | M[3, 3] = numpy.dot(perspective, normal)
510 | elif direction is not None:
511 | # parallel projection
512 | direction = numpy.array(direction[:3], dtype=numpy.float64, copy=False)
513 | scale = numpy.dot(direction, normal)
514 | M[:3, :3] -= numpy.outer(direction, normal) / scale
515 | M[:3, 3] = direction * (numpy.dot(point, normal) / scale)
516 | else:
517 | # orthogonal projection
518 | M[:3, :3] -= numpy.outer(normal, normal)
519 | M[:3, 3] = numpy.dot(point, normal) * normal
520 | return M
521 |
522 |
523 | def projection_from_matrix(matrix, pseudo=False):
524 | """Return projection plane and perspective point from projection matrix.
525 |
526 | Return values are same as arguments for projection_matrix function:
527 | point, normal, direction, perspective, and pseudo.
528 |
529 | >>> point = numpy.random.random(3) - 0.5
530 | >>> normal = numpy.random.random(3) - 0.5
531 | >>> direct = numpy.random.random(3) - 0.5
532 | >>> persp = numpy.random.random(3) - 0.5
533 | >>> P0 = projection_matrix(point, normal)
534 | >>> result = projection_from_matrix(P0)
535 | >>> P1 = projection_matrix(*result)
536 | >>> is_same_transform(P0, P1)
537 | True
538 | >>> P0 = projection_matrix(point, normal, direct)
539 | >>> result = projection_from_matrix(P0)
540 | >>> P1 = projection_matrix(*result)
541 | >>> is_same_transform(P0, P1)
542 | True
543 | >>> P0 = projection_matrix(point, normal, perspective=persp, pseudo=False)
544 | >>> result = projection_from_matrix(P0, pseudo=False)
545 | >>> P1 = projection_matrix(*result)
546 | >>> is_same_transform(P0, P1)
547 | True
548 | >>> P0 = projection_matrix(point, normal, perspective=persp, pseudo=True)
549 | >>> result = projection_from_matrix(P0, pseudo=True)
550 | >>> P1 = projection_matrix(*result)
551 | >>> is_same_transform(P0, P1)
552 | True
553 |
554 | """
555 | M = numpy.array(matrix, dtype=numpy.float64, copy=False)
556 | M33 = M[:3, :3]
557 | w, V = numpy.linalg.eig(M)
558 | i = numpy.where(abs(numpy.real(w) - 1.0) < 1e-8)[0]
559 | if not pseudo and len(i):
560 | # point: any eigenvector corresponding to eigenvalue 1
561 | point = numpy.real(V[:, i[-1]]).squeeze()
562 | point /= point[3]
563 | # direction: unit eigenvector corresponding to eigenvalue 0
564 | w, V = numpy.linalg.eig(M33)
565 | i = numpy.where(abs(numpy.real(w)) < 1e-8)[0]
566 | if not len(i):
567 | raise ValueError('no eigenvector corresponding to eigenvalue 0')
568 | direction = numpy.real(V[:, i[0]]).squeeze()
569 | direction /= vector_norm(direction)
570 | # normal: unit eigenvector of M33.T corresponding to eigenvalue 0
571 | w, V = numpy.linalg.eig(M33.T)
572 | i = numpy.where(abs(numpy.real(w)) < 1e-8)[0]
573 | if len(i):
574 | # parallel projection
575 | normal = numpy.real(V[:, i[0]]).squeeze()
576 | normal /= vector_norm(normal)
577 | return point, normal, direction, None, False
578 | else:
579 | # orthogonal projection, where normal equals direction vector
580 | return point, direction, None, None, False
581 | else:
582 | # perspective projection
583 | i = numpy.where(abs(numpy.real(w)) > 1e-8)[0]
584 | if not len(i):
585 | raise ValueError(
586 | 'no eigenvector not corresponding to eigenvalue 0')
587 | point = numpy.real(V[:, i[-1]]).squeeze()
588 | point /= point[3]
589 | normal = - M[3, :3]
590 | perspective = M[:3, 3] / numpy.dot(point[:3], normal)
591 | if pseudo:
592 | perspective -= normal
593 | return point, normal, None, perspective, pseudo
594 |
595 |
596 | def clip_matrix(left, right, bottom, top, near, far, perspective=False):
597 | """Return matrix to obtain normalized device coordinates from frustum.
598 |
599 | The frustum bounds are axis-aligned along x (left, right),
600 | y (bottom, top) and z (near, far).
601 |
602 | Normalized device coordinates are in range [-1, 1] if coordinates are
603 | inside the frustum.
604 |
605 | If perspective is True the frustum is a truncated pyramid with the
606 | perspective point at origin and direction along z axis, otherwise an
607 | orthographic canonical view volume (a box).
608 |
609 | Homogeneous coordinates transformed by the perspective clip matrix
610 | need to be dehomogenized (divided by w coordinate).
611 |
612 | >>> frustum = numpy.random.rand(6)
613 | >>> frustum[1] += frustum[0]
614 | >>> frustum[3] += frustum[2]
615 | >>> frustum[5] += frustum[4]
616 | >>> M = clip_matrix(perspective=False, *frustum)
617 | >>> numpy.dot(M, [frustum[0], frustum[2], frustum[4], 1])
618 | array([-1., -1., -1., 1.])
619 | >>> numpy.dot(M, [frustum[1], frustum[3], frustum[5], 1])
620 | array([ 1., 1., 1., 1.])
621 | >>> M = clip_matrix(perspective=True, *frustum)
622 | >>> v = numpy.dot(M, [frustum[0], frustum[2], frustum[4], 1])
623 | >>> v / v[3]
624 | array([-1., -1., -1., 1.])
625 | >>> v = numpy.dot(M, [frustum[1], frustum[3], frustum[4], 1])
626 | >>> v / v[3]
627 | array([ 1., 1., -1., 1.])
628 |
629 | """
630 | if left >= right or bottom >= top or near >= far:
631 | raise ValueError('invalid frustum')
632 | if perspective:
633 | if near <= _EPS:
634 | raise ValueError('invalid frustum: near <= 0')
635 | t = 2.0 * near
636 | M = [[t/(left-right), 0.0, (right+left)/(right-left), 0.0],
637 | [0.0, t/(bottom-top), (top+bottom)/(top-bottom), 0.0],
638 | [0.0, 0.0, (far+near)/(near-far), t*far/(far-near)],
639 | [0.0, 0.0, -1.0, 0.0]]
640 | else:
641 | M = [[2.0/(right-left), 0.0, 0.0, (right+left)/(left-right)],
642 | [0.0, 2.0/(top-bottom), 0.0, (top+bottom)/(bottom-top)],
643 | [0.0, 0.0, 2.0/(far-near), (far+near)/(near-far)],
644 | [0.0, 0.0, 0.0, 1.0]]
645 | return numpy.array(M)
646 |
647 |
648 | def shear_matrix(angle, direction, point, normal):
649 | """Return matrix to shear by angle along direction vector on shear plane.
650 |
651 | The shear plane is defined by a point and normal vector. The direction
652 | vector must be orthogonal to the plane's normal vector.
653 |
654 | A point P is transformed by the shear matrix into P" such that
655 | the vector P-P" is parallel to the direction vector and its extent is
656 | given by the angle of P-P'-P", where P' is the orthogonal projection
657 | of P onto the shear plane.
658 |
659 | >>> angle = (random.random() - 0.5) * 4*math.pi
660 | >>> direct = numpy.random.random(3) - 0.5
661 | >>> point = numpy.random.random(3) - 0.5
662 | >>> normal = numpy.cross(direct, numpy.random.random(3))
663 | >>> S = shear_matrix(angle, direct, point, normal)
664 | >>> numpy.allclose(1, numpy.linalg.det(S))
665 | True
666 |
667 | """
668 | normal = unit_vector(normal[:3])
669 | direction = unit_vector(direction[:3])
670 | if abs(numpy.dot(normal, direction)) > 1e-6:
671 | raise ValueError('direction and normal vectors are not orthogonal')
672 | angle = math.tan(angle)
673 | M = numpy.identity(4)
674 | M[:3, :3] += angle * numpy.outer(direction, normal)
675 | M[:3, 3] = -angle * numpy.dot(point[:3], normal) * direction
676 | return M
677 |
678 |
679 | def shear_from_matrix(matrix):
680 | """Return shear angle, direction and plane from shear matrix.
681 |
682 | >>> angle = (random.random() - 0.5) * 4*math.pi
683 | >>> direct = numpy.random.random(3) - 0.5
684 | >>> point = numpy.random.random(3) - 0.5
685 | >>> normal = numpy.cross(direct, numpy.random.random(3))
686 | >>> S0 = shear_matrix(angle, direct, point, normal)
687 | >>> angle, direct, point, normal = shear_from_matrix(S0)
688 | >>> S1 = shear_matrix(angle, direct, point, normal)
689 | >>> is_same_transform(S0, S1)
690 | True
691 |
692 | """
693 | M = numpy.array(matrix, dtype=numpy.float64, copy=False)
694 | M33 = M[:3, :3]
695 | # normal: cross independent eigenvectors corresponding to the eigenvalue 1
696 | w, V = numpy.linalg.eig(M33)
697 | i = numpy.where(abs(numpy.real(w) - 1.0) < 1e-4)[0]
698 | if len(i) < 2:
699 | raise ValueError('no two linear independent eigenvectors found %s' % w)
700 | V = numpy.real(V[:, i]).squeeze().T
701 | lenorm = -1.0
702 | for i0, i1 in ((0, 1), (0, 2), (1, 2)):
703 | n = numpy.cross(V[i0], V[i1])
704 | w = vector_norm(n)
705 | if w > lenorm:
706 | lenorm = w
707 | normal = n
708 | normal /= lenorm
709 | # direction and angle
710 | direction = numpy.dot(M33 - numpy.identity(3), normal)
711 | angle = vector_norm(direction)
712 | direction /= angle
713 | angle = math.atan(angle)
714 | # point: eigenvector corresponding to eigenvalue 1
715 | w, V = numpy.linalg.eig(M)
716 | i = numpy.where(abs(numpy.real(w) - 1.0) < 1e-8)[0]
717 | if not len(i):
718 | raise ValueError('no eigenvector corresponding to eigenvalue 1')
719 | point = numpy.real(V[:, i[-1]]).squeeze()
720 | point /= point[3]
721 | return angle, direction, point, normal
722 |
723 |
724 | def decompose_matrix(matrix):
725 | """Return sequence of transformations from transformation matrix.
726 |
727 | matrix : array_like
728 | Non-degenerative homogeneous transformation matrix
729 |
730 | Return tuple of:
731 | scale : vector of 3 scaling factors
732 | shear : list of shear factors for x-y, x-z, y-z axes
733 | angles : list of Euler angles about static x, y, z axes
734 | translate : translation vector along x, y, z axes
735 | perspective : perspective partition of matrix
736 |
737 | Raise ValueError if matrix is of wrong type or degenerative.
738 |
739 | >>> T0 = translation_matrix([1, 2, 3])
740 | >>> scale, shear, angles, trans, persp = decompose_matrix(T0)
741 | >>> T1 = translation_matrix(trans)
742 | >>> numpy.allclose(T0, T1)
743 | True
744 | >>> S = scale_matrix(0.123)
745 | >>> scale, shear, angles, trans, persp = decompose_matrix(S)
746 | >>> scale[0]
747 | 0.123
748 | >>> R0 = euler_matrix(1, 2, 3)
749 | >>> scale, shear, angles, trans, persp = decompose_matrix(R0)
750 | >>> R1 = euler_matrix(*angles)
751 | >>> numpy.allclose(R0, R1)
752 | True
753 |
754 | """
755 | M = numpy.array(matrix, dtype=numpy.float64, copy=True).T
756 | if abs(M[3, 3]) < _EPS:
757 | raise ValueError('M[3, 3] is zero')
758 | M /= M[3, 3]
759 | P = M.copy()
760 | P[:, 3] = 0.0, 0.0, 0.0, 1.0
761 | if not numpy.linalg.det(P):
762 | raise ValueError('matrix is singular')
763 |
764 | scale = numpy.zeros((3, ))
765 | shear = [0.0, 0.0, 0.0]
766 | angles = [0.0, 0.0, 0.0]
767 |
768 | if any(abs(M[:3, 3]) > _EPS):
769 | perspective = numpy.dot(M[:, 3], numpy.linalg.inv(P.T))
770 | M[:, 3] = 0.0, 0.0, 0.0, 1.0
771 | else:
772 | perspective = numpy.array([0.0, 0.0, 0.0, 1.0])
773 |
774 | translate = M[3, :3].copy()
775 | M[3, :3] = 0.0
776 |
777 | row = M[:3, :3].copy()
778 | scale[0] = vector_norm(row[0])
779 | row[0] /= scale[0]
780 | shear[0] = numpy.dot(row[0], row[1])
781 | row[1] -= row[0] * shear[0]
782 | scale[1] = vector_norm(row[1])
783 | row[1] /= scale[1]
784 | shear[0] /= scale[1]
785 | shear[1] = numpy.dot(row[0], row[2])
786 | row[2] -= row[0] * shear[1]
787 | shear[2] = numpy.dot(row[1], row[2])
788 | row[2] -= row[1] * shear[2]
789 | scale[2] = vector_norm(row[2])
790 | row[2] /= scale[2]
791 | shear[1:] /= scale[2]
792 |
793 | if numpy.dot(row[0], numpy.cross(row[1], row[2])) < 0:
794 | numpy.negative(scale, scale)
795 | numpy.negative(row, row)
796 |
797 | angles[1] = math.asin(-row[0, 2])
798 | if math.cos(angles[1]):
799 | angles[0] = math.atan2(row[1, 2], row[2, 2])
800 | angles[2] = math.atan2(row[0, 1], row[0, 0])
801 | else:
802 | # angles[0] = math.atan2(row[1, 0], row[1, 1])
803 | angles[0] = math.atan2(-row[2, 1], row[1, 1])
804 | angles[2] = 0.0
805 |
806 | return scale, shear, angles, translate, perspective
807 |
808 |
809 | def compose_matrix(scale=None, shear=None, angles=None, translate=None,
810 | perspective=None):
811 | """Return transformation matrix from sequence of transformations.
812 |
813 | This is the inverse of the decompose_matrix function.
814 |
815 | Sequence of transformations:
816 | scale : vector of 3 scaling factors
817 | shear : list of shear factors for x-y, x-z, y-z axes
818 | angles : list of Euler angles about static x, y, z axes
819 | translate : translation vector along x, y, z axes
820 | perspective : perspective partition of matrix
821 |
822 | >>> scale = numpy.random.random(3) - 0.5
823 | >>> shear = numpy.random.random(3) - 0.5
824 | >>> angles = (numpy.random.random(3) - 0.5) * (2*math.pi)
825 | >>> trans = numpy.random.random(3) - 0.5
826 | >>> persp = numpy.random.random(4) - 0.5
827 | >>> M0 = compose_matrix(scale, shear, angles, trans, persp)
828 | >>> result = decompose_matrix(M0)
829 | >>> M1 = compose_matrix(*result)
830 | >>> is_same_transform(M0, M1)
831 | True
832 |
833 | """
834 | M = numpy.identity(4)
835 | if perspective is not None:
836 | P = numpy.identity(4)
837 | P[3, :] = perspective[:4]
838 | M = numpy.dot(M, P)
839 | if translate is not None:
840 | T = numpy.identity(4)
841 | T[:3, 3] = translate[:3]
842 | M = numpy.dot(M, T)
843 | if angles is not None:
844 | R = euler_matrix(angles[0], angles[1], angles[2], 'sxyz')
845 | M = numpy.dot(M, R)
846 | if shear is not None:
847 | Z = numpy.identity(4)
848 | Z[1, 2] = shear[2]
849 | Z[0, 2] = shear[1]
850 | Z[0, 1] = shear[0]
851 | M = numpy.dot(M, Z)
852 | if scale is not None:
853 | S = numpy.identity(4)
854 | S[0, 0] = scale[0]
855 | S[1, 1] = scale[1]
856 | S[2, 2] = scale[2]
857 | M = numpy.dot(M, S)
858 | M /= M[3, 3]
859 | return M
860 |
861 |
862 | def orthogonalization_matrix(lengths, angles):
863 | """Return orthogonalization matrix for crystallographic cell coordinates.
864 |
865 | Angles are expected in degrees.
866 |
867 | The de-orthogonalization matrix is the inverse.
868 |
869 | >>> O = orthogonalization_matrix([10, 10, 10], [90, 90, 90])
870 | >>> numpy.allclose(O[:3, :3], numpy.identity(3, float) * 10)
871 | True
872 | >>> O = orthogonalization_matrix([9.8, 12.0, 15.5], [87.2, 80.7, 69.7])
873 | >>> numpy.allclose(numpy.sum(O), 43.063229)
874 | True
875 |
876 | """
877 | a, b, c = lengths
878 | angles = numpy.radians(angles)
879 | sina, sinb, _ = numpy.sin(angles)
880 | cosa, cosb, cosg = numpy.cos(angles)
881 | co = (cosa * cosb - cosg) / (sina * sinb)
882 | return numpy.array([
883 | [ a*sinb*math.sqrt(1.0-co*co), 0.0, 0.0, 0.0],
884 | [-a*sinb*co, b*sina, 0.0, 0.0],
885 | [ a*cosb, b*cosa, c, 0.0],
886 | [ 0.0, 0.0, 0.0, 1.0]])
887 |
888 |
889 | def affine_matrix_from_points(v0, v1, shear=True, scale=True, usesvd=True):
890 | """Return affine transform matrix to register two point sets.
891 |
892 | v0 and v1 are shape (ndims, \*) arrays of at least ndims non-homogeneous
893 | coordinates, where ndims is the dimensionality of the coordinate space.
894 |
895 | If shear is False, a similarity transformation matrix is returned.
896 | If also scale is False, a rigid/Euclidean transformation matrix
897 | is returned.
898 |
899 | By default the algorithm by Hartley and Zissermann [15] is used.
900 | If usesvd is True, similarity and Euclidean transformation matrices
901 | are calculated by minimizing the weighted sum of squared deviations
902 | (RMSD) according to the algorithm by Kabsch [8].
903 | Otherwise, and if ndims is 3, the quaternion based algorithm by Horn [9]
904 | is used, which is slower when using this Python implementation.
905 |
906 | The returned matrix performs rotation, translation and uniform scaling
907 | (if specified).
908 |
909 | >>> v0 = [[0, 1031, 1031, 0], [0, 0, 1600, 1600]]
910 | >>> v1 = [[675, 826, 826, 677], [55, 52, 281, 277]]
911 | >>> affine_matrix_from_points(v0, v1)
912 | array([[ 0.14549, 0.00062, 675.50008],
913 | [ 0.00048, 0.14094, 53.24971],
914 | [ 0. , 0. , 1. ]])
915 | >>> T = translation_matrix(numpy.random.random(3)-0.5)
916 | >>> R = random_rotation_matrix(numpy.random.random(3))
917 | >>> S = scale_matrix(random.random())
918 | >>> M = concatenate_matrices(T, R, S)
919 | >>> v0 = (numpy.random.rand(4, 100) - 0.5) * 20
920 | >>> v0[3] = 1
921 | >>> v1 = numpy.dot(M, v0)
922 | >>> v0[:3] += numpy.random.normal(0, 1e-8, 300).reshape(3, -1)
923 | >>> M = affine_matrix_from_points(v0[:3], v1[:3])
924 | >>> numpy.allclose(v1, numpy.dot(M, v0))
925 | True
926 |
927 | More examples in superimposition_matrix()
928 |
929 | """
930 | v0 = numpy.array(v0, dtype=numpy.float64, copy=True)
931 | v1 = numpy.array(v1, dtype=numpy.float64, copy=True)
932 |
933 | ndims = v0.shape[0]
934 | if ndims < 2 or v0.shape[1] < ndims or v0.shape != v1.shape:
935 | raise ValueError('input arrays are of wrong shape or type')
936 |
937 | # move centroids to origin
938 | t0 = -numpy.mean(v0, axis=1)
939 | M0 = numpy.identity(ndims+1)
940 | M0[:ndims, ndims] = t0
941 | v0 += t0.reshape(ndims, 1)
942 | t1 = -numpy.mean(v1, axis=1)
943 | M1 = numpy.identity(ndims+1)
944 | M1[:ndims, ndims] = t1
945 | v1 += t1.reshape(ndims, 1)
946 |
947 | if shear:
948 | # Affine transformation
949 | A = numpy.concatenate((v0, v1), axis=0)
950 | u, s, vh = numpy.linalg.svd(A.T)
951 | vh = vh[:ndims].T
952 | B = vh[:ndims]
953 | C = vh[ndims:2*ndims]
954 | t = numpy.dot(C, numpy.linalg.pinv(B))
955 | t = numpy.concatenate((t, numpy.zeros((ndims, 1))), axis=1)
956 | M = numpy.vstack((t, ((0.0,)*ndims) + (1.0,)))
957 | elif usesvd or ndims != 3:
958 | # Rigid transformation via SVD of covariance matrix
959 | u, s, vh = numpy.linalg.svd(numpy.dot(v1, v0.T))
960 | # rotation matrix from SVD orthonormal bases
961 | R = numpy.dot(u, vh)
962 | if numpy.linalg.det(R) < 0.0:
963 | # R does not constitute right handed system
964 | R -= numpy.outer(u[:, ndims-1], vh[ndims-1, :]*2.0)
965 | s[-1] *= -1.0
966 | # homogeneous transformation matrix
967 | M = numpy.identity(ndims+1)
968 | M[:ndims, :ndims] = R
969 | else:
970 | # Rigid transformation matrix via quaternion
971 | # compute symmetric matrix N
972 | xx, yy, zz = numpy.sum(v0 * v1, axis=1)
973 | xy, yz, zx = numpy.sum(v0 * numpy.roll(v1, -1, axis=0), axis=1)
974 | xz, yx, zy = numpy.sum(v0 * numpy.roll(v1, -2, axis=0), axis=1)
975 | N = [[xx+yy+zz, 0.0, 0.0, 0.0],
976 | [yz-zy, xx-yy-zz, 0.0, 0.0],
977 | [zx-xz, xy+yx, yy-xx-zz, 0.0],
978 | [xy-yx, zx+xz, yz+zy, zz-xx-yy]]
979 | # quaternion: eigenvector corresponding to most positive eigenvalue
980 | w, V = numpy.linalg.eigh(N)
981 | q = V[:, numpy.argmax(w)]
982 | q /= vector_norm(q) # unit quaternion
983 | # homogeneous transformation matrix
984 | M = quaternion_matrix(q)
985 |
986 | if scale and not shear:
987 | # Affine transformation; scale is ratio of RMS deviations from centroid
988 | v0 *= v0
989 | v1 *= v1
990 | M[:ndims, :ndims] *= math.sqrt(numpy.sum(v1) / numpy.sum(v0))
991 |
992 | # move centroids back
993 | M = numpy.dot(numpy.linalg.inv(M1), numpy.dot(M, M0))
994 | M /= M[ndims, ndims]
995 | return M
996 |
997 |
998 | def superimposition_matrix(v0, v1, scale=False, usesvd=True):
999 | """Return matrix to transform given 3D point set into second point set.
1000 |
1001 | v0 and v1 are shape (3, \*) or (4, \*) arrays of at least 3 points.
1002 |
1003 | The parameters scale and usesvd are explained in the more general
1004 | affine_matrix_from_points function.
1005 |
1006 | The returned matrix is a similarity or Euclidean transformation matrix.
1007 | This function has a fast C implementation in transformations.c.
1008 |
1009 | >>> v0 = numpy.random.rand(3, 10)
1010 | >>> M = superimposition_matrix(v0, v0)
1011 | >>> numpy.allclose(M, numpy.identity(4))
1012 | True
1013 | >>> R = random_rotation_matrix(numpy.random.random(3))
1014 | >>> v0 = [[1,0,0], [0,1,0], [0,0,1], [1,1,1]]
1015 | >>> v1 = numpy.dot(R, v0)
1016 | >>> M = superimposition_matrix(v0, v1)
1017 | >>> numpy.allclose(v1, numpy.dot(M, v0))
1018 | True
1019 | >>> v0 = (numpy.random.rand(4, 100) - 0.5) * 20
1020 | >>> v0[3] = 1
1021 | >>> v1 = numpy.dot(R, v0)
1022 | >>> M = superimposition_matrix(v0, v1)
1023 | >>> numpy.allclose(v1, numpy.dot(M, v0))
1024 | True
1025 | >>> S = scale_matrix(random.random())
1026 | >>> T = translation_matrix(numpy.random.random(3)-0.5)
1027 | >>> M = concatenate_matrices(T, R, S)
1028 | >>> v1 = numpy.dot(M, v0)
1029 | >>> v0[:3] += numpy.random.normal(0, 1e-9, 300).reshape(3, -1)
1030 | >>> M = superimposition_matrix(v0, v1, scale=True)
1031 | >>> numpy.allclose(v1, numpy.dot(M, v0))
1032 | True
1033 | >>> M = superimposition_matrix(v0, v1, scale=True, usesvd=False)
1034 | >>> numpy.allclose(v1, numpy.dot(M, v0))
1035 | True
1036 | >>> v = numpy.empty((4, 100, 3))
1037 | >>> v[:, :, 0] = v0
1038 | >>> M = superimposition_matrix(v0, v1, scale=True, usesvd=False)
1039 | >>> numpy.allclose(v1, numpy.dot(M, v[:, :, 0]))
1040 | True
1041 |
1042 | """
1043 | v0 = numpy.array(v0, dtype=numpy.float64, copy=False)[:3]
1044 | v1 = numpy.array(v1, dtype=numpy.float64, copy=False)[:3]
1045 | return affine_matrix_from_points(v0, v1, shear=False,
1046 | scale=scale, usesvd=usesvd)
1047 |
1048 |
1049 | def euler_matrix(ai, aj, ak, axes='sxyz'):
1050 | """Return homogeneous rotation matrix from Euler angles and axis sequence.
1051 |
1052 | ai, aj, ak : Euler's roll, pitch and yaw angles
1053 | axes : One of 24 axis sequences as string or encoded tuple
1054 |
1055 | >>> R = euler_matrix(1, 2, 3, 'syxz')
1056 | >>> numpy.allclose(numpy.sum(R[0]), -1.34786452)
1057 | True
1058 | >>> R = euler_matrix(1, 2, 3, (0, 1, 0, 1))
1059 | >>> numpy.allclose(numpy.sum(R[0]), -0.383436184)
1060 | True
1061 | >>> ai, aj, ak = (4*math.pi) * (numpy.random.random(3) - 0.5)
1062 | >>> for axes in _AXES2TUPLE.keys():
1063 | ... R = euler_matrix(ai, aj, ak, axes)
1064 | >>> for axes in _TUPLE2AXES.keys():
1065 | ... R = euler_matrix(ai, aj, ak, axes)
1066 |
1067 | """
1068 | try:
1069 | firstaxis, parity, repetition, frame = _AXES2TUPLE[axes]
1070 | except (AttributeError, KeyError):
1071 | _TUPLE2AXES[axes] # validation
1072 | firstaxis, parity, repetition, frame = axes
1073 |
1074 | i = firstaxis
1075 | j = _NEXT_AXIS[i+parity]
1076 | k = _NEXT_AXIS[i-parity+1]
1077 |
1078 | if frame:
1079 | ai, ak = ak, ai
1080 | if parity:
1081 | ai, aj, ak = -ai, -aj, -ak
1082 |
1083 | si, sj, sk = math.sin(ai), math.sin(aj), math.sin(ak)
1084 | ci, cj, ck = math.cos(ai), math.cos(aj), math.cos(ak)
1085 | cc, cs = ci*ck, ci*sk
1086 | sc, ss = si*ck, si*sk
1087 |
1088 | M = numpy.identity(4)
1089 | if repetition:
1090 | M[i, i] = cj
1091 | M[i, j] = sj*si
1092 | M[i, k] = sj*ci
1093 | M[j, i] = sj*sk
1094 | M[j, j] = -cj*ss+cc
1095 | M[j, k] = -cj*cs-sc
1096 | M[k, i] = -sj*ck
1097 | M[k, j] = cj*sc+cs
1098 | M[k, k] = cj*cc-ss
1099 | else:
1100 | M[i, i] = cj*ck
1101 | M[i, j] = sj*sc-cs
1102 | M[i, k] = sj*cc+ss
1103 | M[j, i] = cj*sk
1104 | M[j, j] = sj*ss+cc
1105 | M[j, k] = sj*cs-sc
1106 | M[k, i] = -sj
1107 | M[k, j] = cj*si
1108 | M[k, k] = cj*ci
1109 | return M
1110 |
1111 |
1112 | def euler_from_matrix(matrix, axes='sxyz'):
1113 | """Return Euler angles from rotation matrix for specified axis sequence.
1114 |
1115 | axes : One of 24 axis sequences as string or encoded tuple
1116 |
1117 | Note that many Euler angle triplets can describe one matrix.
1118 |
1119 | >>> R0 = euler_matrix(1, 2, 3, 'syxz')
1120 | >>> al, be, ga = euler_from_matrix(R0, 'syxz')
1121 | >>> R1 = euler_matrix(al, be, ga, 'syxz')
1122 | >>> numpy.allclose(R0, R1)
1123 | True
1124 | >>> angles = (4*math.pi) * (numpy.random.random(3) - 0.5)
1125 | >>> for axes in _AXES2TUPLE.keys():
1126 | ... R0 = euler_matrix(axes=axes, *angles)
1127 | ... R1 = euler_matrix(axes=axes, *euler_from_matrix(R0, axes))
1128 | ... if not numpy.allclose(R0, R1): print(axes, "failed")
1129 |
1130 | """
1131 | try:
1132 | firstaxis, parity, repetition, frame = _AXES2TUPLE[axes.lower()]
1133 | except (AttributeError, KeyError):
1134 | _TUPLE2AXES[axes] # validation
1135 | firstaxis, parity, repetition, frame = axes
1136 |
1137 | i = firstaxis
1138 | j = _NEXT_AXIS[i+parity]
1139 | k = _NEXT_AXIS[i-parity+1]
1140 |
1141 | M = numpy.array(matrix, dtype=numpy.float64, copy=False)[:3, :3]
1142 | if repetition:
1143 | sy = math.sqrt(M[i, j]*M[i, j] + M[i, k]*M[i, k])
1144 | if sy > _EPS:
1145 | ax = math.atan2( M[i, j], M[i, k])
1146 | ay = math.atan2( sy, M[i, i])
1147 | az = math.atan2( M[j, i], -M[k, i])
1148 | else:
1149 | ax = math.atan2(-M[j, k], M[j, j])
1150 | ay = math.atan2( sy, M[i, i])
1151 | az = 0.0
1152 | else:
1153 | cy = math.sqrt(M[i, i]*M[i, i] + M[j, i]*M[j, i])
1154 | if cy > _EPS:
1155 | ax = math.atan2( M[k, j], M[k, k])
1156 | ay = math.atan2(-M[k, i], cy)
1157 | az = math.atan2( M[j, i], M[i, i])
1158 | else:
1159 | ax = math.atan2(-M[j, k], M[j, j])
1160 | ay = math.atan2(-M[k, i], cy)
1161 | az = 0.0
1162 |
1163 | if parity:
1164 | ax, ay, az = -ax, -ay, -az
1165 | if frame:
1166 | ax, az = az, ax
1167 | return ax, ay, az
1168 |
1169 |
1170 | def euler_from_quaternion(quaternion, axes='sxyz'):
1171 | """Return Euler angles from quaternion for specified axis sequence.
1172 |
1173 | >>> angles = euler_from_quaternion([0.99810947, 0.06146124, 0, 0])
1174 | >>> numpy.allclose(angles, [0.123, 0, 0])
1175 | True
1176 |
1177 | """
1178 | return euler_from_matrix(quaternion_matrix(quaternion), axes)
1179 |
1180 |
1181 | def quaternion_from_euler(ai, aj, ak, axes='sxyz'):
1182 | """Return quaternion from Euler angles and axis sequence.
1183 |
1184 | ai, aj, ak : Euler's roll, pitch and yaw angles
1185 | axes : One of 24 axis sequences as string or encoded tuple
1186 |
1187 | >>> q = quaternion_from_euler(1, 2, 3, 'ryxz')
1188 | >>> numpy.allclose(q, [0.435953, 0.310622, -0.718287, 0.444435])
1189 | True
1190 |
1191 | """
1192 | try:
1193 | firstaxis, parity, repetition, frame = _AXES2TUPLE[axes.lower()]
1194 | except (AttributeError, KeyError):
1195 | _TUPLE2AXES[axes] # validation
1196 | firstaxis, parity, repetition, frame = axes
1197 |
1198 | i = firstaxis + 1
1199 | j = _NEXT_AXIS[i+parity-1] + 1
1200 | k = _NEXT_AXIS[i-parity] + 1
1201 |
1202 | if frame:
1203 | ai, ak = ak, ai
1204 | if parity:
1205 | aj = -aj
1206 |
1207 | ai /= 2.0
1208 | aj /= 2.0
1209 | ak /= 2.0
1210 | ci = math.cos(ai)
1211 | si = math.sin(ai)
1212 | cj = math.cos(aj)
1213 | sj = math.sin(aj)
1214 | ck = math.cos(ak)
1215 | sk = math.sin(ak)
1216 | cc = ci*ck
1217 | cs = ci*sk
1218 | sc = si*ck
1219 | ss = si*sk
1220 |
1221 | q = numpy.empty((4, ))
1222 | if repetition:
1223 | q[0] = cj*(cc - ss)
1224 | q[i] = cj*(cs + sc)
1225 | q[j] = sj*(cc + ss)
1226 | q[k] = sj*(cs - sc)
1227 | else:
1228 | q[0] = cj*cc + sj*ss
1229 | q[i] = cj*sc - sj*cs
1230 | q[j] = cj*ss + sj*cc
1231 | q[k] = cj*cs - sj*sc
1232 | if parity:
1233 | q[j] *= -1.0
1234 |
1235 | return q
1236 |
1237 |
1238 | def quaternion_about_axis(angle, axis):
1239 | """Return quaternion for rotation about axis.
1240 |
1241 | >>> q = quaternion_about_axis(0.123, [1, 0, 0])
1242 | >>> numpy.allclose(q, [0.99810947, 0.06146124, 0, 0])
1243 | True
1244 |
1245 | """
1246 | q = numpy.array([0.0, axis[0], axis[1], axis[2]])
1247 | qlen = vector_norm(q)
1248 | if qlen > _EPS:
1249 | q *= math.sin(angle/2.0) / qlen
1250 | q[0] = math.cos(angle/2.0)
1251 | return q
1252 |
1253 |
1254 | def quaternion_matrix(quaternion):
1255 | """Return homogeneous rotation matrix from quaternion.
1256 |
1257 | >>> M = quaternion_matrix([0.99810947, 0.06146124, 0, 0])
1258 | >>> numpy.allclose(M, rotation_matrix(0.123, [1, 0, 0]))
1259 | True
1260 | >>> M = quaternion_matrix([1, 0, 0, 0])
1261 | >>> numpy.allclose(M, numpy.identity(4))
1262 | True
1263 | >>> M = quaternion_matrix([0, 1, 0, 0])
1264 | >>> numpy.allclose(M, numpy.diag([1, -1, -1, 1]))
1265 | True
1266 |
1267 | """
1268 | q = numpy.array(quaternion, dtype=numpy.float64, copy=True)
1269 | n = numpy.dot(q, q)
1270 | if n < _EPS:
1271 | return numpy.identity(4)
1272 | q *= math.sqrt(2.0 / n)
1273 | q = numpy.outer(q, q)
1274 | return numpy.array([
1275 | [1.0-q[2, 2]-q[3, 3], q[1, 2]-q[3, 0], q[1, 3]+q[2, 0], 0.0],
1276 | [ q[1, 2]+q[3, 0], 1.0-q[1, 1]-q[3, 3], q[2, 3]-q[1, 0], 0.0],
1277 | [ q[1, 3]-q[2, 0], q[2, 3]+q[1, 0], 1.0-q[1, 1]-q[2, 2], 0.0],
1278 | [ 0.0, 0.0, 0.0, 1.0]])
1279 |
1280 |
1281 | def quaternion_from_matrix(matrix, isprecise=False):
1282 | """Return quaternion from rotation matrix.
1283 |
1284 | If isprecise is True, the input matrix is assumed to be a precise rotation
1285 | matrix and a faster algorithm is used.
1286 |
1287 | >>> q = quaternion_from_matrix(numpy.identity(4), True)
1288 | >>> numpy.allclose(q, [1, 0, 0, 0])
1289 | True
1290 | >>> q = quaternion_from_matrix(numpy.diag([1, -1, -1, 1]))
1291 | >>> numpy.allclose(q, [0, 1, 0, 0]) or numpy.allclose(q, [0, -1, 0, 0])
1292 | True
1293 | >>> R = rotation_matrix(0.123, (1, 2, 3))
1294 | >>> q = quaternion_from_matrix(R, True)
1295 | >>> numpy.allclose(q, [0.9981095, 0.0164262, 0.0328524, 0.0492786])
1296 | True
1297 | >>> R = [[-0.545, 0.797, 0.260, 0], [0.733, 0.603, -0.313, 0],
1298 | ... [-0.407, 0.021, -0.913, 0], [0, 0, 0, 1]]
1299 | >>> q = quaternion_from_matrix(R)
1300 | >>> numpy.allclose(q, [0.19069, 0.43736, 0.87485, -0.083611])
1301 | True
1302 | >>> R = [[0.395, 0.362, 0.843, 0], [-0.626, 0.796, -0.056, 0],
1303 | ... [-0.677, -0.498, 0.529, 0], [0, 0, 0, 1]]
1304 | >>> q = quaternion_from_matrix(R)
1305 | >>> numpy.allclose(q, [0.82336615, -0.13610694, 0.46344705, -0.29792603])
1306 | True
1307 | >>> R = random_rotation_matrix()
1308 | >>> q = quaternion_from_matrix(R)
1309 | >>> is_same_transform(R, quaternion_matrix(q))
1310 | True
1311 | >>> is_same_quaternion(quaternion_from_matrix(R, isprecise=False),
1312 | ... quaternion_from_matrix(R, isprecise=True))
1313 | True
1314 | >>> R = euler_matrix(0.0, 0.0, numpy.pi/2.0)
1315 | >>> is_same_quaternion(quaternion_from_matrix(R, isprecise=False),
1316 | ... quaternion_from_matrix(R, isprecise=True))
1317 | True
1318 |
1319 | """
1320 | M = numpy.array(matrix, dtype=numpy.float64, copy=False)[:4, :4]
1321 | if isprecise:
1322 | q = numpy.empty((4, ))
1323 | t = numpy.trace(M)
1324 | if t > M[3, 3]:
1325 | q[0] = t
1326 | q[3] = M[1, 0] - M[0, 1]
1327 | q[2] = M[0, 2] - M[2, 0]
1328 | q[1] = M[2, 1] - M[1, 2]
1329 | else:
1330 | i, j, k = 0, 1, 2
1331 | if M[1, 1] > M[0, 0]:
1332 | i, j, k = 1, 2, 0
1333 | if M[2, 2] > M[i, i]:
1334 | i, j, k = 2, 0, 1
1335 | t = M[i, i] - (M[j, j] + M[k, k]) + M[3, 3]
1336 | q[i] = t
1337 | q[j] = M[i, j] + M[j, i]
1338 | q[k] = M[k, i] + M[i, k]
1339 | q[3] = M[k, j] - M[j, k]
1340 | q = q[[3, 0, 1, 2]]
1341 | q *= 0.5 / math.sqrt(t * M[3, 3])
1342 | else:
1343 | m00 = M[0, 0]
1344 | m01 = M[0, 1]
1345 | m02 = M[0, 2]
1346 | m10 = M[1, 0]
1347 | m11 = M[1, 1]
1348 | m12 = M[1, 2]
1349 | m20 = M[2, 0]
1350 | m21 = M[2, 1]
1351 | m22 = M[2, 2]
1352 | # symmetric matrix K
1353 | K = numpy.array([[m00-m11-m22, 0.0, 0.0, 0.0],
1354 | [m01+m10, m11-m00-m22, 0.0, 0.0],
1355 | [m02+m20, m12+m21, m22-m00-m11, 0.0],
1356 | [m21-m12, m02-m20, m10-m01, m00+m11+m22]])
1357 | K /= 3.0
1358 | # quaternion is eigenvector of K that corresponds to largest eigenvalue
1359 | w, V = numpy.linalg.eigh(K)
1360 | q = V[[3, 0, 1, 2], numpy.argmax(w)]
1361 | if q[0] < 0.0:
1362 | numpy.negative(q, q)
1363 | return q
1364 |
1365 |
1366 | def quaternion_multiply(quaternion1, quaternion0):
1367 | """Return multiplication of two quaternions.
1368 |
1369 | >>> q = quaternion_multiply([4, 1, -2, 3], [8, -5, 6, 7])
1370 | >>> numpy.allclose(q, [28, -44, -14, 48])
1371 | True
1372 |
1373 | """
1374 | w0, x0, y0, z0 = quaternion0
1375 | w1, x1, y1, z1 = quaternion1
1376 | return numpy.array([
1377 | -x1*x0 - y1*y0 - z1*z0 + w1*w0,
1378 | x1*w0 + y1*z0 - z1*y0 + w1*x0,
1379 | -x1*z0 + y1*w0 + z1*x0 + w1*y0,
1380 | x1*y0 - y1*x0 + z1*w0 + w1*z0], dtype=numpy.float64)
1381 |
1382 |
1383 | def quaternion_conjugate(quaternion):
1384 | """Return conjugate of quaternion.
1385 |
1386 | >>> q0 = random_quaternion()
1387 | >>> q1 = quaternion_conjugate(q0)
1388 | >>> q1[0] == q0[0] and all(q1[1:] == -q0[1:])
1389 | True
1390 |
1391 | """
1392 | q = numpy.array(quaternion, dtype=numpy.float64, copy=True)
1393 | numpy.negative(q[1:], q[1:])
1394 | return q
1395 |
1396 |
1397 | def quaternion_inverse(quaternion):
1398 | """Return inverse of quaternion.
1399 |
1400 | >>> q0 = random_quaternion()
1401 | >>> q1 = quaternion_inverse(q0)
1402 | >>> numpy.allclose(quaternion_multiply(q0, q1), [1, 0, 0, 0])
1403 | True
1404 |
1405 | """
1406 | q = numpy.array(quaternion, dtype=numpy.float64, copy=True)
1407 | numpy.negative(q[1:], q[1:])
1408 | return q / numpy.dot(q, q)
1409 |
1410 |
1411 | def quaternion_real(quaternion):
1412 | """Return real part of quaternion.
1413 |
1414 | >>> quaternion_real([3, 0, 1, 2])
1415 | 3.0
1416 |
1417 | """
1418 | return float(quaternion[0])
1419 |
1420 |
1421 | def quaternion_imag(quaternion):
1422 | """Return imaginary part of quaternion.
1423 |
1424 | >>> quaternion_imag([3, 0, 1, 2])
1425 | array([ 0., 1., 2.])
1426 |
1427 | """
1428 | return numpy.array(quaternion[1:4], dtype=numpy.float64, copy=True)
1429 |
1430 |
1431 | def quaternion_slerp(quat0, quat1, fraction, spin=0, shortestpath=True):
1432 | """Return spherical linear interpolation between two quaternions.
1433 |
1434 | >>> q0 = random_quaternion()
1435 | >>> q1 = random_quaternion()
1436 | >>> q = quaternion_slerp(q0, q1, 0)
1437 | >>> numpy.allclose(q, q0)
1438 | True
1439 | >>> q = quaternion_slerp(q0, q1, 1, 1)
1440 | >>> numpy.allclose(q, q1)
1441 | True
1442 | >>> q = quaternion_slerp(q0, q1, 0.5)
1443 | >>> angle = math.acos(numpy.dot(q0, q))
1444 | >>> numpy.allclose(2, math.acos(numpy.dot(q0, q1)) / angle) or \
1445 | numpy.allclose(2, math.acos(-numpy.dot(q0, q1)) / angle)
1446 | True
1447 |
1448 | """
1449 | q0 = unit_vector(quat0[:4])
1450 | q1 = unit_vector(quat1[:4])
1451 | if fraction == 0.0:
1452 | return q0
1453 | elif fraction == 1.0:
1454 | return q1
1455 | d = numpy.dot(q0, q1)
1456 | if abs(abs(d) - 1.0) < _EPS:
1457 | return q0
1458 | if shortestpath and d < 0.0:
1459 | # invert rotation
1460 | d = -d
1461 | numpy.negative(q1, q1)
1462 | angle = math.acos(d) + spin * math.pi
1463 | if abs(angle) < _EPS:
1464 | return q0
1465 | isin = 1.0 / math.sin(angle)
1466 | q0 *= math.sin((1.0 - fraction) * angle) * isin
1467 | q1 *= math.sin(fraction * angle) * isin
1468 | q0 += q1
1469 | return q0
1470 |
1471 |
1472 | def random_quaternion(rand=None):
1473 | """Return uniform random unit quaternion.
1474 |
1475 | rand: array like or None
1476 | Three independent random variables that are uniformly distributed
1477 | between 0 and 1.
1478 |
1479 | >>> q = random_quaternion()
1480 | >>> numpy.allclose(1, vector_norm(q))
1481 | True
1482 | >>> q = random_quaternion(numpy.random.random(3))
1483 | >>> len(q.shape), q.shape[0]==4
1484 | (1, True)
1485 |
1486 | """
1487 | if rand is None:
1488 | rand = numpy.random.rand(3)
1489 | else:
1490 | assert len(rand) == 3
1491 | r1 = numpy.sqrt(1.0 - rand[0])
1492 | r2 = numpy.sqrt(rand[0])
1493 | pi2 = math.pi * 2.0
1494 | t1 = pi2 * rand[1]
1495 | t2 = pi2 * rand[2]
1496 | return numpy.array([numpy.cos(t2)*r2, numpy.sin(t1)*r1,
1497 | numpy.cos(t1)*r1, numpy.sin(t2)*r2])
1498 |
1499 |
1500 | def random_rotation_matrix(rand=None):
1501 | """Return uniform random rotation matrix.
1502 |
1503 | rand: array like
1504 | Three independent random variables that are uniformly distributed
1505 | between 0 and 1 for each returned quaternion.
1506 |
1507 | >>> R = random_rotation_matrix()
1508 | >>> numpy.allclose(numpy.dot(R.T, R), numpy.identity(4))
1509 | True
1510 |
1511 | """
1512 | return quaternion_matrix(random_quaternion(rand))
1513 |
1514 |
1515 | class Arcball(object):
1516 | """Virtual Trackball Control.
1517 |
1518 | >>> ball = Arcball()
1519 | >>> ball = Arcball(initial=numpy.identity(4))
1520 | >>> ball.place([320, 320], 320)
1521 | >>> ball.down([500, 250])
1522 | >>> ball.drag([475, 275])
1523 | >>> R = ball.matrix()
1524 | >>> numpy.allclose(numpy.sum(R), 3.90583455)
1525 | True
1526 | >>> ball = Arcball(initial=[1, 0, 0, 0])
1527 | >>> ball.place([320, 320], 320)
1528 | >>> ball.setaxes([1, 1, 0], [-1, 1, 0])
1529 | >>> ball.constrain = True
1530 | >>> ball.down([400, 200])
1531 | >>> ball.drag([200, 400])
1532 | >>> R = ball.matrix()
1533 | >>> numpy.allclose(numpy.sum(R), 0.2055924)
1534 | True
1535 | >>> ball.next()
1536 |
1537 | """
1538 | def __init__(self, initial=None):
1539 | """Initialize virtual trackball control.
1540 |
1541 | initial : quaternion or rotation matrix
1542 |
1543 | """
1544 | self._axis = None
1545 | self._axes = None
1546 | self._radius = 1.0
1547 | self._center = [0.0, 0.0]
1548 | self._vdown = numpy.array([0.0, 0.0, 1.0])
1549 | self._constrain = False
1550 | if initial is None:
1551 | self._qdown = numpy.array([1.0, 0.0, 0.0, 0.0])
1552 | else:
1553 | initial = numpy.array(initial, dtype=numpy.float64)
1554 | if initial.shape == (4, 4):
1555 | self._qdown = quaternion_from_matrix(initial)
1556 | elif initial.shape == (4, ):
1557 | initial /= vector_norm(initial)
1558 | self._qdown = initial
1559 | else:
1560 | raise ValueError("initial not a quaternion or matrix")
1561 | self._qnow = self._qpre = self._qdown
1562 |
1563 | def place(self, center, radius):
1564 | """Place Arcball, e.g. when window size changes.
1565 |
1566 | center : sequence[2]
1567 | Window coordinates of trackball center.
1568 | radius : float
1569 | Radius of trackball in window coordinates.
1570 |
1571 | """
1572 | self._radius = float(radius)
1573 | self._center[0] = center[0]
1574 | self._center[1] = center[1]
1575 |
1576 | def setaxes(self, *axes):
1577 | """Set axes to constrain rotations."""
1578 | if axes is None:
1579 | self._axes = None
1580 | else:
1581 | self._axes = [unit_vector(axis) for axis in axes]
1582 |
1583 | @property
1584 | def constrain(self):
1585 | """Return state of constrain to axis mode."""
1586 | return self._constrain
1587 |
1588 | @constrain.setter
1589 | def constrain(self, value):
1590 | """Set state of constrain to axis mode."""
1591 | self._constrain = bool(value)
1592 |
1593 | def down(self, point):
1594 | """Set initial cursor window coordinates and pick constrain-axis."""
1595 | self._vdown = arcball_map_to_sphere(point, self._center, self._radius)
1596 | self._qdown = self._qpre = self._qnow
1597 | if self._constrain and self._axes is not None:
1598 | self._axis = arcball_nearest_axis(self._vdown, self._axes)
1599 | self._vdown = arcball_constrain_to_axis(self._vdown, self._axis)
1600 | else:
1601 | self._axis = None
1602 |
1603 | def drag(self, point):
1604 | """Update current cursor window coordinates."""
1605 | vnow = arcball_map_to_sphere(point, self._center, self._radius)
1606 | if self._axis is not None:
1607 | vnow = arcball_constrain_to_axis(vnow, self._axis)
1608 | self._qpre = self._qnow
1609 | t = numpy.cross(self._vdown, vnow)
1610 | if numpy.dot(t, t) < _EPS:
1611 | self._qnow = self._qdown
1612 | else:
1613 | q = [numpy.dot(self._vdown, vnow), t[0], t[1], t[2]]
1614 | self._qnow = quaternion_multiply(q, self._qdown)
1615 |
1616 | def next(self, acceleration=0.0):
1617 | """Continue rotation in direction of last drag."""
1618 | q = quaternion_slerp(self._qpre, self._qnow, 2.0+acceleration, False)
1619 | self._qpre, self._qnow = self._qnow, q
1620 |
1621 | def matrix(self):
1622 | """Return homogeneous rotation matrix."""
1623 | return quaternion_matrix(self._qnow)
1624 |
1625 |
1626 | def arcball_map_to_sphere(point, center, radius):
1627 | """Return unit sphere coordinates from window coordinates."""
1628 | v0 = (point[0] - center[0]) / radius
1629 | v1 = (center[1] - point[1]) / radius
1630 | n = v0*v0 + v1*v1
1631 | if n > 1.0:
1632 | # position outside of sphere
1633 | n = math.sqrt(n)
1634 | return numpy.array([v0/n, v1/n, 0.0])
1635 | else:
1636 | return numpy.array([v0, v1, math.sqrt(1.0 - n)])
1637 |
1638 |
1639 | def arcball_constrain_to_axis(point, axis):
1640 | """Return sphere point perpendicular to axis."""
1641 | v = numpy.array(point, dtype=numpy.float64, copy=True)
1642 | a = numpy.array(axis, dtype=numpy.float64, copy=True)
1643 | v -= a * numpy.dot(a, v) # on plane
1644 | n = vector_norm(v)
1645 | if n > _EPS:
1646 | if v[2] < 0.0:
1647 | numpy.negative(v, v)
1648 | v /= n
1649 | return v
1650 | if a[2] == 1.0:
1651 | return numpy.array([1.0, 0.0, 0.0])
1652 | return unit_vector([-a[1], a[0], 0.0])
1653 |
1654 |
1655 | def arcball_nearest_axis(point, axes):
1656 | """Return axis, which arc is nearest to point."""
1657 | point = numpy.array(point, dtype=numpy.float64, copy=False)
1658 | nearest = None
1659 | mx = -1.0
1660 | for axis in axes:
1661 | t = numpy.dot(arcball_constrain_to_axis(point, axis), point)
1662 | if t > mx:
1663 | nearest = axis
1664 | mx = t
1665 | return nearest
1666 |
1667 |
1668 | # epsilon for testing whether a number is close to zero
1669 | _EPS = numpy.finfo(float).eps * 4.0
1670 |
1671 | # axis sequences for Euler angles
1672 | _NEXT_AXIS = [1, 2, 0, 1]
1673 |
1674 | # map axes strings to/from tuples of inner axis, parity, repetition, frame
1675 | _AXES2TUPLE = {
1676 | 'sxyz': (0, 0, 0, 0), 'sxyx': (0, 0, 1, 0), 'sxzy': (0, 1, 0, 0),
1677 | 'sxzx': (0, 1, 1, 0), 'syzx': (1, 0, 0, 0), 'syzy': (1, 0, 1, 0),
1678 | 'syxz': (1, 1, 0, 0), 'syxy': (1, 1, 1, 0), 'szxy': (2, 0, 0, 0),
1679 | 'szxz': (2, 0, 1, 0), 'szyx': (2, 1, 0, 0), 'szyz': (2, 1, 1, 0),
1680 | 'rzyx': (0, 0, 0, 1), 'rxyx': (0, 0, 1, 1), 'ryzx': (0, 1, 0, 1),
1681 | 'rxzx': (0, 1, 1, 1), 'rxzy': (1, 0, 0, 1), 'ryzy': (1, 0, 1, 1),
1682 | 'rzxy': (1, 1, 0, 1), 'ryxy': (1, 1, 1, 1), 'ryxz': (2, 0, 0, 1),
1683 | 'rzxz': (2, 0, 1, 1), 'rxyz': (2, 1, 0, 1), 'rzyz': (2, 1, 1, 1)}
1684 |
1685 | _TUPLE2AXES = dict((v, k) for k, v in _AXES2TUPLE.items())
1686 |
1687 |
1688 | def vector_norm(data, axis=None, out=None):
1689 | """Return length, i.e. Euclidean norm, of ndarray along axis.
1690 |
1691 | >>> v = numpy.random.random(3)
1692 | >>> n = vector_norm(v)
1693 | >>> numpy.allclose(n, numpy.linalg.norm(v))
1694 | True
1695 | >>> v = numpy.random.rand(6, 5, 3)
1696 | >>> n = vector_norm(v, axis=-1)
1697 | >>> numpy.allclose(n, numpy.sqrt(numpy.sum(v*v, axis=2)))
1698 | True
1699 | >>> n = vector_norm(v, axis=1)
1700 | >>> numpy.allclose(n, numpy.sqrt(numpy.sum(v*v, axis=1)))
1701 | True
1702 | >>> v = numpy.random.rand(5, 4, 3)
1703 | >>> n = numpy.empty((5, 3))
1704 | >>> vector_norm(v, axis=1, out=n)
1705 | >>> numpy.allclose(n, numpy.sqrt(numpy.sum(v*v, axis=1)))
1706 | True
1707 | >>> vector_norm([])
1708 | 0.0
1709 | >>> vector_norm([1])
1710 | 1.0
1711 |
1712 | """
1713 | data = numpy.array(data, dtype=numpy.float64, copy=True)
1714 | if out is None:
1715 | if data.ndim == 1:
1716 | return math.sqrt(numpy.dot(data, data))
1717 | data *= data
1718 | out = numpy.atleast_1d(numpy.sum(data, axis=axis))
1719 | numpy.sqrt(out, out)
1720 | return out
1721 | else:
1722 | data *= data
1723 | numpy.sum(data, axis=axis, out=out)
1724 | numpy.sqrt(out, out)
1725 |
1726 |
1727 | def unit_vector(data, axis=None, out=None):
1728 | """Return ndarray normalized by length, i.e. Euclidean norm, along axis.
1729 |
1730 | >>> v0 = numpy.random.random(3)
1731 | >>> v1 = unit_vector(v0)
1732 | >>> numpy.allclose(v1, v0 / numpy.linalg.norm(v0))
1733 | True
1734 | >>> v0 = numpy.random.rand(5, 4, 3)
1735 | >>> v1 = unit_vector(v0, axis=-1)
1736 | >>> v2 = v0 / numpy.expand_dims(numpy.sqrt(numpy.sum(v0*v0, axis=2)), 2)
1737 | >>> numpy.allclose(v1, v2)
1738 | True
1739 | >>> v1 = unit_vector(v0, axis=1)
1740 | >>> v2 = v0 / numpy.expand_dims(numpy.sqrt(numpy.sum(v0*v0, axis=1)), 1)
1741 | >>> numpy.allclose(v1, v2)
1742 | True
1743 | >>> v1 = numpy.empty((5, 4, 3))
1744 | >>> unit_vector(v0, axis=1, out=v1)
1745 | >>> numpy.allclose(v1, v2)
1746 | True
1747 | >>> list(unit_vector([]))
1748 | []
1749 | >>> list(unit_vector([1]))
1750 | [1.0]
1751 |
1752 | """
1753 | if out is None:
1754 | data = numpy.array(data, dtype=numpy.float64, copy=True)
1755 | if data.ndim == 1:
1756 | data /= math.sqrt(numpy.dot(data, data))
1757 | return data
1758 | else:
1759 | if out is not data:
1760 | out[:] = numpy.array(data, copy=False)
1761 | data = out
1762 | length = numpy.atleast_1d(numpy.sum(data*data, axis))
1763 | numpy.sqrt(length, length)
1764 | if axis is not None:
1765 | length = numpy.expand_dims(length, axis)
1766 | data /= length
1767 | if out is None:
1768 | return data
1769 |
1770 |
1771 | def random_vector(size):
1772 | """Return array of random doubles in the half-open interval [0.0, 1.0).
1773 |
1774 | >>> v = random_vector(10000)
1775 | >>> numpy.all(v >= 0) and numpy.all(v < 1)
1776 | True
1777 | >>> v0 = random_vector(10)
1778 | >>> v1 = random_vector(10)
1779 | >>> numpy.any(v0 == v1)
1780 | False
1781 |
1782 | """
1783 | return numpy.random.random(size)
1784 |
1785 |
1786 | def vector_product(v0, v1, axis=0):
1787 | """Return vector perpendicular to vectors.
1788 |
1789 | >>> v = vector_product([2, 0, 0], [0, 3, 0])
1790 | >>> numpy.allclose(v, [0, 0, 6])
1791 | True
1792 | >>> v0 = [[2, 0, 0, 2], [0, 2, 0, 2], [0, 0, 2, 2]]
1793 | >>> v1 = [[3], [0], [0]]
1794 | >>> v = vector_product(v0, v1)
1795 | >>> numpy.allclose(v, [[0, 0, 0, 0], [0, 0, 6, 6], [0, -6, 0, -6]])
1796 | True
1797 | >>> v0 = [[2, 0, 0], [2, 0, 0], [0, 2, 0], [2, 0, 0]]
1798 | >>> v1 = [[0, 3, 0], [0, 0, 3], [0, 0, 3], [3, 3, 3]]
1799 | >>> v = vector_product(v0, v1, axis=1)
1800 | >>> numpy.allclose(v, [[0, 0, 6], [0, -6, 0], [6, 0, 0], [0, -6, 6]])
1801 | True
1802 |
1803 | """
1804 | return numpy.cross(v0, v1, axis=axis)
1805 |
1806 |
1807 | def angle_between_vectors(v0, v1, directed=True, axis=0):
1808 | """Return angle between vectors.
1809 |
1810 | If directed is False, the input vectors are interpreted as undirected axes,
1811 | i.e. the maximum angle is pi/2.
1812 |
1813 | >>> a = angle_between_vectors([1, -2, 3], [-1, 2, -3])
1814 | >>> numpy.allclose(a, math.pi)
1815 | True
1816 | >>> a = angle_between_vectors([1, -2, 3], [-1, 2, -3], directed=False)
1817 | >>> numpy.allclose(a, 0)
1818 | True
1819 | >>> v0 = [[2, 0, 0, 2], [0, 2, 0, 2], [0, 0, 2, 2]]
1820 | >>> v1 = [[3], [0], [0]]
1821 | >>> a = angle_between_vectors(v0, v1)
1822 | >>> numpy.allclose(a, [0, 1.5708, 1.5708, 0.95532])
1823 | True
1824 | >>> v0 = [[2, 0, 0], [2, 0, 0], [0, 2, 0], [2, 0, 0]]
1825 | >>> v1 = [[0, 3, 0], [0, 0, 3], [0, 0, 3], [3, 3, 3]]
1826 | >>> a = angle_between_vectors(v0, v1, axis=1)
1827 | >>> numpy.allclose(a, [1.5708, 1.5708, 1.5708, 0.95532])
1828 | True
1829 |
1830 | """
1831 | v0 = numpy.array(v0, dtype=numpy.float64, copy=False)
1832 | v1 = numpy.array(v1, dtype=numpy.float64, copy=False)
1833 | dot = numpy.sum(v0 * v1, axis=axis)
1834 | dot /= vector_norm(v0, axis=axis) * vector_norm(v1, axis=axis)
1835 | dot = numpy.clip(dot, -1.0, 1.0)
1836 | return numpy.arccos(dot if directed else numpy.fabs(dot))
1837 |
1838 |
1839 | def inverse_matrix(matrix):
1840 | """Return inverse of square transformation matrix.
1841 |
1842 | >>> M0 = random_rotation_matrix()
1843 | >>> M1 = inverse_matrix(M0.T)
1844 | >>> numpy.allclose(M1, numpy.linalg.inv(M0.T))
1845 | True
1846 | >>> for size in range(1, 7):
1847 | ... M0 = numpy.random.rand(size, size)
1848 | ... M1 = inverse_matrix(M0)
1849 | ... if not numpy.allclose(M1, numpy.linalg.inv(M0)): print(size)
1850 |
1851 | """
1852 | return numpy.linalg.inv(matrix)
1853 |
1854 |
1855 | def concatenate_matrices(*matrices):
1856 | """Return concatenation of series of transformation matrices.
1857 |
1858 | >>> M = numpy.random.rand(16).reshape((4, 4)) - 0.5
1859 | >>> numpy.allclose(M, concatenate_matrices(M))
1860 | True
1861 | >>> numpy.allclose(numpy.dot(M, M.T), concatenate_matrices(M, M.T))
1862 | True
1863 |
1864 | """
1865 | M = numpy.identity(4)
1866 | for i in matrices:
1867 | M = numpy.dot(M, i)
1868 | return M
1869 |
1870 |
1871 | def is_same_transform(matrix0, matrix1):
1872 | """Return True if two matrices perform same transformation.
1873 |
1874 | >>> is_same_transform(numpy.identity(4), numpy.identity(4))
1875 | True
1876 | >>> is_same_transform(numpy.identity(4), random_rotation_matrix())
1877 | False
1878 |
1879 | """
1880 | matrix0 = numpy.array(matrix0, dtype=numpy.float64, copy=True)
1881 | matrix0 /= matrix0[3, 3]
1882 | matrix1 = numpy.array(matrix1, dtype=numpy.float64, copy=True)
1883 | matrix1 /= matrix1[3, 3]
1884 | return numpy.allclose(matrix0, matrix1)
1885 |
1886 |
1887 | def is_same_quaternion(q0, q1):
1888 | """Return True if two quaternions are equal."""
1889 | q0 = numpy.array(q0)
1890 | q1 = numpy.array(q1)
1891 | return numpy.allclose(q0, q1) or numpy.allclose(q0, -q1)
1892 |
1893 |
1894 | def _import_module(name, package=None, warn=True, prefix='_py_', ignore='_'):
1895 | """Try import all public attributes from module into global namespace.
1896 |
1897 | Existing attributes with name clashes are renamed with prefix.
1898 | Attributes starting with underscore are ignored by default.
1899 |
1900 | Return True on successful import.
1901 |
1902 | """
1903 | import warnings
1904 | from importlib import import_module
1905 | try:
1906 | if not package:
1907 | module = import_module(name)
1908 | else:
1909 | module = import_module('.' + name, package=package)
1910 | except ImportError:
1911 | if warn:
1912 | # warnings.warn('failed to import module %s' % name)
1913 | pass
1914 | else:
1915 | for attr in dir(module):
1916 | if ignore and attr.startswith(ignore):
1917 | continue
1918 | if prefix:
1919 | if attr in globals():
1920 | globals()[prefix + attr] = globals()[attr]
1921 | elif warn:
1922 | warnings.warn('no Python implementation of ' + attr)
1923 | globals()[attr] = getattr(module, attr)
1924 | return True
1925 |
1926 |
1927 | _import_module('_transformations')
1928 |
1929 | if __name__ == '__main__':
1930 | import doctest
1931 | import random # noqa: used in doctests
1932 | try:
1933 | numpy.set_printoptions(suppress=True, precision=5, legacy='1.13')
1934 | except TypeError:
1935 | numpy.set_printoptions(suppress=True, precision=5)
1936 | doctest.testmod()
1937 |
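# A minimal usage sketch (assuming this module is importable as
# lib.transformations): per the doctests above, composing a quaternion with
# its inverse yields the identity quaternion.
#
#   >>> q = random_quaternion()
#   >>> numpy.allclose(quaternion_multiply(q, quaternion_inverse(q)), [1, 0, 0, 0])
#   True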
--------------------------------------------------------------------------------
/lib/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import math
3 |
4 |
5 | def make_box():
6 | """
7 |     Make a regular grid of points on the surface of a 3D unit box.
8 |     The box is centred at the origin with side length 1 (lower = -0.5, upper = 0.5),
9 |     with num = 18 points per axis on each face.
10 |     Returns a 2D numpy array of shape (num**2 * 6, 3), i.e. a point cloud
11 |     covering the six faces.
12 | """
13 | lower = -0.5
14 | upper = 0.5
15 | num = 18
16 | a = np.linspace(lower, upper, num)
17 | b = np.linspace(lower, upper, num)
18 | grid = np.transpose([np.tile(a, len(b)), np.repeat(b, len(a))])
19 |
20 | c1 = np.repeat(0.5, len(grid))
21 | c1 = np.reshape(c1, (len(c1), -1))
22 | c2 = np.repeat(-0.5, len(grid))
23 | c2 = np.reshape(c2, (len(c2), -1))
24 |
25 | up = np.hstack((grid, c1)) # upper face, z == 0.5
26 | low = np.hstack((grid, c2)) # lower face, z == -0.5
27 | front = up[:, [0, 2, 1]] # front face, y == 0.5
28 | back = low[:, [0, 2, 1]] # back face, y == -0.5
29 | right = up[:, [2, 0, 1]] # right face, x == 0.5
30 | left = low[:, [2, 0, 1]] # left face, x == -0.5
31 |
32 | six_faces = np.vstack((front, back, right, left, up, low))
33 | return six_faces
34 |
35 |
36 | def make_cylinder():
37 | """
38 |     Make a grid of points on a cylinder centred at (0, 0, 0), with radius 0.5 and height 1.
39 |     Method:
40 |     1) the lateral surface has 4 times the area of each end cap, so we sample 4 times as many points from it
41 |     2) to match the box grid, the total number of points is 1944
42 |     3) on the end caps, points are sampled at a fixed angular step and a fixed radial spacing
43 |     4) on the lateral surface, points are sampled along vertical lines at a fixed angular step
44 | """
45 |     # make the upper and lower faces, excluding the boundary (rim) points
46 |     theta_step = 10  # angular step in degrees
47 |     n = 9  # number of sample radii per direction
48 |     r = 0.5
49 |     radius_all = np.linspace(0, r, n + 2)[1:-1]  # radii of sub-circles, excluding centre and rim
50 |     res = []
51 |     for theta in range(0, 360, theta_step):
52 |         x = math.sin(math.radians(theta))
53 |         y = math.cos(math.radians(theta))
54 | for r in radius_all:
55 | res.append([r * x, r * y])
56 | # add z axis
57 | z = np.reshape(np.repeat(0.5, len(res)), (len(res), -1))
58 | upper = np.hstack((np.array(res), z)) # upper face
59 | z = np.reshape(np.repeat(-0.5, len(res)), (len(res), -1))
60 | lower = np.hstack((np.array(res), z)) # lower face
61 |
62 |     # lateral surface: 5-degree angular step, 18 points per vertical line including both boundaries
63 |     height = np.linspace(-0.5, 0.5, 18)
64 |     res = []
65 |     for theta in range(0, 360, 5):
66 |         x = 0.5 * math.sin(math.radians(theta))
67 |         y = 0.5 * math.cos(math.radians(theta))
68 | for z in height:
69 | res.append([x, y, z])
70 | middle = np.array(res)
71 |
72 | cylinder = np.vstack((upper, lower, middle))
73 | return cylinder
74 |
75 |
76 | def make_sphere():
77 | """
78 |     Sample a grid of points on a sphere of radius 0.5 centred at the origin.
79 | """
80 |     theta = np.linspace(0, 360, 36)  # azimuth angles in degrees (determine x and y)
81 |     phi = np.linspace(0, 360, 54)  # elevation angles in degrees (determine z)
82 |
83 |     res = []
84 |     for p in phi:
85 |         z = math.sin(math.radians(p)) * 0.5
86 |         r0 = math.cos(math.radians(p)) * 0.5
87 |         for t in theta:
88 |             x = math.sin(math.radians(t)) * r0
89 |             y = math.cos(math.radians(t)) * r0
90 | res.append([x, y, z])
91 |
92 | sphere = np.array(res)
93 | return sphere
94 |
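# A quick sanity check (a sketch; assumes the repository root is on sys.path
# so that lib.utils is importable): all three grids have the same size.
#
#   >>> make_box().shape        # 18*18 points per face, 6 faces
#   (1944, 3)
#   >>> make_cylinder().shape   # 2*(36*9) cap points + 72*18 side points
#   (1944, 3)
#   >>> make_sphere().shape     # 54 phi values * 36 theta values
#   (1944, 3)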
--------------------------------------------------------------------------------
/metrics/readme.md:
--------------------------------------------------------------------------------
1 | Download metrics from [PointFlow](https://github.com/stevenygd/PointFlow/tree/master/metrics).
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # CASS: Learning Canonical Shape Space for Category-Level 6D Object Pose and Size Estimation
2 |
3 | ## Evaluation Steps
4 | 1. Install the following requirements:
5 |
6 | ```
7 | open3d==0.8.0.0
8 | opencv-python==4.1.1.26
9 | torch==1.2.0
10 | torchvision==0.4.0
11 | tqdm==4.32.1
12 | trimesh==3.2.20
13 | ```
14 |
15 | 2. Compile "./metrics" for **re-evaluating** reconstructed models. You can skip this step and delete lines 25-28 in ./tools/eval.py if you have downloaded our results in the next step.
16 |
17 | 3. Download predicted masks and pretrained models.
18 |
19 | You can download our pretrained models, our results, and the segmentation masks of the [NOCS](https://github.com/hughw19/NOCS_CVPR2019) real test dataset from [Google Drive](https://drive.google.com/drive/folders/1yvVpvB_0YuqNAaeOzE5YfO5dvwDaoz_n).
20 |
21 | If you want to **re-calculate** CASS's results, please download the NOCS [real test dataset](http://download.cs.stanford.edu/orion/nocs/real_test.zip) and [3d models](http://download.cs.stanford.edu/orion/nocs/obj_models.zip).
22 |
23 | 4. Evaluate CASS and NOCS
24 |
25 | 1. Unzip the predicted results and specify `--save_dir` in eval.sh. You will get evaluation results for CASS and NOCS at the same time.
26 | 2. If you want to recalculate CASS's results, place the NOCS segmentation masks, which are included in the Google Drive download, in the real test dataset folder alongside their color images. Refer to lines 1-2 of ./eval.sh for how to start the evaluation; a sample invocation is also sketched below.
27 |
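For reference, a typical invocation of the evaluation script, run from the repository root (a sketch based on the flags defined in ./tools/eval.py; the paths are placeholders and your eval.sh may differ):

```
python tools/eval.py --dataset_dir /path/to/nocs_real_test --save_dir /path/to/results --eval --cuda --mode cass
```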
28 | ## Acknowledgement
29 |
30 | We have referred to part of the code from [NOCS_CVPR2019](https://github.com/hughw19/NOCS_CVPR2019), [FoldingNet](https://github.com/jtpils/FoldingNet), [DenseFusion](https://github.com/j96w/DenseFusion), [Open3D](https://github.com/intel-isl/Open3D) and [PointFlow](https://github.com/stevenygd/PointFlow/tree/master).
31 |
--------------------------------------------------------------------------------
/tools/_init_paths.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | sys.path.insert(0, os.getcwd())
4 |
--------------------------------------------------------------------------------
/tools/eval.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import glob
3 | import json
4 | import os
5 |
6 | import cv2
7 | import numpy as np
8 | import numpy.ma as ma
9 | import open3d as o3d
10 | import torch
11 | import torch.nn as nn
12 | import torch.nn.functional as F
13 | import torchvision.transforms as transforms
14 | import tqdm
15 | from torch.autograd import Variable
16 | import argparse
17 |
18 | import _init_paths
19 | import utils
20 | from datasets.dataset import get_bbox, load_obj, PoseDataset
21 | from lib.models import CASS
22 | from lib.transformations import (quaternion_from_matrix,
23 | quaternion_matrix)
24 |
25 | try:
26 | from metrics.evaluation_metrics import EMD_CD
27 | except ImportError:
28 |     raise ImportError("Failed to import EMD_CD metric. Please compile `metrics` if you want to do reconstruction evaluation; otherwise, delete lines 25-28 as noted in the readme.")
29 |
30 | parser = argparse.ArgumentParser(description="eval CASS model")
31 | parser.add_argument("--resume_model", type=str, default="cass_best.pth",
32 | help="resume model in 'trained_models' folder.")
33 | parser.add_argument("--dataset_dir", type=str, default="",
34 | help="dataset root of nocs")
35 | parser.add_argument("--cuda", action="store_true", default=False)
36 | parser.add_argument("--draw", action="store_true", default=False,
37 | help="whether to draw the pointcloud image while evaluation.")
38 | parser.add_argument("--save_dir", type=str, default="",
39 |                     help="directory to save evaluation results.")
40 | parser.add_argument("--eval", action="store_true",
41 | help="whether to re-calculate result for cass")
42 | parser.add_argument("--mode", type=str, default="cass",
43 | choices=["cass", "nocs"], help="eval cass or nocs")
44 |
45 | opt = parser.parse_args()
46 | opt.intrinsics = np.array(
47 | [[591.0125, 0, 322.525], [0, 590.16775, 244.11084], [0, 0, 1]])
48 |
49 |
50 | norm = transforms.Normalize(mean=[0.51, 0.47, 0.44], std=[0.29, 0.27, 0.28])
51 | xmap = np.array([[j for i in range(640)] for j in range(480)])
52 | ymap = np.array([[i for i in range(640)] for j in range(480)])
53 | cam_cx = 322.525
54 | cam_cy = 244.11084
55 | cam_fx = 591.0125
56 | cam_fy = 590.16775
57 | cam_scale = 1000.0
58 | num_obj = 6
59 | img_width = 480
60 | img_length = 640
61 | num_points = 500
62 | iteration = 5
63 | bs = 1
64 | symmetric = [0, 1, 3]
65 | # 0 1_bottle_02876657
66 | # 1 2_bowl_02880940
67 | # 2 3_camera_02942699
68 | # 3 4_can_02946921
69 | # 4 5_laptop_03642806
70 | # 5 6_mug_03797390
71 |
72 | opt.num_objects = 6
73 | opt.num_points = 500
74 |
75 |
76 | def to_device(x):
77 | if opt.cuda:
78 | return x.cuda()
79 | else:
80 | return x.cpu()
81 |
82 |
83 | class Model(nn.Module):
84 | def __init__(self, opt):
85 | super().__init__()
86 | self.opt = opt
87 |
88 | self.casses = self.load_model()
89 |
90 | def load_model(self):
91 | cass = CASS(self.opt)
92 | resume_path = os.path.join(
93 | "trained_models", opt.resume_model)
94 | try:
95 | cass.load_state_dict(torch.load(resume_path), strict=True)
96 |         except Exception:
97 |             raise FileNotFoundError("Cannot load checkpoint: {}".format(resume_path))
98 |
99 | return cass
100 |
101 | def get_model(self, cls_idx):
102 | return self.casses
103 |
104 |
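# The predicted object scale is read off the reconstruction as its tight,
# origin-centred axis-aligned bounding box: twice the per-axis maximum
# absolute coordinate of the reconstructed point cloud.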
105 | def get_predict_scales(recd):
106 | abs_coord_pts = np.abs(recd)
107 | return 2 * np.amax(abs_coord_pts, axis=0)
108 |
109 |
110 | def calculate_emd_cf(point_a, point_b):
111 | obj = torch.from_numpy(point_a).unsqueeze(dim=0)
112 | pre_points = torch.from_numpy(
113 | point_b).unsqueeze(dim=0)
114 | obj = to_device(obj).float()
115 | pre_points = to_device(pre_points).float()
116 |
117 | res = EMD_CD(pre_points, obj, 1, accelerated_cd=True)
118 | res = {k: (v.cpu().detach().item() if not isinstance(
119 | v, float) else v) for k, v in res.items()}
120 |
121 | return res["MMD-CD"], res["MMD-EMD"]
122 |
123 |
124 | def eval_nocs(model, img, depth, masks, cls_ids, cad_model_info, cad_model_scale):
125 | my_result = np.zeros((len(cls_ids), 7))
126 | scales = np.zeros((len(cls_ids), 3))
127 | chamfer_dis_cass = np.zeros((len(cls_ids)))
128 | emd_dis_cass = np.zeros((len(cls_ids)))
129 |
130 | for i in range(len(cls_ids)):
131 | # get model
132 |         # cls_ids are 1-based (0 would be background), so shift to a 0-based index
133 | cass = model.get_model(cls_ids[i] - 1)
134 | try:
135 | mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0))
136 | mask_label = ma.getmaskarray(ma.masked_equal(
137 |                 masks, i))  # NOCS instance masks start from 1
138 | mask = mask_label * mask_depth
139 |
140 | rmin, rmax, cmin, cmax = get_bbox(mask)
141 |
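            # Sample exactly num_points pixel indices from the masked region:
            # randomly subsample when there are too many, wrap-pad when too few.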
142 | choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]
143 | if len(choose) > num_points:
144 | c_mask = np.zeros(len(choose), dtype=int)
145 | c_mask[:num_points] = 1
146 | np.random.shuffle(c_mask)
147 | choose = choose[c_mask.nonzero()]
148 | else:
149 | choose = np.pad(choose, (0, num_points - len(choose)), 'wrap')
150 |
151 | depth_masked = depth[rmin:rmax, cmin:cmax].flatten(
152 | )[choose][:, np.newaxis].astype(np.float32)
153 | xmap_masked = xmap[rmin:rmax, cmin:cmax].flatten(
154 | )[choose][:, np.newaxis].astype(np.float32)
155 | ymap_masked = ymap[rmin:rmax, cmin:cmax].flatten(
156 | )[choose][:, np.newaxis].astype(np.float32)
157 | choose = np.array([choose])
158 |
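            # Back-project the chosen pixels to camera space with the pinhole
            # model: z = depth / cam_scale, x = (u - cx) * z / fx,
            # y = (v - cy) * z / fy. x and y are negated below to match the
            # camera convention of the NOCS data.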
159 | pt2 = depth_masked / cam_scale
160 | pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx
161 | pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy
162 | cloud = np.concatenate((-pt0, -pt1, pt2), axis=1)
163 |
164 | img_masked = np.array(img)[:, :, :3]
165 | img_masked = np.transpose(img_masked, (2, 0, 1))
166 | img_masked = img_masked[:, rmin:rmax, cmin:cmax]
167 |
168 | cloud = torch.from_numpy(cloud.astype(np.float32))
169 | choose = torch.LongTensor(choose.astype(np.int32))
170 | img_masked = norm(torch.from_numpy(img_masked.astype(np.float32)))
171 | index = torch.LongTensor([cls_ids[i] - 1]) # 0 is BG
172 |
173 | cloud = to_device(Variable(cloud))
174 | choose = to_device(Variable(choose))
175 | img_masked = to_device(Variable(img_masked))
176 | index = to_device(Variable(index))
177 |
178 | cloud = cloud.view(1, num_points, 3)
179 | img_masked = img_masked.view(1, 3, img_masked.size()[
180 | 1], img_masked.size()[2])
181 |
182 | folding_encode = cass.foldingnet.encode(img_masked, cloud, choose)
183 | posenet_encode = cass.estimator.encode(img_masked, cloud, choose)
184 |
185 | pred_r, pred_t, pred_c = cass.estimator.pose(
186 | torch.cat([posenet_encode, folding_encode], dim=1),
187 | index
188 | )
189 | recd = cass.foldingnet.recon(folding_encode)
190 |
191 | # get pred_scales
192 | scale = get_predict_scales(recd[0].detach().cpu().numpy())
193 | scales[i] = scale
194 | # load model
195 | for ii, info in enumerate(cad_model_info):
196 | if cls_ids[i] == int(info["cls_id"]):
197 | model_path = info["model_path"]
198 | model_scale = cad_model_scale[ii]
199 |
200 | cad_model = load_obj(path=os.path.join(opt.dataset_dir, model_path[:-4]+"_{}.ply".format(
201 | num_points)), ori_path=os.path.join(opt.dataset_dir, model_path), num_points=num_points)
202 | # change to the real size.
203 | cad_model = cad_model * model_scale
204 |
205 | cd, emd = calculate_emd_cf(
206 | cad_model, recd.detach()[0].cpu().numpy())
207 | chamfer_dis_cass[i] = cd
208 | emd_dis_cass[i] = emd
209 | break
210 |             # if a wrong object was detected, we set the distances to 0
211 | else:
212 | emd_dis_cass[i] = 0
213 | chamfer_dis_cass[i] = 0
214 |
215 | pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1)
216 |
217 | pred_c = pred_c.view(bs, num_points)
218 | how_max, which_max = torch.max(pred_c, 1)
219 | pred_t = pred_t.view(bs * num_points, 1, 3)
220 | points = cloud.view(bs * num_points, 1, 3)
221 |
222 | my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
223 | my_t = (points + pred_t)[which_max[0]].view(-1).cpu().data.numpy()
224 | if cls_ids[i] - 1 not in symmetric:
225 |                 # Refine only the non-symmetric classes; refinement helps for them.
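                # Each iteration maps the observed cloud into the frame of the
                # current estimate, predicts a residual pose with the refiner,
                # and composes it with the running estimate (my_mat @ my_mat_2).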
226 | for ite in range(0, iteration):
227 | T = to_device(Variable(torch.from_numpy(my_t.astype(np.float32))).view(
228 | 1, 3).repeat(num_points, 1).contiguous().view(1, num_points, 3))
229 | my_mat = quaternion_matrix(my_r)
230 | R = to_device(Variable(torch.from_numpy(
231 | my_mat[:3, :3].astype(np.float32))).view(1, 3, 3))
232 | my_mat[0:3, 3] = my_t
233 |
234 | new_cloud = torch.bmm((cloud - T), R).contiguous()
235 | pred_r, pred_t = cass.refiner(
236 | new_cloud, folding_encode, index)
237 | pred_r = pred_r.view(1, 1, -1)
238 | pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
239 | my_r_2 = pred_r.view(-1).cpu().data.numpy()
240 | my_t_2 = pred_t.view(-1).cpu().data.numpy()
241 | my_mat_2 = quaternion_matrix(my_r_2)
242 |
243 | my_mat_2[0:3, 3] = my_t_2
244 |
245 | my_mat_final = np.dot(my_mat, my_mat_2)
246 | my_r_final = copy.deepcopy(my_mat_final)
247 | my_r_final[0:3, 3] = 0
248 | my_r_final = quaternion_from_matrix(my_r_final, True)
249 | my_t_final = np.array(
250 | [my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]])
251 |
252 | my_pred = np.append(my_r_final, my_t_final)
253 | my_r = my_r_final
254 | my_t = my_t_final
255 | else:
256 | my_pred = np.append(my_r, my_t)
257 |
258 | # Here 'my_pred' is the final pose estimation result after refinement ('my_r': quaternion, 'my_t': translation)
259 | my_result[i] = my_pred
260 |         except Exception:
261 |             # the instance mask was empty or processing failed; fall back to zeros
262 |             print("Empty mask while eval, skip.")
263 | my_result[i] = np.zeros(7)
264 | scales[i] = np.array([0.1, 0.1, 0.1])
265 |
266 | emd_dis_cass[i] = 0.0
267 | chamfer_dis_cass[i] = 0.0
268 | # convert to RTs
269 | my_result_ret = []
270 | for i in range(len(cls_ids)):
271 | matrix = quaternion_matrix(my_result[i][:4]).astype(np.float32)
272 | matrix[:3, 3] = my_result[i][4:]
273 | my_result_ret.append(matrix)
274 |
275 | return my_result_ret, scales, chamfer_dis_cass, emd_dis_cass
276 |
277 |
278 | def eval_interface(model, opt, result):
279 |     # load the data for this image
280 |     # the GT mask stores its value in the last channel, while ours is stored in the first channel
281 | path = result["image_path"]
282 | masks = np.array(cv2.imread(os.path.join(
283 | opt.dataset_dir, path+"_nocs_segmentation.png"))[:, :, 0])
284 | img = np.array(cv2.imread(os.path.join(
285 | opt.dataset_dir, path+"_color.png"))) / 255.0
286 | depth = np.array(cv2.imread(os.path.join(
287 | opt.dataset_dir, path+"_depth.png"), -1))
288 |
289 | my_result_ret, scales, chamfer_dis_cass, emd_dis_cass = eval_nocs(
290 | model, img, depth, masks, result["pred_class_ids"], cad_model_info=result[
291 | "model_information"], cad_model_scale=result["gt_scales_for_model_in_CASS"]
292 | )
293 |
294 | my_result_ret = np.array(my_result_ret)
295 | scales = np.array(scales)
296 | chamfer_dis_cass = np.array(chamfer_dis_cass)
297 | emd_dis_cass = np.array(emd_dis_cass)
298 |
299 | return my_result_ret.tolist(), scales.tolist(), chamfer_dis_cass.tolist(), emd_dis_cass.tolist()
300 |
301 |
302 | def draw(opt, result):
303 | """ Load data and draw visualization results.
304 | """
305 | path = result["image_path"]
306 | image = cv2.imread(os.path.join(opt.dataset_dir, path+"_color.png"))
307 |
308 | # Load GT Models
309 | models_for_nocs = []
310 | models_for_cass = []
311 | for i, mf in enumerate(result["model_information"]):
312 | model_path = mf["model_path"]
313 | cad_model = load_obj(path=os.path.join(opt.dataset_dir, model_path[:-4]+"_{}.ply".format(
314 | num_points)), ori_path=os.path.join(opt.dataset_dir, model_path), num_points=num_points)
315 |
316 |         # For NOCS, the normalized model already serves as the GT points.
317 | models_for_nocs.append(copy.deepcopy(cad_model))
318 |
319 |         # For CASS, the normalized model is multiplied by the scale to recover the real size.
320 | models_for_cass.append(copy.deepcopy(
321 | cad_model * result["gt_scales_for_model_in_CASS"][i]))
322 |
323 |     # Get the RTs matching each GT class id. If a target is missing we use np.eye(4); if multiple targets match, we keep only the first.
324 | RTs_cass = []
325 | RTs_nocs = []
326 | misses = []
327 | for i, cls in enumerate(result["gt_class_ids"]):
328 | idx = result["pred_class_ids"] == cls
329 | rts_nocs = result["pred_RTs"][idx]
330 |
331 | rts_cass = result["pred_RTs_cass"][idx]
332 |
333 | miss = False
334 | if len(rts_cass) <= 0 or len(rts_nocs) <= 0:
335 | rts_cass = np.eye(4)
336 | rts_nocs = np.eye(4)
337 | miss = True
338 | elif len(rts_cass) > 1 or len(rts_nocs) > 1:
339 | rts_cass = rts_cass[0]
340 | rts_nocs = rts_nocs[0]
341 | misses.append(miss)
342 | RTs_nocs.append(rts_nocs)
343 | RTs_cass.append(rts_cass)
344 |
345 | (h, w) = image.shape[:2]
346 | center = (w/2, h/2)
347 |
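    # The CASS cloud is built with x and y negated (see eval_nocs), which
    # amounts to a 180-degree in-plane rotation, so the image is rotated to
    # match before drawing the CASS results.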
348 | M = cv2.getRotationMatrix2D(center, 180, 1.0)
349 | rotated = cv2.warpAffine(image, M, (w, h))
350 |
351 | utils.draw(rotated, RTs_cass, models_for_cass, class_ids=result["gt_class_ids"], misses=misses, intrinsics=opt.intrinsics, save_path=os.path.join(
352 | opt.save_dir, "vis", "_".join(path.split("/"))+"_cass.png"))
353 | utils.draw(image, RTs_nocs, models_for_nocs, class_ids=result["gt_class_ids"], misses=misses, intrinsics=opt.intrinsics, save_path=os.path.join(
354 | opt.save_dir, "vis", "_".join(path.split("/"))+"_nocs.png"))
355 |
356 |
357 | if __name__ == "__main__":
358 | opt.class_names = PoseDataset.get_class_names()
359 |
360 | eval_dir = os.path.join(opt.save_dir, "eval_{}".format(opt.mode))
361 | os.makedirs(eval_dir, exist_ok=True)
362 |
363 | if opt.mode == "cass":
364 |
365 | if opt.eval:
366 | model = to_device(Model(opt)).eval()
367 |
368 | result_json_list = glob.glob(
369 | os.path.join(opt.save_dir, "gt", "*.json"))
370 | result_json_list = sorted(result_json_list)
371 |
372 | final_results = []
373 | for filename in tqdm.tqdm(result_json_list, desc="loading"):
374 |
375 | if opt.eval:
376 | with open(filename, "r") as f:
377 | result = json.load(f)
378 |
379 | pred_RTs_cass, pred_scales_cass, chamfer_dis_cass, emd_dis_cass = eval_interface(
380 | model, opt, result)
381 |
382 | result["pred_RTs_cass"] = pred_RTs_cass
383 | result["pred_scales_cass"] = pred_scales_cass
384 |
385 | result["chamfer_dis_cass"] = chamfer_dis_cass
386 | result["emd_dis_cass"] = emd_dis_cass
387 |
388 | with open(os.path.join(eval_dir, os.path.basename(filename)), "w") as f:
389 | json.dump(result, f, indent=4)
390 | else:
391 | with open(os.path.join(eval_dir, os.path.basename(filename)), "r") as f:
392 | result = json.load(f)
393 |
394 | gt_class_ids = []
395 | gt_scales_for_CASS = []
396 | for m in result["model_information"]:
397 | gt_class_ids.append(int(m["cls_id"]))
398 | gt_scales_for_CASS.append(m["gt_scales_for_CASS"])
399 | result["gt_class_ids"] = gt_class_ids
400 | result["gt_handle_visibility"] = [1] * len(gt_class_ids)
401 | result["gt_scales_for_CASS"] = gt_scales_for_CASS
402 |
403 | # convert all label information to np.array if possible
404 | r = {}
405 | for k, v in result.items():
406 | if isinstance(v, (list, tuple)):
407 | r[k] = np.array(v)
408 | else:
409 | r[k] = v
410 | final_results.append(r)
411 |
412 | if opt.draw:
413 | os.makedirs(os.path.join(opt.save_dir, "vis"), exist_ok=True)
414 | for r in tqdm.tqdm(final_results, desc="draw"):
415 | draw(opt, r)
416 |
417 | synset_names = ["BG"] + opt.class_names
418 |
419 | # eval
420 | eval_results = []
421 | for i in final_results:
422 | i["pred_scales"] = i["pred_scales_cass"]
423 | i["pred_RTs"] = i["pred_RTs_cass"]
424 |         # pred_class_ids is kept as-is
425 | i["gt_scales"] = i["gt_scales_for_CASS"]
426 | i["gt_RTs"] = i["gt_RTs_for_CASS"]
427 | eval_results.append(i)
428 | aps = utils.compute_degree_cm_mAP(
429 | eval_results, synset_names, eval_dir,
430 | degree_thresholds=range(0, 61, 1),
431 | shift_thresholds=np.linspace(0, 1, 31)*15,
432 | iou_3d_thresholds=np.linspace(0, 1, 101),
433 | iou_pose_thres=0.1,
434 | use_matches_for_pose=True, eval_recon=True
435 | )
436 | elif opt.mode == "nocs":
437 | result_json_list = glob.glob(
438 | os.path.join(opt.save_dir, "gt", "*.json"))
439 | result_json_list = sorted(result_json_list)
440 |
441 | final_results = []
442 | for filename in tqdm.tqdm(result_json_list, desc="loading"):
443 | with open(os.path.join(filename), "r") as f:
444 | result = json.load(f)
445 |
446 | # convert all label information to np.array if possible
447 | r = {}
448 | for k, v in result.items():
449 | if isinstance(v, (list, tuple)):
450 | r[k] = np.array(v)
451 | else:
452 | r[k] = v
453 | final_results.append(r)
454 |
455 | synset_names = ["BG"] + opt.class_names
456 |
457 | aps = utils.compute_degree_cm_mAP(
458 | final_results, synset_names, eval_dir,
459 | degree_thresholds=range(0, 61, 1),
460 | shift_thresholds=np.linspace(0, 1, 31)*15,
461 | iou_3d_thresholds=np.linspace(0, 1, 101),
462 | iou_pose_thres=0.1,
463 | use_matches_for_pose=True, eval_recon=False
464 | )
465 |
--------------------------------------------------------------------------------
/tools/utils.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import json
3 | import logging
4 | import math
5 | import os
6 | from ctypes import *
7 | from pprint import pprint
8 |
9 | import cv2
10 | import matplotlib.pyplot as plt
11 | import numpy as np
12 | import scipy.misc
13 | import skimage.color
14 | from tqdm import tqdm
15 |
16 |
17 | def setup_logger(logger_name, log_file, level=logging.INFO):
18 | l = logging.getLogger(logger_name)
19 | formatter = logging.Formatter('%(asctime)s : %(message)s')
20 | fileHandler = logging.FileHandler(log_file, mode='w')
21 | fileHandler.setFormatter(formatter)
22 |
23 | l.setLevel(level)
24 | l.addHandler(fileHandler)
25 |
26 | streamHandler = logging.StreamHandler()
27 | streamHandler.setFormatter(formatter)
28 | l.addHandler(streamHandler)
29 | return l
30 |
31 |
32 | def compute_3d_iou_new(RT_1, RT_2, scales_1, scales_2, handle_visibility, class_name_1, class_name_2):
33 | '''Computes IoU overlaps between two 3d bboxes.
34 |     bbox_3d_1, bbox_3d_2: [3, 8]
35 | '''
36 |     # helper: axis-aligned IoU between the two transformed 3D boxes
37 | def asymmetric_3d_iou(RT_1, RT_2, scales_1, scales_2):
38 | noc_cube_1 = get_3d_bbox(scales_1, 0)
39 | bbox_3d_1 = transform_coordinates_3d(noc_cube_1, RT_1)
40 |
41 | noc_cube_2 = get_3d_bbox(scales_2, 0)
42 | bbox_3d_2 = transform_coordinates_3d(noc_cube_2, RT_2)
43 |
44 | bbox_1_max = np.amax(bbox_3d_1, axis=0)
45 | bbox_1_min = np.amin(bbox_3d_1, axis=0)
46 | bbox_2_max = np.amax(bbox_3d_2, axis=0)
47 | bbox_2_min = np.amin(bbox_3d_2, axis=0)
48 |
49 | overlap_min = np.maximum(bbox_1_min, bbox_2_min)
50 | overlap_max = np.minimum(bbox_1_max, bbox_2_max)
51 |
52 | # intersections and union
53 | if np.amin(overlap_max - overlap_min) < 0:
54 | intersections = 0
55 | else:
56 | intersections = np.prod(overlap_max - overlap_min)
57 | union = np.prod(bbox_1_max - bbox_1_min) + \
58 | np.prod(bbox_2_max - bbox_2_min) - intersections
59 | overlaps = intersections / union
60 | return overlaps
61 |
62 | if RT_1 is None or RT_2 is None:
63 | return -1
64 |
65 | symmetry_flag = False
66 | if (class_name_1 in ['bottle', 'bowl', 'can'] and class_name_1 == class_name_2) or (class_name_1 == 'mug' and class_name_1 == class_name_2 and handle_visibility == 0):
67 | # print('*'*10)
68 |
69 | noc_cube_1 = get_3d_bbox(scales_1, 0)
70 | noc_cube_2 = get_3d_bbox(scales_2, 0)
71 | bbox_3d_2 = transform_coordinates_3d(noc_cube_2, RT_2)
72 |
73 | def y_rotation_matrix(theta):
74 | return np.array([[np.cos(theta), 0, np.sin(theta), 0],
75 | [0, 1, 0, 0],
76 | [-np.sin(theta), 0, np.cos(theta), 0],
77 | [0, 0, 0, 1]])
78 |
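        # For rotation-symmetric shapes the pose is only defined up to a
        # rotation about the object's y-axis, so take the best IoU over n
        # discretised rotations of RT_1 about y.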
79 | n = 20
80 | max_iou = 0
81 | for i in range(n):
82 | rotated_RT_1 = RT_1@y_rotation_matrix(2*math.pi*i/float(n))
83 | max_iou = max(max_iou,
84 | asymmetric_3d_iou(rotated_RT_1, RT_2, scales_1, scales_2))
85 | else:
86 | max_iou = asymmetric_3d_iou(RT_1, RT_2, scales_1, scales_2)
87 |
88 | return max_iou
89 |
90 |
91 | def compute_RT_degree_cm_symmetry(RT_1, RT_2, class_id, handle_visibility, synset_names):
92 | '''
93 | :param RT_1: [4, 4]. homogeneous affine transformation
94 | :param RT_2: [4, 4]. homogeneous affine transformation
95 | :return: theta: angle difference of R in degree, shift: l2 difference of T in centimeter
96 |
97 |
98 | synset_names = ['BG', # 0
99 | 'bottle', # 1
100 | 'bowl', # 2
101 | 'camera', # 3
102 | 'can', # 4
103 | 'cap', # 5
104 | 'phone', # 6
105 | 'monitor', # 7
106 | 'laptop', # 8
107 | 'mug' # 9
108 | ]
109 |
110 | synset_names = ['BG', # 0
111 | 'bottle', # 1
112 | 'bowl', # 2
113 | 'camera', # 3
114 | 'can', # 4
115 | 'laptop', # 5
116 | 'mug' # 6
117 | ]
118 | '''
119 |
120 | # make sure the last row is [0, 0, 0, 1]
121 | if RT_1 is None or RT_2 is None:
122 | return -1
123 | try:
124 | assert np.array_equal(RT_1[3, :], RT_2[3, :])
125 | assert np.array_equal(RT_1[3, :], np.array([0, 0, 0, 1]))
126 | except AssertionError:
127 | print(RT_1[3, :], RT_2[3, :])
128 | exit()
129 |
130 | R1 = RT_1[:3, :3] / np.cbrt(np.linalg.det(RT_1[:3, :3]))
131 | T1 = RT_1[:3, 3]
132 |
133 | R2 = RT_2[:3, :3] / np.cbrt(np.linalg.det(RT_2[:3, :3]))
134 | T2 = RT_2[:3, 3]
135 |
136 | # symmetric when rotating around y-axis
137 | if synset_names[class_id] in ['bottle', 'can', 'bowl']:
138 | y = np.array([0, 1, 0])
139 | y1 = R1 @ y
140 | y2 = R2 @ y
141 | theta = np.arccos(
142 | y1.dot(y2) / (np.linalg.norm(y1) * np.linalg.norm(y2)))
143 | # symmetric when rotating around y-axis
144 | elif synset_names[class_id] == 'mug' and handle_visibility == 0:
145 | y = np.array([0, 1, 0])
146 | y1 = R1 @ y
147 | y2 = R2 @ y
148 | theta = np.arccos(
149 | y1.dot(y2) / (np.linalg.norm(y1) * np.linalg.norm(y2)))
150 | elif synset_names[class_id] in ['phone', 'eggbox', 'glue']:
151 | y_180_RT = np.diag([-1.0, 1.0, -1.0])
152 | R = R1 @ R2.transpose()
153 | R_rot = R1 @ y_180_RT @ R2.transpose()
154 | theta = min(np.arccos((np.trace(R) - 1) / 2),
155 | np.arccos((np.trace(R_rot) - 1) / 2))
156 | else:
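        # Generic case: the rotation error is the geodesic angle recovered
        # from the trace identity trace(R1 @ R2^T) = 1 + 2*cos(theta).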
157 | R = R1 @ R2.transpose()
158 | theta = np.arccos((np.trace(R) - 1) / 2)
159 |
160 | theta *= 180 / np.pi
161 | shift = np.linalg.norm(T1 - T2) * 100
162 | result = np.array([theta, shift])
163 |
164 | return result
165 |
166 |
167 | def get_3d_bbox(scale, shift=0):
168 | """
169 | Input:
170 | scale: [3] or scalar
171 | shift: [3] or scalar
172 | Return
173 | bbox_3d: [3, N]
174 |
175 | """
176 | if hasattr(scale, "__iter__"):
177 | bbox_3d = np.array([[scale[0] / 2, +scale[1] / 2, scale[2] / 2],
178 | [scale[0] / 2, +scale[1] / 2, -scale[2] / 2],
179 | [-scale[0] / 2, +scale[1] / 2, scale[2] / 2],
180 | [-scale[0] / 2, +scale[1] / 2, -scale[2] / 2],
181 | [+scale[0] / 2, -scale[1] / 2, scale[2] / 2],
182 | [+scale[0] / 2, -scale[1] / 2, -scale[2] / 2],
183 | [-scale[0] / 2, -scale[1] / 2, scale[2] / 2],
184 | [-scale[0] / 2, -scale[1] / 2, -scale[2] / 2]]) + shift
185 | else:
186 | bbox_3d = np.array([[scale / 2, +scale / 2, scale / 2],
187 | [scale / 2, +scale / 2, -scale / 2],
188 | [-scale / 2, +scale / 2, scale / 2],
189 | [-scale / 2, +scale / 2, -scale / 2],
190 | [+scale / 2, -scale / 2, scale / 2],
191 | [+scale / 2, -scale / 2, -scale / 2],
192 | [-scale / 2, -scale / 2, scale / 2],
193 | [-scale / 2, -scale / 2, -scale / 2]]) + shift
194 |
195 | bbox_3d = bbox_3d.transpose()
196 | return bbox_3d
197 |
198 |
199 | def transform_coordinates_3d(coordinates, RT):
200 | """
201 | Input:
202 | coordinates: [3, N]
203 | RT: [4, 4]
204 | Return
205 | new_coordinates: [3, N]
206 |
207 | """
208 | assert coordinates.shape[0] == 3
209 | coordinates = np.vstack([coordinates, np.ones(
210 | (1, coordinates.shape[1]), dtype=np.float32)])
211 | new_coordinates = RT @ coordinates
212 | new_coordinates = new_coordinates[:3, :]/new_coordinates[3, :]
213 | return new_coordinates
214 |
215 |
216 | def calculate_2d_projections(coordinates_3d, intrinsics):
217 | """
218 | Input:
219 | coordinates: [3, N]
220 | intrinsics: [3, 3]
221 | Return
222 | projected_coordinates: [N, 2]
223 | """
224 | projected_coordinates = intrinsics @ coordinates_3d
225 | projected_coordinates = projected_coordinates[:2,
226 | :] / projected_coordinates[2, :]
227 | projected_coordinates = projected_coordinates.transpose()
228 | projected_coordinates = np.array(projected_coordinates, dtype=np.int32)
229 |
230 | return projected_coordinates
231 |
232 |
233 | def trim_zeros(x):
234 | """It's common to have tensors larger than the available data and
235 | pad with zeros. This function removes rows that are all zeros.
236 | x: [rows, columns].
237 | """
238 |
239 | pre_shape = x.shape
240 | assert len(x.shape) == 2, x.shape
241 | new_x = x[~np.all(x == 0, axis=1)]
242 | post_shape = new_x.shape
243 | assert pre_shape[0] == post_shape[0]
244 | assert pre_shape[1] == post_shape[1]
245 |
246 | return new_x
247 |
248 |
249 | def compute_3d_matches(gt_class_ids, gt_RTs, gt_scales, gt_handle_visibility, synset_names,
250 | pred_boxes, pred_class_ids, pred_scores, pred_RTs, pred_scales,
251 | iou_3d_thresholds, score_threshold=0):
252 | """Finds matches between prediction and ground truth instances.
253 | Returns:
254 | gt_matches: 2-D array. For each GT box it has the index of the matched
255 | predicted box.
256 | pred_matches: 2-D array. For each predicted box, it has the index of
257 | the matched ground truth box.
258 | overlaps: [pred_boxes, gt_boxes] IoU overlaps.
259 | """
260 | # Trim zero padding
261 | # TODO: cleaner to do zero unpadding upstream
262 | num_pred = len(pred_class_ids)
263 | num_gt = len(gt_class_ids)
264 | indices = np.zeros(0)
265 |
266 | if num_pred:
267 | pred_boxes = trim_zeros(pred_boxes).copy()
268 | pred_scores = pred_scores[:pred_boxes.shape[0]].copy()
269 |
270 | # Sort predictions by score from high to low
271 | indices = np.argsort(pred_scores)[::-1]
272 |
273 | pred_boxes = pred_boxes[indices].copy()
274 | pred_class_ids = pred_class_ids[indices].copy()
275 | pred_scores = pred_scores[indices].copy()
276 | pred_scales = pred_scales[indices].copy()
277 | pred_RTs = pred_RTs[indices].copy()
278 |
279 | # Compute IoU overlaps [pred_bboxs gt_bboxs]
280 | #overlaps = [[0 for j in range(num_gt)] for i in range(num_pred)]
281 | overlaps = np.zeros((num_pred, num_gt), dtype=np.float32)
282 | for i in range(num_pred):
283 | for j in range(num_gt):
284 | # overlaps[i, j] = compute_3d_iou(pred_3d_bboxs[i], gt_3d_bboxs[j], gt_handle_visibility[j],
285 | # synset_names[pred_class_ids[i]], synset_names[gt_class_ids[j]])
286 | overlaps[i, j] = compute_3d_iou_new(pred_RTs[i], gt_RTs[j], pred_scales[i, :], gt_scales[j],
287 | gt_handle_visibility[j], synset_names[pred_class_ids[i]], synset_names[gt_class_ids[j]])
288 |
289 | # Loop through predictions and find matching ground truth boxes
290 | num_iou_3d_thres = len(iou_3d_thresholds)
291 | pred_matches = -1 * np.ones([num_iou_3d_thres, num_pred])
292 | gt_matches = -1 * np.ones([num_iou_3d_thres, num_gt])
293 |
294 | for s, iou_thres in enumerate(iou_3d_thresholds):
295 | for i in range(len(pred_boxes)):
296 | # Find best matching ground truth box
297 | # 1. Sort matches by score
298 | sorted_ixs = np.argsort(overlaps[i])[::-1]
299 | # 2. Remove low scores
300 | low_score_idx = np.where(
301 | overlaps[i, sorted_ixs] < score_threshold)[0]
302 | if low_score_idx.size > 0:
303 | sorted_ixs = sorted_ixs[:low_score_idx[0]]
304 | # 3. Find the match
305 | for j in sorted_ixs:
306 | # If ground truth box is already matched, go to next one
307 | #print('gt_match: ', gt_match[j])
308 | if gt_matches[s, j] > -1:
309 | continue
310 | # If we reach IoU smaller than the threshold, end the loop
311 | iou = overlaps[i, j]
312 | #print('iou: ', iou)
313 | if iou < iou_thres:
314 | break
315 | # Do we have a match?
316 | if not pred_class_ids[i] == gt_class_ids[j]:
317 | continue
318 |
319 | if iou > iou_thres:
320 | gt_matches[s, j] = i
321 | pred_matches[s, i] = j
322 | break
323 |
324 | return gt_matches, pred_matches, overlaps, indices
325 |
326 |
327 | def compute_ap_from_matches_scores(pred_match, pred_scores, gt_match):
328 | # sort the scores from high to low
329 | # print(pred_match.shape, pred_scores.shape)
330 | assert pred_match.shape[0] == pred_scores.shape[0]
331 |
332 | score_indices = np.argsort(pred_scores)[::-1]
333 | pred_scores = pred_scores[score_indices]
334 | pred_match = pred_match[score_indices]
335 |
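    # With predictions sorted by score, precision at rank k is TP@k / k and
    # recall at rank k is TP@k / len(gt_match); pred_match > -1 marks a TP.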
336 | precisions = np.cumsum(pred_match > -1) / (np.arange(len(pred_match)) + 1)
337 | recalls = np.cumsum(pred_match > -1).astype(np.float32) / len(gt_match)
338 |
339 | # Pad with start and end values to simplify the math
340 | precisions = np.concatenate([[0], precisions, [0]])
341 | recalls = np.concatenate([[0], recalls, [1]])
342 |
343 | # Ensure precision values decrease but don't increase. This way, the
344 | # precision value at each recall threshold is the maximum it can be
345 | # for all following recall thresholds, as specified by the VOC paper.
346 | for i in range(len(precisions) - 2, -1, -1):
347 | precisions[i] = np.maximum(precisions[i], precisions[i + 1])
348 |
349 | # Compute mean AP over recall range
350 | indices = np.where(recalls[:-1] != recalls[1:])[0] + 1
351 | ap = np.sum((recalls[indices] - recalls[indices - 1])
352 | * precisions[indices])
353 | return ap
354 |
355 |
356 | def compute_RT_overlaps(gt_class_ids, gt_RTs, gt_handle_visibility,
357 | pred_class_ids, pred_RTs,
358 | synset_names):
359 | """Finds overlaps between prediction and ground truth instances.
360 | Returns:
361 |         overlaps: [num_pred, num_gt, 2] array of (rotation degrees, translation cm) differences.
362 | """
363 | # print('num of gt instances: {}, num of pred instances: {}'.format(len(gt_class_ids), len(gt_class_ids)))
364 | num_pred = len(pred_class_ids)
365 | num_gt = len(gt_class_ids)
366 |
367 | # Compute IoU overlaps [pred_bboxs gt_bboxs]
368 | #overlaps = [[0 for j in range(num_gt)] for i in range(num_pred)]
369 | overlaps = np.zeros((num_pred, num_gt, 2))
370 |
371 | for i in range(num_pred):
372 | for j in range(num_gt):
373 | overlaps[i, j, :] = compute_RT_degree_cm_symmetry(pred_RTs[i],
374 | gt_RTs[j],
375 | gt_class_ids[j],
376 | gt_handle_visibility[j],
377 | synset_names)
378 |
379 | return overlaps
380 |
381 |
382 | def compute_match_from_degree_cm(overlaps, pred_class_ids, gt_class_ids, degree_thres_list, shift_thres_list):
383 | num_degree_thres = len(degree_thres_list)
384 | num_shift_thres = len(shift_thres_list)
385 |
386 | num_pred = len(pred_class_ids)
387 | num_gt = len(gt_class_ids)
388 |
389 | pred_matches = -1 * np.ones((num_degree_thres, num_shift_thres, num_pred))
390 | gt_matches = -1 * np.ones((num_degree_thres, num_shift_thres, num_gt))
391 |
392 | if num_pred == 0 or num_gt == 0:
393 | return gt_matches, pred_matches
394 |
395 | assert num_pred == overlaps.shape[0]
396 | assert num_gt == overlaps.shape[1]
397 | assert overlaps.shape[2] == 2
398 |
399 | for d, degree_thres in enumerate(degree_thres_list):
400 | for s, shift_thres in enumerate(shift_thres_list):
401 | for i in range(num_pred):
402 | # Find best matching ground truth box
403 | # 1. Sort matches by scores from low to high
404 | sum_degree_shift = np.sum(overlaps[i, :, :], axis=-1)
405 | sorted_ixs = np.argsort(sum_degree_shift)
406 | # 2. Remove low scores
407 | # low_score_idx = np.where(sum_degree_shift >= 100)[0]
408 | # if low_score_idx.size > 0:
409 | # sorted_ixs = sorted_ixs[:low_score_idx[0]]
410 | # 3. Find the match
411 | for j in sorted_ixs:
412 | # If ground truth box is already matched, go to next one
413 | #print(j, len(gt_match), len(pred_class_ids), len(gt_class_ids))
414 | if gt_matches[d, s, j] > -1 or pred_class_ids[i] != gt_class_ids[j]:
415 | continue
416 |                     # Skip candidates whose rotation or translation error exceeds its threshold
417 | if overlaps[i, j, 0] > degree_thres or overlaps[i, j, 1] > shift_thres:
418 | continue
419 |
420 | gt_matches[d, s, j] = i
421 | pred_matches[d, s, i] = j
422 | break
423 |
424 | return gt_matches, pred_matches
425 |
426 |
427 | def compute_degree_cm_mAP(final_results, synset_names, log_dir, degree_thresholds=[360], shift_thresholds=[100], iou_3d_thresholds=[0.1], iou_pose_thres=0.1, use_matches_for_pose=False, eval_recon=False):
428 |     """Compute mAP over the given 3D IoU, rotation, and translation thresholds.
429 | Returns:
430 | mAP: Mean Average Precision
431 | precisions: List of precisions at different class score thresholds.
432 | recalls: List of recall values at different class score thresholds.
433 | overlaps: [pred_boxes, gt_boxes] IoU overlaps.
434 | """
435 |
436 | num_classes = len(synset_names)
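    # A catch-all bin (360 degrees / 100 cm) is appended so that every
    # prediction falls under at least one threshold when accumulating matches.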
437 | degree_thres_list = list(degree_thresholds) + [360]
438 | num_degree_thres = len(degree_thres_list)
439 |
440 | shift_thres_list = list(shift_thresholds) + [100]
441 | num_shift_thres = len(shift_thres_list)
442 |
443 | iou_thres_list = list(iou_3d_thresholds)
444 | num_iou_thres = len(iou_thres_list)
445 |
446 | if use_matches_for_pose:
447 | assert iou_pose_thres in iou_thres_list
448 |
449 | iou_3d_aps = np.zeros((num_classes + 1, num_iou_thres))
450 | iou_pred_matches_all = [np.zeros((num_iou_thres, 0))
451 | for _ in range(num_classes)]
452 | iou_pred_scores_all = [np.zeros((num_iou_thres, 0))
453 | for _ in range(num_classes)]
454 | iou_gt_matches_all = [np.zeros((num_iou_thres, 0))
455 | for _ in range(num_classes)]
456 |
457 | pose_aps = np.zeros((num_classes + 1, num_degree_thres, num_shift_thres))
458 | pose_pred_matches_all = [
459 | np.zeros((num_degree_thres, num_shift_thres, 0)) for _ in range(num_classes)]
460 | pose_gt_matches_all = [
461 | np.zeros((num_degree_thres, num_shift_thres, 0)) for _ in range(num_classes)]
462 | pose_pred_scores_all = [
463 | np.zeros((num_degree_thres, num_shift_thres, 0)) for _ in range(num_classes)]
464 |
465 | # loop over results to gather pred matches and gt matches for iou and pose metrics
466 | progress = tqdm(final_results, desc="eval")
467 | # for progress, result in enumerate(final_results):
468 | for result in progress:
469 | # print(progress, len(final_results))
470 | gt_class_ids = result['gt_class_ids'].astype(np.int32)
471 | gt_RTs = np.array(result['gt_RTs'])
472 | gt_scales = np.array(result['gt_scales'])
473 | gt_handle_visibility = result['gt_handle_visibility']
474 |
475 | pred_bboxes = np.array(result['pred_bboxes'])
476 | pred_class_ids = result['pred_class_ids']
477 | pred_scales = result['pred_scales']
478 | pred_scores = result['pred_scores']
479 | pred_RTs = np.array(result['pred_RTs'])
480 | #print(pred_bboxes.shape[0], pred_class_ids.shape[0], pred_scores.shape[0], pred_RTs.shape[0])
481 |
482 | if len(gt_class_ids) == 0 and len(pred_class_ids) == 0:
483 | continue
484 |
485 | for cls_id in range(1, num_classes):
486 | # get gt and predictions in this class
487 | cls_gt_class_ids = gt_class_ids[gt_class_ids == cls_id] if len(
488 | gt_class_ids) else np.zeros(0)
489 | cls_gt_scales = gt_scales[gt_class_ids == cls_id] if len(
490 | gt_class_ids) else np.zeros((0, 3))
491 | cls_gt_RTs = gt_RTs[gt_class_ids == cls_id] if len(
492 | gt_class_ids) else np.zeros((0, 4, 4))
493 |
494 | cls_pred_class_ids = pred_class_ids[pred_class_ids == cls_id] if len(
495 | pred_class_ids) else np.zeros(0)
496 | cls_pred_bboxes = pred_bboxes[pred_class_ids == cls_id, :] if len(
497 | pred_class_ids) else np.zeros((0, 4))
498 | cls_pred_scores = pred_scores[pred_class_ids == cls_id] if len(
499 | pred_class_ids) else np.zeros(0)
500 | cls_pred_RTs = pred_RTs[pred_class_ids == cls_id] if len(
501 | pred_class_ids) else np.zeros((0, 4, 4))
502 | cls_pred_scales = pred_scales[pred_class_ids == cls_id] if len(
503 | pred_class_ids) else np.zeros((0, 3))
504 |
505 | # calculate the overlap between each gt instance and pred instance
506 | if synset_names[cls_id] != 'mug':
507 | cls_gt_handle_visibility = np.ones_like(cls_gt_class_ids)
508 | else:
509 | cls_gt_handle_visibility = gt_handle_visibility[gt_class_ids == cls_id] if len(
510 | gt_class_ids) else np.ones(0)
511 |
512 | iou_cls_gt_match, iou_cls_pred_match, _, iou_pred_indices = compute_3d_matches(cls_gt_class_ids, cls_gt_RTs, cls_gt_scales, cls_gt_handle_visibility, synset_names,
513 | cls_pred_bboxes, cls_pred_class_ids, cls_pred_scores, cls_pred_RTs, cls_pred_scales,
514 | iou_thres_list)
515 | if len(iou_pred_indices):
516 | cls_pred_class_ids = cls_pred_class_ids[iou_pred_indices]
517 | cls_pred_RTs = cls_pred_RTs[iou_pred_indices]
518 | cls_pred_scores = cls_pred_scores[iou_pred_indices]
519 | cls_pred_bboxes = cls_pred_bboxes[iou_pred_indices]
520 |
521 | iou_pred_matches_all[cls_id] = np.concatenate(
522 | (iou_pred_matches_all[cls_id], iou_cls_pred_match), axis=-1)
523 | cls_pred_scores_tile = np.tile(cls_pred_scores, (num_iou_thres, 1))
524 | iou_pred_scores_all[cls_id] = np.concatenate(
525 | (iou_pred_scores_all[cls_id], cls_pred_scores_tile), axis=-1)
526 | assert iou_pred_matches_all[cls_id].shape[1] == iou_pred_scores_all[cls_id].shape[1]
527 | iou_gt_matches_all[cls_id] = np.concatenate(
528 | (iou_gt_matches_all[cls_id], iou_cls_gt_match), axis=-1)
529 |
530 | if use_matches_for_pose:
531 | thres_ind = list(iou_thres_list).index(iou_pose_thres)
532 |
533 | iou_thres_pred_match = iou_cls_pred_match[thres_ind, :]
534 |
535 | cls_pred_class_ids = cls_pred_class_ids[iou_thres_pred_match > -1] if len(
536 | iou_thres_pred_match) > 0 else np.zeros(0)
537 | cls_pred_RTs = cls_pred_RTs[iou_thres_pred_match > -1] if len(
538 | iou_thres_pred_match) > 0 else np.zeros((0, 4, 4))
539 | cls_pred_scores = cls_pred_scores[iou_thres_pred_match > -1] if len(
540 | iou_thres_pred_match) > 0 else np.zeros(0)
541 | cls_pred_bboxes = cls_pred_bboxes[iou_thres_pred_match > -1] if len(
542 | iou_thres_pred_match) > 0 else np.zeros((0, 4))
543 |
544 | iou_thres_gt_match = iou_cls_gt_match[thres_ind, :]
545 | cls_gt_class_ids = cls_gt_class_ids[iou_thres_gt_match > -1] if len(
546 | iou_thres_gt_match) > 0 else np.zeros(0)
547 | cls_gt_RTs = cls_gt_RTs[iou_thres_gt_match > -1] if len(
548 | iou_thres_gt_match) > 0 else np.zeros((0, 4, 4))
549 | cls_gt_handle_visibility = cls_gt_handle_visibility[iou_thres_gt_match > -1] if len(
550 | iou_thres_gt_match) > 0 else np.zeros(0)
551 |
552 | RT_overlaps = compute_RT_overlaps(cls_gt_class_ids, cls_gt_RTs, cls_gt_handle_visibility,
553 | cls_pred_class_ids, cls_pred_RTs,
554 | synset_names)
555 |
556 | pose_cls_gt_match, pose_cls_pred_match = compute_match_from_degree_cm(RT_overlaps,
557 | cls_pred_class_ids,
558 | cls_gt_class_ids,
559 | degree_thres_list,
560 | shift_thres_list)
561 |
562 | pose_pred_matches_all[cls_id] = np.concatenate(
563 | (pose_pred_matches_all[cls_id], pose_cls_pred_match), axis=-1)
564 |
565 | cls_pred_scores_tile = np.tile(
566 | cls_pred_scores, (num_degree_thres, num_shift_thres, 1))
567 | pose_pred_scores_all[cls_id] = np.concatenate(
568 | (pose_pred_scores_all[cls_id], cls_pred_scores_tile), axis=-1)
569 | assert pose_pred_scores_all[cls_id].shape[2] == pose_pred_matches_all[cls_id].shape[2], '{} vs. {}'.format(
570 | pose_pred_scores_all[cls_id].shape, pose_pred_matches_all[cls_id].shape)
571 | pose_gt_matches_all[cls_id] = np.concatenate(
572 | (pose_gt_matches_all[cls_id], pose_cls_gt_match), axis=-1)
573 |
574 | # plot 3D IoU AP vs. IoU threshold
575 | fig_iou = plt.figure()
576 | # 1 x 3 panels: 3D IoU AP, rotation AP, translation AP
577 | ax_iou = plt.subplot(131)
578 | plt.ylabel('AP')
579 | plt.ylim((0, 1))
580 | plt.xlabel('3D IoU thresholds')
581 |
582 | iou_dict = {}
583 | iou_dict['thres_list'] = iou_thres_list
584 | for cls_id in range(1, num_classes):
585 | class_name = synset_names[cls_id]
586 | # AP for this class at each 3D IoU threshold
587 | for s, _ in enumerate(iou_thres_list):
588 | iou_3d_aps[cls_id, s] = compute_ap_from_matches_scores(iou_pred_matches_all[cls_id][s, :],
589 | iou_pred_scores_all[cls_id][s, :],
590 | iou_gt_matches_all[cls_id][s, :])
591 | ax_iou.plot(iou_thres_list, iou_3d_aps[cls_id, :], label=class_name)
592 |
593 | iou_3d_aps[-1, :] = np.mean(iou_3d_aps[1:-1, :], axis=0)
594 | ax_iou.plot(iou_thres_list, iou_3d_aps[-1, :], label='mean')
595 | iou_dict['aps'] = iou_3d_aps
596 |
597 | # draw pose AP vs. thresholds
598 | if use_matches_for_pose:
599 | prefix = 'Pose_Only_'
600 | else:
601 | prefix = 'Pose_Detection_'
602 |
603 | pose_dict = {}
604 | pose_dict['degree_thres'] = degree_thres_list
605 | pose_dict['shift_thres_list'] = shift_thres_list
606 |
607 | for i, _ in enumerate(degree_thres_list):
608 | for j, _ in enumerate(shift_thres_list):
609 | for cls_id in range(1, num_classes):
610 | cls_pose_pred_matches_all = pose_pred_matches_all[cls_id][i, j, :]
611 | cls_pose_gt_matches_all = pose_gt_matches_all[cls_id][i, j, :]
612 | cls_pose_pred_scores_all = pose_pred_scores_all[cls_id][i, j, :]
613 |
614 | pose_aps[cls_id, i, j] = compute_ap_from_matches_scores(cls_pose_pred_matches_all,
615 | cls_pose_pred_scores_all,
616 | cls_pose_gt_matches_all)
617 |
618 | pose_aps[-1, i, j] = np.mean(pose_aps[1:-1, i, j])
619 |
620 | ax_rot = plt.subplot(132)
621 | plt.ylim((0, 1))
622 |
623 | plt.xlabel('Rotation/degree')
624 | for cls_id in range(1, num_classes):
625 | class_name = synset_names[cls_id]
626 | # AP vs. rotation threshold, translation threshold fixed at its loosest value
627 | ax_rot.plot(
628 | degree_thres_list[:-1], pose_aps[cls_id, :-1, -1], label=class_name)
629 |
630 | ax_rot.plot(degree_thres_list[:-1], pose_aps[-1, :-1, -1], label='mean')
631 | pose_dict['aps'] = pose_aps
632 |
633 | ax_trans = plt.subplot(133)
634 | plt.ylim((0, 1))
635 | plt.xlabel('Translation/cm')
636 | for cls_id in range(1, num_classes):
637 | class_name = synset_names[cls_id]
638 | # AP vs. translation threshold, rotation threshold fixed at its loosest value
639 | ax_trans.plot(shift_thres_list[:-1],
640 | pose_aps[cls_id, -1, :-1], label=class_name)
641 |
642 | ax_trans.plot(shift_thres_list[:-1], pose_aps[-1, -1, :-1], label='mean')
643 | output_path = os.path.join(
644 | log_dir, prefix + 'mAP_{}-{}cm.png'.format(shift_thres_list[0], shift_thres_list[-2]))
645 | ax_trans.legend()
646 |
647 | fig_iou.savefig(output_path)
648 | plt.close(fig_iou)
649 |
650 | iou_aps = iou_3d_aps
651 | kind_result = {}
652 | kind_result["3D IoU at 25"] = "{:.1f}".format(
653 | iou_aps[-1, iou_thres_list.index(0.25)] * 100)
654 | kind_result["3D IoU at 50"] = "{:.1f}".format(
655 | iou_aps[-1, iou_thres_list.index(0.5)] * 100)
656 | kind_result["5 degree, 5 cm"] = "{:.1f}".format(
657 | pose_aps[-1, degree_thres_list.index(5), shift_thres_list.index(5)] * 100)
658 | kind_result["10 degree, 5 cm"] = "{:.1f}".format(
659 | pose_aps[-1, degree_thres_list.index(10), shift_thres_list.index(5)] * 100)
660 | kind_result["10 degree, 10 cm"] = "{:.1f}".format(
661 | pose_aps[-1, degree_thres_list.index(10), shift_thres_list.index(10)] * 100)
662 |
663 | # reconstruction metrics (Chamfer and EMD distances), computed only when eval_recon is set
664 | if eval_recon:
665 | emd_dis_all = {c: [] for c in synset_names}
666 | cmf_dis_all = {c: [] for c in synset_names}
667 |
668 | for result in tqdm(final_results, desc="recon"):
669 |
670 | pred_class_ids = result['pred_class_ids']
671 | if len(pred_class_ids) <= 0:
672 | continue
673 | chamfer_dis_cass = result["chamfer_dis_cass"]
674 | emd_dis_cass = result["emd_dis_cass"]
675 |
676 | for cls_id in range(1, num_classes):
677 | # gather this class's reconstruction distances
678 |
679 | cmf_dis = chamfer_dis_cass[pred_class_ids == cls_id]
680 | emd_dis = emd_dis_cass[pred_class_ids == cls_id]
681 | if len(cmf_dis) <= 0 or len(emd_dis) <= 0:
682 | continue
683 | cmf_dis_all[synset_names[cls_id]] += cmf_dis.tolist()
684 | emd_dis_all[synset_names[cls_id]] += emd_dis.tolist()
685 |
686 | emd_dis = {}
687 | for k, v in emd_dis_all.items():
688 | if k != "BG" and len(v):
689 | emd_dis[k] = np.mean(np.asarray(v))
690 | emd_dis["mean"] = np.mean(np.array([v for v in emd_dis.values()]))
691 |
692 | cmf_dis = {}
693 | for k, v in cmf_dis_all.items():
694 | if k != "BG" and len(v):
695 | cmf_dis[k] = np.mean(np.asarray(v))
696 | cmf_dis["mean"] = np.mean(np.array([v for v in cmf_dis.values()]))
697 |
698 | kind_result["emd"] = emd_dis
699 | kind_result["cmf"] = cmf_dis
700 |
701 | pprint(kind_result)
702 | with open(os.path.join(log_dir, "eval_result.json"), "w") as f:
703 | json.dump(kind_result, f, indent=4)
704 | return iou_3d_aps, pose_aps
705 |
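A hedged usage sketch of compute_degree_cm_mAP on a single synthetic result (all names and values below are hypothetical; the synset list must start with a background entry, and the helpers called above, such as compute_3d_matches, are assumed to be defined alongside it in this file):

import numpy as np

synset_names = ['BG', 'bottle', 'mug']  # hypothetical class list; index 0 is background

rt = np.eye(4, dtype=np.float32)
final_results = [{
    'gt_class_ids': np.array([1]),
    'gt_RTs': np.array([rt]),
    'gt_scales': np.array([[0.1, 0.2, 0.1]]),
    'gt_handle_visibility': np.array([1]),
    'pred_class_ids': np.array([1]),
    'pred_RTs': np.array([rt]),
    'pred_scales': np.array([[0.1, 0.2, 0.1]]),
    'pred_scores': np.array([0.9]),
    'pred_bboxes': np.array([[10, 10, 50, 50]]),
}]

# threshold lists must cover the entries indexed by kind_result above:
# 0.25/0.5 IoU, 5/10 degrees, 5/10 cm
iou_3d_aps, pose_aps = compute_degree_cm_mAP(
    final_results, synset_names, log_dir='.',
    degree_thresholds=[5, 10], shift_thresholds=[5, 10],
    iou_3d_thresholds=[0.25, 0.5], iou_pose_thres=0.25,
    use_matches_for_pose=True)

Besides the returned AP tables, the call writes the AP plot (here Pose_Only_mAP_5-10cm.png) and eval_result.json into log_dir.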
706 |
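compute_ap_from_matches_scores itself is presumably defined earlier in this file; for reference, here is a self-contained sketch of the standard VOC-style average precision it is expected to follow (the real helper may differ in detail):

import numpy as np

def ap_from_matches_scores(pred_matches, pred_scores, gt_matches):
    # rank predictions by confidence, then integrate precision over recall
    order = np.argsort(pred_scores)[::-1]
    tp = (pred_matches[order] > -1).astype(np.float32)
    precisions = np.cumsum(tp) / (np.arange(len(tp)) + 1)
    recalls = np.cumsum(tp) / max(len(gt_matches), 1)
    # pad the curve and make precision monotonically non-increasing
    precisions = np.concatenate([[0.0], precisions, [0.0]])
    recalls = np.concatenate([[0.0], recalls, [1.0]])
    for k in range(len(precisions) - 2, -1, -1):
        precisions[k] = max(precisions[k], precisions[k + 1])
    idx = np.where(recalls[:-1] != recalls[1:])[0] + 1
    return float(np.sum((recalls[idx] - recalls[idx - 1]) * precisions[idx]))

# two predictions (one correct) against two ground truths -> AP = 0.5
print(ap_from_matches_scores(np.array([0, -1]),
                             np.array([0.9, 0.8]),
                             np.array([0, -1])))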
707 | color_map = [
708 | (255, 0, 0),
709 | (0, 255, 0),
710 | (0, 0, 255),
711 | (255, 0, 255),
712 | (0, 255, 255),
713 | (255, 255, 0)
714 | ]
715 |
716 |
717 | def draw(image, RTs, models, class_ids, misses, intrinsics, save_path="bbox.png"):
718 | draw_image = image.copy()
719 | RTs = np.array(RTs)
720 | models = np.array(models)
721 |
722 | for RT, model, cls, miss in zip(RTs, models, class_ids, misses):
723 | if miss:
724 | continue
725 | model = model.transpose(1, 0)
726 | RT = RT.reshape(4, 4)
727 | transformed_pts = transform_coordinates_3d(model, RT)
728 | projected_pts = calculate_2d_projections(
729 | transformed_pts, intrinsics)
730 |
731 | for p in projected_pts:
732 | cv2.circle(draw_image, center=tuple(p), radius=3,
733 | color=color_map[int(cls - 1)], thickness=-1)  # negative thickness = filled
734 |
735 | if "cass" in save_path:
736 | (h, w) = draw_image.shape[:2]
737 | center = (w / 2, h / 2)
738 |
739 | # rotate the visualization 180 degrees around the image center
740 | M = cv2.getRotationMatrix2D(center, 180, 1.0)
741 | draw_image = cv2.warpAffine(draw_image, M, (w, h))
742 |
743 | cv2.imwrite(save_path, draw_image[:, :, (2, 1, 0)])  # reorder channels (RGB -> BGR) for OpenCV
744 |
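To exercise draw, a hedged sketch with a synthetic image, camera, and point cloud (all values hypothetical; transform_coordinates_3d and calculate_2d_projections from this file are assumed available and to return per-point pixel coordinates):

import numpy as np

image = np.zeros((480, 640, 3), dtype=np.uint8)   # blank RGB canvas
intrinsics = np.array([[577.5, 0.0, 319.5],        # hypothetical pinhole camera
                       [0.0, 577.5, 239.5],
                       [0.0, 0.0, 1.0]])
model = np.random.uniform(-0.05, 0.05, (100, 3))   # toy point cloud, in meters
RT = np.eye(4)
RT[2, 3] = 0.6                                     # place the object 0.6 m ahead

draw(image, RTs=[RT], models=[model], class_ids=[1],
     misses=[False], intrinsics=intrinsics, save_path="demo_bbox.png")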
--------------------------------------------------------------------------------
/trained_models/placeholder:
--------------------------------------------------------------------------------
1 | Put the downloaded model here.
--------------------------------------------------------------------------------