├── .gitignore ├── LICENSE ├── README.md ├── config ├── __init__.py └── defaults.py ├── data ├── __init__.py ├── pointclouds.py └── preprocess │ ├── __init__.py │ ├── compute_kpt_pairs.py │ ├── compute_overlap.py │ ├── compute_radius.py │ └── fuse_fragments_3DMatch.py ├── docker ├── Dockerfile └── build.sh ├── evaluation ├── __init__.py ├── eval_geomreg_3dmatch.py ├── eval_geomreg_3dmatch.sh ├── eval_geomreg_eth.py └── eval_geomreg_eth.sh ├── figures └── pipeline.png ├── models ├── __init__.py ├── base_model.py ├── modules.py └── mvdesc.py ├── requirements.txt ├── scripts ├── __init__.py ├── configs │ ├── ours_3dmatch.yaml │ └── ours_eth.yaml ├── engine_utils.py ├── main_mvdesc.py └── ours_3dmatch │ ├── net_cnn_16.pth │ ├── net_embed_16.pth │ ├── net_pool_16.pth │ └── net_renderer_16.pth ├── soft_renderer ├── __init__.py ├── cuda │ ├── __init__.py │ ├── jit.py │ ├── soft_rasterize_cuda.cpp │ ├── soft_rasterize_cuda_kernel.cu │ └── utils.cuh └── transform.py └── utils ├── __init__.py ├── io.py ├── log.py ├── loss.py └── meters.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | *.so.* 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | .hypothesis/ 49 | .pytest_cache/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | db.sqlite3 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # pyenv 77 | .python-version 78 | 79 | # celery beat schedule file 80 | celerybeat-schedule 81 | 82 | # SageMath parsed files 83 | *.sage.py 84 | 85 | # Environments 86 | .env 87 | .venv 88 | env/ 89 | venv/ 90 | ENV/ 91 | env.bak/ 92 | venv.bak/ 93 | 94 | # Spyder project settings 95 | .spyderproject 96 | .spyproject 97 | 98 | # Rope project settings 99 | .ropeproject 100 | 101 | # mkdocs documentation 102 | /site 103 | 104 | # mypy 105 | .mypy_cache/ 106 | 107 | .idea 108 | .vscode 109 | train_logs 110 | eval_logs 111 | 112 | *.pkl 113 | benchmark/*.txt 114 | benchmark/tmp/ 115 | .vscode 116 | 117 | temp/ 118 | soft_renderer/temp/ 119 | 120 | .DS_Store -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Lei Li 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, 
merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # End-to-End Learning Local Multi-view Descriptors for 3D Point Clouds 2 | 3 | By Lei Li, Siyu Zhu, Hongbo Fu, Ping Tan, and Chiew-Lan Tai. (CVPR 2020) 4 | 5 | In this work, we propose an end-to-end framework to learn local multi-view descriptors for 3D point clouds. Existing studies that adopt a similar multi-view representation use hand-crafted viewpoints for rendering in a preprocessing stage, which is detached from the subsequent descriptor learning stage. In our framework, we integrate the multi-view rendering into neural networks by using a differentiable renderer, which allows the viewpoints to be optimizable parameters for capturing more informative local context of interest points. To obtain discriminative descriptors, we also design a soft-view pooling module to attentively fuse convolutional features across views. Extensive experiments on existing 3D registration benchmarks show that our method outperforms existing local descriptors both quantitatively and qualitatively. 6 | 7 | ![pipeline](figures/pipeline.png) 8 | 9 | ## Link 10 | 11 | [Paper](https://arxiv.org/pdf/2003.05855) 12 | 13 | ## Citation 14 | ``` 15 | @InProceedings{Li_2020_CVPR, 16 | author = {Li, Lei and Zhu, Siyu and Fu, Hongbo and Tan, Ping and Tai, Chiew-Lan}, 17 | title = {End-to-End Learning Local Multi-view Descriptors for 3D Point Clouds}, 18 | booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, 19 | year = {2020} 20 | } 21 | ``` 22 | 23 | 24 | ## Instructions 25 | 26 | ### Dependencies 27 | 28 | - CUDA \& CUDNN 29 | 30 | - Python 3.6 or 3.7 31 | 32 | - Install packages by 33 | ``` 34 | pip install -r requirements.txt 35 | ``` 36 | 37 | If you are familiar with Docker, a ``Dockerfile`` is provided in folder ``docker`` for building a [Docker image](https://hub.docker.com/r/craigleili/3dlocalmultiviewdesc/tags?page=1&name=mvdesc) that includes a complete running environment. 38 | 39 | ### 3DMatch Benchmark 40 | 41 | #### Training 42 | 43 | Download the 3DMatch RGB-D data via [this link](http://3dmatch.cs.princeton.edu/#rgbd-reconstruction-datasets). 44 | 45 | Go to folder ```data/preprocess```. Use the following scripts for preprocessing and generating training data. 46 | 47 | - ```fuse_fragments_3DMatch.py``` for generating point cloud fragments from the RGB-D data. 48 | - ```compute_radius.py``` for computing per-point radii. (This step may be skipped to save time \& space if fixed-radius point rendering is used.) 49 | - ```compute_overlap.py``` for finding partially overlapped fragment pairs.
50 | - ```compute_kpt_pairs.py``` for selecting point pairs in overlapped regions for batch-hard training. 51 | 52 | Go to folder ```scripts```. Fill the paths in ```configs/ours_3dmatch.yaml``` and run 53 | ``` 54 | python main_mvdesc.py train configs/ours_3dmatch.yaml 55 | ``` 56 | 57 | A copy of the trained weights is located in ```scripts/ours_3dmatch```. 58 | 59 | #### Evaluation 60 | 61 | Download the 3DMatch geometric registration benchmark via [this link](https://1drv.ms/u/s!Alg6Vpe53dEDgZIsbH8Vt_J2T1CCMQ?e=QMl1U0). If you use these data in your work, please consider citing [[1]](#references). 62 | 63 | Go to folder ```scripts```. Fill the paths in ```configs/ours_3dmatch.yaml``` and then extract the local multi-view descriptors by running 64 | ``` 65 | python main_mvdesc.py test configs/ours_3dmatch.yaml 66 | ``` 67 | The extracted descriptors can also be directly downloaded via [this link](https://1drv.ms/u/s!Alg6Vpe53dEDgZIsbH8Vt_J2T1CCMQ?e=QMl1U0). 68 | 69 | Compute the recall metric by running ```evaluation/eval_geomreg_3dmatch.sh```. 70 | 71 | ### ETH Benchmark 72 | 73 | Download the ETH benchmark via [this link](https://1drv.ms/u/s!Alg6Vpe53dEDgZIsbH8Vt_J2T1CCMQ?e=QMl1U0). If you use these data in your work, please consider citing [[2, 3]](#references). 74 | 75 | Go to folder ```scripts```. Fill the paths in ```configs/ours_eth.yaml``` and then extract the local multi-view descriptors by running 76 | ``` 77 | python main_mvdesc.py test configs/ours_eth.yaml 78 | ``` 79 | The extracted descriptors can also be directly downloaded via [this link](https://1drv.ms/u/s!Alg6Vpe53dEDgZIsbH8Vt_J2T1CCMQ?e=QMl1U0). 80 | 81 | Compute the recall metric by running ```evaluation/eval_geomreg_eth.sh```. 82 | 83 | ## References 84 | 85 | 1. Zeng et al. [3DMatch: Learning Local Geometric Descriptors from RGB-D Reconstructions](http://3dmatch.cs.princeton.edu/). CVPR 2017. 86 | 1. Pomerleau et al. [Challenging data sets for point cloud registration algorithms](https://projects.asl.ethz.ch/datasets/doku.php?id=laserregistration:laserregistration). IJRR 2012. 87 | 1. Gojcic et al. [The Perfect Match: 3D Point Cloud Matching with Smoothed Densities](https://github.com/zgojcic/3DSmoothNet). CVPR 2019. 88 | 1. Liu et al. [Soft Rasterizer: A differentiable renderer for image-based 3d reasoning](https://github.com/ShichenLiu/SoftRas). ICCV 2019. 89 | -------------------------------------------------------------------------------- /config/__init__.py: -------------------------------------------------------------------------------- 1 | from .defaults import M as mvdesc_cfg 2 | -------------------------------------------------------------------------------- /config/defaults.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from yacs.config import CfgNode as CN 3 | 4 | M = CN() 5 | 6 | M.general = CN() 7 | M.general.gpu = 0 8 | M.general.note = '' 9 | M.general.seed = 9 10 | 11 | M.log = CN() 12 | M.log.freq = 100 13 | M.log.identifier = '' 14 | M.log.root_path = '' 15 | 16 | M.render = CN() 17 | M.render.augment_rotations = True 18 | M.render.view_num = 1 19 | M.render.rotation_num = 0 20 | M.render.znear = 0.1 21 | M.render.zfar = 6. 22 | M.render.image_size = 64 23 | M.render.sigma = 1. / 64. 24 | M.render.gamma = 5. 25 | M.render.dist_ratio = 3. 
26 | M.render.radius_ratio = 0.5 27 | M.render.draw_color = False 28 | M.render.draw_depth = True 29 | M.render.trainable = True 30 | M.render.default_radius = 0.025 31 | M.render.dist_factor = 1.0 32 | 33 | M.model = CN() 34 | M.model.cnn = 'l2net' 35 | M.model.cnn_out_channels = 128 36 | M.model.desc_dim = 128 37 | M.model.fusion_type = 'max_pool' 38 | M.model.type = 'MVPoolNet' 39 | 40 | M.l2net = CN() 41 | M.l2net.return_interims = False 42 | M.l2net.trainable = True 43 | 44 | M.view_pool = CN() 45 | M.view_pool.bias = True 46 | M.view_pool.kernel = 3 47 | 48 | M.train = CN() 49 | 50 | M.train.general = CN() 51 | M.train.general.ckpt_nets = [] 52 | M.train.general.ckpt_path = '' 53 | 54 | M.train.input = CN() 55 | M.train.input.batch_size = 1 56 | M.train.input.instance_num = 24 57 | 58 | M.train.dataset = CN() 59 | M.train.dataset.name = '' 60 | M.train.dataset.pcloud_root = '' 61 | M.train.dataset.kpts_root = '' 62 | M.train.dataset.workers = 6 63 | 64 | M.train.solver = CN() 65 | M.train.solver.epochs = 16 66 | M.train.solver.lr = 1e-3 67 | M.train.solver.lr_gamma = 0.1 68 | M.train.solver.lr_step = 4 69 | M.train.solver.optim = 'Adam' 70 | M.train.solver.renderer_lr = 1e-3 71 | M.train.solver.renderer_optim_step = 10 72 | M.train.solver.renderer_weight = 1. 73 | M.train.solver.save_freq = 4 74 | M.train.solver.weight_decay = -1 75 | M.train.solver.grad_clip = 1e-3 76 | 77 | M.eval = CN() 78 | 79 | M.eval.general = CN() 80 | M.eval.general.ckpt_path = '' 81 | 82 | M.eval.geomreg = CN() 83 | M.eval.geomreg.workers = 0 84 | 85 | M.eval.geomreg.valid = CN() 86 | M.eval.geomreg.valid.name = '' 87 | M.eval.geomreg.valid.pcloud_root = '' 88 | 89 | M.eval.geomreg.test = CN() 90 | M.eval.geomreg.test.name = '' 91 | M.eval.geomreg.test.pcloud_root = '' 92 | -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/craigleili/3DLocalMultiViewDesc/eb7d6912886da851e673e6a8528e9ba04001d73a/data/__init__.py -------------------------------------------------------------------------------- /data/pointclouds.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | 4 | from collections import defaultdict, namedtuple 5 | from pathlib import Path 6 | import math 7 | import numpy as np 8 | import open3d as o3d 9 | import os.path as osp 10 | import random 11 | import sys 12 | import pickle 13 | 14 | import torch 15 | from torch.utils.data import Dataset, Sampler 16 | 17 | ROOT_DIR = osp.abspath('../') 18 | if ROOT_DIR not in sys.path: 19 | sys.path.append(ROOT_DIR) 20 | 21 | from utils import io as uio 22 | o3d.utility.set_verbosity_level(o3d.utility.VerbosityLevel.Error) 23 | 24 | CAMERA_UP = np.asarray([0., -1., 0.], dtype=np.float32) 25 | OverlapMeta = namedtuple('OverlapMeta', 26 | ['scene', 'seq', 'cloud_name_i', 'cloud_name_j', 'full_path']) 27 | PCloudMeta = namedtuple('PCloudMeta', ['scene', 'seq', 'name', 'full_path']) 28 | 29 | 30 | def is_numpy(x): 31 | if x is None: 32 | return False 33 | return type(x).__module__ == np.__name__ 34 | 35 | 36 | def random_index(n, excludes=[]): 37 | while True: 38 | i = random.randint(0, n - 1) 39 | if i not in excludes: 40 | return i 41 | 42 | 43 | def list_pcd_pairs(root_dir, excluded_scenes=None): 44 | res = list() 45 | for scene in uio.list_folders(root_dir, alphanum_sort=False): 46 | if 
excluded_scenes is not None and scene in excluded_scenes: 47 | continue 48 | for seq in uio.list_folders(osp.join(root_dir, scene), alphanum_sort=True): 49 | seq_folder = osp.join(root_dir, scene, seq) 50 | for npy_file in uio.list_files(seq_folder, 'cloud_bin_*.npy', alphanum_sort=True): 51 | cloud_name_i, cloud_name_j = npy_file[:-4].split('-') 52 | res.append( 53 | OverlapMeta(scene=scene, 54 | seq=seq, 55 | cloud_name_i=cloud_name_i, 56 | cloud_name_j=cloud_name_j, 57 | full_path=osp.join(seq_folder, npy_file))) 58 | return res 59 | 60 | 61 | def list_pcds(root_dir, excluded_scenes=None): 62 | res = list() 63 | for scene in uio.list_folders(root_dir, alphanum_sort=False): 64 | if excluded_scenes is not None and scene in excluded_scenes: 65 | continue 66 | for seq in uio.list_folders(osp.join(root_dir, scene), alphanum_sort=True): 67 | seq_folder = osp.join(root_dir, scene, seq) 68 | pcloud_names = uio.list_files(seq_folder, '*.ply', alphanum_sort=True) 69 | metas = [ 70 | PCloudMeta( 71 | scene=scene, 72 | seq=seq, 73 | name=pn[:-4], 74 | full_path=osp.join(seq_folder, pn), 75 | ) for pn in pcloud_names 76 | ] 77 | res.extend(metas) 78 | return res 79 | 80 | 81 | class PointCloud(object): 82 | 83 | def __init__(self, points, radii, colors, at_centers, at_normals): 84 | assert points is not None 85 | assert radii is not None 86 | assert at_centers is not None 87 | assert at_normals is not None 88 | 89 | if is_numpy(points): 90 | self.points = torch.from_numpy(points) 91 | else: 92 | self.points = points 93 | 94 | if is_numpy(radii): 95 | self.radii = torch.from_numpy(radii) 96 | else: 97 | self.radii = radii 98 | 99 | if is_numpy(colors): 100 | self.colors = torch.from_numpy(colors) 101 | else: 102 | self.colors = colors 103 | 104 | if is_numpy(at_centers): 105 | self.at_centers = torch.from_numpy(at_centers) 106 | else: 107 | self.at_centers = at_centers 108 | 109 | if is_numpy(at_normals): 110 | self.at_normals = torch.from_numpy(at_normals) 111 | else: 112 | self.at_normals = at_normals 113 | 114 | def to(self, device): 115 | self.points = self.points.to(device) 116 | self.radii = self.radii.to(device) 117 | if self.colors is not None: 118 | self.colors = self.colors.to(device) 119 | self.at_centers = self.at_centers.to(device) 120 | self.at_normals = self.at_normals.to(device) 121 | 122 | @classmethod 123 | def from_o3d(cls, pcd_o3d, radii, at_indices, at_normals=None): 124 | points = np.asarray(pcd_o3d.points, dtype=np.float32) 125 | radii = np.asarray(radii, dtype=np.float32) 126 | if len(pcd_o3d.colors) == len(pcd_o3d.points): 127 | colors = np.asarray(pcd_o3d.colors, dtype=np.float32) 128 | else: 129 | colors = None 130 | at_centers = points[at_indices, :] 131 | if at_normals is None: 132 | if len(pcd_o3d.normals) != len(pcd_o3d.points): 133 | raise RuntimeError('[!] 
The point cloud needs normals.') 134 | at_normals = np.asarray(pcd_o3d.normals, dtype=np.float32)[at_indices, :] 135 | return cls(points, radii, colors, at_centers, at_normals) 136 | 137 | 138 | class PointCloudPairSampler(Sampler): 139 | 140 | def __init__(self, data_source, batch_size=1): 141 | self.data_source = data_source 142 | self.batch_size = batch_size 143 | 144 | self.indices = self._generate_iter_indices() 145 | self.regen_flag = False 146 | 147 | def __iter__(self): 148 | if self.regen_flag: 149 | self.indices = self._generate_iter_indices() 150 | else: 151 | self.regen_flag = True 152 | return iter(self.indices) 153 | 154 | def __len__(self): 155 | return len(self.indices) 156 | 157 | def _generate_iter_indices(self): 158 | indices_dict = dict() 159 | for i, meta in enumerate(self.data_source): 160 | scene, seq = meta.scene, meta.seq 161 | if scene not in indices_dict: 162 | indices_dict[scene] = dict() 163 | if seq not in indices_dict[scene]: 164 | indices_dict[scene][seq] = list() 165 | indices_dict[scene][seq].append(i) 166 | 167 | grouped_indices = list() 168 | for scene_name, scene_dict in indices_dict.items(): 169 | for seq_name, seq_list in scene_dict.items(): 170 | meta_indices = seq_list.copy() 171 | random.shuffle(meta_indices) 172 | n_sublists = math.floor(float(len(meta_indices)) / self.batch_size) 173 | for i in range(n_sublists): 174 | grouped_indices.append(meta_indices[i * self.batch_size:(i + 1) * 175 | self.batch_size]) 176 | random.shuffle(grouped_indices) 177 | iter_indices = list() 178 | for item in grouped_indices: 179 | iter_indices.extend(item) 180 | return iter_indices 181 | 182 | 183 | class PointCloudPairDataset(Dataset): 184 | def __init__(self, data_source, pcd_root, num_point_pairs, radius=None): 185 | self.data_source = data_source 186 | self.pcd_root = pcd_root 187 | self.num_point_pairs = num_point_pairs 188 | self.radius = radius 189 | 190 | def __getitem__(self, index): 191 | meta = self.data_source[index] 192 | 193 | path_i = osp.join(self.pcd_root, meta.scene, meta.seq, meta.cloud_name_i + '.ply') 194 | path_j = osp.join(self.pcd_root, meta.scene, meta.seq, meta.cloud_name_j + '.ply') 195 | 196 | pcd_o3d_i = o3d.io.read_point_cloud(path_i) 197 | pcd_o3d_j = o3d.io.read_point_cloud(path_j) 198 | if Path(path_i[:-4] + '.radius.npy').is_file() and Path(path_j[:-4] + '.radius.npy').is_file(): 199 | radii_i = np.load(path_i[:-4] + '.radius.npy') 200 | radii_j = np.load(path_j[:-4] + '.radius.npy') 201 | else: 202 | assert self.radius is not None 203 | radii_i = np.ones((len(pcd_o3d_i.points),), dtype=np.float32) * self.radius 204 | radii_j = np.ones((len(pcd_o3d_j.points),), dtype=np.float32) * self.radius 205 | 206 | point_pairs = np.load(meta.full_path) 207 | samples = random.sample(range(len(point_pairs)), self.num_point_pairs) 208 | indices = point_pairs[samples, :] 209 | 210 | pcd_i = PointCloud.from_o3d(pcd_o3d_i, radii_i, indices[:, 0], None) 211 | pcd_j = PointCloud.from_o3d(pcd_o3d_j, radii_j, indices[:, 1], None) 212 | 213 | return { 214 | 'cloud_i': pcd_i, 215 | 'cloud_j': pcd_j, 216 | 'name_i': '{}/{}/{}'.format(meta.scene, meta.seq, meta.cloud_name_i), 217 | 'name_j': '{}/{}/{}'.format(meta.scene, meta.seq, meta.cloud_name_j), 218 | } 219 | 220 | def __len__(self): 221 | return len(self.data_source) 222 | 223 | 224 | class PointCloudDataset(Dataset): 225 | 226 | def __init__(self, data_source, pcd_root, radius=None): 227 | self.data_source = data_source 228 | self.pcd_root = pcd_root 229 | self.radius = radius 230 | 231 | def 
__getitem__(self, index): 232 | meta = self.data_source[index] 233 | pcd_o3d = o3d.io.read_point_cloud(meta.full_path) 234 | if Path(meta.full_path[:-4] + '.radius.npy').is_file(): 235 | radii = np.load(meta.full_path[:-4] + '.radius.npy') 236 | else: 237 | assert self.radius is not None 238 | radii = np.ones((len(pcd_o3d.points),), dtype=np.float32) * self.radius 239 | kpt_indices = np.load(meta.full_path[:-4] + '.keypts.npy') 240 | pcd = PointCloud.from_o3d(pcd_o3d, radii, kpt_indices) 241 | 242 | return { 243 | 'cloud': pcd, 244 | 'scene': meta.scene, 245 | 'seq': meta.seq, 246 | 'name': meta.name, 247 | } 248 | 249 | def __len__(self): 250 | return len(self.data_source) 251 | -------------------------------------------------------------------------------- /data/preprocess/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/craigleili/3DLocalMultiViewDesc/eb7d6912886da851e673e6a8528e9ba04001d73a/data/preprocess/__init__.py -------------------------------------------------------------------------------- /data/preprocess/compute_kpt_pairs.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import division 3 | 4 | from collections import namedtuple, defaultdict 5 | from pathlib import Path 6 | import argparse 7 | import math 8 | import numpy as np 9 | import os.path as osp 10 | import sys 11 | import random 12 | import pickle 13 | 14 | ROOT_DIR = osp.abspath('../../') 15 | if ROOT_DIR not in sys.path: 16 | sys.path.append(ROOT_DIR) 17 | 18 | from utils import io as uio 19 | 20 | 21 | def downsample_and_compute_fpfh(cfg, scene, seq, pcd_name): 22 | import open3d as o3d 23 | o3d.utility.set_verbosity_level(o3d.utility.VerbosityLevel.Error) 24 | 25 | print(' {}'.format(pcd_name)) 26 | 27 | temp_folder = osp.join(cfg.temp_root, scene, seq) 28 | 29 | pcd = o3d.io.read_point_cloud(osp.join(cfg.dataset_root, scene, seq, pcd_name)) 30 | pcd.normalize_normals() 31 | pcd_down = o3d.geometry.voxel_down_sample(pcd, cfg.voxel_size) 32 | 33 | pcd_fpfh = o3d.registration.compute_fpfh_feature( 34 | pcd_down, o3d.geometry.KDTreeSearchParamRadius(cfg.fpfh_radius)) 35 | 36 | pose = np.load(osp.join(cfg.dataset_root, scene, seq, pcd_name[:-4] + '.pose.npy')) 37 | pcd_down.transform(pose) 38 | 39 | o3d.io.write_point_cloud(osp.join(temp_folder, pcd_name), pcd_down) 40 | np.save(osp.join(temp_folder, pcd_name[:-4] + '.fpfh.npy'), np.asarray(pcd_fpfh.data).T) 41 | 42 | 43 | def match_fpfh(cfg, scene, seq, pcd_stem_i, pcd_stem_j): 44 | import open3d as o3d 45 | o3d.utility.set_verbosity_level(o3d.utility.VerbosityLevel.Error) 46 | 47 | print(' {} - {}'.format(pcd_stem_i, pcd_stem_j)) 48 | 49 | temp_folder = osp.join(cfg.temp_root, scene, seq) 50 | 51 | pcd_down_i = o3d.io.read_point_cloud(osp.join(temp_folder, pcd_stem_i + '.ply')) 52 | pcd_down_j = o3d.io.read_point_cloud(osp.join(temp_folder, pcd_stem_j + '.ply')) 53 | points_i = np.asarray(pcd_down_i.points) 54 | points_j = np.asarray(pcd_down_j.points) 55 | 56 | fpfh_i = np.load(osp.join(temp_folder, pcd_stem_i + '.fpfh.npy')) 57 | fpfh_j = np.load(osp.join(temp_folder, pcd_stem_j + '.fpfh.npy')) 58 | 59 | indices_i = np.arange(len(points_i))[np.any(fpfh_i != 0, axis=1)] 60 | indices_j = np.arange(len(points_j))[np.any(fpfh_j != 0, axis=1)] 61 | 62 | fpfh_i = fpfh_i[indices_i, :] 63 | fpfh_j = fpfh_j[indices_j, :] 64 | points_i = points_i[indices_i, :] 65 | points_j = points_j[indices_j, :] 
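# Annotation added for clarity (not in the original source): the block below builds a KD-tree
# over fragment j's FPFH descriptors, finds the nearest FPFH neighbor in j for every descriptor
# of fragment i, and keeps only those correspondences whose matched points lie within
# cfg.dist_thresh of each other after both fragments are transformed into the world frame;
# pairs with fewer than 128 surviving matches are discarded.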
66 | 67 | kdtree_j = o3d.geometry.KDTreeFlann(fpfh_j.T) 68 | nnindices = [ 69 | kdtree_j.search_knn_vector_xd(fpfh_i[k, :], 1)[1][0] for k in range(len(fpfh_i)) 70 | ] 71 | points_j = points_j[nnindices, :] 72 | 73 | distances = np.sqrt(np.sum(np.square(points_i - points_j), axis=1)) 74 | match_flags = distances <= cfg.dist_thresh 75 | 76 | if np.sum(match_flags) < 128: return 77 | 78 | points_i = points_i[match_flags, :] 79 | points_j = points_j[match_flags, :] 80 | 81 | pair_indices = list() 82 | for pcd_stem, query_points in zip([pcd_stem_i, pcd_stem_j], [points_i, points_j]): 83 | pcd = o3d.io.read_point_cloud(osp.join(cfg.dataset_root, scene, seq, pcd_stem + '.ply')) 84 | pose = np.load(osp.join(cfg.dataset_root, scene, seq, pcd_stem + '.pose.npy')) 85 | pcd.transform(pose) 86 | 87 | kdtree = o3d.geometry.KDTreeFlann(np.asarray(pcd.points).T) 88 | nnindices = [ 89 | kdtree.search_knn_vector_3d(query_points[k, :], 1)[1][0] 90 | for k in range(len(query_points)) 91 | ] 92 | pair_indices.append(np.asarray(nnindices)) 93 | 94 | pair_indices = np.stack(pair_indices, axis=1) 95 | out_npy_path = osp.join(cfg.out_root, scene, seq, 96 | '{}-{}.npy'.format(pcd_stem_i, pcd_stem_j)) 97 | np.save(out_npy_path, pair_indices) 98 | 99 | 100 | def collate_kpts(cfg, scene, seq): 101 | import open3d as o3d 102 | o3d.utility.set_verbosity_level(o3d.utility.VerbosityLevel.Error) 103 | 104 | kpt_pair_folder = osp.join(cfg.out_root, scene, seq) 105 | 106 | pcd_kpt_indices = defaultdict(list) 107 | for npy_file in uio.list_files(kpt_pair_folder, '*.npy', True): 108 | pcd_stem_i, pcd_stem_j = npy_file[:-4].split('-') 109 | kpt_pairs = np.load(osp.join(kpt_pair_folder, npy_file)) 110 | pcd_kpt_indices[pcd_stem_i].extend(kpt_pairs[:, 0].tolist()) 111 | pcd_kpt_indices[pcd_stem_j].extend(kpt_pairs[:, 1].tolist()) 112 | if len(pcd_kpt_indices) < 1: 113 | return 114 | 115 | scene_points = list() 116 | scene_normals = list() 117 | labels = list() 118 | for pcd_stem, kpt_indices in pcd_kpt_indices.items(): 119 | pcd = o3d.io.read_point_cloud(osp.join(cfg.dataset_root, scene, seq, pcd_stem + '.ply')) 120 | pose = np.load(osp.join(cfg.dataset_root, scene, seq, pcd_stem + '.pose.npy')) 121 | pcd.transform(pose) 122 | pcd.normalize_normals() 123 | 124 | uni_kpt_indices = list(set(kpt_indices)) 125 | scene_points.append(np.asarray(pcd.points)[uni_kpt_indices, :]) 126 | scene_normals.append(np.asarray(pcd.normals)[uni_kpt_indices, :]) 127 | labels.extend(list(zip([pcd_stem] * len(uni_kpt_indices), uni_kpt_indices))) 128 | scene_points = np.concatenate(scene_points, axis=0) 129 | scene_normals = np.concatenate(scene_normals, axis=0) 130 | 131 | print(' {} scene points/normals'.format(len(scene_points))) 132 | 133 | kdtree = o3d.geometry.KDTreeFlann(scene_points.T) 134 | num_points = len(scene_points) 135 | flags = [False] * num_points 136 | identities = list() 137 | for i in range(num_points): 138 | if flags[i]: continue 139 | 140 | [_, nn_indices, 141 | nn_dists2] = kdtree.search_radius_vector_3d(scene_points[i, :], cfg.dist_thresh) 142 | nn_indices = [j for j in nn_indices if not flags[j]] 143 | 144 | nn_normal = [scene_normals[j] for j in nn_indices] 145 | if len(nn_normal) < 2: continue 146 | nn_normal = np.mean(np.asarray(nn_normal), axis=0) 147 | nn_normal /= np.linalg.norm(nn_normal) 148 | 149 | nn_pcd_indices = defaultdict(list) 150 | for j in nn_indices: 151 | if np.arccos(np.clip(np.dot(scene_normals[j], nn_normal), -1, 152 | 1)) > cfg.angle_thresh: 153 | continue 154 | 
nn_pcd_indices[labels[j][0]].append(labels[j][1]) 155 | if len(nn_pcd_indices) < 2: continue 156 | 157 | identities.append({k: random.choice(v) for k, v in nn_pcd_indices.items()}) 158 | 159 | for j in nn_indices: 160 | flags[j] = True 161 | flags[i] = True 162 | 163 | with open(osp.join(cfg.out_root, scene, '{}.kpts.pkl'.format(seq)), 'wb') as fh: 164 | to_save = {'identities': identities} 165 | pickle.dump(to_save, fh, protocol=pickle.HIGHEST_PROTOCOL) 166 | 167 | print(' {} identities'.format(len(identities))) 168 | 169 | 170 | def run_seq(cfg, scene, seq): 171 | print(' Start {}'.format(seq)) 172 | 173 | out_folder = osp.join(cfg.out_root, scene, seq) 174 | if osp.exists(out_folder): 175 | print(' Skip...') 176 | return 177 | uio.make_clean_folder(out_folder) 178 | 179 | temp_folder = osp.join(cfg.temp_root, scene, seq) 180 | uio.make_clean_folder(temp_folder) 181 | 182 | print(' Start downsampling and computing FPFH') 183 | pcd_names = uio.list_files(osp.join(cfg.dataset_root, scene, seq), 184 | 'cloud_bin_*.ply', 185 | alphanum_sort=True) 186 | if cfg.threads > 1: 187 | from joblib import Parallel, delayed 188 | import multiprocessing 189 | 190 | Parallel(n_jobs=cfg.threads)( 191 | delayed(downsample_and_compute_fpfh)(cfg, scene, seq, pcd_name) 192 | for pcd_name in pcd_names) 193 | else: 194 | for pcd_name in pcd_names: 195 | downsample_and_compute_fpfh(cfg, scene, seq, pcd_name) 196 | 197 | print(' Start matching FPFH') 198 | overlaps = uio.list_files(osp.join(cfg.overlap_root, scene, seq), 199 | 'cloud_bin_*.npy', 200 | alphanum_sort=True) 201 | overlap_pcds = [npy_file[:-4].split('-') for npy_file in overlaps] 202 | if cfg.threads > 1: 203 | from joblib import Parallel, delayed 204 | import multiprocessing 205 | 206 | Parallel(n_jobs=cfg.threads)( 207 | delayed(match_fpfh)(cfg, scene, seq, pcd_pair[0], pcd_pair[1]) 208 | for pcd_pair in overlap_pcds) 209 | else: 210 | for pcd_pair in overlap_pcds: 211 | match_fpfh(cfg, scene, seq, pcd_pair[0], pcd_pair[1]) 212 | 213 | print(' Start collating kpts') 214 | collate_kpts(cfg, scene, seq) 215 | 216 | print(" Finished {}".format(seq)) 217 | 218 | 219 | def run_scene(cfg, sid, scene): 220 | print(" Start {}th scene {} ".format(sid, scene)) 221 | 222 | scene_folder = osp.join(cfg.dataset_root, scene) 223 | seqs = uio.list_folders(scene_folder, alphanum_sort=True) 224 | print(" {} sequences".format(len(seqs))) 225 | for seq in seqs: 226 | run_seq(cfg, scene, seq) 227 | 228 | print(" Finished {}th scene {} ".format(sid, scene)) 229 | 230 | 231 | def run(cfg): 232 | print("Start iterating dataset") 233 | 234 | scenes = uio.list_folders(cfg.dataset_root, alphanum_sort=False) 235 | print("{} scenes".format(len(scenes))) 236 | for sid, scene in enumerate(scenes): 237 | run_scene(cfg, sid, scene) 238 | 239 | print("Finished iterating dataset") 240 | 241 | 242 | def parse_args(): 243 | parser = argparse.ArgumentParser() 244 | parser.add_argument('--dataset_root', default='') 245 | parser.add_argument('--overlap_root', default='./log_overlaps') 246 | parser.add_argument('--out_root', default='./log_kpts') 247 | parser.add_argument('--temp_root', default='./log_temp') 248 | parser.add_argument('--threads', type=int, default=4) 249 | parser.add_argument('--voxel_size', type=float, default=0.05) 250 | parser.add_argument('--fpfh_radius', type=float, default=0.15) 251 | parser.add_argument('--dist_thresh', type=float, default=0.03) 252 | parser.add_argument('--angle_thresh', type=float, default=math.pi / 12.0) 253 | 254 | return parser.parse_args() 
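# Illustrative usage sketch (added annotation, not part of the original script; the dataset
# path below is a placeholder). Run this after compute_overlap.py so that --overlap_root
# already contains the cloud_bin_*-cloud_bin_*.npy overlap files:
#
#   python compute_kpt_pairs.py \
#       --dataset_root <3DMatch_Fragments_Root> \
#       --overlap_root ./log_overlaps \
#       --out_root ./log_kpts \
#       --threads 4
#
# Each sequence then yields a <seq>.kpts.pkl file whose 'identities' list (written by
# collate_kpts above) maps fragment stems to matching point indices, which is what
# batch-hard training samples from.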
255 | 256 | 257 | if __name__ == '__main__': 258 | cfg = parse_args() 259 | run(cfg) 260 | -------------------------------------------------------------------------------- /data/preprocess/compute_overlap.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import division 3 | 4 | from collections import namedtuple 5 | from pathlib import Path 6 | import argparse 7 | import math 8 | import numpy as np 9 | import os.path as osp 10 | import sys 11 | 12 | ROOT_DIR = osp.abspath('../../') 13 | if ROOT_DIR not in sys.path: 14 | sys.path.append(ROOT_DIR) 15 | 16 | from utils import io as uio 17 | 18 | PCDMeta = namedtuple('PCDMeta', ['name', 'cloud']) 19 | 20 | 21 | class Cloud(object): 22 | 23 | def __init__(self, points, indices): 24 | self.points = points 25 | self.indices = indices 26 | 27 | def save(self, filepath): 28 | np.savez(filepath, points=self.points, indices=self.indices) 29 | 30 | @classmethod 31 | def load_from(cls, filepath): 32 | arrays = np.load(filepath) 33 | return cls(arrays['points'], arrays['indices']) 34 | 35 | @classmethod 36 | def downsample_from(cls, pcd, max_points): 37 | points = np.asarray(pcd.points) 38 | n_points = len(points) 39 | if n_points <= max_points: 40 | return cls(points.astype(np.float32), np.arange(n_points)) 41 | else: 42 | indices = np.random.choice(n_points, max_points, replace=False) 43 | downsampled = points[indices, :].astype(np.float32) 44 | return cls(downsampled, indices) 45 | 46 | 47 | def downsample_pcds(in_root, out_root, max_points): 48 | import open3d as o3d 49 | o3d.utility.set_verbosity_level(o3d.utility.VerbosityLevel.Error) 50 | 51 | uio.may_create_folder(out_root) 52 | 53 | pcd_names = uio.list_files(in_root, 'cloud_bin_*.ply', alphanum_sort=True) 54 | pcd_stems = list() 55 | for pname in pcd_names: 56 | pstem = pname[:-4] 57 | pcd_path = osp.join(in_root, pname) 58 | pose_path = osp.join(in_root, pstem + '.pose.npy') 59 | pcd = o3d.io.read_point_cloud(pcd_path) 60 | pose = np.load(pose_path) 61 | pcd.transform(pose) 62 | 63 | down_pcd = Cloud.downsample_from(pcd, max_points) 64 | down_pcd.save(osp.join(out_root, pstem + '.npz')) 65 | 66 | pcd_stems.append(pstem) 67 | 68 | return pcd_stems 69 | 70 | 71 | def compute_overlap(cfg, scene, seq, pcd_names, pid, dist_thresh=0.075): 72 | import pyflann 73 | 74 | temp_folder = osp.join(cfg.temp_root, scene, seq) 75 | out_folder = osp.join(cfg.out_root, scene, seq) 76 | 77 | n_pcds = len(pcd_names) 78 | 79 | pcd_src = Cloud.load_from(osp.join(temp_folder, pcd_names[pid] + '.npz')) 80 | n_points_src = len(pcd_src.points) 81 | index_src = int(pcd_names[pid][10:]) 82 | kdtree_src = pyflann.FLANN() 83 | params_src = kdtree_src.build_index(pcd_src.points, algorithm='kdtree', trees=4) 84 | 85 | for j in range(pid + 1, n_pcds): 86 | pcd_dst = Cloud.load_from(osp.join(temp_folder, pcd_names[j] + '.npz')) 87 | n_points_dst = len(pcd_dst.points) 88 | index_dst = int(pcd_names[j][10:]) 89 | assert index_src < index_dst 90 | if index_src + 1 == index_dst: 91 | continue 92 | 93 | knn_indices, knn_dists2 = kdtree_src.nn_index(pcd_dst.points, 94 | num_neighbors=1, 95 | checks=params_src['checks']) 96 | pair_indices = np.stack((pcd_dst.indices, pcd_src.indices[knn_indices]), axis=1) 97 | corr_indices = pair_indices[np.sqrt(knn_dists2) <= dist_thresh, :] 98 | 99 | overlap_ratio = float(len(corr_indices)) / max(n_points_src, n_points_dst) 100 | if overlap_ratio < 0.3: 101 | continue 102 | np.save(osp.join(out_folder, 
'{}-{}.npy'.format(pcd_names[j], pcd_names[pid])), 103 | corr_indices) 104 | 105 | 106 | def run_seq(cfg, scene, seq): 107 | print(" Start {}".format(seq)) 108 | 109 | pcd_names = downsample_pcds(osp.join(cfg.dataset_root, scene, seq), 110 | osp.join(cfg.temp_root, scene, seq), cfg.max_points) 111 | n_pcds = len(pcd_names) 112 | 113 | out_folder = osp.join(cfg.out_root, scene, seq) 114 | if osp.exists(out_folder): 115 | print(' Skip...') 116 | return 117 | uio.may_create_folder(out_folder) 118 | 119 | if cfg.threads > 1: 120 | from joblib import Parallel, delayed 121 | import multiprocessing 122 | 123 | Parallel(n_jobs=cfg.threads)( 124 | delayed(compute_overlap)(cfg, scene, seq, pcd_names, i) for i in range(n_pcds)) 125 | else: 126 | for i in range(n_pcds): 127 | compute_overlap(cfg, scene, seq, pcd_names, i) 128 | 129 | print(" Finished {}".format(seq)) 130 | 131 | 132 | def run_scene(cfg, sid, scene): 133 | print(" Start {}th scene {} ".format(sid, scene)) 134 | 135 | scene_folder = osp.join(cfg.dataset_root, scene) 136 | seqs = uio.list_folders(scene_folder, alphanum_sort=True) 137 | print(" {} sequences".format(len(seqs))) 138 | for seq in seqs: 139 | run_seq(cfg, scene, seq) 140 | 141 | print(" Finished {}th scene {} ".format(sid, scene)) 142 | 143 | 144 | def run(cfg): 145 | print("Start iterating dataset") 146 | 147 | uio.may_create_folder(cfg.out_root) 148 | 149 | scenes = uio.list_folders(cfg.dataset_root, alphanum_sort=False) 150 | print("{} scenes".format(len(scenes))) 151 | for sid, scene in enumerate(scenes): 152 | run_scene(cfg, sid, scene) 153 | 154 | print("Finished iterating dataset") 155 | 156 | 157 | def parse_args(): 158 | parser = argparse.ArgumentParser() 159 | parser.add_argument('--dataset_root', default='') 160 | parser.add_argument('--temp_root', default='./log_temp') 161 | parser.add_argument('--out_root', default='./log_overlaps') 162 | parser.add_argument('--max_points', type=int, default=100000) 163 | parser.add_argument('--threads', type=int, default=3) 164 | 165 | return parser.parse_args() 166 | 167 | 168 | if __name__ == '__main__': 169 | cfg = parse_args() 170 | run(cfg) 171 | -------------------------------------------------------------------------------- /data/preprocess/compute_radius.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import division 3 | 4 | from collections import namedtuple 5 | from pathlib import Path 6 | import argparse 7 | import math 8 | import numpy as np 9 | import os.path as osp 10 | import sys 11 | 12 | ROOT_DIR = osp.abspath('../../') 13 | if ROOT_DIR not in sys.path: 14 | sys.path.append(ROOT_DIR) 15 | 16 | from utils import io as uio 17 | 18 | 19 | def compute_radius(cfg, scene, seq, pcd_name): 20 | import open3d as o3d 21 | o3d.utility.set_verbosity_level(o3d.utility.VerbosityLevel.Error) 22 | 23 | nn_radius = cfg.radius 24 | 25 | print(' {}'.format(pcd_name)) 26 | 27 | pcd = o3d.io.read_point_cloud(osp.join(cfg.dataset_root, scene, seq, pcd_name)) 28 | num_points = len(pcd.points) 29 | kdtree = o3d.geometry.KDTreeFlann(pcd) 30 | 31 | radii = list() 32 | for i in range(num_points): 33 | [k, nn_indices, nn_dists2] = kdtree.search_radius_vector_3d(pcd.points[i], nn_radius) 34 | if k < 2: 35 | radii.append(0) 36 | else: 37 | nn_indices = np.asarray(nn_indices) 38 | nn_dists2 = np.asarray(nn_dists2) 39 | nn_dists = np.sqrt(nn_dists2[nn_indices != i]) 40 | radius = np.mean(nn_dists) * 0.5 41 | radii.append(radius) 42 | radii = 
np.asarray(radii, dtype=np.float32) 43 | np.save(osp.join(cfg.dataset_root, scene, seq, pcd_name[:-4] + '.radius.npy'), radii) 44 | 45 | 46 | def run_seq(cfg, scene, seq): 47 | print(" Start {}".format(seq)) 48 | 49 | pcd_names = uio.list_files(osp.join(cfg.dataset_root, scene, seq), 50 | '*.ply', 51 | alphanum_sort=True) 52 | if cfg.threads > 1: 53 | from joblib import Parallel, delayed 54 | import multiprocessing 55 | 56 | Parallel(n_jobs=cfg.threads)( 57 | delayed(compute_radius)(cfg, scene, seq, pcd_name) for pcd_name in pcd_names) 58 | else: 59 | for pcd_name in pcd_names: 60 | compute_radius(cfg, scene, seq, pcd_name) 61 | 62 | print(" Finished {}".format(seq)) 63 | 64 | 65 | def run_scene(cfg, sid, scene): 66 | print(" Start {}th scene {} ".format(sid, scene)) 67 | 68 | scene_folder = osp.join(cfg.dataset_root, scene) 69 | seqs = uio.list_folders(scene_folder, alphanum_sort=True) 70 | print(" {} sequences".format(len(seqs))) 71 | for seq in seqs: 72 | run_seq(cfg, scene, seq) 73 | 74 | print(" Finished {}th scene {} ".format(sid, scene)) 75 | 76 | 77 | def run(cfg): 78 | print("Start iterating dataset") 79 | 80 | scenes = uio.list_folders(cfg.dataset_root, alphanum_sort=False) 81 | print("{} scenes".format(len(scenes))) 82 | for sid, scene in enumerate(scenes): 83 | run_scene(cfg, sid, scene) 84 | 85 | print("Finished iterating dataset") 86 | 87 | 88 | def parse_args(): 89 | parser = argparse.ArgumentParser() 90 | parser.add_argument('--dataset_root', default='<3DMatch_Fragments_Root>') 91 | parser.add_argument('--radius', type=float, default=0.075) 92 | parser.add_argument('--threads', type=int, default=8) 93 | 94 | return parser.parse_args() 95 | 96 | 97 | if __name__ == '__main__': 98 | cfg = parse_args() 99 | run(cfg) -------------------------------------------------------------------------------- /data/preprocess/fuse_fragments_3DMatch.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import division 3 | 4 | from pathlib import Path 5 | import argparse 6 | import math 7 | import numpy as np 8 | import os.path as osp 9 | import sys 10 | import pickle 11 | 12 | ROOT_DIR = osp.abspath('../../') 13 | if ROOT_DIR not in sys.path: 14 | sys.path.append(ROOT_DIR) 15 | 16 | from utils import io as uio 17 | 18 | 19 | def read_intrinsic(filepath, width, height): 20 | import open3d as o3d 21 | 22 | m = np.loadtxt(filepath, dtype=np.float32) 23 | intrinsic = o3d.camera.PinholeCameraIntrinsic(width, height, m[0, 0], m[1, 1], m[0, 2], 24 | m[1, 2]) 25 | return intrinsic 26 | 27 | 28 | def read_extrinsic(filepath): 29 | m = np.loadtxt(filepath, dtype=np.float32) 30 | if np.isnan(m).any(): 31 | return None 32 | return m 33 | 34 | 35 | def read_rgbd_image(cfg, color_file, depth_file, convert_rgb_to_intensity): 36 | import open3d as o3d 37 | 38 | color = o3d.io.read_image(color_file) 39 | depth = o3d.io.read_image(depth_file) 40 | rgbd_image = o3d.geometry.create_rgbd_image_from_color_and_depth( 41 | color, depth, cfg.depth_scale, cfg.depth_trunc, convert_rgb_to_intensity) 42 | return rgbd_image 43 | 44 | 45 | def process_single_fragment(cfg, color_files, depth_files, frag_id, n_frags, intrinsic_path, 46 | out_folder): 47 | import open3d as o3d 48 | o3d.utility.set_verbosity_level(o3d.utility.VerbosityLevel.Error) 49 | 50 | n_frames = len(color_files) 51 | intrinsic = read_intrinsic(intrinsic_path, cfg.width, cfg.height) 52 | 53 | volume = o3d.integration.ScalableTSDFVolume( 54 | 
voxel_length=cfg.tsdf_cubic_size / 512.0, 55 | sdf_trunc=0.04, 56 | color_type=o3d.integration.TSDFVolumeColorType.RGB8) 57 | 58 | sid = frag_id * cfg.frames_per_frag 59 | eid = min(sid + cfg.frames_per_frag, n_frames) 60 | pose_base2world = None 61 | pose_base2world_inv = None 62 | frag_frames = list() 63 | for fid in range(sid, eid): 64 | color_path = color_files[fid] 65 | depth_path = depth_files[fid] 66 | pose_path = color_path[:-10] + '.pose.txt' 67 | 68 | pose_cam2world = read_extrinsic(pose_path) 69 | if pose_cam2world is None: 70 | continue 71 | if fid == sid: 72 | pose_base2world = pose_cam2world 73 | pose_base2world_inv = np.linalg.inv(pose_base2world) 74 | if pose_base2world_inv is None: 75 | break 76 | pose_cam2world = np.matmul(pose_base2world_inv, pose_cam2world) 77 | 78 | rgbd = read_rgbd_image(cfg, color_path, depth_path, False) 79 | volume.integrate(rgbd, intrinsic, np.linalg.inv(pose_cam2world)) 80 | 81 | frag_frames.append(color_path[:-10]) 82 | 83 | if pose_base2world_inv is None: 84 | return 85 | 86 | pcloud = volume.extract_point_cloud() 87 | o3d.geometry.estimate_normals(pcloud) 88 | o3d.io.write_point_cloud(osp.join(out_folder, 'cloud_bin_{}.ply'.format(frag_id)), pcloud) 89 | 90 | np.save(osp.join(out_folder, 'cloud_bin_{}.pose.npy'.format(frag_id)), pose_base2world) 91 | 92 | with open(osp.join(out_folder, 'cloud_bin_{}.frames.pkl'.format(frag_id)), 'wb') as fh: 93 | to_save = {'frames': frag_frames} 94 | pickle.dump(to_save, fh, protocol=pickle.HIGHEST_PROTOCOL) 95 | 96 | 97 | def run_seq(cfg, scene, seq): 98 | print(" Start {}".format(seq)) 99 | 100 | seq_folder = osp.join(cfg.dataset_root, scene, seq) 101 | color_names = uio.list_files(seq_folder, '*.color.png', alphanum_sort=True) 102 | color_paths = [osp.join(seq_folder, cf) for cf in color_names] 103 | depth_paths = [osp.join(seq_folder, cf[:-10] + '.depth.png') for cf in color_names] 104 | 105 | n_frames = len(color_paths) 106 | n_frags = int(math.ceil(float(n_frames) / cfg.frames_per_frag)) 107 | 108 | out_folder = osp.join(cfg.out_root, scene, seq) 109 | uio.may_create_folder(out_folder) 110 | 111 | intrinsic_path = osp.join(cfg.dataset_root, scene, 'camera-intrinsics.txt') 112 | 113 | if cfg.threads > 1: 114 | from joblib import Parallel, delayed 115 | import multiprocessing 116 | 117 | Parallel(n_jobs=cfg.threads)(delayed(process_single_fragment)( 118 | cfg, color_paths, depth_paths, frag_id, n_frags, intrinsic_path, out_folder) 119 | for frag_id in range(n_frags)) 120 | 121 | else: 122 | for frag_id in range(n_frags): 123 | process_single_fragment(cfg, color_paths, depth_paths, frag_id, n_frags, 124 | intrinsic_path, out_folder) 125 | 126 | print(" Finished {}".format(seq)) 127 | 128 | 129 | def run_scene(cfg, scene): 130 | print(" Start scene {} ".format(scene)) 131 | 132 | scene_folder = osp.join(cfg.dataset_root, scene) 133 | seqs = uio.list_folders(scene_folder, alphanum_sort=True) 134 | print(" {} sequences".format(len(seqs))) 135 | for seq in seqs: 136 | run_seq(cfg, scene, seq) 137 | 138 | print(" Finished scene {} ".format(scene)) 139 | 140 | 141 | def run(cfg): 142 | print("Start iterating dataset") 143 | 144 | uio.may_create_folder(cfg.out_root) 145 | 146 | scenes = uio.list_folders(cfg.dataset_root, alphanum_sort=False) 147 | print("{} scenes".format(len(scenes))) 148 | for scene in scenes: 149 | run_scene(cfg, scene) 150 | 151 | print("Finished iterating dataset") 152 | 153 | 154 | def parse_args(): 155 | parser = argparse.ArgumentParser() 156 | parser.add_argument('--dataset_root', 
default='<3DMatch_RGBD_Root>') 157 | parser.add_argument('--out_root', default='./log_fragments') 158 | parser.add_argument('--depth_scale', type=float, default=1000.0) 159 | parser.add_argument('--depth_trunc', type=float, default=6.0) 160 | parser.add_argument('--frames_per_frag', type=int, default=50) 161 | parser.add_argument('--height', type=int, default=480) 162 | parser.add_argument('--threads', type=int, default=4) 163 | parser.add_argument('--tsdf_cubic_size', type=float, default=3.0) 164 | parser.add_argument('--width', type=int, default=640) 165 | 166 | return parser.parse_args() 167 | 168 | 169 | if __name__ == '__main__': 170 | cfg = parse_args() 171 | run(cfg) 172 | 173 | 174 | # Scene list: 175 | # 176 | # 7-scenes-chess 177 | # 7-scenes-fire 178 | # 7-scenes-heads 179 | # 7-scenes-office 180 | # 7-scenes-pumpkin 181 | # 7-scenes-stairs 182 | # bundlefusion-apt0 183 | # bundlefusion-apt1 184 | # bundlefusion-apt2 185 | # bundlefusion-copyroom 186 | # bundlefusion-office0 187 | # bundlefusion-office1 188 | # bundlefusion-office2 189 | # bundlefusion-office3 190 | # rgbd-scenes-v2-scene_01 191 | # rgbd-scenes-v2-scene_02 192 | # rgbd-scenes-v2-scene_03 193 | # rgbd-scenes-v2-scene_04 194 | # rgbd-scenes-v2-scene_05 195 | # rgbd-scenes-v2-scene_06 196 | # rgbd-scenes-v2-scene_07 197 | # rgbd-scenes-v2-scene_08 198 | # rgbd-scenes-v2-scene_09 199 | # rgbd-scenes-v2-scene_10 200 | # rgbd-scenes-v2-scene_11 201 | # rgbd-scenes-v2-scene_12 202 | # rgbd-scenes-v2-scene_13 203 | # rgbd-scenes-v2-scene_14 204 | # sun3d-harvard_c5-hv_c5_1 205 | # sun3d-harvard_c6-hv_c6_1 206 | # sun3d-harvard_c8-hv_c8_3 207 | # sun3d-home_bksh-home_bksh_oct_30_2012_scan2_erika 208 | # sun3d-hotel_nips2012-nips_4 209 | # sun3d-hotel_sf-scan1 210 | # sun3d-mit_32_d507-d507_2 211 | # sun3d-mit_46_ted_lab1-ted_lab_2 212 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu16.04 2 | 3 | ENV LANG=C.UTF-8 LC_ALL=C.UTF-8 4 | 5 | RUN apt-get update && apt-get install -qq -y --no-install-recommends \ 6 | build-essential \ 7 | ca-certificates \ 8 | cmake \ 9 | curl \ 10 | git \ 11 | libgl1-mesa-dev \ 12 | libgl1-mesa-glx \ 13 | libglew-dev \ 14 | libglfw3-dev \ 15 | libglu1-mesa-dev \ 16 | libgtk2.0-0 \ 17 | libhdf5-dev \ 18 | libjpeg-dev \ 19 | liblmdb-dev \ 20 | libopenblas-dev \ 21 | libosmesa6-dev \ 22 | libpng-dev \ 23 | libsm6 \ 24 | libxcursor-dev \ 25 | libxext6 \ 26 | libxi-dev \ 27 | libxinerama-dev \ 28 | libxrandr-dev \ 29 | lxde \ 30 | mesa-utils \ 31 | ninja-build \ 32 | pkg-config \ 33 | unzip \ 34 | vim \ 35 | wget && \ 36 | rm -rf /var/lib/apt/lists/* 37 | 38 | RUN wget --quiet https://repo.continuum.io/miniconda/Miniconda3-4.2.12-Linux-x86_64.sh -O ~/miniconda.sh && \ 39 | /bin/bash ~/miniconda.sh -b -p /opt/conda && \ 40 | rm ~/miniconda.sh 41 | 42 | RUN /opt/conda/bin/conda install -c anaconda protobuf 43 | RUN /opt/conda/bin/pip install --upgrade pip 44 | RUN /opt/conda/bin/pip install --upgrade --ignore-installed setuptools 45 | RUN /opt/conda/bin/pip install \ 46 | addict \ 47 | fire \ 48 | future \ 49 | h5py \ 50 | imageio \ 51 | imgaug \ 52 | joblib \ 53 | lmdb \ 54 | matplotlib \ 55 | ninja \ 56 | numpy \ 57 | open3d-python==0.7.0.0 \ 58 | opencv-python \ 59 | pandas \ 60 | Pillow \ 61 | protobuf \ 62 | pyflann3==1.8.4.1 \ 63 | pytorch-ignite==0.2.0 \ 64 | pytz \ 65 | PyYAML \ 66 | scikit-image \ 67 | scikit-learn 
\ 68 | scipy \ 69 | tensorboard \ 70 | torch \ 71 | torchvision \ 72 | tqdm \ 73 | typing 74 | RUN /opt/conda/bin/pip install git+https://github.com/rbgirshick/yacs 75 | RUN /opt/conda/bin/pip install git+https://github.com/Cadene/pretrained-models.pytorch 76 | # RUN /opt/conda/bin/conda install -c conda-forge pyflann=1.8.4 77 | RUN /opt/conda/bin/conda clean -ya 78 | 79 | ENV LD_LIBRARY_PATH .:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH 80 | ENV CPATH /usr/local/cuda/include:$CPATH 81 | ENV PATH /usr/local/cuda/bin:/opt/conda/bin:$PATH 82 | -------------------------------------------------------------------------------- /docker/build.sh: -------------------------------------------------------------------------------- 1 | sudo nvidia-docker build --rm --network=host --no-cache --tag mvdesc . 2 | -------------------------------------------------------------------------------- /evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/craigleili/3DLocalMultiViewDesc/eb7d6912886da851e673e6a8528e9ba04001d73a/evaluation/__init__.py -------------------------------------------------------------------------------- /evaluation/eval_geomreg_3dmatch.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | 4 | import argparse 5 | import numpy as np 6 | import os.path as osp 7 | import sys 8 | import pickle 9 | 10 | from pathlib import Path 11 | from collections import namedtuple 12 | 13 | ROOT_DIR = osp.abspath('../') 14 | if ROOT_DIR not in sys.path: 15 | sys.path.append(ROOT_DIR) 16 | 17 | from utils import io as uio 18 | 19 | 20 | INLIER_THRESHES = [ 21 | 0.1, 22 | ] 23 | INLIER_RATIO_THRESHES = (np.arange(0, 21, dtype=np.float32) * 0.2 / 20).tolist() 24 | 25 | VALID_SCENE_NAMES = [] 26 | 27 | TEST_SCENE_NAMES = [ 28 | '7-scenes-redkitchen', 29 | 'sun3d-home_at-home_at_scan1_2013_jan_1', 30 | 'sun3d-home_md-home_md_scan9_2012_sep_30', 31 | 'sun3d-hotel_uc-scan3', 32 | 'sun3d-hotel_umd-maryland_hotel1', 33 | 'sun3d-hotel_umd-maryland_hotel3', 34 | 'sun3d-mit_76_studyroom-76-1studyroom2', 35 | 'sun3d-mit_lab_hj-lab_hj_tea_nov_2_2012_scan1_erika', 36 | ] 37 | 38 | TEST_SCENE_ABBR_NAMES = [ 39 | 'Kitchen', 40 | 'Home_1', 41 | 'Home_2', 42 | 'Hotel_1', 43 | 'Hotel_2', 44 | 'Hotel_3', 45 | 'Study', 46 | 'MIT_Lab', 47 | ] 48 | 49 | 50 | Pose = namedtuple('Pose', ['indices', 'transformation']) 51 | 52 | 53 | class RegisterResult(object): 54 | 55 | def __init__(self, frag1_name, frag2_name, num_inliers, inlier_ratio, gt_flag): 56 | self.frag1_name = frag1_name 57 | self.frag2_name = frag2_name 58 | self.num_inliers = num_inliers 59 | self.inlier_ratio = inlier_ratio 60 | self.gt_flag = gt_flag 61 | 62 | 63 | def read_log(filepath): 64 | lines = uio.read_lines(filepath) 65 | n_poses = len(lines) // 5 66 | poses = list() 67 | for i in range(n_poses): 68 | items = lines[i * 5].split() # Meta line 69 | id0, id1, id2 = int(items[0]), int(items[1]), int(items[2]) 70 | mat = np.zeros((4, 4), dtype=np.float64) 71 | for j in range(4): 72 | items = lines[i * 5 + j + 1].split() 73 | for k in range(4): 74 | mat[j, k] = float(items[k]) 75 | poses.append(Pose(indices=[id0, id1, id2], transformation=mat)) 76 | return poses 77 | 78 | 79 | def read_keypoints(filepath): 80 | return np.load(filepath) 81 | 82 | 83 | def read_descriptors(desc_type, root_dir, scene_name, seq_name, pcd_name): 84 | if 
desc_type.startswith('MVPoolNet') or desc_type.startswith('Ours'): 85 | filepath = osp.join(root_dir, scene_name, seq_name, pcd_name + '.desc.npy') 86 | descs = np.load(filepath) 87 | return descs 88 | else: 89 | raise RuntimeError('[!] The descriptor type {} is not supported.'.format(desc_type)) 90 | 91 | 92 | def knn_search(points_src, points_dst, k=1): 93 | import open3d as o3d 94 | o3d.utility.set_verbosity_level(o3d.utility.VerbosityLevel.Error) 95 | 96 | kdtree = o3d.geometry.KDTreeFlann(np.asarray(points_dst.T, dtype=np.float64)) 97 | points_src = np.asarray(points_src, dtype=np.float64) 98 | nnindices = [ 99 | kdtree.search_knn_vector_xd(points_src[i, :], k)[1] for i in range(len(points_src)) 100 | ] 101 | if k == 1: 102 | return np.asarray(nnindices, dtype=np.int32)[:, 0] 103 | else: 104 | return np.asarray(nnindices, dtype=np.int32) 105 | 106 | 107 | def register_fragment_pair(scene_name, seq_name, frag1_name, frag2_name, desc_type, poses, 108 | pcloud_root, desc_root, inlier_thresh): 109 | import open3d as o3d 110 | o3d.utility.set_verbosity_level(o3d.utility.VerbosityLevel.Error) 111 | 112 | print(' Start {} - {} - {} - {} - {}'.format(desc_type, scene_name, seq_name, frag1_name, 113 | frag2_name)) 114 | 115 | frag1_id = int(frag1_name.split('_')[-1]) 116 | frag2_id = int(frag2_name.split('_')[-1]) 117 | assert frag1_id < frag2_id 118 | 119 | overlap_pid = -1 120 | for pid, pose in enumerate(poses): 121 | if pose.indices[0] == frag1_id and pose.indices[1] == frag2_id: 122 | overlap_pid = pid 123 | break 124 | if overlap_pid < 0: 125 | num_inliers, inlier_ratio, gt_flag = 0, 0., 0 126 | return num_inliers, inlier_ratio, gt_flag 127 | 128 | frag1_pcd = o3d.io.read_point_cloud( 129 | osp.join(pcloud_root, scene_name, seq_name, frag1_name + '.ply')) 130 | frag2_pcd = o3d.io.read_point_cloud( 131 | osp.join(pcloud_root, scene_name, seq_name, frag2_name + '.ply')) 132 | frag1_kpt_indices = read_keypoints( 133 | osp.join(pcloud_root, scene_name, seq_name, frag1_name + '.keypts.npy')) 134 | frag2_kpt_indices = read_keypoints( 135 | osp.join(pcloud_root, scene_name, seq_name, frag2_name + '.keypts.npy')) 136 | frag1_kpts = np.asarray(frag1_pcd.points)[frag1_kpt_indices, :] 137 | frag2_kpts = np.asarray(frag2_pcd.points)[frag2_kpt_indices, :] 138 | 139 | frag1_descs = read_descriptors(desc_type, desc_root, scene_name, seq_name, frag1_name) 140 | frag2_descs = read_descriptors(desc_type, desc_root, scene_name, seq_name, frag2_name) 141 | 142 | assert len(frag1_kpt_indices) == len(frag1_descs) 143 | assert len(frag2_kpt_indices) == len(frag2_descs) 144 | 145 | frag21_nnindices = knn_search(frag2_descs, frag1_descs) 146 | assert frag21_nnindices.ndim == 1 147 | 148 | frag12_nnindices = knn_search(frag1_descs, frag2_descs) 149 | assert frag12_nnindices.ndim == 1 150 | 151 | frag2_match_indices = np.flatnonzero( 152 | np.equal(np.arange(len(frag21_nnindices)), frag12_nnindices[frag21_nnindices])) 153 | frag2_match_kpts = frag2_kpts[frag2_match_indices, :] 154 | frag1_match_kpts = frag1_kpts[frag21_nnindices[frag2_match_indices], :] 155 | 156 | frag2_pcd_tmp = o3d.geometry.PointCloud() 157 | frag2_pcd_tmp.points = o3d.utility.Vector3dVector(frag2_match_kpts) 158 | frag2_pcd_tmp.transform(poses[overlap_pid].transformation) 159 | 160 | distances = np.sqrt( 161 | np.sum(np.square(frag1_match_kpts - np.asarray(frag2_pcd_tmp.points)), axis=1)) 162 | num_inliers = np.sum(distances < inlier_thresh) 163 | inlier_ratio = num_inliers / len(distances) 164 | gt_flag = 1 165 | return num_inliers, 
inlier_ratio, gt_flag 166 | 167 | 168 | def run_scene_matching(scene_name, 169 | seq_name, 170 | desc_type, 171 | pcloud_root, 172 | desc_root, 173 | out_root, 174 | inlier_thresh=0.1, 175 | n_threads=1): 176 | out_folder = osp.join(out_root, desc_type) 177 | uio.may_create_folder(out_folder) 178 | 179 | out_filename = '{}-{}-{:.2f}'.format(scene_name, seq_name, inlier_thresh) 180 | if Path(osp.join(out_folder, out_filename + '.pkl')).is_file(): 181 | print('[*] {} already exists. Skip computation.'.format(out_filename)) 182 | return osp.join(out_folder, out_filename) 183 | 184 | fragment_names = uio.list_files(osp.join(pcloud_root, scene_name, seq_name), 185 | '*.ply', 186 | alphanum_sort=True) 187 | fragment_names = [fn[:-4] for fn in fragment_names] 188 | n_fragments = len(fragment_names) 189 | 190 | register_results = [ 191 | RegisterResult( 192 | frag1_name=fragment_names[i], 193 | frag2_name=fragment_names[j], 194 | num_inliers=None, 195 | inlier_ratio=None, 196 | gt_flag=None, 197 | ) for i in range(n_fragments) for j in range(i + 1, n_fragments) 198 | ] 199 | poses = read_log(osp.join(pcloud_root, scene_name, seq_name, 'gt.log')) 200 | 201 | if n_threads > 1: 202 | from joblib import Parallel, delayed 203 | import multiprocessing 204 | 205 | results = Parallel(n_jobs=n_threads)(delayed( 206 | register_fragment_pair)(scene_name, seq_name, k.frag1_name, k.frag2_name, desc_type, 207 | poses, pcloud_root, desc_root, inlier_thresh) 208 | for k in register_results) 209 | for k, res in enumerate(results): 210 | register_results[k].num_inliers = res[0] 211 | register_results[k].inlier_ratio = res[1] 212 | register_results[k].gt_flag = res[2] 213 | else: 214 | for k in range(len(register_results)): 215 | num_inliers, inlier_ratio, gt_flag = register_fragment_pair( 216 | scene_name, seq_name, register_results[k].frag1_name, 217 | register_results[k].frag2_name, desc_type, poses, pcloud_root, desc_root, 218 | inlier_thresh) 219 | register_results[k].num_inliers = num_inliers 220 | register_results[k].inlier_ratio = inlier_ratio 221 | register_results[k].gt_flag = gt_flag 222 | 223 | with open(osp.join(out_folder, out_filename + '.pkl'), 'wb') as fh: 224 | to_save = { 225 | 'register_results': register_results, 226 | 'scene_name': scene_name, 227 | 'seq_name': seq_name, 228 | 'desc_type': desc_type, 229 | 'inlier_thresh': inlier_thresh, 230 | 'n_threads': n_threads, 231 | } 232 | pickle.dump(to_save, fh, protocol=pickle.HIGHEST_PROTOCOL) 233 | with open(osp.join(out_folder, out_filename + '.txt'), 'w') as fh: 234 | for k in register_results: 235 | fh.write('{} {} {} {:.8f} {}\n'.format(k.frag1_name, k.frag2_name, k.num_inliers, 236 | k.inlier_ratio, k.gt_flag)) 237 | 238 | return osp.join(out_folder, out_filename) 239 | 240 | 241 | def compute_metrics(match_paths, desc_type, inlier_thresh, out_root, scene_abbr_fn=None): 242 | scenes = list() 243 | all_recalls = list() 244 | all_inliers = list() 245 | 246 | for match_path in match_paths: 247 | with open(match_path + '.pkl', 'rb') as fh: 248 | saved = pickle.load(fh) 249 | register_results = saved['register_results'] 250 | assert saved['inlier_thresh'] == inlier_thresh 251 | if scene_abbr_fn is not None: 252 | scenes.append(scene_abbr_fn(saved['scene_name'])) 253 | else: 254 | scenes.append(saved['scene_name']) 255 | 256 | num_inliers = list() 257 | inlier_ratios = list() 258 | gt_flags = list() 259 | for rr in register_results: 260 | num_inliers.append(rr.num_inliers) 261 | inlier_ratios.append(rr.inlier_ratio) 262 | gt_flags.append(rr.gt_flag) 
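# Annotation added for clarity (not in the original source): for each ground-truth overlapping
# pair (gt_flag == 1), the pair counts as correctly matched when its inlier ratio exceeds the
# threshold tau_2; recall is the fraction of such pairs, and the mean number of inlier matches
# over ground-truth pairs is reported alongside each threshold in INLIER_RATIO_THRESHES.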
263 | num_inliers = np.asarray(num_inliers, dtype=np.int32) 264 | inlier_ratios = np.asarray(inlier_ratios, dtype=np.float32) 265 | gt_flags = np.asarray(gt_flags, dtype=np.int32) 266 | 267 | recalls = list() 268 | inliers = list() 269 | for inlier_ratio_thresh in INLIER_RATIO_THRESHES: 270 | n_correct_matches = np.sum(inlier_ratios[gt_flags == 1] > inlier_ratio_thresh) 271 | recalls.append(float(n_correct_matches) / np.sum(gt_flags == 1)) 272 | inliers.append(np.mean(num_inliers[gt_flags == 1])) 273 | all_recalls.append(recalls) 274 | all_inliers.append(inliers) 275 | 276 | out_path = osp.join(out_root, '{}-metrics-{:.2f}'.format(desc_type, inlier_thresh)) 277 | with open(out_path + '.csv', 'w') as fh: 278 | header_str = 'SceneName' 279 | for inlier_ratio_thresh in INLIER_RATIO_THRESHES: 280 | header_str += ',Recall-{:.2f},AverageMatches-{:.2f}'.format( 281 | inlier_ratio_thresh, inlier_ratio_thresh) 282 | fh.write(header_str + '\n') 283 | 284 | for scene_name, recalls, inliers in zip(scenes, all_recalls, all_inliers): 285 | row_str = scene_name 286 | for recall, num_inlier in zip(recalls, inliers): 287 | row_str += ',{:.6f},{:.3f}'.format(recall, num_inlier) 288 | fh.write(row_str + '\n') 289 | 290 | avg_recalls = np.mean(np.asarray(all_recalls), axis=0).tolist() 291 | avg_inliers = np.mean(np.asarray(all_inliers), axis=0).tolist() 292 | avg_row_str = 'Average' 293 | for recall, num_inlier in zip(avg_recalls, avg_inliers): 294 | avg_row_str += ',{:.6f},{:.3f}'.format(recall, num_inlier) 295 | fh.write(avg_row_str + '\n') 296 | 297 | with open(out_path + '.pkl', 'wb') as fh: 298 | to_save = { 299 | 'scenes': scenes, 300 | 'recalls': all_recalls, 301 | 'inliers': all_inliers, 302 | 'threshes': INLIER_RATIO_THRESHES 303 | } 304 | pickle.dump(to_save, fh, protocol=pickle.HIGHEST_PROTOCOL) 305 | 306 | return out_path 307 | 308 | 309 | def plot_recall_curve(desc_types, stat_paths, out_path): 310 | import matplotlib 311 | matplotlib.use('Agg') 312 | from matplotlib import rc 313 | #rc('text', usetex=True) 314 | import matplotlib.pyplot as plt 315 | 316 | figure = plt.figure() 317 | for stat_path in stat_paths: 318 | with open(stat_path + '.pkl', 'rb') as fh: 319 | saved = pickle.load(fh) 320 | threshes = np.asarray(saved['threshes']) 321 | all_recalls = np.asarray(saved['recalls']) 322 | avg_recalls = np.mean(all_recalls, axis=0) 323 | plt.plot(threshes, avg_recalls * 100, linewidth=1) 324 | 325 | plt.grid(True) 326 | plt.xlim(0, max(threshes)) 327 | plt.xticks(np.arange(0, 6, dtype=np.float32) * max(threshes) / 5) 328 | plt.ylim(0, 100) 329 | plt.xlabel(r'$\tau_2$') 330 | plt.ylabel('Recall (%)') 331 | plt.legend(desc_types, loc='lower left') 332 | 333 | figure.savefig(out_path + '.pdf', bbox_inches='tight') 334 | 335 | 336 | def evaluate(cfg): 337 | assert len(cfg.desc_types) == len(cfg.desc_roots) 338 | 339 | if cfg.mode == 'valid': 340 | scene_names = VALID_SCENE_NAMES 341 | scene_abbr_fn = None 342 | elif cfg.mode == 'test': 343 | scene_names = TEST_SCENE_NAMES 344 | scene_abbr_fn = lambda sn: TEST_SCENE_ABBR_NAMES[TEST_SCENE_NAMES.index(sn)] 345 | else: 346 | raise RuntimeError('[!] 
Mode is not supported.') 347 | 348 | for inlier_thresh in INLIER_THRESHES: 349 | print('Start inlier_thresh {:.2f}m'.format(inlier_thresh)) 350 | stat_paths = list() 351 | for desc_type, desc_root in zip(cfg.desc_types, cfg.desc_roots): 352 | print(' Start', desc_type) 353 | seq_name = 'seq-01' 354 | match_paths = list() 355 | for scene_name in scene_names: 356 | match_path = run_scene_matching(scene_name, seq_name, desc_type, 357 | cfg.pcloud_root, desc_root, cfg.out_root, 358 | inlier_thresh, cfg.threads) 359 | match_paths.append(match_path) 360 | stat_path = compute_metrics(match_paths, desc_type, inlier_thresh, cfg.out_root, 361 | scene_abbr_fn) 362 | stat_paths.append(stat_path) 363 | plot_recall_curve(cfg.desc_types, stat_paths, 364 | osp.join(cfg.out_root, 'recall-{:.2f}'.format(inlier_thresh))) 365 | 366 | print('All done.') 367 | 368 | 369 | def parse_args(): 370 | parser = argparse.ArgumentParser() 371 | parser.add_argument('--pcloud_root', default='<3DMatch_Root>') 372 | parser.add_argument('--out_root', default='./log_3dmatch') 373 | parser.add_argument('--desc_roots', nargs='+') 374 | parser.add_argument('--desc_types', nargs='+') 375 | parser.add_argument('--mode', default='test') 376 | parser.add_argument('--threads', type=int, default=4) 377 | 378 | return parser.parse_args() 379 | 380 | 381 | if __name__ == '__main__': 382 | cfg = parse_args() 383 | evaluate(cfg) 384 | -------------------------------------------------------------------------------- /evaluation/eval_geomreg_3dmatch.sh: -------------------------------------------------------------------------------- 1 | python eval_geomreg_3dmatch.py \ 2 | --pcloud_root \ 3 | <3DMatch_Root> \ 4 | --desc_types \ 5 | Ours \ 6 | --desc_roots \ 7 | 8 | -------------------------------------------------------------------------------- /evaluation/eval_geomreg_eth.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | 4 | import argparse 5 | import numpy as np 6 | import os.path as osp 7 | import sys 8 | import pickle 9 | 10 | from pathlib import Path 11 | from collections import namedtuple 12 | 13 | ROOT_DIR = osp.abspath('../') 14 | if ROOT_DIR not in sys.path: 15 | sys.path.append(ROOT_DIR) 16 | 17 | from utils import io as uio 18 | from evaluation.eval_geomreg_3dmatch import INLIER_THRESHES 19 | from evaluation.eval_geomreg_3dmatch import run_scene_matching, compute_metrics, plot_recall_curve 20 | 21 | 22 | TEST_SCENE_NAMES = ['gazebo_summer', 'gazebo_winter', 'wood_summer', 'wood_autmn'] 23 | 24 | 25 | def evaluate(cfg): 26 | assert len(cfg.desc_types) == len(cfg.desc_roots) 27 | 28 | if cfg.mode == 'test': 29 | scene_names = TEST_SCENE_NAMES 30 | scene_abbr_fn = None 31 | else: 32 | raise RuntimeError('[!] 
Mode is not supported.') 33 | 34 | for inlier_thresh in INLIER_THRESHES: 35 | print('Start inlier_thresh {:.2f}m'.format(inlier_thresh)) 36 | stat_paths = list() 37 | for desc_type, desc_root in zip(cfg.desc_types, cfg.desc_roots): 38 | print(' Start', desc_type) 39 | seq_name = 'seq-01' 40 | match_paths = list() 41 | for scene_name in scene_names: 42 | match_path = run_scene_matching(scene_name, seq_name, desc_type, 43 | cfg.pcloud_root, desc_root, cfg.out_root, 44 | inlier_thresh, cfg.threads) 45 | match_paths.append(match_path) 46 | stat_path = compute_metrics(match_paths, desc_type, inlier_thresh, cfg.out_root, 47 | scene_abbr_fn) 48 | stat_paths.append(stat_path) 49 | plot_recall_curve(cfg.desc_types, stat_paths, 50 | osp.join(cfg.out_root, 'recall-{:.2f}'.format(inlier_thresh))) 51 | 52 | print('All done.') 53 | 54 | 55 | def parse_args(): 56 | parser = argparse.ArgumentParser() 57 | parser.add_argument('--pcloud_root', default='') 58 | parser.add_argument('--out_root', default='./log_eth') 59 | parser.add_argument('--desc_roots', nargs='+') 60 | parser.add_argument('--desc_types', nargs='+') 61 | parser.add_argument('--mode', default='test') 62 | parser.add_argument('--threads', type=int, default=4) 63 | 64 | return parser.parse_args() 65 | 66 | 67 | if __name__ == '__main__': 68 | cfg = parse_args() 69 | evaluate(cfg) 70 | -------------------------------------------------------------------------------- /evaluation/eval_geomreg_eth.sh: -------------------------------------------------------------------------------- 1 | python eval_geomreg_eth.py \ 2 | --pcloud_root \ 3 | \ 4 | --desc_types \ 5 | Ours \ 6 | --desc_roots \ 7 | 8 | -------------------------------------------------------------------------------- /figures/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/craigleili/3DLocalMultiViewDesc/eb7d6912886da851e673e6a8528e9ba04001d73a/figures/pipeline.png -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/craigleili/3DLocalMultiViewDesc/eb7d6912886da851e673e6a8528e9ba04001d73a/models/__init__.py -------------------------------------------------------------------------------- /models/base_model.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | 4 | import os.path as osp 5 | import torch 6 | 7 | 8 | class BaseModel(object): 9 | 10 | def __init__(self): 11 | self._nets = list() 12 | self._net_names = list() 13 | self._train_flags = list() 14 | 15 | def __call__(self, *args): 16 | pass 17 | 18 | def register_nets(self, nets, names, train_flags): 19 | self._nets.extend(nets) 20 | self._net_names.extend(names) 21 | self._train_flags.extend(train_flags) 22 | 23 | def params(self, trainable, named=False, add_prefix=False): 24 | def _get_net_params(_net, _net_name): 25 | if named: 26 | if add_prefix: 27 | return [(_net_name + '.' 
+ _param_name, _param_data) 28 | for _param_name, _param_data in _net.named_parameters()] 29 | else: 30 | return list(_net.named_parameters()) 31 | else: 32 | return list(_net.parameters()) 33 | 34 | res = list() 35 | for idx, net in enumerate(self._nets): 36 | net_flag = self._train_flags[idx] 37 | net_name = self._net_names[idx] 38 | 39 | if trainable: 40 | if net_flag: 41 | res.extend(_get_net_params(net, net_name)) 42 | else: 43 | res.extend(_get_net_params(net, net_name)) 44 | return res 45 | 46 | def params_to_optimize(self, l2_weight_decay, excludes=('bias',)): 47 | if l2_weight_decay > 0: 48 | if excludes is None: 49 | excludes = list() 50 | 51 | decay_params = list() 52 | nondecay_params = list() 53 | 54 | named_params = self.params(True, named=True, add_prefix=False) 55 | for param_name, param_data in named_params: 56 | use_decay = True 57 | for kw in excludes: 58 | if kw in param_name: 59 | use_decay = False 60 | break 61 | if use_decay: 62 | decay_params.append(param_data) 63 | else: 64 | nondecay_params.append(param_data) 65 | return [{ 66 | 'params': decay_params, 67 | 'weight_decay': l2_weight_decay 68 | }, { 69 | 'params': nondecay_params, 70 | 'weight_decay': 0 71 | }] 72 | else: 73 | return self.params(True, named=False, add_prefix=False) 74 | 75 | def print_params(self, model_name='Model'): 76 | print('[*] {} parameters:'.format(model_name)) 77 | for nid, net in enumerate(self._nets): 78 | if self._train_flags[nid]: 79 | print('[*] Trainable module {}'.format(self._net_names[nid])) 80 | else: 81 | print('[*] None-trainable module {}'.format(self._net_names[nid])) 82 | for name, param in net.named_parameters(): 83 | print('[*] {}: {}'.format(name, param.size())) 84 | print('[*] {} size: {:.5f}M'.format(model_name, self.num_params() / 1e6)) 85 | 86 | def num_params(self): 87 | return sum(p.numel() for p in self.params(False)) 88 | 89 | def subnet_dict(self): 90 | return {self._net_names[i]: self._nets[i] for i in range(len(self._nets))} 91 | 92 | def save(self, root_folder, filename_prefix, iteration, solver_state=None): 93 | res = list() 94 | for net, name in zip(self._nets, self._net_names): 95 | net_path = osp.join(root_folder, '{}_{}_{}.pth'.format(filename_prefix, name, 96 | iteration)) 97 | torch.save(net.state_dict(), net_path) 98 | res.append(net_path) 99 | if solver_state is not None: 100 | solver_state_path = osp.join(root_folder, 101 | '{}_solver_{}.pth'.format(filename_prefix, iteration)) 102 | torch.save(solver_state, solver_state_path) 103 | res.append(solver_state_path) 104 | return res 105 | 106 | def load(self, path_pattern, net_names=None): 107 | print('[*] Load Pretrained Parameters:') 108 | 109 | def load_net(name, net, pth_path): 110 | model_dict = net.state_dict() 111 | pretrained_dict = torch.load(pth_path) 112 | print('[*] Module {} from {}'.format(name, pth_path)) 113 | filtered_dict = dict() 114 | for k, v in pretrained_dict.items(): 115 | if k in model_dict: 116 | if model_dict[k].size() == pretrained_dict[k].size(): 117 | filtered_dict[k] = v 118 | print('[*] Use {}, {}'.format(k, pretrained_dict[k].size())) 119 | else: 120 | print('[*] Discard {}, {} and {} do not match'.format( 121 | k, model_dict[k].size(), pretrained_dict[k].size())) 122 | else: 123 | print('[*] Discard unknown {}'.format(k)) 124 | model_dict.update(filtered_dict) 125 | net.load_state_dict(model_dict) 126 | 127 | if net_names is None or len(net_names) == 0: 128 | for net, name in zip(self._nets, self._net_names): 129 | net_path = path_pattern.format(name) 130 | if 
osp.exists(net_path): 131 | load_net(name, net, net_path) 132 | else: 133 | for net, name in zip(self._nets, self._net_names): 134 | if name not in net_names: 135 | continue 136 | net_path = path_pattern.format(name) 137 | if not osp.exists(net_path): 138 | raise RuntimeError("[!] {} does not exist.".format(net_path)) 139 | load_net(name, net, net_path) 140 | 141 | def train_mode(self): 142 | for net, train_flag in zip(self._nets, self._train_flags): 143 | if train_flag: 144 | net.train() 145 | else: 146 | net.eval() 147 | 148 | def eval_mode(self): 149 | for net in self._nets: 150 | net.eval() 151 | 152 | def to(self, device): 153 | for net in self._nets: 154 | net.to(device) 155 | -------------------------------------------------------------------------------- /models/modules.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | 4 | from collections import OrderedDict 5 | import functools 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | 11 | def init_module(m): 12 | """ 13 | Args: 14 | m (nn.Module): 15 | """ 16 | 17 | _linear_modules = [ 18 | 'Conv1d', 'Conv2d', 'Conv3d', 'ConvTranspose1d', 'ConvTranspose2d', 'ConvTranspose3d', 19 | 'Linear', 'Bilinear' 20 | ] 21 | _recurrent_modules = ['LSTM', 'GRU'] 22 | _norm_modules = [ 23 | 'BatchNorm1d', 'BatchNorm2d', 'BatchNorm3d', 'InstanceNorm1d', 'InstanceNorm2d', 24 | 'InstanceNorm3d' 25 | ] 26 | 27 | classname = m.__class__.__name__ 28 | if classname in _norm_modules: 29 | for name, param in m.named_parameters(): 30 | if 'bias' in name: 31 | nn.init.constant_(param.data, val=0) 32 | else: 33 | nn.init.normal_(param.data, mean=1., std=0.02) 34 | elif classname in _recurrent_modules: 35 | for name, param in m.named_parameters(): 36 | if 'bias' in name: 37 | nn.init.constant_(param.data, val=0) 38 | else: 39 | nn.init.orthogonal_(param.data) 40 | elif classname in _linear_modules: 41 | for name, param in m.named_parameters(): 42 | if 'bias' in name: 43 | nn.init.constant_(param.data, val=0) 44 | else: 45 | nn.init.normal_(param.data, mean=0.0, std=3.0) 46 | 47 | def get_norm2d(norm_type='instance_norm', trainable=False): 48 | if norm_type == 'batch_norm': 49 | return functools.partial(nn.BatchNorm2d, affine=trainable) 50 | elif norm_type == 'instance_norm': 51 | return functools.partial(nn.InstanceNorm2d, affine=trainable) 52 | else: 53 | raise NotImplementedError('[!] 
Normalization layer - {} is not found'.format(norm_type)) 54 | 55 | 56 | def is_batchnorm(norm_layer): 57 | if type(norm_layer) == functools.partial: 58 | return norm_layer.func == nn.BatchNorm2d 59 | else: 60 | return norm_layer.__class__.__name__ == 'BatchNorm2d' 61 | 62 | 63 | class Conv2dNorm(nn.Module): 64 | 65 | def __init__(self, in_channels, out_channels, use_relu=True, **kwargs): 66 | super().__init__() 67 | self.in_channels = in_channels 68 | self.out_channels = out_channels 69 | self.use_relu = use_relu 70 | 71 | norm_layer = get_norm2d() 72 | self.conv = nn.Conv2d(in_channels, 73 | out_channels, 74 | bias=not is_batchnorm(norm_layer), 75 | **kwargs) 76 | self.norm = norm_layer(out_channels) 77 | 78 | def forward(self, x): 79 | x = self.conv(x) 80 | x = self.norm(x) 81 | if self.use_relu: 82 | return F.relu(x, inplace=True) 83 | else: 84 | return x 85 | 86 | 87 | class ConvT2dNorm(nn.Module): 88 | 89 | def __init__(self, in_channels, out_channels, output_padding=0, use_relu=True, **kwargs): 90 | super().__init__() 91 | self.in_channels = in_channels 92 | self.out_channels = out_channels 93 | self.output_padding = output_padding 94 | self.use_relu = use_relu 95 | 96 | norm_layer = get_norm2d() 97 | self.conv = nn.ConvTranspose2d(in_channels, 98 | out_channels, 99 | output_padding=output_padding, 100 | bias=not is_batchnorm(norm_layer), 101 | **kwargs) 102 | self.norm = norm_layer(out_channels) 103 | 104 | def forward(self, x): 105 | x = self.conv(x) 106 | x = self.norm(x) 107 | if self.use_relu: 108 | return F.relu(x, inplace=True) 109 | else: 110 | return x 111 | 112 | 113 | class L2NetBackbone(nn.Module): 114 | 115 | def __init__(self, cfg, in_channels, out_channels=128): 116 | super().__init__() 117 | self.in_channels = in_channels 118 | self.out_shape = (out_channels, 8, 8) 119 | self.return_interims = cfg.l2net.return_interims 120 | 121 | self.layer1 = Conv2dNorm(in_channels, 122 | 32, 123 | use_relu=True, 124 | kernel_size=3, 125 | stride=2, 126 | padding=1) 127 | self.layer2 = Conv2dNorm(32, 32, use_relu=True, kernel_size=3, stride=1, 128 | padding=1) 129 | self.layer3 = Conv2dNorm(32, 64, use_relu=True, kernel_size=3, stride=2, 130 | padding=1) 131 | self.layer4 = Conv2dNorm(64, 64, use_relu=True, kernel_size=3, stride=1, 132 | padding=1) 133 | self.layer5 = Conv2dNorm(64, 128, use_relu=True, kernel_size=3, stride=2, 134 | padding=1) 135 | self.layer6 = Conv2dNorm(128, 128, use_relu=True, kernel_size=3, stride=1, 136 | padding=1) 137 | if out_channels != 128: 138 | self.layer7 = nn.Sequential(nn.Conv2d(128, out_channels, kernel_size=1, stride=1), 139 | nn.ReLU(inplace=True)) 140 | else: 141 | self.layer7 = None 142 | 143 | if not cfg.l2net.trainable: 144 | for param in self.parameters(): 145 | param.requires_grad = False 146 | 147 | def forward(self, x): 148 | e1 = self.layer1(x) 149 | e2 = self.layer2(e1) 150 | e3 = self.layer3(e2) 151 | e4 = self.layer4(e3) 152 | e5 = self.layer5(e4) 153 | e6 = self.layer6(e5) 154 | if self.layer7 is not None: 155 | e7 = self.layer7(e6) 156 | last = e7 157 | else: 158 | e7 = None 159 | last = e6 160 | if self.return_interims: 161 | if self.layer7 is not None: 162 | return e1, e2, e3, e4, e5, e6, e7 163 | else: 164 | return e1, e2, e3, e4, e5, e6 165 | else: 166 | return last 167 | 168 | 169 | class L2Net(nn.Module): 170 | 171 | def __init__(self, cfg, in_channels): 172 | super().__init__() 173 | self.in_channels = in_channels 174 | 175 | backbone = L2NetBackbone(cfg, in_channels) 176 | self.layer1 = backbone.layer1 177 | self.layer2 = 
backbone.layer2 178 | self.layer3 = backbone.layer3 179 | self.layer4 = backbone.layer4 180 | self.layer5 = backbone.layer5 181 | self.layer6 = backbone.layer6 182 | self.embed = nn.Sequential( 183 | nn.Dropout(0.1), 184 | Conv2dNorm(128, 128, use_relu=False, kernel_size=8, stride=1, padding=0)) 185 | if not cfg.l2net.trainable: 186 | for param in self.parameters(): 187 | param.requires_grad = False 188 | 189 | def forward(self, x): 190 | x = self.layer1(x) 191 | x = self.layer2(x) 192 | x = self.layer3(x) 193 | x = self.layer4(x) 194 | x = self.layer5(x) 195 | x = self.layer6(x) 196 | x = self.embed(x) 197 | x = x.view(x.size(0), -1) 198 | x = F.normalize(x) 199 | return x 200 | -------------------------------------------------------------------------------- /models/mvdesc.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | 4 | import numpy as np 5 | import os.path as osp 6 | import sys 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | ROOT_DIR = osp.abspath('../') 12 | if ROOT_DIR not in sys.path: 13 | sys.path.append(ROOT_DIR) 14 | 15 | from models.base_model import BaseModel 16 | from models.modules import L2NetBackbone 17 | from models.modules import Conv2dNorm, ConvT2dNorm, init_module 18 | from soft_renderer.transform import MultiViewRenderer 19 | 20 | 21 | class BaseViewFusionModule(nn.Module): 22 | 23 | def __init__(self, cfg, in_shape): 24 | super().__init__() 25 | 26 | self.cfg = cfg 27 | assert len(in_shape) == 4 28 | self.in_shape = in_shape 29 | self.out_channels = None 30 | 31 | self._init() 32 | 33 | def _init(self): 34 | raise NotImplementedError 35 | 36 | def forward(self, *args): 37 | raise NotImplementedError 38 | 39 | 40 | class ViewPoolFusion(BaseViewFusionModule): 41 | def _init(self): 42 | self.out_channels = self.in_shape[1] 43 | 44 | def forward(self, x): 45 | if x.size(1) == 1: 46 | return torch.squeeze(x, dim=1) 47 | else: 48 | x, _ = torch.max(x, dim=1) 49 | return x 50 | 51 | 52 | class SoftViewPoolFusion(BaseViewFusionModule): 53 | def _init(self): 54 | in_channels = self.in_shape[1] 55 | self.out_channels = in_channels 56 | 57 | kernel = self.cfg.view_pool.kernel 58 | bias = self.cfg.view_pool.bias 59 | if kernel == 1: 60 | self.attn = nn.Sequential( 61 | nn.Conv2d(in_channels, 62 | in_channels // 2, 63 | kernel_size=kernel, 64 | stride=1, 65 | bias=bias), nn.ReLU(inplace=True), 66 | nn.Conv2d(in_channels // 2, 67 | in_channels, 68 | kernel_size=kernel, 69 | stride=1, 70 | bias=bias)) 71 | elif kernel == 3: 72 | self.attn = nn.Sequential( 73 | nn.Conv2d( 74 | in_channels, 75 | in_channels // 2, 76 | kernel_size=kernel, 77 | stride=2, 78 | padding=1, 79 | bias=bias, 80 | ), nn.ReLU(inplace=True), 81 | nn.ConvTranspose2d(in_channels // 2, 82 | in_channels, 83 | kernel_size=kernel, 84 | stride=2, 85 | padding=1, 86 | output_padding=1, 87 | bias=bias)) 88 | else: 89 | raise RuntimeError('[!] 
cfg.view_pool.kernel={} is not supported.'.format(kernel)) 90 | 91 | def forward(self, x): 92 | B, V, C, H, W = x.size() 93 | a = self.attn(x.view(B * V, C, H, W)) 94 | a = F.softmax(a.view(B, V, C, H, W), dim=1) 95 | ax = torch.sum(a * x, dim=1) 96 | return ax 97 | 98 | 99 | class BaseMVDescModel(BaseModel): 100 | BACKBONES = {'l2net': L2NetBackbone} 101 | 102 | def __init__(self, cfg): 103 | super().__init__() 104 | self.cfg = cfg 105 | self.cnn_output = None 106 | self.fusion_output = None 107 | 108 | self._init() 109 | 110 | def _init(self): 111 | raise NotImplementedError 112 | 113 | def get_cnn_backbone(self): 114 | draw_color = self.cfg.render.draw_color 115 | draw_depth = self.cfg.render.draw_depth 116 | cnn = self.cfg.model.cnn 117 | cnn_out_channels = self.cfg.model.cnn_out_channels 118 | 119 | image_channels = -1 120 | if draw_color: 121 | image_channels = 3 122 | elif draw_depth: 123 | image_channels = 1 124 | else: 125 | raise RuntimeError('[!] Cannot decide image channels.') 126 | 127 | backbone = self.BACKBONES[cnn] 128 | return backbone(self.cfg, image_channels, cnn_out_channels) 129 | 130 | @staticmethod 131 | def _call_backbone(cnn, x): 132 | B, V, C, H, W = x.size() 133 | x = x.view(B * V, C, H, W) 134 | x = cnn(x) 135 | C, H, W = x.size(1), x.size(2), x.size(3) 136 | x = x.view(B, V, C, H, W) 137 | return x 138 | 139 | 140 | class MVPoolNet(BaseMVDescModel): 141 | POOLS = {'max_pool': ViewPoolFusion, 'soft_pool': SoftViewPoolFusion} 142 | 143 | def _init(self): 144 | fusion_type = self.cfg.model.fusion_type 145 | desc_dim = self.cfg.model.desc_dim 146 | view_num = self.cfg.render.view_num 147 | augment_rotations = self.cfg.render.augment_rotations 148 | rotation_num = self.cfg.render.rotation_num 149 | if augment_rotations: 150 | view_num *= 4 151 | elif rotation_num > 0: 152 | view_num *= rotation_num 153 | 154 | # Subnets 155 | self.cnn = self.get_cnn_backbone() 156 | C, H, W = self.cnn.out_shape 157 | 158 | pool_fn = self.POOLS[fusion_type] 159 | self.pool = pool_fn(self.cfg, (view_num, C, H, W)) 160 | C = self.pool.out_channels 161 | print('[*] Using', self.pool.__class__.__name__) 162 | 163 | self.embed = nn.Sequential( 164 | nn.Conv2d(C, desc_dim, kernel_size=(H, W), stride=1, padding=0)) 165 | 166 | self.register_nets([self.cnn, self.pool, self.embed], ['cnn', 'pool', 'embed'], 167 | [True] * 3) 168 | 169 | def __call__(self, x): 170 | x = self._call_backbone(self.cnn, x) 171 | self.cnn_output = x 172 | 173 | x = self.pool(x) 174 | self.fusion_output = x 175 | 176 | x = self.embed(x) 177 | x = x.view(x.size(0), -1) 178 | x = F.normalize(x, p=2, dim=1) 179 | return x 180 | 181 | 182 | MV_MODELS = { 183 | 'MVPoolNet': MVPoolNet, 184 | } 185 | 186 | 187 | class RenderModel(BaseModel): 188 | 189 | def __init__(self, cfg): 190 | super().__init__() 191 | self.cfg = cfg 192 | 193 | self.renderer = MultiViewRenderer(cfg.render.view_num, 194 | cfg.render.rotation_num, 195 | znear=cfg.render.znear, 196 | zfar=cfg.render.zfar, 197 | image_size=cfg.render.image_size, 198 | sigma=cfg.render.sigma, 199 | gamma=cfg.render.gamma, 200 | dist_ratio=cfg.render.dist_ratio, 201 | dist_factor=cfg.render.dist_factor, 202 | radius_ratio=cfg.render.radius_ratio, 203 | draw_color=cfg.render.draw_color, 204 | draw_depth=cfg.render.draw_depth, 205 | trainable=cfg.render.trainable) 206 | 207 | self.register_nets([self.renderer], ['renderer'], [cfg.render.trainable]) 208 | 209 | def __call__(self, vertices, radii, colors, at_centers, at_normals): 210 | images, _ = self.renderer(vertices, 
radii, colors, at_centers, at_normals) 211 | if self.cfg.render.augment_rotations: 212 | images = self._augment_rotation(images) 213 | return images 214 | 215 | @staticmethod 216 | def _augment_rotation(x): 217 | res = [x] 218 | for i in range(3): 219 | res.append(torch.rot90(x, k=i + 1, dims=(3, 4))) 220 | res = torch.cat(res, dim=1) 221 | return res 222 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | fire 2 | ipython 3 | jupyter 4 | joblib 5 | matplotlib 6 | notebook 7 | numpy 8 | ninja 9 | open3d-python==0.7.0.0 10 | opencv-python 11 | pandas 12 | Pillow 13 | protobuf 14 | pytorch-ignite==0.2.0 15 | pyflann3==1.8.4.1 16 | pytz 17 | PyYAML 18 | scikit-image 19 | scipy 20 | tqdm 21 | torch==1.2.0 22 | torchvision==0.4.0 23 | tensorboard 24 | typing 25 | yacs 26 | -------------------------------------------------------------------------------- /scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/craigleili/3DLocalMultiViewDesc/eb7d6912886da851e673e6a8528e9ba04001d73a/scripts/__init__.py -------------------------------------------------------------------------------- /scripts/configs/ours_3dmatch.yaml: -------------------------------------------------------------------------------- 1 | log: 2 | identifier: "mvd" 3 | root_path: "./log_3dmatch" 4 | 5 | render: 6 | augment_rotations: True 7 | draw_color: False 8 | draw_depth: True 9 | trainable: True 10 | view_num: 8 11 | rotation_num: 0 12 | 13 | model: 14 | cnn_out_channels: 128 15 | desc_dim: 32 16 | fusion_type: "soft_pool" 17 | type: "MVPoolNet" 18 | 19 | view_pool: 20 | kernel: 3 21 | 22 | train: 23 | general: 24 | ckpt_path: "./ours_3dmatch/net_{}_16.pth" 25 | dataset: 26 | name: "3dmatch" 27 | pcloud_root: "" 28 | kpts_root: "" 29 | 30 | eval: 31 | general: 32 | ckpt_path: "./ours_3dmatch/net_{}_16.pth" 33 | geomreg: 34 | test: 35 | name: "3dmatch" 36 | pcloud_root: "<3DMatch_Root>" 37 | -------------------------------------------------------------------------------- /scripts/configs/ours_eth.yaml: -------------------------------------------------------------------------------- 1 | log: 2 | identifier: "mvd" 3 | root_path: "./log_eth" 4 | 5 | render: 6 | augment_rotations: True 7 | draw_color: False 8 | draw_depth: True 9 | trainable: True 10 | view_num: 8 11 | rotation_num: 0 12 | default_radius: 0.1 13 | dist_factor: 3.0 14 | 15 | model: 16 | cnn_out_channels: 128 17 | desc_dim: 32 18 | fusion_type: "soft_pool" 19 | type: "MVPoolNet" 20 | 21 | view_pool: 22 | kernel: 3 23 | 24 | eval: 25 | general: 26 | ckpt_path: "./ours_3dmatch/net_{}_16.pth" 27 | geomreg: 28 | test: 29 | name: "eth" 30 | pcloud_root: "" 31 | -------------------------------------------------------------------------------- /scripts/engine_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os.path as osp 3 | import random 4 | import sys 5 | import time 6 | import torch 7 | import torch.optim as optim 8 | import warnings 9 | import math 10 | 11 | ROOT_DIR = osp.abspath('../') 12 | if ROOT_DIR not in sys.path: 13 | sys.path.append(ROOT_DIR) 14 | 15 | from utils.log import Logger 16 | 17 | WEIGHT_DECAY_EXCLUDES = [ 18 | 'bias', 19 | ] 20 | PTH_PREFIX = 'net' 21 | 22 | def is_not_empty(x): 23 | return x is not None and len(x) > 0 24 | 25 | def print_config(cfg): 26 | 
print('=========================Configurations=========================>') 27 | print(cfg) 28 | print('<================================================================') 29 | 30 | 31 | def redirect_stdout(log_dir, prefix): 32 | time_stamp = time.strftime("%m_%d-%H_%M") 33 | sys.stdout = Logger(osp.join(log_dir, '{}-{}.log'.format(prefix, time_stamp))) 34 | 35 | 36 | def seed_random(seed): 37 | seed = seed or random.randint(1, 10000) 38 | 39 | random.seed(seed) 40 | np.random.seed(seed) 41 | torch.manual_seed(seed) 42 | 43 | print('[*] Using manual seed:', seed) 44 | 45 | 46 | def get_device(gpu=None): 47 | if gpu is not None and torch.cuda.is_available(): 48 | device = torch.device('cuda:{}'.format(gpu)) 49 | else: 50 | device = torch.device('cpu') 51 | print('[*] Using device: {}'.format(device)) 52 | return device 53 | 54 | 55 | def get_optimizer(name, lr, params): 56 | if len(params) < 1: 57 | return None 58 | if name == 'SGD': 59 | optimizer = optim.SGD(params, lr=lr, momentum=0.9) 60 | elif name == 'Adam': 61 | optimizer = optim.Adam(params, lr=lr) 62 | else: 63 | raise RuntimeError('[!] name is not supported.') 64 | return optimizer 65 | 66 | 67 | def get_lr_scheduler(lr_step, lr_gamma, optimizer): 68 | if optimizer is not None and lr_step > 0: 69 | scheduler = torch.optim.lr_scheduler.StepLR( 70 | optimizer, step_size=lr_step, gamma=lr_gamma) 71 | else: 72 | scheduler = None 73 | return scheduler 74 | 75 | 76 | def get_tbwriter(log_dir): 77 | import warnings 78 | from torch.utils.tensorboard import SummaryWriter 79 | 80 | with warnings.catch_warnings(): 81 | warnings.simplefilter('ignore') 82 | return SummaryWriter(log_dir, flush_secs=30) 83 | 84 | 85 | def prepare_batch(batch, device): 86 | if isinstance(batch, dict): 87 | for k, v in batch.items(): 88 | if isinstance(v, torch.Tensor): 89 | batch[k] = v.to(device) 90 | elif isinstance(batch, list): 91 | for i in range(len(batch)): 92 | if isinstance(batch[i], torch.Tensor): 93 | batch[i] = batch[i].to(device) 94 | return batch 95 | 96 | 97 | def step_lr_scheduler(engine, scheduler=None): 98 | if scheduler is not None: 99 | scheduler.step() 100 | 101 | 102 | def print_train_log(engine, timer=None, num_batches=None, cfg=None): 103 | state = engine.state 104 | iteration = state.iteration 105 | if iteration == 1 or iteration % cfg.log.freq == 0: 106 | epoch = state.epoch 107 | loss = state.output 108 | epochs = cfg.train.solver.epochs 109 | 110 | seconds_per_batch = timer.value() 111 | minutes_per_epoch = seconds_per_batch * num_batches / 60. 112 | total_minutes = minutes_per_epoch * epochs 113 | total_elapsed_minutes = seconds_per_batch * iteration / 60. 114 | epoch_elapsed_minutes = seconds_per_batch * (iteration % num_batches) / 60. 115 | 116 | msg = 'Epoch: {}/{} | Step: {}/{}'.format(epoch, epochs, iteration % num_batches, 117 | num_batches) 118 | msg += ' | Iter: {}'.format(iteration) 119 | msg += ' | Loss: {:.5f}'.format(loss) 120 | msg += ' | GTime: {:.2f}/{:.2f} min'.format(total_elapsed_minutes, total_minutes) 121 | msg += ' | LTime: {:.2f}/{:.2f} min'.format(epoch_elapsed_minutes, minutes_per_epoch) 122 | print(msg) 123 | 124 | 125 | def print_eval_log(engine, timer=None, num_batches=None): 126 | iteration = engine.state.iteration 127 | seconds_per_batch = timer.value() 128 | minutes_per_epoch = seconds_per_batch * num_batches / 60. 129 | epoch_elapsed_minutes = seconds_per_batch * (iteration % num_batches) / 60. 
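    # Timer(average=True) yields the running mean of seconds per completed iteration,
    # so scaling it by num_batches estimates the wall-clock time of the full pass,
    # while (iteration % num_batches) tracks progress within the current pass.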
130 | 131 | msg = 'Iter: {}/{} | Time: {:.2f}/{:.2f} min'.format(iteration, num_batches, 132 | epoch_elapsed_minutes, 133 | minutes_per_epoch) 134 | print(msg) 135 | 136 | 137 | def handle_exception(engine, e): 138 | if isinstance(e, KeyboardInterrupt) and (engine.state.iteration > 1): 139 | engine.terminate() 140 | warnings.warn('[!] KeyboardInterrupt caught. Exiting gracefully.') 141 | else: 142 | raise e 143 | -------------------------------------------------------------------------------- /scripts/main_mvdesc.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | 4 | from collections import defaultdict 5 | from pathlib import Path 6 | from scipy import spatial 7 | import functools 8 | import numpy as np 9 | import os.path as osp 10 | import pickle 11 | import sys 12 | import time 13 | 14 | ROOT_DIR = osp.abspath('../') 15 | if ROOT_DIR not in sys.path: 16 | sys.path.append(ROOT_DIR) 17 | 18 | from config import mvdesc_cfg 19 | from data.pointclouds import PointCloudPairDataset, PointCloudDataset 20 | from data.pointclouds import PointCloudPairSampler, list_pcd_pairs, list_pcds 21 | from models.mvdesc import RenderModel, MV_MODELS 22 | from scripts import engine_utils as eu 23 | from utils import io as uio 24 | 25 | import torch 26 | import torchvision 27 | from torch.utils.data import DataLoader 28 | from ignite.engine import Engine, Events 29 | from ignite.handlers import ModelCheckpoint, Timer 30 | 31 | 32 | def prepare_config_train(cfg): 33 | name = uio.new_log_folder(cfg.log.root_path, cfg.log.identifier) 34 | name += '-{}'.format(time.strftime('%m_%d-%H_%M')) 35 | if cfg.render.draw_color: 36 | name += '-color' 37 | elif cfg.render.draw_depth: 38 | name += '-depth' 39 | name += '-{}'.format(cfg.model.type) 40 | name += '-c{}'.format(cfg.model.cnn_out_channels) 41 | name += '-{}'.format(cfg.model.fusion_type) 42 | if cfg.model.fusion_type == 'soft_pool': 43 | name += '-k{}'.format(cfg.view_pool.kernel) 44 | name += '-d{}'.format(cfg.model.desc_dim) 45 | name += '-{}'.format(cfg.train.dataset.name) 46 | if cfg.render.trainable: 47 | name += '-tr' 48 | view_num = cfg.render.view_num 49 | if cfg.render.augment_rotations: 50 | view_num *= 4 51 | elif cfg.render.rotation_num > 0: 52 | view_num *= cfg.render.rotation_num 53 | name += '-v{}'.format(view_num) 54 | if eu.is_not_empty(cfg.train.general.ckpt_path): 55 | name += '-ft' 56 | name += '-e{}'.format(cfg.train.solver.epochs) 57 | name += '-{}'.format(cfg.train.solver.optim) 58 | cfg.log.root_path = osp.join(cfg.log.root_path, name) 59 | uio.may_create_folder(cfg.log.root_path) 60 | 61 | 62 | def prepare_config_eval(cfg): 63 | if eu.is_not_empty(cfg.eval.general.ckpt_path): 64 | exp_name = str(Path(cfg.eval.general.ckpt_path).parent) 65 | else: 66 | _, exp_name = uio.last_log_folder(cfg.log.root_path, cfg.log.identifier) 67 | ckpt_name = uio.last_checkpoint(osp.join(cfg.log.root_path, exp_name), eu.PTH_PREFIX) 68 | cfg.eval.general.ckpt_path = osp.join(cfg.log.root_path, exp_name, ckpt_name) 69 | 70 | assert eu.is_not_empty(exp_name) 71 | cfg.log.root_path = osp.join(cfg.log.root_path, exp_name) 72 | uio.may_create_folder(cfg.log.root_path) 73 | 74 | 75 | def get_dataloader_train(cfg): 76 | batch_size = cfg.train.input.batch_size 77 | instance_num = cfg.train.input.instance_num 78 | name = cfg.train.dataset.name 79 | kpts_root = cfg.train.dataset.kpts_root 80 | pcloud_root = cfg.train.dataset.pcloud_root 81 | workers = 
cfg.train.dataset.workers 82 | radius = cfg.render.default_radius 83 | 84 | data = list_pcd_pairs(kpts_root) 85 | dataset = PointCloudPairDataset(data, pcloud_root, instance_num, radius) 86 | sampler = PointCloudPairSampler(data, batch_size) 87 | return DataLoader(dataset, 88 | batch_size=batch_size, 89 | shuffle=False, 90 | sampler=sampler, 91 | num_workers=workers, 92 | collate_fn=lambda x: x, 93 | pin_memory=True, 94 | drop_last=True) 95 | 96 | 97 | def get_dataloader_eval_geomreg(cfg, mode): 98 | workers = cfg.eval.geomreg.workers 99 | radius = cfg.render.default_radius 100 | 101 | if mode == 'valid': 102 | name = cfg.eval.geomreg.valid.name 103 | pcloud_root = cfg.eval.geomreg.valid.pcloud_root 104 | elif mode == 'test': 105 | name = cfg.eval.geomreg.test.name 106 | pcloud_root = cfg.eval.geomreg.test.pcloud_root 107 | else: 108 | raise RuntimeError('[!] mode is not supported.') 109 | 110 | data = list_pcds(pcloud_root) 111 | dataset = PointCloudDataset(data, pcloud_root, radius) 112 | return DataLoader(dataset, 113 | batch_size=1, 114 | shuffle=False, 115 | num_workers=workers, 116 | collate_fn=lambda x: x[0], 117 | pin_memory=False, 118 | drop_last=False) 119 | 120 | 121 | def get_models(cfg): 122 | render_model = RenderModel(cfg) 123 | desc_model = MV_MODELS[cfg.model.type](cfg) 124 | return render_model, desc_model 125 | 126 | 127 | def get_criterion(cfg): 128 | from utils.loss import BatchHardNegativeLoss 129 | 130 | return BatchHardNegativeLoss() 131 | 132 | 133 | def step_train(engine, 134 | batch, 135 | render_model=None, 136 | desc_model=None, 137 | render_optimizer=None, 138 | desc_optimizer=None, 139 | criterion=None, 140 | tbwriter=None, 141 | device=None, 142 | cfg=None): 143 | iteration = engine.state.iteration 144 | grad_clip = cfg.train.solver.grad_clip 145 | renderer_optim_step = cfg.train.solver.renderer_optim_step 146 | renderer_weight = cfg.train.solver.renderer_weight 147 | renderer_trainable = cfg.render.trainable and iteration % renderer_optim_step == 0 148 | 149 | for item in batch: 150 | item['cloud_i'].to(device) 151 | item['cloud_j'].to(device) 152 | 153 | if renderer_trainable: 154 | render_optimizer.zero_grad() 155 | desc_optimizer.zero_grad() 156 | 157 | with torch.set_grad_enabled(renderer_trainable): 158 | renderings_i = list() 159 | renderings_j = list() 160 | for item in batch: 161 | cloud_i = item['cloud_i'] 162 | cloud_j = item['cloud_j'] 163 | renderings_i.append( 164 | render_model(cloud_i.points, cloud_i.radii, cloud_i.colors, cloud_i.at_centers, 165 | cloud_i.at_normals)) 166 | renderings_j.append( 167 | render_model(cloud_j.points, cloud_j.radii, cloud_j.colors, cloud_j.at_centers, 168 | cloud_j.at_normals)) 169 | renderings_i = torch.cat(renderings_i, dim=0) 170 | renderings_j = torch.cat(renderings_j, dim=0) 171 | 172 | with torch.set_grad_enabled(True): 173 | descs_i = desc_model(renderings_i) 174 | descs_j = desc_model(renderings_j) 175 | 176 | loss_batchhard = criterion(descs_i, descs_j) 177 | if renderer_trainable: 178 | loss_render = render_model.renderer.constraints() 179 | loss = loss_batchhard + renderer_weight * loss_render 180 | else: 181 | loss_render = None 182 | loss = loss_batchhard 183 | loss.backward() 184 | 185 | if renderer_trainable: 186 | torch.nn.utils.clip_grad_value_( 187 | render_model.params(True, named=False, add_prefix=False), grad_clip) 188 | render_optimizer.step() 189 | desc_optimizer.step() 190 | 191 | iteration = engine.state.iteration 192 | if iteration == 1 or iteration % cfg.log.freq == 0: 193 | 
tbwriter.add_scalar('loss', loss.item(), iteration) 194 | tbwriter.add_scalar('loss_batchhard', loss_batchhard.item(), iteration) 195 | if renderer_trainable: 196 | tbwriter.add_scalar('loss_render', loss_render.item(), iteration) 197 | tbwriter.add_scalar('lr_render', render_optimizer.param_groups[0]['lr'], iteration) 198 | tbwriter.add_scalar('lr', desc_optimizer.param_groups[0]['lr'], iteration) 199 | 200 | for name, image in zip(['renderings_i', 'renderings_j'], [renderings_i, renderings_j]): 201 | B, V, C, H, W = image.size() 202 | B = min(B, 6) 203 | V = cfg.render.view_num 204 | image_slice = image[:B, :V, :, :, :] 205 | image_slice = image_slice.contiguous().view(-1, C, H, W) 206 | image_grid = torchvision.utils.make_grid(image_slice, nrow=V, normalize=True) 207 | tbwriter.add_image(name, image_grid, iteration) 208 | 209 | return loss.item() 210 | 211 | 212 | def step_eval_geomreg(engine, batch, render_model=None, desc_model=None, device=None, cfg=None): 213 | cloud = batch['cloud'] 214 | cloud.to(device) 215 | num_indices = len(cloud.at_centers) 216 | 217 | descs = list() 218 | with torch.set_grad_enabled(False): 219 | for i in range(num_indices): 220 | renderings = render_model(cloud.points, cloud.radii, cloud.colors, 221 | cloud.at_centers[[i], :], cloud.at_normals[[i], :]) 222 | batch_desc = desc_model(renderings).cpu().numpy() 223 | assert batch_desc.shape[0] == 1 224 | descs.append(batch_desc[0, :]) 225 | descs = np.asarray(descs, dtype=np.float32) 226 | 227 | scene = batch['scene'] 228 | seq = batch['seq'] 229 | name = batch['name'] 230 | 231 | out_folder = osp.join(cfg.log.root_path, scene, seq) 232 | uio.may_create_folder(out_folder) 233 | 234 | np.save(osp.join(out_folder, name + '.desc.npy'), descs) 235 | return out_folder 236 | 237 | 238 | def engine_train(cfg): 239 | prepare_config_train(cfg) 240 | 241 | ckpt_nets = cfg.train.general.ckpt_nets 242 | ckpt_path = cfg.train.general.ckpt_path 243 | epochs = cfg.train.solver.epochs 244 | gpu = cfg.general.gpu 245 | lr = cfg.train.solver.lr 246 | lr_gamma = cfg.train.solver.lr_gamma 247 | lr_step = cfg.train.solver.lr_step 248 | optim = cfg.train.solver.optim 249 | renderer_lr = cfg.train.solver.renderer_lr 250 | root_path = cfg.log.root_path 251 | save_freq = cfg.train.solver.save_freq 252 | seed = cfg.general.seed 253 | 254 | eu.redirect_stdout(root_path, 'train') 255 | eu.print_config(cfg) 256 | 257 | eu.seed_random(seed) 258 | 259 | device = eu.get_device(gpu) 260 | 261 | dataloader = get_dataloader_train(cfg) 262 | num_batches = len(dataloader) 263 | 264 | render_model, desc_model = get_models(cfg) 265 | render_model.to(device) 266 | render_model.train_mode() 267 | render_model.print_params('render_model') 268 | desc_model.to(device) 269 | desc_model.train_mode() 270 | desc_model.print_params('desc_model') 271 | 272 | crit = get_criterion(cfg) 273 | print('[*] Loss Function:', crit.__class__.__name__) 274 | 275 | render_params = render_model.params(True, named=False, add_prefix=False) 276 | render_optimizer = eu.get_optimizer(optim, renderer_lr, render_params) 277 | render_lr_scheduler = eu.get_lr_scheduler(lr_step, lr_gamma, render_optimizer) 278 | 279 | desc_params = desc_model.params(True, named=False, add_prefix=False) 280 | desc_optimizer = eu.get_optimizer(optim, lr, desc_params) 281 | desc_lr_scheduler = eu.get_lr_scheduler(lr_step, lr_gamma, desc_optimizer) 282 | 283 | if eu.is_not_empty(ckpt_path): 284 | render_model.load(ckpt_path, ckpt_nets) 285 | desc_model.load(ckpt_path, ckpt_nets) 286 | 287 | tbwriter = 
eu.get_tbwriter(root_path) 288 | 289 | engine = Engine( 290 | functools.partial(step_train, 291 | render_model=render_model, 292 | desc_model=desc_model, 293 | render_optimizer=render_optimizer, 294 | desc_optimizer=desc_optimizer, 295 | criterion=crit, 296 | tbwriter=tbwriter, 297 | device=device, 298 | cfg=cfg)) 299 | engine.add_event_handler(Events.EPOCH_COMPLETED, 300 | eu.step_lr_scheduler, 301 | scheduler=render_lr_scheduler) 302 | engine.add_event_handler(Events.EPOCH_COMPLETED, 303 | eu.step_lr_scheduler, 304 | scheduler=desc_lr_scheduler) 305 | 306 | ckpt_handler = ModelCheckpoint(root_path, 307 | eu.PTH_PREFIX, 308 | atomic=False, 309 | save_interval=save_freq, 310 | n_saved=epochs // save_freq, 311 | require_empty=False) 312 | render_subnets = render_model.subnet_dict() 313 | desc_subnets = desc_model.subnet_dict() 314 | engine.add_event_handler(Events.EPOCH_COMPLETED, 315 | ckpt_handler, 316 | to_save={ 317 | **render_subnets, 318 | **desc_subnets 319 | }) 320 | 321 | timer = Timer(average=True) 322 | timer.attach(engine, 323 | start=Events.EPOCH_STARTED, 324 | pause=Events.EPOCH_COMPLETED, 325 | resume=Events.ITERATION_STARTED, 326 | step=Events.ITERATION_COMPLETED) 327 | 328 | engine.add_event_handler(Events.ITERATION_COMPLETED, 329 | eu.print_train_log, 330 | timer=timer, 331 | num_batches=num_batches, 332 | cfg=cfg) 333 | 334 | engine.add_event_handler(Events.EXCEPTION_RAISED, eu.handle_exception) 335 | 336 | engine.run(dataloader, epochs) 337 | 338 | tbwriter.close() 339 | 340 | return root_path 341 | 342 | 343 | def engine_eval_geomreg(cfg, mode): 344 | prepare_config_eval(cfg) 345 | 346 | ckpt_path = cfg.eval.general.ckpt_path 347 | gpu = cfg.general.gpu 348 | root_path = cfg.log.root_path 349 | seed = cfg.general.seed 350 | 351 | eu.redirect_stdout(root_path, 'eval_geomreg-{}'.format(mode)) 352 | eu.print_config(cfg) 353 | 354 | eu.seed_random(seed) 355 | 356 | device = eu.get_device(gpu) 357 | 358 | dataloader = get_dataloader_eval_geomreg(cfg, mode) 359 | num_batches = len(dataloader) 360 | 361 | render_model, desc_model = get_models(cfg) 362 | render_model.to(device) 363 | render_model.eval_mode() 364 | render_model.print_params('render_model') 365 | desc_model.to(device) 366 | desc_model.eval_mode() 367 | desc_model.print_params('desc_model') 368 | 369 | assert eu.is_not_empty(ckpt_path) 370 | render_model.load(ckpt_path) 371 | desc_model.load(ckpt_path) 372 | 373 | engine = Engine( 374 | functools.partial(step_eval_geomreg, 375 | render_model=render_model, 376 | desc_model=desc_model, 377 | device=device, 378 | cfg=cfg)) 379 | 380 | timer = Timer(average=True) 381 | timer.attach(engine, 382 | start=Events.EPOCH_STARTED, 383 | pause=Events.EPOCH_COMPLETED, 384 | resume=Events.ITERATION_STARTED, 385 | step=Events.ITERATION_COMPLETED) 386 | 387 | engine.add_event_handler(Events.ITERATION_COMPLETED, 388 | eu.print_eval_log, 389 | timer=timer, 390 | num_batches=num_batches) 391 | 392 | engine.add_event_handler(Events.EXCEPTION_RAISED, eu.handle_exception) 393 | 394 | engine.run(dataloader, 1) 395 | 396 | return root_path 397 | 398 | 399 | def train(cfg_path): 400 | cfg = mvdesc_cfg.clone() 401 | cfg.merge_from_file(cfg_path) 402 | engine_train(cfg) 403 | 404 | 405 | def test(cfg_path): 406 | cfg = mvdesc_cfg.clone() 407 | cfg.merge_from_file(cfg_path) 408 | engine_eval_geomreg(cfg, 'test') 409 | 410 | 411 | if __name__ == '__main__': 412 | import fire 413 | 414 | fire.Fire() 415 | -------------------------------------------------------------------------------- 
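The entry points above are exposed through `fire.Fire()`, so training and evaluation can be launched from the `scripts` folder as, e.g., `python main_mvdesc.py train configs/ours_3dmatch.yaml` or `python main_mvdesc.py test configs/ours_3dmatch.yaml`, once the placeholder paths in the YAML configs are filled in. During evaluation, `step_eval_geomreg` writes one `<fragment>.desc.npy` array of shape `(num_keypoints, desc_dim)` per fragment under `<log_root>/<scene>/<seq>/`. The sketch below is a minimal illustration of how such descriptor files could be matched by mutual nearest-neighbour search; the helper name and matching strategy are assumptions for illustration, not the repository's `register_fragment_pair` routine in `evaluation/eval_geomreg_3dmatch.py`.

```
# Illustrative sketch only: mutual nearest-neighbour matching of descriptors saved
# by step_eval_geomreg. The file layout follows the code above; the matching
# strategy itself is an assumption, not the repository's implementation.
import numpy as np
from scipy import spatial

def mutual_nn_matches(desc_path_i, desc_path_j):
    descs_i = np.load(desc_path_i)  # (N_i, desc_dim), rows are L2-normalized
    descs_j = np.load(desc_path_j)  # (N_j, desc_dim)
    nn_ij = spatial.cKDTree(descs_j).query(descs_i, k=1)[1]  # closest j for each i
    nn_ji = spatial.cKDTree(descs_i).query(descs_j, k=1)[1]  # closest i for each j
    # Keep only pairs that are each other's nearest neighbour.
    return np.asarray([(i, j) for i, j in enumerate(nn_ij) if nn_ji[j] == i],
                      dtype=np.int64)
```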
/scripts/ours_3dmatch/net_cnn_16.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/craigleili/3DLocalMultiViewDesc/eb7d6912886da851e673e6a8528e9ba04001d73a/scripts/ours_3dmatch/net_cnn_16.pth -------------------------------------------------------------------------------- /scripts/ours_3dmatch/net_embed_16.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/craigleili/3DLocalMultiViewDesc/eb7d6912886da851e673e6a8528e9ba04001d73a/scripts/ours_3dmatch/net_embed_16.pth -------------------------------------------------------------------------------- /scripts/ours_3dmatch/net_pool_16.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/craigleili/3DLocalMultiViewDesc/eb7d6912886da851e673e6a8528e9ba04001d73a/scripts/ours_3dmatch/net_pool_16.pth -------------------------------------------------------------------------------- /scripts/ours_3dmatch/net_renderer_16.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/craigleili/3DLocalMultiViewDesc/eb7d6912886da851e673e6a8528e9ba04001d73a/scripts/ours_3dmatch/net_renderer_16.pth -------------------------------------------------------------------------------- /soft_renderer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/craigleili/3DLocalMultiViewDesc/eb7d6912886da851e673e6a8528e9ba04001d73a/soft_renderer/__init__.py -------------------------------------------------------------------------------- /soft_renderer/cuda/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/craigleili/3DLocalMultiViewDesc/eb7d6912886da851e673e6a8528e9ba04001d73a/soft_renderer/cuda/__init__.py -------------------------------------------------------------------------------- /soft_renderer/cuda/jit.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | from torch.utils.cpp_extension import load 3 | 4 | current_folder = osp.realpath(osp.abspath(osp.dirname(__file__))) 5 | 6 | soft_rasterize_cuda = load('soft_rasterize_cuda', 7 | [current_folder + '/soft_rasterize_cuda.cpp', current_folder + '/soft_rasterize_cuda_kernel.cu'], 8 | verbose=True) 9 | -------------------------------------------------------------------------------- /soft_renderer/cuda/soft_rasterize_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | void soft_rasterize_forward_cuda( 7 | // Inputs 8 | torch::Tensor mvps, 9 | torch::Tensor vertices, 10 | torch::Tensor radii, 11 | torch::Tensor colors, 12 | torch::Tensor locks, 13 | float sigma, 14 | float gamma, 15 | float dist_ratio, 16 | float znear, 17 | float zfar, 18 | float tan_half_fov, 19 | int image_size, 20 | bool compute_weight, 21 | bool draw_color, 22 | bool draw_depth, 23 | // Outputs 24 | torch::Tensor weights, 25 | torch::Tensor color_map, 26 | torch::Tensor depth_map, 27 | torch::Tensor pseudo_depth_map); 28 | 29 | void soft_rasterize_backward_cuda( 30 | // Inputs 31 | torch::Tensor mvps, 32 | torch::Tensor vertices, 33 | torch::Tensor radii, 34 | torch::Tensor colors, 35 | torch::Tensor weights, 36 | torch::Tensor grad_color_map, 37 | torch::Tensor grad_depth_map, 38 | float sigma, 
39 | float gamma, 40 | float dist_ratio, 41 | float znear, 42 | float zfar, 43 | float tan_half_fov, 44 | int image_size, 45 | bool draw_color, 46 | bool draw_depth, 47 | // Outputs 48 | torch::Tensor grad_mvps); 49 | 50 | 51 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDA tensor") 52 | #define CHECK_CONTIGUOUS(x) AT_CHECK(x.is_contiguous(), #x, " must be contiguous") 53 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 54 | 55 | 56 | void soft_rasterize_forward( 57 | // Inputs 58 | torch::Tensor mvps, 59 | torch::Tensor vertices, 60 | torch::Tensor radii, 61 | torch::Tensor colors, 62 | torch::Tensor locks, 63 | float sigma, 64 | float gamma, 65 | float dist_ratio, 66 | float znear, 67 | float zfar, 68 | float tan_half_fov, 69 | int image_size, 70 | bool compute_weight, 71 | bool draw_color, 72 | bool draw_depth, 73 | // Outputs 74 | torch::Tensor weights, 75 | torch::Tensor color_map, 76 | torch::Tensor depth_map, 77 | torch::Tensor pseudo_depth_map) { 78 | 79 | CHECK_INPUT(mvps); 80 | CHECK_INPUT(vertices); 81 | CHECK_INPUT(radii); 82 | CHECK_INPUT(colors); 83 | CHECK_INPUT(locks); 84 | CHECK_INPUT(weights); 85 | CHECK_INPUT(color_map); 86 | CHECK_INPUT(depth_map); 87 | CHECK_INPUT(pseudo_depth_map); 88 | 89 | return soft_rasterize_forward_cuda( 90 | mvps, 91 | vertices, 92 | radii, 93 | colors, 94 | locks, 95 | sigma, 96 | gamma, 97 | dist_ratio, 98 | znear, 99 | zfar, 100 | tan_half_fov, 101 | image_size, 102 | compute_weight, 103 | draw_color, 104 | draw_depth, 105 | weights, 106 | color_map, 107 | depth_map, 108 | pseudo_depth_map); 109 | } 110 | 111 | void soft_rasterize_backward( 112 | // Inputs 113 | torch::Tensor mvps, 114 | torch::Tensor vertices, 115 | torch::Tensor radii, 116 | torch::Tensor colors, 117 | torch::Tensor weights, 118 | torch::Tensor grad_color_map, 119 | torch::Tensor grad_depth_map, 120 | float sigma, 121 | float gamma, 122 | float dist_ratio, 123 | float znear, 124 | float zfar, 125 | float tan_half_fov, 126 | int image_size, 127 | bool draw_color, 128 | bool draw_depth, 129 | // Outputs 130 | torch::Tensor grad_mvps) { 131 | 132 | CHECK_INPUT(mvps); 133 | CHECK_INPUT(vertices); 134 | CHECK_INPUT(colors); 135 | CHECK_INPUT(weights); 136 | CHECK_INPUT(grad_color_map); 137 | CHECK_INPUT(grad_depth_map); 138 | CHECK_INPUT(grad_mvps); 139 | 140 | return soft_rasterize_backward_cuda( 141 | mvps, 142 | vertices, 143 | radii, 144 | colors, 145 | weights, 146 | grad_color_map, 147 | grad_depth_map, 148 | sigma, 149 | gamma, 150 | dist_ratio, 151 | znear, 152 | zfar, 153 | tan_half_fov, 154 | image_size, 155 | draw_color, 156 | draw_depth, 157 | grad_mvps); 158 | } 159 | 160 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 161 | m.def("soft_rasterize_forward", &soft_rasterize_forward, "Soft Rasterize - Forward Pass (CUDA)"); 162 | m.def("soft_rasterize_backward", &soft_rasterize_backward, "Soft Rasterize - Backward Pass (CUDA)"); 163 | } 164 | -------------------------------------------------------------------------------- /soft_renderer/cuda/soft_rasterize_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "utils.cuh" 2 | 3 | namespace { // kernel namespace 4 | 5 | template 6 | __device__ __forceinline__ scalar_t sigmoid(const scalar_t x) { 7 | return 1. / (1. 
+ exp(-x)); 8 | } 9 | 10 | template 11 | __device__ __forceinline__ scalar_t distance_p2p(const scalar_t x1, const scalar_t y1, const scalar_t x2, const scalar_t y2) { 12 | const scalar_t dx = x1 - x2; 13 | const scalar_t dy = y1 - y2; 14 | return sqrt(dx * dx + dy * dy); 15 | } 16 | 17 | template 18 | __device__ __forceinline__ scalar_t distance2_p2p(const scalar_t x1, const scalar_t y1, const scalar_t x2, const scalar_t y2) { 19 | const scalar_t dx = x1 - x2; 20 | const scalar_t dy = y1 - y2; 21 | return dx * dx + dy * dy; 22 | } 23 | 24 | template 25 | __device__ __forceinline__ void world_to_dc(const scalar_t* mvp, const scalar_t* xyz1, scalar_t* dc) { 26 | for (int r = 0; r < 4; ++r) { 27 | dc[r] = 0.; 28 | for (int c = 0; c < 4; ++c) { 29 | dc[r] += mvp[r * 4 + c] * xyz1[c]; 30 | } 31 | } 32 | } 33 | 34 | template 35 | __device__ __forceinline__ void dc_to_ndc(const scalar_t* dc, scalar_t* ndc) { 36 | for (int r = 0; r < 3; ++r) { 37 | ndc[r] = (dc[r] / dc[3] + 1.) * 0.5; 38 | } 39 | } 40 | 41 | template 42 | __global__ void soft_rasterize_forward_cuda_kernel( 43 | // Inputs 44 | const scalar_t* __restrict__ mvps, 45 | const scalar_t* __restrict__ vertices, 46 | const scalar_t* __restrict__ radii, 47 | const scalar_t* __restrict__ colors, 48 | index_t* __restrict__ locks, 49 | const scalar_t sigma, 50 | const scalar_t gamma, 51 | const scalar_t dist_ratio, 52 | const scalar_t znear, 53 | const scalar_t zfar, 54 | const scalar_t tan_half_fov, 55 | const index_t num_points, 56 | const index_t image_size, 57 | const index_t loops, 58 | const bool compute_weight, 59 | const bool draw_color, 60 | const bool draw_depth, 61 | // Outputs 62 | scalar_t* __restrict__ weights, 63 | scalar_t* __restrict__ color_map, 64 | scalar_t* __restrict__ depth_map, 65 | scalar_t* __restrict__ pseudo_depth_map) { 66 | 67 | const index_t i = blockIdx.x * blockDim.x + threadIdx.x; 68 | if (i >= loops) return; 69 | 70 | const index_t image_id = i / num_points; 71 | const index_t image_id3 = image_id * 3; 72 | const index_t point_id = i % num_points; 73 | const index_t point_id3 = point_id * 3; 74 | 75 | const scalar_t epsilon = 1e-5; 76 | 77 | const scalar_t* mvp = &mvps[image_id * 16]; 78 | scalar_t point_world[4] = {vertices[point_id3], vertices[point_id3 + 1], vertices[point_id3 + 2], 1.}; 79 | 80 | scalar_t point_dc[4] = {0., 0., 0., 0.}; 81 | scalar_t point[3] = {0., 0., 0.}; 82 | world_to_dc(mvp, point_world, point_dc); 83 | if (abs(point_dc[3]) < epsilon) return; 84 | dc_to_ndc(point_dc, point); 85 | 86 | if (point[0] < 0 || point[0] > 1 || point[1] < 0 || point[1] > 1 || point[2] < 0 || point[2] > 1) return; 87 | 88 | const index_t image_size2 = image_size * image_size; 89 | const scalar_t depth_cam = 2.0 * znear * zfar / (zfar + znear - (2. * point[2] - 1.) 
* (zfar - znear)); 90 | if (radii[point_id] < epsilon) return; 91 | const scalar_t radius = radii[point_id] / (tan_half_fov * depth_cam); 92 | scalar_t dist_thresh = radius; 93 | if (compute_weight) { 94 | dist_thresh *= dist_ratio; 95 | } 96 | 97 | const index_t px_min = max(floor((point[0] - dist_thresh) * image_size), 0.); 98 | const index_t px_max = min(ceil((point[0] + dist_thresh) * image_size), image_size - 1.); 99 | const index_t py_min = max(floor((point[1] - dist_thresh) * image_size), 0.); 100 | const index_t py_max = min(ceil((point[1] + dist_thresh) * image_size), image_size - 1.); 101 | 102 | for (index_t px = px_min; px <= px_max; ++px) { 103 | const scalar_t pxf = (scalar_t) px / (image_size - 1.); 104 | for (index_t py = py_min; py <= py_max; ++py) { 105 | const index_t pid = (image_size - 1 - py) * image_size + px; 106 | const scalar_t pyf = (scalar_t) py / (image_size - 1.); 107 | 108 | const scalar_t dist2 = distance2_p2p(point[0], point[1], pxf, pyf); 109 | const scalar_t dist = sqrt(dist2); 110 | 111 | const index_t gpid = image_id * image_size2 + pid; 112 | if (compute_weight) { 113 | if (dist > dist_thresh) continue; 114 | const scalar_t dist2_diff = dist2 - radius * radius; 115 | const scalar_t sign = dist2_diff > 0 ? -1 : 1; 116 | const scalar_t prob = sigmoid(sign * dist2_diff / (sigma * sigma)); 117 | const scalar_t wtop = prob * exp(-point[2] * gamma); 118 | atomicAdd(&weights[gpid], wtop); 119 | } 120 | 121 | if (dist > radius) continue; 122 | index_t locked = 0; 123 | do { 124 | if ((locked = atomicCAS(&locks[gpid], 0, 1)) == 0) { 125 | if (atomicAdd(&pseudo_depth_map[gpid], 0.) > point[2]) { 126 | atomicExch(&pseudo_depth_map[gpid], point[2]); 127 | if (draw_color) { 128 | const scalar_t* color = &colors[point_id3]; 129 | for (int k = 0; k < 3; ++k) { 130 | atomicExch(&color_map[(image_id3 + k) * image_size2 + pid], color[k]); 131 | } 132 | } 133 | if (draw_depth) { 134 | atomicExch(&depth_map[gpid], depth_cam); 135 | } 136 | } 137 | atomicExch(&locks[gpid], 0); 138 | } 139 | } while(locked > 0); 140 | } 141 | } 142 | } 143 | 144 | template 145 | __global__ void soft_rasterize_backward_cuda_kernel( 146 | // Inputs 147 | const scalar_t* __restrict__ mvps, 148 | const scalar_t* __restrict__ vertices, 149 | const scalar_t* __restrict__ radii, 150 | const scalar_t* __restrict__ colors, 151 | const scalar_t* __restrict__ weights, 152 | const scalar_t* __restrict__ grad_color_map, 153 | const scalar_t* __restrict__ grad_depth_map, 154 | const scalar_t sigma, 155 | const scalar_t gamma, 156 | const scalar_t dist_ratio, 157 | const scalar_t znear, 158 | const scalar_t zfar, 159 | const scalar_t tan_half_fov, 160 | const index_t num_points, 161 | const index_t image_size, 162 | const index_t loops, 163 | const bool draw_color, 164 | const bool draw_depth, 165 | // Outputs 166 | scalar_t* __restrict__ grad_mvps) { 167 | 168 | const index_t i = blockIdx.x * blockDim.x + threadIdx.x; 169 | if (i >= loops) return; 170 | 171 | const index_t image_id = i / num_points; 172 | const index_t image_id3 = image_id * 3; 173 | const index_t point_id = i % num_points; 174 | const index_t point_id3 = point_id * 3; 175 | 176 | const scalar_t epsilon = 1e-5; 177 | 178 | const scalar_t* mvp = &mvps[image_id * 16]; 179 | scalar_t point_world[4] = {vertices[point_id3], vertices[point_id3 + 1], vertices[point_id3 + 2], 1.}; 180 | 181 | scalar_t point_dc[4] = {0., 0., 0., 0.}; 182 | scalar_t point[3] = {0., 0., 0.}; 183 | world_to_dc(mvp, point_world, point_dc); 184 | if (abs(point_dc[3]) 
< epsilon) return; 185 | dc_to_ndc(point_dc, point); 186 | 187 | if (point[0] < 0 || point[0] > 1 || point[1] < 0 || point[1] > 1 || point[2] < 0 || point[2] > 1) return; 188 | 189 | const index_t image_size2 = image_size * image_size; 190 | const scalar_t depth_cam = 2.0 * znear * zfar / (zfar + znear - (2. * point[2] - 1.) * (zfar - znear)); 191 | if (radii[point_id] < epsilon) return; 192 | const scalar_t radius = radii[point_id] / (tan_half_fov * depth_cam); 193 | const scalar_t dist_thresh = dist_ratio * radius; 194 | 195 | const index_t px_min = max(floor((point[0] - dist_thresh) * image_size), 0.); 196 | const index_t px_max = min(ceil((point[0] + dist_thresh) * image_size), image_size - 1.); 197 | const index_t py_min = max(floor((point[1] - dist_thresh) * image_size), 0.); 198 | const index_t py_max = min(ceil((point[1] + dist_thresh) * image_size), image_size - 1.); 199 | 200 | const scalar_t d_dc_z_deno = znear * point[2] - zfar * (point[2] - 1); 201 | const scalar_t d_dc_z = zfar * znear * (zfar - znear) / (d_dc_z_deno * d_dc_z_deno); 202 | const scalar_t d_r_z = -(radii[point_id] / tan_half_fov) / (depth_cam * depth_cam) * d_dc_z; 203 | const scalar_t d_d2d_z = -2 * radius * d_r_z; 204 | 205 | scalar_t grad_point[3] = {0, 0, 0}; 206 | for (index_t px = px_min; px <= px_max; ++px) { 207 | const scalar_t pxf = (scalar_t) px / (image_size - 1.); 208 | for (index_t py = py_min; py <= py_max; ++py) { 209 | const index_t pid = (image_size - 1 - py) * image_size + px; 210 | const scalar_t pyf = (scalar_t) py / (image_size - 1.); 211 | 212 | const scalar_t dist2 = distance2_p2p(point[0], point[1], pxf, pyf); 213 | if (sqrt(dist2) > dist_thresh) continue; 214 | 215 | const scalar_t dist2_diff = dist2 - radius * radius; 216 | const scalar_t sign = dist2_diff > 0 ? 
-1 : 1; 217 | const scalar_t sis = sign / (sigma * sigma); 218 | const scalar_t prob = sigmoid(sis * dist2_diff); 219 | const scalar_t ezg = exp(-point[2] * gamma); 220 | const scalar_t wtop = prob * ezg; 221 | const scalar_t wsum = weights[image_id * image_size2 + pid]; 222 | const scalar_t pps = prob * (1 - prob) * sis; 223 | 224 | const scalar_t d_wtopsum_wtop = (wsum - wtop) / (wsum * wsum); 225 | 226 | const scalar_t d_prob_z = pps * d_d2d_z; 227 | const scalar_t d_ezg_z = -gamma * ezg; 228 | const scalar_t d_wtop_z = prob * d_ezg_z + ezg * d_prob_z; 229 | 230 | const scalar_t d_d2d_x = 2 * (point[0] - pxf); 231 | const scalar_t d_prob_x = pps * d_d2d_x; 232 | const scalar_t d_wtop_x = ezg * d_prob_x; 233 | 234 | const scalar_t d_d2d_y = 2 * (point[1] - pyf); 235 | const scalar_t d_prob_y = pps * d_d2d_y; 236 | const scalar_t d_wtop_y = ezg * d_prob_y; 237 | 238 | if (draw_color) { 239 | const scalar_t* color = &colors[point_id3]; 240 | for (int k = 0; k < 3; ++k) { 241 | const scalar_t d_l_c = grad_color_map[(image_id3 + k) * image_size2 + pid]; 242 | const scalar_t dcd = d_l_c * color[k] * d_wtopsum_wtop; 243 | grad_point[0] += dcd * d_wtop_x; 244 | grad_point[1] += dcd * d_wtop_y; 245 | grad_point[2] += dcd * d_wtop_z; 246 | } 247 | } 248 | if (draw_depth) { 249 | const scalar_t dcd = depth_cam * d_wtopsum_wtop; 250 | const scalar_t d_d_x = dcd * d_wtop_x; 251 | const scalar_t d_d_y = dcd * d_wtop_y; 252 | const scalar_t d_d_z = dcd * d_wtop_z + (wtop / wsum) * d_dc_z; 253 | 254 | const scalar_t d_l_d = grad_depth_map[image_id * image_size2 + pid]; 255 | grad_point[0] += d_l_d * d_d_x; 256 | grad_point[1] += d_l_d * d_d_y; 257 | grad_point[2] += d_l_d * d_d_z; 258 | } 259 | } 260 | } 261 | 262 | scalar_t* grad_mvp = &grad_mvps[image_id * 16]; 263 | for (int r = 0; r < 3; ++r) { 264 | const scalar_t tmp = grad_point[r] * 0.5 / point_dc[3]; 265 | for (int c = 0; c < 4; ++c) { 266 | atomicAdd(&grad_mvp[r * 4 + c], tmp * point_world[c]); 267 | } 268 | } 269 | for (int c = 0; c < 4; ++c) { 270 | const scalar_t tmp = -0.5 * point_world[c] / (point_dc[3] * point_dc[3]); 271 | scalar_t total = 0; 272 | for (int k = 0; k < 3; ++k) { 273 | total += grad_point[k] * tmp * point_dc[k]; 274 | } 275 | atomicAdd(&grad_mvp[12 + c], total); 276 | } 277 | } 278 | 279 | } // kernel namespace 280 | 281 | 282 | void soft_rasterize_forward_cuda( 283 | // Inputs 284 | torch::Tensor mvps, 285 | torch::Tensor vertices, 286 | torch::Tensor radii, 287 | torch::Tensor colors, 288 | torch::Tensor locks, 289 | float sigma, 290 | float gamma, 291 | float dist_ratio, 292 | float znear, 293 | float zfar, 294 | float tan_half_fov, 295 | int image_size, 296 | bool compute_weight, 297 | bool draw_color, 298 | bool draw_depth, 299 | // Outputs 300 | torch::Tensor weights, 301 | torch::Tensor color_map, 302 | torch::Tensor depth_map, 303 | torch::Tensor pseudo_depth_map) { 304 | 305 | if (mvps.dim() != 3) { 306 | fprintf(stderr,"\nSize of mvps is incorrect.\n"); 307 | exit(-1); 308 | } 309 | if (vertices.dim() != 2) { 310 | fprintf(stderr,"\nSize of vertices is incorrect.\n"); 311 | exit(-1); 312 | } 313 | if (compute_weight && weights.dim() != 4){ 314 | fprintf(stderr,"\nSize of weights is incorrect.\n"); 315 | exit(-1); 316 | } 317 | 318 | const auto num_image = mvps.size(0); 319 | const auto num_points = vertices.size(0); 320 | 321 | const int loops = num_image * num_points; 322 | const int threads = MAX_THREADS; 323 | const int blocks = gpu_blocks(loops, threads); 324 | 325 | soft_rasterize_forward_cuda_kernel<<>>( 
326 | mvps.data(), 327 | vertices.data(), 328 | radii.data(), 329 | colors.data(), 330 | locks.data(), 331 | sigma, 332 | gamma, 333 | dist_ratio, 334 | znear, 335 | zfar, 336 | tan_half_fov, 337 | num_points, 338 | image_size, 339 | loops, 340 | compute_weight, 341 | draw_color, 342 | draw_depth, 343 | weights.data(), 344 | color_map.data(), 345 | depth_map.data(), 346 | pseudo_depth_map.data()); 347 | GPU_ERROR_CHECK(cudaPeekAtLastError()); 348 | GPU_ERROR_CHECK(cudaDeviceSynchronize()); 349 | } 350 | 351 | void soft_rasterize_backward_cuda( 352 | // Inputs 353 | torch::Tensor mvps, 354 | torch::Tensor vertices, 355 | torch::Tensor radii, 356 | torch::Tensor colors, 357 | torch::Tensor weights, 358 | torch::Tensor grad_color_map, 359 | torch::Tensor grad_depth_map, 360 | float sigma, 361 | float gamma, 362 | float dist_ratio, 363 | float znear, 364 | float zfar, 365 | float tan_half_fov, 366 | int image_size, 367 | bool draw_color, 368 | bool draw_depth, 369 | // Outputs 370 | torch::Tensor grad_mvps) { 371 | 372 | if (mvps.dim() != 3) { 373 | fprintf(stderr,"\nSize of mvps is incorrect.\n"); 374 | exit(-1); 375 | } 376 | if (vertices.dim() != 2) { 377 | fprintf(stderr,"\nSize of vertices is incorrect.\n"); 378 | exit(-1); 379 | } 380 | if (weights.dim() != 4){ 381 | fprintf(stderr,"\nSize of weights is incorrect.\n"); 382 | exit(-1); 383 | } 384 | 385 | const auto num_image = mvps.size(0); 386 | const auto num_points = vertices.size(0); 387 | 388 | const int loops = num_image * num_points; 389 | const int threads = MAX_THREADS; 390 | const int blocks = gpu_blocks(loops, threads); 391 | 392 | soft_rasterize_backward_cuda_kernel<<>>( 393 | mvps.data(), 394 | vertices.data(), 395 | radii.data(), 396 | colors.data(), 397 | weights.data(), 398 | grad_color_map.data(), 399 | grad_depth_map.data(), 400 | sigma, 401 | gamma, 402 | dist_ratio, 403 | znear, 404 | zfar, 405 | tan_half_fov, 406 | num_points, 407 | image_size, 408 | loops, 409 | draw_color, 410 | draw_depth, 411 | grad_mvps.data()); 412 | GPU_ERROR_CHECK(cudaPeekAtLastError()); 413 | GPU_ERROR_CHECK(cudaDeviceSynchronize()); 414 | } 415 | -------------------------------------------------------------------------------- /soft_renderer/cuda/utils.cuh: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #ifndef MAX_THREADS 11 | #define MAX_THREADS 512 12 | #endif 13 | 14 | #if __CUDA_ARCH__ < 600 and defined(__CUDA_ARCH__) 15 | static __inline__ __device__ double atomicAdd(double* address, double val) { 16 | unsigned long long int* address_as_ull = (unsigned long long int*)address; 17 | unsigned long long int old = *address_as_ull, assumed; 18 | do { 19 | assumed = old; 20 | old = atomicCAS(address_as_ull, assumed, 21 | __double_as_longlong(val + __longlong_as_double(assumed))); 22 | } while (assumed != old); 23 | return __longlong_as_double(old); 24 | } 25 | #endif 26 | 27 | #define GPU_ERROR_CHECK(ans) {gpu_assert((ans), __FILE__, __LINE__);} 28 | inline void gpu_assert(cudaError_t code, const char *file, int line, bool abort=true) { 29 | if (code != cudaSuccess) { 30 | fprintf(stderr,"\nGPU Assert: %s %s %d\n", cudaGetErrorString(code), file, line); 31 | if (abort) 32 | exit(code); 33 | } 34 | } 35 | 36 | inline int64_t gpu_blocks(int64_t total_threads, int64_t threads_per_block) { 37 | return (total_threads + threads_per_block - 1) / threads_per_block; 38 | } 39 | 
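For intuition about the kernels above: at every pixel a point covers, the forward kernel accumulates a soft visibility weight w = sigmoid(sign * (d^2 - r^2) / sigma^2) * exp(-gamma * z), with sign = +1 inside the projected radius and -1 outside, and the backward kernel effectively differentiates a depth map formed as the weight-normalized average of per-point camera depths. The snippet below is only a minimal PyTorch sketch of that weighting for a single pixel (e.g. for sanity-checking gradients on CPU); the names `soft_point_weights`, `z_ndc`, and `depth_cam` are illustrative and not part of the extension's API.

```
import torch

def soft_point_weights(dist2, radius, z_ndc, depth_cam, sigma=1. / 64., gamma=5.):
    # Soft visibility weight per point, mirroring soft_rasterize_cuda_kernel.cu:
    #   prob = sigmoid(sign * (d^2 - r^2) / sigma^2), sign = +1 inside the splat, -1 outside
    #   w    = prob * exp(-gamma * z_ndc)
    dist2_diff = dist2 - radius * radius
    sign = torch.where(dist2_diff > 0., -torch.ones_like(dist2_diff), torch.ones_like(dist2_diff))
    prob = torch.sigmoid(sign * dist2_diff / (sigma * sigma))
    weights = prob * torch.exp(-gamma * z_ndc)
    # Soft depth at the pixel: weight-normalized average of camera-space depths,
    # i.e. the quantity whose gradients the backward kernel pushes into the MVP matrices.
    soft_depth = torch.sum(weights * depth_cam) / torch.clamp(torch.sum(weights), min=1e-8)
    return weights, soft_depth

# Toy usage: three points covering one pixel at increasing squared distances.
w, d = soft_point_weights(dist2=torch.tensor([0.0, 1e-4, 4e-4]),
                          radius=torch.tensor([0.02, 0.02, 0.02]),
                          z_ndc=torch.tensor([0.3, 0.5, 0.7]),
                          depth_cam=torch.tensor([1.0, 2.0, 3.0]))
```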
-------------------------------------------------------------------------------- /soft_renderer/transform.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | 4 | import math 5 | import numpy as np 6 | import os.path as osp 7 | import sys 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | import time 12 | 13 | ROOT_DIR = osp.abspath('../') 14 | if ROOT_DIR not in sys.path: 15 | sys.path.append(ROOT_DIR) 16 | 17 | from soft_renderer.cuda.jit import soft_rasterize_cuda as src 18 | 19 | 20 | def rotate_about(a, b, theta): 21 | a = a.view(1, 1, 1, -1) 22 | b = b.view(b.size(0), 1, b.size(1), b.size(3)) 23 | theta = theta.view(1, -1, 1, 1) 24 | 25 | ab_dot = torch.sum(a * b, dim=3, keepdim=True) 26 | bb_dot = torch.sum(b * b, dim=3, keepdim=True) 27 | par = (ab_dot / bb_dot) * b 28 | perp = a - par 29 | w = torch.cross(b, perp, dim=3) 30 | 31 | perp_norm = torch.norm(perp, p=2, dim=3, keepdim=True) 32 | res = par + perp * torch.cos(theta) + perp_norm * F.normalize(w, dim=3) * torch.sin( 33 | theta) 34 | return res.view(res.size(0), -1, 1, res.size(3)) 35 | 36 | 37 | def init_eye_xyz_coords(num_views, 38 | rho_min=0.3, 39 | rho_max=1.0, 40 | phi_min=0., 41 | phi_max=math.pi / 2., 42 | theta_min=0., 43 | theta_max=2. * math.pi): 44 | shape = (num_views,) 45 | rho = np.random.uniform(rho_min, rho_max, shape) 46 | phi = np.random.uniform(phi_min, phi_max, shape) 47 | theta = np.random.uniform(theta_min, theta_max, shape) 48 | 49 | local_x = rho * np.sin(phi) * np.cos(theta) 50 | local_y = rho * np.sin(phi) * np.sin(theta) 51 | local_z = rho * np.cos(phi) 52 | eye = np.stack((local_x, local_y, local_z), axis=1) 53 | return eye 54 | 55 | 56 | def spherical_to_xyz_coords(rho, phi, theta): 57 | x = rho * torch.sin(phi) * torch.cos(theta) 58 | y = rho * torch.sin(phi) * torch.sin(theta) 59 | z = rho * torch.cos(phi) 60 | xyz = torch.stack((x, y, z), dim=1) 61 | return xyz 62 | 63 | 64 | def look_at(centers, normals, up, eye_lcs, thetas=None): 65 | V = centers.size(0) 66 | N = eye_lcs.size(0) 67 | up = torch.unsqueeze(up, dim=0) 68 | 69 | axis_z = F.normalize(normals, dim=1) 70 | axis_y = F.normalize(up, dim=1).repeat(V, 1) 71 | axis_x = F.normalize(torch.cross(axis_y, axis_z, dim=1), dim=1) 72 | axis_y = F.normalize(torch.cross(axis_z, axis_x, dim=1), dim=1) 73 | axis = torch.cat((torch.unsqueeze(axis_x, dim=1), torch.unsqueeze( 74 | axis_y, dim=1), torch.unsqueeze(axis_z, dim=1)), 75 | dim=1) 76 | centers = centers.view(-1, 1, 1, 3) 77 | eye = torch.matmul(eye_lcs.view(1, -1, 1, 3), axis.view(-1, 1, 3, 78 | 3)) + centers 79 | 80 | front_dir = F.normalize(eye - centers, dim=3) 81 | up_dir = F.normalize(up, dim=1) 82 | if thetas is None: 83 | up_dir = up.view(1, 1, 1, -1).repeat(V, N, 1, 1) 84 | right_dir = F.normalize(torch.cross(up_dir, front_dir, dim=3), dim=3) 85 | up_dir = F.normalize(torch.cross(front_dir, right_dir, dim=3), dim=3) 86 | 87 | mat = torch.cat((right_dir, up_dir, front_dir), dim=2) 88 | vec = -1. 
* torch.matmul(mat, torch.transpose(eye, 2, 3)) 89 | mat = torch.cat((mat, vec), dim=3) 90 | 91 | vec = torch.tensor([0.0, 0.0, 0.0, 1.0], dtype=centers.dtype, 92 | device=centers.device) 93 | mat = torch.cat((mat, vec.view(1, 1, 1, 4).repeat(V, N, 1, 1)), dim=2) 94 | else: 95 | R = thetas.size(0) 96 | up_dir = rotate_about(up_dir, front_dir, thetas) 97 | front_dir = front_dir.repeat(1, R, 1, 1) 98 | right_dir = F.normalize(torch.cross(up_dir, front_dir, dim=3), dim=3) 99 | up_dir = F.normalize(torch.cross(front_dir, right_dir, dim=3), dim=3) 100 | 101 | mat = torch.cat((right_dir, up_dir, front_dir), dim=2) 102 | eye = eye.repeat(1, R, 1, 1) 103 | vec = -1. * torch.matmul(mat, torch.transpose(eye, 2, 3)) 104 | mat = torch.cat((mat, vec), dim=3) 105 | vec = torch.tensor([0.0, 0.0, 0.0, 1.0], dtype=centers.dtype, 106 | device=centers.device) 107 | mat = torch.cat((mat, vec.view(1, 1, 1, 4).repeat(V, N * R, 1, 1)), 108 | dim=2) 109 | return mat 110 | 111 | 112 | def look(eye, direction, up): 113 | V, N = eye.size(0), eye.size(1) 114 | front_dir = F.normalize(-direction, dim=2) 115 | up_dir = F.normalize(up.view(1, 1, 3), dim=2) 116 | up_dir = up_dir.repeat(V, N, 1) 117 | right_dir = F.normalize(torch.cross(up_dir, front_dir, dim=2), dim=2) 118 | up_dir = F.normalize(torch.cross(front_dir, right_dir, dim=2), dim=2) 119 | 120 | mat = torch.cat((torch.unsqueeze(right_dir, dim=2), torch.unsqueeze( 121 | up_dir, dim=2), torch.unsqueeze(front_dir, dim=2)), 122 | dim=2) 123 | vec = -1. * torch.matmul(mat, torch.unsqueeze(eye, dim=3)) 124 | mat = torch.cat((mat, vec), dim=3) 125 | 126 | vec = torch.tensor([0.0, 0.0, 0.0, 1.0], dtype=mat.dtype, device=mat.device) 127 | mat = torch.cat((mat, vec.view(1, 1, 1, 4).repeat(V, N, 1, 1)), dim=2) 128 | return mat 129 | 130 | 131 | def projection_matrix(field_of_view, aspect, znear, zfar): 132 | mat = np.zeros((4, 4), dtype=np.float32) 133 | fov_rad = field_of_view / 180.0 * math.pi 134 | tan_half_fov = math.tan(fov_rad / 2.0) 135 | mat[0, 0] = 1.0 / aspect / tan_half_fov 136 | mat[1, 1] = 1.0 / tan_half_fov 137 | mat[2, 2] = -(zfar + znear) / (zfar - znear) 138 | mat[3, 2] = -1.0 139 | mat[2, 3] = -2.0 * zfar * znear / (zfar - znear) 140 | return torch.from_numpy(mat) 141 | 142 | 143 | class SoftRasterizeFunc(torch.autograd.Function): 144 | 145 | @staticmethod 146 | def forward( 147 | ctx, 148 | mvps, 149 | vertices, 150 | radii, 151 | colors, 152 | sigma, 153 | gamma, 154 | dist_ratio, 155 | znear, 156 | zfar, 157 | tan_half_fov, 158 | image_size, 159 | compute_weight, 160 | draw_color, 161 | draw_depth): 162 | 163 | assert draw_color != draw_depth 164 | 165 | V, N = mvps.size(0), mvps.size(1) 166 | num_images = V * N 167 | device = mvps.device 168 | dtype = mvps.dtype 169 | 170 | if colors is None: 171 | colors = torch.tensor([], dtype=dtype, device=device) 172 | if compute_weight: 173 | weights = torch.ones( 174 | num_images, 1, image_size, image_size, dtype=dtype, device=device) * 1e-8 175 | else: 176 | weights = torch.tensor([], dtype=dtype, device=device) 177 | if draw_color: 178 | color_map = torch.zeros(num_images, 179 | 3, 180 | image_size, 181 | image_size, 182 | dtype=dtype, 183 | device=device) 184 | else: 185 | color_map = torch.tensor([], dtype=dtype, device=device) 186 | if draw_depth: 187 | depth_map = torch.zeros(num_images, 188 | 1, 189 | image_size, 190 | image_size, 191 | dtype=dtype, 192 | device=device) 193 | else: 194 | depth_map = torch.tensor([], dtype=dtype, device=device) 195 | 196 | pseudo_depth_map = torch.ones(num_images, 197 | 
1, 198 | image_size, 199 | image_size, 200 | dtype=dtype, 201 | device=device) 202 | locks = torch.zeros_like(pseudo_depth_map, dtype=torch.int32) 203 | 204 | src.soft_rasterize_forward(mvps.view(-1, 4, 4), vertices, radii, colors, locks, sigma, 205 | gamma, dist_ratio, znear, zfar, tan_half_fov, image_size, 206 | compute_weight, draw_color, draw_depth, weights, color_map, 207 | depth_map, pseudo_depth_map) 208 | 209 | ctx.save_for_backward(mvps, vertices, radii, colors, weights) 210 | ctx.sigma = sigma 211 | ctx.gamma = gamma 212 | ctx.dist_ratio = dist_ratio 213 | ctx.znear = znear 214 | ctx.zfar = zfar 215 | ctx.tan_half_fov = tan_half_fov 216 | ctx.image_size = image_size 217 | ctx.compute_weight = compute_weight 218 | ctx.draw_color = draw_color 219 | ctx.draw_depth = draw_depth 220 | 221 | if draw_color: 222 | return color_map.view(V, N, -1, image_size, image_size) 223 | if draw_depth: 224 | return depth_map.view(V, N, -1, image_size, image_size) 225 | 226 | @staticmethod 227 | def backward(ctx, grad_map): 228 | mvps, vertices, radii, colors, weights = ctx.saved_tensors 229 | V, N = mvps.size(0), mvps.size(1) 230 | num_images = V * N 231 | device = mvps.device 232 | dtype = mvps.dtype 233 | 234 | if ctx.draw_color: 235 | grad_color_map = grad_map.view(num_images, -1, ctx.image_size, ctx.image_size) 236 | else: 237 | grad_color_map = torch.tensor([], dtype=dtype, device=device) 238 | if ctx.draw_depth: 239 | grad_depth_map = grad_map.view(num_images, -1, ctx.image_size, ctx.image_size) 240 | else: 241 | grad_depth_map = torch.tensor([], dtype=dtype, device=device) 242 | 243 | grad_mvps = torch.zeros_like(mvps) 244 | 245 | src.soft_rasterize_backward(mvps.view(-1, 4, 4), vertices, radii, colors, weights, 246 | grad_color_map, grad_depth_map, ctx.sigma, ctx.gamma, 247 | ctx.dist_ratio, ctx.znear, ctx.zfar, ctx.tan_half_fov, 248 | ctx.image_size, ctx.draw_color, ctx.draw_depth, grad_mvps) 249 | 250 | return grad_mvps, None, None, None, None, None, None, None, None, None, None, None, None, None 251 | 252 | 253 | class MultiViewRenderer(nn.Module): 254 | 255 | def __init__(self, 256 | num_views, 257 | num_rotations=0, 258 | field_of_view=60., 259 | aspect=1., 260 | znear=0.1, 261 | zfar=6., 262 | image_size=64, 263 | sigma=1. / 64., 264 | gamma=5., 265 | dist_ratio=5., 266 | dist_factor=1.0, 267 | radius_ratio=0.25, 268 | draw_color=False, 269 | draw_depth=True, 270 | trainable=True): 271 | super().__init__() 272 | self.num_views = num_views 273 | self.num_rotations = num_rotations 274 | self.field_of_view = field_of_view 275 | self.aspect = aspect 276 | self.znear = znear 277 | self.zfar = zfar 278 | self.image_size = image_size 279 | self.sigma = sigma 280 | self.gamma = gamma 281 | self.dist_ratio = dist_ratio 282 | self.dist_factor = dist_factor 283 | self.radius_ratio = radius_ratio 284 | self.draw_color = draw_color 285 | self.draw_depth = draw_depth 286 | self.trainable = trainable 287 | self.tan_half_fov = math.tan(field_of_view / 180.0 * math.pi / 2.0) 288 | 289 | self.rho_min = 0.3 290 | self.rho_max = 1.0 291 | self.phi_min = 0 292 | self.phi_max = math.pi / 2. 293 | self.theta_min = 0 294 | self.theta_max = 2. * math.pi 295 | self.rot_min = 0 296 | self.rot_max = 2. 
* math.pi 297 | 298 | proj_mat = projection_matrix(field_of_view, aspect, znear, zfar) 299 | self.register_buffer('proj_mat', proj_mat) 300 | 301 | rho = np.random.uniform(self.rho_min, self.rho_max, (num_views,)) 302 | phi = np.random.uniform(self.phi_min, self.phi_max, (num_views,)) 303 | theta = np.random.uniform(self.theta_min, self.theta_max, (num_views,)) 304 | 305 | rho = torch.tensor(rho, dtype=torch.float32) 306 | phi = torch.tensor(phi, dtype=torch.float32) 307 | theta = torch.tensor(theta, dtype=torch.float32) 308 | up = torch.tensor([0., -1., 0.], dtype=torch.float32) 309 | if self.num_rotations > 0: 310 | rot = torch.tensor(np.random.uniform(self.rot_min, self.rot_max, (num_rotations,)), 311 | dtype=torch.float32) 312 | if self.trainable: 313 | self.rho = nn.Parameter(rho) 314 | self.phi = nn.Parameter(phi) 315 | self.theta = nn.Parameter(theta) 316 | self.up = nn.Parameter(up) 317 | if self.num_rotations > 0: 318 | self.rot = nn.Parameter(rot) 319 | else: 320 | self.register_buffer('rho', rho) 321 | self.register_buffer('phi', phi) 322 | self.register_buffer('theta', theta) 323 | self.register_buffer('up', up) 324 | if self.num_rotations > 0: 325 | self.register_buffer('rot', rot) 326 | 327 | def constraints(self): 328 | if not self.trainable: 329 | raise RuntimeError('Renderer is not trainable.') 330 | 331 | params = [self.rho, self.phi, self.theta] 332 | min_vals = [self.rho_min, self.phi_min, self.theta_min] 333 | max_vals = [self.rho_max, self.phi_max, self.theta_max] 334 | if self.num_rotations > 0: 335 | params.append(self.rot) 336 | min_vals.append(self.rot_min) 337 | max_vals.append(self.rot_max) 338 | 339 | res = None 340 | for i in range(len(params)): 341 | mid = torch.ones_like(params[i]) * ((min_vals[i] + max_vals[i]) / 2.) 342 | rng = torch.ones_like(params[i]) * ((max_vals[i] - min_vals[i]) / 2.) 
343 | diff = torch.mean(F.relu(torch.abs(params[i] - mid) - rng)) 344 | if res is None: 345 | res = diff 346 | else: 347 | res += diff 348 | return res 349 | 350 | def forward(self, vertices, radii, colors, at_centers, at_normals): 351 | compute_weight = self.trainable and self.training 352 | 353 | rho = torch.clamp(self.rho, self.rho_min, self.rho_max) 354 | phi = torch.clamp(self.phi, self.phi_min, self.phi_max) 355 | theta = torch.clamp(self.theta, self.theta_min, self.theta_max) 356 | if self.num_rotations > 0: 357 | rot = torch.clamp(self.rot, self.rot_min, self.rot_max) 358 | else: 359 | rot = None 360 | eye_lcs = spherical_to_xyz_coords(rho * self.dist_factor, phi, theta) 361 | mv_mat = look_at(at_centers, at_normals, self.up, eye_lcs, rot) 362 | mvp_mat = torch.matmul(self.proj_mat.view(1, 1, 4, 4), mv_mat) 363 | 364 | images = SoftRasterizeFunc.apply(mvp_mat, vertices, radii * self.radius_ratio, colors, 365 | self.sigma, self.gamma, self.dist_ratio, self.znear, 366 | self.zfar, self.tan_half_fov, self.image_size, 367 | compute_weight, self.draw_color, self.draw_depth) 368 | return images, mv_mat 369 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/craigleili/3DLocalMultiViewDesc/eb7d6912886da851e673e6a8528e9ba04001d73a/utils/__init__.py -------------------------------------------------------------------------------- /utils/io.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | 4 | from collections import defaultdict 5 | from pathlib import Path 6 | import cv2 7 | import json 8 | import numpy as np 9 | import os 10 | import os.path as osp 11 | import re 12 | import shutil 13 | 14 | 15 | def is_number(s): 16 | try: 17 | float(s) 18 | return True 19 | except ValueError: 20 | return False 21 | 22 | 23 | def may_create_folder(folder_path): 24 | if not osp.exists(folder_path): 25 | oldmask = os.umask(000) 26 | os.makedirs(folder_path, mode=0o777) 27 | os.umask(oldmask) 28 | return True 29 | return False 30 | 31 | 32 | def make_clean_folder(folder_path): 33 | success = may_create_folder(folder_path) 34 | if not success: 35 | shutil.rmtree(folder_path) 36 | may_create_folder(folder_path) 37 | 38 | 39 | def sorted_alphanum(file_list_ordered): 40 | convert = lambda text: int(text) if text.isdigit() else text 41 | alphanum_key = lambda key: [convert(c) for c in re.split('([0-9]+)', key) if len(c) > 0] 42 | return sorted(file_list_ordered, key=alphanum_key) 43 | 44 | 45 | def list_files(folder_path, name_filter, alphanum_sort=False): 46 | file_list = [p.name for p in list(Path(folder_path).glob(name_filter))] 47 | if alphanum_sort: 48 | return sorted_alphanum(file_list) 49 | else: 50 | return sorted(file_list) 51 | 52 | 53 | def list_folders(folder_path, name_filter=None, alphanum_sort=False): 54 | folders = list() 55 | for subfolder in Path(folder_path).iterdir(): 56 | if subfolder.is_dir() and not subfolder.name.startswith('.'): 57 | folder_name = subfolder.name 58 | if name_filter is not None: 59 | if name_filter in folder_name: 60 | folders.append(folder_name) 61 | else: 62 | folders.append(folder_name) 63 | if alphanum_sort: 64 | return sorted_alphanum(folders) 65 | else: 66 | return sorted(folders) 67 | 68 | 69 | def read_lines(file_path): 70 | """ 71 | :param file_path: 72 | :return: 73 | """ 74 | with 
open(file_path, 'r') as fin: 75 | lines = [line.strip() for line in fin.readlines() if len(line.strip()) > 0] 76 | return lines 77 | 78 | 79 | def read_json(filepath): 80 | with open(filepath, 'r') as fh: 81 | ret = json.load(fh) 82 | return ret 83 | 84 | 85 | def last_log_folder(root_folder, prefix, digits=3): 86 | prefix_len = len(prefix) 87 | tmp = list() 88 | for folder in list_folders(root_folder, alphanum_sort=True): 89 | if not folder.startswith(prefix): 90 | continue 91 | assert not is_number(folder[prefix_len + digits]) 92 | tmp.append((int(folder[prefix_len:prefix_len + digits]), folder)) 93 | if len(tmp) == 0: 94 | return 0, None 95 | else: 96 | tmp = sorted(tmp, key=lambda tup: tup[0]) 97 | return tmp[-1][0], tmp[-1][1] 98 | 99 | 100 | def new_log_folder(root_folder, prefix, digits=3): 101 | idx, _ = last_log_folder(root_folder, prefix, digits) 102 | tmp = prefix + '{:0' + str(digits) + 'd}' 103 | assert idx + 1 < 10**digits 104 | return tmp.format(idx + 1) 105 | 106 | 107 | def last_checkpoint(root_folder, prefix): 108 | tmp = defaultdict(list) 109 | for file in list_files(root_folder, '{}*.pth'.format(prefix), alphanum_sort=True): 110 | stem = file[:-4] 111 | values = stem.split('_') 112 | tmp[values[1]].append(int(values[-1])) 113 | for k, v in tmp.items(): 114 | return prefix + '_{}_' + str(sorted(v)[-1]) + '.pth' 115 | 116 | 117 | def read_color_image(file_path): 118 | img = cv2.imread(file_path) 119 | return img[..., ::-1] 120 | 121 | 122 | def read_gray_image(file_path): 123 | img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE) 124 | return img 125 | 126 | 127 | def read_16bit_image(file_path): 128 | img = cv2.imread(file_path, cv2.IMREAD_UNCHANGED) 129 | return img 130 | 131 | 132 | def write_color_image(file_path, image): 133 | cv2.imwrite(file_path, image[..., ::-1]) 134 | return file_path 135 | 136 | 137 | def write_gray_image(file_path, image): 138 | cv2.imwrite(file_path, image) 139 | return file_path 140 | 141 | 142 | def write_image(file_path, image): 143 | if image.ndim == 2: 144 | return write_gray_image(file_path, image) 145 | elif image.ndim == 3: 146 | return write_color_image(file_path, image) 147 | else: 148 | raise RuntimeError('Image dimensions are not correct!') 149 | 150 | def read_pcds(root_folder, transform): 151 | import open3d as o3d 152 | 153 | ret = dict() 154 | for pcd_name in list_files(root_folder, '*.pcd', alphanum_sort=True): 155 | pcd_path = osp.join(root_folder, pcd_name) 156 | pcd_stem = pcd_name[:-4] 157 | pcloud = o3d.io.read_point_cloud(pcd_path) 158 | if transform: 159 | vp_path = osp.join(root_folder, pcd_stem + '.vp.json') 160 | vparams = read_json(vp_path) 161 | modelview = np.asarray(vparams['modelview_matrix'], np.float32) 162 | modelview = np.reshape(modelview, (4, 4)).T 163 | modelview_inv = np.linalg.inv(modelview) 164 | pcloud.transform(modelview_inv) 165 | ret[pcd_stem] = np.asarray(pcloud.points) 166 | return ret 167 | 168 | 169 | def write_correspondence_ply(file_path, 170 | pcloudi, 171 | pcloudj, 172 | edges, 173 | colori=(255, 255, 0), 174 | colorj=(255, 0, 0), 175 | edge_color=(255, 255, 255)): 176 | num_pointsi = len(pcloudi) 177 | num_pointsj = len(pcloudj) 178 | num_points = num_pointsi + num_pointsj 179 | with open(file_path, 'w') as fh: 180 | fh.write('ply\n') 181 | fh.write('format ascii 1.0\n') 182 | fh.write('element vertex {}\n'.format(num_points)) 183 | fh.write('property float x\n') 184 | fh.write('property float y\n') 185 | fh.write('property float z\n') 186 | fh.write('property uchar red\n') 187 | 
fh.write('property uchar green\n') 188 | fh.write('property uchar blue\n') 189 | fh.write('element edge {}\n'.format(len(edges))) 190 | fh.write('property int vertex1\n') 191 | fh.write('property int vertex2\n') 192 | fh.write('property uchar red\n') 193 | fh.write('property uchar green\n') 194 | fh.write('property uchar blue\n') 195 | fh.write('end_header\n') 196 | 197 | for k in range(num_pointsi): 198 | fh.write('{} {} {} {} {} {}\n'.format(pcloudi[k, 0], pcloudi[k, 1], pcloudi[k, 2], 199 | colori[0], colori[1], colori[2])) 200 | for k in range(num_pointsj): 201 | fh.write('{} {} {} {} {} {}\n'.format(pcloudj[k, 0], pcloudj[k, 1], pcloudj[k, 2], 202 | colorj[0], colorj[1], colorj[2])) 203 | for k in range(len(edges)): 204 | fh.write('{} {} {} {} {}\n'.format(edges[k][0], edges[k][1] + num_pointsi, 205 | edge_color[0], edge_color[1], edge_color[2])) 206 | -------------------------------------------------------------------------------- /utils/log.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import division 3 | 4 | from .io import may_create_folder 5 | import os 6 | import os.path as osp 7 | import sys 8 | 9 | 10 | class Logger(object): 11 | 12 | def __init__(self, fpath=None): 13 | self.console = sys.stdout 14 | self.file = None 15 | if fpath is not None: 16 | may_create_folder(osp.dirname(fpath)) 17 | self.file = open(fpath, 'w') 18 | 19 | def __del__(self): 20 | self.close() 21 | 22 | def __enter__(self): 23 | pass 24 | 25 | def __exit__(self, *args): 26 | self.close() 27 | 28 | def write(self, msg): 29 | self.console.write(msg) 30 | if self.file is not None: 31 | self.file.write(msg) 32 | 33 | def flush(self): 34 | self.console.flush() 35 | if self.file is not None: 36 | self.file.flush() 37 | os.fsync(self.file.fileno()) 38 | 39 | def close(self): 40 | self.console.close() 41 | if self.file is not None: 42 | self.file.close() 43 | -------------------------------------------------------------------------------- /utils/loss.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import division 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | 9 | def pairwise_distance_matrix(x, y, eps=1e-6): 10 | M, N = x.size(0), y.size(0) 11 | x2 = torch.sum(x * x, dim=1, keepdim=True).repeat(1, N) 12 | y2 = torch.sum(y * y, dim=1, keepdim=True).repeat(1, M) 13 | dist2 = x2 + torch.t(y2) - 2.0 * torch.matmul(x, torch.t(y)) 14 | dist2 = torch.clamp(dist2, min=eps) 15 | return torch.sqrt(dist2) 16 | 17 | 18 | def batch_hard_mining(dist_mat, labels): 19 | assert len(dist_mat.size()) == 2 and len(labels.size()) == 1 20 | assert dist_mat.size(0) == dist_mat.size(1) == labels.size(0) 21 | 22 | N = dist_mat.size(0) 23 | labels_NN = labels.view(N, 1).expand(N, N) 24 | is_pos = labels_NN.eq(labels_NN.t()) 25 | is_neg = labels_NN.ne(labels_NN.t()) 26 | dist_ap, _ = torch.max(torch.reshape(dist_mat[is_pos], (N, -1)), 1, keepdim=False) 27 | dist_an, _ = torch.min(torch.reshape(dist_mat[is_neg], (N, -1)), 1, keepdim=False) 28 | return dist_ap, dist_an 29 | 30 | 31 | def batch_hard_negative_mining(dist_mat): 32 | M, N = dist_mat.size(0), dist_mat.size(1) 33 | assert M == N 34 | labels = torch.arange(N, device=dist_mat.device).view(N, 1).expand(N, N) 35 | is_neg = labels.ne(labels.t()) 36 | dist_an, _ = torch.min(torch.reshape(dist_mat[is_neg], (N, -1)), 1, keepdim=False) 37 | return 
dist_an 38 | 39 | 40 | def batch_hard_positive_mining(dist_mat, labels): 41 | assert len(dist_mat.size()) == 2 and len(labels.size()) == 1 42 | assert dist_mat.size(0) == dist_mat.size(1) == labels.size(0) 43 | 44 | N = dist_mat.size(0) 45 | labels_NN = labels.view(N, 1).expand(N, N) 46 | is_pos = labels_NN.eq(labels_NN.t()) 47 | dist_ap, _ = torch.max(torch.reshape(dist_mat[is_pos], (N, -1)), 1, keepdim=False) 48 | return dist_ap 49 | 50 | 51 | class BatchHardLoss(nn.Module): 52 | 53 | def __init__(self, margin=1.0): 54 | super().__init__() 55 | self.margin = margin 56 | if margin is not None: 57 | self.loss = nn.MarginRankingLoss(margin=margin) 58 | else: 59 | self.loss = nn.SoftMarginLoss() 60 | 61 | def forward(self, features, labels): 62 | dist_mat = pairwise_distance_matrix(features, features) 63 | dist_ap, dist_an = batch_hard_mining(dist_mat, labels) 64 | y = torch.ones_like(dist_an) 65 | if self.margin is not None: 66 | loss = self.loss(dist_an, dist_ap, y) 67 | else: 68 | loss = self.loss(dist_an - dist_ap, y) 69 | return loss 70 | 71 | 72 | class BatchHardNegativeLoss(nn.Module): 73 | 74 | def __init__(self, margin=1.0): 75 | super().__init__() 76 | self.margin = margin 77 | if margin is not None: 78 | self.loss = nn.MarginRankingLoss(margin=margin) 79 | else: 80 | self.loss = nn.SoftMarginLoss() 81 | 82 | def forward(self, anchor_features, positive_features): 83 | dist_mat = pairwise_distance_matrix(anchor_features, positive_features) 84 | dist_ap = torch.diagonal(dist_mat) 85 | dist_an = batch_hard_negative_mining(dist_mat) 86 | y = torch.ones_like(dist_ap) 87 | if self.margin is not None: 88 | loss = self.loss(dist_an, dist_ap, y) 89 | else: 90 | loss = self.loss(dist_an - dist_ap, y) 91 | return loss 92 | -------------------------------------------------------------------------------- /utils/meters.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import division 3 | 4 | import math 5 | import numpy as np 6 | 7 | 8 | class AverageMeter(object): 9 | 10 | def __init__(self): 11 | self.n = 0 12 | self.sum = 0.0 13 | self.var = 0.0 14 | self.val = 0.0 15 | self.mean = np.nan 16 | self.std = np.nan 17 | 18 | def update(self, value, n=1): 19 | self.val = value 20 | self.sum += value 21 | self.var += value * value 22 | self.n += n 23 | 24 | if self.n == 0: 25 | self.mean, self.std = np.nan, np.nan 26 | elif self.n == 1: 27 | self.mean, self.std = self.sum, np.inf 28 | else: 29 | self.mean = self.sum / self.n 30 | self.std = math.sqrt((self.var - self.n * self.mean * self.mean) / (self.n - 1.0)) 31 | 32 | def value(self): 33 | return self.mean, self.std 34 | 35 | def reset(self): 36 | self.n = 0 37 | self.sum = 0.0 38 | self.var = 0.0 39 | self.val = 0.0 40 | self.mean = np.nan 41 | self.std = np.nan 42 | --------------------------------------------------------------------------------
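As a usage reference for the metric-learning losses in `utils/loss.py`: a minimal sketch, assuming it is run from the repository root so that `utils.loss` is importable. Row i of the anchor and positive tensors is taken to be a matched descriptor pair, and the loss mines the hardest non-matching descriptor per row; the batch size (64) and descriptor dimension (128) below are illustrative choices, not values fixed by the repository.

```
import torch
import torch.nn.functional as F

from utils.loss import BatchHardNegativeLoss

# Row-aligned anchor/positive descriptors (illustrative shapes).
anchors = F.normalize(torch.randn(64, 128, requires_grad=True), dim=1)
positives = F.normalize(torch.randn(64, 128, requires_grad=True), dim=1)

criterion = BatchHardNegativeLoss(margin=1.0)
loss = criterion(anchors, positives)
loss.backward()  # gradients flow back to whatever network produced the descriptors
```

With `margin=None`, the same classes fall back to the soft-margin formulation (`nn.SoftMarginLoss`), as defined in the module above.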