├── .gitignore ├── LICENSE ├── README.md ├── environment.yml ├── evaluations ├── evaluate_3d.py ├── evaluate_nvs.py └── test_seqs.txt └── utils ├── arkit_utils.py ├── geometry.py ├── o3d_helper.py └── pytorch_ssim └── __init__.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | meshes/ 132 | nvs/ 133 | img/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Active Vision Laboratory 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

# Building LEGO for 3D Reconstruction on Mobile Devices

Kejie Li · Jia-Wang Bian · Robert Castle · Philip H.S. Torr · Victor Adrian Prisacariu

Project Page | arXiv | Dataset

Even 3D scanners can only generate pseudo ground-truth shapes with artefacts.
MobileBrick is the first **multi-view RGBD** dataset, captured on a **mobile device**, with **precise** 3D annotations for detailed 3D object reconstruction.

We propose a novel data-capturing and 3D annotation pipeline in MobileBrick that does not rely on expensive 3D scanners.
The key to creating precise 3D ground-truth shapes is the use of LEGO models, which are built from bricks of known geometry.
RGBD images captured on a mobile device, paired with exact 3D geometry annotations, provide a unique opportunity for future research on high-fidelity 3D reconstruction.

# Overview

1. [Install](#install)
1. [Our dataset](#dataset-organisation)
1. [Evaluation](#evaluation)
1. [Cite](#cite)
1. [Changelog](#changelog)

# Install
You can install the dependencies with Anaconda as follows:
```shell
conda env create -f environment.yml
conda activate mobilebrick
```

# Dataset Organisation
The dataset is organised by sequence: 135 sequences of random shapes can be used for training, and 18 sequences of manually curated LEGO models are reserved for evaluation.

A sequence contains the following structure:
```
SEQUENCE_NAME
├── arkit_depth (the confidence and depth maps provided by ARKit)
| ├── 000000_conf.png
| ├── 000000.png
| ├── ...
├── gt_depth (the high-resolution depth maps projected from the aligned GT shape)
| ├── 000000.png
| ├── ...
├── image (the RGB images)
| ├── 000000.jpg
| ├── ...
├── mask (object foreground masks projected from the aligned GT shape)
| ├── 000000.png
| ├── ...
├── intrinsic (3x3 intrinsic matrix of each image)
| ├── 000000.txt
| ├── ...
├── pose (4x4 camera-to-world transformation matrix of each image)
| ├── 000000.txt
| ├── ...
├── mesh
| ├── gt_mesh.ply
├── visibility_mask.npy (the visibility mask used for evaluation)
├── cameras.npz (processed camera poses in the NeuS format)
```

Note:
- gt_mesh.ply is created by running TSDF fusion on the GT depth maps.

# Evaluation
We provide scripts to evaluate 3D reconstruction and Novel View Synthesis (NVS).

To evaluate 3D reconstruction, run:
```shell
python evaluations/evaluate_3d.py --method $METHOD
```
The reconstruction files (.ply) to be evaluated should be placed in the `./meshes/$METHOD` folder. A .csv file with per-sequence results will be generated.

To evaluate NVS, run:
```shell
python evaluations/evaluate_nvs.py --method $METHOD
```
The rendered images for evaluation should be placed in `./nvs/$METHOD`.

# Cite
Please cite our work if you find it useful or use any of our code:
```latex
@article{li2023mobilebrick,
  author  = {Kejie Li and Jia-Wang Bian and Robert Castle and Philip H.S. Torr and Victor Adrian Prisacariu},
  title   = {MobileBrick: Building LEGO for 3D Reconstruction on Mobile Devices},
  journal = {arXiv preprint arXiv:2303.01932},
  year    = {2023}
}
```

# Changelog
- 09/03/2023: MobileBrick is merged into [Voxurf](https://github.com/wutong16/Voxurf); see the instructions in their repo.
112 | - 06/03/2023: Dataset is online 113 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: mobilebrick 2 | channels: 3 | - pytorch 4 | - conda-forge 5 | - defaults 6 | dependencies: 7 | - _libgcc_mutex=0.1=main 8 | - _openmp_mutex=5.1=1_gnu 9 | - blas=1.0=mkl 10 | - bzip2=1.0.8=h7f98852_4 11 | - ca-certificates=2022.12.7=ha878542_0 12 | - certifi=2022.12.7=pyhd8ed1ab_0 13 | - cudatoolkit=11.3.1=h9edb442_10 14 | - ffmpeg=4.3=hf484d3e_0 15 | - freetype=2.10.4=h0708190_1 16 | - gmp=6.2.1=h58526e2_0 17 | - gnutls=3.6.13=h85f3911_1 18 | - intel-openmp=2022.1.0=h9e868ea_3769 19 | - jbig=2.1=h7f98852_2003 20 | - jpeg=9e=h166bdaf_1 21 | - lame=3.100=h7f98852_1001 22 | - lcms2=2.12=hddcbb42_0 23 | - ld_impl_linux-64=2.38=h1181459_1 24 | - lerc=2.2.1=h9c3ff4c_0 25 | - libblas=3.9.0=16_linux64_mkl 26 | - libcblas=3.9.0=16_linux64_mkl 27 | - libdeflate=1.7=h7f98852_5 28 | - libffi=3.4.2=h6a678d5_6 29 | - libgcc-ng=11.2.0=h1234567_1 30 | - libgfortran-ng=12.2.0=h69a702a_19 31 | - libgfortran5=12.2.0=h337968e_19 32 | - libgomp=11.2.0=h1234567_1 33 | - libiconv=1.17=h166bdaf_0 34 | - liblapack=3.9.0=16_linux64_mkl 35 | - libopenblas=0.3.20=pthreads_h78a6416_0 36 | - libpng=1.6.37=h21135ba_2 37 | - libstdcxx-ng=11.2.0=h1234567_1 38 | - libtiff=4.3.0=hf544144_1 39 | - libuv=1.43.0=h7f98852_0 40 | - libwebp-base=1.2.2=h7f98852_1 41 | - lz4-c=1.9.3=h9c3ff4c_1 42 | - mkl=2022.1.0=hc2b9512_224 43 | - ncurses=6.4=h6a678d5_0 44 | - nettle=3.6=he412f7d_0 45 | - olefile=0.46=pyh9f0ad1d_1 46 | - openh264=2.1.1=h780b84a_0 47 | - openjpeg=2.4.0=hb52868f_1 48 | - openssl=1.1.1t=h7f8727e_0 49 | - pillow=8.3.2=py39ha612740_0 50 | - pip=22.3.1=py39h06a4308_0 51 | - python=3.9.16=h7a1cb2a_0 52 | - python_abi=3.9=2_cp39 53 | - pytorch=1.10.1=py3.9_cuda11.3_cudnn8.2.0_0 54 | - pytorch-mutex=1.0=cuda 55 | - readline=8.2=h5eee18b_0 56 | - setuptools=65.6.3=py39h06a4308_0 57 | - sqlite=3.40.1=h5082296_0 58 | - tk=8.6.12=h1ccaba5_0 59 | - torchaudio=0.10.1=py39_cu113 60 | - torchvision=0.11.2=py39_cu113 61 | - trimesh=3.20.0=pyhd8ed1ab_0 62 | - typing_extensions=4.4.0=pyha770c72_0 63 | - tzdata=2022g=h04d1e81_0 64 | - wheel=0.38.4=py39h06a4308_0 65 | - xz=5.2.10=h5eee18b_1 66 | - zlib=1.2.13=h5eee18b_0 67 | - zstd=1.5.0=ha95c52a_0 68 | - pip: 69 | - addict==2.4.0 70 | - aiofiles==22.1.0 71 | - aiosqlite==0.18.0 72 | - anyio==3.6.2 73 | - arrow==1.2.3 74 | - babel==2.11.0 75 | - beautifulsoup4==4.11.2 76 | - charset-normalizer==3.0.1 77 | - decorator==5.1.1 78 | - deprecation==2.1.0 79 | - fastjsonschema==2.16.2 80 | - fqdn==1.5.1 81 | - idna==3.4 82 | - importlib-metadata==6.0.0 83 | - isoduration==20.11.0 84 | - joblib==1.2.0 85 | - json5==0.9.11 86 | - jsonpointer==2.3 87 | - jsonschema==4.17.3 88 | - jupyter-client==7.4.9 89 | - jupyter-core==5.2.0 90 | - jupyter-events==0.6.3 91 | - jupyter-packaging==0.12.3 92 | - jupyter-server==2.3.0 93 | - jupyter-server-fileid==0.7.0 94 | - jupyter-server-terminals==0.4.4 95 | - jupyter-server-ydoc==0.6.1 96 | - jupyter-ydoc==0.2.2 97 | - jupyterlab==3.6.1 98 | - jupyterlab-server==2.19.0 99 | - lpips==0.1.4 100 | - mistune==2.0.5 101 | - nbclassic==0.5.2 102 | - nbconvert==7.2.9 103 | - nbformat==5.7.3 104 | - notebook-shim==0.2.2 105 | - numpy==1.24.2 106 | - open3d==0.15.2 107 | - opencv-python==4.7.0.72 108 | - platformdirs==3.0.0 109 | - pyparsing==3.0.9 110 | - pyquaternion==0.9.9 111 | - python-json-logger==2.0.7 112 | - pyyaml==6.0 113 | 
- pyzmq==25.0.0 114 | - requests==2.28.2 115 | - rfc3339-validator==0.1.4 116 | - rfc3986-validator==0.1.1 117 | - scikit-learn==1.2.1 118 | - scipy==1.10.1 119 | - six==1.16.0 120 | - sniffio==1.3.0 121 | - soupsieve==2.4 122 | - threadpoolctl==3.1.0 123 | - tinycss2==1.2.1 124 | - tomli==2.0.1 125 | - tomlkit==0.11.6 126 | - tornado==6.2 127 | - tqdm==4.64.1 128 | - traitlets==5.9.0 129 | - uri-template==1.2.0 130 | - urllib3==1.26.14 131 | - webcolors==1.12 132 | - websocket-client==1.5.1 133 | - y-py==0.5.9 134 | - ypy-websocket==0.8.2 135 | - zipp==3.14.0 136 | prefix: /home/kejie/anaconda3/envs/oxbrick 137 | -------------------------------------------------------------------------------- /evaluations/evaluate_3d.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import csv 4 | import numpy as np 5 | import trimesh 6 | import torch 7 | import torch.nn.functional as F 8 | from sklearn.neighbors import NearestNeighbors 9 | from scipy.spatial import KDTree 10 | import open3d as o3d 11 | import sys 12 | 13 | sys.path.append(os.path.dirname(os.path.dirname(__file__))) 14 | 15 | import utils.o3d_helper as o3d_helper 16 | 17 | 18 | def compute_curvature(points, radius=0.005): 19 | tree = KDTree(points) 20 | 21 | curvature = [ 0 ] * points.shape[0] 22 | 23 | for index, point in enumerate(points): 24 | indices = tree.query_ball_point(point, radius) 25 | if len(indices) < 3: 26 | print("invalid points") 27 | continue 28 | # local covariance 29 | M = np.array([ points[i] for i in indices ]).T 30 | M = np.cov(M) 31 | 32 | # eigen decomposition 33 | V, E = np.linalg.eig(M) 34 | # h3 < h2 < h1 35 | h1, h2, h3 = V 36 | 37 | curvature[index] = h3 / (h1 + h2 + h3) 38 | 39 | return np.asarray(curvature) 40 | 41 | 42 | def visibility_test(volume, min_pts, resolution, voxel_size, mesh, device): 43 | """ filter out points that are not wihin the masked volume 44 | 45 | Args: 46 | volume (np.ndarray): [H,W,D] the mask volume 47 | min_pts (np.ndarray): minimum points 48 | resolution (np.ndarray): volume resolution 49 | voxel_size (float): voxel_size 50 | mesh (open3d.mesh): input mesh 51 | device (string): the device for pytorch 52 | """ 53 | 54 | points = np.asarray(mesh.vertices) 55 | volume = torch.from_numpy(volume).float().to(device) 56 | voxels = (points - min_pts) / voxel_size 57 | voxels = voxels / (resolution-1) * 2 - 1 58 | voxels = torch.from_numpy(voxels)[..., [2,1,0]].float().to(device) 59 | mask = F.grid_sample(volume.unsqueeze(0).unsqueeze(0), # [1,1,H,W,D] 60 | voxels.unsqueeze(0).unsqueeze(0).unsqueeze(0), # [1,1,1,N,3] 61 | mode="nearest", 62 | padding_mode="zeros", 63 | align_corners=True) # [] 64 | mask = mask[0, 0, 0, 0].cpu().numpy() > 0 65 | mesh.remove_vertices_by_mask(mask==False) 66 | mesh.compute_vertex_normals() 67 | return mesh 68 | 69 | 70 | def evaluate( 71 | pred_points, 72 | # pred_curv, 73 | gt_points, 74 | # gt_curv, 75 | threshold, 76 | verbose=False 77 | ): 78 | nbrs = NearestNeighbors(n_neighbors=1, algorithm='ball_tree').fit(gt_points) 79 | distances, indices = nbrs.kneighbors(pred_points) 80 | 81 | pred_gt_dist = np.mean(distances) 82 | precision = np.sum(distances < threshold) / len(distances) 83 | 84 | nbrs = NearestNeighbors(n_neighbors=1, algorithm='ball_tree').fit(pred_points) 85 | distances, indices = nbrs.kneighbors(gt_points) 86 | 87 | # curv_diff = np.abs(gt_curv - pred_curv[indices[:,0]]) 88 | # mean_curv_diff = np.mean(curv_diff) 89 | 90 | gt_pred_dist = np.mean(distances) 91 | recall = 
np.sum(distances < threshold) / len(distances) 92 | F1 = 2 * precision * recall / (precision + recall) 93 | chamfer = pred_gt_dist + gt_pred_dist 94 | 95 | if verbose: 96 | # print("pred -> gt: ", pred_gt_dist) 97 | print("precision @ {}: {:.6f}".format(threshold, precision)) 98 | # print("gt -> pred: ", gt_pred_dist) 99 | print("recall @ {}: {:.6f}".format(threshold, recall)) 100 | 101 | print("F1: {:.6f}".format(F1)) 102 | # print("mean curvature difference: {:.6f}".format(mean_curv_diff)) 103 | print("Chamfer: {:.6f}".format(chamfer)) 104 | # print("{:.3f}/{:.4f}/{:.3f}/{:.4f}/{:.4f}".format(pred_gt_dist, precision, gt_pred_dist, recall, F1)) 105 | out = {} 106 | out['pred_gt'] = pred_gt_dist 107 | out['accuracy'] = precision 108 | out['gt_pred'] = gt_pred_dist 109 | out['recall'] = recall 110 | out['chamfer'] = pred_gt_dist + gt_pred_dist 111 | out['F1'] = F1 112 | return out 113 | 114 | 115 | def sample_surface_points(mesh): 116 | n_points = mesh.vertices 117 | 118 | 119 | def main(): 120 | if torch.cuda.is_available(): 121 | device = torch.device('cuda') 122 | else: 123 | device = torch.device('cpu') 124 | 125 | args_parser = argparse.ArgumentParser() 126 | args_parser.add_argument("--seq_txt", 127 | default="./evaluations/test_seqs.txt", 128 | help="the .txt file listing the testing sequences") 129 | args_parser.add_argument("--gt_root", 130 | default="./data", 131 | help="the directory of the dataset") 132 | args_parser.add_argument("--skip", 133 | nargs="+", 134 | help="sequences to skip") 135 | args_parser.add_argument("--method", 136 | required=True, 137 | help="name of the method to be evaluated") 138 | args_parser.add_argument("--n_pts", 139 | default=100000, 140 | type=int, 141 | help="the number of sampling points for evaluation") 142 | args_parser.add_argument("--save_output", 143 | action="store_true", 144 | help="whether to save output mesh") 145 | args = args_parser.parse_args() 146 | 147 | n_samples = args.n_pts 148 | pred_dir = os.path.join(f"./meshes/{args.method}") 149 | gt_root = args.gt_root 150 | 151 | skip_seqs = args.skip if args.skip is not None else [] 152 | with open(args.seq_txt, "r") as f: 153 | seqs = [l for l in f.read().split(",") if l not in skip_seqs] 154 | 155 | chamfer_loss = [] 156 | fitness = [] 157 | accuracy = [] 158 | recall = [] 159 | F1 = [] 160 | accuracy_1,recall_1, F1_1 = [], [], [] 161 | for seq in seqs: 162 | # load ground-truth 163 | print(f"evaluating {seq}: ") 164 | gt_dir = os.path.join(gt_root, seq) 165 | visibility_mask = np.load(os.path.join(gt_dir, "visibility_mask.npy"), allow_pickle=True).item() 166 | resolution = visibility_mask['resolutions'] 167 | volume = visibility_mask['mask'].reshape(resolution) 168 | voxel_size = visibility_mask['voxel_size'] 169 | min_pts = visibility_mask['min_pts'] 170 | gt_mesh = o3d.io.read_triangle_mesh(os.path.join(gt_dir, "mesh", "gt_mesh.ply")) 171 | gt_points = np.asarray(gt_mesh.sample_points_poisson_disk(n_samples).points) 172 | # gt_mesh_trimesh = trimesh.load(os.path.join(gt_dir, "mesh", "gt_mesh.ply")) 173 | # gt_curv = trimesh.curvature.discrete_gaussian_curvature_measure(gt_mesh_trimesh, gt_points, 0.005) 174 | 175 | # load predictions 176 | mesh_path = os.path.join(pred_dir, f"{seq}.ply") 177 | pred_mesh_trimesh = trimesh.load(mesh_path) 178 | pred_mesh = o3d.io.read_triangle_mesh(mesh_path) 179 | 180 | gt_pts = o3d_helper.np2pc(gt_mesh.vertices) 181 | pred_pts = o3d_helper.np2pc(pred_mesh.vertices) 182 | threshold = 0.02 183 | trans_init = np.eye(4) 184 | reg_p2l = 
o3d.pipelines.registration.registration_icp( 185 | gt_pts, pred_pts, threshold, trans_init, 186 | o3d.pipelines.registration.TransformationEstimationPointToPoint(), 187 | o3d.pipelines.registration.ICPConvergenceCriteria(max_iteration=10)) 188 | fitness.append(reg_p2l.fitness) 189 | if reg_p2l.fitness > 0.99: 190 | new_pose = reg_p2l.transformation 191 | pred_mesh.transform(np.linalg.inv(new_pose)) 192 | pred_mesh = visibility_test(volume, min_pts, resolution, voxel_size, pred_mesh, device) 193 | if args.save_output: 194 | o3d.io.write_triangle_mesh(os.path.join(pred_dir, f"{seq}_cropped.ply"), pred_mesh) 195 | if len(np.asarray(pred_mesh.triangles)) > 0: 196 | pred_points = np.asarray(pred_mesh.sample_points_poisson_disk(n_samples).points) 197 | # pred_curv = trimesh.curvature.discrete_gaussian_curvature_measure(pred_mesh_trimesh, pred_points, 0.005) 198 | else: 199 | pred_points = np.random.permutation(np.asarray(pred_mesh.vertices))[:n_samples] 200 | out = evaluate( 201 | pred_points, 202 | # pred_curv, 203 | gt_points, 204 | # gt_curv, 205 | threshold=0.0025, 206 | verbose=True) 207 | chamfer_loss.append(out['chamfer']) 208 | accuracy.append(out['accuracy']) 209 | recall.append(out['recall']) 210 | F1.append(out['F1']) 211 | out = evaluate(pred_points, gt_points, threshold=0.005, verbose=True) 212 | accuracy_1.append(out['accuracy']) 213 | recall_1.append(out['recall']) 214 | F1_1.append(out['F1']) 215 | 216 | with open(os.path.join(pred_dir, "data.csv"), "w") as f: 217 | writer = csv.writer(f) 218 | writer.writerow(seqs) 219 | writer.writerow(fitness) 220 | writer.writerow(chamfer_loss) 221 | writer.writerow(accuracy) 222 | writer.writerow(recall) 223 | writer.writerow(F1) 224 | writer.writerow(accuracy_1) 225 | writer.writerow(recall_1) 226 | writer.writerow(F1_1) 227 | 228 | print("final result: ") 229 | print(f"chamfer: {sum(chamfer_loss) / len(chamfer_loss)}") 230 | print(f"accuracy: {sum(accuracy) / len(accuracy)}") 231 | print(f"recall: {sum(recall) / len(recall)}") 232 | print(f"F1: {sum(F1) / len(F1)}") 233 | 234 | 235 | 236 | if __name__ == "__main__": 237 | main() 238 | 239 | -------------------------------------------------------------------------------- /evaluations/evaluate_nvs.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import cv2 3 | import os 4 | import numpy as np 5 | import torch 6 | import torch.nn.functional as F 7 | import lpips as lpips_lib 8 | import sys 9 | 10 | sys.path.append(os.path.dirname(os.path.dirname(__file__))) 11 | from utils import pytorch_ssim 12 | 13 | 14 | def mse2psnr(mse): 15 | """ 16 | :param mse: scalar 17 | :return: scalar np.float32 18 | """ 19 | mse = np.maximum(mse, 1e-10) # avoid -inf or nan when mse is very small. 
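    # for intensities normalised to [0, 1], PSNR = -10 * log10(MSE); the clamp above avoids log10(0)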
20 | psnr = -10.0 * np.log10(mse) 21 | return psnr.astype(np.float32) 22 | 23 | 24 | def main(): 25 | if torch.cuda.is_available(): 26 | device = torch.device('cuda') 27 | else: 28 | device = torch.device('cpu') 29 | lpips_vgg_fn = lpips_lib.LPIPS(net='vgg').to(device) 30 | 31 | args_parser = argparse.ArgumentParser() 32 | args_parser.add_argument("--seq_txt", 33 | default="./evaluations/test_seqs.txt", 34 | help="the .txt file listing the testing sequences") 35 | args_parser.add_argument("--gt_root", 36 | default="./data", 37 | help="the directory of the dataset") 38 | args_parser.add_argument("--skip", 39 | nargs="+", 40 | help="sequences to skip") 41 | args_parser.add_argument("--method", 42 | required=True, 43 | help="name of the method to be evaluated") 44 | args = args_parser.parse_args() 45 | 46 | skip_seqs = args.skip if args.skip is not None else [] 47 | with open(args.seq_txt, "r") as f: 48 | seqs = [l for l in f.read().split(",") if l not in skip_seqs] 49 | 50 | root_dir = f"./nvs/{args.method}" 51 | psnr_out = 0 52 | ssim_out = 0 53 | lpips_out = 0 54 | n_imgs = 0 55 | for seq in seqs: 56 | seq_dir = os.path.join(root_dir, seq) 57 | img_ids = sorted([f.split("_")[1].split(".")[0] for f in os.listdir(seq_dir) if f.startswith("gt") and f.endswith(".png")]) 58 | for img_id in img_ids: 59 | rendered_img = cv2.imread(os.path.join(seq_dir, f"render_{img_id}.png"), -1)[...,::-1] / 255. 60 | gt_img = cv2.imread(os.path.join(seq_dir, f"gt_{img_id}.png"), -1)[...,::-1] / 255. 61 | rendered_img = torch.from_numpy(rendered_img).float().to(device) 62 | gt_img = torch.from_numpy(gt_img).float().to(device) 63 | 64 | # compute mse 65 | mse = F.mse_loss(rendered_img, gt_img).item() 66 | 67 | # compute psnr 68 | psnr = mse2psnr(mse) 69 | 70 | # compute ssim 71 | ssim = pytorch_ssim.ssim(rendered_img.permute(2, 0, 1).unsqueeze(0), gt_img.permute(2, 0, 1).unsqueeze(0)).item() 72 | 73 | # compute lpips 74 | lpips_loss = lpips_vgg_fn(rendered_img.permute(2, 0, 1).unsqueeze(0).contiguous(), 75 | gt_img.permute(2, 0, 1).unsqueeze(0).contiguous(), normalize=True).item() 76 | psnr_out += psnr 77 | ssim_out += ssim 78 | lpips_out += lpips_loss 79 | n_imgs += 1 80 | print(f"psnr: {psnr_out/n_imgs}") 81 | print(f"ssim_out: {ssim_out/n_imgs}") 82 | print(f"lpips: {lpips_out/n_imgs}") 83 | 84 | 85 | if __name__ == "__main__": 86 | main() -------------------------------------------------------------------------------- /evaluations/test_seqs.txt: -------------------------------------------------------------------------------- 1 | aston,audi,beetles,big_ben,boat,bridge,cabin,camera,castle,colosseum,convertible,ferrari,jeep,london_bus,motorcycle,porsche,satellite,space_shuttle -------------------------------------------------------------------------------- /utils/arkit_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | 4 | 5 | def load_extr(path): 6 | with open(path, "r") as f: 7 | cam = json.load(f) 8 | T_wc = np.asarray(cam['cameraPoseARFrame']).reshape(4, 4) 9 | T_align = np.eye(4) 10 | T_align[1, 1] = -1 11 | T_align[2, 2] = -1 12 | T_wc = T_wc @ T_align 13 | return T_wc 14 | 15 | def load_intr(path): 16 | with open(path, "r") as f: 17 | cam = json.load(f) 18 | intr_mat = np.asarray(cam['intrinsics']).reshape(3, 3) 19 | return intr_mat 20 | 21 | 22 | def load_alignment(path): 23 | with open(path, "r") as f: 24 | cam = json.load(f) 25 | align_mat = np.asarray(cam['alignment'][:-1]).reshape(4, 4) 26 | scale = 
float(cam['alignment'][-1]) 27 | return align_mat, scale 28 | 29 | 30 | -------------------------------------------------------------------------------- /utils/geometry.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn.functional as F 4 | from typing import Union, Tuple, Sequence 5 | from scipy.spatial.transform import Rotation 6 | from typing import Optional 7 | 8 | 9 | class Simulator: 10 | def __init__(self): 11 | fname = "/home/kejie/repository/fast_sdf/dist-model.txt" 12 | data = np.loadtxt(fname, comments='%', skiprows=5) 13 | dist = np.empty([80, 80, 5]) 14 | 15 | for y in range(0, 80): 16 | for x in range(0, 80): 17 | idx = (y * 80 + x) * 23 + 3 18 | if (data[idx:idx + 5] < 8000).all(): 19 | dist[y, x, :] = 0 20 | else: 21 | dist[y, x, :] = data[idx + 15: idx + 20] 22 | 23 | self.model = dist 24 | 25 | def undistort(self, x, y, z): 26 | 27 | i2 = int((z + 1) / 2) 28 | i1 = i2 - 1 29 | a = (z - (i1 * 2 + 1)) / 2 30 | x = int(x / 8) 31 | y = int(y / 6) 32 | f = (1 - a) * self.model[y, x, min(max(i1, 0), 4) 33 | ] + a * self.model[y, x, min(i2, 4)] 34 | 35 | if f == 0: 36 | return 0 37 | else: 38 | return z / f 39 | 40 | def simulate(self, clean_depth): 41 | 42 | # a = np.array(Image.open(inputpng)).astype(np.float32) / 1000.0 43 | img_h, img_w = clean_depth.shape 44 | b = np.copy(clean_depth) 45 | it = np.nditer(clean_depth, flags=[ 46 | 'multi_index'], op_flags=['writeonly']) 47 | 48 | while not it.finished: 49 | 50 | # pixel shuffle 51 | x = min( 52 | max(round(it.multi_index[1] + np.random.normal(0, 0.25)), 0), img_w-1) 53 | y = min( 54 | max(round(it.multi_index[0] + np.random.normal(0, 0.25)), 0), img_h-1) 55 | 56 | # downsample 57 | d = b[y - y % 2, x - x % 2] 58 | 59 | # distortion 60 | d = self.undistort(x, y, d) 61 | 62 | # quantization and high freq noise 63 | if d == 0: 64 | it[0] = 0 65 | else: 66 | it[0] = 35.130 * 8 / \ 67 | round((35.130 / d + np.random.normal(0, 0.027778)) * 8) 68 | 69 | it.iternext() 70 | return clean_depth 71 | 72 | 73 | def get_homogeneous( 74 | pts: Union['np.ndarray', 'torch.tensor'] 75 | ) -> Union['np.ndarray', 'torch.tensor']: 76 | """ convert [(b), N, 3] pts to homogeneous coordinate 77 | 78 | Args: 79 | pts ([(b), N, 3] Union['np.ndarray', 'torch.tensor']): input point cloud 80 | 81 | Returns: 82 | homo_pts ([(b), N, 4] Union['np.ndarray', 'torch.tensor']): output point 83 | cloud 84 | 85 | Raises: 86 | ValueError: if the input tensor/array is not with the shape of [b, N, 3] 87 | or [N, 3] 88 | TypeError: if input is not either tensor or array 89 | """ 90 | 91 | batch = False 92 | if len(pts.shape) == 3: 93 | batch = True 94 | elif len(pts.shape) == 2: 95 | pts = pts 96 | else: 97 | raise ValueError("only accept [b, n_pts, 3] or [n_pts, 3]") 98 | 99 | if isinstance(pts, torch.Tensor): 100 | ones = torch.ones_like(pts[..., :1]) 101 | homo_pts = torch.cat([pts, ones], dim=-1) 102 | elif isinstance(pts, np.ndarray): 103 | ones = np.ones_like(pts[..., :1]) 104 | homo_pts = np.concatenate([pts, ones], axis=-1) 105 | else: 106 | raise TypeError("wrong data type") 107 | return homo_pts 108 | 109 | 110 | def get_aabb(pc: 'np.ndarray') -> 'np.ndarray': 111 | """ get aabb of a point cloud 112 | 113 | Args: 114 | pc ([N, 3] np.ndarray): input point cloud 115 | 116 | Returns: 117 | aabb ([2, 3] np.ndarray): a 3D bbox represent by 118 | [[x_min, y_min, z_min], [x_max, y_max, z_max]] 119 | """ 120 | 121 | x_min, y_min, z_min = np.min(pc, axis=0) 122 | x_max, 
y_max, z_max = np.max(pc, axis=0) 123 | aabb = np.array([[x_min, y_min, z_min], [x_max, y_max, z_max]]) 124 | return aabb 125 | 126 | 127 | # def get_aabb(pc: 'np.ndarray', img_w: int, img_h: int) -> 'np.ndarray': 128 | # """ get aabb of a point cloud 129 | 130 | # Args: 131 | # pc ([N, 2] np.ndarray): input point cloud 132 | 133 | # Returns: 134 | # aabb ([2, 2] np.ndarray): a 2D bbox represent by 135 | # [[x_min, y_min], [x_max, y_max]] 136 | # """ 137 | 138 | # x_min, y_min = np.min(pc, axis=0) 139 | # x_max, y_max = np.max(pc, axis=0) 140 | # x_min = max(0, x_min) 141 | # y_min = max(0, y_min) 142 | # x_max = min(img_w, x_max) 143 | # y_max = min(img_h, y_max) 144 | # aabb = np.array([[x_min, y_min], [x_max, y_max]]) 145 | # return aabb 146 | 147 | 148 | def transform_point_cloud(point_cloud, transform_matrix): 149 | 150 | """ 151 | Transforms a point cloud using a 4x4 transformation matrix. 152 | 153 | Parameters 154 | ---------- 155 | point_cloud : numpy.ndarray, shape (N, 3) 156 | The input point cloud, represented as a Nx3 numpy array. 157 | transform_matrix : numpy.ndarray, shape (4, 4) 158 | The transformation matrix, represented as a 4x4 numpy array. 159 | 160 | Returns 161 | ------- 162 | numpy.ndarray, shape (N, 3) 163 | The transformed point cloud, represented as a Nx3 numpy array. 164 | """ 165 | 166 | transformed_point_cloud = np.dot(point_cloud, transform_matrix[:3, :3].T) + transform_matrix[:3, 3][np.newaxis, :] 167 | return transformed_point_cloud 168 | 169 | 170 | def depth2xyz(depth, intr_mat): 171 | """ convert depth map to xyz map 172 | 173 | Args: 174 | depth ([H, W] np.ndarray): depth map 175 | 176 | Returns: 177 | xyz ([H, W, 3] np.ndarray): xyz map 178 | """ 179 | 180 | height, width = depth.shape 181 | fx, fy, cx, cy = intr_mat[0, 0], intr_mat[1, 1], intr_mat[0, 2], intr_mat[1, 2] 182 | 183 | urange = ( 184 | np.arange(width, dtype=np.float32).reshape(1, -1).repeat(height, 0) - cx 185 | ) / fx 186 | vrange = ( 187 | np.arange(height, dtype=np.float32).reshape(-1, 1).repeat(width, 1) - cy 188 | ) / fy 189 | xyz = np.stack([urange, vrange, np.ones(urange.shape)], axis=-1) 190 | xyz = xyz * depth.reshape(height, width, 1) 191 | return xyz 192 | 193 | 194 | def angle2class(angles, num_classes=30): 195 | """ convert angles between [0, 180] to class index for classification 196 | 197 | Args: 198 | angles (np.ndarray): angle in radian 199 | 200 | Returns: 201 | out_class (np.ndarray): angle is converted to class, the number of which 202 | is defined in num_classes 203 | """ 204 | y = torch.sin(angles) 205 | x = torch.cos(angles) 206 | angles = torch.atan2(y, x) / np.pi * 180. 
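    # atan2 returns values in (-180, 180]; fold the negative angles into [0, 180) before binning into classes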
207 | angles = torch.where(angles<0, angles + 180, angles) 208 | out_class = angles // (180 / num_classes) 209 | assert (out_class >= 0).all() 210 | assert (out_class <= num_classes).all() 211 | out_class = np.clip(out_class, a_min=0, a_max=num_classes-1) 212 | return out_class 213 | 214 | 215 | def iou_2d(bboxA: 'np.ndarray', bboxB: 'np.ndarray') -> float: 216 | """ calculate IoU between two 2D bboxes 217 | 218 | Args: 219 | bboxA ([2, 2] np.ndarray): input bbox A in AABB format 220 | bboxB ([2, 2] np.ndarray): input bbox B in AABB format 221 | 222 | Returns: 223 | IoU (float): output IoU 224 | """ 225 | 226 | x_min = max(bboxA[0, 0], bboxB[0, 0]) 227 | y_min = max(bboxA[0, 1], bboxB[0, 1]) 228 | x_max = min(bboxA[1, 0], bboxB[1, 0]) 229 | y_max = min(bboxA[1, 1], bboxB[1, 1]) 230 | 231 | inter_area = max(0, (x_max - x_min)) * max(0, (y_max - y_min)) 232 | area_A = np.prod(bboxA[1] - bboxA[0]) 233 | area_B = np.prod(bboxB[1] - bboxB[0]) 234 | IoU = inter_area / (area_A + area_B - inter_area) 235 | assert IoU <= 1 and IoU >= 0, "invalid IoU value" 236 | return IoU 237 | 238 | 239 | def iou_3d(bboxA: 'np.ndarray', bboxB: 'np.ndarray') -> float: 240 | """ calculate 3D IoU between two 3D bboxes 241 | 242 | Args: 243 | bboxA ([2, 3] np.ndarray): input bbox A in AABB format 244 | bboxB ([2, 3] np.ndarray): input bbox B in AABB format 245 | 246 | Returns: 247 | IoU (float): 3D IoU 248 | """ 249 | 250 | x_min = max(bboxA[0, 0], bboxB[0, 0]) 251 | y_min = max(bboxA[0, 1], bboxB[0, 1]) 252 | z_min = max(bboxA[0, 2], bboxB[0, 2]) 253 | x_max = min(bboxA[1, 0], bboxB[1, 0]) 254 | y_max = min(bboxA[1, 1], bboxB[1, 1]) 255 | z_max = min(bboxA[1, 2], bboxB[1, 2]) 256 | 257 | inter_volume = max(0, (x_max - x_min)) * max(0, (y_max - y_min)) * max(0, (z_max - z_min)) 258 | volume_A = np.prod(bboxA[1] - bboxA[0]) 259 | volume_B = np.prod(bboxB[1] - bboxB[0]) 260 | IoU = inter_volume / (volume_A + volume_B - inter_volume) 261 | assert IoU <= 1 and IoU >= 0, "invalid IoU value" 262 | return IoU 263 | 264 | 265 | def giou_3d(bboxA: 'np.ndarray', bboxB: 'np.ndarray') -> float: 266 | """ calculate generalized 3D IoU between two 3D bboxes 267 | 268 | Args: 269 | bboxA ([2, 3] np.ndarray): input bbox A in AABB format 270 | bboxB ([2, 3] np.ndarray): input bbox B in AABB format 271 | 272 | Returns: 273 | IoU (float): 3D Generalized IoU 274 | """ 275 | 276 | x_min = max(bboxA[0, 0], bboxB[0, 0]) 277 | y_min = max(bboxA[0, 1], bboxB[0, 1]) 278 | z_min = max(bboxA[0, 2], bboxB[0, 2]) 279 | x_max = min(bboxA[1, 0], bboxB[1, 0]) 280 | y_max = min(bboxA[1, 1], bboxB[1, 1]) 281 | z_max = min(bboxA[1, 2], bboxB[1, 2]) 282 | 283 | inter_volume = max(0, (x_max - x_min)) * max(0, (y_max - y_min)) * max(0, (z_max - z_min)) 284 | volume_A = np.prod(bboxA[1] - bboxA[0]) 285 | volume_B = np.prod(bboxB[1] - bboxB[0]) 286 | volume_union = (volume_A + volume_B - inter_volume) 287 | 288 | iou = iou_3d(bboxA, bboxB) 289 | 290 | x_min = min(bboxA[0, 0], bboxB[0, 0]) 291 | y_min = min(bboxA[0, 1], bboxB[0, 1]) 292 | z_min = min(bboxA[0, 2], bboxB[0, 2]) 293 | x_max = max(bboxA[1, 0], bboxB[1, 0]) 294 | y_max = max(bboxA[1, 1], bboxB[1, 1]) 295 | z_max = max(bboxA[1, 2], bboxB[1, 2]) 296 | 297 | volume_complete = (x_max - x_min) * (y_max - y_min) * (z_max - z_min) 298 | giou = iou - (volume_complete - volume_union) / volume_complete 299 | return giou 300 | 301 | 302 | def get_corner_by_dims(dimensions) -> np.ndarray: 303 | """get 8 corner points of 3D bbox defined by self.dimensions 304 | 305 | Returns: 306 | a np.ndarray with shape 
[8,3] to represent 8 corner points' 307 | position of the 3D bounding box. 308 | """ 309 | 310 | w, h, l = dimensions[0], dimensions[1], dimensions[2] 311 | x_corners = [w/2, -w/2, -w/2, w/2, w/2, -w/2, -w/2, w/2] 312 | y_corners = [h/2, h/2, h/2, h/2, -h/2, -h/2, -h/2, -h/2] 313 | z_corners = [l/2, l/2, -l/2, -l/2, l/2, l/2, -l/2, -l/2] 314 | corner_pts = np.array([x_corners, y_corners, z_corners], dtype=np.float32).T 315 | return corner_pts 316 | 317 | 318 | def scale_vertices_to_bbox( 319 | vertices: 'np.ndarray', bbox_dims: 'np.ndarray' 320 | ) -> 'np.ndarray': 321 | """scale the vertices such that they are tightly bounded by the 3D bbox 322 | 323 | Args: 324 | vertices ([N, 3] np.ndarray): input vertices 325 | bbox_dims ([3] np.ndarray): bbox dimension in x, y, z axis 326 | 327 | Returns: 328 | vertices: the scaled vertices 329 | """ 330 | 331 | vertices[:, 0] *= (bbox_dims[0] / (np.max(vertices[:, 0]) - np.min(vertices[:, 0]))) 332 | vertices[:, 1] *= (bbox_dims[1] / (np.max(vertices[:, 1]) - np.min(vertices[:, 1]))) 333 | vertices[:, 2] *= (bbox_dims[2] / (np.max(vertices[:, 2]) - np.min(vertices[:, 2]))) 334 | 335 | return vertices 336 | 337 | 338 | def unproject(pixel, depth, intr_mat): 339 | """ unproject from pixels and depths to 3D 340 | 341 | Args: 342 | pixel: [n, 2] 343 | depth: [n] 344 | """ 345 | fx = intr_mat[0, 0] 346 | fy = intr_mat[1, 1] 347 | cx = intr_mat[0, 2] 348 | cy = intr_mat[1, 2] 349 | pts = np.concatenate([pixel, np.ones_like(pixel)[:, :1]], axis=1) 350 | pts[:, 0] = (pts[:, 0] - cx) / fx 351 | pts[:, 1] = (pts[:, 1] - cy) / fy 352 | pts = pts * depth[:, None] 353 | return pts 354 | 355 | 356 | def projection(pts, intr_mat, keep_z=False): 357 | """perspective projection 358 | 359 | Args: 360 | pts ([(b), N, 3] or [(b), N, 4] np.ndarray or torch.tensor): 3D points 361 | intr_mat ([(b), 3, 3] or [(b), 3, 4] np.ndarray or torch.tensor): intrinsic 362 | matrix 363 | 364 | Returns: 365 | pts ([(b), N, 3], np.ndarray or torch.tensor): projected points 366 | """ 367 | 368 | batch = False 369 | if len(pts.shape) == 3: 370 | assert len(intr_mat.shape) == 3, "intr_mat shape needs to match pts" 371 | batch = True 372 | elif len(pts.shape) == 2: 373 | assert len(intr_mat.shape) == 2, "intr_mat shape needs to match pts" 374 | else: 375 | ValueError("only accept [b, n_pts, 3] or [n_pts, 3]") 376 | if batch: 377 | if isinstance(pts, torch.Tensor): 378 | intr_mat = intr_mat.transpose(1, 2) 379 | else: 380 | intr_mat = intr_mat.transpose(0, 2, 1) 381 | else: 382 | intr_mat = intr_mat.T 383 | pts = pts @ intr_mat 384 | if isinstance(pts, torch.Tensor): 385 | z = torch.ones_like(pts[..., -1]) 386 | else: 387 | z = np.ones_like(pts[..., -1]) 388 | if batch: 389 | if keep_z: 390 | z = pts[:, :, -1] 391 | pts = pts / pts[:, :, -1:] 392 | pts[:, :, -1] *= z 393 | else: 394 | if keep_z: 395 | z = pts[:, -1] 396 | pts = pts / pts[:, -1:] 397 | pts[:, -1] *= z 398 | return pts 399 | 400 | 401 | def pad_transform_matrix(mat: 'np.ndarray') -> 'np.ndarray': 402 | """ pad a [3, 4] transform matrix to a [4, 4] matrix 403 | 404 | Args: 405 | mat ([3, 4] np.ndarray): the input [3, 4] matrix 406 | Returns: 407 | mat ([4, 4] np.ndarray): the output [4, 4] matrix 408 | """ 409 | 410 | if mat.shape[0] < 4: 411 | pad = np.zeros((1, 4), dtype=np.float32) 412 | pad[0,-1] = 1 413 | return np.concatenate([mat, pad], axis=0) 414 | else: 415 | return mat 416 | 417 | 418 | def rgbd_to_colored_pc( 419 | rgb: 'np.ndarray', 420 | depth: 'np.ndarray', 421 | fx: float, 422 | fy: float, 423 | cx: float, 424 | 
cy: float, 425 | cap: float = 200) -> Tuple['np.ndarray', 'np.ndarray']: 426 | """ convert a pair of rgb and depth iamge to a 3D colored point cloud 427 | 428 | Args: 429 | rgb ([H, W, 3] np.ndarray): rgb image 430 | depth ([H, W] np.ndarray): depth image 431 | fx, fy, cx, cy (float, float, float, float): camera intrinsic matrix 432 | cap (float): depth capping value 433 | 434 | Returns: 435 | a tuple containing: 436 | points ([N, 3] np.ndarray): 3D point positions 437 | colors ([N, 3] np.ndarray): color for each point 438 | """ 439 | 440 | rgb_height, rgb_width, _ = rgb.shape 441 | X, Y = np.meshgrid(np.arange(rgb_width), np.arange(rgb_height)) 442 | xyz_rgb = np.concatenate( 443 | [X[:, :, None], Y[:, :, None], depth[:, :, None], rgb], 444 | axis=2 445 | ) 446 | xyz_rgb[:, :, 0] = (xyz_rgb[:, :, 0] - cx) * xyz_rgb[:, :, 2] / fx 447 | xyz_rgb[:, :, 1] = (xyz_rgb[:, :, 1] - cy) * xyz_rgb[:, :, 2] / fy 448 | points = xyz_rgb[:, :, :3].reshape(-1, 3) 449 | colors = xyz_rgb[:, :, 3:].reshape(-1, 3) / 255. 450 | cap_ind = np.logical_and((points[:, 2] < cap), (points[:, 2] > 0)) 451 | points = points[cap_ind] 452 | colors = colors[cap_ind] 453 | return points, colors 454 | 455 | 456 | def geodesic_distance(R1: 'np.ndarray', R2: 'np.ndarray') -> float: 457 | '''Returns the geodesic distance between two rotation matrices. 458 | 459 | Args: 460 | R1 ([3, 3] np.ndarray): input rotation matrix 461 | R2 ([3, 3] np.ndarray): input rotation matrix 462 | 463 | Returns: 464 | delta_theta (float): geodesic distance between the input rotation 465 | matrices 466 | ''' 467 | 468 | delta_R = np.dot(R1, R2.T) 469 | rotvec = Rotation.from_dcm(delta_R).as_rotvec() 470 | delta_theta = np.linalg.norm(rotvec) 471 | return delta_theta 472 | 473 | 474 | def pts_in_box(pts: 'np.ndarray', img_shape: 'np.ndarray') -> 'np.ndarray': 475 | """ check projected points are within image frame 476 | 477 | Args: 478 | pts ([N, 2] np.ndarray): a set of 2D points on image plane 479 | img_shape (aabb): bbox_size [x_min, y_min, x_max, y_max] 480 | Return: 481 | a boolean array of shape [N] indicating whether a point is within 482 | image frame 483 | """ 484 | 485 | img_shape = img_shape.reshape(2, 2) 486 | larger_x_min = pts[:, 0] > img_shape[0, 0] 487 | smaller_x_max = pts[:, 0] < img_shape[1, 0] 488 | larger_y_min = pts[:, 1] > img_shape[0, 1] 489 | smaller_y_max = pts[:, 1] < img_shape[1, 1] 490 | return (larger_x_min * smaller_x_max * \ 491 | larger_y_min * smaller_y_max) 492 | 493 | 494 | def get_normalize_factor(pts): 495 | norm = np.max(np.linalg.norm(pts, axis=1)) 496 | return norm 497 | 498 | 499 | def create_meshgrid( 500 | height: int, 501 | width: int, 502 | normalized_coordinates: bool = True, 503 | device: Optional[torch.device] = torch.device('cpu'), 504 | dtype: torch.dtype = torch.float32, 505 | ) -> torch.Tensor: 506 | """Generates a coordinate grid for an image. 507 | 508 | When the flag `normalized_coordinates` is set to True, the grid is 509 | normalized to be in the range [-1,1] to be consistent with the pytorch 510 | function grid_sample. 511 | http://pytorch.org/docs/master/nn.html#torch.nn.functional.grid_sample 512 | 513 | Args: 514 | height (int): the image height (rows). 515 | width (int): the image width (cols). 516 | normalized_coordinates (bool): whether to normalize 517 | coordinates in the range [-1, 1] in order to be consistent with the 518 | PyTorch function grid_sample. 519 | device (torch.device): the device on which the grid will be generated. Default: cpu. 
520 | dtype (torch.dtype): the data type of the generated gird. Default: float32. 521 | 522 | Return: 523 | torch.Tensor: returns a grid tensor with shape :math:`(1, H, W, 2)`. 524 | """ 525 | xs: torch.Tensor = torch.linspace(0, width - 1, width, device=device, dtype=dtype) 526 | ys: torch.Tensor = torch.linspace(0, height - 1, height, device=device, dtype=dtype) 527 | if normalized_coordinates: 528 | xs = (xs / (width - 1) - 0.5) * 2 529 | ys = (ys / (height - 1) - 0.5) * 2 530 | # generate grid by stacking coordinates 531 | base_grid: torch.Tensor = torch.stack(torch.meshgrid([xs, ys])).transpose(1, 2) # 2xHxW 532 | return torch.unsqueeze(base_grid, dim=0).permute(0, 2, 3, 1) # 1xHxWx2 533 | 534 | 535 | def calculate_normals_finite_difference(pts, func): 536 | """ 537 | compute the normal of an implicit reprentation using finite difference 538 | 539 | pts: (B, N, 3) 540 | 541 | """ 542 | 543 | batch_size, n_pts = pts.shape[:2] 544 | epsilon = np.abs(np.random.normal(scale=0.01, size=(batch_size, n_pts))) 545 | epsilon = torch.from_numpy(epsilon).float().to(pts.device) 546 | epsilon = torch.clamp(epsilon, min=1e-3, max=1) 547 | epsilon_x = torch.stack( 548 | [epsilon, torch.zeros_like(epsilon), torch.zeros_like(epsilon)], 549 | dim=-1 550 | ).detach() 551 | epsilon_y = torch.stack( 552 | [torch.zeros_like(epsilon), epsilon, torch.zeros_like(epsilon)], 553 | dim=-1 554 | ).detach() 555 | epsilon_z = torch.stack( 556 | [torch.zeros_like(epsilon), torch.zeros_like(epsilon), epsilon], 557 | dim=-1 558 | ).detach() 559 | 560 | pts_x0 = pts - epsilon_x 561 | pts_x1 = pts + epsilon_x 562 | 563 | pts_y0 = pts - epsilon_y 564 | pts_y1 = pts + epsilon_y 565 | 566 | pts_z0 = pts - epsilon_z 567 | pts_z1 = pts + epsilon_z 568 | f_out = func( 569 | torch.cat([pts_x0, pts_x1, pts_y0, pts_y1, pts_z0, pts_z1], dim=0) 570 | ) 571 | f_x0, f_x1, f_y0, f_y1, f_z0, f_z1 = torch.split(f_out, batch_size, dim=0) 572 | 573 | g_x = (f_x1 - f_x0) / (2 * epsilon) 574 | g_y = (f_y1 - f_y0) / (2 * epsilon) 575 | g_z = (f_z1 - f_z0) / (2 * epsilon) 576 | 577 | normals = torch.stack([g_x, g_y, g_z], dim=-1) 578 | normals = normals / (torch.norm(normals, dim=-1, keepdim=True) + 1e-5) 579 | return normals 580 | 581 | 582 | def recenter(vertices): 583 | min_ = np.min(vertices, axis=0) 584 | max_ = np.max(vertices, axis=0) 585 | center = (max_ + min_) / 2. 
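    # shift the vertices so the centre of their axis-aligned bounding box moves to the origin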
586 | vertices = vertices - center[None, :] 587 | return vertices, center -------------------------------------------------------------------------------- /utils/o3d_helper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import open3d as o3d 3 | import trimesh 4 | import os 5 | import sys 6 | 7 | 8 | from utils.geometry import get_homogeneous 9 | 10 | 11 | def align_vector_to_another(a=np.array([0, 0, 1]), b=np.array([1, 0, 0])): 12 | """ 13 | Aligns vector a to vector b with axis angle rotation 14 | """ 15 | if np.array_equal(a, b): 16 | return None, None 17 | if np.sum(b + a) == 0: # if b is possite to a 18 | b += 1e-3 19 | axis_ = np.cross(a, b) 20 | axis_ = axis_ / (np.linalg.norm(axis_)) 21 | angle = np.arccos(np.dot(a, b)) 22 | return axis_, angle 23 | 24 | 25 | def normalized(a, axis=-1, order=2): 26 | """Normalizes a numpy array of points""" 27 | l2 = np.atleast_1d(np.linalg.norm(a, order, axis)) 28 | l2[l2 == 0] = 1 29 | return a / np.expand_dims(l2, axis), l2 30 | 31 | 32 | class LineMesh(object): 33 | def __init__(self, points, lines=None, colors=[0, 1, 0], radius=0.15): 34 | """Creates a line represented as sequence of cylinder triangular meshes 35 | 36 | Arguments: 37 | points {ndarray} -- Numpy array of ponts Nx3. 38 | 39 | Keyword Arguments: 40 | lines {list[list] or None} -- List of point index pairs denoting 41 | line segments. If None, implicit lines from ordered pairwise 42 | points. (default: {None}) 43 | colors {list} -- list of colors, or single color of the line 44 | (default: {[0, 1, 0]}) 45 | radius {float} -- radius of cylinder (default: {0.15}) 46 | """ 47 | self.points = np.array(points) 48 | self.lines = np.array(lines) if lines is not None else \ 49 | self.lines_from_ordered_points(self.points) 50 | self.colors = np.array(colors) 51 | self.radius = radius 52 | self.cylinder_segments = [] 53 | 54 | self.create_line_mesh() 55 | 56 | @staticmethod 57 | def lines_from_ordered_points(points): 58 | lines = [[i, i + 1] for i in range(0, points.shape[0] - 1, 1)] 59 | return np.array(lines) 60 | 61 | def create_line_mesh(self): 62 | first_points = self.points[self.lines[:, 0], :] 63 | second_points = self.points[self.lines[:, 1], :] 64 | line_segments = second_points - first_points 65 | line_segments_unit, line_lengths = normalized(line_segments) 66 | 67 | z_axis = np.array([0, 0, 1]) 68 | # Create triangular mesh cylinder segments of line 69 | for i in range(line_segments_unit.shape[0]): 70 | line_segment = line_segments_unit[i, :] 71 | line_length = line_lengths[i] 72 | # get axis angle rotation to allign cylinder with line segment 73 | axis, angle = align_vector_to_another(z_axis, line_segment) 74 | # Get translation vector 75 | translation = first_points[i, :] + line_segment * line_length * 0.5 76 | # create cylinder and apply transformations 77 | cylinder_segment = o3d.geometry.TriangleMesh.create_cylinder( 78 | self.radius, line_length) 79 | cylinder_segment = cylinder_segment.translate( 80 | translation, relative=False) 81 | if axis is not None: 82 | axis_a = axis * angle 83 | cylinder_segment = cylinder_segment.rotate( 84 | R=o3d.geometry.get_rotation_matrix_from_axis_angle(axis_a), 85 | # center=True 86 | ) 87 | # color cylinder 88 | color = self.colors if self.colors.ndim == 1 else self.colors[i, :] 89 | cylinder_segment.paint_uniform_color(color) 90 | 91 | self.cylinder_segments.append(cylinder_segment) 92 | 93 | def add_line(self, vis): 94 | """Adds this line to the visualizer""" 95 | for 
cylinder in self.cylinder_segments: 96 | vis.add_geometry(cylinder) 97 | 98 | def remove_line(self, vis): 99 | """Removes this line from the visualizer""" 100 | for cylinder in self.cylinder_segments: 101 | vis.remove_geometry(cylinder) 102 | 103 | 104 | def lineset_from_pc(point_cloud, colors, orders=None): 105 | """ open3d lineset from numpy point cloud 106 | 107 | Args: 108 | point_cloud ([N, 3] np.ndarray): corner points of a 3D bounding box 109 | colors ([1, 3] np.ndarray): color of the lineset 110 | orders (): reorder the point cloud to build a valid 3D bbox 111 | 112 | Returns: 113 | line_set (open3d.geometry.Lineset) 114 | """ 115 | # vertex order is consistent with get_corner_pts() in Object class 116 | if orders is None: 117 | lines = [ 118 | [0, 1], 119 | [1, 2], 120 | [2, 3], 121 | [3, 0], 122 | [4, 5], 123 | [5, 6], 124 | [6, 7], 125 | [7, 4], 126 | [0, 4], 127 | [1, 5], 128 | [2, 6], 129 | [3, 7], 130 | ] 131 | else: 132 | lines = orders 133 | colors_tmp = np.zeros((len(lines), 3)) 134 | colors_tmp += colors 135 | line_set = o3d.geometry.LineSet( 136 | points=o3d.utility.Vector3dVector(point_cloud), 137 | lines=o3d.utility.Vector2iVector(lines), 138 | ) 139 | line_set.colors = o3d.utility.Vector3dVector(colors_tmp) 140 | return line_set 141 | 142 | 143 | def linemesh_from_pc(point_cloud, colors, orders=None): 144 | if orders is None: 145 | lines = [ 146 | [0, 1], 147 | [1, 2], 148 | [2, 3], 149 | [3, 0], 150 | [4, 5], 151 | [5, 6], 152 | [6, 7], 153 | [7, 4], 154 | [0, 4], 155 | [1, 5], 156 | [2, 6], 157 | [3, 7], 158 | ] 159 | else: 160 | lines = orders 161 | 162 | colors_tmp = np.zeros((len(lines), 3)) 163 | colors_tmp += colors 164 | 165 | line_mesh = LineMesh(point_cloud, lines, colors_tmp, radius=0.02) 166 | return line_mesh.cylinder_segments 167 | 168 | 169 | def load_scene_mesh(path, trans_mat=None, open_3d=True): 170 | scene_mesh = trimesh.load(path) 171 | if trans_mat is not None: 172 | scene_mesh.vertices = np.dot(get_homogeneous( 173 | scene_mesh.vertices), trans_mat.T)[:, :3] 174 | if open_3d: 175 | scene_mesh_o3d = trimesh2o3d(scene_mesh) 176 | return scene_mesh_o3d 177 | else: 178 | return scene_mesh 179 | 180 | 181 | def trimesh2o3d(mesh, load_color=True): 182 | mesh_o3d = o3d.geometry.TriangleMesh() 183 | mesh_o3d.vertices = o3d.utility.Vector3dVector(mesh.vertices) 184 | mesh_o3d.triangles = o3d.utility.Vector3iVector(mesh.faces) 185 | mesh_o3d.compute_vertex_normals() 186 | if load_color: 187 | if mesh.visual.vertex_colors is not None: 188 | mesh_o3d.vertex_colors = o3d.utility.Vector3dVector( 189 | mesh.visual.vertex_colors[:, :3] / 255. 
190 | ) 191 | return mesh_o3d 192 | 193 | 194 | def np2pc(points, colors=None): 195 | """ convert numpy colors point cloud to o3d point cloud 196 | 197 | Args: 198 | points (np.ndarray): [n_pts, 3] 199 | colors (np.ndarray): [n_pts, 3] 200 | Return: 201 | pts_o3d (o3d.geometry.PointCloud) 202 | """ 203 | pts_o3d = o3d.geometry.PointCloud() 204 | pts_o3d.points = o3d.utility.Vector3dVector(points) 205 | if colors is not None: 206 | pts_o3d.colors = o3d.utility.Vector3dVector(colors) 207 | return pts_o3d 208 | 209 | 210 | def mesh2o3d(vertices, faces, normals=None, colors=None): 211 | mesh = trimesh.Trimesh( 212 | vertices=vertices, 213 | faces=faces, 214 | vertex_normals=normals, 215 | vertex_colors=colors 216 | ) 217 | return trimesh2o3d(mesh) 218 | 219 | 220 | class TSDFFusion: 221 | def __init__(self, voxel_size=0.01): 222 | self.volume = o3d.pipelines.integration.ScalableTSDFVolume( 223 | voxel_length=voxel_size, 224 | sdf_trunc=voxel_size*5, 225 | color_type=o3d.pipelines.integration.TSDFVolumeColorType.RGB8) 226 | 227 | def integrate(self, depth, color, T_wc, intr_mat): 228 | """integrate new RGBD frame 229 | 230 | Args: 231 | depth (np.ndarray): [h,w] in meters 232 | color (np.ndarray): [h,w,3] in range[0,255] 233 | T_wc (np.ndarray): [4,4] 234 | intr_mat (np.ndarray): [3,3] or [4,4] 235 | 236 | """ 237 | img_h, img_w = depth.shape 238 | color = o3d.geometry.Image(color.astype(np.uint8)) 239 | depth = o3d.geometry.Image((depth * 1000).astype(np.uint16)) 240 | rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth( 241 | color, depth, depth_trunc=10000.0, convert_rgb_to_intensity=False) 242 | intrinsic = o3d.camera.PinholeCameraIntrinsic() 243 | intrinsic.set_intrinsics( 244 | width=img_w, 245 | height=img_h, 246 | fx=intr_mat[0, 0], 247 | fy=intr_mat[1, 1], 248 | cx=intr_mat[0, 2], 249 | cy=intr_mat[1, 2], 250 | ) 251 | T_cw = np.linalg.inv(T_wc) 252 | self.volume.integrate(rgbd, intrinsic, T_cw) 253 | 254 | def marching_cube(self, path=None, with_color=False): 255 | mesh_o3d = self.volume.extract_triangle_mesh() 256 | mesh_o3d.compute_vertex_normals() 257 | mesh = trimesh.Trimesh( 258 | vertices=np.asarray(mesh_o3d.vertices), # / dimension, 259 | faces=np.asarray(mesh_o3d.triangles), 260 | vertex_normals=np.asarray(mesh_o3d.vertex_normals) 261 | ) 262 | if with_color: 263 | mesh.visual.vertex_colors = np.asarray(mesh_o3d.vertex_colors) 264 | if path is not None: 265 | dir_ = "/".join(path.split("/")[:-1]) 266 | if not os.path.exists(dir_): 267 | os.mkdir(dir_) 268 | mesh.export(path) 269 | return mesh 270 | 271 | def marching_cube_o3d(self, path=None, with_color=False): 272 | mesh_o3d = self.volume.extract_triangle_mesh() 273 | return mesh_o3d -------------------------------------------------------------------------------- /utils/pytorch_ssim/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch.autograd import Variable 4 | import numpy as np 5 | from math import exp 6 | 7 | 8 | def gaussian(window_size, sigma): 9 | gauss = torch.Tensor([exp(-(x - window_size // 2) ** 2 / float(2 * sigma ** 2)) for x in range(window_size)]) 10 | return gauss / gauss.sum() 11 | 12 | 13 | def create_window(window_size, channel): 14 | _1D_window = gaussian(window_size, 1.5).unsqueeze(1) 15 | _2D_window = _1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0) 16 | window = Variable(_2D_window.expand(channel, 1, window_size, window_size).contiguous()) 17 | return window 18 | 19 | 20 | 
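# _ssim estimates local means, variances and the covariance of the two images with a Gaussian
# window (depthwise conv2d), then evaluates the SSIM formula; C1 = 0.01**2 and C2 = 0.03**2 are
# the usual stability constants for inputs scaled to [0, 1].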
def _ssim(img1, img2, window, window_size, channel, use_padding, size_average=True): 21 | 22 | if use_padding: 23 | padding_size = window_size // 2 24 | else: 25 | padding_size = 0 26 | 27 | mu1 = F.conv2d(img1, window, padding=padding_size, groups=channel) 28 | mu2 = F.conv2d(img2, window, padding=padding_size, groups=channel) 29 | 30 | mu1_sq = mu1.pow(2) 31 | mu2_sq = mu2.pow(2) 32 | mu1_mu2 = mu1 * mu2 33 | 34 | sigma1_sq = F.conv2d(img1 * img1, window, padding=padding_size, groups=channel) - mu1_sq 35 | sigma2_sq = F.conv2d(img2 * img2, window, padding=padding_size, groups=channel) - mu2_sq 36 | sigma12 = F.conv2d(img1 * img2, window, padding=padding_size, groups=channel) - mu1_mu2 37 | 38 | C1 = 0.01 ** 2 39 | C2 = 0.03 ** 2 40 | 41 | ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / ((mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2)) 42 | 43 | if size_average: 44 | return ssim_map.mean() 45 | else: 46 | return ssim_map.mean(1).mean(1).mean(1) 47 | 48 | 49 | class SSIM(torch.nn.Module): 50 | def __init__(self, window_size=11, use_padding=True, size_average=True): 51 | super(SSIM, self).__init__() 52 | self.window_size = window_size 53 | self.size_average = size_average 54 | self.use_padding = use_padding 55 | self.channel = 1 56 | self.window = create_window(window_size, self.channel) 57 | 58 | def forward(self, img1, img2): 59 | (_, channel, _, _) = img1.size() 60 | 61 | if channel == self.channel and self.window.data.type() == img1.data.type(): 62 | window = self.window 63 | else: 64 | window = create_window(self.window_size, channel) 65 | 66 | if img1.is_cuda: 67 | window = window.cuda(img1.get_device()) 68 | window = window.type_as(img1) 69 | 70 | self.window = window 71 | self.channel = channel 72 | 73 | return _ssim(img1, img2, window, self.window_size, channel, self.use_padding, self.size_average) 74 | 75 | 76 | def ssim(img1, img2, use_padding=True, window_size=11, size_average=True): 77 | """SSIM only defined at intensity channel. For RGB or YUV or other image format, this function computes SSIm at each 78 | channel and averge them. 79 | :param img1: (B, C, H, W) float32 in [0, 1] 80 | :param img2: (B, C, H, W) float32 in [0, 1] 81 | :param use_padding: we use conv2d when we compute mean and var for each patch, this use_padding is for that conv2d. 82 | :param window_size: patch size 83 | :param size_average: 84 | :return: a tensor that contains only one scalar. 85 | """ 86 | (_, channel, _, _) = img1.size() 87 | window = create_window(window_size, channel) 88 | 89 | if img1.is_cuda: 90 | window = window.cuda(img1.get_device()) 91 | window = window.type_as(img1) 92 | 93 | return _ssim(img1, img2, window, window_size, channel, use_padding, size_average) --------------------------------------------------------------------------------
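The README notes that each gt_mesh.ply is produced by TSDF fusion of the GT depth maps, and utils/o3d_helper.py provides a `TSDFFusion` helper for exactly this kind of integration. Below is a minimal sketch of fusing one sequence with that helper; the sequence name, the output path, and the assumptions that the depth PNGs store millimetres and share the resolution of the per-frame intrinsics are illustrative, not guaranteed by the repo.

```python
import os

import cv2
import numpy as np

from utils.o3d_helper import TSDFFusion

seq_dir = "./data/aston"  # any sequence listed in evaluations/test_seqs.txt
fusion = TSDFFusion(voxel_size=0.005)

frame_ids = sorted(f[:-4] for f in os.listdir(os.path.join(seq_dir, "gt_depth")))
for frame_id in frame_ids:
    # assumed: depth PNGs store depth in millimetres; integrate() expects metres
    depth = cv2.imread(os.path.join(seq_dir, "gt_depth", f"{frame_id}.png"), -1)
    depth = depth.astype(np.float32) / 1000.0
    # RGB image (OpenCV loads BGR, so flip the channel order)
    color = cv2.imread(os.path.join(seq_dir, "image", f"{frame_id}.jpg"))[..., ::-1]
    # per-frame camera-to-world pose and 3x3 intrinsics, as described in the README
    T_wc = np.loadtxt(os.path.join(seq_dir, "pose", f"{frame_id}.txt")).reshape(4, 4)
    intr_mat = np.loadtxt(os.path.join(seq_dir, "intrinsic", f"{frame_id}.txt")).reshape(3, 3)
    fusion.integrate(depth, color, T_wc, intr_mat)

mesh = fusion.marching_cube(with_color=True)  # returns a trimesh.Trimesh
mesh.export("./aston_fused.ply")
```

The resulting mesh can then be dropped into `./meshes/$METHOD/` and scored with evaluations/evaluate_3d.py as described in the README.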