├── .gitignore ├── LICENSE ├── README.md ├── environment.yml ├── evaluations ├── evaluate_3d.py ├── evaluate_nvs.py └── test_seqs.txt └── utils ├── arkit_utils.py ├── geometry.py ├── o3d_helper.py └── pytorch_ssim └── __init__.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | meshes/ 132 | nvs/ 133 | img/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Active Vision Laboratory 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

# Building LEGO for 3D Reconstruction on Mobile Devices

Kejie Li · Jia-Wang Bian · Robert Castle · Philip H.S. Torr · Victor Adrian Prisacariu

Project Page | arXiv | Dataset

Even 3D scanners can only generate pseudo ground-truth shapes with artefacts.
MobileBrick is the first **multi-view RGBD** dataset, captured on a **mobile device**, with **precise** 3D annotations for detailed 3D object reconstruction.

We propose a novel data-capturing and 3D annotation pipeline in MobileBrick that does not rely on expensive 3D scanners.
The key to creating precise 3D ground-truth shapes is the use of LEGO models, which are built from bricks of known geometry.
RGBD images captured on a mobile device, paired with exact 3D geometry annotations, provide a unique opportunity for future research on high-fidelity 3D reconstruction.

# Overview

1. [Install](#install)
1. [Our dataset](#dataset-organisation)
1. [Evaluation](#evaluation)
1. [Cite](#cite)
1. [Changelog](#changelog)

# Install
You can install the dependencies with Anaconda as follows:
```shell
conda env create -f environment.yml
conda activate mobilebrick
```

# Dataset Organisation
The dataset is organised by sequence: 135 sequences of random shapes can be used for training, and 18 sequences of manually curated LEGO models are reserved for evaluation.

A sequence contains the following structure:
```
SEQUENCE_NAME
├── arkit_depth (the confidence and depth maps provided by ARKit)
| ├── 000000_conf.png
| ├── 000000.png
| ├── ...
├── gt_depth (the high-resolution depth maps projected from the aligned GT shape)
| ├── 000000.png
| ├── ...
├── image (the RGB images)
| ├── 000000.jpg
| ├── ...
├── mask (object foreground masks projected from the aligned GT shape)
| ├── 000000.png
| ├── ...
├── intrinsic (3x3 intrinsic matrix of each image)
| ├── 000000.txt
| ├── ...
├── pose (4x4 camera-to-world transformation matrix of each image)
| ├── 000000.txt
| ├── ...
├── mesh
| ├── gt_mesh.ply
├── visibility_mask.npy (the visibility mask used for evaluation)
├── cameras.npz (processed camera poses in the NeuS format)
```

Note:
- gt_mesh.ply is created by running TSDF fusion on the GT depth maps.

# Evaluation
We provide scripts to evaluate 3D reconstruction and Novel View Synthesis (NVS).

To evaluate 3D reconstruction, run:
```shell
python evaluations/evaluate_3d.py --method $METHOD
```
The reconstruction files (.ply) to be evaluated should be placed in the `./meshes/$METHOD` folder. A .csv file with per-sequence results will be generated.

To evaluate NVS, run:
```shell
python evaluations/evaluate_nvs.py --method $METHOD
```
The rendered images for evaluation should be placed in `./nvs/$METHOD`.

# Cite
Please cite our work if you find it useful or use any of our code:
```latex
@article{li2023mobilebrick,
  author  = {Kejie Li and Jia-Wang Bian and Robert Castle and Philip H.S. Torr and Victor Adrian Prisacariu},
  title   = {MobileBrick: Building LEGO for 3D Reconstruction on Mobile Devices},
  journal = {arXiv preprint arXiv:2303.01932},
  year    = {2023}
}
```

# Changelog
- 09/03/2023: MobileBrick is merged into [Voxurf](https://github.com/wutong16/Voxurf); see the instructions in their repo.
112 | - 06/03/2023: Dataset is online 113 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: mobilebrick 2 | channels: 3 | - pytorch 4 | - conda-forge 5 | - defaults 6 | dependencies: 7 | - _libgcc_mutex=0.1=main 8 | - _openmp_mutex=5.1=1_gnu 9 | - blas=1.0=mkl 10 | - bzip2=1.0.8=h7f98852_4 11 | - ca-certificates=2022.12.7=ha878542_0 12 | - certifi=2022.12.7=pyhd8ed1ab_0 13 | - cudatoolkit=11.3.1=h9edb442_10 14 | - ffmpeg=4.3=hf484d3e_0 15 | - freetype=2.10.4=h0708190_1 16 | - gmp=6.2.1=h58526e2_0 17 | - gnutls=3.6.13=h85f3911_1 18 | - intel-openmp=2022.1.0=h9e868ea_3769 19 | - jbig=2.1=h7f98852_2003 20 | - jpeg=9e=h166bdaf_1 21 | - lame=3.100=h7f98852_1001 22 | - lcms2=2.12=hddcbb42_0 23 | - ld_impl_linux-64=2.38=h1181459_1 24 | - lerc=2.2.1=h9c3ff4c_0 25 | - libblas=3.9.0=16_linux64_mkl 26 | - libcblas=3.9.0=16_linux64_mkl 27 | - libdeflate=1.7=h7f98852_5 28 | - libffi=3.4.2=h6a678d5_6 29 | - libgcc-ng=11.2.0=h1234567_1 30 | - libgfortran-ng=12.2.0=h69a702a_19 31 | - libgfortran5=12.2.0=h337968e_19 32 | - libgomp=11.2.0=h1234567_1 33 | - libiconv=1.17=h166bdaf_0 34 | - liblapack=3.9.0=16_linux64_mkl 35 | - libopenblas=0.3.20=pthreads_h78a6416_0 36 | - libpng=1.6.37=h21135ba_2 37 | - libstdcxx-ng=11.2.0=h1234567_1 38 | - libtiff=4.3.0=hf544144_1 39 | - libuv=1.43.0=h7f98852_0 40 | - libwebp-base=1.2.2=h7f98852_1 41 | - lz4-c=1.9.3=h9c3ff4c_1 42 | - mkl=2022.1.0=hc2b9512_224 43 | - ncurses=6.4=h6a678d5_0 44 | - nettle=3.6=he412f7d_0 45 | - olefile=0.46=pyh9f0ad1d_1 46 | - openh264=2.1.1=h780b84a_0 47 | - openjpeg=2.4.0=hb52868f_1 48 | - openssl=1.1.1t=h7f8727e_0 49 | - pillow=8.3.2=py39ha612740_0 50 | - pip=22.3.1=py39h06a4308_0 51 | - python=3.9.16=h7a1cb2a_0 52 | - python_abi=3.9=2_cp39 53 | - pytorch=1.10.1=py3.9_cuda11.3_cudnn8.2.0_0 54 | - pytorch-mutex=1.0=cuda 55 | - readline=8.2=h5eee18b_0 56 | - setuptools=65.6.3=py39h06a4308_0 57 | - sqlite=3.40.1=h5082296_0 58 | - tk=8.6.12=h1ccaba5_0 59 | - torchaudio=0.10.1=py39_cu113 60 | - torchvision=0.11.2=py39_cu113 61 | - trimesh=3.20.0=pyhd8ed1ab_0 62 | - typing_extensions=4.4.0=pyha770c72_0 63 | - tzdata=2022g=h04d1e81_0 64 | - wheel=0.38.4=py39h06a4308_0 65 | - xz=5.2.10=h5eee18b_1 66 | - zlib=1.2.13=h5eee18b_0 67 | - zstd=1.5.0=ha95c52a_0 68 | - pip: 69 | - addict==2.4.0 70 | - aiofiles==22.1.0 71 | - aiosqlite==0.18.0 72 | - anyio==3.6.2 73 | - arrow==1.2.3 74 | - babel==2.11.0 75 | - beautifulsoup4==4.11.2 76 | - charset-normalizer==3.0.1 77 | - decorator==5.1.1 78 | - deprecation==2.1.0 79 | - fastjsonschema==2.16.2 80 | - fqdn==1.5.1 81 | - idna==3.4 82 | - importlib-metadata==6.0.0 83 | - isoduration==20.11.0 84 | - joblib==1.2.0 85 | - json5==0.9.11 86 | - jsonpointer==2.3 87 | - jsonschema==4.17.3 88 | - jupyter-client==7.4.9 89 | - jupyter-core==5.2.0 90 | - jupyter-events==0.6.3 91 | - jupyter-packaging==0.12.3 92 | - jupyter-server==2.3.0 93 | - jupyter-server-fileid==0.7.0 94 | - jupyter-server-terminals==0.4.4 95 | - jupyter-server-ydoc==0.6.1 96 | - jupyter-ydoc==0.2.2 97 | - jupyterlab==3.6.1 98 | - jupyterlab-server==2.19.0 99 | - lpips==0.1.4 100 | - mistune==2.0.5 101 | - nbclassic==0.5.2 102 | - nbconvert==7.2.9 103 | - nbformat==5.7.3 104 | - notebook-shim==0.2.2 105 | - numpy==1.24.2 106 | - open3d==0.15.2 107 | - opencv-python==4.7.0.72 108 | - platformdirs==3.0.0 109 | - pyparsing==3.0.9 110 | - pyquaternion==0.9.9 111 | - python-json-logger==2.0.7 112 | - pyyaml==6.0 113 | 
- pyzmq==25.0.0 114 | - requests==2.28.2 115 | - rfc3339-validator==0.1.4 116 | - rfc3986-validator==0.1.1 117 | - scikit-learn==1.2.1 118 | - scipy==1.10.1 119 | - six==1.16.0 120 | - sniffio==1.3.0 121 | - soupsieve==2.4 122 | - threadpoolctl==3.1.0 123 | - tinycss2==1.2.1 124 | - tomli==2.0.1 125 | - tomlkit==0.11.6 126 | - tornado==6.2 127 | - tqdm==4.64.1 128 | - traitlets==5.9.0 129 | - uri-template==1.2.0 130 | - urllib3==1.26.14 131 | - webcolors==1.12 132 | - websocket-client==1.5.1 133 | - y-py==0.5.9 134 | - ypy-websocket==0.8.2 135 | - zipp==3.14.0 136 | prefix: /home/kejie/anaconda3/envs/oxbrick 137 | -------------------------------------------------------------------------------- /evaluations/evaluate_3d.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import csv 4 | import numpy as np 5 | import trimesh 6 | import torch 7 | import torch.nn.functional as F 8 | from sklearn.neighbors import NearestNeighbors 9 | from scipy.spatial import KDTree 10 | import open3d as o3d 11 | import sys 12 | 13 | sys.path.append(os.path.dirname(os.path.dirname(__file__))) 14 | 15 | import utils.o3d_helper as o3d_helper 16 | 17 | 18 | def compute_curvature(points, radius=0.005): 19 | tree = KDTree(points) 20 | 21 | curvature = [ 0 ] * points.shape[0] 22 | 23 | for index, point in enumerate(points): 24 | indices = tree.query_ball_point(point, radius) 25 | if len(indices) < 3: 26 | print("invalid points") 27 | continue 28 | # local covariance 29 | M = np.array([ points[i] for i in indices ]).T 30 | M = np.cov(M) 31 | 32 | # eigen decomposition 33 | V, E = np.linalg.eig(M) 34 | # h3 < h2 < h1 35 | h1, h2, h3 = V 36 | 37 | curvature[index] = h3 / (h1 + h2 + h3) 38 | 39 | return np.asarray(curvature) 40 | 41 | 42 | def visibility_test(volume, min_pts, resolution, voxel_size, mesh, device): 43 | """ filter out points that are not wihin the masked volume 44 | 45 | Args: 46 | volume (np.ndarray): [H,W,D] the mask volume 47 | min_pts (np.ndarray): minimum points 48 | resolution (np.ndarray): volume resolution 49 | voxel_size (float): voxel_size 50 | mesh (open3d.mesh): input mesh 51 | device (string): the device for pytorch 52 | """ 53 | 54 | points = np.asarray(mesh.vertices) 55 | volume = torch.from_numpy(volume).float().to(device) 56 | voxels = (points - min_pts) / voxel_size 57 | voxels = voxels / (resolution-1) * 2 - 1 58 | voxels = torch.from_numpy(voxels)[..., [2,1,0]].float().to(device) 59 | mask = F.grid_sample(volume.unsqueeze(0).unsqueeze(0), # [1,1,H,W,D] 60 | voxels.unsqueeze(0).unsqueeze(0).unsqueeze(0), # [1,1,1,N,3] 61 | mode="nearest", 62 | padding_mode="zeros", 63 | align_corners=True) # [] 64 | mask = mask[0, 0, 0, 0].cpu().numpy() > 0 65 | mesh.remove_vertices_by_mask(mask==False) 66 | mesh.compute_vertex_normals() 67 | return mesh 68 | 69 | 70 | def evaluate( 71 | pred_points, 72 | # pred_curv, 73 | gt_points, 74 | # gt_curv, 75 | threshold, 76 | verbose=False 77 | ): 78 | nbrs = NearestNeighbors(n_neighbors=1, algorithm='ball_tree').fit(gt_points) 79 | distances, indices = nbrs.kneighbors(pred_points) 80 | 81 | pred_gt_dist = np.mean(distances) 82 | precision = np.sum(distances < threshold) / len(distances) 83 | 84 | nbrs = NearestNeighbors(n_neighbors=1, algorithm='ball_tree').fit(pred_points) 85 | distances, indices = nbrs.kneighbors(gt_points) 86 | 87 | # curv_diff = np.abs(gt_curv - pred_curv[indices[:,0]]) 88 | # mean_curv_diff = np.mean(curv_diff) 89 | 90 | gt_pred_dist = np.mean(distances) 91 | recall = 
np.sum(distances < threshold) / len(distances) 92 | F1 = 2 * precision * recall / (precision + recall) 93 | chamfer = pred_gt_dist + gt_pred_dist 94 | 95 | if verbose: 96 | # print("pred -> gt: ", pred_gt_dist) 97 | print("precision @ {}: {:.6f}".format(threshold, precision)) 98 | # print("gt -> pred: ", gt_pred_dist) 99 | print("recall @ {}: {:.6f}".format(threshold, recall)) 100 | 101 | print("F1: {:.6f}".format(F1)) 102 | # print("mean curvature difference: {:.6f}".format(mean_curv_diff)) 103 | print("Chamfer: {:.6f}".format(chamfer)) 104 | # print("{:.3f}/{:.4f}/{:.3f}/{:.4f}/{:.4f}".format(pred_gt_dist, precision, gt_pred_dist, recall, F1)) 105 | out = {} 106 | out['pred_gt'] = pred_gt_dist 107 | out['accuracy'] = precision 108 | out['gt_pred'] = gt_pred_dist 109 | out['recall'] = recall 110 | out['chamfer'] = pred_gt_dist + gt_pred_dist 111 | out['F1'] = F1 112 | return out 113 | 114 | 115 | def sample_surface_points(mesh): 116 | n_points = mesh.vertices 117 | 118 | 119 | def main(): 120 | if torch.cuda.is_available(): 121 | device = torch.device('cuda') 122 | else: 123 | device = torch.device('cpu') 124 | 125 | args_parser = argparse.ArgumentParser() 126 | args_parser.add_argument("--seq_txt", 127 | default="./evaluations/test_seqs.txt", 128 | help="the .txt file listing the testing sequences") 129 | args_parser.add_argument("--gt_root", 130 | default="./data", 131 | help="the directory of the dataset") 132 | args_parser.add_argument("--skip", 133 | nargs="+", 134 | help="sequences to skip") 135 | args_parser.add_argument("--method", 136 | required=True, 137 | help="name of the method to be evaluated") 138 | args_parser.add_argument("--n_pts", 139 | default=100000, 140 | type=int, 141 | help="the number of sampling points for evaluation") 142 | args_parser.add_argument("--save_output", 143 | action="store_true", 144 | help="whether to save output mesh") 145 | args = args_parser.parse_args() 146 | 147 | n_samples = args.n_pts 148 | pred_dir = os.path.join(f"./meshes/{args.method}") 149 | gt_root = args.gt_root 150 | 151 | skip_seqs = args.skip if args.skip is not None else [] 152 | with open(args.seq_txt, "r") as f: 153 | seqs = [l for l in f.read().split(",") if l not in skip_seqs] 154 | 155 | chamfer_loss = [] 156 | fitness = [] 157 | accuracy = [] 158 | recall = [] 159 | F1 = [] 160 | accuracy_1,recall_1, F1_1 = [], [], [] 161 | for seq in seqs: 162 | # load ground-truth 163 | print(f"evaluating {seq}: ") 164 | gt_dir = os.path.join(gt_root, seq) 165 | visibility_mask = np.load(os.path.join(gt_dir, "visibility_mask.npy"), allow_pickle=True).item() 166 | resolution = visibility_mask['resolutions'] 167 | volume = visibility_mask['mask'].reshape(resolution) 168 | voxel_size = visibility_mask['voxel_size'] 169 | min_pts = visibility_mask['min_pts'] 170 | gt_mesh = o3d.io.read_triangle_mesh(os.path.join(gt_dir, "mesh", "gt_mesh.ply")) 171 | gt_points = np.asarray(gt_mesh.sample_points_poisson_disk(n_samples).points) 172 | # gt_mesh_trimesh = trimesh.load(os.path.join(gt_dir, "mesh", "gt_mesh.ply")) 173 | # gt_curv = trimesh.curvature.discrete_gaussian_curvature_measure(gt_mesh_trimesh, gt_points, 0.005) 174 | 175 | # load predictions 176 | mesh_path = os.path.join(pred_dir, f"{seq}.ply") 177 | pred_mesh_trimesh = trimesh.load(mesh_path) 178 | pred_mesh = o3d.io.read_triangle_mesh(mesh_path) 179 | 180 | gt_pts = o3d_helper.np2pc(gt_mesh.vertices) 181 | pred_pts = o3d_helper.np2pc(pred_mesh.vertices) 182 | threshold = 0.02 183 | trans_init = np.eye(4) 184 | reg_p2l = 
o3d.pipelines.registration.registration_icp( 185 | gt_pts, pred_pts, threshold, trans_init, 186 | o3d.pipelines.registration.TransformationEstimationPointToPoint(), 187 | o3d.pipelines.registration.ICPConvergenceCriteria(max_iteration=10)) 188 | fitness.append(reg_p2l.fitness) 189 | if reg_p2l.fitness > 0.99: 190 | new_pose = reg_p2l.transformation 191 | pred_mesh.transform(np.linalg.inv(new_pose)) 192 | pred_mesh = visibility_test(volume, min_pts, resolution, voxel_size, pred_mesh, device) 193 | if args.save_output: 194 | o3d.io.write_triangle_mesh(os.path.join(pred_dir, f"{seq}_cropped.ply"), pred_mesh) 195 | if len(np.asarray(pred_mesh.triangles)) > 0: 196 | pred_points = np.asarray(pred_mesh.sample_points_poisson_disk(n_samples).points) 197 | # pred_curv = trimesh.curvature.discrete_gaussian_curvature_measure(pred_mesh_trimesh, pred_points, 0.005) 198 | else: 199 | pred_points = np.random.permutation(np.asarray(pred_mesh.vertices))[:n_samples] 200 | out = evaluate( 201 | pred_points, 202 | # pred_curv, 203 | gt_points, 204 | # gt_curv, 205 | threshold=0.0025, 206 | verbose=True) 207 | chamfer_loss.append(out['chamfer']) 208 | accuracy.append(out['accuracy']) 209 | recall.append(out['recall']) 210 | F1.append(out['F1']) 211 | out = evaluate(pred_points, gt_points, threshold=0.005, verbose=True) 212 | accuracy_1.append(out['accuracy']) 213 | recall_1.append(out['recall']) 214 | F1_1.append(out['F1']) 215 | 216 | with open(os.path.join(pred_dir, "data.csv"), "w") as f: 217 | writer = csv.writer(f) 218 | writer.writerow(seqs) 219 | writer.writerow(fitness) 220 | writer.writerow(chamfer_loss) 221 | writer.writerow(accuracy) 222 | writer.writerow(recall) 223 | writer.writerow(F1) 224 | writer.writerow(accuracy_1) 225 | writer.writerow(recall_1) 226 | writer.writerow(F1_1) 227 | 228 | print("final result: ") 229 | print(f"chamfer: {sum(chamfer_loss) / len(chamfer_loss)}") 230 | print(f"accuracy: {sum(accuracy) / len(accuracy)}") 231 | print(f"recall: {sum(recall) / len(recall)}") 232 | print(f"F1: {sum(F1) / len(F1)}") 233 | 234 | 235 | 236 | if __name__ == "__main__": 237 | main() 238 | 239 | -------------------------------------------------------------------------------- /evaluations/evaluate_nvs.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import cv2 3 | import os 4 | import numpy as np 5 | import torch 6 | import torch.nn.functional as F 7 | import lpips as lpips_lib 8 | import sys 9 | 10 | sys.path.append(os.path.dirname(os.path.dirname(__file__))) 11 | from utils import pytorch_ssim 12 | 13 | 14 | def mse2psnr(mse): 15 | """ 16 | :param mse: scalar 17 | :return: scalar np.float32 18 | """ 19 | mse = np.maximum(mse, 1e-10) # avoid -inf or nan when mse is very small. 
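    # for intensities normalised to [0, 1], PSNR = -10 * log10(MSE); the clamp above avoids log10(0)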
20 | psnr = -10.0 * np.log10(mse) 21 | return psnr.astype(np.float32) 22 | 23 | 24 | def main(): 25 | if torch.cuda.is_available(): 26 | device = torch.device('cuda') 27 | else: 28 | device = torch.device('cpu') 29 | lpips_vgg_fn = lpips_lib.LPIPS(net='vgg').to(device) 30 | 31 | args_parser = argparse.ArgumentParser() 32 | args_parser.add_argument("--seq_txt", 33 | default="./evaluations/test_seqs.txt", 34 | help="the .txt file listing the testing sequences") 35 | args_parser.add_argument("--gt_root", 36 | default="./data", 37 | help="the directory of the dataset") 38 | args_parser.add_argument("--skip", 39 | nargs="+", 40 | help="sequences to skip") 41 | args_parser.add_argument("--method", 42 | required=True, 43 | help="name of the method to be evaluated") 44 | args = args_parser.parse_args() 45 | 46 | skip_seqs = args.skip if args.skip is not None else [] 47 | with open(args.seq_txt, "r") as f: 48 | seqs = [l for l in f.read().split(",") if l not in skip_seqs] 49 | 50 | root_dir = f"./nvs/{args.method}" 51 | psnr_out = 0 52 | ssim_out = 0 53 | lpips_out = 0 54 | n_imgs = 0 55 | for seq in seqs: 56 | seq_dir = os.path.join(root_dir, seq) 57 | img_ids = sorted([f.split("_")[1].split(".")[0] for f in os.listdir(seq_dir) if f.startswith("gt") and f.endswith(".png")]) 58 | for img_id in img_ids: 59 | rendered_img = cv2.imread(os.path.join(seq_dir, f"render_{img_id}.png"), -1)[...,::-1] / 255. 60 | gt_img = cv2.imread(os.path.join(seq_dir, f"gt_{img_id}.png"), -1)[...,::-1] / 255. 61 | rendered_img = torch.from_numpy(rendered_img).float().to(device) 62 | gt_img = torch.from_numpy(gt_img).float().to(device) 63 | 64 | # compute mse 65 | mse = F.mse_loss(rendered_img, gt_img).item() 66 | 67 | # compute psnr 68 | psnr = mse2psnr(mse) 69 | 70 | # compute ssim 71 | ssim = pytorch_ssim.ssim(rendered_img.permute(2, 0, 1).unsqueeze(0), gt_img.permute(2, 0, 1).unsqueeze(0)).item() 72 | 73 | # compute lpips 74 | lpips_loss = lpips_vgg_fn(rendered_img.permute(2, 0, 1).unsqueeze(0).contiguous(), 75 | gt_img.permute(2, 0, 1).unsqueeze(0).contiguous(), normalize=True).item() 76 | psnr_out += psnr 77 | ssim_out += ssim 78 | lpips_out += lpips_loss 79 | n_imgs += 1 80 | print(f"psnr: {psnr_out/n_imgs}") 81 | print(f"ssim_out: {ssim_out/n_imgs}") 82 | print(f"lpips: {lpips_out/n_imgs}") 83 | 84 | 85 | if __name__ == "__main__": 86 | main() -------------------------------------------------------------------------------- /evaluations/test_seqs.txt: -------------------------------------------------------------------------------- 1 | aston,audi,beetles,big_ben,boat,bridge,cabin,camera,castle,colosseum,convertible,ferrari,jeep,london_bus,motorcycle,porsche,satellite,space_shuttle -------------------------------------------------------------------------------- /utils/arkit_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | 4 | 5 | def load_extr(path): 6 | with open(path, "r") as f: 7 | cam = json.load(f) 8 | T_wc = np.asarray(cam['cameraPoseARFrame']).reshape(4, 4) 9 | T_align = np.eye(4) 10 | T_align[1, 1] = -1 11 | T_align[2, 2] = -1 12 | T_wc = T_wc @ T_align 13 | return T_wc 14 | 15 | def load_intr(path): 16 | with open(path, "r") as f: 17 | cam = json.load(f) 18 | intr_mat = np.asarray(cam['intrinsics']).reshape(3, 3) 19 | return intr_mat 20 | 21 | 22 | def load_alignment(path): 23 | with open(path, "r") as f: 24 | cam = json.load(f) 25 | align_mat = np.asarray(cam['alignment'][:-1]).reshape(4, 4) 26 | scale = 
float(cam['alignment'][-1]) 27 | return align_mat, scale 28 | 29 | 30 | -------------------------------------------------------------------------------- /utils/geometry.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn.functional as F 4 | from typing import Union, Tuple, Sequence 5 | from scipy.spatial.transform import Rotation 6 | from typing import Optional 7 | 8 | 9 | class Simulator: 10 | def __init__(self): 11 | fname = "/home/kejie/repository/fast_sdf/dist-model.txt" 12 | data = np.loadtxt(fname, comments='%', skiprows=5) 13 | dist = np.empty([80, 80, 5]) 14 | 15 | for y in range(0, 80): 16 | for x in range(0, 80): 17 | idx = (y * 80 + x) * 23 + 3 18 | if (data[idx:idx + 5] < 8000).all(): 19 | dist[y, x, :] = 0 20 | else: 21 | dist[y, x, :] = data[idx + 15: idx + 20] 22 | 23 | self.model = dist 24 | 25 | def undistort(self, x, y, z): 26 | 27 | i2 = int((z + 1) / 2) 28 | i1 = i2 - 1 29 | a = (z - (i1 * 2 + 1)) / 2 30 | x = int(x / 8) 31 | y = int(y / 6) 32 | f = (1 - a) * self.model[y, x, min(max(i1, 0), 4) 33 | ] + a * self.model[y, x, min(i2, 4)] 34 | 35 | if f == 0: 36 | return 0 37 | else: 38 | return z / f 39 | 40 | def simulate(self, clean_depth): 41 | 42 | # a = np.array(Image.open(inputpng)).astype(np.float32) / 1000.0 43 | img_h, img_w = clean_depth.shape 44 | b = np.copy(clean_depth) 45 | it = np.nditer(clean_depth, flags=[ 46 | 'multi_index'], op_flags=['writeonly']) 47 | 48 | while not it.finished: 49 | 50 | # pixel shuffle 51 | x = min( 52 | max(round(it.multi_index[1] + np.random.normal(0, 0.25)), 0), img_w-1) 53 | y = min( 54 | max(round(it.multi_index[0] + np.random.normal(0, 0.25)), 0), img_h-1) 55 | 56 | # downsample 57 | d = b[y - y % 2, x - x % 2] 58 | 59 | # distortion 60 | d = self.undistort(x, y, d) 61 | 62 | # quantization and high freq noise 63 | if d == 0: 64 | it[0] = 0 65 | else: 66 | it[0] = 35.130 * 8 / \ 67 | round((35.130 / d + np.random.normal(0, 0.027778)) * 8) 68 | 69 | it.iternext() 70 | return clean_depth 71 | 72 | 73 | def get_homogeneous( 74 | pts: Union['np.ndarray', 'torch.tensor'] 75 | ) -> Union['np.ndarray', 'torch.tensor']: 76 | """ convert [(b), N, 3] pts to homogeneous coordinate 77 | 78 | Args: 79 | pts ([(b), N, 3] Union['np.ndarray', 'torch.tensor']): input point cloud 80 | 81 | Returns: 82 | homo_pts ([(b), N, 4] Union['np.ndarray', 'torch.tensor']): output point 83 | cloud 84 | 85 | Raises: 86 | ValueError: if the input tensor/array is not with the shape of [b, N, 3] 87 | or [N, 3] 88 | TypeError: if input is not either tensor or array 89 | """ 90 | 91 | batch = False 92 | if len(pts.shape) == 3: 93 | batch = True 94 | elif len(pts.shape) == 2: 95 | pts = pts 96 | else: 97 | raise ValueError("only accept [b, n_pts, 3] or [n_pts, 3]") 98 | 99 | if isinstance(pts, torch.Tensor): 100 | ones = torch.ones_like(pts[..., :1]) 101 | homo_pts = torch.cat([pts, ones], dim=-1) 102 | elif isinstance(pts, np.ndarray): 103 | ones = np.ones_like(pts[..., :1]) 104 | homo_pts = np.concatenate([pts, ones], axis=-1) 105 | else: 106 | raise TypeError("wrong data type") 107 | return homo_pts 108 | 109 | 110 | def get_aabb(pc: 'np.ndarray') -> 'np.ndarray': 111 | """ get aabb of a point cloud 112 | 113 | Args: 114 | pc ([N, 3] np.ndarray): input point cloud 115 | 116 | Returns: 117 | aabb ([2, 3] np.ndarray): a 3D bbox represent by 118 | [[x_min, y_min, z_min], [x_max, y_max, z_max]] 119 | """ 120 | 121 | x_min, y_min, z_min = np.min(pc, axis=0) 122 | x_max, 
y_max, z_max = np.max(pc, axis=0) 123 | aabb = np.array([[x_min, y_min, z_min], [x_max, y_max, z_max]]) 124 | return aabb 125 | 126 | 127 | # def get_aabb(pc: 'np.ndarray', img_w: int, img_h: int) -> 'np.ndarray': 128 | # """ get aabb of a point cloud 129 | 130 | # Args: 131 | # pc ([N, 2] np.ndarray): input point cloud 132 | 133 | # Returns: 134 | # aabb ([2, 2] np.ndarray): a 2D bbox represent by 135 | # [[x_min, y_min], [x_max, y_max]] 136 | # """ 137 | 138 | # x_min, y_min = np.min(pc, axis=0) 139 | # x_max, y_max = np.max(pc, axis=0) 140 | # x_min = max(0, x_min) 141 | # y_min = max(0, y_min) 142 | # x_max = min(img_w, x_max) 143 | # y_max = min(img_h, y_max) 144 | # aabb = np.array([[x_min, y_min], [x_max, y_max]]) 145 | # return aabb 146 | 147 | 148 | def transform_point_cloud(point_cloud, transform_matrix): 149 | 150 | """ 151 | Transforms a point cloud using a 4x4 transformation matrix. 152 | 153 | Parameters 154 | ---------- 155 | point_cloud : numpy.ndarray, shape (N, 3) 156 | The input point cloud, represented as a Nx3 numpy array. 157 | transform_matrix : numpy.ndarray, shape (4, 4) 158 | The transformation matrix, represented as a 4x4 numpy array. 159 | 160 | Returns 161 | ------- 162 | numpy.ndarray, shape (N, 3) 163 | The transformed point cloud, represented as a Nx3 numpy array. 164 | """ 165 | 166 | transformed_point_cloud = np.dot(point_cloud, transform_matrix[:3, :3].T) + transform_matrix[:3, 3][np.newaxis, :] 167 | return transformed_point_cloud 168 | 169 | 170 | def depth2xyz(depth, intr_mat): 171 | """ convert depth map to xyz map 172 | 173 | Args: 174 | depth ([H, W] np.ndarray): depth map 175 | 176 | Returns: 177 | xyz ([H, W, 3] np.ndarray): xyz map 178 | """ 179 | 180 | height, width = depth.shape 181 | fx, fy, cx, cy = intr_mat[0, 0], intr_mat[1, 1], intr_mat[0, 2], intr_mat[1, 2] 182 | 183 | urange = ( 184 | np.arange(width, dtype=np.float32).reshape(1, -1).repeat(height, 0) - cx 185 | ) / fx 186 | vrange = ( 187 | np.arange(height, dtype=np.float32).reshape(-1, 1).repeat(width, 1) - cy 188 | ) / fy 189 | xyz = np.stack([urange, vrange, np.ones(urange.shape)], axis=-1) 190 | xyz = xyz * depth.reshape(height, width, 1) 191 | return xyz 192 | 193 | 194 | def angle2class(angles, num_classes=30): 195 | """ convert angles between [0, 180] to class index for classification 196 | 197 | Args: 198 | angles (np.ndarray): angle in radian 199 | 200 | Returns: 201 | out_class (np.ndarray): angle is converted to class, the number of which 202 | is defined in num_classes 203 | """ 204 | y = torch.sin(angles) 205 | x = torch.cos(angles) 206 | angles = torch.atan2(y, x) / np.pi * 180. 
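    # atan2 returns values in (-180, 180]; fold the negative angles into [0, 180) before binning into classes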
207 | angles = torch.where(angles<0, angles + 180, angles) 208 | out_class = angles // (180 / num_classes) 209 | assert (out_class >= 0).all() 210 | assert (out_class <= num_classes).all() 211 | out_class = np.clip(out_class, a_min=0, a_max=num_classes-1) 212 | return out_class 213 | 214 | 215 | def iou_2d(bboxA: 'np.ndarray', bboxB: 'np.ndarray') -> float: 216 | """ calculate IoU between two 2D bboxes 217 | 218 | Args: 219 | bboxA ([2, 2] np.ndarray): input bbox A in AABB format 220 | bboxB ([2, 2] np.ndarray): input bbox B in AABB format 221 | 222 | Returns: 223 | IoU (float): output IoU 224 | """ 225 | 226 | x_min = max(bboxA[0, 0], bboxB[0, 0]) 227 | y_min = max(bboxA[0, 1], bboxB[0, 1]) 228 | x_max = min(bboxA[1, 0], bboxB[1, 0]) 229 | y_max = min(bboxA[1, 1], bboxB[1, 1]) 230 | 231 | inter_area = max(0, (x_max - x_min)) * max(0, (y_max - y_min)) 232 | area_A = np.prod(bboxA[1] - bboxA[0]) 233 | area_B = np.prod(bboxB[1] - bboxB[0]) 234 | IoU = inter_area / (area_A + area_B - inter_area) 235 | assert IoU <= 1 and IoU >= 0, "invalid IoU value" 236 | return IoU 237 | 238 | 239 | def iou_3d(bboxA: 'np.ndarray', bboxB: 'np.ndarray') -> float: 240 | """ calculate 3D IoU between two 3D bboxes 241 | 242 | Args: 243 | bboxA ([2, 3] np.ndarray): input bbox A in AABB format 244 | bboxB ([2, 3] np.ndarray): input bbox B in AABB format 245 | 246 | Returns: 247 | IoU (float): 3D IoU 248 | """ 249 | 250 | x_min = max(bboxA[0, 0], bboxB[0, 0]) 251 | y_min = max(bboxA[0, 1], bboxB[0, 1]) 252 | z_min = max(bboxA[0, 2], bboxB[0, 2]) 253 | x_max = min(bboxA[1, 0], bboxB[1, 0]) 254 | y_max = min(bboxA[1, 1], bboxB[1, 1]) 255 | z_max = min(bboxA[1, 2], bboxB[1, 2]) 256 | 257 | inter_volume = max(0, (x_max - x_min)) * max(0, (y_max - y_min)) * max(0, (z_max - z_min)) 258 | volume_A = np.prod(bboxA[1] - bboxA[0]) 259 | volume_B = np.prod(bboxB[1] - bboxB[0]) 260 | IoU = inter_volume / (volume_A + volume_B - inter_volume) 261 | assert IoU <= 1 and IoU >= 0, "invalid IoU value" 262 | return IoU 263 | 264 | 265 | def giou_3d(bboxA: 'np.ndarray', bboxB: 'np.ndarray') -> float: 266 | """ calculate generalized 3D IoU between two 3D bboxes 267 | 268 | Args: 269 | bboxA ([2, 3] np.ndarray): input bbox A in AABB format 270 | bboxB ([2, 3] np.ndarray): input bbox B in AABB format 271 | 272 | Returns: 273 | IoU (float): 3D Generalized IoU 274 | """ 275 | 276 | x_min = max(bboxA[0, 0], bboxB[0, 0]) 277 | y_min = max(bboxA[0, 1], bboxB[0, 1]) 278 | z_min = max(bboxA[0, 2], bboxB[0, 2]) 279 | x_max = min(bboxA[1, 0], bboxB[1, 0]) 280 | y_max = min(bboxA[1, 1], bboxB[1, 1]) 281 | z_max = min(bboxA[1, 2], bboxB[1, 2]) 282 | 283 | inter_volume = max(0, (x_max - x_min)) * max(0, (y_max - y_min)) * max(0, (z_max - z_min)) 284 | volume_A = np.prod(bboxA[1] - bboxA[0]) 285 | volume_B = np.prod(bboxB[1] - bboxB[0]) 286 | volume_union = (volume_A + volume_B - inter_volume) 287 | 288 | iou = iou_3d(bboxA, bboxB) 289 | 290 | x_min = min(bboxA[0, 0], bboxB[0, 0]) 291 | y_min = min(bboxA[0, 1], bboxB[0, 1]) 292 | z_min = min(bboxA[0, 2], bboxB[0, 2]) 293 | x_max = max(bboxA[1, 0], bboxB[1, 0]) 294 | y_max = max(bboxA[1, 1], bboxB[1, 1]) 295 | z_max = max(bboxA[1, 2], bboxB[1, 2]) 296 | 297 | volume_complete = (x_max - x_min) * (y_max - y_min) * (z_max - z_min) 298 | giou = iou - (volume_complete - volume_union) / volume_complete 299 | return giou 300 | 301 | 302 | def get_corner_by_dims(dimensions) -> np.ndarray: 303 | """get 8 corner points of 3D bbox defined by self.dimensions 304 | 305 | Returns: 306 | a np.ndarray with shape 
[8,3] to represent 8 corner points' 307 | position of the 3D bounding box. 308 | """ 309 | 310 | w, h, l = dimensions[0], dimensions[1], dimensions[2] 311 | x_corners = [w/2, -w/2, -w/2, w/2, w/2, -w/2, -w/2, w/2] 312 | y_corners = [h/2, h/2, h/2, h/2, -h/2, -h/2, -h/2, -h/2] 313 | z_corners = [l/2, l/2, -l/2, -l/2, l/2, l/2, -l/2, -l/2] 314 | corner_pts = np.array([x_corners, y_corners, z_corners], dtype=np.float32).T 315 | return corner_pts 316 | 317 | 318 | def scale_vertices_to_bbox( 319 | vertices: 'np.ndarray', bbox_dims: 'np.ndarray' 320 | ) -> 'np.ndarray': 321 | """scale the vertices such that they are tightly bounded by the 3D bbox 322 | 323 | Args: 324 | vertices ([N, 3] np.ndarray): input vertices 325 | bbox_dims ([3] np.ndarray): bbox dimension in x, y, z axis 326 | 327 | Returns: 328 | vertices: the scaled vertices 329 | """ 330 | 331 | vertices[:, 0] *= (bbox_dims[0] / (np.max(vertices[:, 0]) - np.min(vertices[:, 0]))) 332 | vertices[:, 1] *= (bbox_dims[1] / (np.max(vertices[:, 1]) - np.min(vertices[:, 1]))) 333 | vertices[:, 2] *= (bbox_dims[2] / (np.max(vertices[:, 2]) - np.min(vertices[:, 2]))) 334 | 335 | return vertices 336 | 337 | 338 | def unproject(pixel, depth, intr_mat): 339 | """ unproject from pixels and depths to 3D 340 | 341 | Args: 342 | pixel: [n, 2] 343 | depth: [n] 344 | """ 345 | fx = intr_mat[0, 0] 346 | fy = intr_mat[1, 1] 347 | cx = intr_mat[0, 2] 348 | cy = intr_mat[1, 2] 349 | pts = np.concatenate([pixel, np.ones_like(pixel)[:, :1]], axis=1) 350 | pts[:, 0] = (pts[:, 0] - cx) / fx 351 | pts[:, 1] = (pts[:, 1] - cy) / fy 352 | pts = pts * depth[:, None] 353 | return pts 354 | 355 | 356 | def projection(pts, intr_mat, keep_z=False): 357 | """perspective projection 358 | 359 | Args: 360 | pts ([(b), N, 3] or [(b), N, 4] np.ndarray or torch.tensor): 3D points 361 | intr_mat ([(b), 3, 3] or [(b), 3, 4] np.ndarray or torch.tensor): intrinsic 362 | matrix 363 | 364 | Returns: 365 | pts ([(b), N, 3], np.ndarray or torch.tensor): projected points 366 | """ 367 | 368 | batch = False 369 | if len(pts.shape) == 3: 370 | assert len(intr_mat.shape) == 3, "intr_mat shape needs to match pts" 371 | batch = True 372 | elif len(pts.shape) == 2: 373 | assert len(intr_mat.shape) == 2, "intr_mat shape needs to match pts" 374 | else: 375 | ValueError("only accept [b, n_pts, 3] or [n_pts, 3]") 376 | if batch: 377 | if isinstance(pts, torch.Tensor): 378 | intr_mat = intr_mat.transpose(1, 2) 379 | else: 380 | intr_mat = intr_mat.transpose(0, 2, 1) 381 | else: 382 | intr_mat = intr_mat.T 383 | pts = pts @ intr_mat 384 | if isinstance(pts, torch.Tensor): 385 | z = torch.ones_like(pts[..., -1]) 386 | else: 387 | z = np.ones_like(pts[..., -1]) 388 | if batch: 389 | if keep_z: 390 | z = pts[:, :, -1] 391 | pts = pts / pts[:, :, -1:] 392 | pts[:, :, -1] *= z 393 | else: 394 | if keep_z: 395 | z = pts[:, -1] 396 | pts = pts / pts[:, -1:] 397 | pts[:, -1] *= z 398 | return pts 399 | 400 | 401 | def pad_transform_matrix(mat: 'np.ndarray') -> 'np.ndarray': 402 | """ pad a [3, 4] transform matrix to a [4, 4] matrix 403 | 404 | Args: 405 | mat ([3, 4] np.ndarray): the input [3, 4] matrix 406 | Returns: 407 | mat ([4, 4] np.ndarray): the output [4, 4] matrix 408 | """ 409 | 410 | if mat.shape[0] < 4: 411 | pad = np.zeros((1, 4), dtype=np.float32) 412 | pad[0,-1] = 1 413 | return np.concatenate([mat, pad], axis=0) 414 | else: 415 | return mat 416 | 417 | 418 | def rgbd_to_colored_pc( 419 | rgb: 'np.ndarray', 420 | depth: 'np.ndarray', 421 | fx: float, 422 | fy: float, 423 | cx: float, 424 | 
cy: float, 425 | cap: float = 200) -> Tuple['np.ndarray', 'np.ndarray']: 426 | """ convert a pair of rgb and depth iamge to a 3D colored point cloud 427 | 428 | Args: 429 | rgb ([H, W, 3] np.ndarray): rgb image 430 | depth ([H, W] np.ndarray): depth image 431 | fx, fy, cx, cy (float, float, float, float): camera intrinsic matrix 432 | cap (float): depth capping value 433 | 434 | Returns: 435 | a tuple containing: 436 | points ([N, 3] np.ndarray): 3D point positions 437 | colors ([N, 3] np.ndarray): color for each point 438 | """ 439 | 440 | rgb_height, rgb_width, _ = rgb.shape 441 | X, Y = np.meshgrid(np.arange(rgb_width), np.arange(rgb_height)) 442 | xyz_rgb = np.concatenate( 443 | [X[:, :, None], Y[:, :, None], depth[:, :, None], rgb], 444 | axis=2 445 | ) 446 | xyz_rgb[:, :, 0] = (xyz_rgb[:, :, 0] - cx) * xyz_rgb[:, :, 2] / fx 447 | xyz_rgb[:, :, 1] = (xyz_rgb[:, :, 1] - cy) * xyz_rgb[:, :, 2] / fy 448 | points = xyz_rgb[:, :, :3].reshape(-1, 3) 449 | colors = xyz_rgb[:, :, 3:].reshape(-1, 3) / 255. 450 | cap_ind = np.logical_and((points[:, 2] < cap), (points[:, 2] > 0)) 451 | points = points[cap_ind] 452 | colors = colors[cap_ind] 453 | return points, colors 454 | 455 | 456 | def geodesic_distance(R1: 'np.ndarray', R2: 'np.ndarray') -> float: 457 | '''Returns the geodesic distance between two rotation matrices. 458 | 459 | Args: 460 | R1 ([3, 3] np.ndarray): input rotation matrix 461 | R2 ([3, 3] np.ndarray): input rotation matrix 462 | 463 | Returns: 464 | delta_theta (float): geodesic distance between the input rotation 465 | matrices 466 | ''' 467 | 468 | delta_R = np.dot(R1, R2.T) 469 | rotvec = Rotation.from_dcm(delta_R).as_rotvec() 470 | delta_theta = np.linalg.norm(rotvec) 471 | return delta_theta 472 | 473 | 474 | def pts_in_box(pts: 'np.ndarray', img_shape: 'np.ndarray') -> 'np.ndarray': 475 | """ check projected points are within image frame 476 | 477 | Args: 478 | pts ([N, 2] np.ndarray): a set of 2D points on image plane 479 | img_shape (aabb): bbox_size [x_min, y_min, x_max, y_max] 480 | Return: 481 | a boolean array of shape [N] indicating whether a point is within 482 | image frame 483 | """ 484 | 485 | img_shape = img_shape.reshape(2, 2) 486 | larger_x_min = pts[:, 0] > img_shape[0, 0] 487 | smaller_x_max = pts[:, 0] < img_shape[1, 0] 488 | larger_y_min = pts[:, 1] > img_shape[0, 1] 489 | smaller_y_max = pts[:, 1] < img_shape[1, 1] 490 | return (larger_x_min * smaller_x_max * \ 491 | larger_y_min * smaller_y_max) 492 | 493 | 494 | def get_normalize_factor(pts): 495 | norm = np.max(np.linalg.norm(pts, axis=1)) 496 | return norm 497 | 498 | 499 | def create_meshgrid( 500 | height: int, 501 | width: int, 502 | normalized_coordinates: bool = True, 503 | device: Optional[torch.device] = torch.device('cpu'), 504 | dtype: torch.dtype = torch.float32, 505 | ) -> torch.Tensor: 506 | """Generates a coordinate grid for an image. 507 | 508 | When the flag `normalized_coordinates` is set to True, the grid is 509 | normalized to be in the range [-1,1] to be consistent with the pytorch 510 | function grid_sample. 511 | http://pytorch.org/docs/master/nn.html#torch.nn.functional.grid_sample 512 | 513 | Args: 514 | height (int): the image height (rows). 515 | width (int): the image width (cols). 516 | normalized_coordinates (bool): whether to normalize 517 | coordinates in the range [-1, 1] in order to be consistent with the 518 | PyTorch function grid_sample. 519 | device (torch.device): the device on which the grid will be generated. Default: cpu. 
520 | dtype (torch.dtype): the data type of the generated gird. Default: float32. 521 | 522 | Return: 523 | torch.Tensor: returns a grid tensor with shape :math:`(1, H, W, 2)`. 524 | """ 525 | xs: torch.Tensor = torch.linspace(0, width - 1, width, device=device, dtype=dtype) 526 | ys: torch.Tensor = torch.linspace(0, height - 1, height, device=device, dtype=dtype) 527 | if normalized_coordinates: 528 | xs = (xs / (width - 1) - 0.5) * 2 529 | ys = (ys / (height - 1) - 0.5) * 2 530 | # generate grid by stacking coordinates 531 | base_grid: torch.Tensor = torch.stack(torch.meshgrid([xs, ys])).transpose(1, 2) # 2xHxW 532 | return torch.unsqueeze(base_grid, dim=0).permute(0, 2, 3, 1) # 1xHxWx2 533 | 534 | 535 | def calculate_normals_finite_difference(pts, func): 536 | """ 537 | compute the normal of an implicit reprentation using finite difference 538 | 539 | pts: (B, N, 3) 540 | 541 | """ 542 | 543 | batch_size, n_pts = pts.shape[:2] 544 | epsilon = np.abs(np.random.normal(scale=0.01, size=(batch_size, n_pts))) 545 | epsilon = torch.from_numpy(epsilon).float().to(pts.device) 546 | epsilon = torch.clamp(epsilon, min=1e-3, max=1) 547 | epsilon_x = torch.stack( 548 | [epsilon, torch.zeros_like(epsilon), torch.zeros_like(epsilon)], 549 | dim=-1 550 | ).detach() 551 | epsilon_y = torch.stack( 552 | [torch.zeros_like(epsilon), epsilon, torch.zeros_like(epsilon)], 553 | dim=-1 554 | ).detach() 555 | epsilon_z = torch.stack( 556 | [torch.zeros_like(epsilon), torch.zeros_like(epsilon), epsilon], 557 | dim=-1 558 | ).detach() 559 | 560 | pts_x0 = pts - epsilon_x 561 | pts_x1 = pts + epsilon_x 562 | 563 | pts_y0 = pts - epsilon_y 564 | pts_y1 = pts + epsilon_y 565 | 566 | pts_z0 = pts - epsilon_z 567 | pts_z1 = pts + epsilon_z 568 | f_out = func( 569 | torch.cat([pts_x0, pts_x1, pts_y0, pts_y1, pts_z0, pts_z1], dim=0) 570 | ) 571 | f_x0, f_x1, f_y0, f_y1, f_z0, f_z1 = torch.split(f_out, batch_size, dim=0) 572 | 573 | g_x = (f_x1 - f_x0) / (2 * epsilon) 574 | g_y = (f_y1 - f_y0) / (2 * epsilon) 575 | g_z = (f_z1 - f_z0) / (2 * epsilon) 576 | 577 | normals = torch.stack([g_x, g_y, g_z], dim=-1) 578 | normals = normals / (torch.norm(normals, dim=-1, keepdim=True) + 1e-5) 579 | return normals 580 | 581 | 582 | def recenter(vertices): 583 | min_ = np.min(vertices, axis=0) 584 | max_ = np.max(vertices, axis=0) 585 | center = (max_ + min_) / 2. 
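    # shift the vertices so the centre of their axis-aligned bounding box moves to the origin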
586 | vertices = vertices - center[None, :] 587 | return vertices, center -------------------------------------------------------------------------------- /utils/o3d_helper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import open3d as o3d 3 | import trimesh 4 | import os 5 | import sys 6 | 7 | 8 | from utils.geometry import get_homogeneous 9 | 10 | 11 | def align_vector_to_another(a=np.array([0, 0, 1]), b=np.array([1, 0, 0])): 12 | """ 13 | Aligns vector a to vector b with axis angle rotation 14 | """ 15 | if np.array_equal(a, b): 16 | return None, None 17 | if np.sum(b + a) == 0: # if b is possite to a 18 | b += 1e-3 19 | axis_ = np.cross(a, b) 20 | axis_ = axis_ / (np.linalg.norm(axis_)) 21 | angle = np.arccos(np.dot(a, b)) 22 | return axis_, angle 23 | 24 | 25 | def normalized(a, axis=-1, order=2): 26 | """Normalizes a numpy array of points""" 27 | l2 = np.atleast_1d(np.linalg.norm(a, order, axis)) 28 | l2[l2 == 0] = 1 29 | return a / np.expand_dims(l2, axis), l2 30 | 31 | 32 | class LineMesh(object): 33 | def __init__(self, points, lines=None, colors=[0, 1, 0], radius=0.15): 34 | """Creates a line represented as sequence of cylinder triangular meshes 35 | 36 | Arguments: 37 | points {ndarray} -- Numpy array of ponts Nx3. 38 | 39 | Keyword Arguments: 40 | lines {list[list] or None} -- List of point index pairs denoting 41 | line segments. If None, implicit lines from ordered pairwise 42 | points. (default: {None}) 43 | colors {list} -- list of colors, or single color of the line 44 | (default: {[0, 1, 0]}) 45 | radius {float} -- radius of cylinder (default: {0.15}) 46 | """ 47 | self.points = np.array(points) 48 | self.lines = np.array(lines) if lines is not None else \ 49 | self.lines_from_ordered_points(self.points) 50 | self.colors = np.array(colors) 51 | self.radius = radius 52 | self.cylinder_segments = [] 53 | 54 | self.create_line_mesh() 55 | 56 | @staticmethod 57 | def lines_from_ordered_points(points): 58 | lines = [[i, i + 1] for i in range(0, points.shape[0] - 1, 1)] 59 | return np.array(lines) 60 | 61 | def create_line_mesh(self): 62 | first_points = self.points[self.lines[:, 0], :] 63 | second_points = self.points[self.lines[:, 1], :] 64 | line_segments = second_points - first_points 65 | line_segments_unit, line_lengths = normalized(line_segments) 66 | 67 | z_axis = np.array([0, 0, 1]) 68 | # Create triangular mesh cylinder segments of line 69 | for i in range(line_segments_unit.shape[0]): 70 | line_segment = line_segments_unit[i, :] 71 | line_length = line_lengths[i] 72 | # get axis angle rotation to allign cylinder with line segment 73 | axis, angle = align_vector_to_another(z_axis, line_segment) 74 | # Get translation vector 75 | translation = first_points[i, :] + line_segment * line_length * 0.5 76 | # create cylinder and apply transformations 77 | cylinder_segment = o3d.geometry.TriangleMesh.create_cylinder( 78 | self.radius, line_length) 79 | cylinder_segment = cylinder_segment.translate( 80 | translation, relative=False) 81 | if axis is not None: 82 | axis_a = axis * angle 83 | cylinder_segment = cylinder_segment.rotate( 84 | R=o3d.geometry.get_rotation_matrix_from_axis_angle(axis_a), 85 | # center=True 86 | ) 87 | # color cylinder 88 | color = self.colors if self.colors.ndim == 1 else self.colors[i, :] 89 | cylinder_segment.paint_uniform_color(color) 90 | 91 | self.cylinder_segments.append(cylinder_segment) 92 | 93 | def add_line(self, vis): 94 | """Adds this line to the visualizer""" 95 | for 
cylinder in self.cylinder_segments: 96 | vis.add_geometry(cylinder) 97 | 98 | def remove_line(self, vis): 99 | """Removes this line from the visualizer""" 100 | for cylinder in self.cylinder_segments: 101 | vis.remove_geometry(cylinder) 102 | 103 | 104 | def lineset_from_pc(point_cloud, colors, orders=None): 105 | """ open3d lineset from numpy point cloud 106 | 107 | Args: 108 | point_cloud ([N, 3] np.ndarray): corner points of a 3D bounding box 109 | colors ([1, 3] np.ndarray): color of the lineset 110 | orders (): reorder the point cloud to build a valid 3D bbox 111 | 112 | Returns: 113 | line_set (open3d.geometry.Lineset) 114 | """ 115 | # vertex order is consistent with get_corner_pts() in Object class 116 | if orders is None: 117 | lines = [ 118 | [0, 1], 119 | [1, 2], 120 | [2, 3], 121 | [3, 0], 122 | [4, 5], 123 | [5, 6], 124 | [6, 7], 125 | [7, 4], 126 | [0, 4], 127 | [1, 5], 128 | [2, 6], 129 | [3, 7], 130 | ] 131 | else: 132 | lines = orders 133 | colors_tmp = np.zeros((len(lines), 3)) 134 | colors_tmp += colors 135 | line_set = o3d.geometry.LineSet( 136 | points=o3d.utility.Vector3dVector(point_cloud), 137 | lines=o3d.utility.Vector2iVector(lines), 138 | ) 139 | line_set.colors = o3d.utility.Vector3dVector(colors_tmp) 140 | return line_set 141 | 142 | 143 | def linemesh_from_pc(point_cloud, colors, orders=None): 144 | if orders is None: 145 | lines = [ 146 | [0, 1], 147 | [1, 2], 148 | [2, 3], 149 | [3, 0], 150 | [4, 5], 151 | [5, 6], 152 | [6, 7], 153 | [7, 4], 154 | [0, 4], 155 | [1, 5], 156 | [2, 6], 157 | [3, 7], 158 | ] 159 | else: 160 | lines = orders 161 | 162 | colors_tmp = np.zeros((len(lines), 3)) 163 | colors_tmp += colors 164 | 165 | line_mesh = LineMesh(point_cloud, lines, colors_tmp, radius=0.02) 166 | return line_mesh.cylinder_segments 167 | 168 | 169 | def load_scene_mesh(path, trans_mat=None, open_3d=True): 170 | scene_mesh = trimesh.load(path) 171 | if trans_mat is not None: 172 | scene_mesh.vertices = np.dot(get_homogeneous( 173 | scene_mesh.vertices), trans_mat.T)[:, :3] 174 | if open_3d: 175 | scene_mesh_o3d = trimesh2o3d(scene_mesh) 176 | return scene_mesh_o3d 177 | else: 178 | return scene_mesh 179 | 180 | 181 | def trimesh2o3d(mesh, load_color=True): 182 | mesh_o3d = o3d.geometry.TriangleMesh() 183 | mesh_o3d.vertices = o3d.utility.Vector3dVector(mesh.vertices) 184 | mesh_o3d.triangles = o3d.utility.Vector3iVector(mesh.faces) 185 | mesh_o3d.compute_vertex_normals() 186 | if load_color: 187 | if mesh.visual.vertex_colors is not None: 188 | mesh_o3d.vertex_colors = o3d.utility.Vector3dVector( 189 | mesh.visual.vertex_colors[:, :3] / 255. 
190 | ) 191 | return mesh_o3d 192 | 193 | 194 | def np2pc(points, colors=None): 195 | """ convert numpy colors point cloud to o3d point cloud 196 | 197 | Args: 198 | points (np.ndarray): [n_pts, 3] 199 | colors (np.ndarray): [n_pts, 3] 200 | Return: 201 | pts_o3d (o3d.geometry.PointCloud) 202 | """ 203 | pts_o3d = o3d.geometry.PointCloud() 204 | pts_o3d.points = o3d.utility.Vector3dVector(points) 205 | if colors is not None: 206 | pts_o3d.colors = o3d.utility.Vector3dVector(colors) 207 | return pts_o3d 208 | 209 | 210 | def mesh2o3d(vertices, faces, normals=None, colors=None): 211 | mesh = trimesh.Trimesh( 212 | vertices=vertices, 213 | faces=faces, 214 | vertex_normals=normals, 215 | vertex_colors=colors 216 | ) 217 | return trimesh2o3d(mesh) 218 | 219 | 220 | class TSDFFusion: 221 | def __init__(self, voxel_size=0.01): 222 | self.volume = o3d.pipelines.integration.ScalableTSDFVolume( 223 | voxel_length=voxel_size, 224 | sdf_trunc=voxel_size*5, 225 | color_type=o3d.pipelines.integration.TSDFVolumeColorType.RGB8) 226 | 227 | def integrate(self, depth, color, T_wc, intr_mat): 228 | """integrate new RGBD frame 229 | 230 | Args: 231 | depth (np.ndarray): [h,w] in meters 232 | color (np.ndarray): [h,w,3] in range[0,255] 233 | T_wc (np.ndarray): [4,4] 234 | intr_mat (np.ndarray): [3,3] or [4,4] 235 | 236 | """ 237 | img_h, img_w = depth.shape 238 | color = o3d.geometry.Image(color.astype(np.uint8)) 239 | depth = o3d.geometry.Image((depth * 1000).astype(np.uint16)) 240 | rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth( 241 | color, depth, depth_trunc=10000.0, convert_rgb_to_intensity=False) 242 | intrinsic = o3d.camera.PinholeCameraIntrinsic() 243 | intrinsic.set_intrinsics( 244 | width=img_w, 245 | height=img_h, 246 | fx=intr_mat[0, 0], 247 | fy=intr_mat[1, 1], 248 | cx=intr_mat[0, 2], 249 | cy=intr_mat[1, 2], 250 | ) 251 | T_cw = np.linalg.inv(T_wc) 252 | self.volume.integrate(rgbd, intrinsic, T_cw) 253 | 254 | def marching_cube(self, path=None, with_color=False): 255 | mesh_o3d = self.volume.extract_triangle_mesh() 256 | mesh_o3d.compute_vertex_normals() 257 | mesh = trimesh.Trimesh( 258 | vertices=np.asarray(mesh_o3d.vertices), # / dimension, 259 | faces=np.asarray(mesh_o3d.triangles), 260 | vertex_normals=np.asarray(mesh_o3d.vertex_normals) 261 | ) 262 | if with_color: 263 | mesh.visual.vertex_colors = np.asarray(mesh_o3d.vertex_colors) 264 | if path is not None: 265 | dir_ = "/".join(path.split("/")[:-1]) 266 | if not os.path.exists(dir_): 267 | os.mkdir(dir_) 268 | mesh.export(path) 269 | return mesh 270 | 271 | def marching_cube_o3d(self, path=None, with_color=False): 272 | mesh_o3d = self.volume.extract_triangle_mesh() 273 | return mesh_o3d -------------------------------------------------------------------------------- /utils/pytorch_ssim/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch.autograd import Variable 4 | import numpy as np 5 | from math import exp 6 | 7 | 8 | def gaussian(window_size, sigma): 9 | gauss = torch.Tensor([exp(-(x - window_size // 2) ** 2 / float(2 * sigma ** 2)) for x in range(window_size)]) 10 | return gauss / gauss.sum() 11 | 12 | 13 | def create_window(window_size, channel): 14 | _1D_window = gaussian(window_size, 1.5).unsqueeze(1) 15 | _2D_window = _1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0) 16 | window = Variable(_2D_window.expand(channel, 1, window_size, window_size).contiguous()) 17 | return window 18 | 19 | 20 | 
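# _ssim estimates local means, variances and the covariance of the two images with a Gaussian
# window (depthwise conv2d), then evaluates the SSIM formula; C1 = 0.01**2 and C2 = 0.03**2 are
# the usual stability constants for inputs scaled to [0, 1].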
def _ssim(img1, img2, window, window_size, channel, use_padding, size_average=True): 21 | 22 | if use_padding: 23 | padding_size = window_size // 2 24 | else: 25 | padding_size = 0 26 | 27 | mu1 = F.conv2d(img1, window, padding=padding_size, groups=channel) 28 | mu2 = F.conv2d(img2, window, padding=padding_size, groups=channel) 29 | 30 | mu1_sq = mu1.pow(2) 31 | mu2_sq = mu2.pow(2) 32 | mu1_mu2 = mu1 * mu2 33 | 34 | sigma1_sq = F.conv2d(img1 * img1, window, padding=padding_size, groups=channel) - mu1_sq 35 | sigma2_sq = F.conv2d(img2 * img2, window, padding=padding_size, groups=channel) - mu2_sq 36 | sigma12 = F.conv2d(img1 * img2, window, padding=padding_size, groups=channel) - mu1_mu2 37 | 38 | C1 = 0.01 ** 2 39 | C2 = 0.03 ** 2 40 | 41 | ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / ((mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2)) 42 | 43 | if size_average: 44 | return ssim_map.mean() 45 | else: 46 | return ssim_map.mean(1).mean(1).mean(1) 47 | 48 | 49 | class SSIM(torch.nn.Module): 50 | def __init__(self, window_size=11, use_padding=True, size_average=True): 51 | super(SSIM, self).__init__() 52 | self.window_size = window_size 53 | self.size_average = size_average 54 | self.use_padding = use_padding 55 | self.channel = 1 56 | self.window = create_window(window_size, self.channel) 57 | 58 | def forward(self, img1, img2): 59 | (_, channel, _, _) = img1.size() 60 | 61 | if channel == self.channel and self.window.data.type() == img1.data.type(): 62 | window = self.window 63 | else: 64 | window = create_window(self.window_size, channel) 65 | 66 | if img1.is_cuda: 67 | window = window.cuda(img1.get_device()) 68 | window = window.type_as(img1) 69 | 70 | self.window = window 71 | self.channel = channel 72 | 73 | return _ssim(img1, img2, window, self.window_size, channel, self.use_padding, self.size_average) 74 | 75 | 76 | def ssim(img1, img2, use_padding=True, window_size=11, size_average=True): 77 | """SSIM only defined at intensity channel. For RGB or YUV or other image format, this function computes SSIm at each 78 | channel and averge them. 79 | :param img1: (B, C, H, W) float32 in [0, 1] 80 | :param img2: (B, C, H, W) float32 in [0, 1] 81 | :param use_padding: we use conv2d when we compute mean and var for each patch, this use_padding is for that conv2d. 82 | :param window_size: patch size 83 | :param size_average: 84 | :return: a tensor that contains only one scalar. 85 | """ 86 | (_, channel, _, _) = img1.size() 87 | window = create_window(window_size, channel) 88 | 89 | if img1.is_cuda: 90 | window = window.cuda(img1.get_device()) 91 | window = window.type_as(img1) 92 | 93 | return _ssim(img1, img2, window, window_size, channel, use_padding, size_average) --------------------------------------------------------------------------------
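The README notes that each gt_mesh.ply is produced by TSDF fusion of the GT depth maps, and utils/o3d_helper.py provides a `TSDFFusion` helper for exactly this kind of integration. Below is a minimal sketch of fusing one sequence with that helper; the sequence name, the output path, and the assumptions that the depth PNGs store millimetres and share the resolution of the per-frame intrinsics are illustrative, not guaranteed by the repo.

```python
import os

import cv2
import numpy as np

from utils.o3d_helper import TSDFFusion

seq_dir = "./data/aston"  # any sequence listed in evaluations/test_seqs.txt
fusion = TSDFFusion(voxel_size=0.005)

frame_ids = sorted(f[:-4] for f in os.listdir(os.path.join(seq_dir, "gt_depth")))
for frame_id in frame_ids:
    # assumed: depth PNGs store depth in millimetres; integrate() expects metres
    depth = cv2.imread(os.path.join(seq_dir, "gt_depth", f"{frame_id}.png"), -1)
    depth = depth.astype(np.float32) / 1000.0
    # RGB image (OpenCV loads BGR, so flip the channel order)
    color = cv2.imread(os.path.join(seq_dir, "image", f"{frame_id}.jpg"))[..., ::-1]
    # per-frame camera-to-world pose and 3x3 intrinsics, as described in the README
    T_wc = np.loadtxt(os.path.join(seq_dir, "pose", f"{frame_id}.txt")).reshape(4, 4)
    intr_mat = np.loadtxt(os.path.join(seq_dir, "intrinsic", f"{frame_id}.txt")).reshape(3, 3)
    fusion.integrate(depth, color, T_wc, intr_mat)

mesh = fusion.marching_cube(with_color=True)  # returns a trimesh.Trimesh
mesh.export("./aston_fused.ply")
```

The resulting mesh can then be dropped into `./meshes/$METHOD/` and scored with evaluations/evaluate_3d.py as described in the README.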