├── .gitignore ├── LICENSE ├── README.md ├── anycalib ├── __init__.py ├── cameras │ ├── __init__.py │ ├── base.py │ ├── division.py │ ├── eucm.py │ ├── factory.py │ ├── fov.py │ ├── kannala_brandt.py │ ├── lensfun.py │ ├── pinhole.py │ ├── radial.py │ ├── simple_division.py │ ├── simple_eucm.py │ ├── simple_kannala_brandt.py │ ├── simple_pinhole.py │ ├── simple_radial.py │ ├── simple_ucm.py │ └── ucm.py ├── manifolds.py ├── model │ ├── __init__.py │ ├── anycalib_pretrained.py │ ├── dinov2.py │ ├── dinov2_layers │ │ ├── __init__.py │ │ ├── attention.py │ │ ├── block.py │ │ ├── dino_head.py │ │ ├── drop_path.py │ │ ├── layer_scale.py │ │ ├── mlp.py │ │ ├── patch_embed.py │ │ └── swiglu_ffn.py │ ├── dpt_light_decoder.py │ ├── ray_decoder.py │ └── vision_transformer.py ├── optim │ ├── __init__.py │ ├── gauss_newton.py │ └── lev_mar.py ├── ransac.py ├── utils.py └── visualization │ ├── viz_2d.py │ └── viz_batch.py ├── assets ├── method.png └── method_dark.png ├── pyproject.toml ├── siclib ├── LICENSE ├── __init__.py ├── configs │ ├── anycalib.yaml │ ├── data │ │ ├── openpano-radial.yaml │ │ ├── openpano-rays.yaml │ │ └── openpano.yaml │ ├── deepcalib.yaml │ ├── geocalib-radial.yaml │ ├── geocalib.yaml │ ├── model │ │ ├── anycalib.yaml │ │ ├── deepcalib.yaml │ │ └── geocalib.yaml │ └── train │ │ ├── anycalib.yaml │ │ ├── deepcalib.yaml │ │ └── geocalib.yaml ├── datasets │ ├── __init__.py │ ├── augmentations.py │ ├── base_dataset.py │ ├── configs │ │ ├── edited_dataset.yaml │ │ ├── openpano-radial.yaml │ │ ├── openpano.yaml │ │ ├── openpano_v2.yaml │ │ ├── openpano_v2_dist.yaml │ │ ├── openpano_v2_gen.yaml │ │ └── openpano_v2_radial.yaml │ ├── create_dataset_from_pano.py │ ├── create_dataset_from_pano_rays.py │ ├── simple_dataset.py │ ├── simple_dataset_rays.py │ └── utils │ │ ├── __init__.py │ │ ├── align_megadepth.py │ │ ├── download_openpano.py │ │ └── tonemapping.py ├── eval │ ├── __init__.py │ ├── configs │ │ ├── anycalib.yaml │ │ ├── anycalib_pretrained.yaml │ │ ├── deepcalib.yaml │ │ ├── diffcalib.yaml │ │ ├── dust3r.yaml │ │ ├── geocalib-pinhole-rays.yaml │ │ ├── geocalib-pinhole.yaml │ │ ├── geocalib-simple_div-rays.yaml │ │ ├── geocalib-simple_radial-rays.yaml │ │ ├── geocalib-simple_radial.yaml │ │ ├── moge.yaml │ │ ├── uvp.yaml │ │ └── wildcam.yaml │ ├── eval_pipeline.py │ ├── inspect.py │ ├── io.py │ ├── lamar2k.py │ ├── lamar2k_rays.py │ ├── megadepth2k.py │ ├── megadepth2k_radial.py │ ├── megadepth2k_radial_rays.py │ ├── megadepth2k_rays.py │ ├── monovo2k_rays.py │ ├── openpano.py │ ├── openpano_radial.py │ ├── openpano_rays.py │ ├── run_perceptual.py │ ├── scannetpp2k_images.h5 │ ├── scannetpp2k_rays.py │ ├── simple_pipeline.py │ ├── simple_pipeline_rays.py │ ├── stanford2d3d.py │ ├── stanford2d3d_rays.py │ ├── tartanair.py │ ├── tartanair_rays.py │ ├── utils.py │ └── visual.py ├── geometry │ ├── __init__.py │ ├── base_camera.py │ ├── camera.py │ ├── gradient_checker.py │ ├── gravity.py │ ├── jacobians.py │ ├── manifolds.py │ └── perspective_fields.py ├── models │ ├── __init__.py │ ├── base_model.py │ ├── cache_loader.py │ ├── decoders │ │ ├── __init__.py │ │ ├── dpt_decoder.py │ │ ├── dpt_light_decoder.py │ │ ├── fpn.py │ │ ├── latitude_decoder.py │ │ ├── light_hamburger.py │ │ ├── perspective_decoder.py │ │ ├── ray_decoder.py │ │ └── up_decoder.py │ ├── encoders │ │ ├── __init__.py │ │ ├── dinov2.py │ │ ├── dinov2_layers │ │ │ ├── __init__.py │ │ │ ├── attention.py │ │ │ ├── block.py │ │ │ ├── dino_head.py │ │ │ ├── drop_path.py │ │ │ ├── layer_scale.py │ │ │ ├── mlp.py │ │ │ 
├── patch_embed.py │ │ │ └── swiglu_ffn.py │ │ ├── low_level_encoder.py │ │ ├── mscan.py │ │ ├── resnet.py │ │ ├── vgg.py │ │ └── vision_transformer.py │ ├── extractor.py │ ├── networks │ │ ├── __init__.py │ │ ├── anycalib_net.py │ │ ├── anycalib_pretrained.py │ │ ├── deepcalib.py │ │ ├── diffcalib_pretrained.py │ │ ├── dust3r_pretrained.py │ │ ├── dust3r_pretrained_rays.py │ │ ├── geocalib.py │ │ ├── geocalib_pretrained.py │ │ ├── geocalib_pretrained_rays.py │ │ ├── moge_pretrained.py │ │ └── wildcam_pretrained.py │ ├── optimization │ │ ├── __init__.py │ │ ├── inference_optimizer.py │ │ ├── lm_optimizer.py │ │ ├── losses.py │ │ ├── perspective_opt.py │ │ ├── ransac.py │ │ ├── utils.py │ │ └── vp_from_prior.py │ └── utils │ │ ├── __init__.py │ │ ├── losses_rays.py │ │ ├── metrics.py │ │ ├── modules.py │ │ └── perspective_encoding.py ├── pose_estimation.py ├── pyproject.toml ├── requirements.txt ├── settings.py ├── train.py ├── utils │ ├── __init__.py │ ├── conversions.py │ ├── experiments.py │ ├── export_predictions.py │ ├── image.py │ ├── image_rays.py │ ├── stdout_capturing.py │ ├── summary_writer.py │ ├── tensor.py │ └── tools.py └── visualization │ ├── __init__.py │ ├── global_frame.py │ ├── tools.py │ ├── two_view_frame.py │ ├── visualize_batch.py │ └── viz2d.py └── tests ├── test_cameras.py └── test_manifolds.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 162 | #.idea/ 163 | 164 | .vscode/ 165 | /data/ 166 | /outputs/ 167 | /weights/ -------------------------------------------------------------------------------- /anycalib/__init__.py: -------------------------------------------------------------------------------- 1 | from anycalib.model import AnyCalib -------------------------------------------------------------------------------- /anycalib/cameras/__init__.py: -------------------------------------------------------------------------------- 1 | # isort: off 2 | from anycalib.cameras.pinhole import Pinhole 3 | from anycalib.cameras.simple_pinhole import SimplePinhole 4 | from anycalib.cameras.radial import Radial 5 | from anycalib.cameras.simple_radial import SimpleRadial 6 | from anycalib.cameras.kannala_brandt import KannalaBrandt 7 | from anycalib.cameras.simple_kannala_brandt import SimpleKannalaBrandt 8 | from anycalib.cameras.ucm import UCM 9 | from anycalib.cameras.simple_ucm import SimpleUCM 10 | from anycalib.cameras.division import Division 11 | from anycalib.cameras.simple_division import SimpleDivision 12 | from anycalib.cameras.eucm import EUCM 13 | from anycalib.cameras.simple_eucm import SimpleEUCM 14 | from anycalib.cameras.fov import FOV 15 | from anycalib.cameras.factory import CameraFactory 16 | 17 | # isort: on 18 | -------------------------------------------------------------------------------- /anycalib/cameras/simple_division.py: -------------------------------------------------------------------------------- 1 | from anycalib.cameras import Division 2 | 3 | 4 | class SimpleDivision(Division): 5 | """Implementation of the Division Camera Model [1] with one focal length. 
6 | 7 | This class implements the slight variation [2, 3] of the original model [1] which 8 | defines the back-projection (or unprojection) function as: 9 | x = (u - cx)/f 10 | y = (v - cy)/f 11 | z = 1 + k1*r^2 + k2*r^4 + ... 12 | where r is the radius of the retinal point, defined as: r = sqrt(x^2 + y^2). The 13 | unprojected point is subsequently normalized to have unit norm. This implementation 14 | supports a variable number (up to 4) of distortion coefficients, controlled by the 15 | variable/attribute num_k. 16 | The (ordered) intrinsic parameters are f, cx, cy, k1, k2, ... 17 | - f [pixels] is the focal length, 18 | - (cx, cy) [pixels] is the principal point. 19 | - (k1, k2, ...) are the radial distortion coefficients. 20 | 21 | [1] Simultaneous Linear Estimation of Multiple View Geometry and Lens Distortion. 22 | A.W. Fitzgibbon, CVPR 2001. 23 | [2] Revisiting Radial Distortion Absolute Pose. V. Larsson et al., ICCV 2019. 24 | [3] BabelCalib: A Universal Approach to Calibrating Central Cameras. 25 | Y. Lochman et al., ICCV 2021. 26 | """ 27 | 28 | NAME = "simple_division" 29 | # number of focal lengths 30 | NUM_F = 1 31 | PARAMS_IDX = { 32 | "f": 0, 33 | "cx": 1, 34 | "cy": 2, 35 | "k1": 3, 36 | "k2": 4, 37 | "k3": 5, 38 | "k4": 6, 39 | } 40 | -------------------------------------------------------------------------------- /anycalib/cameras/simple_eucm.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | 3 | from anycalib.cameras import EUCM 4 | from anycalib.cameras.base import BaseCamera 5 | 6 | 7 | class SimpleEUCM(EUCM): 8 | """Implementation of the Enhanced Unified Camera Model (EUCM) [1, Sec. II], with one focal length. 9 | 10 | The (ordered) intrinsic parameters are f, cx, cy, alpha, beta: 11 | - f [pixels] is the focal length, 12 | - (cx, cy) [pixels] is the principal point. 13 | - alpha in [0, 1] interpolates between a pinhole (alpha=0) and a spherical-like (alpha=1) projection, 14 | - beta > 0 controls the shape of the ellipsoidal projection surface. 15 | 16 | 17 | [1] An Enhanced Unified Camera Model. B. Khomutenko et al., RA-L 2015. 18 | """ 19 | 20 | NAME = "simple_eucm" 21 | # number of focal lengths 22 | NUM_F = 1 23 | PARAMS_IDX = { 24 | "f": 0, 25 | "cx": 1, 26 | "cy": 2, 27 | "k1": 3, # alpha 28 | "k2": 4, # beta 29 | } 30 | num_k = 2 31 | 32 | def __init__( 33 | self, 34 | proxy_cam_id: str = "simple_kb:3", 35 | proxy_cam_id_sac: str = "simple_kb:2", 36 | safe_optim: bool = True, 37 | beta_optim_min: float = 1e-6, 38 | beta_optim_max: float = 1e2, 39 | ): 40 | assert "simple" in proxy_cam_id and "simple" in proxy_cam_id_sac 41 | # FIXME: ugly import to avoid circular imports 42 | CameraFactory = importlib.import_module("anycalib.cameras.factory").CameraFactory # fmt: skip 43 | # Intermediate camera model used during linear fitting 44 | self.proxy_cam: BaseCamera = CameraFactory.create_from_id(proxy_cam_id) 45 | self.proxy_cam_sac: BaseCamera = CameraFactory.create_from_id(proxy_cam_id_sac) 46 | self.safe_optim = safe_optim 47 | # bounds for β during optimization (ignored if safe_optim=False) 48 | assert beta_optim_max >= beta_optim_min > 0, "β_max >= β_min > 0 not satisfied."
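        # cache β bounds and their width; assuming the parent EUCM class (not shown
        # in this file) uses them to keep β inside (β_min, β_max) when safe_optim=True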
49 | self.beta_min = beta_optim_min 50 | self.beta_max = beta_optim_max 51 | self.beta_ptp = beta_optim_max - beta_optim_min 52 | -------------------------------------------------------------------------------- /anycalib/cameras/simple_kannala_brandt.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | 4 | from anycalib.cameras.kannala_brandt import KannalaBrandt 5 | 6 | 7 | class SimpleKannalaBrandt(KannalaBrandt): 8 | """Kannala-Brandt camera model [1] with one focal length. 9 | 10 | We use the common [2, 3, 4] slight variation of the original model [1] which sets 11 | k1 in [1, eq. 6] to 1.0. Thus the radial projection is defined as: 12 | r(θ) = θ + k1 * θ^3 + k2 * θ^5 + k3 * θ^7 + k4 * θ^9, 13 | where θ is the incidence angle of the incoming ray, computed as 14 | θ = atan2(sqrt(x^2 + y^2), z) 15 | for a 3D point with coordinates (x, y, z). 16 | Additionally, this implementation allows the use of a variable number of coefficients. 17 | 18 | [1] A Generic Camera Model and Calibration Method for Conventional, Wide-Angle, and 19 | Fish-Eye Lenses, J. Kannala, S. Brandt, PAMI 2006. 20 | [2] The Double Sphere Camera Model, V. Usenko et al., 3DV 2018. 21 | [3] BabelCalib: A Universal Approach to Calibrating Central Cameras, Y. Lochman et 22 | al., ICCV 2021. 23 | [4] Project Aria (KB3 camera model), Meta Reality Labs Research, 2023. 24 | 25 | The (ordered) intrinsic parameters are f, cx, cy, k1, k2, ..., 26 | - f [pixels] is the focal length, 27 | - (cx, cy) [pixels] is the principal point. 28 | - (k1, k2, ...) are the radial distortion coefficients. 29 | 30 | Args: 31 | num_k: number of radial distortion coefficients. Default is 4. 32 | newton_iters: number of Newton iterations for mapping sensor radii to polar angles (θ). 33 | newton_tol: threshold for checking convergence of the Newton algorithm. 34 | """ 35 | 36 | NAME = "simple_kb" 37 | # number of focal lengths 38 | NUM_F = 1 39 | PARAMS_IDX = { 40 | "f": 0, 41 | "cx": 1, 42 | "cy": 2, 43 | "k1": 3, 44 | "k2": 4, 45 | "k3": 5, 46 | "k4": 6, 47 | } 48 | 49 | def _form_batched_system( 50 | self, im_coords: Tensor, bearings: Tensor, cxcy: Tensor | None = None 51 | ) -> tuple[Tensor, Tensor]: 52 | """Form the 2D equations for each 2D-3D correspondence. 53 | 54 | Args: 55 | im_coords: (..., N, 2) image coordinates. 56 | bearings: (..., N, 3) unit bearing vectors in the camera frame. 57 | cxcy: (..., 2) known principal points. 58 | 59 | Returns: 60 | As: (..., N, 2, {1, 3} + num_k) design matrices (without stacking). 61 | bs: (..., N, 2) observations.
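            Note: with R = sqrt(x^2 + y^2) and d(θ) = θ + k1*θ^3 + ... + kK*θ^(2K+1),
            dividing the projection u = cx + f*d(θ)*x/R by f yields
            u*(1/f) - (cx/f) - Σ_i ki*θ^(2i+1)*x/R = θ*x/R (analogously for v), i.e.
            two equations per correspondence that are linear in
            (1/f, cx/f, cy/f, k1, ..., kK); As and bs encode exactly this system.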
62 | """ 63 | num_k = self.num_k 64 | # ray radii and polar angles 65 | ray_radii = torch.linalg.norm(bearings[..., :2], dim=-1, keepdim=True) 66 | theta = torch.atan2(ray_radii, bearings[..., 2:]) # (..., N, 1) 67 | # form eqs corresponding to focal length(s) and principal point 68 | if cxcy is None: 69 | As = bearings.new_zeros((*theta.shape[:-1], 2, 3 + num_k)) 70 | As[..., 0] = im_coords 71 | As[..., 0, 1] = As[..., 1, 2] = -1 72 | offset = 3 73 | else: 74 | As = bearings.new_zeros((*theta.shape[:-1], 2, 1 + num_k)) 75 | As[..., 0] = im_coords - cxcy[..., None, :] 76 | offset = 1 77 | # form RHS (..., N, 2) 78 | bs = ( 79 | theta 80 | * bearings[..., :2] 81 | / ray_radii.clamp(torch.finfo(ray_radii.dtype).eps) 82 | ) 83 | # eqs corresponding to distortion terms 84 | theta_2 = theta**2 85 | coeff_i = -theta_2 * bs 86 | As[..., offset] = coeff_i 87 | for i in range(1, num_k): 88 | coeff_i = coeff_i * theta_2 89 | As[..., offset + i] = coeff_i 90 | return As, bs 91 | -------------------------------------------------------------------------------- /anycalib/cameras/simple_radial.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | 4 | from anycalib.cameras.radial import Radial 5 | 6 | 7 | class SimpleRadial(Radial): 8 | """Simple pinhole camera model with polynomial radial distortion. 9 | 10 | Projection: 11 | x = f * (X / Z) * (1 + k1 * r^2 + k2 * r^4 + ...) + cx 12 | y = f * (Y / Z) * (1 + k1 * r^2 + k2 * r^4 + ...) + cy 13 | The (ordered) intrinsic parameters are f, cx, cy, k1, k2, ..., 14 | - f [pixels] is the focal length, 15 | - (cx, cy) [pixels] is the principal point. 16 | - (k1, k2, ...) are the radial distortion coefficients. 17 | 18 | Args: 19 | max_fov: Threshold in degrees for masking out bearings/rays whose incidence 20 | angles correspond to fovs above this admissible field of view. 21 | num_k: number of radial distortion coefficients. Default is 1. 22 | undist_iters: number of Newton iterations for undistorting radii. 23 | undist_tol: threshold for checking convergence of the Newton algorithm. 24 | """ 25 | 26 | NAME = "simple_radial" 27 | # number of focal lengths 28 | NUM_F = 1 29 | PARAMS_IDX = { 30 | "f": 0, 31 | "cx": 1, 32 | "cy": 2, 33 | "k1": 3, 34 | "k2": 4, 35 | "k3": 5, 36 | "k4": 6, 37 | } 38 | 39 | def __init__( 40 | self, 41 | max_fov: float = 170, 42 | num_k: int = 1, 43 | undist_iters: int = 25, 44 | undist_tol: float = 1e-5, 45 | ): 46 | if not (0 < max_fov < 180): 47 | raise ValueError(f"`max_fov` must be in (0, 180) but got: {max_fov}.") 48 | if num_k <= 0 or not isinstance(num_k, int): 49 | raise ValueError(f"`num_k` must be a positive integer but got: {num_k}.") 50 | self.max_fov = max_fov 51 | self.num_k = num_k 52 | self.undist_iters = undist_iters 53 | self.undist_tol = undist_tol 54 | 55 | def _form_batched_system( 56 | self, im_coords: Tensor, bearings: Tensor, cxcy: Tensor | None = None 57 | ) -> tuple[Tensor, Tensor]: 58 | """Form the 2D equations for each 2D-3D correspondence. 59 | 60 | Args: 61 | im_coords: (..., N, 2) image coordinates. 62 | bearings: (..., N, 3) unit bearing vectors in the camera frame. 63 | cxcy: (..., 2) known principal points. 64 | 65 | Returns: 66 | As: (..., N, 2, {1, 3} + num_k) design matrices (without stacking). 67 | bs: (..., N, 2) observations.
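            Note: with x_p = X/Z, y_p = Y/Z and r^2 = x_p^2 + y_p^2, dividing the
            projection u = cx + f*x_p*(1 + Σ_i ki*r^(2i)) by f yields
            u*(1/f) - (cx/f) - Σ_i ki*r^(2i)*x_p = x_p (analogously for v), so each
            correspondence again contributes two equations that are linear in
            (1/f, cx/f, cy/f, k1, ..., kK).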
68 | """ 69 | eps = torch.finfo(bearings.dtype).eps 70 | num_k = self.num_k 71 | # perspective projection 72 | proj = bearings[..., :2] / bearings[..., 2:].clamp(eps) # (..., N, 2) 73 | # form linear system 74 | if cxcy is None: 75 | As = proj.new_zeros((*proj.shape, 3 + num_k)) 76 | As[..., 0] = im_coords 77 | As[..., 0, 1] = As[..., 1, 2] = -1 78 | offset = 3 79 | else: 80 | As = proj.new_zeros((*proj.shape, 1 + num_k)) 81 | As[..., 0] = im_coords - cxcy[..., None, :] 82 | offset = 1 83 | # distortion terms 84 | radii_u2 = (proj * proj).sum(-1, keepdim=True) # (..., N, 1) 85 | proj_radii = -proj * radii_u2 # (..., N, 2) 86 | As[..., offset] = proj_radii 87 | for i in range(1, num_k): 88 | proj_radii = proj_radii * radii_u2 89 | As[..., offset + i] = proj_radii 90 | return As, proj 91 | -------------------------------------------------------------------------------- /anycalib/cameras/simple_ucm.py: -------------------------------------------------------------------------------- 1 | from anycalib.cameras import UCM 2 | 3 | 4 | class SimpleUCM(UCM): 5 | """Implementation of the Unified Camera Model (UCM) [1, Sec. II], with one focal length. 6 | 7 | The (ordered) intrinsic parameters are f, cx, cy, xi: 8 | - f [pixels] is the focal length, 9 | - (cx, cy) [pixels] is the principal point. 10 | - xi represents the distance from the center of projection to the center of the 11 | sphere and controls the magnitude of radial distortion present in the image. 12 | 13 | 14 | [1] Single View Point Omnidirectional Camera Calibration from Planar Grids. 15 | C. Mei, P. Rives, ICRA 2007. 16 | """ 17 | 18 | NAME = "simple_ucm" 19 | # number of focal lengths 20 | NUM_F = 1 21 | PARAMS_IDX = { 22 | "f": 0, 23 | "cx": 1, 24 | "cy": 2, 25 | "k1": 3, # xi 26 | } 27 | num_k = 1 28 | -------------------------------------------------------------------------------- /anycalib/model/__init__.py: -------------------------------------------------------------------------------- 1 | from .anycalib_pretrained import AnyCalib 2 | -------------------------------------------------------------------------------- /anycalib/model/dinov2_layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # 3 | # This source code is licensed under the Apache License, Version 2.0 4 | # found in the LICENSE file in the root directory of this source tree. 5 | 6 | from .dino_head import DINOHead 7 | from .mlp import Mlp 8 | from .patch_embed import PatchEmbed 9 | from .swiglu_ffn import SwiGLUFFN, SwiGLUFFNFused 10 | from .block import NestedTensorBlock 11 | from .attention import MemEffAttention 12 | -------------------------------------------------------------------------------- /anycalib/model/dinov2_layers/attention.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # 3 | # This source code is licensed under the Apache License, Version 2.0 4 | # found in the LICENSE file in the root directory of this source tree.
5 | 6 | # References: 7 | # https://github.com/facebookresearch/dino/blob/master/vision_transformer.py 8 | # https://github.com/rwightman/pytorch-image-models/tree/master/timm/models/vision_transformer.py 9 | 10 | # import logging 11 | import os 12 | 13 | # import warnings 14 | from torch import Tensor, nn 15 | 16 | # logger = logging.getLogger("dinov2") 17 | 18 | 19 | XFORMERS_ENABLED = os.environ.get("XFORMERS_DISABLED") is None 20 | try: 21 | if XFORMERS_ENABLED: 22 | from xformers.ops import memory_efficient_attention, unbind 23 | 24 | XFORMERS_AVAILABLE = True 25 | # warnings.warn("xFormers is available (Attention)") 26 | else: 27 | # warnings.warn("xFormers is disabled (Attention)") 28 | raise ImportError 29 | except ImportError: 30 | XFORMERS_AVAILABLE = False 31 | # warnings.warn("xFormers is not available (Attention)") 32 | 33 | 34 | class Attention(nn.Module): 35 | def __init__( 36 | self, 37 | dim: int, 38 | num_heads: int = 8, 39 | qkv_bias: bool = False, 40 | proj_bias: bool = True, 41 | attn_drop: float = 0.0, 42 | proj_drop: float = 0.0, 43 | ) -> None: 44 | super().__init__() 45 | self.num_heads = num_heads 46 | head_dim = dim // num_heads 47 | self.scale = head_dim**-0.5 48 | 49 | self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) 50 | self.attn_drop = nn.Dropout(attn_drop) 51 | self.proj = nn.Linear(dim, dim, bias=proj_bias) 52 | self.proj_drop = nn.Dropout(proj_drop) 53 | 54 | def forward(self, x: Tensor) -> Tensor: 55 | B, N, C = x.shape 56 | qkv = ( 57 | self.qkv(x) 58 | .reshape(B, N, 3, self.num_heads, C // self.num_heads) 59 | .permute(2, 0, 3, 1, 4) 60 | ) 61 | 62 | q, k, v = qkv[0] * self.scale, qkv[1], qkv[2] 63 | attn = q @ k.transpose(-2, -1) 64 | 65 | attn = attn.softmax(dim=-1) 66 | attn = self.attn_drop(attn) 67 | 68 | x = (attn @ v).transpose(1, 2).reshape(B, N, C) 69 | x = self.proj(x) 70 | x = self.proj_drop(x) 71 | return x 72 | 73 | 74 | class MemEffAttention(Attention): 75 | def forward(self, x: Tensor, attn_bias=None) -> Tensor: 76 | if not XFORMERS_AVAILABLE: 77 | if attn_bias is not None: 78 | raise AssertionError("xFormers is required for using nested tensors") 79 | return super().forward(x) 80 | 81 | B, N, C = x.shape 82 | qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads) 83 | 84 | q, k, v = unbind(qkv, 2) 85 | 86 | x = memory_efficient_attention(q, k, v, attn_bias=attn_bias) 87 | x = x.reshape([B, N, C]) 88 | 89 | x = self.proj(x) 90 | x = self.proj_drop(x) 91 | return x 92 | -------------------------------------------------------------------------------- /anycalib/model/dinov2_layers/dino_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # 3 | # This source code is licensed under the Apache License, Version 2.0 4 | # found in the LICENSE file in the root directory of this source tree. 
5 | 6 | import torch 7 | import torch.nn as nn 8 | from torch.nn.init import trunc_normal_ 9 | from torch.nn.utils import weight_norm 10 | 11 | 12 | class DINOHead(nn.Module): 13 | def __init__( 14 | self, 15 | in_dim, 16 | out_dim, 17 | use_bn=False, 18 | nlayers=3, 19 | hidden_dim=2048, 20 | bottleneck_dim=256, 21 | mlp_bias=True, 22 | ): 23 | super().__init__() 24 | nlayers = max(nlayers, 1) 25 | self.mlp = _build_mlp(nlayers, in_dim, bottleneck_dim, hidden_dim=hidden_dim, use_bn=use_bn, bias=mlp_bias) 26 | self.apply(self._init_weights) 27 | self.last_layer = weight_norm(nn.Linear(bottleneck_dim, out_dim, bias=False)) 28 | self.last_layer.weight_g.data.fill_(1) 29 | 30 | def _init_weights(self, m): 31 | if isinstance(m, nn.Linear): 32 | trunc_normal_(m.weight, std=0.02) 33 | if isinstance(m, nn.Linear) and m.bias is not None: 34 | nn.init.constant_(m.bias, 0) 35 | 36 | def forward(self, x): 37 | x = self.mlp(x) 38 | eps = 1e-6 if x.dtype == torch.float16 else 1e-12 39 | x = nn.functional.normalize(x, dim=-1, p=2, eps=eps) 40 | x = self.last_layer(x) 41 | return x 42 | 43 | 44 | def _build_mlp(nlayers, in_dim, bottleneck_dim, hidden_dim=None, use_bn=False, bias=True): 45 | if nlayers == 1: 46 | return nn.Linear(in_dim, bottleneck_dim, bias=bias) 47 | else: 48 | layers = [nn.Linear(in_dim, hidden_dim, bias=bias)] 49 | if use_bn: 50 | layers.append(nn.BatchNorm1d(hidden_dim)) 51 | layers.append(nn.GELU()) 52 | for _ in range(nlayers - 2): 53 | layers.append(nn.Linear(hidden_dim, hidden_dim, bias=bias)) 54 | if use_bn: 55 | layers.append(nn.BatchNorm1d(hidden_dim)) 56 | layers.append(nn.GELU()) 57 | layers.append(nn.Linear(hidden_dim, bottleneck_dim, bias=bias)) 58 | return nn.Sequential(*layers) 59 | -------------------------------------------------------------------------------- /anycalib/model/dinov2_layers/drop_path.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # 3 | # This source code is licensed under the Apache License, Version 2.0 4 | # found in the LICENSE file in the root directory of this source tree. 5 | 6 | # References: 7 | # https://github.com/facebookresearch/dino/blob/master/vision_transformer.py 8 | # https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/drop.py 9 | 10 | 11 | from torch import nn 12 | 13 | 14 | def drop_path(x, drop_prob: float = 0.0, training: bool = False): 15 | if drop_prob == 0.0 or not training: 16 | return x 17 | keep_prob = 1 - drop_prob 18 | shape = (x.shape[0],) + (1,) * (x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets 19 | random_tensor = x.new_empty(shape).bernoulli_(keep_prob) 20 | if keep_prob > 0.0: 21 | random_tensor.div_(keep_prob) 22 | output = x * random_tensor 23 | return output 24 | 25 | 26 | class DropPath(nn.Module): 27 | """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).""" 28 | 29 | def __init__(self, drop_prob=None): 30 | super(DropPath, self).__init__() 31 | self.drop_prob = drop_prob 32 | 33 | def forward(self, x): 34 | return drop_path(x, self.drop_prob, self.training) 35 | -------------------------------------------------------------------------------- /anycalib/model/dinov2_layers/layer_scale.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 
2 | # 3 | # This source code is licensed under the Apache License, Version 2.0 4 | # found in the LICENSE file in the root directory of this source tree. 5 | 6 | # Modified from: https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/vision_transformer.py#L103-L110 7 | 8 | from typing import Union 9 | 10 | import torch 11 | from torch import Tensor 12 | from torch import nn 13 | 14 | 15 | class LayerScale(nn.Module): 16 | def __init__( 17 | self, 18 | dim: int, 19 | init_values: Union[float, Tensor] = 1e-5, 20 | inplace: bool = False, 21 | ) -> None: 22 | super().__init__() 23 | self.inplace = inplace 24 | self.gamma = nn.Parameter(init_values * torch.ones(dim)) 25 | 26 | def forward(self, x: Tensor) -> Tensor: 27 | return x.mul_(self.gamma) if self.inplace else x * self.gamma 28 | -------------------------------------------------------------------------------- /anycalib/model/dinov2_layers/mlp.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # 3 | # This source code is licensed under the Apache License, Version 2.0 4 | # found in the LICENSE file in the root directory of this source tree. 5 | 6 | # References: 7 | # https://github.com/facebookresearch/dino/blob/master/vision_transformer.py 8 | # https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/mlp.py 9 | 10 | 11 | from typing import Callable, Optional 12 | 13 | from torch import Tensor, nn 14 | 15 | 16 | class Mlp(nn.Module): 17 | def __init__( 18 | self, 19 | in_features: int, 20 | hidden_features: Optional[int] = None, 21 | out_features: Optional[int] = None, 22 | act_layer: Callable[..., nn.Module] = nn.GELU, 23 | drop: float = 0.0, 24 | bias: bool = True, 25 | ) -> None: 26 | super().__init__() 27 | out_features = out_features or in_features 28 | hidden_features = hidden_features or in_features 29 | self.fc1 = nn.Linear(in_features, hidden_features, bias=bias) 30 | self.act = act_layer() 31 | self.fc2 = nn.Linear(hidden_features, out_features, bias=bias) 32 | self.drop = nn.Dropout(drop) 33 | 34 | def forward(self, x: Tensor) -> Tensor: 35 | x = self.fc1(x) 36 | x = self.act(x) 37 | x = self.drop(x) 38 | x = self.fc2(x) 39 | x = self.drop(x) 40 | return x 41 | -------------------------------------------------------------------------------- /anycalib/model/dinov2_layers/patch_embed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # 3 | # This source code is licensed under the Apache License, Version 2.0 4 | # found in the LICENSE file in the root directory of this source tree. 5 | 6 | # References: 7 | # https://github.com/facebookresearch/dino/blob/master/vision_transformer.py 8 | # https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/patch_embed.py 9 | 10 | from typing import Callable, Optional, Tuple, Union 11 | 12 | from torch import Tensor 13 | import torch.nn as nn 14 | 15 | 16 | def make_2tuple(x): 17 | if isinstance(x, tuple): 18 | assert len(x) == 2 19 | return x 20 | 21 | assert isinstance(x, int) 22 | return (x, x) 23 | 24 | 25 | class PatchEmbed(nn.Module): 26 | """ 27 | 2D image to patch embedding: (B,C,H,W) -> (B,N,D) 28 | 29 | Args: 30 | img_size: Image size. 31 | patch_size: Patch token size. 32 | in_chans: Number of input image channels. 33 | embed_dim: Number of linear projection output channels. 34 | norm_layer: Normalization layer. 
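        flatten_embedding: If True (default), return flattened tokens (B, N, D);
            otherwise keep the spatial layout (B, H', W', D).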
35 | """ 36 | 37 | def __init__( 38 | self, 39 | img_size: Union[int, Tuple[int, int]] = 224, 40 | patch_size: Union[int, Tuple[int, int]] = 16, 41 | in_chans: int = 3, 42 | embed_dim: int = 768, 43 | norm_layer: Optional[Callable] = None, 44 | flatten_embedding: bool = True, 45 | ) -> None: 46 | super().__init__() 47 | 48 | image_HW = make_2tuple(img_size) 49 | patch_HW = make_2tuple(patch_size) 50 | patch_grid_size = ( 51 | image_HW[0] // patch_HW[0], 52 | image_HW[1] // patch_HW[1], 53 | ) 54 | 55 | self.img_size = image_HW 56 | self.patch_size = patch_HW 57 | self.patches_resolution = patch_grid_size 58 | self.num_patches = patch_grid_size[0] * patch_grid_size[1] 59 | 60 | self.in_chans = in_chans 61 | self.embed_dim = embed_dim 62 | 63 | self.flatten_embedding = flatten_embedding 64 | 65 | self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_HW, stride=patch_HW) 66 | self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity() 67 | 68 | def forward(self, x: Tensor) -> Tensor: 69 | _, _, H, W = x.shape 70 | patch_H, patch_W = self.patch_size 71 | 72 | assert H % patch_H == 0, f"Input image height {H} is not a multiple of patch height {patch_H}" 73 | assert W % patch_W == 0, f"Input image width {W} is not a multiple of patch width: {patch_W}" 74 | 75 | x = self.proj(x) # B C H W 76 | H, W = x.size(2), x.size(3) 77 | x = x.flatten(2).transpose(1, 2) # B HW C 78 | x = self.norm(x) 79 | if not self.flatten_embedding: 80 | x = x.reshape(-1, H, W, self.embed_dim) # B H W C 81 | return x 82 | 83 | def flops(self) -> float: 84 | Ho, Wo = self.patches_resolution 85 | flops = Ho * Wo * self.embed_dim * self.in_chans * (self.patch_size[0] * self.patch_size[1]) 86 | if self.norm is not None: 87 | flops += Ho * Wo * self.embed_dim 88 | return flops 89 | -------------------------------------------------------------------------------- /anycalib/model/dinov2_layers/swiglu_ffn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # 3 | # This source code is licensed under the Apache License, Version 2.0 4 | # found in the LICENSE file in the root directory of this source tree. 
5 | 6 | import os 7 | from typing import Callable, Optional 8 | import warnings 9 | 10 | from torch import Tensor, nn 11 | import torch.nn.functional as F 12 | 13 | 14 | class SwiGLUFFN(nn.Module): 15 | def __init__( 16 | self, 17 | in_features: int, 18 | hidden_features: Optional[int] = None, 19 | out_features: Optional[int] = None, 20 | act_layer: Callable[..., nn.Module] = None, 21 | drop: float = 0.0, 22 | bias: bool = True, 23 | ) -> None: 24 | super().__init__() 25 | out_features = out_features or in_features 26 | hidden_features = hidden_features or in_features 27 | self.w12 = nn.Linear(in_features, 2 * hidden_features, bias=bias) 28 | self.w3 = nn.Linear(hidden_features, out_features, bias=bias) 29 | 30 | def forward(self, x: Tensor) -> Tensor: 31 | x12 = self.w12(x) 32 | x1, x2 = x12.chunk(2, dim=-1) 33 | hidden = F.silu(x1) * x2 34 | return self.w3(hidden) 35 | 36 | 37 | XFORMERS_ENABLED = os.environ.get("XFORMERS_DISABLED") is None 38 | try: 39 | if XFORMERS_ENABLED: 40 | from xformers.ops import SwiGLU 41 | 42 | XFORMERS_AVAILABLE = True 43 | warnings.warn("xFormers is available (SwiGLU)") 44 | else: 45 | warnings.warn("xFormers is disabled (SwiGLU)") 46 | raise ImportError 47 | except ImportError: 48 | SwiGLU = SwiGLUFFN 49 | XFORMERS_AVAILABLE = False 50 | 51 | warnings.warn("xFormers is not available (SwiGLU)") 52 | 53 | 54 | class SwiGLUFFNFused(SwiGLU): 55 | def __init__( 56 | self, 57 | in_features: int, 58 | hidden_features: Optional[int] = None, 59 | out_features: Optional[int] = None, 60 | act_layer: Callable[..., nn.Module] = None, 61 | drop: float = 0.0, 62 | bias: bool = True, 63 | ) -> None: 64 | out_features = out_features or in_features 65 | hidden_features = hidden_features or in_features 66 | hidden_features = (int(hidden_features * 2 / 3) + 7) // 8 * 8 67 | super().__init__( 68 | in_features=in_features, 69 | hidden_features=hidden_features, 70 | out_features=out_features, 71 | bias=bias, 72 | ) 73 | -------------------------------------------------------------------------------- /anycalib/model/ray_decoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch import Tensor 5 | 6 | from anycalib.manifolds import Unit3 7 | 8 | 9 | def cvx_upsample(x: Tensor, mask: Tensor, up_factor: int = 7) -> Tensor: 10 | """Upsample (N, C, H, W) -> (N, C, k*H, k*W), with k = up_factor, using a convex combination of 3x3 patches. 11 | 12 | Code adapted from RAFT (Teed and Deng, 2020): 13 | https://github.com/princeton-vl/RAFT/blob/3fa0bb0a9c633ea0a9bb8a79c576b6785d4e6a02/core/raft.py#L72 14 | 15 | Args: 16 | x: (N, C, H, W) input tensor 17 | mask: (N, 1, 9, up_factor, up_factor, H, W) already softmaxed mask tensor 18 | up_factor: upsample factor 19 | """ 20 | N, C, H, W = x.shape 21 | up_x = F.unfold(x, (3, 3), padding=1) 22 | up_x = up_x.view(N, C, 9, 1, 1, H, W) 23 | up_x = torch.sum(mask * up_x, dim=2) 24 | up_x = up_x.permute(0, 1, 4, 2, 5, 3) 25 | return up_x.reshape(N, C, up_factor * H, up_factor * W) 26 | 27 | 28 | class ConvexTangentDecoder(nn.Module): 29 | """Convex Tangent Coordinates Decoder. 30 | 31 | This decoder predicts 2D coordinates in the tangent space of the unit sphere at the 32 | optical axis: z_1 = [0, 0, 1]. These coordinates are subsequently mapped to unit 33 | rays using the exponential map.
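    At z_1, the exponential map of the unit sphere has the closed form
    Exp_{z_1}(t) = (sin(||t||) * t / ||t||, cos(||t||)) for a 2D tangent vector t
    (assuming Unit3.expmap_at_z1, which is not shown here, implements this standard formula).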
34 | 35 | Args: 36 | in_channels: number of input channels 37 | up_factor: upsampling factor 38 | """ 39 | 40 | def __init__(self, in_channels: int = 256, up_factor: int = 7): 41 | super().__init__() 42 | self.in_channels = in_channels 43 | self.up_factor = up_factor 44 | 45 | # tangent head 46 | self.tangent_head = nn.Sequential( 47 | nn.Conv2d(in_channels, in_channels // 2, 3, padding=1), 48 | nn.ReLU(True), 49 | # tangent coords (2) 50 | nn.Conv2d(in_channels // 2, 2, 1), 51 | ) 52 | # weights head for convex upsampling to input resolution 53 | self.upsampling_weights_head = nn.Sequential( 54 | # convex combination of 3x3 patches 55 | nn.Conv2d(in_channels, in_channels // 2, 3, padding=1), 56 | nn.ReLU(inplace=True), 57 | nn.Conv2d(in_channels // 2, up_factor**2 * 9, 1, padding=0), 58 | nn.Unflatten(1, (1, 9, up_factor, up_factor)), 59 | nn.Softmax(dim=2), 60 | ) 61 | 62 | def forward(self, x: Tensor) -> dict[str, Tensor | float]: 63 | # head 64 | tangent_pred = self.tangent_head(x) # (B, 2, H/7, W/7) 65 | weights = self.upsampling_weights_head(x) 66 | # upsample 67 | tangent_pred = cvx_upsample(tangent_pred, weights, self.up_factor) 68 | # postprocess 69 | tangent_coords = tangent_pred[:, :2] 70 | rays = Unit3.expmap_at_z1(tangent_coords.permute(0, 2, 3, 1)).permute( 71 | 0, 3, 1, 2 72 | ) 73 | out: dict[str, Tensor | float] = { 74 | "rays": rays, 75 | "tangent_coords": tangent_coords, 76 | } 77 | return out 78 | -------------------------------------------------------------------------------- /anycalib/optim/__init__.py: -------------------------------------------------------------------------------- 1 | from anycalib.optim.gauss_newton import GaussNewtonCalib 2 | from anycalib.optim.lev_mar import LevMarCalib 3 | -------------------------------------------------------------------------------- /assets/method.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/javrtg/AnyCalib/7bfa51b5797e8784cc6d438aab96c57c226a699e/assets/method.png -------------------------------------------------------------------------------- /assets/method_dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/javrtg/AnyCalib/7bfa51b5797e8784cc6d438aab96c57c226a699e/assets/method_dark.png -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [tool.setuptools.packages.find] 6 | # to also install the modules under folders w/o __init__.py, we use the pattern pkg*: 7 | include = ["anycalib*"] 8 | 9 | [tool.ruff] 10 | src=["anycalib","tests","siclib"] 11 | 12 | [tool.ruff.lint.isort] 13 | known-first-party=["anycalib","tests","siclib"] 14 | 15 | [project] 16 | name = "anycalib" 17 | version = "1.0" 18 | authors = [ 19 | {name = "Javier Tirado Garin", email = "jtiradogarin@gmail.com"}, 20 | ] 21 | description = "AnyCalib - Single View Calibration" 22 | readme = "README.md" 23 | requires-python = ">=3.10" 24 | license = {file = "LICENSE"} 25 | dependencies = ["torch"] 26 | 27 | [project.optional-dependencies] 28 | eff = ["xformers"] 29 | viz = ["matplotlib"] -------------------------------------------------------------------------------- /siclib/__init__.py: -------------------------------------------------------------------------------- 1 | import logging
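# set up the top-level `siclib` logger: handlers attached here receive records from
# all `siclib.*` child loggers, while `propagate = False` below keeps the root
# logger from handling (and duplicating) them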
2 | 3 | formatter = logging.Formatter( 4 | fmt="[%(asctime)s %(name)s %(levelname)s] %(message)s", datefmt="%m/%d/%Y %H:%M:%S" 5 | ) 6 | handler = logging.StreamHandler() 7 | handler.setFormatter(formatter) 8 | handler.setLevel(logging.INFO) 9 | 10 | logger = logging.getLogger(__name__) 11 | logger.setLevel(logging.INFO) 12 | logger.addHandler(handler) 13 | logger.propagate = False 14 | 15 | __module_name__ = __name__ 16 | -------------------------------------------------------------------------------- /siclib/configs/anycalib.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - data: openpano-rays 3 | - train: anycalib 4 | - model: anycalib 5 | - _self_ 6 | 7 | -------------------------------------------------------------------------------- /siclib/configs/data/openpano-radial.yaml: -------------------------------------------------------------------------------- 1 | name: simple_dataset 2 | dataset_dir: data/openpano/openpano_radial 3 | 4 | preprocessing: 5 | resize: 320 6 | side: short 7 | 8 | augmentations: 9 | name: geocalib 10 | grayscale: false 11 | 12 | use_up: true 13 | use_latitude: true 14 | 15 | train_batch_size: 64 16 | val_batch_size: 64 17 | test_batch_size: 64 18 | 19 | num_workers: 8 20 | prefetch_factor: 2 21 | -------------------------------------------------------------------------------- /siclib/configs/data/openpano-rays.yaml: -------------------------------------------------------------------------------- 1 | name: simple_dataset_rays 2 | dataset_dir: data/openpano_v2/openpano_v2 3 | 4 | preprocessing: 5 | edge_divisible_by: 14 6 | 7 | im_geom_transform: 8 | aspect_ratio: [0.5, 2.0] 9 | resolution: 102_400 10 | change_pixel_ar: false 11 | crop: null 12 | edit_prob: 0.5 13 | 14 | augmentations: 15 | name: geocalib 16 | grayscale: false 17 | 18 | train_batch_size: 24 19 | val_batch_size: 24 20 | test_batch_size: 24 21 | 22 | num_workers: 6 23 | prefetch_factor: 2 24 | -------------------------------------------------------------------------------- /siclib/configs/data/openpano.yaml: -------------------------------------------------------------------------------- 1 | name: simple_dataset 2 | dataset_dir: data/openpano/openpano 3 | 4 | preprocessing: 5 | resize: 320 6 | side: short 7 | 8 | augmentations: 9 | name: geocalib 10 | grayscale: false 11 | 12 | use_up: true 13 | use_latitude: true 14 | 15 | train_batch_size: 64 16 | val_batch_size: 64 17 | test_batch_size: 64 18 | 19 | num_workers: 8 20 | prefetch_factor: 2 21 | -------------------------------------------------------------------------------- /siclib/configs/deepcalib.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - data: openpano-radial 3 | - train: deepcalib 4 | - model: deepcalib 5 | - _self_ 6 | 7 | data: 8 | train_batch_size: 32 9 | val_batch_size: 32 10 | test_batch_size: 32 11 | augmentations: 12 | name: "deepcalib" 13 | -------------------------------------------------------------------------------- /siclib/configs/geocalib-radial.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - data: openpano-radial 3 | - train: geocalib 4 | - model: geocalib 5 | - _self_ 6 | 7 | data: 8 | # smaller batch size since lm takes more memory 9 | train_batch_size: 18 10 | val_batch_size: 18 11 | test_batch_size: 18 12 | 13 | model: 14 | optimizer: 15 | camera_model: simple_radial 16 | 17 | weights: weights/geocalib.tar 18 | 19 | train: 20 | lr: 1e-5 # smaller 
lr since we are fine-tuning 21 | num_steps: 200_000 # adapt to see same number of samples as previous training 22 | 23 | lr_schedule: 24 | type: SequentialLR 25 | on_epoch: false 26 | options: 27 | # adapt to see same number of samples as previous training 28 | milestones: [5_000] 29 | schedulers: 30 | - type: LinearLR 31 | options: 32 | start_factor: 1e-3 33 | total_iters: 5_000 34 | - type: MultiStepLR 35 | options: 36 | gamma: 0.1 37 | # adapt to see same number of samples as previous training 38 | milestones: [110_000, 170_000] 39 | -------------------------------------------------------------------------------- /siclib/configs/geocalib.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - data: openpano 3 | - train: geocalib 4 | - model: geocalib 5 | - _self_ 6 | 7 | data: 8 | train_batch_size: 24 9 | val_batch_size: 24 10 | test_batch_size: 24 11 | -------------------------------------------------------------------------------- /siclib/configs/model/anycalib.yaml: -------------------------------------------------------------------------------- 1 | name: networks.anycalib_net 2 | 3 | backbone: 4 | name: dinov2 5 | conf: 6 | model_name: dinov2_vitl14 7 | num_trainable_blocks: -1 # -1 -> all blocks are trainable 8 | intermediate_layers: null # null -> default DPT's intermediate layers 9 | 10 | decoder: 11 | name: light_dpt_tangent_decoder 12 | conf: 13 | dim_dhat: 256 14 | post_process_channels: null 15 | conf_head: 16 | predict_covs: false 17 | predict_mixture: false # will be ignored for some heads 18 | use_tanh: false # will be ignored for some heads 19 | logvar_lims: [-20, 10] # ignored if predict_covs is false 20 | 21 | calibrator: 22 | rm_borders: 0 23 | detach_lin_fit: true 24 | detach_rays: false 25 | nonlin_opt: 26 | use_covs: false 27 | name: gauss_newton 28 | conf: 29 | max_iters: 5 30 | res_tangent: fitted 31 | loss: 32 | name: null 33 | weight: 1.0 34 | 35 | loss: 36 | names: [l1-z1] 37 | weights: [1.0] 38 | 39 | recall_thresholds: 40 | - 1 41 | - 5 42 | - 10 43 | -------------------------------------------------------------------------------- /siclib/configs/model/deepcalib.yaml: -------------------------------------------------------------------------------- 1 | name: networks.deepcalib 2 | bounds: 3 | roll: [-45, 45] 4 | # rho = torch.tan(pitch) / torch.tan(vfov / 2) / 2 -> rho in [-1/0.3526, 1/0.0872] 5 | rho: [-2.83607487, 2.83607487] 6 | vfov: [20, 105] 7 | k1_hat: [-0.7, 0.7] 8 | -------------------------------------------------------------------------------- /siclib/configs/model/geocalib.yaml: -------------------------------------------------------------------------------- 1 | name: networks.geocalib 2 | 3 | ll_enc: 4 | name: encoders.low_level_encoder 5 | 6 | backbone: 7 | name: encoders.mscan 8 | weights: weights/mscan_b.pth 9 | 10 | perspective_decoder: 11 | name: decoders.perspective_decoder 12 | 13 | up_decoder: 14 | name: decoders.up_decoder 15 | loss_type: l1 16 | use_uncertainty_loss: true 17 | decoder: 18 | name: decoders.light_hamburger 19 | predict_uncertainty: true 20 | 21 | latitude_decoder: 22 | name: decoders.latitude_decoder 23 | loss_type: l1 24 | use_uncertainty_loss: true 25 | decoder: 26 | name: decoders.light_hamburger 27 | predict_uncertainty: true 28 | 29 | optimizer: 30 | name: optimization.lm_optimizer 31 | camera_model: pinhole 32 | -------------------------------------------------------------------------------- /siclib/configs/train/anycalib.yaml: 
-------------------------------------------------------------------------------- 1 | seed: 0 2 | # num_steps: 150_000 3 | epochs: 40 4 | keep_last_checkpoints: 3 5 | 6 | writer: tensorboard 7 | log_every_iter: 500 8 | eval_every_iter: 1000 9 | test_every_epoch: 5 10 | 11 | lr: 6e-5 12 | optimizer: adamw 13 | clip_grad: 1.0 14 | best_key: angular_error 15 | 16 | lr_scaling: 17 | - [0.1, ["backbone"]] 18 | 19 | lr_schedule: 20 | type: SequentialLR 21 | on_epoch: false 22 | options: 23 | milestones: [1_000] 24 | schedulers: 25 | - type: LinearLR 26 | options: 27 | start_factor: 1e-3 28 | total_iters: 1_000 29 | - type: MultiStepLR 30 | options: 31 | gamma: 0.3 32 | milestones: [10_000, 30_000] 33 | 34 | submodules: [] 35 | 36 | median_metrics: 37 | - maha_dist_error 38 | - angular_error 39 | - vfov_error 40 | - angular_error_recall@1 41 | - angular_error_recall@5 42 | - angular_error_recall@10 43 | 44 | recall_metrics: 45 | vfov_error: [1, 5, 10] 46 | 47 | plot: [2, "anycalib.visualization.viz_batch.make_batch_figures"] 48 | -------------------------------------------------------------------------------- /siclib/configs/train/deepcalib.yaml: -------------------------------------------------------------------------------- 1 | seed: 0 2 | num_steps: 20_000 3 | log_every_iter: 500 4 | eval_every_iter: 3000 5 | test_every_epoch: 1 6 | writer: null 7 | lr: 1.0e-4 8 | clip_grad: 1.0 9 | lr_schedule: 10 | type: null 11 | optimizer: adam 12 | submodules: [] 13 | median_metrics: 14 | - roll_error 15 | - pitch_error 16 | - vfov_error 17 | recall_metrics: 18 | roll_error: [1, 5, 10] 19 | pitch_error: [1, 5, 10] 20 | vfov_error: [1, 5, 10] 21 | 22 | plot: [3, "siclib.visualization.visualize_batch.make_perspective_figures"] 23 | -------------------------------------------------------------------------------- /siclib/configs/train/geocalib.yaml: -------------------------------------------------------------------------------- 1 | seed: 0 2 | num_steps: 150_000 3 | 4 | writer: null 5 | log_every_iter: 500 6 | eval_every_iter: 1000 7 | 8 | lr: 1e-4 9 | optimizer: adamw 10 | clip_grad: 1.0 11 | best_key: loss/param_total 12 | 13 | lr_schedule: 14 | type: SequentialLR 15 | on_epoch: false 16 | options: 17 | milestones: [4_000] 18 | schedulers: 19 | - type: LinearLR 20 | options: 21 | start_factor: 1e-3 22 | total_iters: 4_000 23 | - type: MultiStepLR 24 | options: 25 | gamma: 0.1 26 | milestones: [80_000, 130_000] 27 | 28 | submodules: [] 29 | 30 | median_metrics: 31 | - roll_error 32 | - pitch_error 33 | - gravity_error 34 | - vfov_error 35 | - up_angle_error 36 | - latitude_angle_error 37 | - up_angle_recall@1 38 | - up_angle_recall@5 39 | - up_angle_recall@10 40 | - latitude_angle_recall@1 41 | - latitude_angle_recall@5 42 | - latitude_angle_recall@10 43 | 44 | recall_metrics: 45 | roll_error: [1, 3, 5, 10] 46 | pitch_error: [1, 3, 5, 10] 47 | gravity_error: [1, 3, 5, 10] 48 | vfov_error: [1, 3, 5, 10] 49 | 50 | plot: [3, "siclib.visualization.visualize_batch.make_perspective_figures"] 51 | -------------------------------------------------------------------------------- /siclib/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | import importlib.util 2 | 3 | from siclib.datasets.base_dataset import BaseDataset 4 | from siclib.utils.tools import get_class 5 | 6 | 7 | def get_dataset(name): 8 | import_paths = [name, f"{__name__}.{name}"] 9 | for path in import_paths: 10 | try: 11 | spec = importlib.util.find_spec(path) 12 | except ModuleNotFoundError: 13 | 
spec = None 14 | if spec is not None: 15 | try: 16 | return get_class(path, BaseDataset) 17 | except AssertionError: 18 | mod = __import__(path, fromlist=[""]) 19 | try: 20 | return mod.__main_dataset__ 21 | except AttributeError as exc: 22 | print(exc) 23 | continue 24 | 25 | raise RuntimeError(f'Dataset {name} not found in any of [{" ".join(import_paths)}]') 26 | -------------------------------------------------------------------------------- /siclib/datasets/configs/edited_dataset.yaml: -------------------------------------------------------------------------------- 1 | # im_dir: ??? 2 | # output_im_dir: ??? 3 | pixel_aspect_ratio: [0.5, 2.0] 4 | crop_factor: 0.5 5 | device: cuda 6 | overwrite: true 7 | -------------------------------------------------------------------------------- /siclib/datasets/configs/openpano-radial.yaml: -------------------------------------------------------------------------------- 1 | name: openpano_radial 2 | base_dir: data/openpano 3 | pano_dir: "${.base_dir}/panoramas" 4 | images_per_pano: 16 5 | resize_factor: null 6 | n_workers: 1 7 | device: cpu 8 | overwrite: true 9 | parameter_dists: 10 | roll: 11 | type: uniform # uni[-45, 45] 12 | options: 13 | loc: -0.7853981633974483 # -45 degrees 14 | scale: 1.5707963267948966 # 90 degrees 15 | pitch: 16 | type: uniform # uni[-45, 45] 17 | options: 18 | loc: -0.7853981633974483 # -45 degrees 19 | scale: 1.5707963267948966 # 90 degrees 20 | vfov: 21 | type: uniform # uni[20, 105] 22 | options: 23 | loc: 0.3490658503988659 # 20 degrees 24 | scale: 1.48352986419518 # 85 degrees 25 | k1_hat: 26 | type: truncnorm 27 | options: 28 | a: -4.285714285714286 # corresponds to -0.3 29 | b: 4.285714285714286 # corresponds to 0.3 30 | loc: 0 31 | scale: 0.07 32 | resize_factor: 33 | type: uniform 34 | options: 35 | loc: 1.2 36 | scale: 0.5 37 | shape: 38 | type: fix 39 | value: 40 | - 640 41 | - 640 42 | -------------------------------------------------------------------------------- /siclib/datasets/configs/openpano.yaml: -------------------------------------------------------------------------------- 1 | name: openpano 2 | base_dir: data/openpano 3 | pano_dir: "${.base_dir}/panoramas" 4 | images_per_pano: 16 5 | resize_factor: null 6 | n_workers: 1 7 | device: cpu 8 | overwrite: true 9 | parameter_dists: 10 | roll: 11 | type: uniform # uni[-45, 45] 12 | options: 13 | loc: -0.7853981633974483 # -45 degrees 14 | scale: 1.5707963267948966 # 90 degrees 15 | pitch: 16 | type: uniform # uni[-45, 45] 17 | options: 18 | loc: -0.7853981633974483 # -45 degrees 19 | scale: 1.5707963267948966 # 90 degrees 20 | vfov: 21 | type: uniform # uni[20, 105] 22 | options: 23 | loc: 0.3490658503988659 # 20 degrees 24 | scale: 1.48352986419518 # 85 degrees 25 | resize_factor: 26 | type: uniform 27 | options: 28 | loc: 1.2 29 | scale: 0.5 30 | shape: 31 | type: fix 32 | value: 33 | - 640 34 | - 640 35 | -------------------------------------------------------------------------------- /siclib/datasets/configs/openpano_v2.yaml: -------------------------------------------------------------------------------- 1 | name: openpano_v2 2 | base_dir: data/openpano_v2 3 | pano_dir: "${.base_dir}/panoramas" 4 | images_per_pano: 16 5 | resize_factor: null 6 | n_workers: 1 7 | device: cpu 8 | overwrite: true 9 | parameter_dists: 10 | roll: 11 | type: uniform # uni[-45, 45] 12 | options: 13 | loc: -0.7853981633974483 # -45 degrees 14 | scale: 1.5707963267948966 # 90 degrees 15 | pitch: 16 | type: uniform # uni[-45, 45] 17 | options: 18 | loc: -0.7853981633974483 # 
-45 degrees 19 | scale: 1.5707963267948966 # 90 degrees 20 | vfov: 21 | type: uniform # uni[20, 105] 22 | options: 23 | loc: 0.3490658503988659 # 20 degrees 24 | scale: 1.48352986419518 # 85 degrees 25 | resize_factor: 26 | type: uniform 27 | options: 28 | loc: 1.2 29 | scale: 0.5 30 | shape: 31 | type: fix 32 | value: 33 | - 640 34 | - 640 35 | -------------------------------------------------------------------------------- /siclib/datasets/configs/openpano_v2_dist.yaml: -------------------------------------------------------------------------------- 1 | name: openpano_v2_dist 2 | base_dir: data/openpano_v2 3 | pano_dir: "${.base_dir}/panoramas" 4 | images_per_pano: 16 5 | n_workers: 1 6 | device: cpu 7 | overwrite: true 8 | im_size: [640, 640] # h, w 9 | 10 | # 1) extrinsics 11 | roll: 12 | type: uniform # uni[-45, 45] 13 | options: 14 | loc: -0.7853981633974483 # -45 degrees 15 | scale: 1.5707963267948966 # 90 degrees 16 | pitch: 17 | type: uniform # uni[-45, 45] 18 | options: 19 | loc: -0.7853981633974483 # -45 degrees 20 | scale: 1.5707963267948966 # 90 degrees 21 | 22 | # 2) resize factor 23 | resize_factor: 24 | type: uniform 25 | options: 26 | loc: 1.2 27 | scale: 0.5 28 | 29 | # 3) intrinsics with (possibly) several camera models 30 | intrinsics: 31 | - cam_id: radial:1 32 | weight: 0.5 33 | vfov: 34 | type: uniform # uni[20, 105] 35 | options: 36 | loc: 0.3490658503988659 # 20 degrees 37 | scale: 1.48352986419518 # 85 degrees 38 | dist: 39 | # k1_hat. Following Geocalib: k1 = k1_hat * focal / height 40 | - name: k1_hat 41 | type: truncnorm 42 | options: 43 | a: -4.285714285714286 # corresponds to -0.3 44 | b: 4.285714285714286 # corresponds to 0.3 45 | loc: 0 46 | scale: 0.07 47 | 48 | - cam_id: eucm 49 | weight: 0.5 50 | vfov: 51 | type: uniform # uni[50, 180] 52 | options: 53 | loc: 0.8726646259971648 # 50 degrees 54 | scale: 2.2689280275926285 # 130 degrees 55 | dist: 56 | - name: alpha # uni[0.5, 0.8] 57 | type: uniform 58 | options: 59 | loc: 0.5 60 | scale: 0.3 61 | - name: beta # uni[0.5, 2.0] 62 | type: uniform 63 | options: 64 | loc: 0.5 65 | scale: 1.5 66 | 67 | -------------------------------------------------------------------------------- /siclib/datasets/configs/openpano_v2_gen.yaml: -------------------------------------------------------------------------------- 1 | name: openpano_v2_gen 2 | base_dir: data/openpano_v2 3 | pano_dir: "${.base_dir}/panoramas" 4 | images_per_pano: 16 5 | n_workers: 1 6 | device: cpu 7 | overwrite: true 8 | im_size: [640, 640] # h, w 9 | 10 | # 1) extrinsics 11 | roll: 12 | type: uniform # uni[-45, 45] 13 | options: 14 | loc: -0.7853981633974483 # -45 degrees 15 | scale: 1.5707963267948966 # 90 degrees 16 | pitch: 17 | type: uniform # uni[-45, 45] 18 | options: 19 | loc: -0.7853981633974483 # -45 degrees 20 | scale: 1.5707963267948966 # 90 degrees 21 | 22 | # 2) resize factor 23 | resize_factor: 24 | type: uniform 25 | options: 26 | loc: 1.2 27 | scale: 0.5 28 | 29 | # 3) intrinsics with (possibly) several camera models 30 | intrinsics: 31 | - cam_id: pinhole 32 | weight: 0.34 33 | vfov: 34 | type: uniform # uni[20, 105] 35 | options: 36 | loc: 0.3490658503988659 # 20 degrees 37 | scale: 1.48352986419518 # 85 degrees 38 | dist: null 39 | 40 | - cam_id: radial:1 41 | weight: 0.33 42 | vfov: 43 | type: uniform # uni[20, 105] 44 | options: 45 | loc: 0.3490658503988659 # 20 degrees 46 | scale: 1.48352986419518 # 85 degrees 47 | dist: 48 | # k1_hat. 
Following Geocalib: k1 = k1_hat * focal / height 49 | - name: k1_hat 50 | type: truncnorm 51 | options: 52 | a: -4.285714285714286 # corresponds to -0.3 53 | b: 4.285714285714286 # corresponds to 0.3 54 | loc: 0 55 | scale: 0.07 56 | 57 | - cam_id: eucm 58 | weight: 0.33 59 | vfov: 60 | type: uniform # uni[50, 180] 61 | options: 62 | loc: 0.8726646259971648 # 50 degrees 63 | scale: 2.2689280275926285 # 130 degrees 64 | dist: 65 | - name: alpha # uni[0.5, 0.8] 66 | type: uniform 67 | options: 68 | loc: 0.5 69 | scale: 0.3 70 | - name: beta # uni[0.5, 2.0] 71 | type: uniform 72 | options: 73 | loc: 0.5 74 | scale: 1.5 75 | 76 | -------------------------------------------------------------------------------- /siclib/datasets/configs/openpano_v2_radial.yaml: -------------------------------------------------------------------------------- 1 | name: openpano_v2_radial 2 | base_dir: data/openpano_v2 3 | pano_dir: "${.base_dir}/panoramas" 4 | images_per_pano: 16 5 | n_workers: 1 6 | device: cpu 7 | overwrite: true 8 | im_size: [640, 640] # h, w 9 | 10 | # 1) extrinsics 11 | roll: 12 | type: uniform # uni[-45, 45] 13 | options: 14 | loc: -0.7853981633974483 # -45 degrees 15 | scale: 1.5707963267948966 # 90 degrees 16 | pitch: 17 | type: uniform # uni[-45, 45] 18 | options: 19 | loc: -0.7853981633974483 # -45 degrees 20 | scale: 1.5707963267948966 # 90 degrees 21 | 22 | # 2) resize factor 23 | resize_factor: 24 | type: uniform 25 | options: 26 | loc: 1.2 27 | scale: 0.5 28 | 29 | # 3) intrinsics with (possibly) several camera models 30 | intrinsics: 31 | - cam_id: radial:1 32 | weight: 1.0 33 | vfov: 34 | type: uniform # uni[20, 105] 35 | options: 36 | loc: 0.3490658503988659 # 20 degrees 37 | scale: 1.48352986419518 # 85 degrees 38 | dist: 39 | # k1_hat. Following Geocalib: k1 = k1_hat * focal / height 40 | - name: k1_hat 41 | type: truncnorm 42 | options: 43 | a: -4.285714285714286 # corresponds to -0.3 44 | b: 4.285714285714286 # corresponds to 0.3 45 | loc: 0 46 | scale: 0.07 47 | 48 | -------------------------------------------------------------------------------- /siclib/datasets/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/javrtg/AnyCalib/7bfa51b5797e8784cc6d438aab96c57c226a699e/siclib/datasets/utils/__init__.py -------------------------------------------------------------------------------- /siclib/datasets/utils/align_megadepth.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import subprocess 3 | from pathlib import Path 4 | 5 | # flake8: noqa 6 | # mypy: ignore-errors 7 | 8 | parser = argparse.ArgumentParser(description="Aligns a COLMAP model and plots the horizon lines") 9 | parser.add_argument( 10 | "--base_dir", type=str, help="Path to the base directory of the MegaDepth dataset" 11 | ) 12 | parser.add_argument("--out_dir", type=str, help="Path to the output directory") 13 | args = parser.parse_args() 14 | 15 | base_dir = Path(args.base_dir) 16 | out_dir = Path(args.out_dir) 17 | 18 | scenes = [d.name for d in base_dir.iterdir() if d.is_dir()] 19 | print(scenes[:3], len(scenes)) 20 | 21 | # exit() 22 | 23 | for scene in scenes: 24 | image_dir = base_dir / scene / "images" 25 | sfm_dir = base_dir / scene / "sparse" / "manhattan" / "0" 26 | 27 | # Align model 28 | align_dir = out_dir / scene / "sparse" / "align" 29 | align_dir.mkdir(exist_ok=True, parents=True) 30 | 31 | print(f"image_dir ({image_dir.exists()}): {image_dir}") 32 | 
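    # log whether the expected COLMAP inputs and the output directory exist;
    # model_orientation_aligner fails on missing paths, so this makes
    # per-scene failures easy to trace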
print(f"sfm_dir ({sfm_dir.exists()}): {sfm_dir}") 33 | print(f"align_dir ({align_dir.exists()}): {align_dir}") 34 | 35 | cmd = ( 36 | "colmap model_orientation_aligner " 37 | + f"--image_path {image_dir} " 38 | + f"--input_path {sfm_dir} " 39 | + f"--output_path {str(align_dir)}" 40 | ) 41 | subprocess.run(cmd, shell=True) 42 | -------------------------------------------------------------------------------- /siclib/datasets/utils/download_openpano.py: -------------------------------------------------------------------------------- 1 | """Helper script to download and extract OpenPano dataset.""" 2 | 3 | import argparse 4 | import shutil 5 | from pathlib import Path 6 | 7 | import gdown 8 | import torch 9 | from tqdm import tqdm 10 | 11 | from siclib import logger 12 | 13 | URLS = { 14 | "openpano": "https://cvg-data.inf.ethz.ch/GeoCalib_ECCV2024/openpano.zip", 15 | "openpano_v2": "https://drive.google.com/uc?id=17bmJedVjR5fmTzQJ3KEC27KEapzll6Sk", 16 | } 17 | 18 | 19 | def download_and_extract_dataset(name: str, url: str, output: Path) -> None: 20 | """Download and extract a dataset from a URL.""" 21 | dataset_dir = output / name 22 | if not output.exists(): 23 | output.mkdir(parents=True) 24 | 25 | if dataset_dir.exists(): 26 | logger.info(f"Dataset {name} already exists at {dataset_dir}, skipping download.") 27 | return 28 | 29 | zip_file = output / f"{name}.zip" 30 | 31 | if not zip_file.exists(): 32 | logger.info(f"Downloading dataset {name} to {zip_file} from {url}.") 33 | if "drive.google.com" in url: 34 | gdown.download(url, str(zip_file)) 35 | else: 36 | torch.hub.download_url_to_file(url, str(zip_file)) 37 | 38 | logger.info(f"Extracting dataset {name} in {output}.") 39 | shutil.unpack_archive(zip_file, output, format="zip") 40 | zip_file.unlink() 41 | 42 | 43 | def main(): 44 | """Prepare the OpenPano dataset.""" 45 | parser = argparse.ArgumentParser(description="Download and extract OpenPano dataset.") 46 | parser.add_argument("--name", type=str, default="openpano", help="Name of the dataset.") 47 | parser.add_argument( 48 | "--laval_dir", type=str, default="data/laval-tonemap", help="Path the Laval dataset." 
49 | ) 50 | 51 | args = parser.parse_args() 52 | 53 | out_dir = Path("data") 54 | download_and_extract_dataset(args.name, URLS[args.name], out_dir) 55 | 56 | pano_dir = out_dir / args.name / "panoramas" 57 | for split in ["train", "test", "val"]: 58 | with open(pano_dir / f"{split}_panos.txt", "r") as f: 59 | pano_list = f.readlines() 60 | pano_list = [fname.strip() for fname in pano_list] 61 | 62 | for fname in tqdm(pano_list, ncols=80, desc=f"Copying {split} panoramas"): 63 | laval_path = Path(args.laval_dir) / fname 64 | target_path = pano_dir / split / fname 65 | 66 | # pano either exists in laval or is in split 67 | if target_path.exists(): 68 | continue 69 | 70 | if laval_path.exists(): 71 | shutil.copy(laval_path, target_path) 72 | else: # not in laval and not in split 73 | logger.warning(f"Panorama {fname} not found in {args.laval_dir} or {split} split.") 74 | 75 | n_train = len(list(pano_dir.glob("train/*.jpg"))) 76 | n_test = len(list(pano_dir.glob("test/*.jpg"))) 77 | n_val = len(list(pano_dir.glob("val/*.jpg"))) 78 | logger.info(f"{args.name} contains {n_train}/{n_test}/{n_val} train/test/val panoramas.") 79 | 80 | 81 | if __name__ == "__main__": 82 | main() 83 | -------------------------------------------------------------------------------- /siclib/eval/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from siclib.eval.eval_pipeline import EvalPipeline 4 | from siclib.utils.tools import get_class 5 | 6 | 7 | def get_benchmark(benchmark): 8 | return get_class(f"{__name__}.{benchmark}", EvalPipeline) 9 | 10 | 11 | @torch.no_grad() 12 | def run_benchmark(benchmark, eval_conf, experiment_dir, model=None): 13 | """This overwrites existing benchmarks""" 14 | experiment_dir.mkdir(exist_ok=True, parents=True) 15 | bm = get_benchmark(benchmark) 16 | 17 | pipeline = bm(eval_conf) 18 | return pipeline.run(experiment_dir, model=model, overwrite=True, overwrite_eval=True) 19 | -------------------------------------------------------------------------------- /siclib/eval/configs/anycalib.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | name: networks.anycalib_net 3 | 4 | data: 5 | # aspect_ratio_strategy MUST match training config 6 | to_closest_train_size: true 7 | im_geom_transform: 8 | aspect_ratio: [0.5, 2.0] 9 | resolution: 102_400 10 | 11 | use_prior_cxcy: False 12 | preprocessing: 13 | # resize: 322 14 | edge_divisible_by: 14 15 | -------------------------------------------------------------------------------- /siclib/eval/configs/anycalib_pretrained.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | name: networks.anycalib_pretrained 3 | model_id: anycalib_pinhole 4 | 5 | data: 6 | im_geom_transform: null 7 | preprocessing: 8 | resize: null 9 | edge_divisible_by: null 10 | -------------------------------------------------------------------------------- /siclib/eval/configs/deepcalib.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | name: networks.deepcalib 3 | weights: weights/deepcalib.tar 4 | -------------------------------------------------------------------------------- /siclib/eval/configs/diffcalib.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | name: networks.diffcalib_pretrained 3 | with_assumption: false 4 | 5 | data: 6 | im_geom_transform: null 7 | preprocessing: 8 | resize: null 9 | 
edge_divisible_by: null -------------------------------------------------------------------------------- /siclib/eval/configs/dust3r.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | # name: networks.dust3r_pretrained 3 | name: networks.dust3r_pretrained_rays 4 | 5 | data: 6 | im_geom_transform: null 7 | preprocessing: 8 | resize: null 9 | edge_divisible_by: null 10 | -------------------------------------------------------------------------------- /siclib/eval/configs/geocalib-pinhole-rays.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | name: networks.geocalib_pretrained_rays 3 | data: 4 | im_geom_transform: null 5 | preprocessing: 6 | resize: null 7 | edge_divisible_by: null -------------------------------------------------------------------------------- /siclib/eval/configs/geocalib-pinhole.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | name: networks.geocalib_pretrained 3 | -------------------------------------------------------------------------------- /siclib/eval/configs/geocalib-simple_div-rays.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | name: networks.geocalib_pretrained_rays 3 | camera_model: simple_divisional 4 | model_weights: distorted 5 | data: 6 | im_geom_transform: null 7 | preprocessing: 8 | resize: null 9 | edge_divisible_by: null 10 | cam_id: division:1 11 | -------------------------------------------------------------------------------- /siclib/eval/configs/geocalib-simple_radial-rays.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | name: networks.geocalib_pretrained_rays 3 | camera_model: simple_radial 4 | model_weights: distorted 5 | data: 6 | im_geom_transform: null 7 | preprocessing: 8 | resize: null 9 | edge_divisible_by: null 10 | cam_id: radial:1 11 | -------------------------------------------------------------------------------- /siclib/eval/configs/geocalib-simple_radial.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | name: networks.geocalib_pretrained 3 | camera_model: simple_radial 4 | model_weights: distorted 5 | -------------------------------------------------------------------------------- /siclib/eval/configs/moge.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | name: networks.moge_pretrained 3 | 4 | data: 5 | im_geom_transform: null 6 | preprocessing: 7 | resize: null 8 | edge_divisible_by: null 9 | -------------------------------------------------------------------------------- /siclib/eval/configs/uvp.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | name: optimization.vp_from_prior 3 | SOLVER_FLAGS: [True, True, True, True, True] 4 | magsac_scoring: true 5 | min_lines: 5 6 | verbose: false 7 | 8 | # RANSAC inlier threshold 9 | th_pixels: 3 10 | 11 | # 3 uses the gravity in the LS refinement, 2 does not. 
Here we use a prior on the gravity, so use 2 12 | ls_refinement: 2 13 | 14 | # change to 3 to add a Ceres optimization after the non minimal solver (slower) 15 | nms: 1 16 | 17 | # deeplsd, lsd 18 | line_type: deeplsd 19 | -------------------------------------------------------------------------------- /siclib/eval/configs/wildcam.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | name: networks.wildcam_pretrained 3 | with_assumption: false 4 | 5 | data: 6 | im_geom_transform: null 7 | preprocessing: 8 | resize: null 9 | edge_divisible_by: null -------------------------------------------------------------------------------- /siclib/eval/eval_pipeline.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import h5py 4 | import numpy as np 5 | from omegaconf import OmegaConf 6 | 7 | # flake8: noqa 8 | # mypy: ignore-errors 9 | 10 | 11 | def load_eval(dir): 12 | summaries, results = {}, {} 13 | with h5py.File(str(dir / "results.h5"), "r") as hfile: 14 | for k in hfile.keys(): 15 | r = np.array(hfile[k]) 16 | if len(r.shape) < 3: 17 | results[k] = r 18 | for k, v in hfile.attrs.items(): 19 | summaries[k] = v 20 | with open(dir / "summaries.json", "r") as f: 21 | s = json.load(f) 22 | summaries = {k: v if v is not None else np.nan for k, v in s.items()} 23 | return summaries, results 24 | 25 | 26 | def save_eval(dir, summaries, figures, results): 27 | with h5py.File(str(dir / "results.h5"), "w") as hfile: 28 | for k, v in results.items(): 29 | arr = np.array(v) 30 | if not np.issubdtype(arr.dtype, np.number): 31 | arr = arr.astype("object") 32 | hfile.create_dataset(k, data=arr) 33 | # just to be safe, not used in practice 34 | for k, v in summaries.items(): 35 | hfile.attrs[k] = v 36 | s = { 37 | k: float(v) if np.isfinite(v) else None 38 | for k, v in summaries.items() 39 | if not isinstance(v, list) 40 | } 41 | s = {**s, **{k: v for k, v in summaries.items() if isinstance(v, list)}} 42 | with open(dir / "summaries.json", "w") as f: 43 | json.dump(s, f, indent=4) 44 | 45 | for fig_name, fig in figures.items(): 46 | fig.savefig(dir / f"{fig_name}.png") 47 | 48 | 49 | def exists_eval(dir): 50 | return (dir / "results.h5").exists() and (dir / "summaries.json").exists() 51 | 52 | 53 | class EvalPipeline: 54 | default_conf = {} 55 | 56 | export_keys = [] 57 | optional_export_keys = [] 58 | 59 | def __init__(self, conf): 60 | """Assumes""" 61 | self.default_conf = OmegaConf.create(self.default_conf) 62 | self.conf = OmegaConf.merge(self.default_conf, conf) 63 | self._init(self.conf) 64 | 65 | def _init(self, conf): 66 | pass 67 | 68 | @classmethod 69 | def get_dataloader(cls, data_conf=None): 70 | """Returns a data loader with samples for each eval datapoint""" 71 | raise NotImplementedError 72 | 73 | def get_predictions(self, experiment_dir, model=None, overwrite=False): 74 | """Export a prediction file for each eval datapoint""" 75 | raise NotImplementedError 76 | 77 | def run_eval(self, loader, pred_file): 78 | """Run the eval on cached predictions""" 79 | raise NotImplementedError 80 | 81 | def run(self, experiment_dir, model=None, overwrite=False, overwrite_eval=False): 82 | """Run export+eval loop""" 83 | self.save_conf(experiment_dir, overwrite=overwrite, overwrite_eval=overwrite_eval) 84 | pred_file = self.get_predictions(experiment_dir, model=model, overwrite=overwrite) 85 | # pred_file = experiment_dir / "predictions.h5" 86 | 87 | f = {} 88 | if not exists_eval(experiment_dir) or 
overwrite_eval or overwrite: 89 | s, f, r = self.run_eval(self.get_dataloader(self.conf.data, 1), pred_file) # type: ignore 90 | save_eval(experiment_dir, s, f, r) 91 | s, r = load_eval(experiment_dir) 92 | 93 | if self.conf.eval.get("delete_cache", False): 94 | for file in ("results.h5", "predictions.h5"): 95 | (experiment_dir / file).unlink(missing_ok=True) 96 | if self.conf.eval.get("delete_also_summaries", False): 97 | for file in ("summaries.json", "conf.yaml"): 98 | (experiment_dir / file).unlink(missing_ok=True) 99 | experiment_dir.rmdir() 100 | 101 | return s, f, r 102 | 103 | def save_conf(self, experiment_dir, overwrite=False, overwrite_eval=False): 104 | # store config 105 | conf_output_path = experiment_dir / "conf.yaml" 106 | if conf_output_path.exists(): 107 | saved_conf = OmegaConf.load(conf_output_path) 108 | if (saved_conf.data != self.conf.data) or (saved_conf.model != self.conf.model): 109 | assert ( 110 | overwrite 111 | ), "configs changed, add --overwrite to rerun experiment with new conf" 112 | if saved_conf.eval != self.conf.eval: 113 | assert ( 114 | overwrite or overwrite_eval 115 | ), "eval configs changed, add --overwrite_eval to rerun evaluation" 116 | OmegaConf.save(self.conf, experiment_dir / "conf.yaml") 117 | -------------------------------------------------------------------------------- /siclib/eval/inspect.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from collections import defaultdict 3 | from pathlib import Path 4 | from pprint import pprint 5 | 6 | import matplotlib 7 | import matplotlib.pyplot as plt 8 | 9 | from siclib.eval import get_benchmark 10 | from siclib.eval.eval_pipeline import load_eval 11 | from siclib.settings import EVAL_PATH 12 | from siclib.visualization.global_frame import GlobalFrame 13 | from siclib.visualization.two_view_frame import TwoViewFrame 14 | 15 | # flake8: noqa 16 | # mypy: ignore-errors 17 | 18 | if __name__ == "__main__": 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument("benchmark", type=str) 21 | parser.add_argument("--x", type=str, default=None) 22 | parser.add_argument("--y", type=str, default=None) 23 | parser.add_argument("--backend", type=str, default=None) 24 | parser.add_argument("--default_plot", type=str, default=TwoViewFrame.default_conf["default"]) 25 | 26 | parser.add_argument("dotlist", nargs="*") 27 | args = parser.parse_intermixed_args() 28 | 29 | output_dir = Path(EVAL_PATH, args.benchmark) 30 | 31 | results = {} 32 | summaries = defaultdict(dict) 33 | 34 | predictions = {} 35 | 36 | if args.backend: 37 | matplotlib.use(args.backend) 38 | 39 | bm = get_benchmark(args.benchmark) 40 | loader = bm.get_dataloader() 41 | 42 | for name in args.dotlist: 43 | experiment_dir = output_dir / name 44 | pred_file = experiment_dir / "predictions.h5" 45 | s, results[name] = load_eval(experiment_dir) 46 | predictions[name] = pred_file 47 | for k, v in s.items(): 48 | summaries[k][name] = v 49 | 50 | pprint(summaries) 51 | 52 | plt.close("all") 53 | 54 | frame = GlobalFrame( 55 | {"child": {"default": args.default_plot}, **vars(args)}, 56 | results, 57 | loader, 58 | predictions, 59 | child_frame=TwoViewFrame, 60 | ) 61 | frame.draw() 62 | plt.show() 63 | -------------------------------------------------------------------------------- /siclib/eval/io.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | from pprint import pprint 4 | from typing import 
Optional 5 | 6 | import pkg_resources 7 | from hydra import compose, initialize 8 | from omegaconf import OmegaConf 9 | 10 | from siclib.models import get_model 11 | from siclib.settings import TRAINING_PATH 12 | from siclib.utils.experiments import load_experiment 13 | 14 | # flake8: noqa 15 | # mypy: ignore-errors 16 | 17 | 18 | def parse_config_path(name_or_path: Optional[str], defaults: str) -> Path: 19 | default_configs = {} 20 | print(f"Looking for default config: {'siclib', str(defaults)}") 21 | for c in pkg_resources.resource_listdir("siclib.eval", str(defaults)): 22 | if c.endswith(".yaml"): 23 | default_configs[Path(c).stem] = Path( 24 | pkg_resources.resource_filename("siclib.eval", defaults + c) 25 | ) 26 | if name_or_path is None: 27 | return None 28 | if name_or_path in default_configs: 29 | return default_configs[name_or_path] 30 | path = Path(name_or_path) 31 | if not path.exists(): 32 | raise FileNotFoundError( 33 | f"Cannot find the config file: {name_or_path}. " 34 | f"Not in the default configs {list(default_configs.keys())} " 35 | "and not an existing path." 36 | ) 37 | return Path(path) 38 | 39 | 40 | def extract_benchmark_conf(conf, benchmark, only_model=True): 41 | if only_model: 42 | conf_ = OmegaConf.create({"model": conf.get("model", {})}) 43 | else: 44 | conf_ = conf 45 | OmegaConf.set_struct(conf_, None) 46 | # mconf = OmegaConf.create({"model": conf.get("model", {})}) 47 | if "benchmarks" in conf.keys(): 48 | return OmegaConf.merge(conf_, conf.benchmarks.get(benchmark, {})) 49 | # return OmegaConf.merge(mconf, conf.benchmarks.get(benchmark, {})) 50 | else: 51 | return conf_ 52 | # return mconf 53 | 54 | 55 | def parse_eval_args(benchmark, args, configs_path, default=None, only_custom_model=True): 56 | conf = {"data": {}, "model": {}, "eval": {}} 57 | 58 | if args.conf: 59 | print(f"Loading config: {configs_path}") 60 | conf_path = parse_config_path(args.conf, configs_path) 61 | initialize(version_base=None, config_path=configs_path) 62 | custom_conf = compose(config_name=args.conf) 63 | conf = extract_benchmark_conf( 64 | OmegaConf.merge(conf, custom_conf), benchmark, only_custom_model 65 | ) 66 | args.tag = args.tag if args.tag is not None else conf_path.name.replace(".yaml", "") 67 | 68 | cli_conf = OmegaConf.from_cli(args.dotlist) 69 | conf = OmegaConf.merge(conf, cli_conf) 70 | conf.checkpoint = args.checkpoint or conf.get("checkpoint") 71 | 72 | if conf.checkpoint and not conf.checkpoint.endswith(".tar"): 73 | checkpoint_conf = OmegaConf.load(TRAINING_PATH / conf.checkpoint / "config.yaml") 74 | conf = OmegaConf.merge(extract_benchmark_conf(checkpoint_conf, benchmark), conf) 75 | 76 | if default: 77 | conf = OmegaConf.merge(default, conf) 78 | 79 | if args.tag is not None: 80 | name = args.tag 81 | elif args.conf and conf.checkpoint: 82 | name = f"{args.conf}_{conf.checkpoint}" 83 | elif args.conf: 84 | name = args.conf 85 | elif conf.checkpoint: 86 | name = conf.checkpoint 87 | if len(args.dotlist) > 0 and not args.tag: 88 | name = f"{name}_" + ":".join(args.dotlist) 89 | 90 | print("Running benchmark:", benchmark) 91 | print("Experiment tag:", name) 92 | print("Config:") 93 | pprint(OmegaConf.to_container(conf)) 94 | return name, conf 95 | 96 | 97 | def load_model(model_conf, checkpoint, get_last=False): 98 | if checkpoint: 99 | model = load_experiment(checkpoint, conf=model_conf, get_last=get_last).eval() 100 | else: 101 | model = get_model(model_conf.name)(model_conf).eval() 102 | return model 103 | 104 | 105 | def get_eval_parser(): 106 | parser = 
argparse.ArgumentParser() 107 | parser.add_argument("--tag", type=str, default=None) 108 | parser.add_argument("--checkpoint", type=str, default=None) 109 | parser.add_argument("--conf", type=str, default=None) 110 | parser.add_argument("--overwrite", action="store_true") 111 | parser.add_argument("--overwrite_eval", action="store_true") 112 | parser.add_argument("--plot", action="store_true") 113 | parser.add_argument("dotlist", nargs="*") 114 | return parser 115 | -------------------------------------------------------------------------------- /siclib/eval/lamar2k.py: -------------------------------------------------------------------------------- 1 | import resource 2 | from pathlib import Path 3 | from pprint import pprint 4 | 5 | import matplotlib.pyplot as plt 6 | import torch 7 | from omegaconf import OmegaConf 8 | 9 | from siclib.eval.io import get_eval_parser, parse_eval_args 10 | from siclib.eval.simple_pipeline import SimplePipeline 11 | from siclib.settings import EVAL_PATH 12 | 13 | # flake8: noqa 14 | # mypy: ignore-errors 15 | 16 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 17 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) 18 | 19 | torch.set_grad_enabled(False) 20 | 21 | 22 | class Lamar2k(SimplePipeline): 23 | default_conf = { 24 | "data": { 25 | "name": "simple_dataset", 26 | "dataset_dir": "data/lamar2k", 27 | "test_img_dir": "${.dataset_dir}/images", 28 | "test_csv": "${.dataset_dir}/images.csv", 29 | "augmentations": {"name": "identity"}, 30 | "preprocessing": {"resize": 320, "edge_divisible_by": 32}, 31 | "test_batch_size": 1, 32 | }, 33 | "model": {}, 34 | "eval": { 35 | "thresholds": [1, 5, 10], 36 | "pixel_thresholds": [0.5, 1, 3, 5], 37 | "num_vis": 10, 38 | "verbose": True, 39 | }, 40 | "url": "https://cvg-data.inf.ethz.ch/GeoCalib_ECCV2024/lamar2k.zip", 41 | } 42 | 43 | export_keys = [ 44 | "camera", 45 | "gravity", 46 | ] 47 | 48 | optional_export_keys = [ 49 | "focal_uncertainty", 50 | "vfov_uncertainty", 51 | "roll_uncertainty", 52 | "pitch_uncertainty", 53 | "gravity_uncertainty", 54 | "up_field", 55 | "up_confidence", 56 | "latitude_field", 57 | "latitude_confidence", 58 | ] 59 | 60 | 61 | if __name__ == "__main__": 62 | dataset_name = Path(__file__).stem 63 | parser = get_eval_parser() 64 | args = parser.parse_intermixed_args() 65 | 66 | default_conf = OmegaConf.create(Lamar2k.default_conf) 67 | 68 | # mingle paths 69 | output_dir = Path(EVAL_PATH, dataset_name) 70 | output_dir.mkdir(exist_ok=True, parents=True) 71 | 72 | name, conf = parse_eval_args(dataset_name, args, "configs/", default_conf) 73 | 74 | experiment_dir = output_dir / name 75 | experiment_dir.mkdir(exist_ok=True) 76 | 77 | pipeline = Lamar2k(conf) 78 | s, f, r = pipeline.run( 79 | experiment_dir, 80 | overwrite=args.overwrite, 81 | overwrite_eval=args.overwrite_eval, 82 | ) 83 | 84 | pprint(s) 85 | 86 | if args.plot: 87 | for name, fig in f.items(): 88 | fig.canvas.manager.set_window_title(name) 89 | plt.show() 90 | -------------------------------------------------------------------------------- /siclib/eval/lamar2k_rays.py: -------------------------------------------------------------------------------- 1 | import resource 2 | from pathlib import Path 3 | from pprint import pprint 4 | 5 | import matplotlib.pyplot as plt 6 | import torch 7 | from omegaconf import OmegaConf 8 | 9 | from siclib.eval.io import get_eval_parser, parse_eval_args 10 | from siclib.eval.simple_pipeline_rays import SimplePipeline 11 | from siclib.settings import EVAL_PATH # type: ignore 12 
| 13 | # flake8: noqa 14 | # mypy: ignore-errors 15 | 16 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 17 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) 18 | 19 | torch.set_grad_enabled(False) 20 | 21 | 22 | class Lamar2k(SimplePipeline): 23 | default_conf = { 24 | "data": { 25 | "name": "simple_dataset_rays", 26 | "dataset_dir": "data/lamar2k", 27 | "test_img_dir": "${.dataset_dir}/images", 28 | "test_csv": "${.dataset_dir}/images.csv", 29 | "test_h5": "${.dataset_dir}/images.h5", 30 | "augmentations": {"name": "identity"}, 31 | "preprocessing": {"resize": None, "edge_divisible_by": None}, 32 | "test_batch_size": 1, 33 | }, 34 | "model": {}, 35 | "eval": { 36 | "thresholds": [1, 5, 10], 37 | "pixel_thresholds": [0.5, 1, 3, 5], 38 | "num_vis": 10, 39 | "verbose": True, 40 | "eval_on_edit": False, 41 | }, 42 | "url": "https://cvg-data.inf.ethz.ch/GeoCalib_ECCV2024/lamar2k.zip", 43 | "url_edit": "https://drive.google.com/uc?id=1h2HThhQbvqZlMw0OdpF7Y73wodeZMmdj", 44 | } 45 | 46 | export_keys = ["intrinsics"] 47 | 48 | optional_export_keys = [ 49 | # "intrinsics_uncertainty", 50 | # "rays", 51 | # "log_covs", 52 | ] 53 | 54 | 55 | if __name__ == "__main__": 56 | dataset_name = Path(__file__).stem 57 | parser = get_eval_parser() 58 | args = parser.parse_intermixed_args() 59 | 60 | default_conf = OmegaConf.create(Lamar2k.default_conf) 61 | 62 | # mingle paths 63 | output_dir = Path(EVAL_PATH, dataset_name) # type: ignore 64 | output_dir.mkdir(exist_ok=True, parents=True) 65 | 66 | name, conf = parse_eval_args( 67 | dataset_name, args, "configs/", default_conf, only_custom_model=False 68 | ) 69 | 70 | experiment_dir = output_dir / name 71 | experiment_dir.mkdir(exist_ok=True) 72 | 73 | pipeline = Lamar2k(conf) 74 | s, f, r = pipeline.run( 75 | experiment_dir, 76 | overwrite=args.overwrite, 77 | overwrite_eval=args.overwrite_eval, 78 | ) 79 | 80 | pprint(s) 81 | 82 | if args.plot: 83 | for name, fig in f.items(): 84 | fig.canvas.manager.set_window_title(name) 85 | plt.show() 86 | -------------------------------------------------------------------------------- /siclib/eval/megadepth2k.py: -------------------------------------------------------------------------------- 1 | import resource 2 | from pathlib import Path 3 | from pprint import pprint 4 | 5 | import matplotlib.pyplot as plt 6 | import torch 7 | from omegaconf import OmegaConf 8 | 9 | from siclib.eval.io import get_eval_parser, parse_eval_args 10 | from siclib.eval.simple_pipeline import SimplePipeline 11 | from siclib.settings import EVAL_PATH 12 | 13 | # flake8: noqa 14 | # mypy: ignore-errors 15 | 16 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 17 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) 18 | 19 | torch.set_grad_enabled(False) 20 | 21 | 22 | class Megadepth2k(SimplePipeline): 23 | default_conf = { 24 | "data": { 25 | "name": "simple_dataset", 26 | "dataset_dir": "data/megadepth2k", 27 | "test_img_dir": "${.dataset_dir}/images", 28 | "test_csv": "${.dataset_dir}/images.csv", 29 | "augmentations": {"name": "identity"}, 30 | "preprocessing": {"resize": 320, "edge_divisible_by": 32}, 31 | "test_batch_size": 1, 32 | }, 33 | "model": {}, 34 | "eval": { 35 | "thresholds": [1, 5, 10], 36 | "pixel_thresholds": [0.5, 1, 3, 5], 37 | "num_vis": 10, 38 | "verbose": True, 39 | }, 40 | "url": "https://cvg-data.inf.ethz.ch/GeoCalib_ECCV2024/megadepth2k.zip", 41 | } 42 | 43 | export_keys = [ 44 | "camera", 45 | "gravity", 46 | ] 47 | 48 | optional_export_keys = [ 49 | "focal_uncertainty", 50 | 
"vfov_uncertainty", 51 | "roll_uncertainty", 52 | "pitch_uncertainty", 53 | "gravity_uncertainty", 54 | "up_field", 55 | "up_confidence", 56 | "latitude_field", 57 | "latitude_confidence", 58 | ] 59 | 60 | 61 | if __name__ == "__main__": 62 | dataset_name = Path(__file__).stem 63 | parser = get_eval_parser() 64 | args = parser.parse_intermixed_args() 65 | 66 | default_conf = OmegaConf.create(Megadepth2k.default_conf) 67 | 68 | # mingle paths 69 | output_dir = Path(EVAL_PATH, dataset_name) 70 | output_dir.mkdir(exist_ok=True, parents=True) 71 | 72 | name, conf = parse_eval_args(dataset_name, args, "configs/", default_conf) 73 | 74 | experiment_dir = output_dir / name 75 | experiment_dir.mkdir(exist_ok=True) 76 | 77 | pipeline = Megadepth2k(conf) 78 | s, f, r = pipeline.run( 79 | experiment_dir, 80 | overwrite=args.overwrite, 81 | overwrite_eval=args.overwrite_eval, 82 | ) 83 | 84 | pprint(s) 85 | 86 | if args.plot: 87 | for name, fig in f.items(): 88 | fig.canvas.manager.set_window_title(name) 89 | plt.show() 90 | -------------------------------------------------------------------------------- /siclib/eval/megadepth2k_radial.py: -------------------------------------------------------------------------------- 1 | import resource 2 | from pathlib import Path 3 | from pprint import pprint 4 | 5 | import matplotlib.pyplot as plt 6 | import torch 7 | from omegaconf import OmegaConf 8 | 9 | from siclib.eval.io import get_eval_parser, parse_eval_args 10 | from siclib.eval.simple_pipeline import SimplePipeline 11 | from siclib.eval.utils import download_and_extract_benchmark 12 | from siclib.geometry.camera import SimpleRadial 13 | from siclib.settings import EVAL_PATH 14 | 15 | # flake8: noqa 16 | # mypy: ignore-errors 17 | 18 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 19 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) 20 | 21 | torch.set_grad_enabled(False) 22 | 23 | 24 | class Megadepth2kRadial(SimplePipeline): 25 | default_conf = { 26 | "data": { 27 | "name": "simple_dataset", 28 | "dataset_dir": "data/megadepth2k-radial", 29 | "test_img_dir": "${.dataset_dir}/images", 30 | "test_csv": "${.dataset_dir}/images.csv", 31 | "augmentations": {"name": "identity"}, 32 | "preprocessing": {"resize": 320, "edge_divisible_by": 32}, 33 | "test_batch_size": 1, 34 | }, 35 | "model": {}, 36 | "eval": { 37 | "thresholds": [1, 5, 10], 38 | "pixel_thresholds": [0.5, 1, 3, 5], 39 | "num_vis": 10, 40 | "verbose": True, 41 | }, 42 | "url": "https://cvg-data.inf.ethz.ch/GeoCalib_ECCV2024/megadepth2k-radial.zip", 43 | } 44 | 45 | export_keys = [ 46 | "camera", 47 | "gravity", 48 | ] 49 | 50 | optional_export_keys = [ 51 | "focal_uncertainty", 52 | "vfov_uncertainty", 53 | "roll_uncertainty", 54 | "pitch_uncertainty", 55 | "gravity_uncertainty", 56 | "up_field", 57 | "up_confidence", 58 | "latitude_field", 59 | "latitude_confidence", 60 | ] 61 | 62 | def _init(self, conf): 63 | self.verbose = conf.eval.verbose 64 | self.num_vis = self.conf.eval.num_vis 65 | 66 | self.CameraModel = SimpleRadial 67 | 68 | if conf.url is not None: 69 | ds_dir = Path(conf.data.dataset_dir) 70 | download_and_extract_benchmark(ds_dir.name, conf.url, ds_dir.parent) 71 | 72 | 73 | if __name__ == "__main__": 74 | dataset_name = Path(__file__).stem 75 | parser = get_eval_parser() 76 | args = parser.parse_intermixed_args() 77 | 78 | default_conf = OmegaConf.create(Megadepth2kRadial.default_conf) 79 | 80 | # mingle paths 81 | output_dir = Path(EVAL_PATH, dataset_name) 82 | output_dir.mkdir(exist_ok=True, parents=True) 
83 | 84 | name, conf = parse_eval_args(dataset_name, args, "configs/", default_conf) 85 | 86 | experiment_dir = output_dir / name 87 | experiment_dir.mkdir(exist_ok=True) 88 | 89 | pipeline = Megadepth2kRadial(conf) 90 | s, f, r = pipeline.run( 91 | experiment_dir, 92 | overwrite=args.overwrite, 93 | overwrite_eval=args.overwrite_eval, 94 | ) 95 | 96 | pprint(s) 97 | 98 | if args.plot: 99 | for name, fig in f.items(): 100 | fig.canvas.manager.set_window_title(name) 101 | plt.show() 102 | -------------------------------------------------------------------------------- /siclib/eval/megadepth2k_radial_rays.py: -------------------------------------------------------------------------------- 1 | import resource 2 | from pathlib import Path 3 | from pprint import pprint 4 | 5 | import matplotlib.pyplot as plt 6 | import torch 7 | from omegaconf import OmegaConf 8 | 9 | from siclib.eval.io import get_eval_parser, parse_eval_args 10 | from siclib.eval.simple_pipeline_rays import SimplePipeline 11 | from siclib.settings import EVAL_PATH 12 | 13 | # flake8: noqa 14 | # mypy: ignore-errors 15 | 16 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 17 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) 18 | 19 | torch.set_grad_enabled(False) 20 | 21 | 22 | class Megadepth2kRadial(SimplePipeline): 23 | default_conf = { 24 | "data": { 25 | "name": "simple_dataset_rays", 26 | "dataset_dir": "data/megadepth2k-radial", 27 | "test_img_dir": "${.dataset_dir}/images", 28 | "test_csv": "${.dataset_dir}/images.csv", 29 | "test_h5": "${.dataset_dir}/images.h5", 30 | "augmentations": {"name": "identity"}, 31 | "preprocessing": {"resize": None, "edge_divisible_by": None}, 32 | "test_batch_size": 1, 33 | }, 34 | "model": {}, 35 | "eval": { 36 | "thresholds": [1, 5, 10], 37 | "pixel_thresholds": [0.5, 1, 3, 5], 38 | "num_vis": 10, 39 | "verbose": True, 40 | "eval_on_edit": False, 41 | }, 42 | "url": "https://cvg-data.inf.ethz.ch/GeoCalib_ECCV2024/megadepth2k-radial.zip", 43 | } 44 | 45 | export_keys = ["intrinsics"] 46 | 47 | optional_export_keys = [ 48 | # "intrinsics_uncertainty", 49 | # "rays", 50 | # "log_covs", 51 | ] 52 | 53 | 54 | if __name__ == "__main__": 55 | dataset_name = Path(__file__).stem 56 | parser = get_eval_parser() 57 | args = parser.parse_intermixed_args() 58 | 59 | default_conf = OmegaConf.create(Megadepth2kRadial.default_conf) 60 | 61 | # mingle paths 62 | output_dir = Path(EVAL_PATH, dataset_name) # type: ignore 63 | output_dir.mkdir(exist_ok=True, parents=True) 64 | 65 | name, conf = parse_eval_args( 66 | dataset_name, args, "configs/", default_conf, only_custom_model=False 67 | ) 68 | 69 | experiment_dir = output_dir / name 70 | experiment_dir.mkdir(exist_ok=True) 71 | 72 | pipeline = Megadepth2kRadial(conf) 73 | s, f, r = pipeline.run( 74 | experiment_dir, 75 | overwrite=args.overwrite, 76 | overwrite_eval=args.overwrite_eval, 77 | ) 78 | 79 | pprint(s) 80 | 81 | if args.plot: 82 | for name, fig in f.items(): 83 | fig.canvas.manager.set_window_title(name) 84 | plt.show() 85 | -------------------------------------------------------------------------------- /siclib/eval/megadepth2k_rays.py: -------------------------------------------------------------------------------- 1 | import resource 2 | from pathlib import Path 3 | from pprint import pprint 4 | 5 | import matplotlib.pyplot as plt 6 | import torch 7 | from omegaconf import OmegaConf 8 | 9 | from siclib.eval.io import get_eval_parser, parse_eval_args 10 | from siclib.eval.simple_pipeline_rays import SimplePipeline 11 | 
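# note: the *_rays pipelines build on simple_pipeline_rays, which exports a
# single "intrinsics" entry per image, whereas simple_pipeline exports the
# camera and gravity predictions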
from siclib.settings import EVAL_PATH # type: ignore 12 | 13 | # flake8: noqa 14 | # mypy: ignore-errors 15 | 16 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 17 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) 18 | 19 | torch.set_grad_enabled(False) 20 | 21 | 22 | class Megadepth2k(SimplePipeline): 23 | default_conf = { 24 | "data": { 25 | "name": "simple_dataset_rays", 26 | "dataset_dir": "data/megadepth2k", 27 | "test_img_dir": "${.dataset_dir}/images", 28 | "test_csv": "${.dataset_dir}/images.csv", 29 | "test_h5": "${.dataset_dir}/images.h5", 30 | "augmentations": {"name": "identity"}, 31 | "preprocessing": {"resize": None, "edge_divisible_by": None}, 32 | "test_batch_size": 1, 33 | }, 34 | "model": {}, 35 | "eval": { 36 | "thresholds": [1, 5, 10], 37 | "pixel_thresholds": [0.5, 1, 3, 5], 38 | "num_vis": 10, 39 | "verbose": True, 40 | "eval_on_edit": False, 41 | }, 42 | "url": "https://cvg-data.inf.ethz.ch/GeoCalib_ECCV2024/megadepth2k.zip", 43 | } 44 | 45 | export_keys = ["intrinsics"] 46 | 47 | optional_export_keys = [ 48 | # "intrinsics_uncertainty", 49 | # "rays", 50 | # "log_covs", 51 | ] 52 | 53 | 54 | if __name__ == "__main__": 55 | dataset_name = Path(__file__).stem 56 | parser = get_eval_parser() 57 | args = parser.parse_intermixed_args() 58 | 59 | default_conf = OmegaConf.create(Megadepth2k.default_conf) 60 | 61 | # mingle paths 62 | output_dir = Path(EVAL_PATH, dataset_name) # type: ignore 63 | output_dir.mkdir(exist_ok=True, parents=True) 64 | 65 | name, conf = parse_eval_args( 66 | dataset_name, args, "configs/", default_conf, only_custom_model=False 67 | ) 68 | 69 | experiment_dir = output_dir / name 70 | experiment_dir.mkdir(exist_ok=True) 71 | 72 | pipeline = Megadepth2k(conf) 73 | s, f, r = pipeline.run( 74 | experiment_dir, 75 | overwrite=args.overwrite, 76 | overwrite_eval=args.overwrite_eval, 77 | ) 78 | 79 | pprint(s) 80 | 81 | if args.plot: 82 | for name, fig in f.items(): 83 | fig.canvas.manager.set_window_title(name) 84 | plt.show() 85 | -------------------------------------------------------------------------------- /siclib/eval/monovo2k_rays.py: -------------------------------------------------------------------------------- 1 | import resource 2 | from pathlib import Path 3 | from pprint import pprint 4 | 5 | import matplotlib.pyplot as plt 6 | import torch 7 | from omegaconf import OmegaConf 8 | 9 | from siclib.eval.io import get_eval_parser, parse_eval_args 10 | from siclib.eval.simple_pipeline_rays import SimplePipeline 11 | from siclib.settings import EVAL_PATH 12 | 13 | # flake8: noqa 14 | # mypy: ignore-errors 15 | 16 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 17 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) 18 | 19 | torch.set_grad_enabled(False) 20 | 21 | 22 | class MonoVO2k(SimplePipeline): 23 | default_conf = { 24 | "data": { 25 | "name": "simple_dataset_rays", 26 | "dataset_dir": "data/monovo2k", 27 | "test_img_dir": "${.dataset_dir}/images", 28 | "test_csv": "${.dataset_dir}/images.csv", 29 | "test_h5": "${.dataset_dir}/images.h5", 30 | "augmentations": {"name": "identity"}, 31 | "preprocessing": {"resize": None, "edge_divisible_by": None}, 32 | "test_batch_size": 1, 33 | "cam_id": None, 34 | }, 35 | "model": {}, 36 | "eval": { 37 | "thresholds": [1, 5, 10], 38 | "pixel_thresholds": [0.5, 1, 3, 5], 39 | "num_vis": 10, 40 | "verbose": True, 41 | "eval_on_edit": False, 42 | }, 43 | "url": None, 44 | "url": "https://drive.google.com/uc?id=18rQ-WZcxXogeBqMwWvSkxVVEmZ9C12BZ", 45 | } 46 | 47 | 
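    # NOTE: "url" is defined twice in default_conf above; a Python dict literal
    # keeps only the last value, so the Google Drive link overrides the None.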
export_keys = ["intrinsics"] 48 | 49 | optional_export_keys = [ 50 | # "intrinsics_uncertainty", 51 | # "rays", 52 | # "log_covs", 53 | ] 54 | 55 | 56 | if __name__ == "__main__": 57 | dataset_name = Path(__file__).stem 58 | parser = get_eval_parser() 59 | args = parser.parse_intermixed_args() 60 | 61 | default_conf = OmegaConf.create(MonoVO2k.default_conf) 62 | 63 | # mingle paths 64 | output_dir = Path(EVAL_PATH, dataset_name) # type: ignore 65 | output_dir.mkdir(exist_ok=True, parents=True) 66 | 67 | name, conf = parse_eval_args( 68 | dataset_name, args, "configs/", default_conf, only_custom_model=False 69 | ) 70 | 71 | experiment_dir = output_dir / name 72 | experiment_dir.mkdir(exist_ok=True) 73 | 74 | pipeline = MonoVO2k(conf) 75 | s, f, r = pipeline.run( 76 | experiment_dir, 77 | overwrite=args.overwrite, 78 | overwrite_eval=args.overwrite_eval, 79 | ) 80 | 81 | pprint(s) 82 | 83 | if args.plot: 84 | for name, fig in f.items(): 85 | fig.canvas.manager.set_window_title(name) 86 | plt.show() 87 | -------------------------------------------------------------------------------- /siclib/eval/openpano.py: -------------------------------------------------------------------------------- 1 | import resource 2 | from pathlib import Path 3 | from pprint import pprint 4 | 5 | import matplotlib.pyplot as plt 6 | import torch 7 | from omegaconf import OmegaConf 8 | 9 | from siclib.eval.io import get_eval_parser, parse_eval_args 10 | from siclib.eval.simple_pipeline import SimplePipeline 11 | from siclib.settings import EVAL_PATH 12 | 13 | # flake8: noqa 14 | # mypy: ignore-errors 15 | 16 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 17 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) 18 | 19 | torch.set_grad_enabled(False) 20 | 21 | 22 | class OpenPano(SimplePipeline): 23 | default_conf = { 24 | "data": { 25 | "name": "simple_dataset", 26 | "dataset_dir": "data/poly+maps+laval/poly+maps+laval", 27 | "augmentations": {"name": "identity"}, 28 | "preprocessing": {"resize": 320, "edge_divisible_by": 32}, 29 | "test_batch_size": 1, 30 | }, 31 | "model": {}, 32 | "eval": { 33 | "thresholds": [1, 5, 10], 34 | "pixel_thresholds": [0.5, 1, 3, 5], 35 | "num_vis": 10, 36 | "verbose": True, 37 | }, 38 | "url": None, 39 | } 40 | 41 | 42 | if __name__ == "__main__": 43 | dataset_name = Path(__file__).stem 44 | parser = get_eval_parser() 45 | args = parser.parse_intermixed_args() 46 | 47 | default_conf = OmegaConf.create(OpenPano.default_conf) 48 | 49 | # mingle paths 50 | output_dir = Path(EVAL_PATH, dataset_name) 51 | output_dir.mkdir(exist_ok=True, parents=True) 52 | 53 | name, conf = parse_eval_args(dataset_name, args, "configs/", default_conf) 54 | 55 | experiment_dir = output_dir / name 56 | experiment_dir.mkdir(exist_ok=True) 57 | 58 | pipeline = OpenPano(conf) 59 | s, f, r = pipeline.run( 60 | experiment_dir, 61 | overwrite=args.overwrite, 62 | overwrite_eval=args.overwrite_eval, 63 | ) 64 | 65 | pprint(s) 66 | 67 | if args.plot: 68 | for name, fig in f.items(): 69 | fig.canvas.manager.set_window_title(name) 70 | plt.show() 71 | -------------------------------------------------------------------------------- /siclib/eval/openpano_radial.py: -------------------------------------------------------------------------------- 1 | import resource 2 | from pathlib import Path 3 | from pprint import pprint 4 | 5 | import matplotlib.pyplot as plt 6 | import torch 7 | from omegaconf import OmegaConf 8 | 9 | from siclib.eval.io import get_eval_parser, parse_eval_args 10 | from 
siclib.eval.simple_pipeline import SimplePipeline 11 | from siclib.settings import EVAL_PATH 12 | 13 | # flake8: noqa 14 | # mypy: ignore-errors 15 | 16 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 17 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) 18 | 19 | torch.set_grad_enabled(False) 20 | 21 | 22 | class OpenPanoRadial(SimplePipeline): 23 | default_conf = { 24 | "data": { 25 | "name": "simple_dataset", 26 | "dataset_dir": "data/poly+maps+laval/pano_dataset_distorted", 27 | "augmentations": {"name": "identity"}, 28 | "preprocessing": {"resize": 320, "edge_divisible_by": 32}, 29 | "test_batch_size": 1, 30 | }, 31 | "model": {}, 32 | "eval": { 33 | "thresholds": [1, 5, 10], 34 | "pixel_thresholds": [0.5, 1, 3, 5], 35 | "num_vis": 10, 36 | "verbose": True, 37 | }, 38 | "url": None, 39 | } 40 | 41 | 42 | if __name__ == "__main__": 43 | dataset_name = Path(__file__).stem 44 | parser = get_eval_parser() 45 | args = parser.parse_intermixed_args() 46 | 47 | default_conf = OmegaConf.create(OpenPanoRadial.default_conf) 48 | 49 | # mingle paths 50 | output_dir = Path(EVAL_PATH, dataset_name) 51 | output_dir.mkdir(exist_ok=True, parents=True) 52 | 53 | name, conf = parse_eval_args(dataset_name, args, "configs/", default_conf) 54 | 55 | experiment_dir = output_dir / name 56 | experiment_dir.mkdir(exist_ok=True) 57 | 58 | pipeline = OpenPanoRadial(conf) 59 | s, f, r = pipeline.run( 60 | experiment_dir, 61 | overwrite=args.overwrite, 62 | overwrite_eval=args.overwrite_eval, 63 | ) 64 | 65 | pprint(s) 66 | 67 | if args.plot: 68 | for name, fig in f.items(): 69 | fig.canvas.manager.set_window_title(name) 70 | plt.show() 71 | -------------------------------------------------------------------------------- /siclib/eval/openpano_rays.py: -------------------------------------------------------------------------------- 1 | import resource 2 | from pathlib import Path 3 | from pprint import pprint 4 | 5 | import matplotlib.pyplot as plt 6 | import torch 7 | from omegaconf import OmegaConf 8 | 9 | from siclib.eval.io import get_eval_parser, parse_eval_args 10 | from siclib.eval.simple_pipeline_rays import SimplePipeline 11 | from siclib.settings import EVAL_PATH # type: ignore 12 | 13 | # flake8: noqa 14 | # mypy: ignore-errors 15 | 16 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 17 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) 18 | 19 | torch.set_grad_enabled(False) 20 | 21 | 22 | class OpenPano(SimplePipeline): 23 | default_conf = { 24 | "data": { 25 | "name": "simple_dataset_rays", 26 | "dataset_dir": "data/openpano/openpano", 27 | "augmentations": {"name": "identity"}, 28 | "preprocessing": {"resize": None, "edge_divisible_by": None}, 29 | "test_batch_size": 1, 30 | }, 31 | "model": {}, 32 | "eval": { 33 | "thresholds": [1, 5, 10], 34 | "pixel_thresholds": [0.5, 1, 3, 5], 35 | "num_vis": 5, 36 | "verbose": True, 37 | "eval_on_edit": False, 38 | }, 39 | "url": None, 40 | } 41 | 42 | 43 | if __name__ == "__main__": 44 | dataset_name = Path(__file__).stem 45 | parser = get_eval_parser() 46 | args = parser.parse_intermixed_args() 47 | 48 | default_conf = OmegaConf.create(OpenPano.default_conf) 49 | 50 | # mingle paths 51 | output_dir = Path(EVAL_PATH, dataset_name) 52 | output_dir.mkdir(exist_ok=True, parents=True) 53 | 54 | name, conf = parse_eval_args(dataset_name, args, "configs/", default_conf) 55 | 56 | experiment_dir = output_dir / name 57 | experiment_dir.mkdir(exist_ok=True) 58 | 59 | pipeline = OpenPano(conf) 60 | s, f, r = pipeline.run( 61 | 
experiment_dir, 62 | overwrite=args.overwrite, 63 | overwrite_eval=args.overwrite_eval, 64 | ) 65 | 66 | pprint(s) 67 | 68 | if args.plot: 69 | for name, fig in f.items(): 70 | fig.canvas.manager.set_window_title(name) 71 | plt.show() 72 | -------------------------------------------------------------------------------- /siclib/eval/scannetpp2k_images.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/javrtg/AnyCalib/7bfa51b5797e8784cc6d438aab96c57c226a699e/siclib/eval/scannetpp2k_images.h5 -------------------------------------------------------------------------------- /siclib/eval/scannetpp2k_rays.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import resource 3 | import shutil 4 | from pathlib import Path 5 | from pprint import pprint 6 | 7 | import h5py 8 | import matplotlib.pyplot as plt 9 | import torch 10 | from omegaconf import OmegaConf 11 | from tqdm import tqdm 12 | 13 | from siclib.eval.io import get_eval_parser, parse_eval_args 14 | from siclib.eval.simple_pipeline_rays import SimplePipeline 15 | from siclib.settings import EVAL_PATH 16 | 17 | logger = logging.getLogger(__name__) 18 | 19 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 20 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) 21 | 22 | torch.set_grad_enabled(False) 23 | 24 | 25 | def prepare_benchmark(bench_dir: Path, scannetpp_dir: Path): 26 | if bench_dir.exists(): 27 | logger.info(f"Benchmark ScanNet++ already exists at {bench_dir}, skipping preparation.") 28 | return 29 | if not scannetpp_dir.exists(): 30 | raise FileNotFoundError(f"ScanNet++ directory {scannetpp_dir} does not exist.") 31 | 32 | bench_im_dir = bench_dir / "images" 33 | bench_im_dir.mkdir(parents=True) 34 | 35 | # copy eval data to benchmark dir 36 | eval_data = Path(__file__).parent / "scannetpp2k_images.h5" 37 | shutil.copy(eval_data, bench_dir / "images.h5") 38 | 39 | # copy eval images from ScanNet++ to benchmark dir 40 | logger.info(f"Copying evaluation images from {scannetpp_dir} to {bench_im_dir}.") 41 | base_dir = scannetpp_dir / "data" 42 | rel_im_dir = Path("dslr") / "resized_images" 43 | with h5py.File(eval_data, "r") as f: 44 | scene_im_names = list(f.keys()) # format name: "_.JPG" 45 | for scene_im_name in tqdm(scene_im_names): 46 | scene, im_name = scene_im_name.split("_") 47 | im_path = base_dir / scene / rel_im_dir / im_name 48 | shutil.copy(im_path, bench_im_dir / scene_im_name) 49 | 50 | 51 | class ScanNetpp2k(SimplePipeline): 52 | default_conf = { 53 | "data": { 54 | "name": "simple_dataset_rays", 55 | "dataset_dir": "data/scannetpp2k", 56 | "test_img_dir": "${.dataset_dir}/images", 57 | "test_csv": "${.dataset_dir}/images.csv", 58 | "test_h5": "${.dataset_dir}/images.h5", 59 | "augmentations": {"name": "identity"}, 60 | "preprocessing": {"resize": None, "edge_divisible_by": None}, 61 | "test_batch_size": 1, 62 | "cam_id": None, 63 | }, 64 | "model": {}, 65 | "eval": { 66 | "thresholds": [1, 5, 10], 67 | "pixel_thresholds": [0.5, 1, 3, 5], 68 | "num_vis": 10, 69 | "verbose": True, 70 | "eval_on_edit": False, 71 | }, 72 | "url": None, 73 | "scannetpp_root": "data/scannetpp", 74 | } 75 | 76 | export_keys = ["intrinsics"] 77 | 78 | optional_export_keys = [ 79 | # "intrinsics_uncertainty", 80 | # "rays", 81 | # "log_covs", 82 | ] 83 | 84 | def _init(self, conf): 85 | self.verbose = conf.eval.verbose 86 | self.num_vis = self.conf.eval.num_vis 87 | 88 | 
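        # materialize data/scannetpp2k on first use: prepare_benchmark() copies
        # the bundled images.h5 and extracts the listed eval images from the raw
        # ScanNet++ tree; later runs detect the existing directory and skip this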
prepare_benchmark(Path(conf.data.dataset_dir), Path(conf.scannetpp_root)) 89 | 90 | 91 | if __name__ == "__main__": 92 | dataset_name = Path(__file__).stem 93 | parser = get_eval_parser() 94 | args = parser.parse_intermixed_args() 95 | 96 | default_conf = OmegaConf.create(ScanNetpp2k.default_conf) 97 | 98 | # mingle paths 99 | output_dir = Path(EVAL_PATH, dataset_name) # type: ignore 100 | output_dir.mkdir(exist_ok=True, parents=True) 101 | 102 | name, conf = parse_eval_args( 103 | dataset_name, args, "configs/", default_conf, only_custom_model=False 104 | ) 105 | 106 | experiment_dir = output_dir / name 107 | experiment_dir.mkdir(exist_ok=True) 108 | 109 | pipeline = ScanNetpp2k(conf) 110 | s, f, r = pipeline.run( 111 | experiment_dir, 112 | overwrite=args.overwrite, 113 | overwrite_eval=args.overwrite_eval, 114 | ) 115 | 116 | pprint(s) 117 | 118 | if args.plot: 119 | for name, fig in f.items(): 120 | fig.canvas.manager.set_window_title(name) 121 | plt.show() 122 | -------------------------------------------------------------------------------- /siclib/eval/stanford2d3d.py: -------------------------------------------------------------------------------- 1 | import resource 2 | from pathlib import Path 3 | from pprint import pprint 4 | 5 | import matplotlib.pyplot as plt 6 | import torch 7 | from omegaconf import OmegaConf 8 | 9 | from siclib.eval.io import get_eval_parser, parse_eval_args 10 | from siclib.eval.simple_pipeline import SimplePipeline 11 | from siclib.settings import EVAL_PATH 12 | 13 | # flake8: noqa 14 | # mypy: ignore-errors 15 | 16 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 17 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) 18 | 19 | torch.set_grad_enabled(False) 20 | 21 | 22 | class Stanford2D3D(SimplePipeline): 23 | default_conf = { 24 | "data": { 25 | "name": "simple_dataset", 26 | "dataset_dir": "data/stanford2d3d", 27 | "test_img_dir": "${.dataset_dir}/images", 28 | "test_csv": "${.dataset_dir}/images.csv", 29 | "augmentations": {"name": "identity"}, 30 | "preprocessing": {"resize": 320, "edge_divisible_by": 32}, 31 | "test_batch_size": 1, 32 | }, 33 | "model": {}, 34 | "eval": { 35 | "thresholds": [1, 5, 10], 36 | "pixel_thresholds": [0.5, 1, 3, 5], 37 | "num_vis": 10, 38 | "verbose": True, 39 | }, 40 | "url": "https://cvg-data.inf.ethz.ch/GeoCalib_ECCV2024/stanford2d3d.zip", 41 | } 42 | 43 | export_keys = [ 44 | "camera", 45 | "gravity", 46 | ] 47 | 48 | optional_export_keys = [ 49 | "focal_uncertainty", 50 | "vfov_uncertainty", 51 | "roll_uncertainty", 52 | "pitch_uncertainty", 53 | "gravity_uncertainty", 54 | "up_field", 55 | "up_confidence", 56 | "latitude_field", 57 | "latitude_confidence", 58 | ] 59 | 60 | 61 | if __name__ == "__main__": 62 | dataset_name = Path(__file__).stem 63 | parser = get_eval_parser() 64 | args = parser.parse_intermixed_args() 65 | 66 | default_conf = OmegaConf.create(Stanford2D3D.default_conf) 67 | 68 | # mingle paths 69 | output_dir = Path(EVAL_PATH, dataset_name) 70 | output_dir.mkdir(exist_ok=True, parents=True) 71 | 72 | name, conf = parse_eval_args(dataset_name, args, "configs/", default_conf) 73 | 74 | experiment_dir = output_dir / name 75 | experiment_dir.mkdir(exist_ok=True) 76 | 77 | pipeline = Stanford2D3D(conf) 78 | s, f, r = pipeline.run( 79 | experiment_dir, 80 | overwrite=args.overwrite, 81 | overwrite_eval=args.overwrite_eval, 82 | ) 83 | 84 | pprint(s) 85 | 86 | if args.plot: 87 | for name, fig in f.items(): 88 | fig.canvas.manager.set_window_title(name) 89 | plt.show() 90 | 
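# ---------------------------------------------------------------------------
# Illustrative sketch (an assumption, not part of the original file): any of
# these benchmark classes can also be driven programmatically through
# siclib.eval.run_benchmark, which instantiates the pipeline and calls
# EvalPipeline.run with overwrite=True. The model override and output path
# below are placeholders; in practice, conf would come from parse_eval_args
# or one of siclib/eval/configs/*.yaml.
# ---------------------------------------------------------------------------
from pathlib import Path

from omegaconf import OmegaConf

from siclib.eval import run_benchmark

# minimal, hypothetical override: evaluate the pretrained GeoCalib network;
# all other keys fall back to the benchmark's default_conf via OmegaConf.merge
conf = OmegaConf.create({"model": {"name": "networks.geocalib_pretrained"}})
summaries, figures, results = run_benchmark(
    "stanford2d3d", conf, Path("outputs/results/stanford2d3d/demo")
)
print(summaries)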
-------------------------------------------------------------------------------- /siclib/eval/stanford2d3d_rays.py: -------------------------------------------------------------------------------- 1 | import resource 2 | from pathlib import Path 3 | from pprint import pprint 4 | 5 | import matplotlib.pyplot as plt 6 | import torch 7 | from omegaconf import OmegaConf 8 | 9 | from siclib.eval.io import get_eval_parser, parse_eval_args 10 | from siclib.eval.simple_pipeline_rays import SimplePipeline 11 | from siclib.settings import EVAL_PATH # type: ignore 12 | 13 | # flake8: noqa 14 | # mypy: ignore-errors 15 | 16 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 17 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) 18 | 19 | torch.set_grad_enabled(False) 20 | 21 | 22 | class Stanford2D3D(SimplePipeline): 23 | default_conf = { 24 | "data": { 25 | "name": "simple_dataset_rays", 26 | "dataset_dir": "data/stanford2d3d", 27 | "test_img_dir": "${.dataset_dir}/images", 28 | "test_csv": "${.dataset_dir}/images.csv", 29 | "test_h5": "${.dataset_dir}/images.h5", 30 | "augmentations": {"name": "identity"}, 31 | "preprocessing": {"resize": None, "edge_divisible_by": None}, 32 | "test_batch_size": 1, 33 | }, 34 | "model": {}, 35 | "eval": { 36 | "thresholds": [1, 5, 10], 37 | "pixel_thresholds": [0.5, 1, 3, 5], 38 | "num_vis": 10, 39 | "verbose": True, 40 | "eval_on_edit": False, 41 | }, 42 | "url": "https://cvg-data.inf.ethz.ch/GeoCalib_ECCV2024/stanford2d3d.zip", 43 | "url_edit": "https://drive.google.com/uc?id=1VvZvxwlx3FyDoMpq2kUeVhfnDOa6x62J", 44 | } 45 | 46 | export_keys = ["intrinsics"] 47 | 48 | optional_export_keys = [ 49 | # "intrinsics_uncertainty", 50 | # "rays", 51 | # "log_covs", 52 | ] 53 | 54 | 55 | if __name__ == "__main__": 56 | dataset_name = Path(__file__).stem 57 | parser = get_eval_parser() 58 | args = parser.parse_intermixed_args() 59 | 60 | default_conf = OmegaConf.create(Stanford2D3D.default_conf) 61 | 62 | # mingle paths 63 | output_dir = Path(EVAL_PATH, dataset_name) # type: ignore 64 | output_dir.mkdir(exist_ok=True, parents=True) 65 | 66 | name, conf = parse_eval_args( 67 | dataset_name, args, "configs/", default_conf, only_custom_model=False 68 | ) 69 | 70 | experiment_dir = output_dir / name 71 | experiment_dir.mkdir(exist_ok=True) 72 | 73 | pipeline = Stanford2D3D(conf) 74 | s, f, r = pipeline.run( 75 | experiment_dir, 76 | overwrite=args.overwrite, 77 | overwrite_eval=args.overwrite_eval, 78 | ) 79 | 80 | pprint(s) 81 | 82 | if args.plot: 83 | for name, fig in f.items(): 84 | fig.canvas.manager.set_window_title(name) 85 | plt.show() 86 | -------------------------------------------------------------------------------- /siclib/eval/tartanair.py: -------------------------------------------------------------------------------- 1 | import resource 2 | from pathlib import Path 3 | from pprint import pprint 4 | 5 | import matplotlib.pyplot as plt 6 | import torch 7 | from omegaconf import OmegaConf 8 | 9 | from siclib.eval.io import get_eval_parser, parse_eval_args 10 | from siclib.eval.simple_pipeline import SimplePipeline 11 | from siclib.settings import EVAL_PATH 12 | 13 | # flake8: noqa 14 | # mypy: ignore-errors 15 | 16 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 17 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) 18 | 19 | torch.set_grad_enabled(False) 20 | 21 | 22 | class Tartanair(SimplePipeline): 23 | default_conf = { 24 | "data": { 25 | "name": "simple_dataset", 26 | "dataset_dir": "data/tartanair", 27 | "test_img_dir": 
"${.dataset_dir}/images", 28 | "test_csv": "${.dataset_dir}/images.csv", 29 | "augmentations": {"name": "identity"}, 30 | "preprocessing": {"resize": 320, "edge_divisible_by": 32}, 31 | "test_batch_size": 1, 32 | }, 33 | "model": {}, 34 | "eval": { 35 | "thresholds": [1, 5, 10], 36 | "pixel_thresholds": [0.5, 1, 3, 5], 37 | "num_vis": 10, 38 | "verbose": True, 39 | }, 40 | "url": "https://cvg-data.inf.ethz.ch/GeoCalib_ECCV2024/tartanair.zip", 41 | } 42 | 43 | export_keys = [ 44 | "camera", 45 | "gravity", 46 | ] 47 | 48 | optional_export_keys = [ 49 | "focal_uncertainty", 50 | "vfov_uncertainty", 51 | "roll_uncertainty", 52 | "pitch_uncertainty", 53 | "gravity_uncertainty", 54 | "up_field", 55 | "up_confidence", 56 | "latitude_field", 57 | "latitude_confidence", 58 | ] 59 | 60 | 61 | if __name__ == "__main__": 62 | dataset_name = Path(__file__).stem 63 | parser = get_eval_parser() 64 | args = parser.parse_intermixed_args() 65 | 66 | default_conf = OmegaConf.create(Tartanair.default_conf) 67 | 68 | # mingle paths 69 | output_dir = Path(EVAL_PATH, dataset_name) 70 | output_dir.mkdir(exist_ok=True, parents=True) 71 | 72 | name, conf = parse_eval_args(dataset_name, args, "configs/", default_conf) 73 | 74 | experiment_dir = output_dir / name 75 | experiment_dir.mkdir(exist_ok=True) 76 | 77 | pipeline = Tartanair(conf) 78 | s, f, r = pipeline.run( 79 | experiment_dir, 80 | overwrite=args.overwrite, 81 | overwrite_eval=args.overwrite_eval, 82 | ) 83 | 84 | pprint(s) 85 | 86 | if args.plot: 87 | for name, fig in f.items(): 88 | fig.canvas.manager.set_window_title(name) 89 | plt.show() 90 | -------------------------------------------------------------------------------- /siclib/eval/tartanair_rays.py: -------------------------------------------------------------------------------- 1 | import resource 2 | from pathlib import Path 3 | from pprint import pprint 4 | 5 | import matplotlib.pyplot as plt 6 | import torch 7 | from omegaconf import OmegaConf 8 | 9 | from siclib.eval.io import get_eval_parser, parse_eval_args 10 | from siclib.eval.simple_pipeline_rays import SimplePipeline 11 | from siclib.settings import EVAL_PATH # type: ignore 12 | 13 | # flake8: noqa 14 | # mypy: ignore-errors 15 | 16 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 17 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) 18 | 19 | torch.set_grad_enabled(False) 20 | 21 | 22 | class Tartanair(SimplePipeline): 23 | default_conf = { 24 | "data": { 25 | "name": "simple_dataset_rays", 26 | "dataset_dir": "data/tartanair", 27 | "test_img_dir": "${.dataset_dir}/images", 28 | "test_csv": "${.dataset_dir}/images.csv", 29 | "test_h5": "${.dataset_dir}/images.h5", 30 | "augmentations": {"name": "identity"}, 31 | "preprocessing": {"resize": None, "edge_divisible_by": None}, 32 | "test_batch_size": 1, 33 | }, 34 | "model": {}, 35 | "eval": { 36 | "thresholds": [1, 5, 10], 37 | "pixel_thresholds": [0.5, 1, 3, 5], 38 | "num_vis": 5, 39 | "verbose": True, 40 | "eval_on_edit": False, 41 | }, 42 | "url": "https://cvg-data.inf.ethz.ch/GeoCalib_ECCV2024/tartanair.zip", 43 | "url_edit": "https://drive.google.com/uc?id=18uwpZqOxiJUcD2NFa0p3JMP2hBb4YgCX", 44 | } 45 | 46 | export_keys = ["intrinsics"] 47 | 48 | optional_export_keys = [ 49 | # "intrinsics_uncertainty", 50 | # "rays", 51 | # "log_covs", 52 | ] 53 | 54 | 55 | if __name__ == "__main__": 56 | dataset_name = Path(__file__).stem 57 | parser = get_eval_parser() 58 | args = parser.parse_intermixed_args() 59 | 60 | default_conf = OmegaConf.create(Tartanair.default_conf) 61 
| 62 | # mingle paths 63 | output_dir = Path(EVAL_PATH, dataset_name) # type: ignore 64 | output_dir.mkdir(exist_ok=True, parents=True) 65 | 66 | name, conf = parse_eval_args( 67 | dataset_name, args, "configs/", default_conf, only_custom_model=False 68 | ) 69 | 70 | experiment_dir = output_dir / name 71 | experiment_dir.mkdir(exist_ok=True) 72 | 73 | pipeline = Tartanair(conf) 74 | s, f, r = pipeline.run( 75 | experiment_dir, 76 | overwrite=args.overwrite, 77 | overwrite_eval=args.overwrite_eval, 78 | ) 79 | 80 | pprint(s) 81 | 82 | if args.plot: 83 | for name, fig in f.items(): 84 | fig.canvas.manager.set_window_title(name) 85 | plt.show() 86 | -------------------------------------------------------------------------------- /siclib/eval/utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import shutil 3 | from pathlib import Path 4 | 5 | import gdown 6 | import matplotlib.pyplot as plt 7 | import numpy as np 8 | import torch 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | # flake8: noqa 13 | # mypy: ignore-errors 14 | 15 | 16 | def download_and_extract_benchmark(name: str, url: str, output: Path) -> None: 17 | benchmark_dir = output / name 18 | if not output.exists(): 19 | output.mkdir(parents=True) 20 | 21 | if benchmark_dir.exists(): 22 | logger.info(f"Benchmark {name} already exists at {benchmark_dir}, skipping download.") 23 | return 24 | 25 | if name == "stanford2d3d" or name == "stanford2d3d_edit": 26 | # prompt user to sign data sharing and usage terms 27 | txt = "\n" + "#" * 108 + "\n\n" 28 | txt += "To download the Stanford2D3D dataset, you must agree to the terms of use:\n\n" 29 | txt += ( 30 | "https://docs.google.com/forms/d/e/" 31 | + "1FAIpQLScFR0U8WEUtb7tgjOhhnl31OrkEs73-Y8bQwPeXgebqVKNMpQ/viewform?c=0&w=1\n\n" 32 | ) 33 | txt += "#" * 108 + "\n\n" 34 | txt += "Did you fill out the data sharing and usage terms? [y/n] " 35 | choice = input(txt) 36 | if choice.lower() != "y": 37 | raise ValueError( 38 | "You must agree to the terms of use to download the Stanford2D3D dataset." 
39 | ) 40 | 41 | zip_file = output / f"{name}.zip" 42 | 43 | if not zip_file.exists(): 44 | logger.info(f"Downloading benchmark {name} to {zip_file} from {url}.") 45 | if "drive.google.com" in str(url): 46 | gdown.download(url, str(zip_file)) 47 | else: 48 | torch.hub.download_url_to_file(url, str(zip_file)) 49 | 50 | logger.info(f"Extracting benchmark {name} in {output}.") 51 | shutil.unpack_archive(zip_file, output, format="zip") 52 | zip_file.unlink() 53 | 54 | 55 | def check_keys_recursive(d, pattern): 56 | if isinstance(pattern, dict): 57 | {check_keys_recursive(d[k], v) for k, v in pattern.items()} 58 | else: 59 | for k in pattern: 60 | assert k in d.keys() 61 | 62 | 63 | def plot_scatter_grid( 64 | results, x_keys, y_keys, name=None, diag=False, ax=None, line_idx=0, show_means=True 65 | ): # sourcery skip: low-code-quality 66 | if ax is None: 67 | N, M = len(y_keys), len(x_keys) 68 | fig, ax = plt.subplots(N, M, figsize=(M * 6, N * 5)) 69 | 70 | if N == 1: 71 | ax = np.array(ax) 72 | ax = ax.reshape(1, -1) 73 | 74 | if M == 1: 75 | ax = np.array(ax) 76 | ax = ax.reshape(-1, 1) 77 | else: 78 | fig = None 79 | 80 | for j, kx in enumerate(x_keys): 81 | for i, ky in enumerate(y_keys): 82 | ax[i, j].scatter( 83 | results[kx], 84 | results[ky], 85 | s=1, 86 | alpha=0.5, 87 | label=name or None, 88 | ) 89 | 90 | ax[i, j].set_xlabel(f"{' '.join(kx.split('_')).title()}") 91 | ax[i, j].set_ylabel(f"{' '.join(ky.split('_')).title()}") 92 | 93 | low = min(ax[i, j].get_xlim()[0], ax[i, j].get_ylim()[0]) 94 | high = max(ax[i, j].get_xlim()[1], ax[i, j].get_ylim()[1]) 95 | if diag == "all" or (i == j and diag): 96 | ax[i, j].plot([low, high], [low, high], ls="--", c="red", label="y=x") 97 | 98 | if name or diag == "all" or (i == j and diag): 99 | ax[i, j].legend() 100 | 101 | if not show_means: 102 | return fig, ax 103 | 104 | means = {"y": {}, "x": {}} 105 | for kx in x_keys: 106 | for ky in y_keys: 107 | means["x"][kx] = np.mean(results[kx]) 108 | means["y"][ky] = np.mean(results[ky]) 109 | 110 | for j, kx in enumerate(x_keys): 111 | for i, ky in enumerate(y_keys): 112 | xlim = np.min(results[kx]), np.max(results[kx]) 113 | ylim = np.min(results[ky]), np.max(results[ky]) 114 | means_x = [means["x"][kx]] 115 | means_y = [means["y"][ky]] 116 | color = plt.cm.tab10(line_idx) # type:ignore 117 | ax[i, j].vlines(means_x, *ylim, colors=[color]) 118 | ax[i, j].hlines(means_y, *xlim, colors=[color]) 119 | 120 | return fig, ax 121 | -------------------------------------------------------------------------------- /siclib/eval/visual.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import resource 3 | from collections import defaultdict 4 | from math import pi 5 | from pathlib import Path 6 | 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | import torch 10 | from torch import Tensor 11 | from tqdm import tqdm 12 | 13 | from anycalib.cameras import CameraFactory 14 | from anycalib.cameras.base import BaseCamera 15 | from anycalib.manifolds import Unit3 16 | from anycalib.visualization.viz_batch import make_batch_figures 17 | from siclib.datasets import get_dataset 18 | from siclib.eval.eval_pipeline import EvalPipeline 19 | from siclib.eval.io import load_model 20 | from siclib.eval.utils import download_and_extract_benchmark, plot_scatter_grid 21 | from siclib.models.cache_loader import CacheLoader 22 | from siclib.utils.export_predictions import export_predictions 23 | from siclib.utils.tools import AUCMetric 24 | 25 | RAD2DEG = 180 
/ pi 26 | 27 | from siclib.eval.eval_pipeline import exists_eval, load_eval, save_eval 28 | class ResultsPlotter: 29 | """Runs the export+eval loop; relies on the save_conf, get_predictions, get_dataloader and run_eval methods of an EvalPipeline subclass.""" 30 | def run(self, experiment_dir, model=None, overwrite=False, overwrite_eval=False): 31 | """Run export+eval loop""" 32 | self.save_conf(experiment_dir, overwrite=overwrite, overwrite_eval=overwrite_eval) 33 | pred_file = self.get_predictions(experiment_dir, model=model, overwrite=overwrite) 34 | # pred_file = experiment_dir / "predictions.h5" 35 | 36 | f = {} 37 | if not exists_eval(experiment_dir) or overwrite_eval or overwrite: 38 | s, f, r = self.run_eval(self.get_dataloader(self.conf.data, 1), pred_file) 39 | save_eval(experiment_dir, s, f, r) 40 | s, r = load_eval(experiment_dir) 41 | if self.conf.eval.get("delete_cache", False): 42 | for file in ("results.h5", "predictions.h5", "summaries.json", "conf.yaml"): 43 | (experiment_dir / file).unlink() 44 | experiment_dir.rmdir() 45 | return s, f, r 46 | 47 | 48 | if __name__ == "__main__": 49 | import pprint 50 | 51 | from omegaconf import OmegaConf 52 | 53 | from siclib.eval.io import get_eval_parser, parse_eval_args 54 | from siclib.settings import EVAL_PATH # type: ignore 55 | from siclib.eval.simple_pipeline import SimplePipeline 56 | dataset_name = Path(__file__).stem 57 | parser = get_eval_parser() 58 | args = parser.parse_intermixed_args() 59 | 60 | default_conf = OmegaConf.create(SimplePipeline.default_conf) 61 | 62 | # mingle paths 63 | output_dir = Path(EVAL_PATH, dataset_name) 64 | output_dir.mkdir(exist_ok=True, parents=True) 65 | 66 | name, conf = parse_eval_args(dataset_name, args, "configs/", default_conf) 67 | 68 | experiment_dir = output_dir / name 69 | experiment_dir.mkdir(exist_ok=True) 70 | 71 | pipeline = SimplePipeline(conf) 72 | s, f, r = pipeline.run( 73 | experiment_dir, overwrite=args.overwrite, overwrite_eval=args.overwrite_eval 74 | ) 75 | 76 | pprint.pprint(s) 77 | 78 | if args.plot: 79 | for name, fig in f.items(): 80 | fig.canvas.manager.set_window_title(name) 81 | plt.show() 82 | -------------------------------------------------------------------------------- /siclib/geometry/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/javrtg/AnyCalib/7bfa51b5797e8784cc6d438aab96c57c226a699e/siclib/geometry/__init__.py -------------------------------------------------------------------------------- /siclib/geometry/jacobians.py: -------------------------------------------------------------------------------- 1 | """Jacobians for optimization.""" 2 | 3 | import torch 4 | 5 | # flake8: noqa: E741 6 | 7 | 8 | @torch.jit.script 9 | def J_vecnorm(vec: torch.Tensor) -> torch.Tensor: 10 | """Compute the jacobian of vec / norm2(vec). 11 | 12 | Args: 13 | vec (torch.Tensor): [..., D] tensor. 14 | 15 | Returns: 16 | torch.Tensor: [..., D, D] Jacobian. 17 | """ 18 | D = vec.shape[-1] 19 | norm_x = torch.norm(vec, dim=-1, keepdim=True).unsqueeze(-1) # (..., 1, 1) 20 | 21 | if (norm_x == 0).any(): 22 | norm_x = norm_x + 1e-6 23 | 24 | xxT = torch.einsum("...i,...j->...ij", vec, vec) # (..., D, D) 25 | identity = torch.eye(D, device=vec.device, dtype=vec.dtype) # (D, D) 26 | 27 | return identity / norm_x - (xxT / norm_x**3) # (..., D, D) 28 | 29 | 30 | @torch.jit.script 31 | def J_focal2fov(focal: torch.Tensor, h: torch.Tensor) -> torch.Tensor: 32 | """Compute the jacobian of the focal2fov function.""" 33 | return -4 * h / (4 * focal**2 + h**2) 34 | 35 | 36 | @torch.jit.script 37 | def J_up_projection(uv: torch.Tensor, abc: torch.Tensor, wrt: str = "uv") -> torch.Tensor: 38 | """Compute the jacobian of the up-vector projection.
39 | 40 | Args: 41 | uv (torch.Tensor): Normalized image coordinates of shape (..., 2). 42 | abc (torch.Tensor): Gravity vector of shape (..., 3). 43 | wrt (str, optional): Parameter to differentiate with respect to. Defaults to "uv". 44 | 45 | Raises: 46 | ValueError: If the wrt parameter is unknown. 47 | 48 | Returns: 49 | torch.Tensor: Jacobian with respect to the parameter. 50 | """ 51 | if wrt == "uv": 52 | c = abc[..., 2][..., None, None, None] 53 | return -c * torch.eye(2, device=uv.device, dtype=uv.dtype).expand(uv.shape[:-1] + (2, 2)) 54 | 55 | elif wrt == "abc": 56 | J = uv.new_zeros(uv.shape[:-1] + (2, 3)) 57 | J[..., 0, 0] = 1 58 | J[..., 1, 1] = 1 59 | J[..., 0, 2] = -uv[..., 0] 60 | J[..., 1, 2] = -uv[..., 1] 61 | return J 62 | 63 | else: 64 | raise ValueError(f"Unknown wrt: {wrt}") 65 | -------------------------------------------------------------------------------- /siclib/geometry/manifolds.py: -------------------------------------------------------------------------------- 1 | """Implementation of manifolds.""" 2 | 3 | import logging 4 | 5 | import torch 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | class EuclideanManifold: 11 | """Simple euclidean manifold.""" 12 | 13 | @staticmethod 14 | def J_plus(x: torch.Tensor) -> torch.Tensor: 15 | """Plus operator Jacobian.""" 16 | return torch.eye(x.shape[-1]).to(x) 17 | 18 | @staticmethod 19 | def plus(x: torch.Tensor, delta: torch.Tensor) -> torch.Tensor: 20 | """Plus operator.""" 21 | return x + delta 22 | 23 | 24 | class SphericalManifold: 25 | """Implementation of the spherical manifold. 26 | 27 | Following the derivation from 'Integrating Generic Sensor Fusion Algorithms with Sound State 28 | Representations through Encapsulation of Manifolds' by Hertzberg et al. (B.2, p. 25). 29 | 30 | Householder transformation following Algorithm 5.1.1 (p. 210) from 'Matrix Computations' by 31 | Golub et al. 32 | """ 33 | 34 | @staticmethod 35 | def householder_vector(x: torch.Tensor) -> torch.Tensor: 36 | """Return the Householder vector and beta. 37 | 38 | Algorithm 5.1.1 (p. 210) from 'Matrix Computations' by Golub et al. (Johns Hopkins Studies 39 | in Mathematical Sciences) but using the nth element of the input vector as pivot instead of 40 | first. 41 | 42 | This computes the vector v with v(n) = 1 and beta such that H = I - beta * v * v^T is 43 | orthogonal and H * x = ||x||_2 * e_n. 44 | 45 | Args: 46 | x (torch.Tensor): [..., n] tensor. 47 | 48 | Returns: 49 | torch.Tensor: v of shape [..., n] 50 | torch.Tensor: beta of shape [...] 51 | """ 52 | sigma = torch.sum(x[..., :-1] ** 2, -1) 53 | xpiv = x[..., -1] 54 | norm = torch.norm(x, dim=-1) 55 | if torch.any(sigma < 1e-7): 56 | sigma = torch.where(sigma < 1e-7, sigma + 1e-7, sigma) 57 | logger.warning("sigma < 1e-7") 58 | 59 | vpiv = torch.where(xpiv < 0, xpiv - norm, -sigma / (xpiv + norm)) 60 | beta = 2 * vpiv**2 / (sigma + vpiv**2) 61 | v = torch.cat([x[..., :-1] / vpiv[..., None], torch.ones_like(vpiv)[..., None]], -1) 62 | return v, beta 63 | 64 | @staticmethod 65 | def apply_householder(y: torch.Tensor, v: torch.Tensor, beta: torch.Tensor) -> torch.Tensor: 66 | """Apply Householder transformation. 67 | 68 | Args: 69 | y (torch.Tensor): Vector to transform of shape [..., n]. 70 | v (torch.Tensor): Householder vector of shape [..., n]. 71 | beta (torch.Tensor): Householder beta of shape [...]. 72 | 73 | Returns: 74 | torch.Tensor: Transformed vector of shape [..., n]. 
75 | """ 76 | return y - v * (beta * torch.einsum("...i,...i->...", v, y))[..., None] 77 | 78 | @classmethod 79 | def J_plus(cls, x: torch.Tensor) -> torch.Tensor: 80 | """Plus operator Jacobian.""" 81 | v, beta = cls.householder_vector(x) 82 | H = -torch.einsum("..., ...k, ...l->...kl", beta, v, v) 83 | H = H + torch.eye(H.shape[-1]).to(H) 84 | return H[..., :-1] # J 85 | 86 | @classmethod 87 | def plus(cls, x: torch.Tensor, delta: torch.Tensor) -> torch.Tensor: 88 | """Plus operator. 89 | 90 | Equation 109 (p. 25) from 'Integrating Generic Sensor Fusion Algorithms with Sound State 91 | Representations through Encapsulation of Manifolds' by Hertzberg et al. but using the nth 92 | element of the input vector as pivot instead of first. 93 | 94 | Args: 95 | x: point on the manifold 96 | delta: tangent vector 97 | """ 98 | eps = 1e-7 99 | # keep norm is not equal to 1 100 | nx = torch.norm(x, dim=-1, keepdim=True) 101 | nd = torch.norm(delta, dim=-1, keepdim=True) 102 | 103 | # make sure we don't divide by zero in backward as torch.where computes grad for both 104 | # branches 105 | nd_ = torch.where(nd < eps, nd + eps, nd) 106 | sinc = torch.where(nd < eps, nd.new_ones(nd.shape), torch.sin(nd_) / nd_) 107 | 108 | # cos is applied to last dim instead of first 109 | exp_delta = torch.cat([sinc * delta, torch.cos(nd)], -1) 110 | 111 | v, beta = cls.householder_vector(x) 112 | return nx * cls.apply_householder(exp_delta, v, beta) 113 | -------------------------------------------------------------------------------- /siclib/models/__init__.py: -------------------------------------------------------------------------------- 1 | import importlib.util 2 | 3 | from siclib.models.base_model import BaseModel 4 | from siclib.utils.tools import get_class 5 | 6 | 7 | def get_model(name): 8 | import_paths = [ 9 | name, 10 | f"{__name__}.{name}", 11 | ] 12 | for path in import_paths: 13 | try: 14 | spec = importlib.util.find_spec(path) 15 | except ModuleNotFoundError: 16 | spec = None 17 | if spec is not None: 18 | try: 19 | return get_class(path, BaseModel) 20 | except AssertionError: 21 | mod = __import__(path, fromlist=[""]) 22 | try: 23 | return mod.__main_model__ 24 | except AttributeError as exc: 25 | print(exc) 26 | continue 27 | 28 | raise RuntimeError(f'Model {name} not found in any of [{" ".join(import_paths)}]') 29 | -------------------------------------------------------------------------------- /siclib/models/cache_loader.py: -------------------------------------------------------------------------------- 1 | import string 2 | 3 | import h5py 4 | import torch 5 | 6 | from siclib.datasets.base_dataset import collate 7 | from siclib.models.base_model import BaseModel 8 | from siclib.settings import DATA_PATH 9 | from siclib.utils.tensor import batch_to_device 10 | 11 | # flake8: noqa 12 | # mypy: ignore-errors 13 | 14 | 15 | def pad_line_features(pred, seq_l: int = None): 16 | raise NotImplementedError 17 | 18 | 19 | def recursive_load(grp, pkeys): 20 | return { 21 | k: ( 22 | torch.from_numpy(grp[k].__array__()) 23 | if isinstance(grp[k], h5py.Dataset) 24 | else recursive_load(grp[k], list(grp.keys())) 25 | ) 26 | for k in pkeys 27 | } 28 | 29 | 30 | class CacheLoader(BaseModel): 31 | default_conf = { 32 | "path": "???", # can be a format string like exports/{scene}/ 33 | "data_keys": None, # load all keys 34 | "device": None, # load to same device as data 35 | "trainable": False, 36 | "add_data_path": True, 37 | "collate": True, 38 | "scale": ["keypoints"], 39 | "padding_fn": None, 40 | 
"padding_length": None, # required for batching! 41 | "numeric_type": "float32", # [None, "float16", "float32", "float64"] 42 | } 43 | 44 | required_data_keys = ["name"] # we need an identifier 45 | 46 | def _init(self, conf): 47 | self.hfiles = {} 48 | self.padding_fn = conf.padding_fn 49 | if self.padding_fn is not None: 50 | self.padding_fn = eval(self.padding_fn) 51 | self.numeric_dtype = { 52 | None: None, 53 | "float16": torch.float16, 54 | "float32": torch.float32, 55 | "float64": torch.float64, 56 | }[conf.numeric_type] 57 | 58 | def _forward(self, data): # sourcery skip: low-code-quality 59 | preds = [] 60 | device = self.conf.device 61 | if not device: 62 | if devices := {v.device for v in data.values() if isinstance(v, torch.Tensor)}: 63 | assert len(devices) == 1 64 | device = devices.pop() 65 | 66 | else: 67 | device = "cpu" 68 | 69 | var_names = [x[1] for x in string.Formatter().parse(self.conf.path) if x[1]] 70 | for i, name in enumerate(data["name"]): 71 | fpath = self.conf.path.format(**{k: data[k][i] for k in var_names}) 72 | if self.conf.add_data_path: 73 | fpath = DATA_PATH / fpath 74 | hfile = h5py.File(str(fpath), "r") 75 | grp = hfile[name] 76 | pkeys = self.conf.data_keys if self.conf.data_keys is not None else grp.keys() 77 | pred = recursive_load(grp, pkeys) 78 | if self.numeric_dtype is not None: 79 | pred = { 80 | k: ( 81 | v 82 | if not isinstance(v, torch.Tensor) or not torch.is_floating_point(v) 83 | else v.to(dtype=self.numeric_dtype) 84 | ) 85 | for k, v in pred.items() 86 | } 87 | pred = batch_to_device(pred, device) 88 | for k, v in pred.items(): 89 | for pattern in self.conf.scale: 90 | if k.startswith(pattern): 91 | view_idx = k.replace(pattern, "") 92 | scales = ( 93 | data["scales"] 94 | if len(view_idx) == 0 95 | else data[f"view{view_idx}"]["scales"] 96 | ) 97 | pred[k] = pred[k] * scales[i] 98 | # use this function to fix number of keypoints etc. 99 | if self.padding_fn is not None: 100 | pred = self.padding_fn(pred, self.conf.padding_length) 101 | preds.append(pred) 102 | hfile.close() 103 | if self.conf.collate: 104 | return batch_to_device(collate(preds), device) 105 | assert len(preds) == 1 106 | return batch_to_device(preds[0], device) 107 | 108 | def loss(self, pred, data): 109 | raise NotImplementedError 110 | -------------------------------------------------------------------------------- /siclib/models/decoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/javrtg/AnyCalib/7bfa51b5797e8784cc6d438aab96c57c226a699e/siclib/models/decoders/__init__.py -------------------------------------------------------------------------------- /siclib/models/decoders/perspective_decoder.py: -------------------------------------------------------------------------------- 1 | """Perspective fields decoder heads. 
2 | 3 | Adapted from https://github.com/jinlinyi/PerspectiveFields 4 | """ 5 | 6 | import logging 7 | 8 | from siclib.models import get_model 9 | from siclib.models.base_model import BaseModel 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | # flake8: noqa 14 | # mypy: ignore-errors 15 | 16 | 17 | class PerspectiveDecoder(BaseModel): 18 | default_conf = { 19 | "up_decoder": {"name": "decoders.up_decoder"}, 20 | "latitude_decoder": {"name": "decoders.latitude_decoder"}, 21 | } 22 | 23 | required_data_keys = ["features"] 24 | 25 | def _init(self, conf): 26 | logger.debug(f"Initializing PerspectiveDecoder with config: {conf}") 27 | self.use_up = conf.up_decoder is not None 28 | self.use_latitude = conf.latitude_decoder is not None 29 | 30 | if self.use_up: 31 | self.up_head = get_model(conf.up_decoder.name)(conf.up_decoder) 32 | 33 | if self.use_latitude: 34 | self.latitude_head = get_model(conf.latitude_decoder.name)(conf.latitude_decoder) 35 | 36 | def _forward(self, data): 37 | out_up = self.up_head(data) if self.use_up else {} 38 | out_lat = self.latitude_head(data) if self.use_latitude else {} 39 | return out_up | out_lat 40 | 41 | def loss(self, pred, data): 42 | ref = data["up_field"] if self.use_up else data["latitude_field"] 43 | 44 | total = ref.new_zeros(ref.shape[0]) 45 | losses, metrics = {}, {} 46 | if self.use_up: 47 | up_losses, up_metrics = self.up_head.loss(pred, data) 48 | losses |= up_losses 49 | metrics |= up_metrics 50 | total = total + losses.get("up_total", 0) 51 | 52 | if self.use_latitude: 53 | latitude_losses, latitude_metrics = self.latitude_head.loss(pred, data) 54 | losses |= latitude_losses 55 | metrics |= latitude_metrics 56 | total = total + losses.get("latitude_total", 0) 57 | 58 | losses["perspective_total"] = total 59 | return losses, metrics 60 | -------------------------------------------------------------------------------- /siclib/models/encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/javrtg/AnyCalib/7bfa51b5797e8784cc6d438aab96c57c226a699e/siclib/models/encoders/__init__.py -------------------------------------------------------------------------------- /siclib/models/encoders/dinov2_layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # 3 | # This source code is licensed under the Apache License, Version 2.0 4 | # found in the LICENSE file in the root directory of this source tree. 5 | 6 | from .dino_head import DINOHead 7 | from .mlp import Mlp 8 | from .patch_embed import PatchEmbed 9 | from .swiglu_ffn import SwiGLUFFN, SwiGLUFFNFused 10 | from .block import NestedTensorBlock 11 | from .attention import MemEffAttention 12 | -------------------------------------------------------------------------------- /siclib/models/encoders/dinov2_layers/attention.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # 3 | # This source code is licensed under the Apache License, Version 2.0 4 | # found in the LICENSE file in the root directory of this source tree. 
5 | 6 | # References: 7 | # https://github.com/facebookresearch/dino/blob/master/vision_transformer.py 8 | # https://github.com/rwightman/pytorch-image-models/tree/master/timm/models/vision_transformer.py 9 | 10 | import logging 11 | import os 12 | import warnings 13 | 14 | from torch import Tensor 15 | from torch import nn 16 | 17 | 18 | logger = logging.getLogger("dinov2") 19 | 20 | 21 | XFORMERS_ENABLED = os.environ.get("XFORMERS_DISABLED") is None 22 | try: 23 | if XFORMERS_ENABLED: 24 | from xformers.ops import memory_efficient_attention, unbind 25 | 26 | XFORMERS_AVAILABLE = True 27 | warnings.warn("xFormers is available (Attention)") 28 | else: 29 | warnings.warn("xFormers is disabled (Attention)") 30 | raise ImportError 31 | except ImportError: 32 | XFORMERS_AVAILABLE = False 33 | warnings.warn("xFormers is not available (Attention)") 34 | 35 | 36 | class Attention(nn.Module): 37 | def __init__( 38 | self, 39 | dim: int, 40 | num_heads: int = 8, 41 | qkv_bias: bool = False, 42 | proj_bias: bool = True, 43 | attn_drop: float = 0.0, 44 | proj_drop: float = 0.0, 45 | ) -> None: 46 | super().__init__() 47 | self.num_heads = num_heads 48 | head_dim = dim // num_heads 49 | self.scale = head_dim**-0.5 50 | 51 | self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) 52 | self.attn_drop = nn.Dropout(attn_drop) 53 | self.proj = nn.Linear(dim, dim, bias=proj_bias) 54 | self.proj_drop = nn.Dropout(proj_drop) 55 | 56 | def forward(self, x: Tensor) -> Tensor: 57 | B, N, C = x.shape 58 | qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) 59 | 60 | q, k, v = qkv[0] * self.scale, qkv[1], qkv[2] 61 | attn = q @ k.transpose(-2, -1) 62 | 63 | attn = attn.softmax(dim=-1) 64 | attn = self.attn_drop(attn) 65 | 66 | x = (attn @ v).transpose(1, 2).reshape(B, N, C) 67 | x = self.proj(x) 68 | x = self.proj_drop(x) 69 | return x 70 | 71 | 72 | class MemEffAttention(Attention): 73 | def forward(self, x: Tensor, attn_bias=None) -> Tensor: 74 | if not XFORMERS_AVAILABLE: 75 | if attn_bias is not None: 76 | raise AssertionError("xFormers is required for using nested tensors") 77 | return super().forward(x) 78 | 79 | B, N, C = x.shape 80 | qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads) 81 | 82 | q, k, v = unbind(qkv, 2) 83 | 84 | x = memory_efficient_attention(q, k, v, attn_bias=attn_bias) 85 | x = x.reshape([B, N, C]) 86 | 87 | x = self.proj(x) 88 | x = self.proj_drop(x) 89 | return x 90 | -------------------------------------------------------------------------------- /siclib/models/encoders/dinov2_layers/dino_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # 3 | # This source code is licensed under the Apache License, Version 2.0 4 | # found in the LICENSE file in the root directory of this source tree. 
5 | 6 | import torch 7 | import torch.nn as nn 8 | from torch.nn.init import trunc_normal_ 9 | from torch.nn.utils import weight_norm 10 | 11 | 12 | class DINOHead(nn.Module): 13 | def __init__( 14 | self, 15 | in_dim, 16 | out_dim, 17 | use_bn=False, 18 | nlayers=3, 19 | hidden_dim=2048, 20 | bottleneck_dim=256, 21 | mlp_bias=True, 22 | ): 23 | super().__init__() 24 | nlayers = max(nlayers, 1) 25 | self.mlp = _build_mlp(nlayers, in_dim, bottleneck_dim, hidden_dim=hidden_dim, use_bn=use_bn, bias=mlp_bias) 26 | self.apply(self._init_weights) 27 | self.last_layer = weight_norm(nn.Linear(bottleneck_dim, out_dim, bias=False)) 28 | self.last_layer.weight_g.data.fill_(1) 29 | 30 | def _init_weights(self, m): 31 | if isinstance(m, nn.Linear): 32 | trunc_normal_(m.weight, std=0.02) 33 | if isinstance(m, nn.Linear) and m.bias is not None: 34 | nn.init.constant_(m.bias, 0) 35 | 36 | def forward(self, x): 37 | x = self.mlp(x) 38 | eps = 1e-6 if x.dtype == torch.float16 else 1e-12 39 | x = nn.functional.normalize(x, dim=-1, p=2, eps=eps) 40 | x = self.last_layer(x) 41 | return x 42 | 43 | 44 | def _build_mlp(nlayers, in_dim, bottleneck_dim, hidden_dim=None, use_bn=False, bias=True): 45 | if nlayers == 1: 46 | return nn.Linear(in_dim, bottleneck_dim, bias=bias) 47 | else: 48 | layers = [nn.Linear(in_dim, hidden_dim, bias=bias)] 49 | if use_bn: 50 | layers.append(nn.BatchNorm1d(hidden_dim)) 51 | layers.append(nn.GELU()) 52 | for _ in range(nlayers - 2): 53 | layers.append(nn.Linear(hidden_dim, hidden_dim, bias=bias)) 54 | if use_bn: 55 | layers.append(nn.BatchNorm1d(hidden_dim)) 56 | layers.append(nn.GELU()) 57 | layers.append(nn.Linear(hidden_dim, bottleneck_dim, bias=bias)) 58 | return nn.Sequential(*layers) 59 | -------------------------------------------------------------------------------- /siclib/models/encoders/dinov2_layers/drop_path.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # 3 | # This source code is licensed under the Apache License, Version 2.0 4 | # found in the LICENSE file in the root directory of this source tree. 5 | 6 | # References: 7 | # https://github.com/facebookresearch/dino/blob/master/vision_transformer.py 8 | # https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/drop.py 9 | 10 | 11 | from torch import nn 12 | 13 | 14 | def drop_path(x, drop_prob: float = 0.0, training: bool = False): 15 | if drop_prob == 0.0 or not training: 16 | return x 17 | keep_prob = 1 - drop_prob 18 | shape = (x.shape[0],) + (1,) * (x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets 19 | random_tensor = x.new_empty(shape).bernoulli_(keep_prob) 20 | if keep_prob > 0.0: 21 | random_tensor.div_(keep_prob) 22 | output = x * random_tensor 23 | return output 24 | 25 | 26 | class DropPath(nn.Module): 27 | """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).""" 28 | 29 | def __init__(self, drop_prob=None): 30 | super(DropPath, self).__init__() 31 | self.drop_prob = drop_prob 32 | 33 | def forward(self, x): 34 | return drop_path(x, self.drop_prob, self.training) 35 | -------------------------------------------------------------------------------- /siclib/models/encoders/dinov2_layers/layer_scale.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 
2 | # 3 | # This source code is licensed under the Apache License, Version 2.0 4 | # found in the LICENSE file in the root directory of this source tree. 5 | 6 | # Modified from: https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/vision_transformer.py#L103-L110 7 | 8 | from typing import Union 9 | 10 | import torch 11 | from torch import Tensor 12 | from torch import nn 13 | 14 | 15 | class LayerScale(nn.Module): 16 | def __init__( 17 | self, 18 | dim: int, 19 | init_values: Union[float, Tensor] = 1e-5, 20 | inplace: bool = False, 21 | ) -> None: 22 | super().__init__() 23 | self.inplace = inplace 24 | self.gamma = nn.Parameter(init_values * torch.ones(dim)) 25 | 26 | def forward(self, x: Tensor) -> Tensor: 27 | return x.mul_(self.gamma) if self.inplace else x * self.gamma 28 | -------------------------------------------------------------------------------- /siclib/models/encoders/dinov2_layers/mlp.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # 3 | # This source code is licensed under the Apache License, Version 2.0 4 | # found in the LICENSE file in the root directory of this source tree. 5 | 6 | # References: 7 | # https://github.com/facebookresearch/dino/blob/master/vision_transformer.py 8 | # https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/mlp.py 9 | 10 | 11 | from typing import Callable, Optional 12 | 13 | from torch import Tensor, nn 14 | 15 | 16 | class Mlp(nn.Module): 17 | def __init__( 18 | self, 19 | in_features: int, 20 | hidden_features: Optional[int] = None, 21 | out_features: Optional[int] = None, 22 | act_layer: Callable[..., nn.Module] = nn.GELU, 23 | drop: float = 0.0, 24 | bias: bool = True, 25 | ) -> None: 26 | super().__init__() 27 | out_features = out_features or in_features 28 | hidden_features = hidden_features or in_features 29 | self.fc1 = nn.Linear(in_features, hidden_features, bias=bias) 30 | self.act = act_layer() 31 | self.fc2 = nn.Linear(hidden_features, out_features, bias=bias) 32 | self.drop = nn.Dropout(drop) 33 | 34 | def forward(self, x: Tensor) -> Tensor: 35 | x = self.fc1(x) 36 | x = self.act(x) 37 | x = self.drop(x) 38 | x = self.fc2(x) 39 | x = self.drop(x) 40 | return x 41 | -------------------------------------------------------------------------------- /siclib/models/encoders/dinov2_layers/patch_embed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # 3 | # This source code is licensed under the Apache License, Version 2.0 4 | # found in the LICENSE file in the root directory of this source tree. 5 | 6 | # References: 7 | # https://github.com/facebookresearch/dino/blob/master/vision_transformer.py 8 | # https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/patch_embed.py 9 | 10 | from typing import Callable, Optional, Tuple, Union 11 | 12 | from torch import Tensor 13 | import torch.nn as nn 14 | 15 | 16 | def make_2tuple(x): 17 | if isinstance(x, tuple): 18 | assert len(x) == 2 19 | return x 20 | 21 | assert isinstance(x, int) 22 | return (x, x) 23 | 24 | 25 | class PatchEmbed(nn.Module): 26 | """ 27 | 2D image to patch embedding: (B,C,H,W) -> (B,N,D) 28 | 29 | Args: 30 | img_size: Image size. 31 | patch_size: Patch token size. 32 | in_chans: Number of input image channels. 33 | embed_dim: Number of linear projection output channels. 34 | norm_layer: Normalization layer. 
35 | """ 36 | 37 | def __init__( 38 | self, 39 | img_size: Union[int, Tuple[int, int]] = 224, 40 | patch_size: Union[int, Tuple[int, int]] = 16, 41 | in_chans: int = 3, 42 | embed_dim: int = 768, 43 | norm_layer: Optional[Callable] = None, 44 | flatten_embedding: bool = True, 45 | ) -> None: 46 | super().__init__() 47 | 48 | image_HW = make_2tuple(img_size) 49 | patch_HW = make_2tuple(patch_size) 50 | patch_grid_size = ( 51 | image_HW[0] // patch_HW[0], 52 | image_HW[1] // patch_HW[1], 53 | ) 54 | 55 | self.img_size = image_HW 56 | self.patch_size = patch_HW 57 | self.patches_resolution = patch_grid_size 58 | self.num_patches = patch_grid_size[0] * patch_grid_size[1] 59 | 60 | self.in_chans = in_chans 61 | self.embed_dim = embed_dim 62 | 63 | self.flatten_embedding = flatten_embedding 64 | 65 | self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_HW, stride=patch_HW) 66 | self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity() 67 | 68 | def forward(self, x: Tensor) -> Tensor: 69 | _, _, H, W = x.shape 70 | patch_H, patch_W = self.patch_size 71 | 72 | assert H % patch_H == 0, f"Input image height {H} is not a multiple of patch height {patch_H}" 73 | assert W % patch_W == 0, f"Input image width {W} is not a multiple of patch width: {patch_W}" 74 | 75 | x = self.proj(x) # B C H W 76 | H, W = x.size(2), x.size(3) 77 | x = x.flatten(2).transpose(1, 2) # B HW C 78 | x = self.norm(x) 79 | if not self.flatten_embedding: 80 | x = x.reshape(-1, H, W, self.embed_dim) # B H W C 81 | return x 82 | 83 | def flops(self) -> float: 84 | Ho, Wo = self.patches_resolution 85 | flops = Ho * Wo * self.embed_dim * self.in_chans * (self.patch_size[0] * self.patch_size[1]) 86 | if self.norm is not None: 87 | flops += Ho * Wo * self.embed_dim 88 | return flops 89 | -------------------------------------------------------------------------------- /siclib/models/encoders/dinov2_layers/swiglu_ffn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # 3 | # This source code is licensed under the Apache License, Version 2.0 4 | # found in the LICENSE file in the root directory of this source tree. 
5 | 6 | import os 7 | from typing import Callable, Optional 8 | import warnings 9 | 10 | from torch import Tensor, nn 11 | import torch.nn.functional as F 12 | 13 | 14 | class SwiGLUFFN(nn.Module): 15 | def __init__( 16 | self, 17 | in_features: int, 18 | hidden_features: Optional[int] = None, 19 | out_features: Optional[int] = None, 20 | act_layer: Callable[..., nn.Module] = None, 21 | drop: float = 0.0, 22 | bias: bool = True, 23 | ) -> None: 24 | super().__init__() 25 | out_features = out_features or in_features 26 | hidden_features = hidden_features or in_features 27 | self.w12 = nn.Linear(in_features, 2 * hidden_features, bias=bias) 28 | self.w3 = nn.Linear(hidden_features, out_features, bias=bias) 29 | 30 | def forward(self, x: Tensor) -> Tensor: 31 | x12 = self.w12(x) 32 | x1, x2 = x12.chunk(2, dim=-1) 33 | hidden = F.silu(x1) * x2 34 | return self.w3(hidden) 35 | 36 | 37 | XFORMERS_ENABLED = os.environ.get("XFORMERS_DISABLED") is None 38 | try: 39 | if XFORMERS_ENABLED: 40 | from xformers.ops import SwiGLU 41 | 42 | XFORMERS_AVAILABLE = True 43 | warnings.warn("xFormers is available (SwiGLU)") 44 | else: 45 | warnings.warn("xFormers is disabled (SwiGLU)") 46 | raise ImportError 47 | except ImportError: 48 | SwiGLU = SwiGLUFFN 49 | XFORMERS_AVAILABLE = False 50 | 51 | warnings.warn("xFormers is not available (SwiGLU)") 52 | 53 | 54 | class SwiGLUFFNFused(SwiGLU): 55 | def __init__( 56 | self, 57 | in_features: int, 58 | hidden_features: Optional[int] = None, 59 | out_features: Optional[int] = None, 60 | act_layer: Callable[..., nn.Module] = None, 61 | drop: float = 0.0, 62 | bias: bool = True, 63 | ) -> None: 64 | out_features = out_features or in_features 65 | hidden_features = hidden_features or in_features 66 | hidden_features = (int(hidden_features * 2 / 3) + 7) // 8 * 8 67 | super().__init__( 68 | in_features=in_features, 69 | hidden_features=hidden_features, 70 | out_features=out_features, 71 | bias=bias, 72 | ) 73 | -------------------------------------------------------------------------------- /siclib/models/encoders/low_level_encoder.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import torch.nn as nn 4 | 5 | from siclib.models.base_model import BaseModel 6 | from siclib.models.utils.modules import ConvModule 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | # flake8: noqa 11 | # mypy: ignore-errors 12 | 13 | 14 | class LowLevelEncoder(BaseModel): 15 | default_conf = { 16 | "feat_dim": 64, 17 | "in_channel": 3, 18 | "keep_resolution": True, 19 | } 20 | 21 | required_data_keys = ["image"] 22 | 23 | def _init(self, conf): 24 | logger.debug(f"Initializing LowLevelEncoder with {conf}") 25 | 26 | if self.conf.keep_resolution: 27 | self.conv1 = ConvModule(conf.in_channel, conf.feat_dim, kernel_size=3, padding=1) 28 | self.conv2 = ConvModule(conf.feat_dim, conf.feat_dim, kernel_size=3, padding=1) 29 | else: 30 | self.conv1 = nn.Conv2d( 31 | conf.in_channel, conf.feat_dim, kernel_size=7, stride=2, padding=3, bias=False 32 | ) 33 | self.bn1 = nn.BatchNorm2d(conf.feat_dim) 34 | self.relu = nn.ReLU(inplace=True) 35 | 36 | def _forward(self, data): 37 | x = data["image"] 38 | 39 | assert ( 40 | x.shape[-1] % 32 == 0 and x.shape[-2] % 32 == 0 41 | ), "Image size must be multiple of 32 if not using single image input." 
42 | 43 | if self.conf.keep_resolution: 44 | c1 = self.conv1(x) 45 | c2 = self.conv2(c1) 46 | else: 47 | x = self.conv1(x) 48 | x = self.bn1(x) 49 | c2 = self.relu(x) 50 | 51 | return {"features": c2} 52 | 53 | def loss(self, pred, data): 54 | raise NotImplementedError 55 | -------------------------------------------------------------------------------- /siclib/models/encoders/resnet.py: -------------------------------------------------------------------------------- 1 | """Basic ResNet encoder for image feature extraction. 2 | 3 | https://pytorch.org/hub/pytorch_vision_resnet/ 4 | """ 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torchvision 9 | from torchvision.models.feature_extraction import create_feature_extractor 10 | 11 | from siclib.models.base_model import BaseModel 12 | 13 | # mypy: ignore-errors 14 | 15 | 16 | def remove_conv_stride(conv): 17 | """Remove the stride from a convolutional layer.""" 18 | conv_new = nn.Conv2d( 19 | conv.in_channels, 20 | conv.out_channels, 21 | conv.kernel_size, 22 | bias=conv.bias is not None, 23 | stride=1, 24 | padding=conv.padding, 25 | ) 26 | conv_new.weight = conv.weight 27 | conv_new.bias = conv.bias 28 | return conv_new 29 | 30 | 31 | class ResNet(BaseModel): 32 | """ResNet encoder for image features extraction.""" 33 | 34 | default_conf = { 35 | "encoder": "resnet18", 36 | "pretrained": True, 37 | "input_dim": 3, 38 | "remove_stride_from_first_conv": True, 39 | "num_downsample": None, # how many downsample bloc 40 | "pixel_mean": [0.485, 0.456, 0.406], 41 | "pixel_std": [0.229, 0.224, 0.225], 42 | } 43 | 44 | required_data_keys = ["image"] 45 | 46 | def build_encoder(self, conf): 47 | """Build the encoder from the configuration.""" 48 | if conf.pretrained: 49 | assert conf.input_dim == 3 50 | 51 | Encoder = getattr(torchvision.models, conf.encoder) 52 | 53 | layers = ["layer1", "layer2", "layer3", "layer4"] 54 | kw = {"replace_stride_with_dilation": [False, False, False]} 55 | 56 | if conf.num_downsample is not None: 57 | layers = layers[: conf.num_downsample] 58 | 59 | encoder = Encoder(weights="DEFAULT" if conf.pretrained else None, **kw) 60 | encoder = create_feature_extractor(encoder, return_nodes=layers) 61 | 62 | if conf.remove_stride_from_first_conv: 63 | encoder.conv1 = remove_conv_stride(encoder.conv1) 64 | 65 | return encoder, layers 66 | 67 | def _init(self, conf): 68 | self.register_buffer("pixel_mean", torch.tensor(conf.pixel_mean).view(-1, 1, 1), False) 69 | self.register_buffer("pixel_std", torch.tensor(conf.pixel_std).view(-1, 1, 1), False) 70 | 71 | self.encoder, self.layers = self.build_encoder(conf) 72 | 73 | def _forward(self, data): 74 | image = data["image"] 75 | image = (image - self.pixel_mean) / self.pixel_std 76 | skip_features = list(self.encoder(image).values()) 77 | 78 | # print(f"skip_features: {[f.shape for f in skip_features]}") 79 | return {"features": skip_features} 80 | 81 | def loss(self, pred, data): 82 | """Compute the loss.""" 83 | raise NotImplementedError 84 | -------------------------------------------------------------------------------- /siclib/models/encoders/vgg.py: -------------------------------------------------------------------------------- 1 | """Simple VGG encoder for image features extraction.""" 2 | 3 | import torch 4 | import torchvision 5 | from torchvision.models.feature_extraction import create_feature_extractor 6 | 7 | from siclib.models.base_model import BaseModel 8 | 9 | # mypy: ignore-errors 10 | 11 | 12 | class VGG(BaseModel): 13 | """VGG encoder for image features 
extraction.""" 14 | 15 | default_conf = { 16 | "encoder": "vgg13", 17 | "pretrained": True, 18 | "input_dim": 3, 19 | "num_downsample": None, # how many downsample blocs to use 20 | "pixel_mean": [0.485, 0.456, 0.406], 21 | "pixel_std": [0.229, 0.224, 0.225], 22 | } 23 | 24 | required_data_keys = ["image"] 25 | 26 | def build_encoder(self, conf): 27 | """Build the encoder from the configuration.""" 28 | if conf.pretrained: 29 | assert conf.input_dim == 3 30 | 31 | Encoder = getattr(torchvision.models, conf.encoder) 32 | 33 | kw = {} 34 | if conf.encoder == "vgg13": 35 | layers = [ 36 | "features.3", 37 | "features.8", 38 | "features.13", 39 | "features.18", 40 | "features.23", 41 | ] 42 | elif conf.encoder == "vgg16": 43 | layers = [ 44 | "features.3", 45 | "features.8", 46 | "features.15", 47 | "features.22", 48 | "features.29", 49 | ] 50 | else: 51 | raise NotImplementedError(f"Encoder not implemented: {conf.encoder}") 52 | 53 | if conf.num_downsample is not None: 54 | layers = layers[: conf.num_downsample] 55 | 56 | encoder = Encoder(weights="DEFAULT" if conf.pretrained else None, **kw) 57 | encoder = create_feature_extractor(encoder, return_nodes=layers) 58 | 59 | return encoder, layers 60 | 61 | def _init(self, conf): 62 | self.register_buffer("pixel_mean", torch.tensor(conf.pixel_mean).view(-1, 1, 1), False) 63 | self.register_buffer("pixel_std", torch.tensor(conf.pixel_std).view(-1, 1, 1), False) 64 | 65 | self.encoder, self.layers = self.build_encoder(conf) 66 | 67 | def _forward(self, data): 68 | image = data["image"] 69 | image = (image - self.pixel_mean) / self.pixel_std 70 | skip_features = self.encoder(image).values() 71 | return {"features": skip_features} 72 | 73 | def loss(self, pred, data): 74 | """Compute the loss.""" 75 | raise NotImplementedError 76 | -------------------------------------------------------------------------------- /siclib/models/networks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/javrtg/AnyCalib/7bfa51b5797e8784cc6d438aab96c57c226a699e/siclib/models/networks/__init__.py -------------------------------------------------------------------------------- /siclib/models/networks/anycalib_pretrained.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | 3 | from anycalib.manifolds import Unit3 4 | from anycalib.model import AnyCalib 5 | from siclib.models.base_model import BaseModel 6 | 7 | 8 | class AnyCalibPretrained(BaseModel): 9 | """AnyCalib pretrained model for evaluation.""" 10 | 11 | default_conf = { 12 | "model_id": "anycalib_pinhole", 13 | "nonlin_opt_method": "gauss_newton", 14 | "nonlin_opt_conf": None, 15 | "init_with_sac": False, 16 | "fallback_to_sac": True, 17 | "ransac_conf": None, 18 | "rm_borders": 0, 19 | "sample_size": -1, 20 | } 21 | 22 | def _init(self, conf): 23 | """Initialize pretrained AnyCalib model.""" 24 | self.model = AnyCalib( 25 | model_id=conf.model_id, 26 | nonlin_opt_method=conf.nonlin_opt_method, 27 | nonlin_opt_conf=conf.nonlin_opt_conf, 28 | init_with_sac=conf.init_with_sac, 29 | fallback_to_sac=conf.fallback_to_sac, 30 | ransac_conf=conf.ransac_conf, 31 | rm_borders=conf.rm_borders, 32 | sample_size=conf.sample_size, 33 | ) 34 | 35 | def _forward(self, data: dict): 36 | assert len(data["image"]) == 1, "Batch size must be 1" 37 | pred = self.model.predict(data["image"], data["cam_id"]) 38 | # upsample tangent_coords (FoV field) to input resolution for visualization 39 | 
h, w = pred["pred_size"] 40 | ho, wo = data["image"].shape[-2:] 41 | pred["tangent_coords"] = ( 42 | F.interpolate( 43 | pred["tangent_coords"].view(1, h, w, 2).permute(0, 3, 1, 2), 44 | size=(ho, wo), 45 | mode="bilinear", 46 | align_corners=False, 47 | ) 48 | .permute(0, 2, 3, 1) 49 | .view(1, ho * wo, 2) 50 | ) 51 | # map to rays 52 | pred["rays"] = Unit3.expmap_at_z1(pred["tangent_coords"]) 53 | return pred 54 | 55 | def metrics(self, pred, data): 56 | raise NotImplementedError 57 | 58 | def loss(self, pred, data): 59 | raise NotImplementedError 60 | -------------------------------------------------------------------------------- /siclib/models/networks/diffcalib_pretrained.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import torch 4 | from diffcalib.diffcalib.diffcalib_pipeline_rgb_12inchannels import DiffcalibPipeline 5 | from diffcalib.diffcalib.util.seed_all import seed_all 6 | from diffcalib.tools.calibrator import MonocularCalibrator 7 | from diffusers import UNet2DConditionModel # type: ignore 8 | from diffusers.schedulers import DDIMScheduler # type: ignore 9 | from PIL import Image 10 | from torchvision import transforms 11 | 12 | from siclib.models import BaseModel 13 | 14 | 15 | class DiffCalib(BaseModel): 16 | 17 | default_conf = { 18 | "denoise_steps": 10, 19 | "checkpoint": "third_party/diffcalib/checkpoint/stable-diffusion-2-1-marigold-8inchannels", 20 | "unet_ckpt_path": "third_party/diffcalib/checkpoint/diffcalib-best-0.07517", 21 | "preprocessing_res": 768, 22 | "device": "cuda", 23 | } 24 | 25 | def _init(self, conf): 26 | """Initialize DiffCalib model.""" 27 | repo_root = Path(__file__).parents[3] 28 | checkpoint_path = str(repo_root / conf.checkpoint) 29 | unet_ckpt_path = str(repo_root / conf.unet_ckpt_path) 30 | diffcalib_params_ckpt = { 31 | "torch_dtype": torch.float32, 32 | "unet": UNet2DConditionModel.from_pretrained( 33 | unet_ckpt_path, subfolder="unet", revision=None 34 | ), 35 | "scheduler": DDIMScheduler.from_pretrained(checkpoint_path, subfolder="scheduler"), 36 | } 37 | 38 | pipe = DiffcalibPipeline.from_pretrained(checkpoint_path, **diffcalib_params_ckpt) 39 | pipe.enable_xformers_memory_efficient_attention() 40 | 41 | self.dev = torch.device(conf.device) 42 | self.pipe = pipe.to(self.dev) 43 | self.totensor = transforms.ToTensor() 44 | self.normalize = transforms.Normalize(mean=0.5, std=0.5) 45 | self.monocalibrator = MonocularCalibrator(l1_th=0.02) 46 | 47 | @torch.no_grad() 48 | def _forward(self, data): 49 | assert len(data["path"]) == 1, f"Only batch size of 1 is supported (bs={len(data['path'])}" 50 | cam_id = data["cam_id"][0] 51 | assert cam_id == "pinhole" 52 | 53 | rgb = Image.open(data["path"][0]) 54 | wo, ho = rgb.size 55 | 56 | # resize 57 | rgb = rgb.resize((self.conf.preprocessing_res, self.conf.preprocessing_res)) 58 | rgb = self.normalize(self.totensor(rgb)) 59 | pipe_out = self.pipe( 60 | # validation_prompt, 61 | rgb, 62 | denoising_steps=10, 63 | mode="incident", 64 | ) 65 | incidence = pipe_out["incident_np"] 66 | 67 | # if args.mode == 'incident': 68 | K = self.monocalibrator.calibrate_camera_4DoF( 69 | torch.tensor(incidence).unsqueeze(0).to(self.dev), self.dev, RANSAC_trial=2048 70 | ) 71 | scale_x = wo / self.conf.preprocessing_res 72 | scale_y = ho / self.conf.preprocessing_res 73 | intrinsics = torch.stack( 74 | ( 75 | K[0, 0] * scale_x, 76 | K[1, 1] * scale_y, 77 | K[0, 2] * scale_x, 78 | K[1, 2] * scale_y, 79 | ) 80 | ).unsqueeze(0) 81 | return 
{"intrinsics": intrinsics} 82 | 83 | def loss(self, pred, data): 84 | raise NotImplementedError 85 | 86 | 87 | if __name__ == "__main__": 88 | from pathlib import Path 89 | 90 | dir_root = Path(__file__).parents[3] 91 | path = dir_root / "data/lamar2k/images/655367721.jpg" 92 | 93 | model = DiffCalib({}) 94 | output = model({"path": [str(path)], "cam_id": ["pinhole"]}) 95 | print(output) 96 | -------------------------------------------------------------------------------- /siclib/models/networks/dust3r_pretrained.py: -------------------------------------------------------------------------------- 1 | """Wrapper for DUSt3R model to estimate focal length. 2 | 3 | DUSt3R: Geometric 3D Vision Made Easy, https://arxiv.org/abs/2312.14132 4 | """ 5 | 6 | import torch 7 | from dust3r.cloud_opt import GlobalAlignerMode, global_aligner 8 | from dust3r.image_pairs import make_pairs 9 | from dust3r.inference import inference 10 | from dust3r.model import load_model 11 | from dust3r.utils.image import load_images 12 | 13 | from siclib.geometry.base_camera import BaseCamera 14 | from siclib.geometry.gravity import Gravity 15 | from siclib.models import BaseModel 16 | 17 | # mypy: ignore-errors 18 | 19 | 20 | class Dust3R(BaseModel): 21 | """DUSt3R model for focal length estimation.""" 22 | 23 | default_conf = { 24 | "model_path": "weights/DUSt3R_ViTLarge_BaseDecoder_512_dpt.pth", 25 | "device": "cuda", 26 | "batch_size": 1, 27 | "schedule": "cosine", 28 | "lr": 0.01, 29 | "niter": 300, 30 | "show_scene": False, 31 | } 32 | 33 | required_data_keys = ["path"] 34 | 35 | def _init(self, conf): 36 | """Initialize the DUSt3R model.""" 37 | self.model = load_model(conf["model_path"], conf["device"]) 38 | 39 | def _forward(self, data): 40 | """Forward pass of the DUSt.""" 41 | assert len(data["path"]) == 1, f"Only batch size of 1 is supported (bs={len(data['path'])}" 42 | 43 | path = data["path"][0] 44 | images = [path] * 2 45 | 46 | with torch.enable_grad(): 47 | images = load_images(images, size=512) 48 | pairs = make_pairs(images, scene_graph="complete", prefilter=None, symmetrize=True) 49 | output = inference( 50 | pairs, self.model, self.conf["device"], batch_size=self.conf["batch_size"] # type: ignore 51 | ) 52 | scene = global_aligner( 53 | output, device=self.conf["device"], mode=GlobalAlignerMode.PointCloudOptimizer # type: ignore 54 | ) 55 | _ = scene.compute_global_alignment( 56 | init="mst", 57 | niter=self.conf["niter"], # type: ignore 58 | schedule=self.conf["schedule"], # type: ignore 59 | lr=self.conf["lr"], # type: ignore 60 | ) 61 | 62 | # retrieve useful values from scene: 63 | focals = scene.get_focals().mean(dim=0) # type: ignore 64 | 65 | h, w = images[0]["true_shape"][:, 0], images[0]["true_shape"][:, 1] 66 | h, w = focals.new_tensor(h), focals.new_tensor(w) 67 | 68 | camera = BaseCamera.from_dict({"height": h, "width": w, "f": focals}) 69 | gravity = Gravity.from_rp([0.0], [0.0]) # type: ignore 70 | 71 | if self.conf["show_scene"]: # type: ignore 72 | scene.show() 73 | 74 | return {"camera": camera, "gravity": gravity} 75 | 76 | def loss(self, pred, data): 77 | """Loss function for DUSt3R model.""" 78 | return {}, {} 79 | 80 | 81 | if __name__ == "__main__": 82 | from pathlib import Path 83 | 84 | dir_root = Path(__file__).parents[3] 85 | 86 | # load image 87 | path = dir_root / "data/lamar2k/images/655367721.jpg" 88 | 89 | dust3r = Dust3R({}) 90 | output = dust3r({"path": [str(path)]}) 91 | print(output) 92 | 
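Note on the DUSt3R wrappers above and below: DUSt3R is a two-view network, so a single image is duplicated into a symmetric pair (images = [path] * 2) and the focal is read off the aligned scene via scene.get_focals(), averaged over the two views with .mean(dim=0). The underlying geometry is simple: with a centered principal point, a camera-frame pointmap (X, Y, Z) and its pixel grid (u, v) satisfy u - cx = f*X/Z and v - cy = f*Y/Z, so f can be recovered as a robust median of ratios. A rough sketch of that idea (an illustrative helper, not DUSt3R's actual estimator; it assumes square pixels and a centered principal point):

import torch

def focal_from_pointmap(pts3d: torch.Tensor, eps: float = 1e-6) -> torch.Tensor:
    """Estimate a single focal length from a (H, W, 3) grid of camera-frame 3D points."""
    H, W = pts3d.shape[:2]
    v, u = torch.meshgrid(
        torch.arange(H, dtype=pts3d.dtype, device=pts3d.device),
        torch.arange(W, dtype=pts3d.dtype, device=pts3d.device),
        indexing="ij",
    )
    # pixel offsets from the (assumed) centered principal point
    du, dv = u - (W - 1) / 2, v - (H - 1) / 2
    X, Y, Z = pts3d.unbind(-1)
    # pinhole model: du = f * X / Z and dv = f * Y / Z
    fx = (du * Z / X)[X.abs() > eps]
    fy = (dv * Z / Y)[Y.abs() > eps]
    # median over both axes; robust to noisy points near the optical axis
    return torch.cat([fx, fy]).median()

In the wrapper this role is played by scene.get_focals() on the globally aligned scene; the helper above only illustrates why a pointmap determines the focal length at all.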
-------------------------------------------------------------------------------- /siclib/models/networks/dust3r_pretrained_rays.py: -------------------------------------------------------------------------------- 1 | """Wrapper for DUSt3R model to estimate focal length. 2 | 3 | DUSt3R: Geometric 3D Vision Made Easy, https://arxiv.org/abs/2312.14132 4 | """ 5 | 6 | import numpy as np 7 | import torch 8 | import torch.nn.functional as F 9 | from dust3r.cloud_opt import GlobalAlignerMode, global_aligner 10 | from dust3r.image_pairs import make_pairs 11 | from dust3r.inference import inference 12 | from dust3r.model import load_model 13 | from dust3r.utils.image import load_images 14 | from PIL import Image 15 | 16 | from siclib.models import BaseModel 17 | 18 | 19 | class Dust3R(BaseModel): 20 | """DUSt3R model for focal length estimation.""" 21 | 22 | default_conf = { 23 | "model_path": "weights/DUSt3R_ViTLarge_BaseDecoder_512_dpt.pth", 24 | "device": "cuda", 25 | "batch_size": 1, 26 | "schedule": "cosine", 27 | "lr": 0.01, 28 | "niter": 300, 29 | "show_scene": False, 30 | } 31 | 32 | required_data_keys = ["path"] 33 | 34 | def _init(self, conf): 35 | self.model = load_model(conf["model_path"], conf["device"]) 36 | 37 | def _forward(self, data): 38 | assert len(data["path"]) == 1, f"Only batch size of 1 is supported (bs={len(data['path'])}" 39 | 40 | path = data["path"][0] 41 | images = [path] * 2 42 | ho, wo = data["image"].shape[-2:] if "image" in data else Image.open(path).size[::-1] 43 | 44 | # with torch.enable_grad(): 45 | images = load_images(images, size=512) 46 | pairs = make_pairs(images, scene_graph="complete", prefilter=None, symmetrize=True) 47 | output = inference( 48 | pairs, self.model, self.conf["device"], batch_size=self.conf["batch_size"] # type: ignore 49 | ) 50 | 51 | # raw ray predictions 52 | rays = F.normalize(output["pred1"]["pts3d"][:1], dim=-1).permute(0, 3, 1, 2) # type: ignore 53 | # resize the grid of rays to the image size for visualization purposes 54 | rays = F.normalize( 55 | F.interpolate(rays, (ho, wo), mode="bilinear", align_corners=False), dim=1 56 | ) # (1, 3, H, W) 57 | rays = rays.view(1, 3, -1).permute(0, 2, 1).contiguous() # (1, H*W, 3) 58 | 59 | # fit to raw scene 60 | scene = global_aligner( 61 | output, device=self.conf["device"], mode=GlobalAlignerMode.PairViewer # type: ignore 62 | ) 63 | # get scale for intrinsics 64 | ht, wt = images[0]["true_shape"][:, 0], images[0]["true_shape"][:, 1] 65 | scale = float(np.mean([ho / ht, wo / wt])) 66 | # intrinsics 67 | intrinsics = scene.get_intrinsics().mean(dim=0) 68 | f = intrinsics[0, 0] 69 | cx = intrinsics[0, 2] 70 | cy = intrinsics[1, 2] 71 | intrinsics = scale * ( 72 | torch.stack([f, cx, cy]) 73 | if "simple" in data["cam_id"][0] 74 | else torch.stack([f, f, cx, cy]) 75 | ) 76 | return {"intrinsics": intrinsics[None], "rays": rays} 77 | 78 | def loss(self, pred, data): 79 | raise NotImplementedError 80 | 81 | 82 | if __name__ == "__main__": 83 | from pathlib import Path 84 | 85 | dir_root = Path(__file__).parents[3] 86 | path = dir_root / "data/lamar2k/images/655367721.jpg" 87 | dust3r = Dust3R({}) 88 | output = dust3r({"path": [str(path)], "cam_id": ["pinhole"]}) 89 | print(output) 90 | print(output["intrinsics"].shape) 91 | -------------------------------------------------------------------------------- /siclib/models/networks/geocalib.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from siclib.models import get_model 4 | from 
siclib.models.base_model import BaseModel 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | # flake8: noqa 9 | # mypy: ignore-errors 10 | 11 | 12 | class GeoCalib(BaseModel): 13 | default_conf = { 14 | "backbone": {"name": "encoders.mscan"}, 15 | "ll_enc": {"name": "encoders.low_level_encoder"}, 16 | "perspective_decoder": {"name": "decoders.perspective_decoder"}, 17 | "optimizer": {"name": "optimization.lm_optimizer"}, 18 | } 19 | 20 | required_data_keys = ["image"] 21 | 22 | def _init(self, conf): 23 | logger.debug(f"Initializing GeoCalib with {conf}") 24 | self.backbone = get_model(conf.backbone["name"])(conf.backbone) 25 | self.ll_enc = get_model(conf.ll_enc["name"])(conf.ll_enc) if conf.ll_enc else None 26 | 27 | self.perspective_decoder = get_model(conf.perspective_decoder["name"])( 28 | conf.perspective_decoder 29 | ) 30 | 31 | self.optimizer = ( 32 | get_model(conf.optimizer["name"])(conf.optimizer) if conf.optimizer else None 33 | ) 34 | 35 | def _forward(self, data): 36 | backbone_out = self.backbone(data) 37 | features = {"hl": backbone_out["features"], "padding": backbone_out.get("padding", None)} 38 | 39 | if self.ll_enc is not None: 40 | features["ll"] = self.ll_enc(data)["features"] # low level features 41 | 42 | out = self.perspective_decoder({"features": features}) 43 | 44 | out |= { 45 | k: data[k] 46 | for k in ["image", "scales", "prior_gravity", "prior_focal", "prior_k1"] 47 | if k in data 48 | } 49 | 50 | if self.optimizer is not None: 51 | out |= self.optimizer(out) 52 | 53 | return out 54 | 55 | def loss(self, pred, data): 56 | losses, metrics = self.perspective_decoder.loss(pred, data) 57 | total = losses["perspective_total"] 58 | 59 | if self.optimizer is not None: 60 | opt_losses, param_metrics = self.optimizer.loss(pred, data) 61 | losses |= opt_losses 62 | metrics |= param_metrics 63 | total = total + opt_losses["param_total"] 64 | 65 | losses["total"] = total 66 | return losses, metrics 67 | -------------------------------------------------------------------------------- /siclib/models/networks/geocalib_pretrained.py: -------------------------------------------------------------------------------- 1 | """Interface for GeoCalib inference package.""" 2 | 3 | from geocalib import GeoCalib 4 | from siclib.models.base_model import BaseModel 5 | 6 | 7 | # mypy: ignore-errors 8 | class GeoCalibPretrained(BaseModel): 9 | """GeoCalib pretrained model.""" 10 | 11 | default_conf = { 12 | "camera_model": "pinhole", 13 | "model_weights": "pinhole", 14 | } 15 | 16 | def _init(self, conf): 17 | """Initialize pretrained GeoCalib model.""" 18 | self.model = GeoCalib(weights=conf.model_weights) 19 | 20 | def _forward(self, data): 21 | """Forward pass.""" 22 | priors = {} 23 | if "prior_gravity" in data: 24 | priors["gravity"] = data["prior_gravity"] 25 | 26 | if "prior_focal" in data: 27 | priors["focal"] = data["prior_focal"] 28 | 29 | results = self.model.calibrate( 30 | data["image"], camera_model=self.conf.camera_model, priors=priors 31 | ) 32 | 33 | return results 34 | 35 | def metrics(self, pred, data): 36 | """Compute metrics.""" 37 | raise NotImplementedError("GeoCalibPretrained does not support metrics computation.") 38 | 39 | def loss(self, pred, data): 40 | """Compute loss.""" 41 | raise NotImplementedError("GeoCalibPretrained does not support loss computation.") 42 | -------------------------------------------------------------------------------- /siclib/models/networks/geocalib_pretrained_rays.py: 
-------------------------------------------------------------------------------- 1 | """Interface for GeoCalib inference package with inference compatible with 2 | models that predict rays.""" 3 | 4 | import torch 5 | from geocalib import GeoCalib 6 | 7 | from siclib.models.base_model import BaseModel 8 | 9 | 10 | # mypy: ignore-errors 11 | class GeoCalibPretrained(BaseModel): 12 | """GeoCalib pretrained model.""" 13 | 14 | default_conf = { 15 | "camera_model": "pinhole", 16 | "model_weights": "pinhole", 17 | } 18 | 19 | def _init(self, conf): 20 | """Initialize pretrained GeoCalib model.""" 21 | self.model = GeoCalib(weights=conf.model_weights) 22 | 23 | def _forward(self, data): 24 | """Forward pass.""" 25 | priors = {} 26 | if "prior_gravity" in data: 27 | priors["gravity"] = data["prior_gravity"] 28 | 29 | if "prior_focal" in data: 30 | priors["focal"] = data["prior_focal"] 31 | 32 | results = self.model.calibrate( 33 | data["image"], camera_model=self.conf.camera_model, priors=priors 34 | ) 35 | 36 | # assert all(id_ == "pinhole" for id_ in data["cam_id"]), "Only pinhole is supported for now." 37 | cam = results["camera"] 38 | fxfy = cam.f # type: ignore 39 | cxcy = cam.c # type: ignore 40 | if self.conf.camera_model == "pinhole": 41 | assert all(id_ == "pinhole" for id_ in data["cam_id"]), data["cam_id"] 42 | results["intrinsics"] = torch.cat([fxfy, cxcy], dim=-1) 43 | 44 | elif self.conf.camera_model == "simple_radial": 45 | assert all(id_ == "radial:1" for id_ in data["cam_id"]), data["cam_id"] 46 | k1 = cam.k1.unsqueeze(-1) # type: ignore 47 | params = torch.cat([fxfy, cxcy, k1], dim=-1) 48 | assert params.shape[-1] == 5 49 | results["intrinsics"] = params 50 | 51 | elif self.conf.camera_model == "simple_divisional": 52 | assert all(id_ == "division:1" for id_ in data["cam_id"]), data["cam_id"] 53 | k1 = cam.k1.unsqueeze(-1) # type: ignore 54 | params = torch.cat([fxfy, cxcy, k1], dim=-1) 55 | assert params.shape[-1] == 5 56 | results["intrinsics"] = params 57 | 58 | else: 59 | raise NotImplementedError 60 | return results 61 | 62 | def metrics(self, pred, data): 63 | """Compute metrics.""" 64 | raise NotImplementedError("GeoCalibPretrained does not support metrics computation.") 65 | 66 | def loss(self, pred, data): 67 | """Compute loss.""" 68 | raise NotImplementedError("GeoCalibPretrained does not support loss computation.") 69 | -------------------------------------------------------------------------------- /siclib/models/networks/moge_pretrained.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from moge.model import MoGeModel 4 | 5 | from siclib.models import BaseModel 6 | 7 | 8 | class MoGe(BaseModel): 9 | """MoGe model for focal length estimation.""" 10 | 11 | default_conf = {"force_projection": False} 12 | 13 | # required_data_keys = ["path"] 14 | required_data_keys = ["image"] 15 | 16 | def _init(self, conf): 17 | """Initialize the MoGe model.""" 18 | self.device = torch.device("cuda") 19 | self.model = MoGeModel.from_pretrained("Ruicheng/moge-vitl").to(self.device) 20 | self.force_projection = conf.force_projection 21 | 22 | def _forward(self, data): 23 | """MoGe forward pass.""" 24 | assert len(data["image"]) == 1, "Only batch size = 1 is supported." 25 | if "cam_id" in data: 26 | assert ( 27 | data["cam_id"][0] == "pinhole" 28 | ), f"Only pinhole camera supported. 
Got: {data['cam_id']}" 29 | image = data["image"].squeeze(0) 30 | try: 31 | # if force_projection is False, we get the raw points 32 | pred = self.model.infer(image, force_projection=self.force_projection) 33 | except ValueError: 34 | # some outlier cases lead to crash. Return bogus intrinsics 35 | return {"intrinsics": torch.tensor([[1.0, 1.0, 1.0, 1.0]], device=self.device)} 36 | # normalized intrinsics 37 | K = pred["intrinsics"] 38 | fx, cx = K[0, 0], K[0, 2] 39 | fy, cy = K[1, 1], K[1, 2] 40 | # unnormalize 41 | h, w = image.shape[-2:] 42 | fx, cx = fx * w, cx * w 43 | fy, cy = fy * h, cy * h 44 | pred["intrinsics"] = torch.tensor([[fx, fy, cx, cy]], device=self.device) 45 | pred["rays"] = F.normalize(pred["points"], dim=-1).view(1, -1, 3) 46 | return pred 47 | 48 | def loss(self, pred, data): 49 | """Loss function for DUSt3R model.""" 50 | return {}, {} 51 | 52 | 53 | if __name__ == "__main__": 54 | from pathlib import Path 55 | 56 | import numpy as np 57 | from PIL import Image 58 | 59 | dir_root = Path(__file__).parents[3] 60 | 61 | # load image 62 | path = dir_root / "data/lamar2k/images/655367721.jpg" 63 | im = torch.from_numpy(np.array(Image.open(path).convert("RGB"))) / 255.0 64 | im_ = im.permute(2, 0, 1).unsqueeze(0).cuda() 65 | 66 | model = MoGe({}) 67 | output = model({"image": im_}) 68 | 69 | print(output["intrinsics"]) 70 | -------------------------------------------------------------------------------- /siclib/models/networks/wildcam_pretrained.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn.functional as F 4 | from PIL import Image 5 | from WildCamera.newcrfs.newcrf_incidencefield import NEWCRFIF 6 | 7 | from siclib.models import BaseModel 8 | 9 | 10 | class WildCam(BaseModel): 11 | 12 | default_conf = {"with_assumption": False} 13 | 14 | def _init(self, conf): 15 | """Initialize WildCam model.""" 16 | model = NEWCRFIF(version="large07", pretrained=None) 17 | # pretrained 18 | pretrained_resource = "https://huggingface.co/datasets/Shengjie/WildCamera/resolve/main/checkpoint/wild_camera_all.pth" 19 | state_dict = torch.hub.load_state_dict_from_url(pretrained_resource, map_location="cpu") 20 | model.load_state_dict(state_dict, strict=True) 21 | self.model = model 22 | 23 | def _forward(self, data): 24 | """WildCam forward pass""" 25 | assert len(data["image"]) == 1, "Only batch size = 1 supported." 26 | if "cam_id" in data: 27 | assert ( 28 | "pinhole" in data["cam_id"][0] 29 | ), f"Only pinhole camera supported. 
Got: {data['cam_id']}" 30 | 31 | # convert tensor to PIL image 32 | image = Image.fromarray( 33 | (data["image"].squeeze(0).permute(1, 2, 0).cpu().numpy() * 255) 34 | .clip(0, 255) 35 | .astype(np.uint8) 36 | ) 37 | w, h = image.size 38 | 39 | K, rays = self.model.inference(image, wtassumption=self.conf.with_assumption) 40 | 41 | # the predicted grid of rays by WildCam always have a shape of (480, 640), so we 42 | # resize the grid of rays to the image size for visualization purposes 43 | rays = F.normalize( 44 | F.interpolate(rays, (h, w), mode="bilinear", align_corners=False), dim=1 45 | ) # (1, 3, H, W) 46 | rays = rays.view(1, 3, -1).permute(0, 2, 1).contiguous() # (1, H*W, 3) 47 | 48 | if "cam_id" in data and data["cam_id"][0] == "simple_pinhole": 49 | intrinsics = torch.tensor([K[0, 0], K[0, 2], K[1, 2]]).unsqueeze(0) 50 | else: 51 | intrinsics = torch.tensor([K[0, 0], K[1, 1], K[0, 2], K[1, 2]]).unsqueeze(0) 52 | return {"intrinsics": intrinsics, "rays": rays} 53 | 54 | def loss(self, pred, data): 55 | raise NotImplementedError 56 | 57 | 58 | if __name__ == "__main__": 59 | from pathlib import Path 60 | 61 | dir_root = Path(__file__).parents[3] 62 | 63 | # load image 64 | path = dir_root / "data/lamar2k/images/655367721.jpg" 65 | im = torch.from_numpy(np.array(Image.open(path).convert("RGB"))) / 255.0 66 | im_ = im.permute(2, 0, 1).unsqueeze(0).cuda() 67 | 68 | model = WildCam({}) 69 | output = model({"image": im_}) 70 | 71 | print(output["intrinsics"]) 72 | print(output["rays"].shape) 73 | -------------------------------------------------------------------------------- /siclib/models/optimization/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/javrtg/AnyCalib/7bfa51b5797e8784cc6d438aab96c57c226a699e/siclib/models/optimization/__init__.py -------------------------------------------------------------------------------- /siclib/models/optimization/inference_optimizer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from siclib.models.optimization.lm_optimizer import LMOptimizer 4 | 5 | logger = logging.getLogger(__name__) 6 | 7 | # flake8: noqa 8 | # mypy: ignore-errors 9 | 10 | 11 | class InferenceOptimizer(LMOptimizer): 12 | default_conf = { 13 | # Camera model parameters 14 | "camera_model": "pinhole", # {"pinhole", "simple_radial", "simple_spherical"} 15 | "shared_intrinsics": False, # share focal length across all images in batch 16 | "estimate_gravity": True, 17 | "estimate_focal": True, 18 | "estimate_k1": True, # will be ignored if camera_model is pinhole 19 | # LM optimizer parameters 20 | "num_steps": 30, 21 | "lambda_": 0.1, 22 | "fix_lambda": False, 23 | "early_stop": True, 24 | "atol": 1e-8, 25 | "rtol": 1e-8, 26 | "use_spherical_manifold": True, # use spherical manifold for gravity optimization 27 | "use_log_focal": True, # use log focal length for optimization 28 | # Loss function parameters 29 | "loss_fn": "huber_loss", # {"squared_loss", "huber_loss"} 30 | "up_loss_fn_scale": 1e-2, 31 | "lat_loss_fn_scale": 1e-2, 32 | "init_conf": {"name": "trivial"}, # pass config of other models to use as initializer 33 | # Misc 34 | "loss_weight": 1, 35 | "verbose": False, 36 | } 37 | -------------------------------------------------------------------------------- /siclib/models/optimization/losses.py: -------------------------------------------------------------------------------- 1 | """Generic losses and error functions for optimization 
or training deep networks.""" 2 | 3 | from typing import Callable, Tuple 4 | 5 | import torch 6 | 7 | 8 | def scaled_loss( 9 | x: torch.Tensor, fn: Callable, a: float 10 | ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: 11 | """Apply a loss function to a tensor and pre- and post-scale it. 12 | 13 | Args: 14 | x: the data tensor, should already be squared: `x = y**2`. 15 | fn: the loss function, with signature `fn(x) -> y`. 16 | a: the scale parameter. 17 | 18 | Returns: 19 | The value of the loss, and its first and second derivatives. 20 | """ 21 | a2 = a**2 22 | loss, loss_d1, loss_d2 = fn(x / a2) 23 | return loss * a2, loss_d1, loss_d2 / a2 24 | 25 | 26 | def squared_loss(x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: 27 | """A dummy squared loss.""" 28 | return x, torch.ones_like(x), torch.zeros_like(x) 29 | 30 | 31 | def huber_loss(x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: 32 | """The classical robust Huber loss, with first and second derivatives.""" 33 | mask = x <= 1 34 | sx = torch.sqrt(x + 1e-8) # avoid nan in backward pass 35 | isx = torch.max(sx.new_tensor(torch.finfo(torch.float).eps), 1 / sx) 36 | loss = torch.where(mask, x, 2 * sx - 1) 37 | loss_d1 = torch.where(mask, torch.ones_like(x), isx) 38 | loss_d2 = torch.where(mask, torch.zeros_like(x), -isx / (2 * x)) 39 | return loss, loss_d1, loss_d2 40 | 41 | 42 | def barron_loss( 43 | x: torch.Tensor, alpha: torch.Tensor, derivatives: bool = True, eps: float = 1e-7 44 | ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: 45 | """Parameterized & adaptive robust loss function. 46 | 47 | Described in: 48 | A General and Adaptive Robust Loss Function, Barron, CVPR 2019 49 | 50 | alpha = 2 -> L2 loss 51 | alpha = 1 -> Charbonnier loss (smooth L1) 52 | alpha = 0 -> Cauchy loss 53 | alpha = -2 -> Geman-McClure loss 54 | alpha = -inf -> Welsch loss 55 | 56 | Contrary to the original implementation, assume that the input is already 57 | squared and scaled (basically scale=1). Computes the first derivative, but 58 | not the second (TODO if needed). 59 | """ 60 | loss_two = x 61 | loss_zero = 2 * torch.log1p(torch.clamp(0.5 * x, max=33e37)) 62 | 63 | # The loss when not in one of the above special cases. 64 | # Clamp |2-alpha| to be >= machine epsilon so that it's safe to divide by. 65 | beta_safe = torch.abs(alpha - 2.0).clamp(min=eps) 66 | # Clamp |alpha| to be >= machine epsilon so that it's safe to divide by. 67 | alpha_safe = torch.where(alpha >= 0, torch.ones_like(alpha), -torch.ones_like(alpha)) 68 | alpha_safe = alpha_safe * torch.abs(alpha).clamp(min=eps) 69 | 70 | loss_otherwise = ( 71 | 2 * (beta_safe / alpha_safe) * (torch.pow(x / beta_safe + 1.0, 0.5 * alpha) - 1.0) 72 | ) 73 | 74 | # Select which of the cases of the loss to return. 
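# With scale fixed to 1 and x the squared residual, the general case above
# evaluates 2 * (b / a) * ((x / b + 1)**(a / 2) - 1) with b = |2 - alpha|.
# Its limits for alpha -> 2 and alpha -> 0 recover `loss_two` (L2) and
# `loss_zero` (Cauchy) exactly, which is why those two cases are selected
# explicitly below instead of relying on the clamped general expression.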
75 | loss = torch.where(alpha == 0, loss_zero, torch.where(alpha == 2, loss_two, loss_otherwise)) 76 | dummy = torch.zeros_like(x) 77 | 78 | if derivatives: 79 | loss_two_d1 = torch.ones_like(x) 80 | loss_zero_d1 = 2 / (x + 2) 81 | loss_otherwise_d1 = torch.pow(x / beta_safe + 1.0, 0.5 * alpha - 1.0) 82 | loss_d1 = torch.where( 83 | alpha == 0, loss_zero_d1, torch.where(alpha == 2, loss_two_d1, loss_otherwise_d1) 84 | ) 85 | 86 | return loss, loss_d1, dummy 87 | else: 88 | return loss, dummy, dummy 89 | 90 | 91 | def scaled_barron(a, c): 92 | """Return a scaled Barron loss function.""" 93 | return lambda x: scaled_loss(x, lambda y: barron_loss(y, y.new_tensor(a)), c) 94 | -------------------------------------------------------------------------------- /siclib/models/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/javrtg/AnyCalib/7bfa51b5797e8784cc6d438aab96c57c226a699e/siclib/models/utils/__init__.py -------------------------------------------------------------------------------- /siclib/models/utils/metrics.py: -------------------------------------------------------------------------------- 1 | """Various metrics for evaluating predictions.""" 2 | 3 | import logging 4 | 5 | import torch 6 | from torch.nn import functional as F 7 | 8 | from siclib.geometry.base_camera import BaseCamera 9 | from siclib.geometry.gravity import Gravity 10 | from siclib.utils.conversions import rad2deg 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | def pitch_error(pred_gravity: Gravity, target_gravity: Gravity) -> torch.Tensor: 16 | """Computes the pitch error between two gravities. 17 | 18 | Args: 19 | pred_gravity (Gravity): Predicted camera. 20 | target_gravity (Gravity): Ground truth camera. 21 | 22 | Returns: 23 | torch.Tensor: Pitch error in degrees. 24 | """ 25 | return rad2deg(torch.abs(pred_gravity.pitch - target_gravity.pitch)) 26 | 27 | 28 | def roll_error(pred_gravity: Gravity, target_gravity: Gravity) -> torch.Tensor: 29 | """Computes the roll error between two gravities. 30 | 31 | Args: 32 | pred_gravity (Gravity): Predicted Gravity. 33 | target_gravity (Gravity): Ground truth Gravity. 34 | 35 | Returns: 36 | torch.Tensor: Roll error in degrees. 37 | """ 38 | return rad2deg(torch.abs(pred_gravity.roll - target_gravity.roll)) 39 | 40 | 41 | def gravity_error(pred_gravity: Gravity, target_gravity: Gravity) -> torch.Tensor: 42 | """Computes the gravity error between two gravities. 43 | 44 | Args: 45 | pred_gravity (Gravity): Predicted Gravity. 46 | target_gravity (Gravity): Ground truth Gravity. 47 | 48 | Returns: 49 | torch.Tensor: Gravity error in degrees. 50 | """ 51 | assert ( 52 | pred_gravity.vec3d.shape == target_gravity.vec3d.shape 53 | ), f"{pred_gravity.vec3d.shape} != {target_gravity.vec3d.shape}" 54 | assert pred_gravity.vec3d.ndim == 2, f"{pred_gravity.vec3d.ndim} != 2" 55 | assert pred_gravity.vec3d.shape[1] == 3, f"{pred_gravity.vec3d.shape[1]} != 3" 56 | 57 | cossim = F.cosine_similarity(pred_gravity.vec3d, target_gravity.vec3d, dim=-1).clamp(-1, 1) 58 | return rad2deg(torch.acos(cossim)) 59 | 60 | 61 | def vfov_error(pred_cam: BaseCamera, target_cam: BaseCamera) -> torch.Tensor: 62 | """Computes the vertical field of view error between two cameras. 63 | 64 | Args: 65 | pred_cam (Camera): Predicted camera. 66 | target_cam (Camera): Ground truth camera. 67 | 68 | Returns: 69 | torch.Tensor: Vertical field of view error in degrees. 
70 | """ 71 | return rad2deg(torch.abs(pred_cam.vfov - target_cam.vfov)) 72 | 73 | 74 | def dist_error(pred_cam: BaseCamera, target_cam: BaseCamera) -> torch.Tensor: 75 | """Computes the distortion parameter error between two cameras. 76 | 77 | Returns zero if the cameras do not have distortion parameters. 78 | 79 | Args: 80 | pred_cam (Camera): Predicted camera. 81 | target_cam (Camera): Ground truth camera. 82 | 83 | Returns: 84 | torch.Tensor: distortion error. 85 | """ 86 | if hasattr(pred_cam, "dist") and hasattr(target_cam, "dist"): 87 | return torch.abs(pred_cam.dist[..., 0] - target_cam.dist[..., 0]) 88 | 89 | logger.debug( 90 | f"Predicted / target camera doesn't have distortion parameters: {pred_cam}/{target_cam}" 91 | ) 92 | return pred_cam.new_zeros(pred_cam.f.shape[0]) 93 | 94 | 95 | def latitude_error(predictions: torch.Tensor, targets: torch.Tensor) -> torch.Tensor: 96 | """Computes the latitude error between two tensors. 97 | 98 | Args: 99 | predictions (torch.Tensor): Predicted latitude field of shape (B, 1, H, W). 100 | targets (torch.Tensor): Ground truth latitude field of shape (B, 1, H, W). 101 | 102 | Returns: 103 | torch.Tensor: Latitude error in degrees of shape (B, H, W). 104 | """ 105 | return rad2deg(torch.abs(predictions - targets)).squeeze(1) 106 | 107 | 108 | def up_error(predictions: torch.Tensor, targets: torch.Tensor) -> torch.Tensor: 109 | """Computes the up error between two tensors. 110 | 111 | Args: 112 | predictions (torch.Tensor): Predicted up field of shape (B, 2, H, W). 113 | targets (torch.Tensor): Ground truth up field of shape (B, 2, H, W). 114 | 115 | Returns: 116 | torch.Tensor: Up error in degrees of shape (B, H, W). 117 | """ 118 | assert predictions.shape == targets.shape, f"{predictions.shape} != {targets.shape}" 119 | assert predictions.ndim == 4, f"{predictions.ndim} != 4" 120 | assert predictions.shape[1] == 2, f"{predictions.shape[1]} != 2" 121 | 122 | angle = F.cosine_similarity(predictions, targets, dim=1).clamp(-1, 1) 123 | return rad2deg(torch.acos(angle)) 124 | -------------------------------------------------------------------------------- /siclib/models/utils/perspective_encoding.py: -------------------------------------------------------------------------------- 1 | """Perspective field utilities. 2 | 3 | Adapted from https://github.com/jinlinyi/PerspectiveFields 4 | """ 5 | 6 | import torch 7 | 8 | from siclib.utils.conversions import deg2rad, rad2deg 9 | 10 | 11 | def encode_up_bin(vector_field: torch.Tensor, num_bin: int) -> torch.Tensor: 12 | """Encode vector field into classification bins. 13 | 14 | Args: 15 | vector_field (torch.Tensor): gravity field of shape (2, h, w), with channel 0 cos(theta) and 16 | 1 sin(theta) 17 | num_bin (int): number of classification bins 18 | 19 | Returns: 20 | torch.Tensor: encoded bin indices of shape (1, h, w) 21 | """ 22 | angle = ( 23 | torch.atan2(vector_field[1, :, :], vector_field[0, :, :]) / torch.pi * 180 + 180 24 | ) % 360 # [0,360) 25 | angle_bin = torch.round(torch.div(angle, (360 / (num_bin - 1)))).long() 26 | angle_bin[angle_bin == num_bin - 1] = 0 27 | invalid = (vector_field == 0).sum(0) == vector_field.size(0) 28 | angle_bin[invalid] = num_bin - 1 29 | return deg2rad(angle_bin.type(torch.LongTensor)) 30 | 31 | 32 | def decode_up_bin(angle_bin: torch.Tensor, num_bin: int) -> torch.Tensor: 33 | """Decode classification bins into vector field. 
34 | 35 | Args: 36 | angle_bin (torch.Tensor): bin indices of shape (1, h, w) 37 | num_bin (int): number of classification bins 38 | 39 | Returns: 40 | torch.Tensor: decoded vector field of shape (2, h, w) 41 | """ 42 | angle = (angle_bin * (360 / (num_bin - 1)) - 180) / 180 * torch.pi 43 | cos = torch.cos(angle) 44 | sin = torch.sin(angle) 45 | vector_field = torch.stack((cos, sin), dim=1) 46 | invalid = angle_bin == num_bin - 1 47 | invalid = invalid.unsqueeze(1).repeat(1, 2, 1, 1) 48 | vector_field[invalid] = 0 49 | return vector_field 50 | 51 | 52 | def encode_bin_latitude(latimap: torch.Tensor, num_classes: int) -> torch.Tensor: 53 | """Encode latitude map into classification bins. 54 | 55 | Args: 56 | latimap (torch.Tensor): latitude map of shape (h, w) with values in [-90, 90] 57 | num_classes (int): number of classes 58 | 59 | Returns: 60 | torch.Tensor: encoded latitude bin indices 61 | """ 62 | boundaries = torch.arange(-90, 90, 180 / num_classes)[1:] 63 | binmap = torch.bucketize(rad2deg(latimap), boundaries) 64 | return binmap.type(torch.LongTensor) 65 | 66 | 67 | def decode_bin_latitude(binmap: torch.Tensor, num_classes: int) -> torch.Tensor: 68 | """Decode classification bins to latitude map. 69 | 70 | Args: 71 | binmap (torch.Tensor): encoded classification bins 72 | num_classes (int): number of classes 73 | 74 | Returns: 75 | torch.Tensor: latitude map of shape (h, w) 76 | """ 77 | bin_size = 180 / num_classes 78 | bin_centers = torch.arange(-90, 90, bin_size) + bin_size / 2 79 | bin_centers = bin_centers.to(binmap.device) 80 | latimap = bin_centers[binmap] 81 | 82 | return deg2rad(latimap) 83 | -------------------------------------------------------------------------------- /siclib/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "siclib" 7 | version = "1.0" 8 | description = "Training library for GeoCalib: Learning Single-image Calibration with Geometric Optimization" 9 | authors = [ 10 | { name = "Alexander Veicht" }, 11 | { name = "Paul-Edouard Sarlin" }, 12 | { name = "Philipp Lindenberger" }, 13 | ] 14 | requires-python = ">=3.10" 15 | license = { file = "LICENSE" } 16 | classifiers = [ 17 | "Programming Language :: Python :: 3", 18 | "License :: OSI Approved :: Apache Software License", 19 | "Operating System :: OS Independent", 20 | ] 21 | urls = { Repository = "https://github.com/cvg/GeoCalib" } 22 | 23 | dynamic = ["dependencies"] 24 | 25 | [project.optional-dependencies] 26 | dev = ["black==23.9.1", "flake8", "isort==5.12.0"] 27 | 28 | [tool.setuptools.packages.find] 29 | where = [".."] 30 | include = ["siclib"] 31 | 32 | [tool.setuptools.dynamic] 33 | dependencies = { file = ["requirements.txt"] } 34 | 35 | [tool.black] 36 | line-length = 100 37 | exclude = "(venv/|docs/|third_party/)" 38 | 39 | [tool.isort] 40 | profile = "black" 41 | line_length = 100 42 | atomic = true 43 | 44 | [tool.flake8] 45 | max-line-length = 100 46 | docstring-convention = "google" 47 | ignore = ["E203", "W503", "E402"] 48 | exclude = [".git", "__pycache__", "venv", "docs", "third_party", "scripts"] 49 | -------------------------------------------------------------------------------- /siclib/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | torchvision 3 | opencv-python-headless 4 | kornia 5 | matplotlib 6 | 7 | omegaconf 8 | albumentations 9 | 
h5py 10 | hydra-core 11 | pandas 12 | tqdm 13 | tensorboard 14 | wandb 15 | 16 | gdown -------------------------------------------------------------------------------- /siclib/settings.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | # flake8: noqa 4 | # mypy: ignore-errors 5 | try: 6 | from settings import DATA_PATH, EVAL_PATH, TRAINING_PATH 7 | except ModuleNotFoundError: 8 | # @TODO: Add a way to patch paths 9 | root = Path(__file__).parent.parent # top-level directory 10 | DATA_PATH = root / "data/" # datasets and pretrained weights 11 | TRAINING_PATH = root / "outputs/training/" # training checkpoints 12 | EVAL_PATH = root / "outputs/results/" # evaluation results 13 | -------------------------------------------------------------------------------- /siclib/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/javrtg/AnyCalib/7bfa51b5797e8784cc6d438aab96c57c226a699e/siclib/utils/__init__.py -------------------------------------------------------------------------------- /siclib/utils/conversions.py: -------------------------------------------------------------------------------- 1 | """Utility functions for conversions between different representations.""" 2 | 3 | from typing import Optional 4 | 5 | import torch 6 | 7 | 8 | def skew_symmetric(v: torch.Tensor) -> torch.Tensor: 9 | """Create a skew-symmetric matrix from a (batched) vector of size (..., 3). 10 | 11 | Args: 12 | v (torch.Tensor): Vector of size (..., 3). 13 | 14 | Returns: 15 | (torch.Tensor): Skew-symmetric matrix of size (..., 3, 3). 16 | """ 17 | z = torch.zeros_like(v[..., 0]) 18 | return torch.stack( 19 | [ 20 | z, 21 | -v[..., 2], 22 | v[..., 1], 23 | v[..., 2], 24 | z, 25 | -v[..., 0], 26 | -v[..., 1], 27 | v[..., 0], 28 | z, 29 | ], 30 | dim=-1, 31 | ).reshape(v.shape[:-1] + (3, 3)) 32 | 33 | 34 | def rad2rotmat( 35 | roll: torch.Tensor, pitch: torch.Tensor, yaw: Optional[torch.Tensor] = None 36 | ) -> torch.Tensor: 37 | """Convert (batched) roll, pitch, yaw angles (in radians) to rotation matrix. 38 | 39 | Args: 40 | roll (torch.Tensor): Roll angle in radians. 41 | pitch (torch.Tensor): Pitch angle in radians. 42 | yaw (torch.Tensor, optional): Yaw angle in radians. Defaults to None. 43 | 44 | Returns: 45 | torch.Tensor: Rotation matrix of shape (..., 3, 3). 46 | """ 47 | if yaw is None: 48 | yaw = roll.new_zeros(roll.shape) 49 | 50 | Rx = pitch.new_zeros(pitch.shape + (3, 3)) 51 | Rx[..., 0, 0] = 1 52 | Rx[..., 1, 1] = torch.cos(pitch) 53 | Rx[..., 1, 2] = torch.sin(pitch) 54 | Rx[..., 2, 1] = -torch.sin(pitch) 55 | Rx[..., 2, 2] = torch.cos(pitch) 56 | 57 | Ry = yaw.new_zeros(yaw.shape + (3, 3)) 58 | Ry[..., 0, 0] = torch.cos(yaw) 59 | Ry[..., 0, 2] = -torch.sin(yaw) 60 | Ry[..., 1, 1] = 1 61 | Ry[..., 2, 0] = torch.sin(yaw) 62 | Ry[..., 2, 2] = torch.cos(yaw) 63 | 64 | Rz = roll.new_zeros(roll.shape + (3, 3)) 65 | Rz[..., 0, 0] = torch.cos(roll) 66 | Rz[..., 0, 1] = torch.sin(roll) 67 | Rz[..., 1, 0] = -torch.sin(roll) 68 | Rz[..., 1, 1] = torch.cos(roll) 69 | Rz[..., 2, 2] = 1 70 | 71 | return Rz @ Rx @ Ry 72 | 73 | 74 | def fov2focal(fov: torch.Tensor, size: torch.Tensor) -> torch.Tensor: 75 | """Compute focal length from (vertical/horizontal) field of view. 76 | 77 | Args: 78 | fov (torch.Tensor): Field of view in radians. 79 | size (torch.Tensor): Image height / width in pixels. 80 | 81 | Returns: 82 | torch.Tensor: Focal length in pixels. 
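Example: for fov = pi / 2 and size = 640 this gives 640 / 2 / tan(pi / 4) = 320 pixels.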
83 | """ 84 | return size / 2 / torch.tan(fov / 2) 85 | 86 | 87 | def focal2fov(focal: torch.Tensor, size: torch.Tensor) -> torch.Tensor: 88 | """Compute (vertical/horizontal) field of view from focal length. 89 | 90 | Args: 91 | focal (torch.Tensor): Focal length in pixels. 92 | size (torch.Tensor): Image height / width in pixels. 93 | 94 | Returns: 95 | torch.Tensor: Field of view in radians. 96 | """ 97 | return 2 * torch.arctan(size / (2 * focal)) 98 | 99 | 100 | def pitch2rho(pitch: torch.Tensor, f: torch.Tensor, h: torch.Tensor) -> torch.Tensor: 101 | """Compute the distance from principal point to the horizon. 102 | 103 | Args: 104 | pitch (torch.Tensor): Pitch angle in radians. 105 | f (torch.Tensor): Focal length in pixels. 106 | h (torch.Tensor): Image height in pixels. 107 | 108 | Returns: 109 | torch.Tensor: Relative distance to the horizon. 110 | """ 111 | return torch.tan(pitch) * f / h 112 | 113 | 114 | def rho2pitch(rho: torch.Tensor, f: torch.Tensor, h: torch.Tensor) -> torch.Tensor: 115 | """Compute the pitch angle from the distance to the horizon. 116 | 117 | Args: 118 | rho (torch.Tensor): Relative distance to the horizon. 119 | f (torch.Tensor): Focal length in pixels. 120 | h (torch.Tensor): Image height in pixels. 121 | 122 | Returns: 123 | torch.Tensor: Pitch angle in radians. 124 | """ 125 | return torch.atan(rho * h / f) 126 | 127 | 128 | def rad2deg(rad: torch.Tensor) -> torch.Tensor: 129 | """Convert radians to degrees. 130 | 131 | Args: 132 | rad (torch.Tensor): Angle in radians. 133 | 134 | Returns: 135 | torch.Tensor: Angle in degrees. 136 | """ 137 | return rad / torch.pi * 180 138 | 139 | 140 | def deg2rad(deg: torch.Tensor) -> torch.Tensor: 141 | """Convert degrees to radians. 142 | 143 | Args: 144 | deg (torch.Tensor): Angle in degrees. 145 | 146 | Returns: 147 | torch.Tensor: Angle in radians. 148 | """ 149 | return deg / 180 * torch.pi 150 | -------------------------------------------------------------------------------- /siclib/utils/experiments.py: -------------------------------------------------------------------------------- 1 | """ 2 | A set of utilities to manage and load checkpoints of training experiments. 
3 | 4 | Author: Paul-Edouard Sarlin (skydes) 5 | """ 6 | 7 | import logging 8 | import os 9 | import re 10 | import shutil 11 | from pathlib import Path 12 | 13 | import torch 14 | from omegaconf import OmegaConf 15 | 16 | from siclib.models import get_model 17 | from siclib.settings import TRAINING_PATH 18 | 19 | logger = logging.getLogger(__name__) 20 | 21 | # flake8: noqa 22 | # mypy: ignore-errors 23 | 24 | 25 | def list_checkpoints(dir_): 26 | """List all valid checkpoints in a given directory.""" 27 | checkpoints = [] 28 | for p in dir_.glob("checkpoint_*.tar"): 29 | numbers = re.findall(r"(\d+)", p.name) 30 | assert len(numbers) <= 2 31 | if len(numbers) == 0: 32 | continue 33 | if len(numbers) == 1: 34 | checkpoints.append((int(numbers[0]), p)) 35 | else: 36 | checkpoints.append((int(numbers[1]), p)) 37 | return checkpoints 38 | 39 | 40 | def get_last_checkpoint(exper, allow_interrupted=True): 41 | """Get the last saved checkpoint for a given experiment name.""" 42 | ckpts = list_checkpoints(Path(TRAINING_PATH, exper)) 43 | if not allow_interrupted: 44 | ckpts = [(n, p) for (n, p) in ckpts if "_interrupted" not in p.name] 45 | assert len(ckpts) > 0 46 | return sorted(ckpts)[-1][1] 47 | 48 | 49 | def get_best_checkpoint(exper): 50 | """Get the checkpoint with the best loss, for a given experiment name.""" 51 | return Path(TRAINING_PATH, exper, "checkpoint_best.tar") 52 | 53 | 54 | def delete_old_checkpoints(dir_, num_keep): 55 | """Delete all but the num_keep last saved checkpoints.""" 56 | ckpts = list_checkpoints(dir_) 57 | ckpts = sorted(ckpts)[::-1] 58 | kept = 0 59 | for ckpt in ckpts: 60 | if ("_interrupted" in str(ckpt[1]) and kept > 0) or kept >= num_keep: 61 | logger.info(f"Deleting checkpoint {ckpt[1].name}") 62 | ckpt[1].unlink() 63 | else: 64 | kept += 1 65 | 66 | 67 | def load_experiment(exper, conf=None, get_last=False, ckpt=None): 68 | """Load and return the model of a given experiment.""" 69 | if conf is None: 70 | conf = {} 71 | 72 | exper = Path(exper) 73 | if exper.suffix != ".tar": 74 | ckpt = get_last_checkpoint(exper) if get_last else get_best_checkpoint(exper) 75 | else: 76 | ckpt = exper 77 | logger.info(f"Loading checkpoint {ckpt.name}") 78 | ckpt = torch.load(str(ckpt), map_location="cpu") 79 | 80 | loaded_conf = OmegaConf.create(ckpt["conf"]) 81 | OmegaConf.set_struct(loaded_conf, False) 82 | conf = OmegaConf.merge(loaded_conf.model, OmegaConf.create(conf)) 83 | model = get_model(conf.name)(conf).eval() 84 | 85 | state_dict = ckpt["model"] 86 | 87 | dict_params = set(state_dict.keys()) 88 | model_params = set(map(lambda n: n[0], model.named_parameters())) 89 | diff = model_params - dict_params 90 | if len(diff) > 0: 91 | subs = os.path.commonprefix(list(diff)).rstrip(".") 92 | logger.warning(f"Missing {len(diff)} parameters in {subs}: {diff}") 93 | model.load_state_dict(state_dict, strict=False) 94 | return model 95 | 96 | 97 | def save_experiment( 98 | model, 99 | optimizer, 100 | lr_scheduler, 101 | conf, 102 | losses, 103 | results, 104 | best_eval, 105 | epoch, 106 | iter_i, 107 | output_dir, 108 | stop=False, 109 | distributed=False, 110 | cp_name=None, 111 | ): 112 | """Save the current model to a checkpoint 113 | and return the best result so far.""" 114 | state = (model.module if distributed else model).state_dict() 115 | checkpoint = { 116 | "model": state, 117 | "optimizer": optimizer.state_dict(), 118 | "lr_scheduler": lr_scheduler.state_dict(), 119 | "conf": OmegaConf.to_container(conf, resolve=True), 120 | "epoch": epoch, 121 | "losses": 
losses, 122 | "eval": results, 123 | } 124 | if cp_name is None: 125 | cp_name = f"checkpoint_{epoch}_{iter_i}" + ("_interrupted" if stop else "") + ".tar" 126 | logger.info(f"Saving checkpoint {cp_name}") 127 | cp_path = str(output_dir / cp_name) 128 | torch.save(checkpoint, cp_path) 129 | 130 | if cp_name != "checkpoint_best.tar" and results[conf.train.best_key] < best_eval: 131 | best_eval = results[conf.train.best_key] 132 | logger.info(f"New best val: {conf.train.best_key}={best_eval}") 133 | shutil.copy(cp_path, str(output_dir / "checkpoint_best.tar")) 134 | delete_old_checkpoints(output_dir, conf.train.keep_last_checkpoints) 135 | return best_eval 136 | -------------------------------------------------------------------------------- /siclib/utils/export_predictions.py: -------------------------------------------------------------------------------- 1 | """ 2 | Export the predictions of a model for a given dataloader (e.g. ImageFolder). 3 | Use a standalone script with `python3 -m geocalib.scripts.export_predictions dir` 4 | or call from another script. 5 | """ 6 | 7 | import logging 8 | from pathlib import Path 9 | 10 | import h5py 11 | import numpy as np 12 | import torch 13 | from tqdm import tqdm 14 | 15 | from siclib.utils.tensor import batch_to_device 16 | from siclib.utils.tools import get_device 17 | 18 | # flake8: noqa 19 | # mypy: ignore-errors 20 | 21 | logger = logging.getLogger(__name__) 22 | 23 | 24 | @torch.no_grad() 25 | def export_predictions( 26 | loader, 27 | model, 28 | output_file, 29 | as_half=False, 30 | keys="*", 31 | callback_fn=None, 32 | optional_keys=None, 33 | verbose=True, 34 | ): # sourcery skip: low-code-quality 35 | if optional_keys is None: 36 | optional_keys = [] 37 | 38 | assert keys == "*" or isinstance(keys, (tuple, list)) 39 | Path(output_file).parent.mkdir(exist_ok=True, parents=True) 40 | hfile = h5py.File(str(output_file), "w") 41 | device = get_device() 42 | model = model.to(device).eval() 43 | 44 | if not verbose: 45 | logger.info(f"Exporting predictions to {output_file}") 46 | 47 | for data_ in tqdm(loader, desc="Exporting", total=len(loader), ncols=80, disable=not verbose): 48 | data = batch_to_device(data_, device, non_blocking=True) 49 | pred = model(data) 50 | if callback_fn is not None: 51 | pred = {**callback_fn(pred, data), **pred} 52 | if keys != "*": 53 | if len(set(keys) - set(pred.keys())) > 0: 54 | raise ValueError(f"Missing key {set(keys) - set(pred.keys())}") 55 | pred = {k: v for k, v in pred.items() if k in keys + optional_keys} 56 | 57 | # assert len(pred) > 0, "No predictions found" 58 | 59 | for idx in range(len(data["name"])): 60 | pred_ = {k: v[idx].cpu().numpy() for k, v in pred.items()} 61 | 62 | if as_half: 63 | for k in pred_: 64 | dt = pred_[k].dtype 65 | if dt == np.float32: 66 | pred_[k] = pred_[k].astype(np.float16) 67 | try: 68 | name = data["name"][idx] 69 | try: 70 | grp = hfile.create_group(name) 71 | except ValueError as e: 72 | raise ValueError(f"Group already exists {name}") from e 73 | 74 | # grp = hfile.create_group(name) 75 | for k, v in pred_.items(): 76 | grp.create_dataset(k, data=v) 77 | except RuntimeError: 78 | print(f"Failed to export {name}") 79 | continue 80 | 81 | del pred 82 | 83 | hfile.close() 84 | return output_file 85 | -------------------------------------------------------------------------------- /siclib/utils/summary_writer.py: -------------------------------------------------------------------------------- 1 | """This module implements the writer class for 
logging to tensorboard or wandb.""" 2 | 3 | import logging 4 | import os 5 | from typing import Any, Dict, Optional 6 | 7 | from omegaconf import DictConfig 8 | from torch import nn 9 | from torch.utils.tensorboard import SummaryWriter as TFSummaryWriter 10 | 11 | from siclib import __module_name__ 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | try: 16 | import wandb 17 | except ImportError: 18 | logger.debug("Could not import wandb.") 19 | wandb = None 20 | 21 | # mypy: ignore-errors 22 | 23 | 24 | def dot_conf(conf: DictConfig) -> Dict[str, Any]: 25 | """Recursively convert a DictConfig to a flat dict with keys joined by dots.""" 26 | d = {} 27 | for k, v in conf.items(): 28 | if isinstance(v, DictConfig): 29 | d |= {f"{k}.{k2}": v2 for k2, v2 in dot_conf(v).items()} 30 | else: 31 | d[k] = v 32 | return d 33 | 34 | 35 | class SummaryWriter: 36 | """Writer class for logging to tensorboard or wandb.""" 37 | 38 | def __init__(self, conf: DictConfig, args: DictConfig, log_dir: str): 39 | """Initialize the writer.""" 40 | self.conf = conf 41 | 42 | if not conf.train.writer: 43 | self.use_wandb = False 44 | self.use_tensorboard = False 45 | return 46 | 47 | self.use_wandb = "wandb" in conf.train.writer 48 | self.use_tensorboard = "tensorboard" in conf.train.writer 49 | 50 | if self.use_wandb and not wandb: 51 | raise ImportError("wandb not installed.") 52 | 53 | if self.use_tensorboard: 54 | self.writer = TFSummaryWriter(log_dir=log_dir) 55 | 56 | if self.use_wandb: 57 | os.environ["WANDB__SERVICE_WAIT"] = "300" 58 | wandb.init(project=__module_name__, name=args.experiment, config=dot_conf(conf)) 59 | 60 | if conf.train.writer and not self.use_wandb and not self.use_tensorboard: 61 | raise NotImplementedError(f"Writer {conf.train.writer} not implemented") 62 | 63 | def add_scalar(self, tag: str, value: float, step: Optional[int] = None): 64 | """Log a scalar value to tensorboard or wandb.""" 65 | if self.use_wandb: 66 | step = 1 if step == 0 else step 67 | wandb.log({tag: value}, step=step) 68 | 69 | if self.use_tensorboard: 70 | self.writer.add_scalar(tag, value, step) 71 | 72 | def add_figure(self, tag: str, figure, step: Optional[int] = None): 73 | """Log a figure to tensorboard or wandb.""" 74 | if self.use_wandb: 75 | step = 1 if step == 0 else step 76 | wandb.log({tag: figure}, step=step) 77 | if self.use_tensorboard: 78 | self.writer.add_figure(tag, figure, step) 79 | 80 | def add_histogram(self, tag: str, values, step: Optional[int] = None): 81 | """Log a histogram to tensorboard or wandb.""" 82 | if self.use_tensorboard: 83 | self.writer.add_histogram(tag, values, step) 84 | 85 | def add_text(self, tag: str, text: str, step: Optional[int] = None): 86 | """Log text to tensorboard or wandb.""" 87 | if self.use_tensorboard: 88 | self.writer.add_text(tag, text, step) 89 | 90 | def add_pr_curve(self, tag: str, values, step: Optional[int] = None): 91 | """Log a precision-recall curve to tensorboard or wandb.""" 92 | if self.use_wandb: 93 | step = 1 if step == 0 else step 94 | # @TODO: check if this works 95 | # wandb.log({"pr": wandb.plots.precision_recall(y_test, y_probas, labels)}) 96 | wandb.log({tag: wandb.plots.precision_recall(values)}, step=step) 97 | 98 | if self.use_tensorboard: 99 | self.writer.add_pr_curve(tag, values, step) 100 | 101 | def watch(self, model: nn.Module, log_freq: int = 1000): 102 | """Watch a model for gradient updates.""" 103 | if self.use_wandb: 104 | wandb.watch( 105 | model, 106 | log="gradients", 107 | log_freq=log_freq, 108 | ) 109 | 110 | def 
close(self): 111 | """Close the writer.""" 112 | if self.use_wandb: 113 | wandb.finish() 114 | 115 | if self.use_tensorboard: 116 | self.writer.close() 117 | -------------------------------------------------------------------------------- /siclib/visualization/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/javrtg/AnyCalib/7bfa51b5797e8784cc6d438aab96c57c226a699e/siclib/visualization/__init__.py --------------------------------------------------------------------------------
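A minimal usage sketch for the SummaryWriter above (the bare-bones config below is an assumption; real configurations come from the files in siclib/configs):

    from omegaconf import OmegaConf

    from siclib.utils.summary_writer import SummaryWriter

    conf = OmegaConf.create({"train": {"writer": ["tensorboard"]}})  # assumed minimal conf
    args = OmegaConf.create({"experiment": "demo"})  # only used when wandb is enabled
    writer = SummaryWriter(conf, args, log_dir="outputs/training/demo")
    writer.add_scalar("loss/total", 0.42, step=100)  # scalar goes to tensorboard
    writer.close()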