├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── assets ├── images │ └── example.jpg └── videos │ └── demo_deeplsd.gif ├── deeplsd ├── __init__.py ├── configs │ ├── export_eth3d.yaml │ ├── export_hpatches.yaml │ ├── export_minidepth.yaml │ ├── export_nyu.yaml │ ├── export_rdnim.yaml │ ├── export_wireframe.yaml │ ├── export_york_urban.yaml │ ├── export_york_urban_lines.yaml │ ├── train_merged_datasets.yaml │ ├── train_minidepth.yaml │ └── train_wireframe.yaml ├── datasets │ ├── __init__.py │ ├── base_dataset.py │ ├── hpatches.py │ ├── merge_dataset.py │ ├── minidepth.py │ ├── nyu.py │ ├── rdnim.py │ ├── utils │ │ ├── __init__.py │ │ ├── data_augmentation.py │ │ ├── homographies.py │ │ ├── megadepth_train_scenes.txt │ │ ├── megadepth_val_scenes.txt │ │ └── preprocessing.py │ ├── wireframe_eval.py │ ├── wireframe_ha.py │ ├── york_urban.py │ └── york_urban_lines.py ├── evaluation │ ├── __init__.py │ └── ls_evaluation.py ├── geometry │ ├── __init__.py │ ├── homography_adaptation.py │ ├── line_utils.py │ ├── projection.py │ ├── utils.py │ └── viz_2d.py ├── models │ ├── __init__.py │ ├── backbones │ │ ├── __init__.py │ │ └── vgg_unet.py │ ├── base_model.py │ ├── deeplsd.py │ ├── deeplsd_inference.py │ ├── lbd.py │ └── line_refiner.py ├── scripts │ ├── evaluate_line_detection.py │ ├── evaluate_vp_estimation.py │ ├── export_features.py │ ├── homography_adaptation_df.py │ ├── line_refinement.py │ └── train.py ├── settings.py └── utils │ ├── __init__.py │ ├── experiments.py │ ├── stdout_capturing.py │ ├── tensor.py │ └── tools.py ├── install.sh ├── line_refinement ├── CMakeLists.txt ├── cost_functions.h ├── line_vp_optim.cpp ├── setup.py └── vp_det.h ├── notebooks ├── demo_line_detection.ipynb └── quickstart_demo.ipynb ├── quickstart_install.sh ├── requirements.txt ├── setup.py └── third_party └── afm_lib ├── afm_op ├── __init__.py ├── afm.h ├── cuda │ ├── afm.cu │ └── afm.h ├── example.py ├── setup.py └── vision.cpp └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Build ----------------------------------------------------------------------- 3 | 4 | ### Output and Cache Directories ### 5 | /External/ 6 | /Debug/ 7 | /Docs/ 8 | /Release/ 9 | /build/ 10 | /bin/ 11 | /out/ 12 | /cmake-build-*/ 13 | /dist/ 14 | 15 | ### CMake ### 16 | CMakeCache.txt 17 | CMakeFiles 18 | CMakeScripts 19 | Makefile 20 | cmake_install.cmake 21 | install_manifest.txt 22 | 23 | # Language -------------------------------------------------------------------- 24 | 25 | ### C++ ### 26 | # Compiled Object files 27 | *.slo 28 | *.lo 29 | *.o 30 | 31 | # Comment this line if you want to version Wavefront .obj files. 
32 | *.obj 33 | 34 | # Precompiled Headers 35 | *.gch 36 | *.pch 37 | 38 | # Compiled Dynamic libraries 39 | *.so 40 | *.dylib 41 | *.dll 42 | 43 | # Fortran module files 44 | *.mod 45 | 46 | # Compiled Static libraries 47 | *.lai 48 | *.la 49 | *.a 50 | *.lib 51 | 52 | # Executables 53 | *.exe 54 | *.out 55 | *.app 56 | 57 | # Byte-compiled / optimized / DLL files 58 | __pycache__/ 59 | *.py[cod] 60 | *$py.class 61 | 62 | # C extensions 63 | *.so 64 | 65 | # Distribution / packaging 66 | .Python 67 | build/ 68 | develop-eggs/ 69 | dist/ 70 | downloads/ 71 | eggs/ 72 | .eggs/ 73 | lib/ 74 | lib64/ 75 | parts/ 76 | sdist/ 77 | var/ 78 | wheels/ 79 | pip-wheel-metadata/ 80 | share/python-wheels/ 81 | *.egg-info/ 82 | .installed.cfg 83 | *.egg 84 | MANIFEST 85 | 86 | # PyInstaller 87 | # Usually these files are written by a python script from a template 88 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 89 | *.manifest 90 | *.spec 91 | 92 | # Installer logs 93 | pip-log.txt 94 | pip-delete-this-directory.txt 95 | 96 | # Unit test / coverage reports 97 | htmlcov/ 98 | .tox/ 99 | .nox/ 100 | .coverage 101 | .coverage.* 102 | .cache 103 | nosetests.xml 104 | coverage.xml 105 | *.cover 106 | *.py,cover 107 | .hypothesis/ 108 | .pytest_cache/ 109 | 110 | # Translations 111 | *.mo 112 | *.pot 113 | 114 | # Django stuff: 115 | *.log 116 | local_settings.py 117 | db.sqlite3 118 | db.sqlite3-journal 119 | 120 | # Flask stuff: 121 | instance/ 122 | .webassets-cache 123 | 124 | # Scrapy stuff: 125 | .scrapy 126 | 127 | # Sphinx documentation 128 | docs/_build/ 129 | 130 | # PyBuilder 131 | target/ 132 | 133 | # Jupyter Notebook 134 | .ipynb_checkpoints 135 | 136 | # IPython 137 | profile_default/ 138 | ipython_config.py 139 | 140 | # pyenv 141 | .python-version 142 | 143 | # pipenv 144 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 145 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 146 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 147 | # install all needed dependencies. 148 | #Pipfile.lock 149 | 150 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 151 | __pypackages__/ 152 | 153 | # Celery stuff 154 | celerybeat-schedule 155 | celerybeat.pid 156 | 157 | # SageMath parsed files 158 | *.sage.py 159 | 160 | # Environments 161 | .env 162 | .venv 163 | env/ 164 | venv/ 165 | ENV/ 166 | env.bak/ 167 | venv.bak/ 168 | 169 | # Spyder project settings 170 | .spyderproject 171 | .spyproject 172 | 173 | # Rope project settings 174 | .ropeproject 175 | 176 | # mkdocs documentation 177 | /site 178 | 179 | # mypy 180 | .mypy_cache/ 181 | .dmypy.json 182 | dmypy.json 183 | 184 | # Pyre type checker 185 | .pyre/ 186 | 187 | ### VS Code 188 | .vscode 189 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "third_party/progressive-x"] 2 | path = third_party/progressive-x 3 | url = https://github.com/danini/progressive-x.git 4 | [submodule "third_party/pytlbd"] 5 | path = third_party/pytlbd 6 | url = https://github.com/iago-suarez/pytlbd.git 7 | [submodule "line_refinement/pybind11"] 8 | path = line_refinement/pybind11 9 | url = https://github.com/pybind/pybind11 10 | [submodule "third_party/homography_est"] 11 | path = third_party/homography_est 12 | url = https://github.com/rpautrat/homography_est.git 13 | [submodule "third_party/pytlsd"] 14 | path = third_party/pytlsd 15 | url = https://github.com/iago-suarez/pytlsd.git 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Rémi Pautrat 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepLSD 2 | Implementation of the paper [DeepLSD: Line Segment Detection and Refinement with Deep Image Gradients](https://arxiv.org/abs/2212.07766), accepted at CVPR 2023. **DeepLSD is a generic line detector that combines the robustness of deep learning with the accuracy of handcrafted detectors**. It can be used to extract **generic line segments from images in-the-wild**, and is **suitable for any task requiring high precision**, such as homography estimation, visual localization, and 3D reconstruction. 
By predicting line distance and angle fields, **it can furthermore refine any existing line segments** through an optimization. 3 | 4 | Demo of the lines detected by DeepLSD, its line distance field, and line angle field: 5 | 6 |

![DeepLSD demo](assets/videos/demo_deeplsd.gif) 7 | 8 |

9 | 10 | ## Installation 11 | First clone the repository and its submodules: 12 | ``` 13 | git clone --recurse-submodules https://github.com/cvg/DeepLSD.git 14 | cd DeepLSD 15 | ``` 16 | 17 | ### Quickstart install (for inference only) 18 | 19 | To test the pre-trained model on your images, without the final line refinement, the following installation is sufficient: 20 | ``` 21 | bash quickstart_install.sh 22 | ``` 23 | You can then test it with the notebook `notebooks/quickstart_demo.ipynb`. 24 | 25 | ### Full install 26 | 27 | Follow these instructions if you wish to re-train DeepLSD, evaluate it, or use the final step of line refinement. 28 | 29 | Dependencies that need to be installed on your system: 30 | - [OpenCV](https://opencv.org/) 31 | - [GFlags](https://github.com/gflags/gflags) 32 | - [GLog](https://github.com/google/glog) 33 | - [Ceres](http://ceres-solver.org/) 34 | - DeepLSD was successfully tested with GCC 9, Python 3.7, and CUDA 11. Other combinations may work as well. 35 | 36 | Once these libraries are installed, you can proceed with the installation of the necessary requirements and third party libraries: 37 | ``` 38 | bash install.sh 39 | ``` 40 | 41 | This repo uses a base experiment folder (EXPER_PATH) containing the output of all trainings, and a base dataset path (DATA_PATH) containing all the evaluation and training datasets. You can set the path to these two folders in the file `deeplsd/settings.py`. 42 | 43 | ## Usage 44 | We provide two pre-trained models for DeepLSD: [deeplsd_wireframe.tar](https://cvg-data.inf.ethz.ch/DeepLSD/deeplsd_wireframe.tar) and [deeplsd_md.tar](https://cvg-data.inf.ethz.ch/DeepLSD/deeplsd_md.tar), trained respectively on the Wireframe and MegaDepth datasets. The former can be used for easy indoor datasets, while the latter is more generic and works outdoors and on more challenging scenes. 45 | The example notebook `notebooks/demo_line_detection.ipynb` showcases how to use DeepLSD in practice. Please refer to the comments on the config of this notebook to understand the usage of each hyperparameter. A minimal Python sketch of this interface is also given after the GT generation section below. 46 | 47 | You can download the two models with the following command: 48 | ``` 49 | mkdir weights 50 | wget https://cvg-data.inf.ethz.ch/DeepLSD/deeplsd_wireframe.tar -O weights/deeplsd_wireframe.tar 51 | wget https://cvg-data.inf.ethz.ch/DeepLSD/deeplsd_md.tar -O weights/deeplsd_md.tar 52 | ``` 53 | 54 | ## Ground truth (GT) generation 55 | DeepLSD requires generating ground-truth line distance and angle fields through homography adaptation. We provide a Python script to do it for any list of images, leveraging CUDA: 56 | ``` 57 | python -m deeplsd.scripts.homography_adaptation_df <path_to_image_list> <output_folder> --num_H <number_of_homographies> --n_jobs <number_of_parallel_jobs> 58 | ``` 59 | Note that the GT generation can take a long time, from hours to days depending on the number of images, the number of homographies, and the computation power of your machine. 60 | The output folder can then be specified in the training config of the corresponding dataset. For example, after generating the GT for the wireframe dataset in the folder `DATA_PATH/export_datasets/wireframe_ha5`, the field 'gt_dir' of the config file `deeplsd/configs/train_wireframe.yaml` can be updated with the value `export_datasets/wireframe_ha5` (paths are given relative to DATA_PATH).
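For reference, here is a minimal Python sketch of running a pre-trained model on a single image, complementing the Usage section above. It follows the interface demonstrated in `notebooks/demo_line_detection.ipynb`: the configuration keys mirror those of the export configs in `deeplsd/configs/`, but the `'model'` checkpoint key and the `'lines'` output field are assumptions taken from that notebook rather than a guaranteed API.

```python
import cv2
import torch

from deeplsd.models.deeplsd_inference import DeepLSD

# Detection config: same keys as the 'model' section of deeplsd/configs/export_*.yaml
conf = {
    'detect_lines': True,
    'line_detection_params': {
        'merge': False,
        'filtering': True,
        'grad_thresh': 3,
        'grad_nfa': True,
    },
}

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load the pre-trained weights (the 'model' key is an assumption from the demo notebook)
ckpt = torch.load('weights/deeplsd_md.tar', map_location='cpu')
net = DeepLSD(conf)
net.load_state_dict(ckpt['model'])
net = net.to(device).eval()

# Read a grayscale image and normalize it to [0, 1], as the dataset loaders do
img = cv2.imread('assets/images/example.jpg', 0)
inputs = {'image': torch.tensor(img, dtype=torch.float, device=device)[None, None] / 255.}

with torch.no_grad():
    out = net(inputs)

# Assumed output format: one [N, 2, 2] array of line segment endpoints per image in the batch
lines = out['lines'][0]
```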
61 | 62 | ## Training 63 | To train the network, simply run the following command: 64 | ``` 65 | python -m deeplsd.scripts.train <experiment_name> --conf <path_to_config> 66 | ``` 67 | 68 | We provide data loaders for the [Wireframe dataset](https://github.com/huangkuns/wireframe) and [MegaDepth](https://www.cs.cornell.edu/projects/megadepth/), using the config files `deeplsd/configs/train_wireframe.yaml` and `deeplsd/configs/train_minidepth.yaml`, or `deeplsd/configs/train_merged_datasets.yaml` to train on both at the same time. Note that due to the sheer size of the MegaDepth dataset, we only sampled 50 images per scene (hence the name "Minidepth") and used the train/val split available in `deeplsd/datasets/utils/`. To train on the wireframe dataset, the command would typically look like: 69 | ``` 70 | python -m deeplsd.scripts.train deeplsd_wireframe --conf deeplsd/configs/train_wireframe.yaml 71 | ``` 72 | 73 | A model can be restored or fine-tuned by adding the '--restore' option. 74 | 75 | ## Line refinement 76 | The backbone extractor of DeepLSD can also be used to generate distance and angle fields from an image, and to refine existing line segments (from any existing line detector). This can be done given a folder of images and pre-extracted lines as follows: 77 | ``` 78 | python -m deeplsd.scripts.line_refinement 79 | ``` 80 | Please refer to the help of this script for more details on the expected arguments and the format of the line detections. 81 | 82 | ## Evaluation 83 | As in the paper, we provide code for the evaluation of low-level line detection metrics, as well as vanishing point (VP) estimation. In both cases, the lines and VPs need to be extracted with the script: 84 | ``` 85 | python -m deeplsd.scripts.export_features <path_to_config> <path_to_checkpoint> <output_folder> 86 | ``` 87 | Add the option '--pred_vps' to predict the vanishing points in addition to the line segments. 88 | 89 | ### Low-level line detection metrics 90 | We provide dataloaders for the following datasets: 91 | - [Wireframe](https://github.com/huangkuns/wireframe) 92 | - [HPatches](https://github.com/hpatches/hpatches-dataset) (with the full image sequences) 93 | - [RDNIM](https://www.polybox.ethz.ch/index.php/s/P89YkZyOfdhmdPN) 94 | - [York Urban DB](https://www.elderlab.yorku.ca/resources/york-urban-line-segment-database-information/) 95 | 96 | The corresponding config files (to export the features) are located in the `deeplsd/configs` folder. For example, exporting line detections on HPatches would look like: 97 | ``` 98 | python -m deeplsd.scripts.export_features deeplsd/configs/export_hpatches.yaml weights/deeplsd_md.tar hpatches_outputs 99 | ``` 100 | 101 | The evaluation can then be run with: 102 | ``` 103 | python -m deeplsd.scripts.evaluate_line_detection <dataset_name> <pred_folder> <evaluation_folder> <method_name> 104 | ``` 105 | On HPatches, this could look like: 106 | ``` 107 | python -m deeplsd.scripts.evaluate_line_detection hpatches hpatches_outputs hpatches_evaluation deeplsd 108 | ``` 109 | 110 | ### VP estimation metrics 111 | We provide dataloaders for the following datasets: 112 | - [York Urban DB](https://www.elderlab.yorku.ca/resources/york-urban-line-segment-database-information/) 113 | - [NYU-VP](https://github.com/fkluger/nyu_vp) 114 | 115 | First, the VPs need to be exported.
For example, exporting line detections on York Urban would look like: 116 | ``` 117 | python -m deeplsd.scripts.export_features deeplsd/configs/export_york_urban.yaml weights/deeplsd_md.tar yud_outputs --pred_vps 118 | ``` 119 | 120 | The evaluation can then be run with: 121 | ``` 122 | python -m deeplsd.scripts.evaluate_vp_estimation 123 | ``` 124 | On York Urban, this could look like: 125 | ``` 126 | python -m deeplsd.scripts.evaluate_vp_estimation york_urban yud_outputs yud_evaluation deeplsd 127 | ``` 128 | 129 | **Note:** the 3D line reconstruction and visual localization applications of the paper will be released in a separate repository. 130 | 131 | ## Bibtex 132 | If you use this code in your project, please consider citing the following paper: 133 | ```bibtex 134 | @InProceedings{Pautrat_2023_DeepLSD, 135 | author = {Pautrat, Rémi and Barath, Daniel and Larsson, Viktor and Oswald, Martin R. and Pollefeys, Marc}, 136 | title = {DeepLSD: Line Segment Detection and Refinement with Deep Image Gradients}, 137 | booktitle = {Computer Vision and Pattern Recognition (CVPR)}, 138 | year = {2023}, 139 | } 140 | ``` 141 | -------------------------------------------------------------------------------- /assets/images/example.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvg/DeepLSD/d873fd3619d6e44a9f625bc437ab4786057677e5/assets/images/example.jpg -------------------------------------------------------------------------------- /assets/videos/demo_deeplsd.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvg/DeepLSD/d873fd3619d6e44a9f625bc437ab4786057677e5/assets/videos/demo_deeplsd.gif -------------------------------------------------------------------------------- /deeplsd/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | 4 | logging.basicConfig(format='[%(asctime)s %(levelname)s] %(message)s', 5 | datefmt='%m/%d/%Y %H:%M:%S', 6 | level=logging.INFO) 7 | 8 | if not (Path(__file__).parent / 'settings.py').exists(): 9 | raise ValueError('Cannot find settings.py file') 10 | -------------------------------------------------------------------------------- /deeplsd/configs/export_eth3d.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | name: eth3d 3 | dataset_dir: ETH3D 4 | downsize_factor: 8 5 | inpainted_depth: True 6 | batch_size: 1 7 | num_workers: 4 8 | model: 9 | name: deeplsd 10 | tiny: False 11 | sharpen: True 12 | line_neighborhood: 5 13 | loss_weights: 14 | df: 1. 15 | angle: 1. 16 | detect_lines: True 17 | multiscale: False 18 | scale_factors: [1., 1.5] 19 | line_detection_params: 20 | grad_nfa: True 21 | merge: False 22 | use_vps: False 23 | optimize: False 24 | optimize_vps: False 25 | filtering: True 26 | grad_thresh: 3 27 | -------------------------------------------------------------------------------- /deeplsd/configs/export_hpatches.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | name: hpatches 3 | dataset_dir: HPatches_sequences 4 | alteration: all 5 | max_side: 1200 6 | batch_size: 1 7 | num_workers: 4 8 | model: 9 | name: deeplsd 10 | tiny: False 11 | sharpen: True 12 | line_neighborhood: 5 13 | loss_weights: 14 | df: 1. 15 | angle: 1. 
16 | detect_lines: True 17 | multiscale: False 18 | scale_factors: [1., 1.5] 19 | line_detection_params: 20 | grad_nfa: True 21 | merge: False 22 | optimize: False 23 | use_vps: False 24 | optimize_vps: False 25 | filtering: True 26 | grad_thresh: 3 27 | -------------------------------------------------------------------------------- /deeplsd/configs/export_minidepth.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | name: minidepth 3 | dataset_dir: MiniDepth 4 | gt_dir: export_datasets/minidepth_ha3 5 | batch_size: 1 6 | resize: [512, 512] 7 | num_workers: 4 8 | photometric_augmentation: 9 | enable: False 10 | primitives: ['random_brightness', 'random_contrast', 11 | 'additive_speckle_noise', 'additive_gaussian_noise', 12 | 'additive_shade', 'motion_blur'] 13 | params: 14 | random_brightness: 15 | brightness: 0.5 16 | random_contrast: 17 | strength_range: [0.5, 1.5] 18 | additive_gaussian_noise: 19 | stddev_range: [5, 95] 20 | additive_speckle_noise: 21 | prob_range: [0, 0.01] 22 | additive_shade: 23 | transparency_range: [-0.8, 0.8] 24 | kernel_size_range: [100, 150] 25 | motion_blur: 26 | max_kernel_size: 3 27 | seed: 0 28 | model: 29 | name: deeplsd 30 | tiny: False 31 | sharpen: True 32 | line_neighborhood: 5 33 | loss_weights: 34 | df: 1. 35 | angle: 1. 36 | detect_lines: True 37 | multiscale: False 38 | scale_factors: [1., 1.5] 39 | line_detection_params: 40 | grad_nfa: True 41 | merge: False 42 | use_vps: False 43 | optimize: False 44 | optimize_vps: False 45 | filtering: True 46 | grad_thresh: 3 47 | -------------------------------------------------------------------------------- /deeplsd/configs/export_nyu.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | name: nyu 3 | dataset_dir: NYU_depth_v2 4 | batch_size: 1 5 | num_workers: 4 6 | model: 7 | name: deeplsd 8 | tiny: False 9 | sharpen: True 10 | line_neighborhood: 5 11 | loss_weights: 12 | df: 1. 13 | angle: 1. 14 | detect_lines: True 15 | multiscale: False 16 | scale_factors: [1., 1.5] 17 | line_detection_params: 18 | grad_nfa: True 19 | merge: False 20 | optimize: False 21 | use_vps: True 22 | optimize_vps: True 23 | filtering: True 24 | grad_thresh: 3 25 | -------------------------------------------------------------------------------- /deeplsd/configs/export_rdnim.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | name: rdnim 3 | dataset_dir: RDNIM 4 | reference: night 5 | batch_size: 1 6 | num_workers: 4 7 | model: 8 | name: deeplsd 9 | tiny: False 10 | sharpen: True 11 | line_neighborhood: 5 12 | loss_weights: 13 | df: 1. 14 | angle: 1. 15 | detect_lines: True 16 | multiscale: False 17 | scale_factors: [1., 1.5] 18 | line_detection_params: 19 | grad_nfa: False 20 | merge: False 21 | optimize: False 22 | use_vps: False 23 | optimize_vps: False 24 | filtering: True 25 | grad_thresh: 3 26 | -------------------------------------------------------------------------------- /deeplsd/configs/export_wireframe.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | name: wireframe_eval 3 | dataset_dir: Wireframe_raw 4 | batch_size: 1 5 | # resize: None 6 | num_workers: 2 7 | seed: 0 8 | model: 9 | name: deeplsd 10 | tiny: False 11 | sharpen: True 12 | line_neighborhood: 5 13 | loss_weights: 14 | df: 1 15 | angle: 1. 
16 | detect_lines: True 17 | multiscale: False 18 | scale_factors: [1., 1.5] 19 | line_detection_params: 20 | grad_nfa: True 21 | merge: False 22 | optimize: False 23 | use_vps: True 24 | optimize_vps: True 25 | filtering: 'strict' 26 | grad_thresh: 3 27 | -------------------------------------------------------------------------------- /deeplsd/configs/export_york_urban.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | name: york_urban 3 | dataset_dir: YorkUrbanDB 4 | batch_size: 1 5 | num_workers: 4 6 | model: 7 | name: deeplsd 8 | tiny: False 9 | sharpen: True 10 | line_neighborhood: 5 11 | loss_weights: 12 | df: 1. 13 | angle: 1. 14 | detect_lines: True 15 | multiscale: False 16 | scale_factors: [1., 1.5] 17 | line_detection_params: 18 | grad_nfa: True 19 | merge: False 20 | optimize: False 21 | use_vps: True 22 | optimize_vps: True 23 | filtering: True 24 | grad_thresh: 3 25 | -------------------------------------------------------------------------------- /deeplsd/configs/export_york_urban_lines.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | name: york_urban_lines 3 | dataset_dir: YorkUrbanDB 4 | grayscale: True 5 | batch_size: 1 6 | num_workers: 4 7 | model: 8 | name: deeplsd 9 | tiny: False 10 | sharpen: True 11 | line_neighborhood: 5 12 | loss_weights: 13 | df: 1. 14 | angle: 1. 15 | detect_lines: True 16 | multiscale: False 17 | scale_factors: [1., 1.5] 18 | line_detection_params: 19 | grad_nfa: True 20 | merge: False 21 | optimize: False 22 | use_vps: True 23 | optimize_vps: True 24 | filtering: True 25 | grad_thresh: 3 26 | -------------------------------------------------------------------------------- /deeplsd/configs/train_merged_datasets.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | name: merge_dataset 3 | names: ['minidepth', 'wireframe_ha'] 4 | dataset_dir: ['MiniDepth', 'Wireframe_raw'] 5 | gt_dir: ['export_datasets/minidepth_ha3', 'export_datasets/wireframe_ha5'] 6 | weights: [0.5, 0.5] 7 | num_workers: 4 8 | batch_size: 5 9 | resize: [512, 512] 10 | photometric_augmentation: 11 | enable: True 12 | primitives: ['random_brightness', 'random_contrast', 13 | 'additive_speckle_noise', 'additive_gaussian_noise', 14 | 'additive_shade', 'motion_blur'] 15 | params: 16 | random_brightness: 17 | brightness: 0.5 18 | random_contrast: 19 | strength_range: [0.5, 1.5] 20 | additive_gaussian_noise: 21 | stddev_range: [5, 95] 22 | additive_speckle_noise: 23 | prob_range: [0, 0.01] 24 | additive_shade: 25 | transparency_range: [-0.8, 0.8] 26 | kernel_size_range: [100, 150] 27 | motion_blur: 28 | max_kernel_size: 3 29 | warped_pair: False 30 | homographic_augmentation: True 31 | seed: 0 32 | model: 33 | name: deeplsd 34 | sharpen: True 35 | line_neighborhood: 5 36 | loss_weights: 37 | df: 1. 38 | angle: 1. 
39 | detect_lines: False 40 | line_detection_params: 41 | merge: False 42 | optimize: False 43 | use_vps: False 44 | optimize_vps: False 45 | filtering: True 46 | grad_thresh: 3 47 | train: 48 | epochs: 300 49 | lr: 0.001 50 | scheduler: 'ReduceLROnPlateau' 51 | patience: 10 52 | eval_every_iter: 600 53 | log_every_iter: 100 54 | save_every_iter: 1200 55 | keep_last_checkpoints: 3 56 | seed: 0 57 | -------------------------------------------------------------------------------- /deeplsd/configs/train_minidepth.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | name: minidepth 3 | dataset_dir: MiniDepth 4 | gt_dir: export_datasets/minidepth_ha3 5 | num_workers: 4 6 | batch_size: 5 7 | resize: [512, 512] 8 | photometric_augmentation: 9 | enable: True 10 | primitives: ['random_brightness', 'random_contrast', 11 | 'additive_speckle_noise', 'additive_gaussian_noise', 12 | 'additive_shade', 'motion_blur'] 13 | params: 14 | random_brightness: 15 | brightness: 0.5 16 | random_contrast: 17 | strength_range: [0.5, 1.5] 18 | additive_gaussian_noise: 19 | stddev_range: [5, 95] 20 | additive_speckle_noise: 21 | prob_range: [0, 0.01] 22 | additive_shade: 23 | transparency_range: [-0.8, 0.8] 24 | kernel_size_range: [100, 150] 25 | motion_blur: 26 | max_kernel_size: 3 27 | warped_pair: False 28 | homographic_augmentation: True 29 | seed: 0 30 | model: 31 | name: deeplsd 32 | sharpen: True 33 | line_neighborhood: 5 34 | loss_weights: 35 | df: 1. 36 | angle: 1. 37 | detect_lines: False 38 | line_detection_params: 39 | merge: False 40 | optimize: False 41 | use_vps: False 42 | optimize_vps: False 43 | filtering: True 44 | grad_thresh: 3 45 | train: 46 | epochs: 300 47 | lr: 0.001 48 | scheduler: 'ReduceLROnPlateau' 49 | patience: 10 50 | eval_every_iter: 10000 51 | log_every_iter: 500 52 | keep_last_checkpoints: 3 53 | seed: 0 54 | -------------------------------------------------------------------------------- /deeplsd/configs/train_wireframe.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | name: wireframe_ha 3 | dataset_dir: Wireframe_raw 4 | gt_dir: export_datasets/wireframe_ha5 5 | val_size: 500 6 | batch_size: 5 # 10 7 | resize: [333, 333] 8 | num_workers: 4 9 | photometric_augmentation: 10 | enable: True 11 | primitives: ['random_brightness', 'random_contrast', 12 | 'additive_speckle_noise', 'additive_gaussian_noise', 13 | 'additive_shade', 'motion_blur'] 14 | params: 15 | random_brightness: 16 | brightness: 0.5 17 | random_contrast: 18 | strength_range: [0.5, 1.5] 19 | additive_gaussian_noise: 20 | stddev_range: [5, 95] 21 | additive_speckle_noise: 22 | prob_range: [0, 0.01] 23 | additive_shade: 24 | transparency_range: [-0.8, 0.8] 25 | kernel_size_range: [100, 150] 26 | motion_blur: 27 | max_kernel_size: 3 28 | warped_pair: False 29 | homographic_augmentation: True 30 | seed: 0 31 | model: 32 | name: deeplsd 33 | tiny: False 34 | sharpen: True 35 | line_neighborhood: 5 36 | loss_weights: 37 | df: 1. 38 | angle: 1. 
39 | detect_lines: False 40 | line_detection_params: 41 | merge: False 42 | optimize: False 43 | use_vps: False 44 | optimize_vps: False 45 | filtering: True 46 | grad_thresh: 3 47 | train: 48 | epochs: 300 49 | lr: 0.001 50 | scheduler: 'ReduceLROnPlateau' 51 | patience: 10 52 | eval_every_iter: 10000 53 | log_every_iter: 500 54 | keep_last_checkpoints: 3 55 | seed: 0 56 | -------------------------------------------------------------------------------- /deeplsd/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from ..utils.tools import get_class 2 | from .base_dataset import BaseDataset 3 | 4 | 5 | def get_dataset(name): 6 | return get_class(name, __name__, BaseDataset) 7 | -------------------------------------------------------------------------------- /deeplsd/datasets/base_dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | Base class for dataset. 3 | See mnist.py for an example of dataset. 4 | """ 5 | 6 | from abc import ABCMeta, abstractmethod 7 | from omegaconf import OmegaConf 8 | import omegaconf 9 | from torch.utils.data import DataLoader, Sampler, get_worker_info 10 | import logging 11 | 12 | from ..utils.tools import set_num_threads 13 | 14 | 15 | class LoopSampler(Sampler): 16 | def __init__(self, loop_size, total_size=None): 17 | self.loop_size = loop_size 18 | self.total_size = total_size - (total_size % loop_size) 19 | 20 | def __iter__(self): 21 | return (i % self.loop_size for i in range(self.total_size)) 22 | 23 | def __len__(self): 24 | return self.total_size 25 | 26 | 27 | def worker_init_fn(i): 28 | info = get_worker_info() 29 | if hasattr(info.dataset, 'conf'): 30 | set_num_threads(info.dataset.conf.num_threads) 31 | else: 32 | set_num_threads(1) 33 | 34 | 35 | class BaseDataset(metaclass=ABCMeta): 36 | """ 37 | What the dataset model is expect to declare: 38 | default_conf: dictionary of the default configuration of the dataset. 39 | It overwrites base_default_conf in BaseModel, and it is overwritten by 40 | the user-provided configuration passed to __init__. 41 | Configurations can be nested. 42 | 43 | _init(self, conf): initialization method, where conf is the final 44 | configuration object (also accessible with `self.conf`). Accessing 45 | unkown configuration entries will raise an error. 46 | 47 | get_dataset(self, split): method that returns an instance of 48 | torch.utils.data.Dataset corresponding to the requested split string, 49 | which can be `'train'`, `'val'`, `'test'`, or `'export'`. 
50 | """ 51 | base_default_conf = { 52 | 'name': '???', 53 | 'num_workers': 1, 54 | 'train_batch_size': '???', 55 | 'val_batch_size': '???', 56 | 'test_batch_size': '???', 57 | 'export_batch_size': '???', 58 | 'batch_size': 1, 59 | 'num_threads': 1, 60 | } 61 | default_conf = {} 62 | 63 | def __init__(self, conf): 64 | """Perform some logic and call the _init method of the child model.""" 65 | default_conf = OmegaConf.merge( 66 | OmegaConf.create(self.base_default_conf), 67 | OmegaConf.create(self.default_conf)) 68 | OmegaConf.set_struct(default_conf, True) 69 | if isinstance(conf, dict): 70 | conf = OmegaConf.create(conf) 71 | self.conf = OmegaConf.merge(default_conf, conf) 72 | 73 | # Update the batch sizes if necessary 74 | for split in ['train', 'val', 'test', 'export']: 75 | if OmegaConf.is_missing(self.conf, split + '_batch_size'): 76 | OmegaConf.update(self.conf, split + '_batch_size', 77 | self.conf.batch_size, merge=False) 78 | 79 | OmegaConf.set_readonly(self.conf, True) 80 | logging.info(f'Creating dataset {self.__class__.__name__}') 81 | self._init(self.conf) 82 | 83 | @abstractmethod 84 | def _init(self, conf): 85 | """To be implemented by the child class.""" 86 | raise NotImplementedError 87 | 88 | @abstractmethod 89 | def get_dataset(self, split): 90 | """To be implemented by the child class.""" 91 | raise NotImplementedError 92 | 93 | def get_data_loader(self, split, shuffle=False): 94 | """Return a data loader for a given split.""" 95 | assert split in ['train', 'val', 'test', 'export'] 96 | batch_size = self.conf.get(split+'_batch_size') 97 | num_workers = self.conf.get('num_workers', batch_size) 98 | return DataLoader(self.get_dataset(split), batch_size=batch_size, 99 | shuffle=shuffle or split == 'train', 100 | pin_memory=True, num_workers=num_workers, 101 | worker_init_fn=worker_init_fn) 102 | 103 | def get_overfit_loader(self, split): 104 | """Return an overfit data loader. 105 | The training set is composed of a single duplicated batch, while 106 | the validation and test sets contain a single copy of this same batch. 107 | This is useful to debug a model and make sure that losses and metrics 108 | correlate well. 109 | """ 110 | assert split in ['train', 'val', 'test', 'export'] 111 | dataset = self.get_dataset('train') 112 | sampler = LoopSampler( 113 | self.conf.batch_size, 114 | len(dataset) if split == 'train' else self.conf.batch_size) 115 | num_workers = self.conf.get('num_workers', self.conf.batch_size) 116 | return DataLoader(dataset, batch_size=self.conf.batch_size, 117 | pin_memory=True, num_workers=num_workers, 118 | sampler=sampler, worker_init_fn=worker_init_fn) 119 | -------------------------------------------------------------------------------- /deeplsd/datasets/hpatches.py: -------------------------------------------------------------------------------- 1 | """ 2 | HPatches sequences dataset, to perform homography estimation and 3 | evaluate basic line detection metrics. 
4 | """ 5 | import os 6 | import numpy as np 7 | import torch 8 | import cv2 9 | from pathlib import Path 10 | from torch.utils.data import Dataset, DataLoader 11 | 12 | from .base_dataset import BaseDataset 13 | from ..settings import DATA_PATH 14 | 15 | 16 | 17 | class HPatches(BaseDataset, Dataset): 18 | default_conf = { 19 | 'dataset_dir': 'HPatches_sequences', 20 | 'alteration': 'all', # 'i', 'v' or 'all' 21 | 'max_side': 1200, 22 | } 23 | 24 | def _init(self, conf): 25 | pass 26 | 27 | def get_dataset(self, split): 28 | assert split in ['test', 'export'] 29 | return _Dataset(self.conf, split) 30 | 31 | # Overwrite the parent data loader to handle export mode 32 | def get_data_loader(self, split, shuffle=False): 33 | """Return a data loader for a given split.""" 34 | assert split in ['test', 'export'] 35 | batch_size = self.conf.get(split+'_batch_size') 36 | num_workers = self.conf.get('num_workers', batch_size) 37 | return DataLoader( 38 | self.get_dataset(split), batch_size=batch_size, 39 | shuffle=False, pin_memory=True, num_workers=num_workers) 40 | 41 | 42 | class _Dataset(Dataset): 43 | def __init__(self, conf, split): 44 | self.conf = conf 45 | self.root_dir = Path(DATA_PATH, conf.dataset_dir) 46 | folder_paths = [x for x in self.root_dir.iterdir() if x.is_dir()] 47 | self.data = [] 48 | for path in folder_paths: 49 | if conf.alteration == 'i' and path.stem[0] != 'i': 50 | continue 51 | if conf.alteration == 'v' and path.stem[0] != 'v': 52 | continue 53 | if split == 'test': 54 | for i in range(2, 7): 55 | ref_path = Path(path, "1.ppm") 56 | target_path = Path(path, str(i) + '.ppm') 57 | self.data += [{ 58 | "ref_name": str(ref_path.parent.stem + "_" + ref_path.stem), 59 | "ref_img_path": str(ref_path), 60 | "target_name": str(target_path.parent.stem + "_" + target_path.stem), 61 | "target_img_path": str(target_path), 62 | "H": np.loadtxt(str(Path(path, "H_1_" + str(i)))), 63 | }] 64 | else: 65 | for i in range(1, 7): 66 | ref_path = Path(path, str(i) + '.ppm') 67 | self.data += [{ 68 | "ref_name": str(ref_path.parent.stem + "_" + ref_path.stem), 69 | "ref_img_path": str(ref_path)}] 70 | 71 | def get_dataset(self, split): 72 | return self 73 | 74 | def __getitem__(self, idx): 75 | img0_path = self.data[idx]['ref_img_path'] 76 | img0 = cv2.imread(img0_path, 0) 77 | img_size = img0.shape 78 | 79 | if max(img_size) > self.conf.max_side: 80 | s = self.conf.max_side / max(img_size) 81 | h_s = int(img_size[0] * s) 82 | w_s = int(img_size[1] * s) 83 | img0 = cv2.resize(img0, (w_s, h_s), interpolation=cv2.INTER_AREA) 84 | 85 | # Normalize the image in [0, 1] 86 | img0 = img0.astype(float) / 255. 87 | img0 = torch.tensor(img0[None], dtype=torch.float) 88 | outputs = {'image': img0, 'image_path': img0_path, 89 | 'name': self.data[idx]['ref_name']} 90 | 91 | if 'target_name' in self.data[idx]: 92 | img1_path = self.data[idx]['target_img_path'] 93 | img1 = cv2.imread(img1_path, 0) 94 | H = self.data[idx]['H'] 95 | 96 | if max(img_size) > self.conf.max_side: 97 | img1 = cv2.resize(img1, (w_s, h_s), 98 | interpolation=cv2.INTER_AREA) 99 | H = self.adapt_homography_to_preprocessing( 100 | H, img_size, img_size, (h_s, w_s)) 101 | 102 | # Normalize the image in [0, 1] 103 | img1 = img1.astype(float) / 255. 
104 | img1 = torch.tensor(img1[None], dtype=torch.float) 105 | H = torch.tensor(H, dtype=torch.float) 106 | 107 | outputs['warped_image'] = img1 108 | outputs['warped_image_path'] = img1_path 109 | outputs['warped_name'] = self.data[idx]['target_name'] 110 | outputs['H'] = H 111 | 112 | return outputs 113 | 114 | def __len__(self): 115 | return len(self.data) 116 | 117 | def adapt_homography_to_preprocessing(self, H, img_shape1, img_shape2, 118 | target_size): 119 | source_size1 = np.array(img_shape1, dtype=float) 120 | source_size2 = np.array(img_shape2, dtype=float) 121 | target_size = np.array(target_size) 122 | 123 | # Get the scaling factor in resize 124 | scale1 = np.amax(target_size / source_size1) 125 | scaling1 = np.diag([1. / scale1, 1. / scale1, 1.]).astype(float) 126 | scale2 = np.amax(target_size / source_size2) 127 | scaling2 = np.diag([scale2, scale2, 1.]).astype(float) 128 | 129 | # Get the translation params in crop 130 | pad_y1 = (source_size1[0] * scale1 - target_size[0]) / 2. 131 | pad_x1 = (source_size1[1] * scale1 - target_size[1]) / 2. 132 | translation1 = np.array([[1., 0., pad_x1], 133 | [0., 1., pad_y1], 134 | [0., 0., 1.]], dtype=float) 135 | pad_y2 = (source_size2[0] * scale2 - target_size[0]) / 2. 136 | pad_x2 = (source_size2[1] * scale2 - target_size[1]) / 2. 137 | translation2 = np.array([[1., 0., -pad_x2], 138 | [0., 1., -pad_y2], 139 | [0., 0., 1.]], dtype=float) 140 | 141 | return translation2 @ scaling2 @ H @ scaling1 @ translation1 142 | -------------------------------------------------------------------------------- /deeplsd/datasets/merge_dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | Merge multiple datasets for line distance function prediction. 3 | """ 4 | 5 | import torch 6 | import numpy as np 7 | from omegaconf import OmegaConf 8 | from torch.utils.data import DataLoader 9 | 10 | from . 
import get_dataset 11 | from .base_dataset import BaseDataset, worker_init_fn 12 | 13 | 14 | class MergeDataset(BaseDataset, torch.utils.data.Dataset): 15 | default_conf = { 16 | 'names': ['minidepth', 'wireframe_ha'], 17 | 'dataset_dir': ['MiniDepth', 'Wireframe_raw'], 18 | 'gt_dir': ['export_datasets/minidepth_ha3', 19 | 'export_datasets/wireframe_ha5'], 20 | 'weights': [0.5, 0.5], 21 | 'resize': [512, 512], 22 | 'photometric_augmentation': { 23 | 'enable': True, 24 | 'primitives': [ 25 | 'random_brightness', 'random_contrast', 26 | 'additive_speckle_noise', 'additive_gaussian_noise', 27 | 'additive_shade', 'motion_blur'], 28 | 'params': { 29 | 'random_brightness': {'brightness': 0.5}, 30 | 'random_contrast': {'strength_range': [0.5, 1.5]}, 31 | 'additive_gaussian_noise': {'stddev_range': [5, 95]}, 32 | 'additive_speckle_noise': {'prob_range': [0, 0.01]}, 33 | 'additive_shade': { 34 | 'transparency_range': [-0.8, 0.8], 35 | 'kernel_size_range': [100, 150] 36 | }, 37 | 'motion_blur': {'max_kernel_size': 3} 38 | } 39 | }, 40 | 'warped_pair': False, 41 | 'homographic_augmentation': False, 42 | 'homography': { 43 | 'params': { 44 | 'translation': True, 45 | 'rotation': True, 46 | 'scaling': True, 47 | 'perspective': True, 48 | 'scaling_amplitude': 0.2, 49 | 'perspective_amplitude_x': 0.2, 50 | 'perspective_amplitude_y': 0.2, 51 | 'patch_ratio': 0.85, 52 | 'max_angle': 1.57, 53 | 'allow_artifacts': True 54 | } 55 | }, 56 | 'seed': 0 57 | } 58 | 59 | def _init(self, conf): 60 | pass 61 | 62 | def get_dataset(self, split): 63 | return _Dataset(self.conf, split) 64 | 65 | def collate_fn(self, batch): 66 | """ Customized collate_fn for non-batchable data. """ 67 | batch_keys = ['name', 'image', 'ref_valid_mask', 'df', 'line_level', 68 | 'offset', 'bg_mask', 'H_ref'] 69 | list_keys = [] 70 | 71 | outputs = {} 72 | for data_key in batch[0].keys(): 73 | batch_match = sum([_ in data_key for _ in batch_keys]) 74 | list_match = sum([_ in data_key for _ in list_keys]) 75 | if batch_match > 0 and list_match == 0: 76 | outputs[data_key] = torch.utils.data.dataloader.default_collate( 77 | [b[data_key] for b in batch]) 78 | elif batch_match == 0 and list_match > 0: 79 | outputs[data_key] = [b[data_key] for b in batch] 80 | elif batch_match == 0 and list_match == 0: 81 | continue 82 | else: 83 | raise ValueError( 84 | "A key matches batch keys and list keys simultaneously.") 85 | return outputs 86 | 87 | # Overwrite the parent data loader to handle custom collate_fn 88 | def get_data_loader(self, split, shuffle=False): 89 | """Return a data loader for a given split.""" 90 | assert split in ['train', 'val'], "Merge not available in test mode" 91 | batch_size = self.conf.get(split+'_batch_size') 92 | num_workers = self.conf.get('num_workers', batch_size) 93 | return DataLoader(self.get_dataset(split), batch_size=batch_size, 94 | shuffle=shuffle or split == 'train', 95 | pin_memory=True, num_workers=num_workers, 96 | worker_init_fn=worker_init_fn, 97 | collate_fn=self.collate_fn) 98 | 99 | 100 | class _Dataset(torch.utils.data.Dataset): 101 | def __init__(self, conf, split): 102 | assert split in ['train', 'val'], "Merge not available in test mode" 103 | self.datasets = [] 104 | self.weights = conf.weights 105 | for i, (name, data_dir, gt_dir) in enumerate(zip( 106 | conf.names, conf.dataset_dir, conf.gt_dir)): 107 | if split == 'val' and i > 0: 108 | # Use only the first dataset for val 109 | self.weights = [1] 110 | break 111 | curr_conf = OmegaConf.to_container(conf, resolve=True) 112 | 
curr_conf['dataset_dir'] = data_dir 113 | curr_conf['gt_dir'] = gt_dir 114 | del curr_conf['weights'] 115 | del curr_conf['names'] 116 | if name == 'hypersim': 117 | curr_conf['gt_lines'] = 'pytlsd_reflectance' 118 | curr_conf['min_perc'] = 0.2 119 | curr_conf['H_params'] = conf.homography.params 120 | del curr_conf['homography'] 121 | curr_conf['warped_pair'] = {'enable': conf.warped_pair} 122 | curr_conf = OmegaConf.create(curr_conf) 123 | self.datasets.append( 124 | get_dataset(name)(curr_conf).get_dataset(split)) 125 | 126 | def __getitem__(self, idx): 127 | dataset = self.datasets[np.random.choice( 128 | range(len(self.datasets)), p=self.weights)] 129 | return dataset[np.random.randint(len(dataset))] 130 | 131 | def __len__(self): 132 | return np.sum([len(d) for d in self.datasets]) 133 | -------------------------------------------------------------------------------- /deeplsd/datasets/nyu.py: -------------------------------------------------------------------------------- 1 | """ NYU dataset for VP estimation evaluation. """ 2 | 3 | import os 4 | import csv 5 | import numpy as np 6 | import torch 7 | import cv2 8 | import scipy.io 9 | from torch.utils.data import Dataset, DataLoader 10 | from pathlib import Path 11 | 12 | from .base_dataset import BaseDataset 13 | from ..evaluation.ls_evaluation import unproject_vp_to_world 14 | from ..settings import DATA_PATH 15 | 16 | 17 | class NYU(BaseDataset, torch.utils.data.Dataset): 18 | default_conf = { 19 | 'dataset_dir': 'NYU_depth_v2', 20 | 'val_size': 49, 21 | } 22 | 23 | def _init(self, conf): 24 | pass 25 | 26 | def get_dataset(self, split): 27 | assert split in ['val', 'test', 'export'] 28 | return _Dataset(self.conf, split) 29 | 30 | # Overwrite the parent data loader to handle custom split 31 | def get_data_loader(self, split, shuffle=False): 32 | """Return a data loader for a given split.""" 33 | assert split in ['val', 'test', 'export'] 34 | batch_size = self.conf.get(split+'_batch_size') 35 | num_workers = self.conf.get('num_workers', batch_size) 36 | return DataLoader(self.get_dataset(split), batch_size=batch_size, 37 | shuffle=False, pin_memory=True, 38 | num_workers=num_workers) 39 | 40 | 41 | class _Dataset(torch.utils.data.Dataset): 42 | def __init__(self, conf, split): 43 | # Extract the image names 44 | num_imgs = 1449 45 | root_dir = os.path.join(DATA_PATH, conf.dataset_dir) 46 | self.img_paths = [os.path.join(root_dir, 'images', str(i) + '.jpg') 47 | for i in range(num_imgs)] 48 | self.vps_paths = [ 49 | os.path.join(root_dir, 'vps', 'vps_' + str(i).zfill(4) + '.csv') 50 | for i in range(num_imgs)] 51 | self.img_names = [str(i).zfill(4) for i in range(num_imgs)] 52 | 53 | # Separate validation and test 54 | if split == 'val': 55 | self.img_paths = self.img_paths[-conf.val_size:] 56 | self.vps_paths = self.vps_paths[-conf.val_size:] 57 | self.img_names = self.img_names[-conf.val_size:] 58 | elif split == 'test': 59 | self.img_paths = self.img_paths[:-conf.val_size] 60 | self.vps_paths = self.vps_paths[:-conf.val_size] 61 | self.img_names = self.img_names[:-conf.val_size] 62 | 63 | # Load the intrinsics 64 | fx_rgb = 5.1885790117450188e+02 65 | fy_rgb = 5.1946961112127485e+02 66 | cx_rgb = 3.2558244941119034e+02 67 | cy_rgb = 2.5373616633400465e+02 68 | self.K = torch.tensor([[fx_rgb, 0, cx_rgb], 69 | [0, fy_rgb, cy_rgb], 70 | [0, 0, 1]]) 71 | 72 | def get_dataset(self, split): 73 | return self 74 | 75 | def __getitem__(self, idx): 76 | img = cv2.imread(self.img_paths[idx], 0) 77 | 78 | # Load the GT VPs 79 | vps = [] 80 
| with open(self.vps_paths[idx]) as csv_file: 81 | reader = csv.reader(csv_file, delimiter=' ') 82 | for ri, row in enumerate(reader): 83 | if ri == 0: 84 | continue 85 | vps.append([float(row[1]), float(row[2]), 1.]) 86 | vps = unproject_vp_to_world(np.array(vps), self.K.numpy()) 87 | 88 | # Normalize the images in [0, 1] 89 | img = img.astype(float) / 255. 90 | 91 | # Convert to torch tensors 92 | img = torch.tensor(img[None], dtype=torch.float) 93 | vps = torch.tensor(vps, dtype=torch.float) 94 | 95 | return {'image': img, 'image_path': self.img_paths[idx], 96 | 'name': self.img_names[idx], 'vps': vps, 'K': self.K} 97 | 98 | def __len__(self): 99 | return len(self.img_paths) 100 | -------------------------------------------------------------------------------- /deeplsd/datasets/rdnim.py: -------------------------------------------------------------------------------- 1 | """ Rotated Day-Night Image Matching dataset. """ 2 | 3 | import os 4 | import numpy as np 5 | import torch 6 | import cv2 7 | from pathlib import Path 8 | from torch.utils.data import Dataset, DataLoader 9 | 10 | from .base_dataset import BaseDataset 11 | from .utils.preprocessing import read_timestamps 12 | from ..settings import DATA_PATH 13 | 14 | 15 | class RDNIM(BaseDataset, Dataset): 16 | default_conf = { 17 | 'dataset_dir': 'RDNIM', 18 | 'reference': 'day', 19 | } 20 | 21 | def _init(self, conf): 22 | self._root_dir = Path(DATA_PATH, conf.dataset_dir) 23 | ref = conf.reference 24 | 25 | # Extract the timestamps 26 | timestamp_files = [p for p 27 | in Path(self._root_dir, 'time_stamps').iterdir()] 28 | timestamps = {} 29 | for f in timestamp_files: 30 | id = f.stem 31 | timestamps[id] = read_timestamps(str(f)) 32 | 33 | # Extract the reference images paths 34 | references = {} 35 | seq_paths = [p for p in Path(self._root_dir, 'references').iterdir()] 36 | for seq in seq_paths: 37 | id = seq.stem 38 | references[id] = str(Path(seq, ref + '.jpg')) 39 | 40 | # Extract the images paths and the homographies 41 | seq_path = [p for p in Path(self._root_dir, 'images').iterdir()] 42 | self._files = [] 43 | for seq in seq_path: 44 | id = seq.stem 45 | images_path = [x for x in seq.iterdir() if x.suffix == '.jpg'] 46 | for img in images_path: 47 | timestamp = timestamps[id]['time'][ 48 | timestamps[id]['name'].index(img.name)] 49 | H = np.loadtxt(str(img)[:-4] + '.txt').astype(float) 50 | self._files.append({ 51 | 'img': str(img), 52 | 'ref': str(references[id]), 53 | 'H': H, 54 | 'timestamp': timestamp}) 55 | 56 | def __getitem__(self, item): 57 | img0_path = self._files[item]['ref'] 58 | img0 = cv2.imread(img0_path, 0) 59 | img1_path = self._files[item]['img'] 60 | img1 = cv2.imread(img1_path, 0) 61 | img_size = img0.shape[:2] 62 | H = self._files[item]['H'] 63 | 64 | # Normalize the images in [0, 1] 65 | img0 = img0.astype(float) / 255. 66 | img1 = img1.astype(float) / 255. 
67 | 68 | img0 = torch.tensor(img0[None], dtype=torch.float) 69 | img1 = torch.tensor(img1[None], dtype=torch.float) 70 | H = torch.tensor(H, dtype=torch.float) 71 | 72 | return {'image': img0, 'warped_image': img1, 'H': H, 73 | 'timestamp': self._files[item]['timestamp'], 74 | 'image_path': img0_path, 'warped_image_path': img1_path} 75 | 76 | def __len__(self): 77 | return len(self._files) 78 | 79 | def get_dataset(self, split): 80 | assert split in ['test'] 81 | return self 82 | 83 | # Overwrite the parent data loader to handle custom collate_fn 84 | def get_data_loader(self, split, shuffle=False): 85 | """Return a data loader for a given split.""" 86 | assert split in ['test'] 87 | batch_size = self.conf.get(split+'_batch_size') 88 | num_workers = self.conf.get('num_workers', batch_size) 89 | return DataLoader(self, batch_size=batch_size, 90 | shuffle=shuffle or split == 'train', 91 | pin_memory=True, num_workers=num_workers) 92 | -------------------------------------------------------------------------------- /deeplsd/datasets/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvg/DeepLSD/d873fd3619d6e44a9f625bc437ab4786057677e5/deeplsd/datasets/utils/__init__.py -------------------------------------------------------------------------------- /deeplsd/datasets/utils/data_augmentation.py: -------------------------------------------------------------------------------- 1 | """ 2 | Common photometric transforms for data augmentation. 3 | """ 4 | import numpy as np 5 | import cv2 6 | from PIL import Image 7 | from torchvision import transforms as transforms 8 | 9 | 10 | available_augmentations = [ 11 | 'additive_gaussian_noise', 12 | 'additive_speckle_noise', 13 | 'random_brightness', 14 | 'random_contrast', 15 | 'additive_shade', 16 | 'motion_blur' 17 | ] 18 | 19 | 20 | class additive_gaussian_noise(object): 21 | def __init__(self, stddev_range=[5, 95]): 22 | self.stddev_range = stddev_range 23 | 24 | def __call__(self, input_image): 25 | # Get the noise stddev 26 | stddev = np.random.uniform(self.stddev_range[0], self.stddev_range[1]) 27 | noise = np.random.normal(0., stddev, size=input_image.shape) 28 | noisy_image = (input_image + noise).clip(0., 255.) 29 | 30 | return noisy_image 31 | 32 | 33 | class additive_speckle_noise(object): 34 | def __init__(self, prob_range=[0.0, 0.01]): 35 | self.prob_range = prob_range 36 | 37 | def __call__(self, input_image): 38 | # Sample 39 | prob = np.random.uniform(self.prob_range[0], self.prob_range[1]) 40 | sample = np.random.uniform(0., 1., size=input_image.shape) 41 | 42 | # Get the mask 43 | mask0 = sample <= prob 44 | mask1 = sample >= (1 - prob) 45 | 46 | # Mask the image (here we assume the image ranges from 0~255 47 | noisy = input_image.copy() 48 | noisy[mask0] = 0. 49 | noisy[mask1] = 255. 
50 | 51 | return noisy 52 | 53 | 54 | class random_brightness(object): 55 | def __init__(self, brightness=0.5): 56 | self.brightness = brightness 57 | 58 | # Initialize the transformer 59 | self.transform = transforms.ColorJitter(brightness=self.brightness) 60 | 61 | def __call__(self, input_image): 62 | # Convert to PIL image 63 | if isinstance(input_image, np.ndarray): 64 | input_image = Image.fromarray(input_image.astype(np.uint8)) 65 | 66 | return np.array(self.transform(input_image)) 67 | 68 | 69 | class random_contrast(object): 70 | def __init__(self, strength_range=[0.5, 1.5]): 71 | self.strength_range = strength_range 72 | 73 | def __call__(self, input_image): 74 | strength = np.random.uniform(self.strength_range[0], 75 | self.strength_range[1]) 76 | contrasted_img = input_image.copy() 77 | mean = np.mean(contrasted_img) 78 | contrasted_img = (contrasted_img - mean) * strength + mean 79 | 80 | return contrasted_img.clip(0, 255) 81 | 82 | 83 | class additive_shade(object): 84 | def __init__(self, nb_ellipses=20, transparency_range=[-0.8, 0.8], 85 | kernel_size_range=[100, 150]): 86 | self.nb_ellipses = nb_ellipses 87 | self.transparency_range = transparency_range 88 | self.kernel_size_range = kernel_size_range 89 | 90 | def __call__(self, input_image): 91 | min_dim = min(input_image.shape[:2]) / 4 92 | mask = np.zeros(input_image.shape[:2], np.uint8) 93 | for _ in range(self.nb_ellipses): 94 | ax = int(max(np.random.rand() * min_dim, min_dim / 5)) 95 | ay = int(max(np.random.rand() * min_dim, min_dim / 5)) 96 | max_rad = max(ax, ay) 97 | x = np.random.randint(max_rad, input_image.shape[1] - max_rad) 98 | y = np.random.randint(max_rad, input_image.shape[0] - max_rad) 99 | angle = np.random.rand() * 90 100 | cv2.ellipse(mask, (x, y), (ax, ay), angle, 0, 360, 255, -1) 101 | 102 | transparency = np.random.uniform(*self.transparency_range) 103 | kernel_size = np.random.randint(*self.kernel_size_range) 104 | 105 | # kernel_size has to be odd 106 | if (kernel_size % 2) == 0: 107 | kernel_size += 1 108 | mask = cv2.GaussianBlur(mask.astype(np.float32), 109 | (kernel_size, kernel_size), 0) 110 | if len(input_image.shape) == 2: 111 | shaded = input_image[:, :, None] * (1 - transparency 112 | * mask[..., np.newaxis] / 255.) 113 | else: 114 | shaded = input_image * (1 - transparency 115 | * mask[..., np.newaxis] / 255.) 116 | shaded = np.clip(shaded, 0, 255) 117 | 118 | return np.reshape(shaded, input_image.shape) 119 | 120 | 121 | class motion_blur(object): 122 | def __init__(self, max_kernel_size=10): 123 | self.max_kernel_size = max_kernel_size 124 | 125 | def __call__(self, input_image): 126 | # Either vertical, horizontal or diagonal blur 127 | mode = np.random.choice(['h', 'v', 'diag_down', 'diag_up']) 128 | ksize = np.random.randint( 129 | 0, int(round((self.max_kernel_size + 1) / 2))) * 2 + 1 130 | center = int((ksize - 1) / 2) 131 | kernel = np.zeros((ksize, ksize)) 132 | if mode == 'h': 133 | kernel[center, :] = 1. 134 | elif mode == 'v': 135 | kernel[:, center] = 1. 136 | elif mode == 'diag_down': 137 | kernel = np.eye(ksize) 138 | elif mode == 'diag_up': 139 | kernel = np.flip(np.eye(ksize), 0) 140 | var = ksize * ksize / 16. 141 | grid = np.repeat(np.arange(ksize)[:, np.newaxis], ksize, axis=-1) 142 | gaussian = np.exp(-(np.square(grid - center) + np.square(grid.T - center)) / (2. 
* var)) 143 | kernel *= gaussian 144 | kernel /= np.sum(kernel) 145 | blurred = cv2.filter2D(input_image, -1, kernel) 146 | 147 | return np.reshape(blurred, input_image.shape) 148 | 149 | 150 | def photometric_augmentation(input_img, config): 151 | """ Process the input image through multiple transforms. """ 152 | if 'primitives' in config: 153 | transforms = config['primitives'] 154 | else: 155 | transforms = available_augmentations 156 | 157 | # Take a random subset of transforms 158 | n_transforms = len(transforms) 159 | n_used = np.random.randint(n_transforms + 1) 160 | transforms = np.random.choice(transforms, n_used, replace=False) 161 | 162 | # Apply the transforms 163 | transformed_img = input_img.copy() 164 | for primitive in transforms: 165 | transform = globals()[primitive](**config['params'][primitive]) 166 | transformed_img = transform(transformed_img) 167 | 168 | return transformed_img -------------------------------------------------------------------------------- /deeplsd/datasets/utils/homographies.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def sample_homography( 5 | shape, perspective=True, scaling=True, rotation=True, 6 | translation=True, n_scales=5, n_angles=25, scaling_amplitude=0.1, 7 | perspective_amplitude_x=0.1, perspective_amplitude_y=0.1, 8 | patch_ratio=0.5, max_angle=1.57, allow_artifacts=False, 9 | translation_overflow=0.): 10 | """ Computes the homography transformation from a random patch in the 11 | original image to a warped projection with the same image size. The 12 | original patch, which is initialized with a simple half-size centered 13 | crop, is iteratively projected, scaled, rotated and translated. 14 | 15 | Args: 16 | shape: A tuple specifying the height and width of the original image. 17 | perspective: A boolean that enables the perspective and affine transformations. 18 | scaling: A boolean that enables the random scaling of the patch. 19 | rotation: A boolean that enables the random rotation of the patch. 20 | translation: A boolean that enables the random translation of the patch. 21 | n_scales: The number of tentative scales that are sampled when scaling. 22 | n_angles: The number of tentatives angles that are sampled when rotating. 23 | scaling_amplitude: Controls the amount of scale. 24 | perspective_amplitude_x: Controls the perspective effect in x direction. 25 | perspective_amplitude_y: Controls the perspective effect in y direction. 26 | patch_ratio: Controls the size of the patches used to create the homography. 27 | max_angle: Maximum angle used in rotations. 28 | allow_artifacts: A boolean that enables artifacts when applying the homography. 29 | translation_overflow: Amount of border artifacts caused by translation. 30 | 31 | Returns: 32 | An np.array of shape `[3, 3]` corresponding to the homography. 
33 | """ 34 | # Convert shape to ndarry 35 | if not isinstance(shape, np.ndarray): 36 | shape = np.array(shape) 37 | 38 | # Corners of the output patch 39 | margin = (1 - patch_ratio) / 2 40 | pts1 = margin + np.array([[0, 0], [0, patch_ratio], 41 | [patch_ratio, patch_ratio], [patch_ratio, 0]]) 42 | # Corners of the intput image 43 | pts2 = pts1.copy() 44 | 45 | # Random perspective and affine perturbations 46 | if perspective: 47 | if not allow_artifacts: 48 | perspective_amplitude_x = min(perspective_amplitude_x, margin) 49 | perspective_amplitude_y = min(perspective_amplitude_y, margin) 50 | 51 | # normal distribution with mean=0, std=perspective_amplitude_y/2 52 | perspective_displacement = np.random.normal( 53 | 0., perspective_amplitude_y/2, [1]) 54 | h_displacement_left = np.random.normal(0., perspective_amplitude_x/2, 55 | [1]) 56 | h_displacement_right = np.random.normal(0., perspective_amplitude_x/2, 57 | [1]) 58 | pts2 += np.stack([np.concatenate([h_displacement_left, 59 | perspective_displacement], 0), 60 | np.concatenate([h_displacement_left, 61 | -perspective_displacement], 0), 62 | np.concatenate([h_displacement_right, 63 | perspective_displacement], 0), 64 | np.concatenate([h_displacement_right, 65 | -perspective_displacement], 0)]) 66 | 67 | # Random scaling: sample several scales, check collision with borders, 68 | # randomly pick a valid one 69 | if scaling: 70 | scales = np.concatenate( 71 | [[1.], np.random.normal(1, scaling_amplitude/2, [n_scales])], 0) 72 | center = np.mean(pts2, axis=0, keepdims=True) 73 | scaled = (pts2 - center)[None, ...] * scales[..., None, None] + center 74 | # all scales are valid except scale=1 75 | if allow_artifacts: 76 | valid = np.arange(n_scales) 77 | else: 78 | valid = np.where(np.all((scaled >= 0.) & (scaled < 1.), (1, 2)))[0] 79 | idx = valid[np.random.uniform(0., valid.shape[0], ()).astype(np.int32)] 80 | pts2 = scaled[idx] 81 | 82 | # Random translation 83 | if translation: 84 | t_min, t_max = np.min(pts2, axis=0), np.min(1 - pts2, axis=0) 85 | if allow_artifacts: 86 | t_min += translation_overflow 87 | t_max += translation_overflow 88 | pts2 += (np.stack([np.random.uniform(-t_min[0], t_max[0], ()), 89 | np.random.uniform(-t_min[1], t_max[1], ())]))[None, ...] 90 | 91 | # Random rotation: sample several rotations, check collision with borders, 92 | # randomly pick a valid one 93 | if rotation: 94 | angles = np.linspace(-max_angle, max_angle, n_angles) 95 | # in case no rotation is valid 96 | angles = np.concatenate([[0.], angles], axis=0) 97 | center = np.mean(pts2, axis=0, keepdims=True) 98 | rot_mat = np.reshape(np.stack( 99 | [np.cos(angles), -np.sin(angles), np.sin(angles), np.cos(angles)], 100 | axis=1), [-1, 2, 2]) 101 | rotated = np.matmul( 102 | np.tile((pts2 - center)[None, ...], [n_angles+1, 1, 1]), 103 | rot_mat) + center 104 | if allow_artifacts: 105 | valid = np.array(range(n_angles)) # all angles are valid, except angle=0 106 | else: 107 | valid = np.where(np.all((rotated >= 0.) & (rotated < 1.), axis=(1, 2)))[0] 108 | idx = valid[np.random.uniform(0., valid.shape[0], ()).astype(np.int32)] 109 | pts2 = rotated[idx] 110 | 111 | # Rescale to actual size 112 | shape = shape[::-1].astype(np.float32) # different convention [y, x] 113 | pts1 *= shape[None, ...] 114 | pts2 *= shape[None, ...] 
115 | 116 | def ax(p, q): return [p[0], p[1], 1, 0, 0, 0, -p[0] * q[0], -p[1] * q[0]] 117 | 118 | def ay(p, q): return [0, 0, 0, p[0], p[1], 1, -p[0] * q[1], -p[1] * q[1]] 119 | 120 | a_mat = np.stack([f(pts1[i], pts2[i]) for i in range(4) for f in (ax, ay)], axis=0) 121 | p_mat = np.transpose(np.stack([[pts2[i][j] for i in range(4) for j in range(2)]], axis=0)) 122 | homo_vec, _, _, _ = np.linalg.lstsq(a_mat, p_mat, rcond=None) 123 | 124 | # Compose the homography vector back to matrix 125 | homo_mat = np.concatenate([homo_vec[0:3, 0][None, ...], 126 | homo_vec[3:6, 0][None, ...], 127 | np.concatenate((homo_vec[6], homo_vec[7], [1]), 128 | axis=0)[None, ...]], axis=0) 129 | 130 | return homo_mat 131 | 132 | 133 | def warp_points(points, H): 134 | """ Warp 2D points by an homography H. """ 135 | n_points = points.shape[0] 136 | reproj_points = points.copy()[:, [1, 0]] 137 | reproj_points = np.concatenate([reproj_points, np.ones((n_points, 1))], 138 | axis=1) 139 | reproj_points = H.dot(reproj_points.transpose()).transpose() 140 | reproj_points = reproj_points[:, :2] / reproj_points[:, 2:] 141 | reproj_points = reproj_points[:, [1, 0]] 142 | return reproj_points 143 | 144 | 145 | def warp_lines(lines, H): 146 | """ Warp lines of the shape [N, 2, 2] by an homography H. """ 147 | return warp_points(lines.reshape(-1, 2), H).reshape(-1, 2, 2) 148 | -------------------------------------------------------------------------------- /deeplsd/datasets/utils/megadepth_train_scenes.txt: -------------------------------------------------------------------------------- 1 | 0001 2 | 0003 3 | 0004 4 | 0005 5 | 0007 6 | 0012 7 | 0013 8 | 0017 9 | 0023 10 | 0026 11 | 0027 12 | 0035 13 | 0036 14 | 0037 15 | 0039 16 | 0042 17 | 0043 18 | 0046 19 | 0048 20 | 0056 21 | 0057 22 | 0060 23 | 0061 24 | 0065 25 | 0070 26 | 0080 27 | 0083 28 | 0086 29 | 0087 30 | 0095 31 | 0098 32 | 0100 33 | 0101 34 | 0104 35 | 0107 36 | 0115 37 | 0117 38 | 0122 39 | 0130 40 | 0137 41 | 0147 42 | 0148 43 | 0149 44 | 0150 45 | 0156 46 | 0160 47 | 0183 48 | 0189 49 | 0190 50 | 0200 51 | 0214 52 | 0224 53 | 0235 54 | 0237 55 | 0240 56 | 0243 57 | 0258 58 | 0269 59 | 0299 60 | 0312 61 | 0326 62 | 0327 63 | 0331 64 | 0335 65 | 0341 66 | 0348 67 | 0377 68 | 0380 69 | 0394 70 | 0407 71 | 0411 72 | 0430 73 | 0446 74 | 0455 75 | 0472 76 | 0476 77 | 0478 78 | 0493 79 | 0496 80 | 0505 81 | 0559 82 | 0733 83 | 1017 84 | 5004 85 | 5005 86 | 5006 87 | 5007 88 | 5009 89 | 5010 90 | 5012 91 | 5013 92 | 5017 93 | 0290 94 | 0238 95 | 0049 96 | 0062 97 | 0162 98 | 0323 99 | 5003 100 | 0034 101 | 5000 102 | 0257 103 | 0197 104 | 0406 105 | 0099 106 | 0277 107 | 0102 108 | 0041 109 | 0044 110 | 5001 111 | 0151 112 | 0076 113 | 0071 114 | 0402 115 | 0281 116 | 0307 117 | 0094 118 | 0389 119 | 0067 120 | 5002 121 | 0360 122 | 0090 123 | 5011 124 | 5008 125 | 0275 126 | 0306 127 | 0252 128 | 0231 129 | 0141 130 | 0133 131 | 0121 132 | 0349 133 | 0229 134 | 0168 135 | 0412 136 | 0443 137 | 5014 138 | 5016 139 | 5015 140 | 0294 141 | 0204 142 | 0217 143 | 1001 144 | 0178 145 | 0186 146 | 0047 147 | 5018 148 | 0129 149 | 0285 150 | 0271 151 | -------------------------------------------------------------------------------- /deeplsd/datasets/utils/megadepth_val_scenes.txt: -------------------------------------------------------------------------------- 1 | 0015 2 | 0022 3 | 0223 4 | 0768 5 | 0185 6 | 0058 7 | 0303 8 | 0016 9 | 0387 10 | 0181 11 | 0212 12 | 3346 13 | 0482 14 | 0064 15 | 0205 16 | 0286 17 | 0175 18 | 
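A rough usage sketch of the homography utilities above (not part of the repository code): `sample_homography` returns a plain 3x3 np.array, and `warp_points` / `warp_lines` expect coordinates in (row, col) order, matching the axis swap performed inside `warp_points`. The image size and line endpoints below are made-up illustration values; only the imported function names come from homographies.py.

import numpy as np
from deeplsd.datasets.utils.homographies import sample_homography, warp_points, warp_lines

img_size = (480, 640)                       # (height, width), arbitrary example values
H = sample_homography(img_size)             # 3x3 homography as an np.array
lines = np.array([[[100., 200.],            # one segment, endpoints in (row, col)
                   [150., 400.]]])
warped_lines = warp_lines(lines, H)         # still of shape [N, 2, 2], in the warped image
orig_again = warp_points(warped_lines.reshape(-1, 2), np.linalg.inv(H))  # round-trip check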
-------------------------------------------------------------------------------- /deeplsd/datasets/utils/preprocessing.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import csv 4 | 5 | 6 | def numpy_image_to_torch(image): 7 | """Normalize the image tensor and reorder the dimensions.""" 8 | if image.ndim == 3: 9 | image = image.transpose((2, 0, 1)) # HxWxC to CxHxW 10 | elif image.ndim == 2: 11 | image = image[None] # add channel axis 12 | else: 13 | raise ValueError(f'Not an image: {image.shape}') 14 | return (image / 255.).astype(np.float32, copy=False) 15 | 16 | 17 | def resize(image, size, fn=None, nearest=False): 18 | """Resize an image to a fixed size, or according to max or min edge.""" 19 | h, w = image.shape[:2] 20 | if isinstance(size, int): 21 | scale = size / fn(h, w) 22 | h_new, w_new = int(round(h*scale)), int(round(w*scale)) 23 | scale = (scale, scale) 24 | elif isinstance(size, (tuple, list)): 25 | h_new, w_new = size 26 | scale = (w_new / w, h_new / h) 27 | else: 28 | raise ValueError(f'Incorrect new size: {size}') 29 | mode = cv2.INTER_NEAREST if nearest else cv2.INTER_LINEAR 30 | return cv2.resize(image, (w_new, h_new), interpolation=mode), scale 31 | 32 | 33 | def resize_and_crop(image, size, interp_mode=None): 34 | """ Apply a central crop to an image to resize it to a fixed size. """ 35 | source_size = np.array(image.shape[:2], dtype=float) 36 | target_size = np.array(size, dtype=float) 37 | 38 | # Scale 39 | scale = np.amax(target_size / source_size) 40 | inter_size = np.round(source_size * scale).astype(int) 41 | if interp_mode is None: 42 | interp_mode = cv2.INTER_AREA if scale < 1 else cv2.INTER_LINEAR 43 | image = cv2.resize(image, (inter_size[1], inter_size[0]), 44 | interpolation=interp_mode) 45 | 46 | # Central crop 47 | pad = np.round((source_size * scale - target_size) / 2.).astype(int) 48 | image = image[pad[0]:(pad[0] + int(target_size[0])), 49 | pad[1]:(pad[1] + int(target_size[1]))] 50 | 51 | return image 52 | 53 | 54 | def crop(image, size, random=True, other=None, K=None): 55 | """Random or deterministic crop of an image, adjust depth and intrinsics. 56 | """ 57 | h, w = image.shape[:2] 58 | h_new, w_new = (size, size) if isinstance(size, int) else size 59 | top = np.random.randint(0, h - h_new + 1) if random else 0 60 | left = np.random.randint(0, w - w_new + 1) if random else 0 61 | image = image[top:top+h_new, left:left+w_new] 62 | ret = [image] 63 | if other is not None: 64 | ret += [other[top:top+h_new, left:left+w_new]] 65 | if K is not None: 66 | K[0, 2] -= left 67 | K[1, 2] -= top 68 | ret += [K] 69 | return ret 70 | 71 | 72 | def read_timestamps(text_file): 73 | """ 74 | Read a text file containing the timestamps of images 75 | and return a dictionary matching the name of the image 76 | to its timestamp. 77 | """ 78 | timestamps = {'name': [], 'date': [], 'hour': [], 79 | 'minute': [], 'time': []} 80 | with open(text_file, 'r') as csvfile: 81 | reader = csv.reader(csvfile, delimiter=' ') 82 | for row in reader: 83 | timestamps['name'].append(row[0]) 84 | timestamps['date'].append(row[1]) 85 | hour = int(row[2]) 86 | timestamps['hour'].append(hour) 87 | minute = int(row[3]) 88 | timestamps['minute'].append(minute) 89 | timestamps['time'].append(hour + minute / 60.) 
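# Note on the parsing above: each row of the timestamp file is expected to be
# space-separated as "<image_name> <date> <hour> <minute>"; the derived 'time'
# entry stores the decimal hour (hour + minute / 60.).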
90 | return timestamps 91 | -------------------------------------------------------------------------------- /deeplsd/datasets/wireframe_eval.py: -------------------------------------------------------------------------------- 1 | """ 2 | Wireframe dataset to evaluate basic line detection metrics. 3 | """ 4 | 5 | from pathlib import Path 6 | import logging 7 | import cv2 8 | import numpy as np 9 | import h5py 10 | import torch 11 | from torch.utils.data import DataLoader 12 | 13 | from .base_dataset import BaseDataset, worker_init_fn 14 | from .utils.preprocessing import resize_and_crop 15 | from .utils.homographies import sample_homography 16 | from ..settings import DATA_PATH 17 | 18 | 19 | class WireframeEval(BaseDataset, torch.utils.data.Dataset): 20 | default_conf = { 21 | 'dataset_dir': 'Wireframe_raw', 22 | 'resize': None, 23 | 'homography': { 24 | 'params': { 25 | 'translation': True, 26 | 'rotation': True, 27 | 'scaling': True, 28 | 'perspective': True, 29 | 'scaling_amplitude': 0.2, 30 | 'perspective_amplitude_x': 0.2, 31 | 'perspective_amplitude_y': 0.2, 32 | 'patch_ratio': 0.85, 33 | 'max_angle': 1.57, 34 | 'allow_artifacts': True 35 | } 36 | }, 37 | 'seed': 0 38 | } 39 | 40 | def _init(self, conf): 41 | pass 42 | 43 | def get_dataset(self, split): 44 | assert split == 'test', "WireframeEval only available in test mode." 45 | return _Dataset(self.conf, split) 46 | 47 | # Overwrite the parent data loader to handle custom collate_fn 48 | def get_data_loader(self, split, shuffle=False): 49 | """Return a data loader for a given split.""" 50 | assert split == 'test', "WireframeEval only available in test mode." 51 | batch_size = self.conf.get(split+'_batch_size') 52 | num_workers = self.conf.get('num_workers', batch_size) 53 | return DataLoader(self.get_dataset(split), batch_size=batch_size, 54 | shuffle=shuffle or split == 'train', 55 | pin_memory=True, num_workers=num_workers, 56 | worker_init_fn=worker_init_fn) 57 | 58 | 59 | class _Dataset(torch.utils.data.Dataset): 60 | def __init__(self, conf, split): 61 | self.conf, self.split = conf, split 62 | torch.manual_seed(conf.seed) 63 | np.random.seed(conf.seed) 64 | folder = 'test' 65 | 66 | # Extract the images paths 67 | self.images = Path(DATA_PATH, conf.dataset_dir, folder) 68 | self.images = [img for img in self.images.iterdir() 69 | if str(img)[-3:] == 'png' or str(img)[-3:] == 'jpg'] 70 | if len(self.images) == 0: 71 | raise ValueError( 72 | f'Could not find any image in folder: {conf.dataset_dir}.') 73 | logging.info(f'Found {len(self.images)} in image folder.') 74 | self.images.sort() 75 | 76 | # Pre-generate all the homographies to ensure reproducibility 77 | self.H = [] 78 | self.w, self.h = 640, 480 79 | img_size = (self.h, self.w) 80 | for _ in range(len(self.images)): 81 | self.H.append(sample_homography(img_size, **conf.homography.params)) 82 | 83 | def get_dataset(self, split): 84 | return self 85 | 86 | def __getitem__(self, idx): 87 | # Read the image 88 | path = self.images[idx] 89 | img = cv2.imread(str(path), 0) 90 | img_size = np.array(img.shape) 91 | h, w = img_size 92 | 93 | # Warp the image 94 | H = self.H[idx] 95 | warped_img = cv2.warpPerspective(img, H, (w, h)) 96 | 97 | # Resize the image and GT if necessary 98 | if self.conf.resize is not None: 99 | H = self.adapt_homography_to_preprocessing(H, (h, w), (h, w)) 100 | img_size = self.conf.resize 101 | h, w = img_size 102 | img = resize_and_crop(img, img_size) 103 | warped_img = resize_and_crop(warped_img, img_size) 104 | 105 | # Normalize the images in 
[0, 1] 106 | img = img.astype(np.float32) / 255. 107 | warped_img = warped_img.astype(np.float32) / 255. 108 | 109 | # Convert all data to torch tensors 110 | img = torch.tensor(img[None], dtype=torch.float) 111 | H = torch.tensor(H, dtype=torch.float) 112 | warped_img = torch.tensor(warped_img[None], dtype=torch.float) 113 | 114 | data = { 115 | 'name': path.stem, 116 | 'image': img, 117 | 'warped_image': warped_img, 118 | 'H': H, 119 | } 120 | return data 121 | 122 | def __len__(self): 123 | return len(self.images) 124 | 125 | def adapt_homography_to_preprocessing(self, H, img_shape1, img_shape2): 126 | source_size1 = np.array(img_shape1, dtype=float) 127 | source_size2 = np.array(img_shape2, dtype=float) 128 | target_size = np.array(self.conf.resize, dtype=float) 129 | 130 | # Get the scaling factor in resize 131 | scale1 = np.amax(target_size / source_size1) 132 | scaling1 = np.diag([1. / scale1, 1. / scale1, 1.]).astype(float) 133 | scale2 = np.amax(target_size / source_size2) 134 | scaling2 = np.diag([scale2, scale2, 1.]).astype(float) 135 | 136 | # Get the translation params in crop 137 | pad_y1 = (source_size1[0] * scale1 - target_size[0]) / 2. 138 | pad_x1 = (source_size1[1] * scale1 - target_size[1]) / 2. 139 | translation1 = np.array([[1., 0., pad_x1], 140 | [0., 1., pad_y1], 141 | [0., 0., 1.]], dtype=float) 142 | pad_y2 = (source_size2[0] * scale2 - target_size[0]) / 2. 143 | pad_x2 = (source_size2[1] * scale2 - target_size[1]) / 2. 144 | translation2 = np.array([[1., 0., -pad_x2], 145 | [0., 1., -pad_y2], 146 | [0., 0., 1.]], dtype=float) 147 | 148 | return translation2 @ scaling2 @ H @ scaling1 @ translation1 149 | -------------------------------------------------------------------------------- /deeplsd/datasets/york_urban.py: -------------------------------------------------------------------------------- 1 | """ YorkUrban dataset for VP estimation evaluation. 
""" 2 | 3 | import os 4 | import numpy as np 5 | import torch 6 | import cv2 7 | import scipy.io 8 | from torch.utils.data import Dataset, DataLoader 9 | from pathlib import Path 10 | 11 | from .base_dataset import BaseDataset 12 | from ..settings import DATA_PATH 13 | 14 | 15 | class YorkUrban(BaseDataset, torch.utils.data.Dataset): 16 | default_conf = { 17 | 'dataset_dir': 'YorkUrbanDB', 18 | } 19 | 20 | def _init(self, conf): 21 | pass 22 | 23 | def get_dataset(self, split): 24 | assert split in ['val', 'test'] 25 | return _Dataset(self.conf, split) 26 | 27 | # Overwrite the parent data loader to handle custom collate_fn 28 | def get_data_loader(self, split, shuffle=False): 29 | """Return a data loader for a given split.""" 30 | assert split in ['val', 'test'] 31 | batch_size = self.conf.get(split+'_batch_size') 32 | num_workers = self.conf.get('num_workers', batch_size) 33 | return DataLoader(self.get_dataset(split), batch_size=batch_size, 34 | shuffle=False, pin_memory=True, 35 | num_workers=num_workers) 36 | 37 | 38 | class _Dataset(torch.utils.data.Dataset): 39 | def __init__(self, conf, split): 40 | # Extract the image names 41 | self.root_dir = os.path.join(DATA_PATH, conf.dataset_dir) 42 | self.img_names = [name for name in os.listdir(self.root_dir) 43 | if os.path.isdir(os.path.join(self.root_dir, name))] 44 | assert len(self.img_names) == 102 45 | 46 | # Separate validation and test 47 | split_file = os.path.join(self.root_dir, 48 | 'ECCV_TrainingAndTestImageNumbers.mat') 49 | split_mat = scipy.io.loadmat(split_file) 50 | if split == 'val': 51 | valid_set = split_mat['trainingSetIndex'][:, 0] - 1 52 | else: 53 | valid_set = split_mat['testSetIndex'][:, 0] - 1 54 | self.img_names = np.array(self.img_names)[valid_set] 55 | assert len(self.img_names) == 51 56 | 57 | # Load the intrinsics 58 | K_file = os.path.join(self.root_dir, 'cameraParameters.mat') 59 | K_mat = scipy.io.loadmat(K_file) 60 | f = K_mat['focal'][0, 0] / K_mat['pixelSize'][0, 0] 61 | p_point = K_mat['pp'][0] - 1 # -1 to convert to 0-based conv 62 | self.K = torch.tensor([[f, 0, p_point[0]], 63 | [0, f, p_point[1]], 64 | [0, 0, 1]]) 65 | 66 | def get_dataset(self, split): 67 | return self 68 | 69 | def __getitem__(self, idx): 70 | img_path = os.path.join(self.root_dir, self.img_names[idx], 71 | f'{self.img_names[idx]}.jpg') 72 | name = str(Path(img_path).stem) 73 | img = cv2.imread(img_path, 0) 74 | 75 | # Load the GT lines and VP association 76 | lines_file = os.path.join(self.root_dir, self.img_names[idx], 77 | f'{self.img_names[idx]}LinesAndVP.mat') 78 | lines_mat = scipy.io.loadmat(lines_file) 79 | lines = lines_mat['lines'].reshape(-1, 2, 2)[:, :, [1, 0]] - 1 80 | vp_association = lines_mat['vp_association'][:, 0] - 1 81 | 82 | # Load the VPs (non orthogonal ones) 83 | vp_file = os.path.join( 84 | self.root_dir, self.img_names[idx], 85 | f'{self.img_names[idx]}GroundTruthVP_CamParams.mat') 86 | vps = scipy.io.loadmat(vp_file)['vp'].T 87 | 88 | # Keep only the relevant VPs 89 | unique_vps = np.unique(vp_association) 90 | vps = vps[unique_vps] 91 | for i, index in enumerate(unique_vps): 92 | vp_association[vp_association == index] = i 93 | 94 | # Load the extended VPs of YUD+ 95 | vp_file = os.path.join( 96 | self.root_dir, self.img_names[idx], 97 | f'{self.img_names[idx]}UpdatedGroundTruthVP_CamParams.mat') 98 | updated_vps = scipy.io.loadmat(vp_file)['vp'].T 99 | 100 | # Normalize the images in [0, 1] 101 | img = img.astype(float) / 255. 
102 | 103 | # Convert to torch tensors 104 | img = torch.tensor(img[None], dtype=torch.float) 105 | lines = torch.tensor(lines.astype(float), dtype=torch.float) 106 | vps = torch.tensor(vps, dtype=torch.float) 107 | updated_vps = torch.tensor(updated_vps, dtype=torch.float) 108 | vp_association = torch.tensor(vp_association, dtype=torch.int) 109 | 110 | return {'image': img, 'image_path': img_path, 'name': name, 111 | 'gt_lines': lines, 'vps': vps, 'updated_vps': updated_vps, 112 | 'vp_association': vp_association, 'K': self.K} 113 | 114 | def __len__(self): 115 | return len(self.img_names) 116 | -------------------------------------------------------------------------------- /deeplsd/datasets/york_urban_lines.py: -------------------------------------------------------------------------------- 1 | """ 2 | York Urban DB dataset to evaluate basic line detection metrics. 3 | """ 4 | 5 | import os 6 | from pathlib import Path 7 | import cv2 8 | import numpy as np 9 | import h5py 10 | import torch 11 | from torch.utils.data import DataLoader 12 | 13 | from .base_dataset import BaseDataset, worker_init_fn 14 | from .utils.homographies import sample_homography 15 | from ..settings import DATA_PATH 16 | 17 | 18 | class YorkUrbanLines(BaseDataset, torch.utils.data.Dataset): 19 | default_conf = { 20 | 'dataset_dir': 'YorkUrbanDB', 21 | 'grayscale': True, 22 | 'homography': { 23 | 'params': { 24 | 'translation': True, 25 | 'rotation': True, 26 | 'scaling': True, 27 | 'perspective': True, 28 | 'scaling_amplitude': 0.2, 29 | 'perspective_amplitude_x': 0.2, 30 | 'perspective_amplitude_y': 0.2, 31 | 'patch_ratio': 0.85, 32 | 'max_angle': 1.57, 33 | 'allow_artifacts': True 34 | } 35 | }, 36 | 'seed': 0 37 | } 38 | 39 | def _init(self, conf): 40 | pass 41 | 42 | def get_dataset(self, split): 43 | assert split == 'test', "YorkUrbanLines only available in test mode." 44 | return _Dataset(self.conf, split) 45 | 46 | # Overwrite the parent data loader to handle custom collate_fn 47 | def get_data_loader(self, split, shuffle=False): 48 | """Return a data loader for a given split.""" 49 | assert split == 'test', "YorkUrbanLines only available in test mode." 
50 | batch_size = self.conf.get(split+'_batch_size') 51 | num_workers = self.conf.get('num_workers', batch_size) 52 | return DataLoader(self.get_dataset(split), batch_size=batch_size, 53 | shuffle=shuffle or split == 'train', 54 | pin_memory=True, num_workers=num_workers, 55 | worker_init_fn=worker_init_fn) 56 | 57 | 58 | class _Dataset(torch.utils.data.Dataset): 59 | def __init__(self, conf, split): 60 | torch.manual_seed(conf.seed) 61 | np.random.seed(conf.seed) 62 | 63 | # Extract the image names 64 | self.root_dir = os.path.join(DATA_PATH, conf.dataset_dir) 65 | self.img_names = [name for name in os.listdir(self.root_dir) 66 | if os.path.isdir(os.path.join(self.root_dir, name))] 67 | self.grayscale = conf.grayscale 68 | assert len(self.img_names) == 102 69 | 70 | # Pre-generate all the homographies to ensure reproducibility 71 | self.H = [] 72 | self.w, self.h = 640, 480 73 | img_size = (self.h, self.w) 74 | for _ in range(len(self.img_names)): 75 | self.H.append(sample_homography(img_size, **conf.homography.params)) 76 | 77 | def get_dataset(self, split): 78 | return self 79 | 80 | def __getitem__(self, idx): 81 | img_path = os.path.join(self.root_dir, self.img_names[idx], 82 | f'{self.img_names[idx]}.jpg') 83 | name = str(Path(img_path).stem) 84 | img = cv2.imread(img_path) 85 | if self.grayscale: 86 | img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 87 | else: 88 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 89 | 90 | # Warp the image 91 | warped_img = cv2.warpPerspective(img, self.H[idx], (self.w, self.h), 92 | flags=cv2.INTER_LINEAR) 93 | 94 | # Normalize the images in [0, 1] 95 | img = img.astype(np.float32) / 255. 96 | warped_img = warped_img.astype(np.float32) / 255. 97 | 98 | # Convert all data to torch tensors 99 | if self.grayscale: 100 | img = torch.tensor(img[None], dtype=torch.float) 101 | warped_img = torch.tensor(warped_img[None], dtype=torch.float) 102 | else: 103 | img = torch.tensor(img, dtype=torch.float).permute(2, 0, 1) 104 | warped_img = torch.tensor(warped_img, 105 | dtype=torch.float).permute(2, 0, 1) 106 | 107 | return { 108 | 'name': name, 109 | 'image': img, 110 | 'warped_image': warped_img, 111 | 'H': self.H[idx], 112 | 'image_path': img_path, 113 | } 114 | 115 | def __len__(self): 116 | return len(self.img_names) 117 | -------------------------------------------------------------------------------- /deeplsd/evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvg/DeepLSD/d873fd3619d6e44a9f625bc437ab4786057677e5/deeplsd/evaluation/__init__.py -------------------------------------------------------------------------------- /deeplsd/geometry/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvg/DeepLSD/d873fd3619d6e44a9f625bc437ab4786057677e5/deeplsd/geometry/__init__.py -------------------------------------------------------------------------------- /deeplsd/geometry/homography_adaptation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from kornia.geometry.transform import warp_perspective 4 | from kornia.morphology import erosion 5 | 6 | from ..datasets.utils.homographies import sample_homography 7 | 8 | 9 | default_H_params = { 10 | 'translation': True, 11 | 'rotation': True, 12 | 'scaling': True, 13 | 'perspective': True, 14 | 'scaling_amplitude': 0.2, 15 | 'perspective_amplitude_x': 0.2, 16 | 'perspective_amplitude_y': 
0.2, 17 | 'patch_ratio': 0.85, 18 | 'max_angle': 1.57, 19 | 'allow_artifacts': True 20 | } 21 | 22 | erosion_kernel = torch.tensor( 23 | [[0, 0, 1, 0, 0], 24 | [0, 1, 1, 1, 0], 25 | [1, 1, 1, 1, 1], 26 | [0, 1, 1, 1, 0], 27 | [0, 0, 1, 0, 0]], 28 | dtype=torch.float 29 | ) 30 | 31 | 32 | def torch_homography_adaptation(img, net, num_H=10, H_params=default_H_params, 33 | aggregation='median'): 34 | """ Perform homography adaptation at test time using Pytorch. 35 | Only works with a batch size of 1. """ 36 | assert len(img) == 1, "torch_homography_adaptation only with a batch size of 1." 37 | bs = 10 38 | device = img.device 39 | h, w = img.shape[2:4] 40 | 41 | # Generate homographies and warp the image 42 | Hs = [] 43 | for i in range(num_H): 44 | if i == 0: 45 | # Always include at least the identity 46 | Hs.append(torch.eye(3, dtype=torch.float, device=device)) 47 | else: 48 | Hs.append(torch.tensor( 49 | sample_homography((h, w), **default_H_params), 50 | dtype=torch.float, device=device)) 51 | Hs = torch.stack(Hs, dim=0) 52 | 53 | # Loop through all mini batches 54 | n_mini_batch = int(np.ceil(num_H / bs)) 55 | dfs = torch.empty((num_H, h, w), dtype=torch.float, device=device) 56 | angles = torch.empty((num_H, h, w), dtype=torch.float, device=device) 57 | offsets = torch.empty((num_H, h, w, 2), dtype=torch.float, device=device) 58 | counts = torch.empty((num_H, h, w), dtype=torch.float, device=device) 59 | for i in range(n_mini_batch): 60 | H = Hs[i*bs:(i+1)*bs] 61 | 62 | # Warp the image 63 | warped_imgs = warp_perspective( 64 | img.repeat(len(H), 1, 1, 1), H, (h, w), mode='bilinear') 65 | 66 | # Forward pass 67 | with torch.no_grad(): 68 | outs = net({'image': warped_imgs}) 69 | assert "lines" not in outs, "Please turn off line detection for generation of homographies as this is known to cause memory and performance issues" 70 | if "offset" not in outs: 71 | outs['offset'] = torch.stack((outs['df']*torch.sin(outs['line_level'] + torch.pi / 2), 72 | outs['df']*torch.cos(outs['line_level'] + torch.pi / 2)), 73 | dim=3) 74 | # Warp back the results 75 | df, angle, offset, count = warp_afm( 76 | outs['df'], outs['line_level'], 77 | outs['offset'], torch.inverse(H)) 78 | 79 | # Aggregate the results 80 | dfs[i*bs:(i+1)*bs] = df 81 | angles[i*bs:(i+1)*bs] = angle 82 | offsets[i*bs:(i+1)*bs] = offset 83 | counts[i*bs:(i+1)*bs] = count 84 | 85 | # Aggregate the results 86 | if aggregation == 'mean': 87 | df = (dfs * counts).sum(dim=0) / counts.sum(dim=0) 88 | offset = ((offsets * counts.unsqueeze(-1)).sum(dim=0) 89 | / counts.sum(dim=0).unsqueeze(-1)) 90 | elif aggregation == 'median': 91 | dfs[counts == 0] = float("nan") 92 | df = torch.nanmedian(dfs, dim=0)[0] 93 | offsets[counts == 0] = float("nan") 94 | offset = torch.nanmedian(offsets, dim=0)[0] 95 | else: 96 | raise ValueError("Unknown aggregation method: " + aggregation) 97 | 98 | # Median of the angle 99 | angles = angles.reshape(num_H, h * w) 100 | counts = counts.reshape(num_H, h * w) 101 | circ_bound = (torch.min(np.pi - angles, angles) 102 | * counts).sum(0) / counts.sum(0) < 0.3 103 | angles[:, circ_bound] -= torch.where( 104 | angles[:, circ_bound] > np.pi /2, 105 | torch.ones_like(angles[:, circ_bound]) * np.pi, 106 | torch.zeros_like(angles[:, circ_bound])) 107 | angles[counts == 0] = float("nan") 108 | angle = torch.remainder(torch.nanmedian(angles, dim=0)[0], 109 | np.pi).reshape(h, w) 110 | 111 | return df, angle, offset 112 | 113 | 114 | def warp_points(points, H): 115 | """ Warp batched 2D points by a batched homography 
H: 116 | points is [bs, ..., 2] and H is [bs, 3, 3]. """ 117 | shape = points.shape 118 | bs = len(points) 119 | reproj_points = points.reshape(bs, -1, 2)[:, :, [1, 0]].transpose(1, 2) 120 | reproj_points = torch.cat( 121 | [reproj_points, torch.ones_like(reproj_points[:, :1])], dim=1) 122 | reproj_points = (H @ reproj_points).transpose(1, 2) 123 | reproj_points = reproj_points[..., :2] / reproj_points[..., 2:] 124 | reproj_points = reproj_points[..., [1, 0]] 125 | return reproj_points.reshape(shape) 126 | 127 | 128 | def warp_afm(df, angle, offset, H): 129 | """ Warp an attraction field defined by a DF, line level angle and offset 130 | field, with a set of homographies. All tensors are batched. """ 131 | b_size, h, w = df.shape 132 | device = df.device 133 | 134 | # Warp the closest point on a line 135 | pix_loc = torch.stack(torch.meshgrid( 136 | torch.arange(h, dtype=torch.float, device=device), 137 | torch.arange(w, dtype=torch.float, device=device), 138 | indexing='ij'), dim=-1)[None].repeat(b_size, 1, 1, 1) 139 | closest = pix_loc + offset 140 | warped_closest = warp_points(closest, H) 141 | warped_pix_loc = warp_points(pix_loc, H) 142 | offset_norm = torch.norm(offset, dim=-1) 143 | zero_offset = offset_norm < 1e-3 144 | offset_norm[zero_offset] = 1 145 | scaling = (torch.norm(warped_closest - warped_pix_loc, dim=-1) 146 | / offset_norm) 147 | scaling[zero_offset] = 0 148 | warped_closest = warp_perspective( 149 | warped_closest.permute(0, 3, 1, 2), H, (h, w), 150 | mode='nearest').permute(0, 2, 3, 1) 151 | warped_offset = warped_closest - pix_loc 152 | 153 | # Warp the DF 154 | warped_df = warp_perspective(df.unsqueeze(1), H, (h, w), 155 | mode='bilinear')[:, 0] 156 | warped_scaling = warp_perspective(scaling.unsqueeze(1), H, (h, w), 157 | mode='bilinear')[:, 0] 158 | warped_df *= warped_scaling 159 | 160 | # Warp the angle 161 | closest = pix_loc + torch.stack([torch.sin(angle), torch.cos(angle)], 162 | dim=-1) 163 | warped_closest = warp_points(closest, H) 164 | warped_angle = torch.remainder(torch.atan2( 165 | warped_closest[..., 0] - warped_pix_loc[..., 0], 166 | warped_closest[..., 1] - warped_pix_loc[..., 1]), np.pi) 167 | warped_angle = warp_perspective(warped_angle.unsqueeze(1), H, (h, w), 168 | mode='nearest')[:, 0] 169 | 170 | # Compute the counts of valid pixels 171 | H_inv = torch.inverse(H) 172 | counts = warp_perspective(torch.ones_like(df).unsqueeze(1), H_inv, (h, w), 173 | mode='nearest') 174 | counts = erosion(counts, erosion_kernel.to(device)) 175 | counts = warp_perspective(counts, H, (h, w), mode='nearest')[:, 0] 176 | 177 | return warped_df, warped_angle, warped_offset, counts 178 | 179 | 180 | def masked_median(arr, mask): 181 | """ Compute the median of a batched tensor arr, taking into account a 182 | mask of valid pixels. We assume the batch size to be small. 
""" 183 | b_size = len(arr) 184 | arr_shape = arr.shape[1:] 185 | flat_arr = arr.reshape(b_size, -1) 186 | flat_mask = mask.reshape(b_size, -1) 187 | counts = flat_mask.sum(dim=0) 188 | out_median = torch.zeros_like(flat_arr[0]) 189 | for i in range(1, b_size + 1): 190 | curr_mask = counts == i 191 | curr_val = flat_arr.t()[curr_mask] 192 | curr_val = curr_val[flat_mask.t()[curr_mask] == 1].reshape(-1, i) 193 | out_median[curr_mask] = torch.quantile(curr_val, 0.5, dim=1) 194 | return out_median.reshape(arr_shape) 195 | -------------------------------------------------------------------------------- /deeplsd/geometry/projection.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions to project data across different views. 3 | """ 4 | 5 | import numpy as np 6 | import torch 7 | import torch.nn.functional as F 8 | from pycolmap import image_to_world, world_to_image 9 | 10 | 11 | def to_homogeneous(arr): 12 | # Adds a new column with ones 13 | if isinstance(arr, torch.Tensor): 14 | return torch.cat([arr, torch.ones_like(arr[..., :1])], dim=-1) 15 | else: 16 | return np.concatenate([arr, np.ones_like(arr[..., :1])], axis=-1) 17 | 18 | 19 | def to_homogeneous_t(arr): 20 | # Adds a new row with ones 21 | if isinstance(arr, torch.Tensor): 22 | return torch.cat([arr, torch.ones_like(arr[..., :1, :])], dim=-2) 23 | else: 24 | return np.concatenate([arr, np.ones_like(arr[..., :1, :])], axis=-2) 25 | 26 | 27 | def to_cartesian(arr): 28 | return arr[..., :-1] / arr[..., -1:] 29 | 30 | 31 | def to_cartesian_t(arr): 32 | return arr[..., :-1, :] / arr[..., -1:, :] 33 | 34 | 35 | def warp_points(points, H, img_shape): 36 | """ Warp 2D points by an homography H. 37 | Args: 38 | points: a [b_size, N, 2] or [N, 2] torch tensor (ij coords). 39 | H: a [N, 3, 3] torch homography tensor. 40 | Returns: 41 | The reprojected points and a mask of valid points. 
42 | """ 43 | reproj_points = points.clone()[..., [1, 0]] 44 | reproj_points = to_homogeneous(reproj_points) 45 | reproj_points = (H @ reproj_points.transpose(-1, -2)).transpose(-1, -2) 46 | reproj_points = reproj_points[..., :2] / reproj_points[..., 2:] 47 | reproj_points = reproj_points[..., [1, 0]] 48 | 49 | # Compute the valid points 50 | h, w = img_shape 51 | valid = ((reproj_points[..., 0] >= 0) 52 | & (reproj_points[..., 0] <= h - 1) 53 | & (reproj_points[..., 1] >= 0) 54 | & (reproj_points[..., 1] <= w - 1)) 55 | 56 | return reproj_points, valid 57 | 58 | 59 | ### 3D geometry utils for ETH3D 60 | 61 | # Convert from quaternions to rotation matrix 62 | def qvec2rotmat(qvec): 63 | return np.array([ 64 | [1 - 2 * qvec[2]**2 - 2 * qvec[3]**2, 65 | 2 * qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3], 66 | 2 * qvec[3] * qvec[1] + 2 * qvec[0] * qvec[2]], 67 | [2 * qvec[1] * qvec[2] + 2 * qvec[0] * qvec[3], 68 | 1 - 2 * qvec[1]**2 - 2 * qvec[3]**2, 69 | 2 * qvec[2] * qvec[3] - 2 * qvec[0] * qvec[1]], 70 | [2 * qvec[3] * qvec[1] - 2 * qvec[0] * qvec[2], 71 | 2 * qvec[2] * qvec[3] + 2 * qvec[0] * qvec[1], 72 | 1 - 2 * qvec[1]**2 - 2 * qvec[2]**2]]) 73 | 74 | 75 | # Convert a rotation matrix to quaternions 76 | def rotmat2qvec(R): 77 | Rxx, Ryx, Rzx, Rxy, Ryy, Rzy, Rxz, Ryz, Rzz = R.flat 78 | K = np.array([ 79 | [Rxx - Ryy - Rzz, 0, 0, 0], 80 | [Ryx + Rxy, Ryy - Rxx - Rzz, 0, 0], 81 | [Rzx + Rxz, Rzy + Ryz, Rzz - Rxx - Ryy, 0], 82 | [Ryz - Rzy, Rzx - Rxz, Rxy - Ryx, Rxx + Ryy + Rzz]]) / 3.0 83 | eigvals, eigvecs = np.linalg.eigh(K) 84 | qvec = eigvecs[[3, 0, 1, 2], np.argmax(eigvals)] 85 | if qvec[0] < 0: 86 | qvec *= -1 87 | return qvec 88 | 89 | 90 | # Read the camera intrinsics from a file in COLMAP format 91 | def read_cameras(camera_file, scale_factor=None): 92 | with open(camera_file, 'r') as f: 93 | raw_cameras = f.read().rstrip().split('\n') 94 | raw_cameras = raw_cameras[3:] 95 | cameras = [] 96 | for c in raw_cameras: 97 | data = c.split(' ') 98 | cameras.append({ 99 | "model": data[1], 100 | "width": int(data[2]), 101 | "height": int(data[3]), 102 | "params": np.array(list(map(float, data[4:])))}) 103 | 104 | # Optionally scale the intrinsics if the image are resized 105 | if scale_factor is not None: 106 | cameras = [scale_intrinsics(c, scale_factor) for c in cameras] 107 | return cameras 108 | 109 | 110 | # Adapt the camera intrinsics to an image resize 111 | def scale_intrinsics(intrinsics, scale_factor): 112 | new_intrinsics = {"model": intrinsics["model"], 113 | "width": int(intrinsics["width"] * scale_factor + 0.5), 114 | "height": int(intrinsics["height"] * scale_factor + 0.5) 115 | } 116 | params = intrinsics["params"] 117 | # Adapt the focal length 118 | params[:2] *= scale_factor 119 | # Adapt the principal point 120 | params[2:4] = (params[2:4] * scale_factor + 0.5) - 0.5 121 | new_intrinsics["params"] = params 122 | return new_intrinsics 123 | 124 | 125 | # Project points from 2D to 3D, in (x, y, z) format 126 | def project_2d_to_3d(points, depth, T_local_to_world, intrinsics): 127 | # Warp to world homogeneous coordinates 128 | world_points = image_to_world(points[:, [1, 0]], 129 | intrinsics)['world_points'] 130 | world_points *= depth[:, None] 131 | world_points = np.concatenate([world_points, depth[:, None], 132 | np.ones((len(depth), 1))], axis=1) 133 | 134 | # Warp to the world coordinates 135 | world_points = (T_local_to_world @ world_points.T).T 136 | world_points = world_points[:, :3] / world_points[:, 3:] 137 | return world_points 138 | 139 | 140 | # Project points 
from 3D in (x, y, z) format to 2D 141 | def project_3d_to_2d(points, T_world_to_local, intrinsics): 142 | norm_points = np.concatenate([points, np.ones((len(points), 1))], axis=1) 143 | norm_points = (T_world_to_local @ norm_points.T).T 144 | norm_points = norm_points[:, :3] / norm_points[:, 3:] 145 | norm_points = norm_points[:, :2] / norm_points[:, 2:] 146 | image_points = world_to_image(norm_points, intrinsics) 147 | image_points = np.stack(image_points['image_points'])[:, [1, 0]] 148 | return image_points 149 | 150 | 151 | # Mask out the points that are outside of img_size 152 | def mask_points(points, img_size): 153 | mask = ((points[..., 0] >= 0) 154 | & (points[..., 0] < img_size[0]) 155 | & (points[..., 1] >= 0) 156 | & (points[..., 1] < img_size[1])) 157 | return mask 158 | 159 | 160 | def get_depth(img_points, dist_depth, dist_camera, undist_camera): 161 | """ 162 | Get the depth of a list of image points in the undistorted image, 163 | given the depth of the distorted image. 164 | """ 165 | # Warp the points to world coordinates 166 | world_points = image_to_world(img_points[:, [1, 0]], 167 | undist_camera)['world_points'] 168 | 169 | # Warp them back to the distorted coordinates 170 | dist_img_points = world_to_image(world_points, dist_camera) 171 | dist_img_points = np.stack(dist_img_points['image_points']) 172 | dist_img_points = np.round(dist_img_points).astype(int) 173 | 174 | # Get the depth of valid points (inf otherwise) 175 | dist_shape = (int(dist_camera['height']), int(dist_camera['width'])) 176 | valid = ((dist_img_points[:, 0] >= 0) 177 | & (dist_img_points[:, 0] < dist_shape[1]) 178 | & (dist_img_points[:, 1] >= 0) 179 | & (dist_img_points[:, 1] < dist_shape[0])) 180 | depths = np.array([np.inf] * len(dist_img_points)) 181 | valid_dist_img_points = dist_img_points[valid] 182 | depths[valid] = dist_depth[valid_dist_img_points[:, 1], 183 | valid_dist_img_points[:, 0]] 184 | return depths 185 | 186 | 187 | def filter_and_project_lines( 188 | ref_line_seg, target_line_seg, ref_depth, target_depth, data): 189 | """ Filter out lines without depth, project them to 3D, warp them in 190 | the other view, and keep lines shared between both views. 
""" 191 | # Get the points with valid depth 192 | ref_depths = get_depth( 193 | ref_line_seg.reshape(-1, 2), ref_depth, 194 | data["ref_dist_camera"], data["ref_undist_camera"]).reshape(-1, 2) 195 | ref_valid = ~np.any(np.isinf(ref_depths), axis=1) 196 | ref_valid_line_seg = ref_line_seg[ref_valid] 197 | target_depths = get_depth( 198 | target_line_seg.reshape(-1, 2), target_depth, 199 | data["target_dist_camera"], 200 | data["target_undist_camera"]).reshape(-1, 2) 201 | target_valid = ~np.any(np.isinf(target_depths), axis=1) 202 | target_valid_line_seg = target_line_seg[target_valid] 203 | 204 | # Useful image shapes 205 | ref_dist_shape = (int(data["ref_dist_camera"]['height']), 206 | int(data["ref_dist_camera"]['width'])) 207 | target_dist_shape = (int(data["target_dist_camera"]['height']), 208 | int(data["target_dist_camera"]['width'])) 209 | 210 | # Project the lines in 3D and then in the other view 211 | # Keep only the lines in common between the two views 212 | # Ref 213 | if len(ref_valid_line_seg) > 0: 214 | ref_3d_lines = project_2d_to_3d( 215 | ref_valid_line_seg.reshape(-1, 2), 216 | ref_depths[ref_valid].flatten(), 217 | np.linalg.inv(data["T_world_to_ref"]), 218 | data["ref_undist_camera"]) 219 | warped_ref_valid_line_seg = project_3d_to_2d( 220 | ref_3d_lines, data["T_world_to_target"], 221 | data["target_undist_camera"]) 222 | else: 223 | ref_3d_lines = np.empty((0, 3)) 224 | warped_ref_valid_line_seg = np.empty((0, 2)) 225 | valid_mask = mask_points(warped_ref_valid_line_seg, target_dist_shape) 226 | valid_mask = np.all(valid_mask.reshape(-1, 2), axis=1) 227 | ref_valid[ref_valid] = valid_mask 228 | ref_valid_line_seg = ref_valid_line_seg[valid_mask] 229 | ref_3d_lines = ref_3d_lines.reshape(-1, 2, 3)[valid_mask] 230 | warped_ref_valid_line_seg = warped_ref_valid_line_seg.reshape( 231 | -1, 2, 2)[valid_mask] 232 | # Target 233 | if len(target_valid_line_seg) > 0: 234 | target_3d_lines = project_2d_to_3d( 235 | target_valid_line_seg.reshape(-1, 2), 236 | target_depths[target_valid].flatten(), 237 | np.linalg.inv(data["T_world_to_target"]), 238 | data["target_undist_camera"]) 239 | warped_target_valid_line_seg = project_3d_to_2d( 240 | target_3d_lines, data["T_world_to_ref"], 241 | data["ref_undist_camera"]) 242 | else: 243 | target_3d_lines = np.empty((0, 3)) 244 | warped_target_valid_line_seg = np.empty((0, 2)) 245 | valid_mask = mask_points(warped_target_valid_line_seg, ref_dist_shape) 246 | valid_mask = np.all(valid_mask.reshape(-1, 2), axis=1) 247 | target_valid[target_valid] = valid_mask 248 | target_valid_line_seg = target_valid_line_seg[valid_mask] 249 | target_3d_lines = target_3d_lines.reshape(-1, 2, 3)[valid_mask] 250 | warped_target_valid_line_seg = warped_target_valid_line_seg.reshape( 251 | -1, 2, 2)[valid_mask] 252 | 253 | return (ref_valid_line_seg, target_valid_line_seg, 254 | ref_3d_lines, target_3d_lines, 255 | warped_ref_valid_line_seg, warped_target_valid_line_seg, 256 | ref_valid, target_valid) 257 | -------------------------------------------------------------------------------- /deeplsd/geometry/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | A set of geometry tools for PyTorch tensors and sometimes NumPy arrays. 3 | """ 4 | 5 | import torch 6 | 7 | 8 | def keypoints_to_grid(keypoints, img_size): 9 | """ Convert the cartesian coordinates of 2D keypoints into a grid in 10 | [-1, 1]² that can be used in torch.nn.functional.interpolate. 11 | Args: 12 | keypoints: a (..., N, 2) tensor of N keypoints. 
13 | img_size: image size. 14 | Returns: 15 | A (B, N, 1, 2) tensor of normalized coordinates. 16 | """ 17 | n_points = keypoints.size()[-2] 18 | device = keypoints.device 19 | grid_points = keypoints.float() * 2. / torch.tensor( 20 | img_size, dtype=torch.float, device=device) - 1. 21 | grid_points = grid_points[..., [1, 0]].view(-1, n_points, 1, 2) 22 | return grid_points 23 | 24 | 25 | def get_dist_mask(kp0, kp1, valid_mask, dist_thresh): 26 | """ Compute a 2D matrix indicating the local neighborhood of each point 27 | for a given threshold and two lists of corresponding keypoints. 28 | Args: 29 | kp0, kp1: a (B, N, 2) tensor of 2D points. 30 | valid_mask: a (B*N) boolean mask indicating valid points. 31 | dist_thresh: distance in pixels defining the local neighborhood. 32 | Returns: 33 | A (B*N, B*N) bool tensor indicating points that are spatially close. 34 | """ 35 | b_size, n_points, _ = kp0.size() 36 | dist_mask0 = torch.norm(kp0.unsqueeze(2) - kp0.unsqueeze(1), dim=-1) 37 | dist_mask1 = torch.norm(kp1.unsqueeze(2) - kp1.unsqueeze(1), dim=-1) 38 | dist_mask = torch.min(dist_mask0, dist_mask1) 39 | dist_mask = dist_mask <= dist_thresh 40 | dist_mask = dist_mask.repeat(1, 1, b_size).reshape(b_size * n_points, 41 | b_size * n_points) 42 | dist_mask = dist_mask[valid_mask, :][:, valid_mask] 43 | return dist_mask 44 | -------------------------------------------------------------------------------- /deeplsd/geometry/viz_2d.py: -------------------------------------------------------------------------------- 1 | """ 2 | 2D visualization primitives based on Matplotlib. 3 | 4 | 1) Plot images with `plot_images`. 5 | 2) Call `plot_keypoints` or `plot_matches` any number of times. 6 | 3) Optionally: save a .png or .pdf plot (nice in papers!) with `save_plot`. 7 | """ 8 | 9 | import matplotlib 10 | import matplotlib.pyplot as plt 11 | import numpy as np 12 | import seaborn as sns 13 | import flow_vis 14 | 15 | 16 | def cm_RdGn(x): 17 | """Custom colormap: red (0) -> yellow (0.5) -> green (1).""" 18 | x = np.clip(x, 0, 1)[..., None]*2 19 | c = x*np.array([[0, 1., 0]]) + (2-x)*np.array([[1., 0, 0]]) 20 | return np.clip(c, 0, 1) 21 | 22 | 23 | def plot_images(imgs, titles=None, cmaps='gray', dpi=100, size=6, pad=.5): 24 | """Plot a set of images horizontally. 25 | Args: 26 | imgs: a list of NumPy or PyTorch images, RGB (H, W, 3) or mono (H, W). 27 | titles: a list of strings, as titles for each image. 28 | cmaps: colormaps for monochrome images. 29 | """ 30 | n = len(imgs) 31 | if not isinstance(cmaps, (list, tuple)): 32 | cmaps = [cmaps] * n 33 | figsize = (size*n, size*3/4) if size is not None else None 34 | fig, ax = plt.subplots(1, n, figsize=figsize, dpi=dpi) 35 | if n == 1: 36 | ax = [ax] 37 | for i in range(n): 38 | ax[i].imshow(imgs[i], cmap=plt.get_cmap(cmaps[i])) 39 | ax[i].get_yaxis().set_ticks([]) 40 | ax[i].get_xaxis().set_ticks([]) 41 | ax[i].set_axis_off() 42 | for spine in ax[i].spines.values(): # remove frame 43 | spine.set_visible(False) 44 | if titles: 45 | ax[i].set_title(titles[i]) 46 | fig.tight_layout(pad=pad) 47 | 48 | 49 | def plot_lines(lines, line_colors='orange', point_color='cyan', 50 | ps=4, lw=2, indices=(0, 1), alpha=1): 51 | """ Plot lines and endpoints for existing images. 52 | Args: 53 | lines: list of ndarrays of size (N, 2, 2). 54 | line_colors: string, or list of list of tuples (one for per line). 55 | point_color: unique color for all endpoints. 56 | ps: size of the keypoints as float pixels. 57 | lw: line width as float pixels. 
58 | indices: indices of the images to draw the matches on. 59 | alpha: alpha transparency. 60 | """ 61 | if not isinstance(line_colors, list): 62 | line_colors = [[line_colors] * len(l) for l in lines] 63 | for i in range(len(lines)): 64 | if ((not isinstance(line_colors[i], list)) 65 | and (not isinstance(line_colors[i], np.ndarray))): 66 | line_colors[i] = [line_colors[i]] * len(lines[i]) 67 | 68 | fig = plt.gcf() 69 | ax = fig.axes 70 | assert len(ax) > max(indices) 71 | axes = [ax[i] for i in indices] 72 | fig.canvas.draw() 73 | 74 | # Plot the lines and junctions 75 | for a, l, lc in zip(axes, lines, line_colors): 76 | for i in range(len(l)): 77 | line = matplotlib.lines.Line2D( 78 | (l[i, 0, 0], l[i, 1, 0]), (l[i, 0, 1], l[i, 1, 1]), 79 | zorder=1, c=lc[i], linewidth=lw, alpha=alpha) 80 | a.add_line(line) 81 | pts = l.reshape(-1, 2) 82 | a.scatter(pts[:, 0], pts[:, 1], c=point_color, s=ps, 83 | linewidths=0, zorder=2, alpha=alpha) 84 | 85 | 86 | def plot_vp(lines, vp_labels, lw=2, indices=(0, 1)): 87 | """ Plot the vanishing directions of the lines, given the vp labels. 88 | Lines labelled with -1 are ignored. 89 | Args: 90 | lines: list of ndarrays of size (N, 2, 2). 91 | vp_labels: list of labels indicating the corresponding vp. 92 | lw: line width as float pixels. 93 | indices: indices of the images to draw the matches on. 94 | """ 95 | num_labels = np.amax([np.amax(vp) for vp in vp_labels if len(vp) > 0]) + 1 96 | colors = sns.color_palette("hls", num_labels) 97 | 98 | fig = plt.gcf() 99 | ax = fig.axes 100 | assert len(ax) > max(indices) 101 | axes = [ax[i] for i in indices] 102 | fig.canvas.draw() 103 | 104 | # Plot the lines and junctions 105 | for a, l, vp in zip(axes, lines, vp_labels): 106 | for i in range(len(l)): 107 | if vp[i] == -1: 108 | continue 109 | line = matplotlib.lines.Line2D( 110 | (l[i, 0, 0], l[i, 1, 0]), (l[i, 0, 1], l[i, 1, 1]), 111 | zorder=1, c=colors[vp[i]], linewidth=lw) 112 | a.add_line(line) 113 | 114 | 115 | def plot_color_line_matches(lines, correct_matches=None, 116 | lw=2, indices=(0, 1)): 117 | """Plot line matches for existing images with multiple colors. 118 | Args: 119 | lines: list of ndarrays of size (N, 2, 2). 120 | correct_matches: bool array of size (N,) indicating correct matches. 121 | lw: line width as float pixels. 122 | indices: indices of the images to draw the matches on. 
123 | """ 124 | n_lines = len(lines[0]) 125 | colors = sns.color_palette('husl', n_colors=n_lines) 126 | np.random.shuffle(colors) 127 | alphas = np.ones(n_lines) 128 | # If correct_matches is not None, display wrong matches with a low alpha 129 | if correct_matches is not None: 130 | alphas[~np.array(correct_matches)] = 0.2 131 | 132 | fig = plt.gcf() 133 | ax = fig.axes 134 | assert len(ax) > max(indices) 135 | axes = [ax[i] for i in indices] 136 | fig.canvas.draw() 137 | 138 | # Plot the lines 139 | for a, l in zip(axes, lines): 140 | # Transform the points into the figure coordinate system 141 | transFigure = fig.transFigure.inverted() 142 | endpoint0 = transFigure.transform(a.transData.transform(l[:, 0])) 143 | endpoint1 = transFigure.transform(a.transData.transform(l[:, 1])) 144 | fig.lines += [matplotlib.lines.Line2D( 145 | (endpoint0[i, 0], endpoint1[i, 0]), 146 | (endpoint0[i, 1], endpoint1[i, 1]), 147 | zorder=1, transform=fig.transFigure, c=colors[i], 148 | alpha=alphas[i], linewidth=lw) for i in range(n_lines)] 149 | 150 | 151 | def plot_color_lines(lines, correct_matches, wrong_matches, 152 | lw=2, indices=(0, 1)): 153 | """Plot line matches for existing images with multiple colors: 154 | green for correct matches, red for wrong ones, and blue for the rest. 155 | Args: 156 | lines: list of ndarrays of size (N, 2, 2). 157 | correct_matches: list of bool arrays of size N with correct matches. 158 | wrong_matches: list of bool arrays of size (N,) with correct matches. 159 | lw: line width as float pixels. 160 | indices: indices of the images to draw the matches on. 161 | """ 162 | # palette = sns.color_palette() 163 | palette = sns.color_palette("hls", 8) 164 | blue = palette[5] # palette[0] 165 | red = palette[0] # palette[3] 166 | green = palette[2] # palette[2] 167 | colors = [np.array([blue] * len(l)) for l in lines] 168 | for i, c in enumerate(colors): 169 | c[np.array(correct_matches[i])] = green 170 | c[np.array(wrong_matches[i])] = red 171 | 172 | fig = plt.gcf() 173 | ax = fig.axes 174 | assert len(ax) > max(indices) 175 | axes = [ax[i] for i in indices] 176 | fig.canvas.draw() 177 | 178 | # Plot the lines 179 | for a, l, c in zip(axes, lines, colors): 180 | # Transform the points into the figure coordinate system 181 | transFigure = fig.transFigure.inverted() 182 | endpoint0 = transFigure.transform(a.transData.transform(l[:, 0])) 183 | endpoint1 = transFigure.transform(a.transData.transform(l[:, 1])) 184 | fig.lines += [matplotlib.lines.Line2D( 185 | (endpoint0[i, 0], endpoint1[i, 0]), 186 | (endpoint0[i, 1], endpoint1[i, 1]), 187 | zorder=1, transform=fig.transFigure, c=c[i], 188 | linewidth=lw) for i in range(len(l))] 189 | 190 | 191 | def get_flow_vis(df, ang, line_neighborhood=5): 192 | norm = line_neighborhood + 1 - np.clip(df, 0, line_neighborhood) 193 | flow_uv = np.stack([norm * np.cos(ang), norm * np.sin(ang)], axis=-1) 194 | flow_img = flow_vis.flow_to_color(flow_uv, convert_to_bgr=False) 195 | return flow_img 196 | 197 | 198 | def save_plot(path, **kw): 199 | """Save the current figure without any white margin.""" 200 | plt.savefig(path, bbox_inches='tight', pad_inches=0) 201 | -------------------------------------------------------------------------------- /deeplsd/models/__init__.py: -------------------------------------------------------------------------------- 1 | from ..utils.tools import get_class 2 | from .base_model import BaseModel 3 | 4 | 5 | def get_model(name): 6 | return get_class(name, __name__, BaseModel) 7 | 
-------------------------------------------------------------------------------- /deeplsd/models/backbones/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvg/DeepLSD/d873fd3619d6e44a9f625bc437ab4786057677e5/deeplsd/models/backbones/__init__.py -------------------------------------------------------------------------------- /deeplsd/models/backbones/vgg_unet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class VGGUNet(torch.nn.Module): 7 | def __init__(self, tiny=False): 8 | super().__init__() 9 | self.pool = nn.AvgPool2d(kernel_size=2, stride=2) 10 | if tiny: 11 | sizes = [32, 64, 128, 256] 12 | else: 13 | sizes = [64, 128, 256, 512] 14 | 15 | # Encoder blocks 16 | self.block1 = nn.Sequential( 17 | nn.Conv2d(1, sizes[0], kernel_size=3, stride=1, padding=1), 18 | nn.ReLU(), 19 | nn.BatchNorm2d(sizes[0]), 20 | nn.Conv2d(sizes[0], sizes[0], kernel_size=3, stride=1, padding=1), 21 | nn.ReLU(), 22 | nn.BatchNorm2d(sizes[0]), 23 | ) 24 | self.block2 = nn.Sequential( 25 | nn.Conv2d(sizes[0], sizes[1], kernel_size=3, stride=1, padding=1), 26 | nn.ReLU(), 27 | nn.BatchNorm2d(sizes[1]), 28 | nn.Conv2d(sizes[1], sizes[1], kernel_size=3, stride=1, padding=1), 29 | nn.ReLU(), 30 | nn.BatchNorm2d(sizes[1]), 31 | ) 32 | self.block3 = nn.Sequential( 33 | nn.Conv2d(sizes[1], sizes[2], kernel_size=3, stride=1, padding=1), 34 | nn.ReLU(), 35 | nn.BatchNorm2d(sizes[2]), 36 | nn.Conv2d(sizes[2], sizes[2], kernel_size=3, stride=1, padding=1), 37 | nn.ReLU(), 38 | nn.BatchNorm2d(sizes[2]), 39 | ) 40 | self.block4 = nn.Sequential( 41 | nn.Conv2d(sizes[2], sizes[3], kernel_size=3, stride=1, padding=1), 42 | nn.ReLU(), 43 | nn.BatchNorm2d(sizes[3]), 44 | nn.Conv2d(sizes[3], sizes[3], kernel_size=3, stride=1, padding=1), 45 | nn.ReLU(), 46 | nn.BatchNorm2d(sizes[3]), 47 | ) 48 | 49 | # Decoder blocks 50 | self.deblock4 = nn.Sequential( 51 | nn.Conv2d(sizes[3], sizes[2], kernel_size=3, stride=1, padding=1), 52 | nn.ReLU(), 53 | nn.BatchNorm2d(sizes[2]), 54 | nn.Conv2d(sizes[2], sizes[2], kernel_size=3, stride=1, padding=1), 55 | nn.ReLU(), 56 | nn.BatchNorm2d(sizes[2]), 57 | ) 58 | self.deblock3 = nn.Sequential( 59 | nn.Conv2d(sizes[3], sizes[2], kernel_size=3, stride=1, padding=1), 60 | nn.ReLU(), 61 | nn.BatchNorm2d(sizes[2]), 62 | nn.Conv2d(sizes[2], sizes[1], kernel_size=3, stride=1, padding=1), 63 | nn.ReLU(), 64 | nn.BatchNorm2d(sizes[1]), 65 | ) 66 | self.deblock2 = nn.Sequential( 67 | nn.Conv2d(sizes[2], sizes[1], kernel_size=3, stride=1, padding=1), 68 | nn.ReLU(), 69 | nn.BatchNorm2d(sizes[1]), 70 | nn.Conv2d(sizes[1], sizes[0], kernel_size=3, stride=1, padding=1), 71 | nn.ReLU(), 72 | nn.BatchNorm2d(sizes[0]), 73 | ) 74 | self.deblock1 = nn.Sequential( 75 | nn.Conv2d(sizes[1], sizes[0], kernel_size=3, stride=1, padding=1), 76 | nn.ReLU(), 77 | nn.BatchNorm2d(sizes[0]), 78 | nn.Conv2d(sizes[0], sizes[0], kernel_size=3, stride=1, padding=1), 79 | nn.ReLU(), 80 | nn.BatchNorm2d(sizes[0]), 81 | ) 82 | 83 | def forward(self, inputs): 84 | # Encoding 85 | features = [self.block1(inputs)] 86 | for block in [self.block2, self.block3, self.block4]: 87 | features.append(block(self.pool(features[-1]))) 88 | 89 | # Decoding 90 | out = self.deblock4(features[-1]) 91 | for deblock, feat in zip( 92 | [self.deblock3, self.deblock2, self.deblock1], features[:-1][::-1]): 93 | out = deblock(torch.cat([ 94 | 
F.interpolate(out, feat.shape[2:4], mode='bilinear'), 95 | feat], dim=1)) 96 | 97 | return out # dim = 32 if tiny else 64 98 | -------------------------------------------------------------------------------- /deeplsd/models/base_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Base class for models. 3 | See mnist_net.py for an example of model. 4 | """ 5 | 6 | from abc import ABCMeta, abstractmethod 7 | from omegaconf import OmegaConf 8 | from torch import nn 9 | from copy import copy 10 | 11 | 12 | class BaseModel(nn.Module, metaclass=ABCMeta): 13 | """ 14 | What the child model is expect to declare: 15 | default_conf: dictionary of the default configuration of the model. 16 | It overwrites base_default_conf in BaseModel, and it is overwritten by 17 | the user-provided configuration passed to __init__. 18 | Configurations can be nested. 19 | 20 | required_data_keys: list of expected keys in the input data dictionary. 21 | 22 | strict_conf (optional): boolean. If false, BaseModel does not raise 23 | an error when the user provides an unknown configuration entry. 24 | 25 | _init(self, conf): initialization method, where conf is the final 26 | configuration object (also accessible with `self.conf`). Accessing 27 | unkown configuration entries will raise an error. 28 | 29 | _forward(self, data): method that returns a dictionary of batched 30 | prediction tensors based on a dictionary of batched input data tensors. 31 | 32 | loss(self, pred, data): method that returns a dictionary of losses, 33 | computed from model predictions and input data. Each loss is a batch 34 | of scalars, i.e. a torch.Tensor of shape (B,). 35 | The total loss to be optimized has the key `'total'`. 36 | 37 | metrics(self, pred, data): method that returns a dictionary of metrics, 38 | each as a batch of scalars. 
39 | """ 40 | base_default_conf = { 41 | 'name': None, 42 | 'trainable': True, 43 | } 44 | default_conf = {} 45 | required_data_keys = [] 46 | strict_conf = True 47 | 48 | def __init__(self, conf): 49 | """Perform some logic and call the _init method of the child model.""" 50 | super().__init__() 51 | default_conf = OmegaConf.merge( 52 | OmegaConf.create(self.base_default_conf), 53 | OmegaConf.create(self.default_conf)) 54 | if self.strict_conf: 55 | OmegaConf.set_struct(default_conf, True) 56 | if isinstance(conf, dict): 57 | conf = OmegaConf.create(conf) 58 | self.conf = conf = OmegaConf.merge(default_conf, conf) 59 | OmegaConf.set_readonly(conf, True) 60 | self.required_data_keys = copy(self.required_data_keys) 61 | self._init(conf) 62 | 63 | if not conf.trainable: 64 | for p in self.parameters(): 65 | p.requires_grad = False 66 | 67 | def forward(self, data): 68 | """Check the data and call the _forward method of the child model.""" 69 | for key in self.required_data_keys: 70 | assert key in data, 'Missing key {} in data'.format(key) 71 | return self._forward(data) 72 | 73 | @abstractmethod 74 | def _init(self, conf): 75 | """To be implemented by the child class.""" 76 | raise NotImplementedError 77 | 78 | @abstractmethod 79 | def _forward(self, data): 80 | """To be implemented by the child class.""" 81 | raise NotImplementedError 82 | 83 | @abstractmethod 84 | def loss(self, pred, data): 85 | """To be implemented by the child class.""" 86 | raise NotImplementedError 87 | 88 | @abstractmethod 89 | def metrics(self, pred, data): 90 | """To be implemented by the child class.""" 91 | raise NotImplementedError 92 | -------------------------------------------------------------------------------- /deeplsd/models/deeplsd_inference.py: -------------------------------------------------------------------------------- 1 | """ 2 | Regress the distance function map to all the line segments of an image. 
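A minimal inference sketch (the checkpoint and image paths are placeholders;
weights are read from the 'model' key of the checkpoint, as in the export
script):

    import cv2
    import torch
    from deeplsd.models.deeplsd_inference import DeepLSD

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    net = DeepLSD({'detect_lines': True})
    ckpt = torch.load('path/to/deeplsd_checkpoint.tar', map_location='cpu')
    net.load_state_dict(ckpt['model'])
    net = net.to(device).eval()

    gray = cv2.imread('path/to/image.jpg', 0)
    image = torch.tensor(gray / 255., dtype=torch.float, device=device)[None, None]
    with torch.no_grad():
        out = net({'image': image})
    lines = out['lines'][0]  # [N, 2, 2] array of segments in (x, y) convention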
3 | """ 4 | 5 | import numpy as np 6 | import torch 7 | from torch import nn 8 | import torch.nn.functional as F 9 | 10 | from .base_model import BaseModel 11 | from .backbones.vgg_unet import VGGUNet 12 | from ..geometry.line_utils import merge_lines, filter_outlier_lines 13 | from ..utils.tensor import preprocess_angle 14 | from pytlsd import lsd 15 | 16 | 17 | class DeepLSD(BaseModel): 18 | default_conf = { 19 | 'line_neighborhood': 5, 20 | 'multiscale': False, 21 | 'scale_factors': [1., 1.5], 22 | 'detect_lines': True, 23 | 'line_detection_params': { 24 | 'merge': False, 25 | 'grad_nfa': True, 26 | 'filtering': 'normal', 27 | 'grad_thresh': 3, 28 | }, 29 | } 30 | required_data_keys = ['image'] 31 | 32 | def _init(self, conf): 33 | # Base network 34 | self.backbone = VGGUNet(tiny=False) 35 | dim = 64 36 | 37 | # Predict the distance field and angle to the nearest line 38 | # DF head 39 | self.df_head = nn.Sequential( 40 | nn.Conv2d(dim, 64, kernel_size=3, padding=1), 41 | nn.ReLU(), 42 | nn.BatchNorm2d(64), 43 | nn.Conv2d(64, 64, kernel_size=3, padding=1), 44 | nn.ReLU(), 45 | nn.BatchNorm2d(64), 46 | nn.Conv2d(64, 1, kernel_size=1), 47 | nn.ReLU(), 48 | ) 49 | 50 | # Closest line direction head 51 | self.angle_head = nn.Sequential( 52 | nn.Conv2d(dim, 64, kernel_size=3, padding=1), 53 | nn.ReLU(), 54 | nn.BatchNorm2d(64), 55 | nn.Conv2d(64, 64, kernel_size=3, padding=1), 56 | nn.ReLU(), 57 | nn.BatchNorm2d(64), 58 | nn.Conv2d(64, 1, kernel_size=1), 59 | nn.Sigmoid(), 60 | ) 61 | 62 | # Loss 63 | self.l1_loss_fn = nn.L1Loss(reduction='none') 64 | self.l2_loss_fn = nn.MSELoss(reduction='none') 65 | 66 | def normalize_df(self, df): 67 | return -torch.log(df / self.conf.line_neighborhood + 1e-6) 68 | 69 | def denormalize_df(self, df_norm): 70 | return torch.exp(-df_norm) * self.conf.line_neighborhood 71 | 72 | def _forward(self, data): 73 | outputs = {} 74 | 75 | if self.conf.multiscale: 76 | outputs = self.ms_forward(data) 77 | else: 78 | base = self.backbone(data['image']) 79 | 80 | # DF prediction 81 | outputs['df_norm'] = self.df_head(base).squeeze(1) 82 | outputs['df'] = self.denormalize_df(outputs['df_norm']) 83 | 84 | # Closest line direction prediction 85 | outputs['line_level'] = self.angle_head(base).squeeze(1) * np.pi 86 | 87 | # Detect line segments 88 | if self.conf.detect_lines: 89 | lines = [] 90 | np_img = (data['image'].cpu().numpy()[:, 0] * 255).astype(np.uint8) 91 | np_df = outputs['df'].cpu().numpy() 92 | np_ll = outputs['line_level'].cpu().numpy() 93 | for img, df, ll in zip(np_img, np_df, np_ll): 94 | line = self.detect_afm_lines( 95 | img, df, ll, **self.conf.line_detection_params) 96 | lines.append(line) 97 | outputs['lines'] = lines 98 | 99 | return outputs 100 | 101 | def ms_forward(self, data): 102 | """ Do several forward passes at multiple image resolutions 103 | and aggregate the results before extracting the lines. 
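        Each prediction is resized back to the input resolution; the fused
        distance field is the per-pixel mean across scales and the fused
        angle the per-pixel median.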
""" 104 | img_size = data['image'].shape[2:] 105 | 106 | # Forward pass for each scale 107 | pred_df, pred_angle = [], [] 108 | for s in self.conf.scale_factors: 109 | img = F.interpolate(data['image'], scale_factor=s, mode='bilinear') 110 | with torch.no_grad(): 111 | base = self.backbone(img) 112 | pred_df.append(self.denormalize_df(self.df_head(base))) 113 | pred_angle.append(self.angle_head(base) * np.pi) 114 | 115 | # Fuse the outputs together 116 | for i in range(len(self.conf.scale_factors)): 117 | pred_df[i] = F.interpolate(pred_df[i], img_size, 118 | mode='bilinear').squeeze(1) 119 | pred_angle[i] = F.interpolate(pred_angle[i], img_size, 120 | mode='nearest').squeeze(1) 121 | fused_df = torch.stack(pred_df, dim=0).mean(dim=0) 122 | fused_angle = torch.median(torch.stack(pred_angle, dim=0), dim=0)[0] 123 | 124 | out = {'df': fused_df, 'line_level': fused_angle} 125 | return out 126 | 127 | def detect_afm_lines( 128 | self, img, df, line_level, filtering='normal', 129 | merge=False, grad_thresh=3, grad_nfa=True): 130 | """ Detect lines from the line distance and angle field. 131 | Offer the possibility to ignore line in high DF values, 132 | and to merge close-by lines. """ 133 | gradnorm = np.maximum(5 - df, 0).astype(np.float64) 134 | angle = line_level.astype(np.float64) - np.pi / 2 135 | angle = preprocess_angle(angle, img, mask=True)[0] 136 | angle[gradnorm < grad_thresh] = -1024 137 | lines = lsd( 138 | img.astype(np.float64), scale=1., gradnorm=gradnorm, 139 | gradangle=angle, grad_nfa=grad_nfa)[:, :4].reshape(-1, 2, 2) 140 | 141 | # Optionally filter out lines based on the DF and line_level 142 | if filtering: 143 | if filtering == 'strict': 144 | df_thresh, ang_thresh = 1., np.pi / 12 145 | else: 146 | df_thresh, ang_thresh = 1.5, np.pi / 9 147 | angle = line_level - np.pi / 2 148 | lines = filter_outlier_lines( 149 | img, lines[:, :, [1, 0]], df, angle, mode='inlier_thresh', 150 | use_grad=False, inlier_thresh=0.5, df_thresh=df_thresh, 151 | ang_thresh=ang_thresh)[0][:, :, [1, 0]] 152 | 153 | # Merge close-by lines together 154 | if merge: 155 | lines = merge_lines(lines, thresh=4, 156 | overlap_thresh=0).astype(np.float32) 157 | 158 | return lines 159 | 160 | def loss(self, pred, data): 161 | raise NotImplementedError() 162 | 163 | def metrics(self, pred, data): 164 | raise NotImplementedError() 165 | -------------------------------------------------------------------------------- /deeplsd/models/lbd.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import pytlbd 4 | 5 | 6 | ETH_EPS = 1e-10 7 | 8 | class PyTLBD(object): 9 | def __init__(self): 10 | pass 11 | 12 | @staticmethod 13 | def to_multiscale_lines(lines): 14 | ms_lines = [] 15 | for l in lines.reshape(-1, 4): 16 | ll = np.append(l, [0, np.linalg.norm(l[:2] - l[2:4])]) 17 | ms_lines.append([(0, ll)] + [(i, ll / (i * np.sqrt(2))) for i in range(1, 5)]) 18 | return ms_lines 19 | 20 | @staticmethod 21 | def get_lbg_descrs(img, lines): 22 | ########################################################################## 23 | ms_lines = PyTLBD.to_multiscale_lines(lines) 24 | pyramid = get_img_pyramid(img) 25 | descriptors = pytlbd.lbd_multiscale_pyr(pyramid, ms_lines, 9, 7) 26 | return descriptors 27 | 28 | @staticmethod 29 | def match_lbd_hellinger(descriptors0, descriptors1, ratio_th=0.85): 30 | # Get the distance matrix between descriptors 31 | D = multiscale_helinger_dist(descriptors0, descriptors1) 32 | # Compute the ratio-distance matches 33 | 
_, ratio_dists = matlab_like_desc_distance(D) 34 | # Apply the ratio test 35 | ratio_mask = ratio_dists > (1 / ratio_th) 36 | ratio_sort_indices = np.argsort(-ratio_dists[ratio_mask]) 37 | matches = np.argwhere(ratio_mask)[ratio_sort_indices] 38 | 39 | # Check that the matches are Mutual Nearest Neighbours 40 | argmin_rows = D.argmin(0) 41 | argmin_cols = D.argmin(1) 42 | mnn_cols = argmin_cols[matches[:, 0]] == matches[:, 1] 43 | mnn_rows = argmin_rows[matches[:, 1]] == matches[:, 0] 44 | mnn = np.logical_and(mnn_cols, mnn_rows) 45 | my_matches = matches[mnn] 46 | 47 | pred_matches = np.full(len(descriptors0), -1, dtype=int) 48 | pred_matches[my_matches[:, 0]] = my_matches[:, 1] 49 | return pred_matches 50 | 51 | def compute_descriptors(self, img, lines): 52 | # Compute multi-scale descriptors 53 | desc = self.get_lbg_descrs(img, lines.reshape(-1, 4)) 54 | return np.array(desc) 55 | 56 | def match_lines(self, lines0, lines1, desc0, desc1): 57 | # Find matches using the heuristic approach defined in the paper 58 | multiscale_lines0 = PyTLBD.to_multiscale_lines(lines0) 59 | multiscale_lines1 = PyTLBD.to_multiscale_lines(lines1) 60 | try: 61 | my_matches = np.array(pytlbd.lbd_matching_multiscale( 62 | multiscale_lines0, multiscale_lines1, 63 | list(desc0), list(desc1))) 64 | pred_matches = -np.ones((len(desc0)), dtype=int) 65 | if len(my_matches) > 0: 66 | pred_matches[my_matches[:, 0].astype(np.int32)] = my_matches[:, 1].astype(np.int32) 67 | return pred_matches 68 | except RuntimeError: 69 | return -np.ones((len(desc0)), dtype=int) 70 | 71 | 72 | ### Util functions for LBD heuristic matcher 73 | 74 | def get_img_pyramid(img, n_levels=5, level_scale=np.sqrt(2)): 75 | octave_img = img.copy() 76 | pre_sigma2 = 0 77 | cur_sigma2 = 1.0 78 | pyramid = [] 79 | for i in range(n_levels): 80 | increase_sigma = np.sqrt(cur_sigma2 - pre_sigma2) 81 | blurred = cv2.GaussianBlur(octave_img, (5, 5), increase_sigma, 82 | borderType=cv2.BORDER_REPLICATE) 83 | pyramid.append(blurred) 84 | 85 | # down sample the current octave image to get the next octave image 86 | new_size = (int(octave_img.shape[1] / level_scale), 87 | int(octave_img.shape[0] / level_scale)) 88 | octave_img = cv2.resize(blurred, new_size, 0, 0, 89 | interpolation=cv2.INTER_NEAREST) 90 | pre_sigma2 = cur_sigma2 91 | cur_sigma2 = cur_sigma2 * 2 92 | 93 | return pyramid 94 | 95 | 96 | def matlab_like_desc_distance(distances_mat): 97 | """ 98 | Computes the distance between teo set of descriptors. 99 | :return: A pair of numpy array: 100 | - A matrix where all the elements are HIGH_VALUE, but the one matched. The non HIGH_VALUE elements will 101 | have numbers from 1 to the number of matches where the lower numbers indicates most probable match. 102 | - A matrix containing the ratios between the first and the second match. 103 | :rtype (numpy.ndarray, numpy.ndarray, numpy.ndarray) 104 | """ 105 | HIGH_VALUE = 1000000 106 | # The shape of the output matrices 107 | out_shape = distances_mat.shape 108 | 109 | sorted_nn_mat = np.full(out_shape, HIGH_VALUE, dtype=float) 110 | ratio_dists = np.zeros(out_shape, dtype=float) 111 | tdesc_out = distances_mat.copy() 112 | 113 | # For each distance form the smallest to the higher, se in sorted_nn_mat a index [0, 1, 2, ...] 114 | # that indicates if is the smaller, the second smaller, thrid smaller, ... 
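    # In short: distances are consumed greedily from smallest to largest; once a
    # (row, col) pair is picked, its whole row and column are masked out so that
    # each descriptor is matched at most once, and ratio_dists stores the ratio
    # between the second-best and best distance for the later ratio test.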
115 | dnbr = 0 116 | while True: 117 | minj, mini = np.unravel_index(np.argmin(tdesc_out), tdesc_out.shape) 118 | min_dist = tdesc_out[minj, mini] 119 | 120 | if min_dist >= HIGH_VALUE: 121 | break 122 | 123 | # Set the row and the column with smaller distance to a HIGH_VALUE 124 | tdesc_out[:, mini] = HIGH_VALUE 125 | tdesc_out[minj] = HIGH_VALUE 126 | # Set the position where is the smaller distance to be the smallest one 127 | sorted_nn_mat[minj, mini] = dnbr 128 | dnbr += 1 129 | minratio = 1000000.0 130 | 131 | # Find the second element with smaller distance and the ratio between the first and second one 132 | for j in range(distances_mat.shape[0]): 133 | ratio = distances_mat[j, mini] / min_dist 134 | if 1 < ratio < minratio and j != minj: 135 | minratio = ratio 136 | 137 | ratio_dists[minj, mini] = minratio 138 | 139 | return sorted_nn_mat, ratio_dists 140 | 141 | 142 | # TODO This distance is always between 0 and 1 :-( 143 | def hellinger_dist(mean1, std1, mean2, std2): 144 | h2 = 1 - np.sqrt((2 * std1 * std2) / (ETH_EPS + std1 ** 2 + std2 ** 2)) * np.exp( 145 | -0.25 * (mean1 - mean2) ** 2 / (ETH_EPS + std1 ** 2 + std2 ** 2)) 146 | return np.sqrt(h2) 147 | 148 | 149 | def descriptors_hellinger_dist(a, b): 150 | a = a.reshape(a.shape[:-1] + (-1, 8)) 151 | b = b.reshape(b.shape[:-1] + (-1, 8)) 152 | means1, means2 = a[..., :4], b[..., :4] 153 | stds1, stds2 = a[..., 4:], b[..., 4:] 154 | total = hellinger_dist(means1, stds1, means2, stds2) 155 | # Sum along the last two axes, the number of bands and the gradient directions 156 | return total.sum(axis=(-2, -1)) 157 | 158 | 159 | def multiscale_helinger_dist(descriptorsL, descriptorsR): 160 | if len(descriptorsL) == 0 or len(descriptorsR) == 0: 161 | return np.array([]) 162 | 163 | descriptorsL = list(map(lambda d: np.array(d), descriptorsL)) 164 | ndims = descriptorsL[0].shape[1] 165 | 166 | maxR = np.max(list(map(lambda d: len(d), descriptorsR))) 167 | descriptorsR = np.array(list(map( 168 | lambda d: np.vstack([np.array(d), np.full((maxR - len(d), ndims), 0, np.float32)]), descriptorsR))) 169 | 170 | # Compute the L2 distance matrix and use it to find the matches 171 | D = np.zeros((len(descriptorsL), len(descriptorsR)), dtype=np.float32) 172 | # for r in tqdm(range(len(descriptorsL))): 173 | for r in range(len(descriptorsL)): 174 | D[r] = descriptors_hellinger_dist(descriptorsL[r], descriptorsR[:, :, np.newaxis]).min(axis=(1, 2)) 175 | 176 | return D 177 | 178 | 179 | def multiscale_descr_dist(descriptors_l, descriptors_r): 180 | # maxL = np.max(list(map(lambda d: len(d), descriptorsL))) 181 | max_r = np.max(list(map(lambda d: len(d), descriptors_r))) 182 | 183 | descriptors_l = list(map(lambda d: np.array(d), descriptors_l)) 184 | descriptors_r = np.array(list(map( 185 | lambda d: np.vstack([np.array(d), np.full((max_r - len(d), 72), np.inf, np.float32)]), descriptors_r))) 186 | 187 | # Compute the L2 distance matrix and use it to find the matches 188 | D = np.zeros((len(descriptors_l), len(descriptors_r)), dtype=np.float32) 189 | for r in range(len(descriptors_l)): 190 | D[r] = np.linalg.norm(descriptors_l[r] - descriptors_r[:, :, np.newaxis], axis=-1).min(axis=(1, 2)) 191 | 192 | return D 193 | -------------------------------------------------------------------------------- /deeplsd/models/line_refiner.py: -------------------------------------------------------------------------------- 1 | """ 2 | Regress the distance function map of an image 3 | and use it to refine pre-computed line segments.. 
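A minimal call sketch (the tensors and the `refiner` instance are placeholders;
shapes follow the forward pass below):

    inputs = {'image': img_tensor[None, None],  # [1, 1, H, W], values in [0, 1]
              'lines': lines_xy[None]}          # [1, N, 2, 2], xy convention
    refined = refiner(inputs)['refined_lines'][0]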
4 | """ 5 | 6 | import numpy as np 7 | import torch 8 | from torch import nn 9 | from copy import deepcopy 10 | from omegaconf import OmegaConf 11 | 12 | from .base_model import BaseModel 13 | from .backbones.vgg_unet import VGGUNet 14 | from ..geometry.line_utils import get_line_orientation, filter_outlier_lines 15 | from ..utils.tensor import preprocess_angle 16 | from line_refinement import line_optim 17 | 18 | 19 | class LineRefiner(BaseModel): 20 | default_conf = { 21 | 'tiny': False, 22 | 'sharpen': True, 23 | 'line_neighborhood': 5, 24 | 'line_detection_params': { 25 | 'use_vps': True, 26 | 'optimize_vps': True, 27 | 'filtering': False, 28 | 'lambda_df': 1., 29 | 'lambda_grad': 1., 30 | 'lambda_vp': 0.5, 31 | 'threshold': 1., 32 | 'max_iters': 100000, 33 | 'minimum_point_number': 2, 34 | 'maximum_model_number': -1, 35 | 'scoring_exponent': 1, 36 | }, 37 | } 38 | required_data_keys = ['image', 'lines'] 39 | 40 | def _init(self, conf): 41 | # Base network 42 | self.backbone = VGGUNet(tiny=self.conf.tiny) 43 | dim = 32 if self.conf.tiny else 64 44 | 45 | # Predict the distance field and angle to the nearest line 46 | # DF head 47 | self.df_head = nn.Sequential( 48 | nn.Conv2d(dim, 64, kernel_size=3, padding=1), 49 | nn.ReLU(), 50 | nn.BatchNorm2d(64), 51 | nn.Conv2d(64, 64, kernel_size=3, padding=1), 52 | nn.ReLU(), 53 | nn.BatchNorm2d(64), 54 | nn.Conv2d(64, 1, kernel_size=1), 55 | nn.ReLU(), 56 | ) 57 | 58 | # Closest line direction head 59 | self.angle_head = nn.Sequential( 60 | nn.Conv2d(dim, 64, kernel_size=3, padding=1), 61 | nn.ReLU(), 62 | nn.BatchNorm2d(64), 63 | nn.Conv2d(64, 64, kernel_size=3, padding=1), 64 | nn.ReLU(), 65 | nn.BatchNorm2d(64), 66 | nn.Conv2d(64, 1, kernel_size=1), 67 | nn.Sigmoid(), 68 | ) 69 | 70 | def normalize_df(self, df): 71 | return -torch.log(df / self.conf.line_neighborhood + 1e-6) 72 | 73 | def denormalize_df(self, df_norm): 74 | return torch.exp(-df_norm) * self.conf.line_neighborhood 75 | 76 | def _forward(self, data): 77 | outputs = {} 78 | 79 | base = self.backbone(data['image']) 80 | 81 | # DF prediction 82 | if self.conf.sharpen: 83 | outputs['df_norm'] = self.df_head(base).squeeze(1) 84 | outputs['df'] = self.denormalize_df(outputs['df_norm']) 85 | else: 86 | outputs['df'] = self.df_head(base).squeeze(1) 87 | 88 | # Closest line direction prediction 89 | outputs['line_level'] = self.angle_head(base).squeeze(1) * np.pi 90 | 91 | # Refine the line segments 92 | np_img = (data['image'].cpu().numpy()[:, 0] * 255).astype(np.uint8) 93 | np_df = outputs['df'].cpu().numpy() 94 | np_ll = outputs['line_level'].cpu().numpy() 95 | outputs['refined_lines'] = [] 96 | outputs['vp_labels'] = [] 97 | outputs['vps'] = [] 98 | line_detection_params = deepcopy(self.conf.line_detection_params) 99 | OmegaConf.set_readonly(line_detection_params, False) 100 | if 'line_detection_params' in data: 101 | line_detection_params.update(data['line_detection_params']) 102 | for img, df, ll, lines in zip(np_img, np_df, np_ll, data['lines']): 103 | out = self.refine_lines( 104 | img, df, ll, lines, **line_detection_params) 105 | for k, v in out.items(): 106 | outputs[k].append(v) 107 | 108 | return outputs 109 | 110 | def refine_lines(self, img, df, line_level, lines, use_vps=False, 111 | optimize_vps=False, filtering='normal', lambda_df=1., 112 | lambda_grad=1., lambda_vp=0.5, threshold=1., 113 | max_iters=10000, minimum_point_number=2, 114 | maximum_model_number=-1, scoring_exponent=1): 115 | """ Refine the given lines using a DF+angle field. 
116 | Lines are expected in xy convention. """ 117 | rows, cols = df.shape 118 | angle, img_grad_angle = preprocess_angle(line_level - np.pi / 2, img) 119 | orientations = get_line_orientation(lines[:, :, [1, 0]], 120 | angle)[:, None] 121 | oriented_lines = np.concatenate([lines.reshape(-1, 4), 122 | orientations], axis=1) 123 | out = {} 124 | refined_lines, out['vp_labels'], out['vps'] = line_optim( 125 | oriented_lines, df.flatten(), 126 | angle.flatten(), rows, cols, use_vps, optimize_vps, 127 | lambda_df, lambda_grad, lambda_vp, threshold, max_iters, 128 | minimum_point_number, maximum_model_number, 129 | scoring_exponent) 130 | refined_lines = np.array(refined_lines).reshape(-1, 2, 2) 131 | out['refined_lines'] = refined_lines.astype(np.float32) 132 | 133 | # Optionally filter out lines based on the DF and line_level 134 | if filtering: 135 | if filtering == 'strict': 136 | df_thresh, ang_thresh = 1., np.pi / 12 137 | else: 138 | df_thresh, ang_thresh = 1.5, np.pi / 9 139 | angle = line_level - np.pi / 2 140 | lines, valid = filter_outlier_lines( 141 | img, out['refined_lines'][:, :, [1, 0]], df, angle, 142 | mode='inlier_thresh', use_grad=False, inlier_thresh=0.5, 143 | df_thresh=df_thresh, ang_thresh=ang_thresh) 144 | out['refined_lines'] = lines[:, :, [1, 0]] 145 | out['vp_labels'] = np.array(out['vp_labels'])[valid] 146 | 147 | return out 148 | 149 | def loss(self, pred, data): 150 | return {} 151 | 152 | def metrics(self, pred, data): 153 | return {} 154 | -------------------------------------------------------------------------------- /deeplsd/scripts/evaluate_line_detection.py: -------------------------------------------------------------------------------- 1 | """ 2 | Evaluate line detections with low-level metrics. 3 | """ 4 | 5 | import sys 6 | import os 7 | import argparse 8 | import numpy as np 9 | from tqdm import tqdm 10 | from pathlib import Path 11 | 12 | from ..datasets.wireframe_eval import WireframeEval 13 | from ..datasets.hpatches import HPatches 14 | from ..datasets.rdnim import RDNIM 15 | from ..datasets.york_urban_lines import YorkUrbanLines 16 | from ..datasets.utils.homographies import warp_lines 17 | from ..evaluation.ls_evaluation import ( 18 | match_segments_1_to_1, compute_repeatability, 19 | compute_loc_error, H_estimation, match_segments_lbd) 20 | 21 | 22 | wireframe_config = { 23 | 'dataset_dir': 'Wireframe_raw', 24 | 'resize': None, 25 | } 26 | 27 | hpatches_config = { 28 | 'dataset_dir': 'HPatches_sequences', 29 | 'alteration': 'all', 30 | 'max_side': 1200, 31 | } 32 | 33 | rdnim_config = { 34 | 'dataset_dir': 'RDNIM', 35 | 'reference': 'night', 36 | } 37 | 38 | yorkurban_config = { 39 | 'dataset_dir': 'YorkUrbanDB', 40 | } 41 | 42 | num_lines_thresholds = [10, 25, 50, 100, 300] 43 | thresholds = [1, 2, 3, 4, 5] 44 | 45 | 46 | def get_dataloader(dataset): 47 | if dataset == 'wireframe': 48 | data = WireframeEval(wireframe_config) 49 | elif dataset == 'hpatches': 50 | data = HPatches(hpatches_config) 51 | elif dataset == 'rdnim': 52 | data = RDNIM(rdnim_config) 53 | elif dataset == 'york_urban': 54 | data = YorkUrbanLines(yorkurban_config) 55 | else: 56 | sys.exit(f"Unknown dataset: {dataset}") 57 | return data.get_data_loader('test') 58 | 59 | 60 | def evaluate(dataset, line_folder, output_folder, method, thresh): 61 | # Get the dataloader 62 | dataloader = get_dataloader(dataset) 63 | min_length = 20 if dataset == 'hpatches' else 0 64 | 65 | # Gather all metrics across all line detections 66 | (struct_rep, struct_loc_error, orth_rep, 
orth_loc_error, 67 | H_estim, num_lines) = [], [], [], [], [], [] 68 | for data in tqdm(dataloader): 69 | img = (data['image'].numpy()[0, 0] * 255).astype(np.uint8) 70 | img_size = img.shape 71 | H = data['H'][0].numpy() 72 | if dataset == 'hpatches': 73 | img_name = data['warped_name'][0] 74 | elif dataset == 'rdnim': 75 | img_name = str(Path(data['warped_image_path'][0]).stem) 76 | else: 77 | img_name = data['name'][0] 78 | 79 | pred_file = os.path.join(line_folder, img_name + '_' + method + '.npz') 80 | with open(pred_file, 'rb') as f: 81 | data = np.load(f) 82 | pred_lines0 = data['lines'] 83 | pred_lines1 = data['warped_lines'] 84 | 85 | # Filter out small lines 86 | pred_lines0 = pred_lines0[ 87 | np.linalg.norm(pred_lines0[:, 1] - pred_lines0[:, 0], axis=1) > min_length] 88 | pred_lines1 = pred_lines1[ 89 | np.linalg.norm(pred_lines1[:, 1] - pred_lines1[:, 0], axis=1) > min_length] 90 | 91 | # Compute the average number of lines 92 | num_lines.append((len(pred_lines0) + len(pred_lines1)) / 2) 93 | 94 | # Compute the structural metrics 95 | segs1, segs2, matched_idx1, matched_idx2, distances = match_segments_1_to_1( 96 | pred_lines0, pred_lines1, H, img_size, line_dist='struct', dist_thresh=5) 97 | if len(matched_idx1) == 0: 98 | struct_rep.append([0] * len(thresholds)) 99 | else: 100 | struct_rep.append(compute_repeatability(segs1, segs2, matched_idx1, matched_idx2, 101 | distances, thresholds, rep_type='num')) 102 | struct_loc_error.append(compute_loc_error(distances, num_lines_thresholds)) 103 | 104 | # Compute the orthogonal metrics 105 | segs1, segs2, matched_idx1, matched_idx2, distances = match_segments_1_to_1( 106 | pred_lines0, pred_lines1, H, img_size, line_dist='orth', dist_thresh=5) 107 | if len(matched_idx1) == 0: 108 | orth_rep.append([0] * len(thresholds)) 109 | else: 110 | orth_rep.append(compute_repeatability(segs1, segs2, matched_idx1, matched_idx2, 111 | distances, thresholds, rep_type='num')) 112 | orth_loc_error.append(compute_loc_error(distances, num_lines_thresholds)) 113 | 114 | # Homography estimation 115 | segs1, segs2, matched_idx1, matched_idx2 = match_segments_lbd( 116 | img, pred_lines0, pred_lines1, H, img_size) 117 | if len(matched_idx1) < 3: 118 | H_estim.append(0) 119 | else: 120 | matched_seg1 = segs1[matched_idx1] 121 | matched_seg2 = warp_lines(segs2, H)[matched_idx2] 122 | score = H_estimation(matched_seg1, matched_seg2, H, 123 | img_size, reproj_thresh=3)[0] 124 | H_estim.append(score) 125 | 126 | num_lines = np.mean(num_lines) 127 | struct_rep = np.mean(np.stack(struct_rep, axis=0), axis=0) 128 | struct_loc_error = np.mean(np.stack(struct_loc_error, axis=0), axis=0) 129 | orth_rep = np.mean(np.stack(orth_rep, axis=0), axis=0) 130 | orth_loc_error = np.mean(np.stack(orth_loc_error, axis=0), axis=0) 131 | H_estim = np.mean(H_estim) 132 | 133 | # Write the results on disk 134 | file_path = os.path.join(output_folder, method + '.npz') 135 | with open(file_path, 'wb') as f: 136 | np.savez(f, struct_rep=struct_rep, struct_loc_error=struct_loc_error, 137 | orth_rep=orth_rep, orth_loc_error=orth_loc_error, 138 | H_estim=H_estim, num_lines=num_lines) 139 | 140 | # Print the results for the requested threshold 141 | print(f"Results for {method}:") 142 | print(f'Num lines: {np.round(num_lines * 1000) / 1000}') 143 | print() 144 | print(f'Struct-repeatability: {np.round(struct_rep[thresh - 1] * 1000) / 1000}') 145 | print() 146 | print(f'Struct-loc: {np.round(struct_loc_error[2] * 1000) / 1000}') 147 | print() 148 | print(f'Orth-repeatability: 
{np.round(orth_rep[thresh - 1] * 1000) / 1000}') 149 | print() 150 | print(f'Orth-loc: {np.round(orth_loc_error[2] * 1000) / 1000}') 151 | print() 152 | print(f'H estimation: {np.round(H_estim * 1000) / 1000}') 153 | 154 | 155 | if __name__ == "__main__": 156 | parser = argparse.ArgumentParser() 157 | parser.add_argument('dataset', type=str, 158 | help="Dataset to evaluate on ('wireframe', 'hpatches', 'rdnim', 'york_urban').") 159 | parser.add_argument('line_folder', type=str, 160 | help="Path to the fodler containing all line detections.") 161 | parser.add_argument('output_folder', type=str, 162 | help="Path to the output folder.") 163 | parser.add_argument('method', type=str, 164 | help="Name of the method (should match with the file extension, e.g. 'deeplsd' if the file ends with 'deeplsd.npz').") 165 | parser.add_argument('--thresh', type=int, default=3, 166 | help="Threshold for repeatability and homography estimation (from 1 to 5, default: 3).") 167 | args = parser.parse_args() 168 | 169 | if not os.path.exists(args.line_folder): 170 | sys.exit('No folder found in: ' + args.line_folder) 171 | 172 | if args.thresh not in [1, 2, 3, 4, 5]: 173 | sys.exit('Invalid threshold, should be 1, 2, 3, 4, or 5.') 174 | 175 | evaluate(args.dataset, args.line_folder, args.output_folder, 176 | args.method, args.thresh) 177 | -------------------------------------------------------------------------------- /deeplsd/scripts/evaluate_vp_estimation.py: -------------------------------------------------------------------------------- 1 | """ 2 | Export vanishing points (VP). 3 | """ 4 | 5 | import sys 6 | import os 7 | import argparse 8 | import numpy as np 9 | from tqdm import tqdm 10 | import seaborn as sns 11 | import matplotlib.pyplot as plt 12 | 13 | from ..datasets.york_urban import YorkUrban 14 | from ..datasets.nyu import NYU 15 | from ..evaluation.ls_evaluation import vp_consistency_check, get_vp_error, get_recall_AUC 16 | 17 | 18 | yorkurban_config = { 19 | 'dataset_dir': 'YorkUrbanDB', 20 | } 21 | 22 | nyu_config = { 23 | 'dataset_dir': 'NYU_depth_v2', 24 | } 25 | 26 | 27 | def get_dataloader(dataset): 28 | if dataset == 'york_urban': 29 | data = YorkUrban(yorkurban_config) 30 | elif dataset == 'nyu': 31 | data = NYU(nyu_config) 32 | else: 33 | sys.exit(f"Unknown dataset: {dataset}") 34 | return data.get_data_loader('test') 35 | 36 | 37 | def plot_vp_consistency(method_names, x, vp_consistency, lw=2): 38 | """ Plot the VP consistency of different methods. 
""" 39 | n_models = len(method_names) 40 | colors = sns.color_palette(n_colors=n_models) 41 | for m, y, c in zip(method_names, vp_consistency, colors): 42 | plt.plot(x, y, label=m, color=c, linewidth=lw) 43 | plt.legend(loc='lower right', fontsize=16, ncol=2) 44 | plt.xlabel('Error threshold (in px)', fontsize=18) 45 | plt.ylabel('VP consistency (in %)', fontsize=18) 46 | plt.grid() 47 | plt.savefig('vp_consistency.pdf', bbox_inches='tight', pad_inches=0) 48 | 49 | 50 | def evaluate(dataset, vp_folder, output_folder, method): 51 | # Get the dataloader 52 | dataloader = get_dataloader(dataset) 53 | thresholds = list(np.arange(1, 9)) 54 | 55 | # Gather all metrics across all VP detections 56 | vp_consistency, vp_error, AUC = [], [], [] 57 | for data in tqdm(dataloader): 58 | # GT data 59 | img_name = data['name'][0] 60 | K = data['K'][0].numpy() 61 | if dataset == 'york_urban': 62 | gt_lines = data['gt_lines'][0].numpy() 63 | vp_association = data['vp_association'][0].numpy() 64 | gt_vp = data['updated_vps'][0].numpy() 65 | else: 66 | gt_vp = data['vps'][0].numpy() 67 | 68 | # Regress line segments, VPs and the associated VPs 69 | pred_file = os.path.join(vp_folder, img_name + '_' + method + '.npz') 70 | with open(pred_file, 'rb') as f: 71 | vp = np.load(f)['vps'] 72 | 73 | # VP consistency 74 | if dataset == 'york_urban': 75 | vp_consistency.append(vp_consistency_check( 76 | gt_lines, vp_association, vp, tol=thresholds)) 77 | 78 | # VP error 79 | vp_error.append(get_vp_error(gt_vp, vp, K)) 80 | 81 | # VP recall AUC 82 | AUC.append(get_recall_AUC(gt_vp, vp, K)[1]) 83 | 84 | if dataset == 'york_urban': 85 | vp_consistency = np.stack(vp_consistency, axis=0).mean(axis=0) 86 | vp_error = np.stack(vp_error, axis=0).mean(axis=0) 87 | AUC = np.stack(AUC, axis=0).mean(axis=0) 88 | 89 | # Write the results on disk 90 | file_path = os.path.join(output_folder, method + '.npz') 91 | with open(file_path, 'wb') as f: 92 | if dataset == 'york_urban': 93 | np.savez(f, vp_consistency=vp_consistency, vp_error=vp_error, AUC=AUC) 94 | else: 95 | np.savez(f, vp_error=vp_error, AUC=AUC) 96 | 97 | # Print the results for the requested threshold 98 | print(f"Results for {method}:") 99 | print(f'VP error: {np.round(vp_error * 1000) / 1000}') 100 | print() 101 | print(f'VP recall AUC: {np.round(AUC * 100) / 1000}') 102 | if dataset == 'york_urban': 103 | plot_vp_consistency([method], thresholds, [vp_consistency], lw=2) 104 | 105 | 106 | if __name__ == "__main__": 107 | parser = argparse.ArgumentParser() 108 | parser.add_argument('dataset', type=str, 109 | help="Dataset to evaluate on ('york_urban' or 'nyu').") 110 | parser.add_argument('vp_folder', type=str, 111 | help="Path to the folder containing all VP detections.") 112 | parser.add_argument('output_folder', type=str, 113 | help="Path to the output folder.") 114 | parser.add_argument('method', type=str, 115 | help="Name of the method (should match with the file extension, e.g. 'deeplsd' if the file ends with 'deeplsd.npz').") 116 | args = parser.parse_args() 117 | 118 | if not os.path.exists(args.vp_folder): 119 | sys.exit('No folder found in: ' + args.vp_folder) 120 | 121 | evaluate(args.dataset, args.vp_folder, args.output_folder, args.method) 122 | -------------------------------------------------------------------------------- /deeplsd/scripts/export_features.py: -------------------------------------------------------------------------------- 1 | """ 2 | Export line detections for all images of a given dataset. 
3 | """ 4 | 5 | import sys 6 | import os 7 | import argparse 8 | import numpy as np 9 | import torch 10 | import torch.nn.functional as F 11 | from omegaconf import OmegaConf 12 | import cv2 13 | from tqdm import tqdm 14 | from pathlib import Path 15 | from pyprogressivex import findVanishingPoints 16 | 17 | from ..models import get_model 18 | from ..datasets import get_dataset 19 | from ..utils.experiments import get_best_checkpoint 20 | from ..settings import EXPER_PATH 21 | 22 | 23 | default_vp_params = { 24 | 'threshold': 1.5, 25 | 'conf': 0.99, 26 | 'spatial_coherence_weight': 0.0, 27 | 'neighborhood_ball_radius': 1.0, 28 | 'maximum_tanimoto_similarity': 1.0, 29 | 'max_iters': 100000, 30 | 'minimum_point_number': 2, 31 | 'maximum_model_number': -1, 32 | 'sampler_id': 0, 33 | 'scoring_exponent': 1.0, 34 | 'do_logging': False, 35 | } 36 | 37 | 38 | def compute_vp_progressive_x(lines, h, w, vp_params=default_vp_params): 39 | """ Compute vanishing points with progressive-X. """ 40 | # Order lines by decreasing length 41 | order = np.argsort(np.linalg.norm(lines[:, 0] - lines[:, 1], axis=1))[::-1] 42 | sorted_lines = lines[order] 43 | 44 | # Compute weights based on the line length 45 | weights = np.linalg.norm(sorted_lines[:, 0] - sorted_lines[:, 1], axis=1) 46 | weights /= np.amax(weights) 47 | # weights = np.ones(len(sorted_lines)) 48 | 49 | # Compute VPs 50 | vp, ord_label = findVanishingPoints(np.ascontiguousarray(sorted_lines.reshape(-1, 4)), 51 | np.ascontiguousarray(weights), w, h, **vp_params) 52 | 53 | # Put back in the right order 54 | label = np.zeros_like(ord_label) 55 | label[order] = ord_label 56 | label[label == label.max()] = -1 # Last value is the outlier class 57 | 58 | return vp, label 59 | 60 | 61 | def export(conf, ckpt, output_folder, extension, pred_vps): 62 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 63 | 64 | # Load the dataset 65 | dataset = get_dataset(conf.data.name)(conf.data) 66 | split = 'export' if (conf.data.name == 'nyu' 67 | or conf.data.name == 'eth3d') else 'test' 68 | dataloader = dataset.get_data_loader(split) 69 | 70 | # Load the model 71 | ckpt = torch.load(str(ckpt), map_location='cpu') 72 | net = get_model(conf.model.name)(conf.model) 73 | net.load_state_dict(ckpt['model']) 74 | net = net.to(device).eval() 75 | 76 | pred_lines = {} 77 | for data in tqdm(dataloader): 78 | input0 = {'image': data['image'].to(device)} 79 | h, w = data['image'].shape[2:4] 80 | with torch.no_grad(): 81 | out0 = net(input0) 82 | pred_lines['lines'] = out0['lines'][0][:, :, [1, 0]] 83 | if pred_vps: 84 | if conf.model.line_detection_params.optimize: 85 | pred_lines['vps'] = out0['vps'][0] 86 | pred_lines['vp_labels'] = out0['vp_labels'][0] 87 | else: 88 | # Detect VPs with ProgressiveX 89 | pred_lines['vps'], pred_lines['vp_labels'] = compute_vp_progressive_x( 90 | out0['lines'][0], h, w) 91 | if 'warped_image' in data: 92 | input1 = {'image': data['warped_image'].to(device)} 93 | pred_lines['warped_lines'] = net( 94 | input1)['lines'][0][:, :, [1, 0]] 95 | 96 | # Save the results on disk 97 | if conf.data.name == 'rdnim': 98 | img_name = str(Path(data['warped_image_path'][0]).stem) 99 | elif conf.data.name == 'hpatches': 100 | img_name = data['warped_name'][0] 101 | else: 102 | img_name = data['name'][0] 103 | filename = img_name + '_deeplsd' + ( 104 | '_'+ extension if extension != '' else '') + '.npz' 105 | path = os.path.join(output_folder, filename) 106 | with open(path, 'wb') as output_file: 107 | np.savez(output_file, **pred_lines) 108 
| 109 | 110 | if __name__ == "__main__": 111 | parser = argparse.ArgumentParser() 112 | parser.add_argument('conf', type=str, 113 | help='Path to the config file.') 114 | parser.add_argument('ckpt', type=str, 115 | help="Path to model checkpoint.") 116 | parser.add_argument('output_folder', type=str, 117 | help="Path to the output folder.") 118 | parser.add_argument('--extension', type=str, default='', 119 | help="Extension to the path.") 120 | parser.add_argument('--pred_vps', action='store_true', 121 | help="Add the VP prediction to the prediction.") 122 | parser.add_argument('dotlist', nargs='*') 123 | args = parser.parse_intermixed_args() 124 | 125 | conf = OmegaConf.from_cli(args.dotlist) 126 | if args.conf: 127 | conf = OmegaConf.merge(OmegaConf.load(args.conf), conf) 128 | 129 | if not os.path.exists(args.ckpt): 130 | sys.exit('No model found in: ' + args.ckpt) 131 | 132 | export(conf, args.ckpt, args.output_folder, args.extension, args.pred_vps) 133 | -------------------------------------------------------------------------------- /deeplsd/scripts/homography_adaptation_df.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the homography adaptation for all images in a given folder 3 | to regress and aggregate line distance function maps. 4 | """ 5 | 6 | import os 7 | import argparse 8 | import numpy as np 9 | import cv2 10 | import h5py 11 | import torch 12 | from tqdm import tqdm 13 | from pytlsd import lsd 14 | from afm_op import afm 15 | from joblib import Parallel, delayed 16 | 17 | from ..datasets.utils.homographies import sample_homography, warp_lines 18 | from ..datasets.utils.data_augmentation import random_contrast 19 | 20 | 21 | homography_params = { 22 | 'translation': True, 23 | 'rotation': True, 24 | 'scaling': True, 25 | 'perspective': True, 26 | 'scaling_amplitude': 0.2, 27 | 'perspective_amplitude_x': 0.2, 28 | 'perspective_amplitude_y': 0.2, 29 | 'patch_ratio': 0.85, 30 | 'max_angle': 1.57, 31 | 'allow_artifacts': True 32 | } 33 | 34 | 35 | def ha_df(img, num=100, border_margin=3, min_counts=5): 36 | """ Perform homography adaptation to regress line distance function maps. 37 | Args: 38 | img: a grayscale np image. 39 | num: number of homographies used during HA. 40 | border_margin: margin used to erode the boundaries of the mask. 41 | min_counts: any pixel which is not activated by more than min_count is BG. 42 | Returns: 43 | The aggregated distance function maps in pixels 44 | and the angle to the closest line. 
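        It also returns the (x, y) position of the closest line point for every
        pixel and a background mask flagging pixels far from any detected line.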
45 | """ 46 | h, w = img.shape[:2] 47 | size = (w, h) 48 | df_maps, angles, closests, counts = [], [], [], [] 49 | kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, 50 | (border_margin * 2, border_margin * 2)) 51 | pix_loc = np.stack(np.meshgrid(np.arange(h), np.arange(w), indexing='ij'), 52 | axis=-1) 53 | raster_lines = np.zeros_like(img) 54 | 55 | # Loop through all the homographies 56 | for i in range(num): 57 | # Generate a random homography 58 | if i == 0: 59 | H = np.eye(3) 60 | else: 61 | H = sample_homography(img.shape, **homography_params) 62 | H_inv = np.linalg.inv(H) 63 | 64 | # Warp the image 65 | warped_img = cv2.warpPerspective(img, H, size, 66 | borderMode=cv2.BORDER_REPLICATE) 67 | 68 | # Regress the DF on the warped image 69 | warped_lines = lsd(warped_img)[:, [1, 0, 3, 2]].reshape(-1, 2, 2) 70 | 71 | # Warp the lines back 72 | lines = warp_lines(warped_lines, H_inv) 73 | 74 | # Get the DF and angles 75 | num_lines = len(lines) 76 | cuda_lines = torch.from_numpy(lines[:, :, [1, 0]].astype(np.float32)) 77 | cuda_lines = cuda_lines.reshape(-1, 4)[None].cuda() 78 | offset = afm( 79 | cuda_lines, 80 | torch.IntTensor([[0, num_lines, h, w]]).cuda(), h, w)[0] 81 | offset = offset[0].permute(1, 2, 0).cpu().numpy()[:, :, [1, 0]] 82 | closest = pix_loc + offset 83 | df = np.linalg.norm(offset, axis=-1) 84 | angle = np.mod(np.arctan2( 85 | offset[:, :, 0], offset[:, :, 1]) + np.pi / 2, np.pi) 86 | 87 | df_maps.append(df) 88 | angles.append(angle) 89 | closests.append(closest) 90 | 91 | # Compute the valid pixels 92 | count = cv2.warpPerspective(np.ones_like(img), H_inv, size, 93 | flags=cv2.INTER_NEAREST) 94 | count = cv2.erode(count, kernel) 95 | counts.append(count) 96 | raster_lines += (df < 1).astype(np.uint8) * count 97 | 98 | # Compute the median of all DF maps, with counts as weights 99 | df_maps, angles = np.stack(df_maps), np.stack(angles) 100 | counts, closests = np.stack(counts), np.stack(closests) 101 | 102 | # Median of the DF 103 | df_maps[counts == 0] = np.nan 104 | avg_df = np.nanmedian(df_maps, axis=0) 105 | 106 | # Median of the closest 107 | closests[counts == 0] = np.nan 108 | avg_closest = np.nanmedian(closests, axis=0) 109 | 110 | # Median of the angle 111 | circ_bound = (np.minimum(np.pi - angles, angles) 112 | * counts).sum(0) / counts.sum(0) < 0.3 113 | angles[:, circ_bound] -= np.where( 114 | angles[:, circ_bound] > np.pi /2, 115 | np.ones_like(angles[:, circ_bound]) * np.pi, 116 | np.zeros_like(angles[:, circ_bound])) 117 | angles[counts == 0] = np.nan 118 | avg_angle = np.mod(np.nanmedian(angles, axis=0), np.pi) 119 | 120 | # Generate the background mask and a saliency score 121 | raster_lines = np.where(raster_lines > min_counts, np.ones_like(img), 122 | np.zeros_like(img)) 123 | raster_lines = cv2.dilate(raster_lines, np.ones((21, 21), dtype=np.uint8)) 124 | bg_mask = (1 - raster_lines).astype(float) 125 | 126 | return avg_df, avg_angle, avg_closest[:, :, [1, 0]], bg_mask 127 | 128 | 129 | def process_image(img_path, randomize_contrast, num_H, output_folder): 130 | img = cv2.imread(img_path, 0) 131 | if randomize_contrast is not None: 132 | img = randomize_contrast(img) 133 | 134 | # Run homography adaptation 135 | df, angle, closest, bg_mask = ha_df(img, num=num_H) 136 | 137 | # Save the DF in a hdf5 file 138 | out_path = os.path.splitext(os.path.basename(img_path))[0] 139 | out_path = os.path.join(output_folder, out_path) + '.hdf5' 140 | with h5py.File(out_path, "w") as f: 141 | f.create_dataset("df", data=df.flatten()) 142 | 
f.create_dataset("line_level", data=angle.flatten()) 143 | f.create_dataset("closest", data=closest.flatten()) 144 | f.create_dataset("bg_mask", data=bg_mask.flatten()) 145 | 146 | 147 | def export_ha(images_list, output_folder, num_H=100, 148 | rdm_contrast=False, n_jobs=1): 149 | # Parse the data 150 | with open(images_list, 'r') as f: 151 | image_files = f.readlines() 152 | image_files = [path.strip('\n') for path in image_files] 153 | 154 | # Random contrast object 155 | randomize_contrast = random_contrast() if rdm_contrast else None 156 | 157 | # Process each image in parallel 158 | Parallel(n_jobs=n_jobs, backend='multiprocessing')(delayed(process_image)( 159 | img_path, randomize_contrast, num_H, output_folder) 160 | for img_path in tqdm(image_files)) 161 | 162 | 163 | if __name__ == "__main__": 164 | parser = argparse.ArgumentParser() 165 | parser.add_argument('images_list', type=str, 166 | help='Path to a txt file containing the image paths.') 167 | parser.add_argument('output_folder', type=str, help='Output folder.') 168 | parser.add_argument('--num_H', type=int, default=100, 169 | help='Number of homographies used during HA.') 170 | parser.add_argument('--random_contrast', action='store_true', 171 | help='Add random contrast to the images (disabled by default).') 172 | parser.add_argument('--n_jobs', type=int, default=1, 173 | help='Number of jobs to run in parallel.') 174 | args = parser.parse_args() 175 | 176 | export_ha(args.images_list, args.output_folder, args.num_H, 177 | args.random_contrast, args.n_jobs) 178 | -------------------------------------------------------------------------------- /deeplsd/scripts/line_refinement.py: -------------------------------------------------------------------------------- 1 | """ 2 | Refine the lines matching an extension in a given folder. 
3 | """ 4 | 5 | import os 6 | import argparse 7 | import numpy as np 8 | import h5py 9 | import cv2 10 | from tqdm import tqdm 11 | from pathlib import Path 12 | import cv2 13 | import torch 14 | 15 | from ..models.line_refiner import LineRefiner 16 | from ..utils.experiments import get_best_checkpoint 17 | 18 | 19 | line_refinement_conf = { 20 | 'line_detection_params': { 21 | 'use_vps': True, 22 | 'optimize_vps': True, 23 | 'filtering': False, 24 | 'lambda_df': 1., 25 | 'lambda_grad': 1., 26 | 'lambda_vp': 0.2, 27 | 'threshold': 1., 28 | 'max_iters': 100000, 29 | 'minimum_point_number': 2, 30 | 'maximum_model_number': -1, 31 | 'scoring_exponent': 1, 32 | }, 33 | } 34 | 35 | 36 | def refine_lines(img, lines, refiner): 37 | # Refine the lines (the line refiner uses the xy convention for lines) 38 | torch_img = img.astype(np.float32) / 255 39 | torch_img = torch.tensor(torch_img, dtype=torch.float, 40 | device='cuda')[None, None] 41 | inputs = {'image': torch_img, 'lines': lines[None]} 42 | with torch.no_grad(): 43 | lines = refiner(inputs)['refined_lines'][0] 44 | return lines[:, :, [1, 0]] 45 | 46 | 47 | def export(image_folder, line_folder, ckpt): 48 | # Load the image paths 49 | img_paths = list(Path(image_folder).iterdir()) 50 | img_paths.sort() 51 | img_names = [path.stem for path in img_paths] 52 | line_paths = list(Path(line_folder).iterdir()) 53 | line_paths.sort() 54 | num_imgs = len(img_paths) 55 | 56 | # Load the line refiner 57 | model_name = 'refined' 58 | refiner = LineRefiner(line_refinement_conf) 59 | ckpt = torch.load(str(ckpt), map_location='cpu') 60 | refiner.load_state_dict(ckpt['model'], strict=False) 61 | refiner = refiner.eval().cuda() 62 | 63 | for i in tqdm(range(num_imgs)): 64 | img = cv2.imread(str(img_paths[i]), 0) 65 | 66 | # Load the pre-computed lines matching the extension 67 | # Input lines are assumed to be in row-col coordinates 68 | pred_lines = np.load(str(line_paths[i])) 69 | lines = pred_lines['lines'][:, :, [1, 0]] 70 | 71 | # Refine the lines 72 | outputs = {} 73 | if len(lines) == 0: 74 | outputs['lines'] = lines[:, :, [1, 0]] 75 | else: 76 | outputs['lines'] = refine_lines(img, lines, refiner) 77 | 78 | # Save the results on disk 79 | filename = img_names[i] + '_' + model_name + '.npz' 80 | path = os.path.join(line_folder, filename) 81 | with open(path, 'wb') as output_file: 82 | np.savez(output_file, **outputs) 83 | 84 | 85 | if __name__ == "__main__": 86 | parser = argparse.ArgumentParser() 87 | parser.add_argument( 88 | 'image_folder', type=str, 89 | help="Path to the original image folder.") 90 | parser.add_argument( 91 | 'line_folder', type=str, 92 | help="Path to the folder containing the line detections. Lines are expected in .npz files, with same root name as the images, with a dict {'lines': my_lines}, where my_lines is an array [n_lines, 2, 2] (row-col convention).") 93 | parser.add_argument( 94 | 'ckpt', type=str, 95 | help="Path to the DeepLSD checkpoint.") 96 | args = parser.parse_args() 97 | 98 | if not os.path.exists(args.ckpt): 99 | sys.exit('No model found in: ' + args.ckpt) 100 | 101 | export(args.image_folder, args.line_folder, args.ckpt) 102 | -------------------------------------------------------------------------------- /deeplsd/settings.py: -------------------------------------------------------------------------------- 1 | """ 2 | Modify `EXPER_PATH` and `DATA_PATH` to point to any directories, 3 | with absolute or relative paths. 
4 | """ 5 | EXPER_PATH = '/home/remi/Documents/experiments/deeplsd/' 6 | DATA_PATH = '/home/remi/Documents/datasets/' 7 | -------------------------------------------------------------------------------- /deeplsd/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvg/DeepLSD/d873fd3619d6e44a9f625bc437ab4786057677e5/deeplsd/utils/__init__.py -------------------------------------------------------------------------------- /deeplsd/utils/experiments.py: -------------------------------------------------------------------------------- 1 | """ 2 | A set of utilities to manage and load checkpoints of training experiments. 3 | """ 4 | 5 | from pathlib import Path 6 | import logging 7 | import re 8 | import shutil 9 | from omegaconf import OmegaConf 10 | import torch 11 | import os 12 | 13 | from ..settings import EXPER_PATH 14 | from ..models import get_model 15 | 16 | 17 | def list_checkpoints(dir_): 18 | """List all valid checkpoints in a given directory.""" 19 | checkpoints = [] 20 | for p in dir_.glob('checkpoint_*.tar'): 21 | numbers = re.findall(r'(\d+)', p.name) 22 | assert len(numbers) <= 2 23 | if len(numbers) == 0: 24 | continue 25 | if len(numbers) == 1: 26 | checkpoints.append((int(numbers[0]), p)) 27 | else: 28 | checkpoints.append((int(numbers[1]), p)) 29 | return checkpoints 30 | 31 | 32 | def get_last_checkpoint(exper, allow_interrupted=True): 33 | """Get the last saved checkpoint for a given experiment name.""" 34 | ckpts = list_checkpoints(Path(EXPER_PATH, exper)) 35 | if not allow_interrupted: 36 | ckpts = [(n, p) for (n, p) in ckpts if '_interrupted' not in p.name] 37 | assert len(ckpts) > 0 38 | return sorted(ckpts)[-1][1] 39 | 40 | 41 | def get_best_checkpoint(exper): 42 | """Get the checkpoint with the best loss, for a given experiment name.""" 43 | p = Path(EXPER_PATH, exper, 'checkpoint_best.tar') 44 | return p 45 | 46 | 47 | def delete_old_checkpoints(dir_, num_keep): 48 | """Delete all but the num_keep last saved checkpoints.""" 49 | ckpts = list_checkpoints(dir_) 50 | ckpts = sorted(ckpts)[::-1] 51 | kept = 0 52 | for ckpt in ckpts: 53 | if ('_interrupted' in str(ckpt[1]) and kept > 0) or kept >= num_keep: 54 | logging.info(f'Deleting checkpoint {ckpt[1].name}') 55 | ckpt[1].unlink() 56 | else: 57 | kept += 1 58 | 59 | 60 | def load_experiment(exper, conf={}): 61 | """Load and return the model of a given experiment.""" 62 | ckpt = get_best_checkpoint(exper) 63 | logging.info(f'Loading checkpoint {ckpt.name}') 64 | ckpt = torch.load(str(ckpt), map_location='cpu') 65 | 66 | conf = OmegaConf.merge(ckpt['conf'].model, OmegaConf.create(conf)) 67 | model = get_model(conf.name)(conf).eval() 68 | model.load_state_dict(ckpt['model']) 69 | return model 70 | 71 | 72 | def flexible_load(state_dict, model, verbose=False): 73 | """ Load the state dict of a previous experiments even if 74 | all the parameters do not match and handles different GPU modes, 75 | such as with/without DataParallel. 
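    A complete key mismatch is assumed to come from a 'module.' prefix added by
    DataParallel, so the first component of every parameter name is stripped
    before retrying the load.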
""" 76 | dict_params = set(state_dict.keys()) 77 | model_params = set(map(lambda n: n[0], model.named_parameters())) 78 | 79 | if dict_params == model_params: # prefect fit 80 | logging.info('Loading all parameters of the checkpoint.') 81 | model.load_state_dict(state_dict, strict=True) 82 | return 83 | elif len(dict_params & model_params) == 0: # perfect mismatch 84 | state_dict = {'.'.join(n.split('.')[1:]): p 85 | for n, p in state_dict.items()} 86 | dict_params = set(state_dict.keys()) 87 | if len(dict_params & model_params) == 0: 88 | raise ValueError('Could not manage to load the checkpoint with' 89 | 'parameters:' + '\n\t'.join(sorted(dict_params))) 90 | common_params = dict_params & model_params 91 | left_params = dict_params - model_params 92 | if verbose: 93 | logging.info('Loading parameters:\n\t'+'\n\t'.join( 94 | sorted(common_params))) 95 | if len(left_params) > 0: 96 | logging.info('Could not load parameters:\n\t' 97 | + '\n\t'.join(sorted(left_params))) 98 | model.load_state_dict(state_dict, strict=False) 99 | 100 | 101 | def save_experiment(model, optimizer, lr_scheduler, conf, losses, results, 102 | best_eval, epoch, iter, output_dir, stop=False): 103 | """ Save the current model to a checkpoint 104 | and return the best result so far. """ 105 | state = model.state_dict() 106 | checkpoint = { 107 | 'model': state, 108 | 'optimizer': optimizer.state_dict(), 109 | 'lr_scheduler': lr_scheduler.state_dict(), 110 | 'conf': OmegaConf.to_container(conf, resolve=True), 111 | 'epoch': epoch, 112 | 'losses': losses, 113 | 'eval': results, 114 | } 115 | cp_name = f'checkpoint_{epoch}_{iter}'+('_interrupted' 116 | if stop else '')+'.tar' 117 | logging.info(f'Saving checkpoint {cp_name}') 118 | cp_path = str(output_dir / cp_name) 119 | torch.save(checkpoint, cp_path) 120 | if results[conf.train.best_key] < best_eval: 121 | best_eval = results[conf.train.best_key] 122 | logging.info( 123 | f'New best checkpoint: {conf.train.best_key}={best_eval}') 124 | shutil.copy(cp_path, str(output_dir / 'checkpoint_best.tar')) 125 | delete_old_checkpoints(output_dir, conf.train.keep_last_checkpoints) 126 | return best_eval 127 | -------------------------------------------------------------------------------- /deeplsd/utils/stdout_capturing.py: -------------------------------------------------------------------------------- 1 | """ 2 | Based on sacred/stdout_capturing.py in project Sacred 3 | https://github.com/IDSIA/sacred 4 | """ 5 | 6 | from __future__ import division, print_function, unicode_literals 7 | import os 8 | import sys 9 | import subprocess 10 | from threading import Timer 11 | from contextlib import contextmanager 12 | 13 | 14 | def apply_backspaces_and_linefeeds(text): 15 | """ 16 | Interpret backspaces and linefeeds in text like a terminal would. 17 | Interpret text like a terminal by removing backspace and linefeed 18 | characters and applying them line by line. 19 | If final line ends with a carriage it keeps it to be concatenable with next 20 | output chunk. 
21 | """ 22 | orig_lines = text.split('\n') 23 | orig_lines_len = len(orig_lines) 24 | new_lines = [] 25 | for orig_line_idx, orig_line in enumerate(orig_lines): 26 | chars, cursor = [], 0 27 | orig_line_len = len(orig_line) 28 | for orig_char_idx, orig_char in enumerate(orig_line): 29 | if orig_char == '\r' and (orig_char_idx != orig_line_len - 1 or 30 | orig_line_idx != orig_lines_len - 1): 31 | cursor = 0 32 | elif orig_char == '\b': 33 | cursor = max(0, cursor - 1) 34 | else: 35 | if (orig_char == '\r' and 36 | orig_char_idx == orig_line_len - 1 and 37 | orig_line_idx == orig_lines_len - 1): 38 | cursor = len(chars) 39 | if cursor == len(chars): 40 | chars.append(orig_char) 41 | else: 42 | chars[cursor] = orig_char 43 | cursor += 1 44 | new_lines.append(''.join(chars)) 45 | return '\n'.join(new_lines) 46 | 47 | 48 | def flush(): 49 | """Try to flush all stdio buffers, both from python and from C.""" 50 | try: 51 | sys.stdout.flush() 52 | sys.stderr.flush() 53 | except (AttributeError, ValueError, IOError): 54 | pass # unsupported 55 | 56 | 57 | # Duplicate stdout and stderr to a file. Inspired by: 58 | # http://eli.thegreenplace.net/2015/redirecting-all-kinds-of-stdout-in-python/ 59 | # http://stackoverflow.com/a/651718/1388435 60 | # http://stackoverflow.com/a/22434262/1388435 61 | @contextmanager 62 | def capture_outputs(filename): 63 | """Duplicate stdout and stderr to a file on the file descriptor level.""" 64 | with open(str(filename), 'a+') as target: 65 | original_stdout_fd = 1 66 | original_stderr_fd = 2 67 | target_fd = target.fileno() 68 | 69 | # Save a copy of the original stdout and stderr file descriptors 70 | saved_stdout_fd = os.dup(original_stdout_fd) 71 | saved_stderr_fd = os.dup(original_stderr_fd) 72 | 73 | tee_stdout = subprocess.Popen( 74 | ['tee', '-a', '-i', '/dev/stderr'], start_new_session=True, 75 | stdin=subprocess.PIPE, stderr=target_fd, stdout=1) 76 | tee_stderr = subprocess.Popen( 77 | ['tee', '-a', '-i', '/dev/stderr'], start_new_session=True, 78 | stdin=subprocess.PIPE, stderr=target_fd, stdout=2) 79 | 80 | flush() 81 | os.dup2(tee_stdout.stdin.fileno(), original_stdout_fd) 82 | os.dup2(tee_stderr.stdin.fileno(), original_stderr_fd) 83 | 84 | try: 85 | yield 86 | finally: 87 | flush() 88 | 89 | # then redirect stdout back to the saved fd 90 | tee_stdout.stdin.close() 91 | tee_stderr.stdin.close() 92 | 93 | # restore original fds 94 | os.dup2(saved_stdout_fd, original_stdout_fd) 95 | os.dup2(saved_stderr_fd, original_stderr_fd) 96 | 97 | # wait for completion of the tee processes with timeout 98 | # implemented using a timer because timeout support is py3 only 99 | def kill_tees(): 100 | tee_stdout.kill() 101 | tee_stderr.kill() 102 | 103 | tee_timer = Timer(1, kill_tees) 104 | try: 105 | tee_timer.start() 106 | tee_stdout.wait() 107 | tee_stderr.wait() 108 | finally: 109 | tee_timer.cancel() 110 | 111 | os.close(saved_stdout_fd) 112 | os.close(saved_stderr_fd) 113 | 114 | # Cleanup log file 115 | with open(str(filename), 'r') as target: 116 | text = target.read() 117 | text = apply_backspaces_and_linefeeds(text) 118 | with open(str(filename), 'w') as target: 119 | target.write(text) 120 | -------------------------------------------------------------------------------- /deeplsd/utils/tensor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from skimage import measure as skmeasure 3 | import collections.abc as collections 4 | import torch 5 | import cv2 6 | 7 | string_classes = str 8 | 9 | def 
map_tensor(input_, func): 10 | if isinstance(input_, torch.Tensor): 11 | return func(input_) 12 | elif isinstance(input_, string_classes): 13 | return input_ 14 | elif isinstance(input_, collections.Mapping): 15 | return {k: map_tensor(sample, func) for k, sample in input_.items()} 16 | elif isinstance(input_, collections.Sequence): 17 | return [map_tensor(sample, func) for sample in input_] 18 | else: 19 | raise TypeError( 20 | f'input must be tensor, dict or list; found {type(input_)}') 21 | 22 | 23 | def batch_to_numpy(batch): 24 | return map_tensor(batch, lambda tensor: tensor.cpu().numpy()) 25 | 26 | 27 | def batch_to_device(batch, device, non_blocking=False): 28 | def _func(tensor): 29 | return tensor.to(device=device, non_blocking=non_blocking) 30 | 31 | return map_tensor(batch, _func) 32 | 33 | 34 | def create_pairwise_conv_kernel(kernel_size, 35 | center_size=16, 36 | dia_stride=4, 37 | random_dia=False, random_tot_1=1024, 38 | return_neiCnt=False, diff_kernel=True): 39 | ''' 40 | @func: create 1 x fH x fW x num_ch kernel for conv2D to compute pairwise-diff 41 | @param: kernel_size --[filter_ht, filter_wd] 42 | center_size -- scalar, neighbours in this window is all considered. 43 | dia_stride -- scalar, consider dialated pairs on further sides. 44 | random_dia -- perform random choice to assign neighbour 45 | random_tot_1 -- # of 1 in random result 46 | return_neiCnt -- return # of selected neighbour 47 | ''' 48 | # selected neighbours for computing pairwise difference 49 | selected_neiI = np.zeros(kernel_size) 50 | axis_x, axis_y = np.meshgrid(range(kernel_size[1]), range(kernel_size[0])) 51 | cy, cx = kernel_size[0]//2, kernel_size[1]//2 52 | dist_mat_x = np.abs(axis_x-cx) 53 | dist_mat_y = np.abs(axis_y-cy) 54 | selected_neiI[(dist_mat_x+dist_mat_y) with Timer('mytimer'): 69 | > # some computations 70 | [mytimer] Elapsed: X 71 | ``` 72 | """ 73 | def __init__(self, name=None): 74 | self.name = name 75 | 76 | def __enter__(self): 77 | self.tstart = time.time() 78 | return self 79 | 80 | def __exit__(self, type, value, traceback): 81 | self.duration = time.time() - self.tstart 82 | if self.name is not None: 83 | print('[%s] Elapsed: %s' % (self.name, self.duration)) 84 | 85 | 86 | def set_num_threads(nt): 87 | """Force numpy and other libraries to use a limited number of threads.""" 88 | try: 89 | import mkl 90 | except ImportError: 91 | pass 92 | else: 93 | mkl.set_num_threads(nt) 94 | torch.set_num_threads(1) 95 | os.environ['IPC_ENABLE'] = '1' 96 | for o in ['OPENBLAS_NUM_THREADS', 'NUMEXPR_NUM_THREADS', 97 | 'OMP_NUM_THREADS', 'MKL_NUM_THREADS']: 98 | os.environ[o] = str(nt) 99 | 100 | 101 | class InterpolateModule(nn.Module): 102 | """ 103 | This is a module version of F.interpolate (rip nn.Upsampling). 104 | Any arguments you give it just get passed along for the ride. 105 | """ 106 | 107 | def __init__(self, *args, **kwdargs): 108 | super().__init__() 109 | 110 | self.args = args 111 | self.kwdargs = kwdargs 112 | 113 | def forward(self, x): 114 | return F.interpolate(x, *self.args, **self.kwdargs) 115 | 116 | 117 | def make_net(in_channels, conf, include_last_relu=True): 118 | """ 119 | A helper function to take a config setting and turn it into a network. 120 | Used by protonet and extrahead. 
Returns (network, out_channels) 121 | """ 122 | def make_layer(layer_cfg): 123 | nonlocal in_channels 124 | 125 | # Possible patterns: 126 | # ( 256, 3, {}) -> conv 127 | # ( 256,-2, {}) -> deconv 128 | # (None,-2, {}) -> bilinear interpolate 129 | # ('cat',[],{}) -> concat the subnetworks in the list 130 | # 131 | # You know it would have probably been simpler just to adopt a 'c' 'd' 'u' naming scheme. 132 | # Whatever, it's too late now. 133 | if isinstance(layer_cfg[0], str): 134 | layer_name = layer_cfg[0] 135 | 136 | if layer_name == 'cat': 137 | nets = [make_net(in_channels, x) for x in layer_cfg[1]] 138 | layer = Concat([net[0] for net in nets], layer_cfg[2]) 139 | num_channels = sum([net[1] for net in nets]) 140 | else: 141 | num_channels = layer_cfg[0] 142 | kernel_size = layer_cfg[1] 143 | 144 | if kernel_size > 0: 145 | layer = nn.Conv2d(in_channels, num_channels, kernel_size, 146 | **layer_cfg[2]) 147 | else: 148 | if num_channels is None: 149 | layer = InterpolateModule(scale_factor=-kernel_size, 150 | mode='bilinear', align_corners=False, 151 | **layer_cfg[2]) 152 | else: 153 | layer = nn.ConvTranspose2d(in_channels, num_channels, 154 | -kernel_size, **layer_cfg[2]) 155 | 156 | in_channels = num_channels if num_channels is not None else in_channels 157 | 158 | # Don't return a ReLU layer if we're doing an upsample. 159 | # This probably doesn't affect anything 160 | # output-wise, but there's no need to go through a ReLU here. 161 | # Commented out for backwards compatibility with previous models 162 | # if num_channels is None: 163 | # return [layer] 164 | # else: 165 | return [layer, nn.ReLU(inplace=True)] 166 | 167 | # Use sum to concat together all the component layer lists 168 | net = sum([make_layer(x) for x in conf], []) 169 | if not include_last_relu: 170 | net = net[:-1] 171 | 172 | return nn.Sequential(*(net)), in_channels 173 | -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | pip install scikit-build 2 | pip install -r requirements.txt # Install the requirements 3 | cd third_party/progressive-x/graph-cut-ransac/build; cmake ..; make -j8; cd ../../../.. # Install the C++ library Graph Cut RANSAC 4 | cd third_party/progressive-x/build; cmake ..; make -j8; cd ../../.. # Install the C++ library Progressive-X 5 | pip install -e third_party/progressive-x # Install the Python bindings of Progressive-X for VP estimation 6 | cd third_party/afm_lib/afm_op; python setup.py build_ext --inplace; rm -rf build; cd ..; pip install -e .; cd ../.. # Install the Cuda code to generate AFM from lines (taken from https://github.com/cherubicXN/afm_cvpr2019) 7 | pip install -e line_refinement # Install the Python bindings to optimize lines wrt a distance/angle field 8 | pip install -e third_party/homography_est # Install the code for homography estimation from lines 9 | pip install -e third_party/pytlbd # Install the LBD line matcher for evaluation 10 | pip install -e . 
# Install DeepLSD 11 | -------------------------------------------------------------------------------- /line_refinement/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.6) 2 | project(LineRefinement) 3 | 4 | set(CMAKE_CXX_STANDARD 17) 5 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 6 | 7 | find_package(Ceres REQUIRED) 8 | find_package(OpenCV REQUIRED) 9 | find_package(Eigen3 REQUIRED) 10 | if(${CERES_VERSION} VERSION_LESS "2.2.0") 11 | # ceres 2.2.0 changed the interface of local parameterization 12 | add_definitions("-DCERES_PARAMETERIZATION_ENABLED") 13 | endif() 14 | 15 | add_subdirectory(pybind11) 16 | include_directories(../third_party/progressive-x/graph-cut-ransac/src/pygcransac/include 17 | ../third_party/progressive-x/src/pyprogressivex/include 18 | ../third_party/progressive-x/src/pyprogressivex/src) 19 | 20 | # Add GC-RANSAC 21 | find_library(GraphCutRANSAC_LIB 22 | NAMES GraphCutRANSAC 23 | PATHS ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/progressive-x/ 24 | PATH_SUFFIXES build) 25 | if(NOT GraphCutRANSAC_LIB) 26 | message(FATAL_ERROR "GraphCutRANSAC library not found") 27 | endif() 28 | 29 | # Add Progressive-X 30 | find_library(ProgressiveX_LIB 31 | NAMES ProgressiveX 32 | PATHS ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/progressive-x/ 33 | PATH_SUFFIXES build) 34 | if(NOT ProgressiveX_LIB) 35 | message(FATAL_ERROR "ProgressiveX library not found") 36 | endif() 37 | 38 | pybind11_add_module(line_refinement line_vp_optim.cpp cost_functions.h vp_det.h) 39 | target_link_libraries(line_refinement PUBLIC ceres stdc++fs 40 | ${OpenCV_LIBS} "${GraphCutRANSAC_LIB}" 41 | "${ProgressiveX_LIB}") 42 | -------------------------------------------------------------------------------- /line_refinement/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import sys 4 | import platform 5 | import subprocess 6 | 7 | from setuptools import setup, Extension 8 | from setuptools.command.build_ext import build_ext 9 | from distutils.version import LooseVersion 10 | 11 | 12 | # Cores used for building the project 13 | N_CORES = 8 14 | 15 | 16 | class CMakeExtension(Extension): 17 | def __init__(self, name, sourcedir=''): 18 | Extension.__init__(self, name, sources=[]) 19 | self.sourcedir = os.path.abspath(sourcedir) 20 | 21 | 22 | class CMakeBuild(build_ext): 23 | def run(self): 24 | try: 25 | out = subprocess.check_output(['cmake', '--version']) 26 | except OSError: 27 | raise RuntimeError("CMake must be installed to build the following extensions: " + 28 | ", ".join(e.name for e in self.extensions)) 29 | 30 | if platform.system() == "Windows": 31 | cmake_version = LooseVersion(re.search(r'version\s*([\d.]+)', out.decode()).group(1)) 32 | if cmake_version < '3.1.0': 33 | raise RuntimeError("CMake >= 3.1.0 is required on Windows") 34 | 35 | for ext in self.extensions: 36 | self.build_extension(ext) 37 | 38 | def build_extension(self, ext): 39 | extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name))) 40 | cmake_args = ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir, 41 | '-DPYTHON_EXECUTABLE=' + sys.executable] 42 | 43 | cfg = 'Debug' if self.debug else 'Release' 44 | build_args = ['--config', cfg] 45 | 46 | if platform.system() == "Windows": 47 | cmake_args += ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}'.format(cfg.upper(), extdir)] 48 | if sys.maxsize > 2**32: 49 | cmake_args += ['-A', 'x64'] 50 | build_args += ['--', '/m'] 51 | else: 52 | 
            cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg]
53 |             build_args += ['--', '-j2']
54 | 
55 |         cmake_args += ['-DCMAKE_INSTALL_PREFIX=~/.local/']
56 | 
57 |         env = os.environ.copy()
58 |         env['CXXFLAGS'] = '{} -DVERSION_INFO=\\"{}\\"'.format(
59 |             env.get('CXXFLAGS', ''),
60 |             self.distribution.get_version()
61 |         )
62 |         if not os.path.exists(self.build_temp):
63 |             os.makedirs(self.build_temp)
64 |         print(['cmake', ext.sourcedir] + cmake_args)
65 |         subprocess.check_call(['cmake', ext.sourcedir] + cmake_args, cwd=self.build_temp, env=env)
66 |         subprocess.check_call(['cmake', '--build', '.', "--parallel {0}".format(N_CORES)] + build_args, cwd=self.build_temp)
67 | 
68 | setup(
69 |     name='line_refinement',
70 |     version='0.0.0',
71 |     author='Rémi Pautrat',
72 |     author_email='remi.pautrat@inf.ethz.ch',
73 |     description='Line segments and VP optimization from attraction fields',
74 |     long_description='',
75 |     ext_modules=[CMakeExtension('ceres')],
76 |     cmdclass=dict(build_ext=CMakeBuild),
77 |     zip_safe=False,
78 | )
79 | 
--------------------------------------------------------------------------------
/line_refinement/vp_det.h:
--------------------------------------------------------------------------------
1 | // #ifndef VP_DET_H
2 | // #define VP_DET_H
3 | 
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include
10 | #include
11 | 
12 | #include "progressivex_python.h"
13 | #include "progressivex_python.cpp"
14 | 
15 | 
16 | /*---------------------- Vanishing point estimation ----------------------*/
17 | 
18 | /** Compute VPs from a set of line segments, with Progressive-X.
19 |  */
20 | std::pair<std::vector<std::array<double, 3> >, std::vector<int> > compute_vps(
21 |     const std::vector<std::array<double, 4> >& lines, const size_t &w, const size_t &h,
22 |     const double &threshold = 1.5,
23 |     const size_t &max_iters = 100000,
24 |     const size_t &minimum_point_number = 2,
25 |     const int &maximum_model_number = -1,
26 |     const double &scoring_exponent = 1.,
27 |     const double &conf = 0.99,
28 |     const double &spatial_coherence_weight = 0.,
29 |     const double &neighborhood_ball_radius = 1.,
30 |     const double &maximum_tanimoto_similarity = 1.,
31 |     const size_t &sampler_id = 0,
32 |     const bool do_logging = false,
33 |     const int &min_num_supports = 10)
34 | {
35 |     int num_lines = lines.size();
36 |     std::vector<double> lines_flat(num_lines * 4);
37 |     for(int i=0; i < num_lines; i++)
38 |     {
39 |         for(int j=0; j < 4; j++)
40 |             lines_flat[i * 4 + j] = lines[i][j];
41 |     }
42 |     std::vector<double> vanishingPoints;
43 |     std::vector<size_t> labeling(num_lines);
44 | 
45 |     // Compute weights proportional to the line length
46 |     std::vector<double> weights(num_lines);
47 |     double max_w = 0.;
48 |     for(int i=0; i < num_lines; i++)
49 |     {
50 |         weights[i] = std::sqrt(std::pow(lines[i][0] - lines[i][2], 2.)
51 |                                + std::pow(lines[i][1] - lines[i][3], 2.));
52 |         max_w = std::max(max_w, weights[i]);
53 |     }
54 |     for(int i=0; i < num_lines; i++)
55 |         weights[i] /= max_w;
56 | 
57 |     int num_models = findVanishingPoints_(
58 |         lines_flat, weights, labeling, vanishingPoints, w, h,
59 |         spatial_coherence_weight, threshold, conf, neighborhood_ball_radius,
60 |         maximum_tanimoto_similarity, max_iters, minimum_point_number,
61 |         maximum_model_number, sampler_id, scoring_exponent, do_logging);
62 | 
63 |     // Count the number of inliers for each VP
64 |     // and keep only VPs with enough support
65 |     std::vector<int> counts(num_models, 0);
66 |     for(int i=0; i < num_lines; i++)
67 |         if((int) labeling[i] < num_models)
68 |             counts[(int) labeling[i]]++;
69 |     std::vector<int> reassign(num_models + 1, -1);
70 |     int counter = 0;
71 |     for(int i=0; i < num_models; i++)
72 |     {
73 |         if(counts[i] >= min_num_supports)
74 |         {
75 |             reassign[i] = counter;
76 |             counter++;
77 |         }
78 |     }
79 | 
80 |     // Convert to the output format
81 |     std::vector<int> labels(num_lines);
82 |     for(int i = 0; i < num_lines; i++)
83 |         labels[i] = reassign[(int) labeling[i]];
84 |     std::vector<std::array<double, 3> > vps(counter);
85 |     for(int i = 0; i < num_models; i++)
86 |     {
87 |         if(reassign[i] >= 0)
88 |             vps[reassign[i]] = {vanishingPoints[3 * i],
89 |                                 vanishingPoints[3 * i + 1],
90 |                                 vanishingPoints[3 * i + 2]};
91 |     }
92 |     return std::make_pair(vps, labels);
93 | }
94 | 
95 | 
96 | /** Compute the distance of a line to a VP.
97 |  */
98 | double dist_line_vp(const std::array<double, 4> &line, const std::array<double, 3> &vp)
99 | {
100 |     // Middle point
101 |     double cx = (line[0] + line[2]) / 2;
102 |     double cy = (line[1] + line[3]) / 2;
103 | 
104 |     // Line from the mid point to the VP
105 |     double l1 = cy * vp[2] - vp[1];
106 |     double l2 = vp[0] - cx * vp[2];
107 |     double l3 = cx * vp[1] - cy * vp[0];
108 | 
109 |     // Dist = max orthogonal distance of the two endpoints to l
110 |     double dist = (std::abs(line[0] * l1 + line[1] * l2 + l3)
111 |                    / std::sqrt(l1 * l1 + l2 * l2));
112 |     return dist;
113 | }
114 | 
115 | 
116 | /** Re-assign VPs to a set of lines.
117 |  */
118 | void assign_vps(const std::vector<std::array<double, 4> > &lines, const std::vector<std::array<double, 3> > &vps,
119 |                 std::vector<int> &vp_labels, double tol=1.5)
120 | {
121 |     int num_lines = lines.size();
122 |     double min_dist, dist;
123 |     int min_idx;
124 |     for(int i=0; i < num_lines; i++)
125 |     {
126 |         min_idx = -1;
127 |         min_dist = tol;
128 |         for(int j=0; j < vps.size(); j++)
129 |         {
130 |             dist = dist_line_vp(lines[i], vps[j]);
131 |             if(dist < min_dist)
132 |             {
133 |                 min_dist = dist;
134 |                 min_idx = j;
135 |             }
136 |         }
137 |         vp_labels[i] = min_idx;
138 |     }
139 | }
140 | 
141 | // #endif
142 | 
--------------------------------------------------------------------------------
/quickstart_install.sh:
--------------------------------------------------------------------------------
1 | pip install scikit-build
2 | pip install -r requirements.txt  # Install the requirements
3 | pip install -e .  # Install DeepLSD
4 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | pyyaml
2 | torch>=1.12
3 | torchvision>=0.13
4 | numpy
5 | matplotlib
6 | brewer2mpl
7 | opencv-python
8 | opencv-contrib-python
9 | tensorboard
10 | omegaconf
11 | tqdm
12 | future  # tensorboard dependency
13 | kornia>=0.6
14 | cython
15 | shapely
16 | scikit-image
17 | h5py
18 | flow_vis
19 | jupyter
20 | seaborn
21 | 
22 | ./third_party/pytlsd
23 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 | 
3 | setup(name='deeplsd', version="0.0", packages=find_packages())
4 | 
--------------------------------------------------------------------------------
/third_party/afm_lib/afm_op/__init__.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from .CUDA import afm
--------------------------------------------------------------------------------
/third_party/afm_lib/afm_op/afm.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include "cuda/afm.h"
3 | 
4 | std::tuple<at::Tensor, at::Tensor> afm(
5 |     const at::Tensor& lines,
6 |     const at::Tensor& shape_info,
7 |     const int height,
8 |     const int width)
9 | {
10 |     return afm_cuda(lines,shape_info,height,width);
11 | }
12 | 
--------------------------------------------------------------------------------
/third_party/afm_lib/afm_op/cuda/afm.cu:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | 
4 | // #include
5 | #include
6 | 
7 | #include
8 | #include
9 | 
10 | // int const CUDA_NUM_THREADS = sizeof(unsigned long long) * 8;
11 | int const CUDA_NUM_THREADS = 1024;
12 | 
13 | inline int CUDA_GET_BLOCKS(const int N) {
14 |   return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS;
15 | }
16 | 
17 | #define CUDA_1D_KERNEL_LOOP(i, n) \
18 |   for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
19 |        i += blockDim.x * gridDim.x)
20 | 
21 | // template
22 | float inline __device__ sgn(float x)
23 | {
24 |     return x>0?1.0:-1.0;
25 | }
26 | // template
27 | __global__ void afm_kernel(const int nthreads, const float* lines, const int* shape_info, const int num, const int height, const int width, float* afmap, int* aflabel)
28 | {
29 |     // aflabel[0] = 100;
30 |     CUDA_1D_KERNEL_LOOP(index, nthreads){
31 |         // printf("%d, %d\n",index,nthreads);
32 |         // afmap[index] = 1;
33 |         // afmap[index+height*width] = 2;
34 |         // aflabel[index] = index;
35 |         int w = index % width;
36 |         int h = (index / width) % height;
37 |         int n = index / width / height;
38 |         int x_index = n*2*height*width + h*width + w;
39 |         int y_index = n*2*height*width + height*width + h*width + w;
40 |         int label_index = n*height*width + h*width + w;
41 |         // printf("%d, %d, %d, %d, %d\n",index,nthreads, n, h, w);
42 | 
43 | 
44 |         float px = (float) w;
45 |         float py = (float) h;
46 |         int start = shape_info[n*4];
47 |         int end = shape_info[n*4+1];
48 |         float min_dis = 1e30;
49 |         for(int i = start; i < end; ++i) {
50 |             float xs = (float)width /(float)shape_info[n*4+3];
51 |             float ys = (float)height /(float)shape_info[n*4+2];
52 |             float x1 = lines[4*i]*xs;
53 |             float y1 = lines[4*i+1]*ys;
54 |             float x2 = lines[4*i+2]*xs;
55 |             float y2 = lines[4*i+3]*ys;
56 | 
57 |             float dx = x2 - x1;
58 |             float dy = y2 - y1;
59 |             float norm2 = dx*dx + dy*dy;
60 | 
61 |             float t = ((px-x1)*dx + (py-y1)*dy)/(norm2+1e-6);
62 |             t = t<1.0?t:1.0;
63 |             t = t>0.0?t:0.0;
64 | 
65 |             float ax = x1 + t*(x2-x1) - px;
66 |             float ay = y1 + t*(y2-y1) - py;
67 | 
68 |             float dis = ax*ax + ay*ay;
69 |             if (dis < min_dis) {
70 |                 min_dis = dis;
71 |                 // ax_opt = -sgn(ax)*log(fabs(ax/float(width)) + 1e-6);
72 |                 // ay_opt = -sgn(ay)*log(fabs(ay/float(height)) + 1e-6);
73 |                 // afmap[x_index] = -sgn(ax)*log(fabs(ax/float(width)) + 1e-6);
74 |                 // afmap[y_index] = -sgn(ay)*log(fabs(ay/float(height)) + 1e-6);
75 |                 afmap[x_index] = ax;
76 |                 afmap[y_index] = ay;
77 |                 aflabel[label_index] = i - start;
78 |             }
79 |         }
80 |         // afmap[x_index] = ax_opt;
81 |         // afmap[y_index] = ay_opt;
82 |         // aflabel[label_index] = ind_opt-start;
83 |     }
84 | }
85 | 
86 | std::tuple<at::Tensor, at::Tensor> afm_cuda(
87 |     const at::Tensor& lines,
88 |     const at::Tensor& shape_info,
89 |     const int height,
90 |     const int width)
91 | {
92 |     auto batch_size = shape_info.size(0);
93 |     auto afmap = at::zeros({batch_size,2,height,width}, lines.options());
94 |     auto aflabel = at::zeros({batch_size,1,height,width}, lines.options().dtype(at::kInt));
95 | 
96 |     auto nthreads = batch_size*height*width;
97 |     // printf("nthreads = %d\n",nthreads);
98 | 
99 |     cudaStream_t stream = at::cuda::getCurrentCUDAStream();
100 | 
101 |     float* afmap_data = afmap.data<float>();
102 |     int* aflabel_data = aflabel.data<int>();
103 | 
104 |     // printf("%.8f\n", log(1e-6));
105 |     afm_kernel<<<CUDA_GET_BLOCKS(nthreads), CUDA_NUM_THREADS, 0, stream>>>(
106 |         nthreads,
107 |         lines.contiguous().data<float>(),
108 |         shape_info.contiguous().data<int>(),
109 |         batch_size, height, width,
110 |         afmap_data,
111 |         aflabel_data);
112 |     cudaDeviceSynchronize();
113 |     // THCudaCheck(cudaMemcpy(&aflabel_host[0],aflabel_dev,
114 |     //     sizeof(int)*batch_size*height*width, cudaMemcpyDeviceToHost));
115 |     // THCudaCheck(cudaMemcpy(&afmap_host[0],afmap_dev,
116 |     //     sizeof(int)*batch_size*2*height*width, cudaMemcpyDeviceToHost));
117 | 
118 |     // THCudaFree(state, aflabel_dev);
119 |     // THCudaFree(state, afmap_dev);
120 |     // THCudaCheck(cudaGetLastError());
121 |     AT_CUDA_CHECK(cudaGetLastError());
122 |     return std::make_tuple(afmap, aflabel);
123 | }
124 | 
--------------------------------------------------------------------------------
/third_party/afm_lib/afm_op/cuda/afm.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include
4 | 
5 | std::tuple<at::Tensor, at::Tensor> afm_cuda(
6 |     const at::Tensor& lines,
7 |     const at::Tensor& shape_info,
8 |     const int height,
9 |     const int width);
10 | 
11 | 
--------------------------------------------------------------------------------
/third_party/afm_lib/afm_op/example.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import afmop
3 | 
4 | import os
5 | import os.path as osp
6 | import pickle
7 | import numpy as np
8 | import matplotlib.pyplot as plt
9 | import cv2
10 | import scipy.io as sio
11 | from tqdm import tqdm
12 | from lib.afm.gpu_afm import afm_transform_gpu as afm_transform
13 | 
14 | data_root = '../data/wireframe_raw'
15 | output_root = '../data/wireframe'
16 | 
17 | 
18 | with open(osp.join(data_root,'train.txt')) as handle:
19 |     train_lst = [f.rstrip('.jpg\n') for f in handle.readlines()]
20 | 
21 | with open(osp.join(data_root,'test.txt')) as handle:
22 |     test_lst = [f.rstrip('.jpg\n') for f in handle.readlines()]
23 | 
24 | 
25 | def load_datum(filename, height = 0, width = 0, mirror = 0):
26 |     with open(osp.join(data_root,'pointlines',filename+'.pkl'),'rb') as handle:
27 |         d = 
pickle.load(handle, encoding='latin1') 28 | h, w = d['img'].shape[:2] 29 | points = d['points'] 30 | lines = d['lines'] 31 | lsgs = np.array([[points[i][0], points[i][1], points[j][0], points[j][1]] for i, j in lines], 32 | dtype=np.float32) 33 | image = d['img'] 34 | 35 | return image, lsgs 36 | 37 | BATCH_SIZE = 16 38 | data = [load_datum(f) for f in train_lst[:BATCH_SIZE]] 39 | 40 | lines = np.concatenate([data[i][1] for i in range(BATCH_SIZE)]) 41 | start = np.array([data[i][1].shape[0] for i in range(BATCH_SIZE)]) 42 | end = np.cumsum(start) 43 | start = end-start 44 | shape_info = np.array([[start[i], end[i], data[i][0].shape[0],data[i][0].shape[1]] for i in range(BATCH_SIZE) ]) 45 | 46 | lines = torch.Tensor(lines).cuda() 47 | shape_info = torch.IntTensor(shape_info).cuda() 48 | # shape_info = np.array([[0,data[i][1], data[i][0].shape[0], data[i][0].shape[1]] for i in range(4), data[i][0].shape[0], data[i][0].shape[1]]],dtype=np.float32) 49 | import time 50 | start = time.time() 51 | for i in range(3000): 52 | afmap, aflabel = afmop.afm(lines, shape_info, 375,500) 53 | print((time.time()-start)) 54 | 55 | for i in range(BATCH_SIZE): 56 | xx, yy = np.meshgrid(range(500),range(375)) 57 | im_tensor = torch.Tensor(data[i][0].transpose([2,0,1])).unsqueeze(0) 58 | 59 | im_tensor = torch.nn.functional.interpolate(im_tensor,size=[375,500],mode='bilinear',align_corners=False) 60 | afx = afmap[i][0].data.cpu().numpy() + xx 61 | afy = afmap[i][1].data.cpu().numpy() + yy 62 | image = im_tensor.data.cpu().numpy()[0].transpose([1,2,0])/255.0 63 | plt.imshow(image) 64 | plt.plot(afx,afy,'r.',markersize=0.5) 65 | plt.show() 66 | -------------------------------------------------------------------------------- /third_party/afm_lib/afm_op/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | import glob 4 | import os 5 | 6 | extension_dir = '.' 7 | 8 | main_file = glob.glob(os.path.join(extension_dir,'*.cpp')) 9 | source_cuda = glob.glob(os.path.join(extension_dir,'cuda', '*.cu')) 10 | 11 | sources = main_file + source_cuda 12 | 13 | extra_compile_args = {'cxx': []} 14 | defined_macros = [] 15 | extra_compile_args["nvcc"] = [ 16 | "-DCUDA_HAS_FP16=1", 17 | "-D__CUDA_NO_HALF_OPERATORS__", 18 | "-D__CUDA_NO_HALF_CONVERSIONS__", 19 | "-D__CUDA_NO_HALF2_OPERATORS__", 20 | ] 21 | 22 | extension = CUDAExtension 23 | 24 | include_dirs = [extension_dir] 25 | 26 | ext_module = [ 27 | extension( 28 | "CUDA", 29 | sources, 30 | include_dirs=include_dirs, 31 | defined_macros=defined_macros, 32 | # extra_compile_args=extra_compile_args, 33 | ) 34 | ] 35 | setup( 36 | name='afm_op', 37 | ext_modules=ext_module, 38 | cmdclass={ 39 | 'build_ext': BuildExtension 40 | }) 41 | -------------------------------------------------------------------------------- /third_party/afm_lib/afm_op/vision.cpp: -------------------------------------------------------------------------------- 1 | #include "afm.h" 2 | 3 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 4 | m.def("afm", &afm, "attraction field map generation"); 5 | } -------------------------------------------------------------------------------- /third_party/afm_lib/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup(name='afm_op', version="0.0", packages=['afm_op']) 4 | --------------------------------------------------------------------------------
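Note on the afm op above: all line segments of a batch are packed into a single (N, 4) float tensor of (x1, y1, x2, y2) endpoints, and shape_info carries one [start, end, height, width] row per image, where [start, end) indexes that image's segments in the packed tensor and (height, width) is the resolution the endpoints are expressed in (the kernel rescales them to the requested output size, as in example.py). Below is a minimal sketch of that calling convention, assuming the extension was built and installed as in install.sh so that the afm_op package is importable; the segment coordinates and image sizes are made up for illustration.

import torch
from afm_op import afm  # re-exported by third_party/afm_lib/afm_op/__init__.py

# Batch of 2 images: image 0 owns segments [0, 2), image 1 owns segment [2, 3).
lines = torch.tensor([[10., 20., 200., 20.],
                      [50., 60., 50., 300.],
                      [0., 0., 100., 100.]], device='cuda')  # (N, 4) float32

# One row per image: [start, end, original height, original width].
shape_info = torch.tensor([[0, 2, 480, 640],
                           [2, 3, 375, 500]],
                          dtype=torch.int32, device='cuda')

# afmap: (B, 2, 375, 500) offsets from each pixel to the closest segment point;
# aflabel: (B, 1, 375, 500) index of that segment within its own image.
afmap, aflabel = afm(lines, shape_info, 375, 500)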