├── .gitignore ├── LICENSE ├── README.md ├── __init__.py ├── cfg └── default.yml ├── dfvo ├── .gitignore ├── LICENSE ├── README.md ├── apis │ ├── __init__.py │ └── run.py ├── libs │ ├── __init__.py │ ├── datasets │ │ ├── __init__.py │ │ ├── adelaide.py │ │ ├── dataset.py │ │ ├── kinect.py │ │ ├── kitti.py │ │ ├── oxford_robotcar.py │ │ └── tum.py │ ├── deep_models │ │ ├── __init__.py │ │ ├── checkpoint_logger.py │ │ ├── deep_models.py │ │ ├── depth │ │ │ ├── __init__.py │ │ │ ├── deep_depth.py │ │ │ └── monodepth2 │ │ │ │ ├── __init__.py │ │ │ │ ├── depth_decoder.py │ │ │ │ ├── layers.py │ │ │ │ ├── monodepth2.py │ │ │ │ └── resnet_encoder.py │ │ ├── flow │ │ │ ├── __init__.py │ │ │ ├── deep_flow.py │ │ │ ├── hd3 │ │ │ │ ├── hd3_flow.py │ │ │ │ ├── hd3losses.py │ │ │ │ ├── hd3model.py │ │ │ │ └── models │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── correlation.py │ │ │ │ │ ├── decoder.py │ │ │ │ │ ├── dla.py │ │ │ │ │ ├── dla_up.py │ │ │ │ │ ├── hd3_ops.py │ │ │ │ │ ├── hd3net.py │ │ │ │ │ └── vgg.py │ │ │ └── lite_flow_net │ │ │ │ ├── __init__.py │ │ │ │ ├── correlation.py │ │ │ │ ├── lite_flow.py │ │ │ │ └── lite_flow_net.py │ │ └── pose │ │ │ ├── __init__.py │ │ │ ├── deep_pose.py │ │ │ └── monodepth2 │ │ │ ├── __init__.py │ │ │ ├── monodepth2.py │ │ │ └── pose_decoder.py │ ├── dfvo.py │ ├── dfvo_module.py │ ├── flowlib │ │ ├── __init__.py │ │ ├── flowlib.py │ │ └── png.py │ ├── general │ │ ├── __init__.py │ │ ├── configuration.py │ │ ├── frame_drawer.py │ │ ├── kitti_raw_utils.py │ │ ├── kitti_utils.py │ │ ├── timer.py │ │ └── utils.py │ ├── geometry │ │ ├── __init__.py │ │ ├── backprojection.py │ │ ├── camera_modules.py │ │ ├── ops_3d.py │ │ ├── pose_graph_optimizer.py │ │ ├── projection.py │ │ ├── reprojection.py │ │ ├── rigid_flow.py │ │ └── transformation3d.py │ ├── matching │ │ ├── __init__.py │ │ ├── depth_consistency.py │ │ ├── keypoint_sampler.py │ │ └── kp_selection.py │ └── tracker │ │ ├── E_tracker.py │ │ ├── __init__.py │ │ ├── gric.py │ │ └── pnp_tracker.py ├── options │ ├── kitti │ │ ├── dfvo_test.yml │ │ ├── kitti_mono_sc_0.yml │ │ ├── kitti_stereo_0.yml │ │ ├── kitti_stereo_1.yml │ │ ├── sampling_test.yml │ │ └── tro_exp │ │ │ └── reference.yml │ └── unit_test │ │ ├── adelaide_0.yml │ │ ├── default.yml │ │ ├── kitti_0.yml │ │ ├── kitti_1.yml │ │ ├── kitti_2.yml │ │ ├── kitti_3.yml │ │ ├── robotcar.yml │ │ └── tum_0.yml ├── scripts │ └── run_kitti.sh └── tools │ ├── __init__.py │ ├── evaluation │ ├── __init__.py │ ├── odometry │ │ ├── __init__.py │ │ ├── eval_odom.py │ │ └── kitti_odometry.py │ ├── robotcar │ │ └── get_gt_poses.py │ └── tum_tool │ │ ├── associate.py │ │ └── pose_evaluation_utils.py │ ├── generate_flow_prediction.py │ ├── generate_kitti_raw_pose.py │ └── undistort_robotcar.py ├── drawer ├── frame_drawer.py └── utils.py ├── envs ├── README ├── min_requirements.yml └── requirements.yaml ├── lcd ├── .idea │ ├── inspectionProfiles │ │ └── profiles_settings.xml │ ├── loop_closure_detect.iml │ ├── misc.xml │ ├── modules.xml │ └── workspace.xml ├── __init__.py ├── extract_deep_vlad_feature.py ├── hmm_utils.py ├── loop_closure_detect.py └── netvlad.py ├── loader ├── .idea │ ├── inspectionProfiles │ │ └── profiles_settings.xml │ ├── loader.iml │ ├── misc.xml │ ├── modules.xml │ └── workspace.xml ├── __init__.py ├── dataset.py ├── kitti.py └── utils.py ├── main.py ├── misc └── topo_slam.png └── slam.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 
| *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | 132 | # data directories 133 | data/ 134 | result/ 135 | model_zoo/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2021, Best of Australian Centre for Robotic Vision (ACRV) 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 
19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | This repo implements a topological SLAM system. 4 | Deep Visual Odometry ([DF-VO](https://github.com/Huangying-Zhan/DF-VO)) and [Visual Place Recognition](https://github.com/dadung/Visual-Localization-Filtering) are 5 | combined to form the system. 6 | 7 | ## Publications 8 | 9 | 1. [Visual Odometry Revisited: What Should Be Learnt? 10 | ](https://arxiv.org/abs/1909.09803) 11 | 12 | 2. [DF-VO: What Should Be Learnt for Visual Odometry? 13 | ](https://arxiv.org/abs/2103.00933) 14 | 15 | 3. [Scalable Place Recognition Under Appearance Change for Autonomous Driving](https://openaccess.thecvf.com/content_ICCV_2019/html/Doan_Scalable_Place_Recognition_Under_Appearance_Change_for_Autonomous_Driving_ICCV_2019_paper.html) 16 | 17 | 18 | ``` 19 | @INPROCEEDINGS{zhan2019dfvo, 20 | author={H. {Zhan} and C. S. {Weerasekera} and J. -W. {Bian} and I. {Reid}}, 21 | booktitle={2020 IEEE International Conference on Robotics and Automation (ICRA)}, 22 | title={Visual Odometry Revisited: What Should Be Learnt?}, 23 | year={2020}, 24 | volume={}, 25 | number={}, 26 | pages={4203-4210}, 27 | doi={10.1109/ICRA40945.2020.9197374}} 28 | 29 | @misc{zhan2021dfvo, 30 | title={DF-VO: What Should Be Learnt for Visual Odometry?}, 31 | author={Huangying Zhan and Chamara Saroj Weerasekera and Jia-Wang Bian and Ravi Garg and Ian Reid}, 32 | year={2021}, 33 | eprint={2103.00933}, 34 | archivePrefix={arXiv}, 35 | primaryClass={cs.CV} 36 | } 37 | 38 | @inproceedings{doan2019scalable, 39 | title={Scalable place recognition under appearance change for autonomous driving}, 40 | author={Doan, Anh-Dzung and Latif, Yasir and Chin, Tat-Jun and Liu, Yu and Do, Thanh-Toan and Reid, Ian}, 41 | booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision}, 42 | pages={9319--9328}, 43 | year={2019} 44 | } 45 | 46 | ``` 47 | 48 | ## Demo 49 | 50 | 51 | ### Contents 52 | 1. [Requirements](#part-1-requirements) 53 | 2. [Prepare dataset](#part-2-download-dataset-and-models) 54 | 3. [Run example](#part-3-run-example) 55 | 4. [Result evaluation](#part-4-result-evaluation) 56 | 57 | 58 | ### Part 1. Requirements 59 | 60 | This code was tested with Python 3.6, CUDA 10.0, Ubuntu 16.04, and [PyTorch-1.0](https://pytorch.org/). 61 | 62 | We suggest using [Anaconda](https://www.anaconda.com/distribution/) for installing the prerequisites.
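(Optional) After creating the environment with the conda commands below, a quick sanity check is to confirm that PyTorch and the GPU are visible from inside the `topo_slam` environment, e.g. `python -c "import torch; print(torch.__version__, torch.cuda.is_available())"`; this check is only a suggestion and assumes PyTorch was installed by the environment file.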
63 | 64 | ``` 65 | cd envs 66 | conda env create -f min_requirements.yml -p {ANACONDA_DIR/envs/topo_slam} # install prerequisites 67 | conda activate topo_slam # activate the environment [topo_slam] 68 | ``` 69 | 70 | ### Part 2. Download dataset and models 71 | 72 | The main dataset used in this project is the [KITTI Driving Dataset](http://www.cvlibs.net/datasets/kitti/eval_odometry.php). After downloading the dataset, create a symbolic link in the current repo. 73 | ``` 74 | ln -s KITTI_ODOMETRY/sequences dataset/kitti_odom/odom_data 75 | ``` 76 | 77 | For our trained models, please visit [here](https://www.dropbox.com/sh/9by21564eb0xloh/AABHFMlWd_ja14c5wU4R1KUua?dl=0) to download the models and save them in the directory `model_zoo/`. 78 | 79 | ### Part 3. Run example 80 | ``` 81 | # run default kitti setup 82 | python main.py -d options/examples/default.yml -r data/kitti_odom 83 | ``` 84 | More configuration examples can be found in [configuration examples](https://github.com/Huangying-Zhan/DF-VO/tree/master/options/examples). 85 | 86 | The result (trajectory pose file) is saved in `result_dir` defined in the configuration file. 87 | Please check the [Configuration Documentation](https://df-vo.readthedocs.io/en/latest/rsts/configuration.html) for reference. 88 | 89 | ### Part 4. Result evaluation 90 | Please check [here](https://github.com/Huangying-Zhan/DF-VO#part-4-result-evaluation) for instructions on evaluating the results. 91 | 92 | ### License 93 | Please check the LICENSE file. 94 | 95 | ### Acknowledgement 96 | Some of the code was borrowed from the excellent works of [monodepth2](https://github.com/nianticlabs/monodepth2), [LiteFlowNet](https://github.com/twhui/LiteFlowNet) and [pytorch-liteflownet](https://github.com/sniklaus/pytorch-liteflownet). The borrowed files remain under their original licenses. 97 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/best-of-acrv/toposlam/3ec1dda210722d86bf77f101dca57ba27baa5833/__init__.py -------------------------------------------------------------------------------- /dfvo/.gitignore: -------------------------------------------------------------------------------- 1 | # cache 2 | *__pycache__ 3 | *.pyc 4 | *.ipynb_checkpoints 5 | 6 | .vscode 7 | 8 | result/ 9 | dataset/ 10 | model_zoo/ 11 | 12 | # docs 13 | docs/ 14 | 15 | # temporary files 16 | tmp/ 17 | options/adelaide/ 18 | tools/paper_tool/ 19 | tools/evaluation/flow/ -------------------------------------------------------------------------------- /dfvo/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Huangying Zhan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software.
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /dfvo/README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | This repo implements the system described in the paper: 4 | 5 | [Visual Odometry Revisited: What Should Be Learnt? 6 | ](https://arxiv.org/abs/1909.09803) 7 | 8 | Huangying Zhan, Chamara Saroj Weerasekera, Jiawang Bian, Ian Reid 9 | 10 | The demo video can be found [here](https://www.youtube.com/watch?v=Nl8mFU4SJKY). 11 | 12 | ``` 13 | # The paper is accepted to ICRA-2020. Updated bibtex will be provided in the future. 14 | 15 | @article{zhan2019dfvo, 16 | title={Visual Odometry Revisited: What Should Be Learnt?}, 17 | author={Zhan, Huangying and Weerasekera, Chamara Saroj and Bian, Jiawang and Reid, Ian}, 18 | journal={arXiv preprint arXiv:1909.09803}, 19 | year={2019} 20 | } 21 | ``` 22 | 23 | 24 | 25 | This repo includes 26 | 1. the frame-to-frame tracking system **DF-VO**; 27 | 2. evaluation scripts for visual odometry; 28 | 3. trained models and VO results. 29 | 30 | 31 | ### Contents 32 | 1. [Requirements](#part-1-requirements) 33 | 2. [Prepare dataset](#part-2-download-dataset-and-models) 34 | 3. [DF-VO](#part-3-DF-VO) 35 | 4. [Result evaluation](#part-4-result-evaluation) 36 | 37 | 38 | ### Part 1. Requirements 39 | 40 | This code was tested with Python 3.6, CUDA 9.0, Ubuntu 16.04, and [PyTorch](https://pytorch.org/). 41 | 42 | We suggest using [Anaconda](https://www.anaconda.com/distribution/) for installing the prerequisites. 43 | 44 | ``` 45 | cd envs 46 | conda env create -f requirement.yml -p {ANACONDA_DIR/envs/dfvo} # install prerequisites 47 | conda activate dfvo # activate the environment [dfvo] 48 | ``` 49 | 50 | ### Part 2. Download dataset and models 51 | 52 | The main dataset used in this project is the [KITTI Driving Dataset](http://www.cvlibs.net/datasets/kitti/eval_odometry.php). After downloading the dataset, create a symbolic link in the current repo. 53 | ``` 54 | ln -s KITTI_ODOMETRY/sequences dataset/kitti_odom/odom_data 55 | ``` 56 | 57 | For our trained models, please visit [here](https://www.dropbox.com/sh/9by21564eb0xloh/AABHFMlWd_ja14c5wU4R1KUua?dl=0) to download the models and save them in the directory `model_zoo/`. 58 | 59 | ### Part 3. DF-VO 60 | We have created different configurations for running the algorithm. 61 | 62 | ``` 63 | # Example 1: run default kitti setup 64 | python apis/run.py -d options/kitti/default_configuration.yml 65 | 66 | # Example 2: run custom kitti setup 67 | # kitti_default_configuration.yml and kitti_stereo_0.yml are merged 68 | python apis/run.py -d options/kitti/default_configuration.yml -c options/kitti/kitti_stereo_0.yml 69 | ``` 70 | 71 | The result (trajectory pose file) is saved in `result_dir` defined in the configuration file. 72 | Please check `options/kitti/default_configuration.yml` for reference. 73 | FIXME: add link to RTD page 74 | 75 | ### Part 4.
Result evaluation 76 | 77 | 78 | 79 | 80 | The original results, including related works, can be found [here](https://www.dropbox.com/sh/u7x3rt4lz6zx8br/AADshjd33Q3TLCy2stKt6qpJa?dl=0). 81 | 82 | #### KITTI 83 | The [KITTI Odometry benchmark](http://www.cvlibs.net/datasets/kitti/eval_odometry.php) contains 22 stereo sequences, of which 11 are provided with ground truth. These 11 sequences are used for evaluating visual odometry. 84 | 85 | ``` 86 | python tools/evaluation/odometry/eval_odom.py --result result/tmp/0 --align 6dof 87 | ``` 88 | 89 | For more information about the evaluation toolkit, please check the [toolbox page](https://github.com/Huangying-Zhan/kitti_odom_eval) or the [wiki page](https://github.com/Huangying-Zhan/DF-VO/wiki). 90 | 91 | ### Part 5. Run your own dataset 92 | 93 | We also provide a guideline for running DF-VO on your own dataset. 94 | Please check ... 95 | FIXME: add RTD link 96 | 97 | ### License 98 | For academic use, the code is released under the permissive MIT license. Our intention in sharing the project is for research/personal purposes. For any commercial purpose, please contact the authors. 99 | 100 | 101 | ### Acknowledgement 102 | Some of the code was borrowed from the excellent works of [monodepth2](https://github.com/nianticlabs/monodepth2), [LiteFlowNet](https://github.com/twhui/LiteFlowNet) and [pytorch-liteflownet](https://github.com/sniklaus/pytorch-liteflownet). The borrowed files remain under their original licenses. 103 | -------------------------------------------------------------------------------- /dfvo/apis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/best-of-acrv/toposlam/3ec1dda210722d86bf77f101dca57ba27baa5833/dfvo/apis/__init__.py -------------------------------------------------------------------------------- /dfvo/apis/run.py: -------------------------------------------------------------------------------- 1 | '''''' 2 | ''' 3 | @Author: Huangying Zhan (huangying.zhan.work@gmail.com) 4 | @Date: 2019-09-01 5 | @Copyright: Copyright (C) Huangying Zhan 2020. All rights reserved. Please refer to the license file. 6 | LastEditTime: 2020-09-24 7 | LastEditors: Huangying Zhan 8 | @Description: This API runs DF-VO.
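@Example: python apis/run.py -d options/kitti/default_configuration.yml -c options/kitti/kitti_stereo_0.yml (example usage copied from the README; any default/custom config pair can be substituted)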
9 | ''' 10 | 11 | import argparse 12 | import numpy as np 13 | import os 14 | import random 15 | import torch 16 | 17 | from dfvo.libs.dfvo_main import DFVO 18 | from dfvo.libs.general.utils import mkdir_if_not_exists 19 | from dfvo.libs.general.configuration import ConfigLoader 20 | 21 | 22 | config_loader = ConfigLoader() 23 | 24 | def read_cfgs(): 25 | """Parse arguments and laod configurations 26 | 27 | Returns 28 | ------- 29 | args : args 30 | arguments 31 | cfg : edict 32 | configuration dictionary 33 | """ 34 | ''' Argument Parsing ''' 35 | parser = argparse.ArgumentParser(description='VO system') 36 | parser.add_argument("-s", "--seq", 37 | default=None, help="sequence") 38 | parser.add_argument("-d", "--default_configuration", type=str, 39 | default="options/kitti/kitti_default_configuration.yml", 40 | help="default configuration files") 41 | parser.add_argument("-c", "--configuration", type=str, 42 | default=None, 43 | help="custom configuration file") 44 | parser.add_argument("--no_confirm", action="store_true", 45 | help="no confirmation questions") 46 | args = parser.parse_args() 47 | 48 | ''' Read configuration ''' 49 | # read default and custom config, merge cfgs 50 | config_files = [args.default_configuration, args.configuration] 51 | cfg = config_loader.merge_cfg(config_files) 52 | if args.seq is not None: 53 | if cfg.dataset == "kitti_odom": 54 | cfg.seq = "{:02}".format(int(args.seq)) 55 | else: 56 | cfg.seq = args.seq 57 | cfg.seq = str(cfg.seq) 58 | 59 | ''' double check result directory ''' 60 | if args.no_confirm: 61 | mkdir_if_not_exists(cfg.directory.result_dir) 62 | cfg.no_confirm = True 63 | else: 64 | cfg.no_confirm = False 65 | continue_flag = input("Save result in {}? [y/n]".format(cfg.directory.result_dir)) 66 | if continue_flag == "y": 67 | mkdir_if_not_exists(cfg.directory.result_dir) 68 | else: 69 | exit() 70 | return args, cfg 71 | 72 | 73 | if __name__ == '__main__': 74 | # Read config 75 | args, cfg = read_cfgs() 76 | 77 | # Set random seed 78 | SEED = cfg.seed 79 | np.random.seed(SEED) 80 | torch.cuda.manual_seed(SEED) 81 | torch.manual_seed(SEED) 82 | 83 | # setup DFVO 84 | vo = DFVO(cfg) 85 | vo.main() 86 | 87 | # Save configuration file 88 | cfg_path = os.path.join(cfg.directory.result_dir, 'configuration_{}.yml'.format(cfg.seq)) 89 | config_loader.save_cfg([args.default_configuration, args.configuration], file_path=cfg_path) 90 | -------------------------------------------------------------------------------- /dfvo/libs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/best-of-acrv/toposlam/3ec1dda210722d86bf77f101dca57ba27baa5833/dfvo/libs/__init__.py -------------------------------------------------------------------------------- /dfvo/libs/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .kitti import KittiOdom, KittiRaw 2 | from .tum import TUM 3 | from .adelaide import Adelaide 4 | from .kinect import Kinect 5 | # from .oxford_robotcar import OxfordRobotCar 6 | 7 | datasets = { 8 | "kitti_odom": KittiOdom, 9 | "kitti_raw": KittiRaw, 10 | "tum-1": TUM, 11 | "tum-2": TUM, 12 | "tum-3": TUM, 13 | "adelaide1": Adelaide, 14 | "adelaide2": Adelaide, 15 | "kinect": Kinect, 16 | # 'robotcar': OxfordRobotCar 17 | } 18 | -------------------------------------------------------------------------------- /dfvo/libs/datasets/adelaide.py: -------------------------------------------------------------------------------- 1 
| '''''' 2 | ''' 3 | @Author: Huangying Zhan (huangying.zhan.work@gmail.com) 4 | @Date: 2020-05-13 5 | @Copyright: Copyright (C) Huangying Zhan 2020. All rights reserved. Please refer to the license file. 6 | @LastEditTime: 2020-05-28 7 | @LastEditors: Huangying Zhan 8 | @Description: Dataset loaders for Adelaide Driving Sequence 9 | ''' 10 | 11 | import numpy as np 12 | from glob import glob 13 | import os 14 | 15 | from .dataset import Dataset 16 | from dfvo.libs.general.utils import * 17 | 18 | 19 | class Adelaide(Dataset): 20 | """Base class of dataset loaders for Adelaide Driving Sequence 21 | """ 22 | 23 | def __init__(self, *args, **kwargs): 24 | super(Adelaide, self).__init__(*args, **kwargs) 25 | 26 | ''' In general, you don't need to change this part ''' 27 | def synchronize_timestamps(self): 28 | """Synchronize RGB, Depth, and Pose timestamps to form pairs 29 | 30 | Returns: 31 | a dictionary containing 32 | - **rgb_timestamp** : {'depth': depth_timestamp, 'pose': pose_timestamp} 33 | """ 34 | self.rgb_d_pose_pair = {} 35 | len_seq = len(glob(os.path.join(self.data_dir['img'], "*.{}".format(self.cfg.image.ext)))) 36 | for i in range(len_seq): 37 | self.rgb_d_pose_pair[i] = {} 38 | self.rgb_d_pose_pair[i]['depth'] = i 39 | self.rgb_d_pose_pair[i]['pose'] = i 40 | 41 | def get_timestamp(self, img_id): 42 | """Get timestamp for the query img_id 43 | 44 | Args: 45 | img_id (int): query image id 46 | 47 | Returns: 48 | timestamp (int): timestamp for query image 49 | """ 50 | return img_id 51 | 52 | def save_result_traj(self, traj_txt, poses): 53 | """Save trajectory (absolute poses) as KITTI odometry file format 54 | 55 | Args: 56 | txt (str): pose text file path 57 | poses (dict): poses, each pose is a [4x4] array 58 | """ 59 | global_poses_arr = convert_SE3_to_arr(poses) 60 | save_traj(traj_txt, global_poses_arr, format='kitti') 61 | 62 | ''' In general, you need to write the following functions for your own dataset''' 63 | def get_intrinsics_param(self): 64 | """Read intrinsics parameters for each dataset 65 | 66 | Returns: 67 | intrinsics_param (list): [cx, cy, fx, fy] 68 | """ 69 | # Reference image size 70 | # Camera one 71 | if self.cfg.dataset == "adelaide1": 72 | self.height = 256 73 | self.width = 832 74 | 75 | # Camera two 76 | elif self.cfg.dataset == "adelaide2": 77 | self.height = 512 78 | self.width = 1664 79 | 80 | else: 81 | assert False, "Wrong dataset is given" 82 | 83 | img_seq_dir = os.path.join( 84 | self.cfg.directory.img_seq_dir, 85 | self.cfg.seq 86 | ) 87 | K = np.loadtxt(os.path.join(img_seq_dir, "cam.txt")) 88 | K[0] *= (self.cfg.image.width / self.width) 89 | K[1] *= (self.cfg.image.height / self.height) 90 | 91 | intrinsics_param = [K[0,2], K[1,2], K[0,0], K[1,1]] 92 | return intrinsics_param 93 | 94 | def get_data_dir(self): 95 | """Get data directory 96 | 97 | Returns: 98 | a dictionary containing 99 | - **img** (str) : image data directory 100 | - (optional) **depth** (str) : depth data direcotry or None 101 | - (optional) **depth_src** (str) : depth data type [gt/None] 102 | """ 103 | data_dir = {"depth": None, "depth_src": None} 104 | 105 | # get image data directory 106 | img_seq_dir = os.path.join( 107 | self.cfg.directory.img_seq_dir, 108 | self.cfg.seq 109 | ) 110 | data_dir['img'] = os.path.join(img_seq_dir) 111 | 112 | return data_dir 113 | 114 | def get_image(self, timestamp): 115 | """Get image data given the image timestamp 116 | 117 | Args: 118 | timestamp (int): timestamp for the image 119 | 120 | Returns: 121 | img (array, [CxHxW]): 
image data 122 | """ 123 | img_path = os.path.join(self.data_dir['img'], 124 | "{:06d}.{}".format(timestamp, self.cfg.image.ext) 125 | ) 126 | img = read_image(img_path, self.cfg.image.height, self.cfg.image.width) 127 | return img 128 | 129 | ''' 130 | These functions are not necessary to run DF-VO. 131 | However, if you want to use RGB-D data, get_depth() is required. 132 | If you have gt poses as well for comparison, get_gt_poses() is required. 133 | ''' 134 | def get_depth(self, timestamp): 135 | """Get GT/precomputed depth data given the timestamp 136 | 137 | Args: 138 | timestamp (int): timestamp for the depth 139 | 140 | Returns: 141 | depth (array, [HxW]): depth data 142 | """ 143 | raise NotImplementedError 144 | 145 | def get_gt_poses(self): 146 | """Load ground-truth poses 147 | 148 | Returns: 149 | gt_poses (dict): each pose is a [4x4] array 150 | """ 151 | raise NotImplementedError 152 | -------------------------------------------------------------------------------- /dfvo/libs/datasets/dataset.py: -------------------------------------------------------------------------------- 1 | '''''' 2 | ''' 3 | @Author: Huangying Zhan (huangying.zhan.work@gmail.com) 4 | @Date: 2019-09-01 5 | @Copyright: Copyright (C) Huangying Zhan 2020. All rights reserved. Please refer to the license file. 6 | @LastEditTime: 2020-07-09 7 | @LastEditors: Huangying Zhan 8 | @Description: This is the Base class for dataset loader. 9 | ''' 10 | 11 | import numpy as np 12 | 13 | from dfvo.libs.geometry.camera_modules import Intrinsics 14 | 15 | class Dataset(): 16 | """This is the Base class for dataset loader. 17 | """ 18 | 19 | def __init__(self, cfg): 20 | """ 21 | Args: 22 | cfg (edict): configuration edict 23 | """ 24 | self.cfg = cfg 25 | 26 | # read camera intrinsics 27 | K_params = self.get_intrinsics_param() 28 | self.cam_intrinsics = Intrinsics(K_params) 29 | 30 | # get data directories 31 | self.data_dir = self.get_data_dir() 32 | 33 | # synchronize timestamps 34 | self.synchronize_timestamps() 35 | 36 | # get gt poses (for visualization comparison purpose) 37 | if self.cfg.directory.gt_pose_dir is not None: 38 | self.gt_poses = self.get_gt_poses() 39 | else: 40 | self.gt_poses = {0: np.eye(4)} 41 | 42 | def __len__(self): 43 | return len(self.rgb_d_pose_pair) 44 | 45 | def get_intrinsics_param(self): 46 | """Read intrinsics parameters for each dataset 47 | 48 | Returns: 49 | intrinsics_param (list): [cx, cy, fx, fy] 50 | """ 51 | raise NotImplementedError 52 | 53 | def synchronize_timestamps(self): 54 | """Synchronize RGB, Depth, and Pose timestamps to form pairs 55 | 56 | Returns: 57 | a dictionary containing 58 | - **rgb_timestamp** : {'depth': depth_timestamp, 'pose': pose_timestamp} 59 | """ 60 | raise NotImplementedError 61 | 62 | def get_data_dir(self): 63 | """Get data directory 64 | 65 | Returns: 66 | a dictionary containing 67 | - **img** (str) : image data directory 68 | - (optional) **depth** (str) : depth data direcotry or None 69 | - (optional) **depth_src** (str) : depth data type [gt/None] 70 | """ 71 | raise NotImplementedError 72 | 73 | def get_gt_poses(self): 74 | """Get ground-truth poses 75 | 76 | Returns: 77 | gt_poses (dict): each pose is a [4x4] array 78 | """ 79 | raise NotImplementedError 80 | 81 | def get_timestamp(self, img_id): 82 | """Get timestamp for the query img_id 83 | 84 | Args: 85 | img_id (int): query image id 86 | 87 | Returns: 88 | timestamp (int): timestamp for query image 89 | """ 90 | raise NotImplementedError 91 | 92 | def get_image(self, timestamp): 93 
| """Get image data given the image timestamp 94 | 95 | Args: 96 | timestamp (int): timestamp for the image 97 | 98 | Returns: 99 | img (array, [CxHxW]): image data 100 | """ 101 | raise NotImplementedError 102 | 103 | def get_depth(self, timestamp): 104 | """Get GT/precomputed depth data given the timestamp 105 | 106 | Args: 107 | timestamp (int): timestamp for the depth 108 | 109 | Returns: 110 | depth (array, [HxW]): depth data 111 | """ 112 | raise NotImplementedError 113 | 114 | def save_result_traj(self, traj_txt, poses): 115 | """Save trajectory (absolute poses) as KITTI odometry file format 116 | 117 | Args: 118 | txt (str): pose text file path 119 | poses (dict): poses, each pose is a [4x4] array 120 | """ 121 | raise NotImplementedError -------------------------------------------------------------------------------- /dfvo/libs/deep_models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/best-of-acrv/toposlam/3ec1dda210722d86bf77f101dca57ba27baa5833/dfvo/libs/deep_models/__init__.py -------------------------------------------------------------------------------- /dfvo/libs/deep_models/checkpoint_logger.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @Author: Huangying Zhan (huangying.zhan.work@gmail.com) 3 | @Date: 1970-01-01 4 | @Copyright: Copyright (C) Huangying Zhan 2020. All rights reserved. Please refer to the license file. 5 | @LastEditTime: 2020-06-11 6 | @LastEditors: Huangying Zhan 7 | @Description: 8 | ''' 9 | 10 | import os 11 | import torch 12 | 13 | from dfvo.libs.general.utils import mkdir_if_not_exists 14 | 15 | class CheckpointLogger(): 16 | def __init__(self, epoch_freq, step_freq, result_dir): 17 | # # logging frequency 18 | # self.freqs = {} 19 | # self.freqs['epoch'] = epoch_freq 20 | # self.freqs['step'] = step_freq 21 | 22 | # directory 23 | self.result_dir = result_dir 24 | 25 | def save_checkpoint(self, item, cur_cnt, ckpt_data, is_best=False): 26 | """Save trained models, optimizer and training states 27 | 28 | Args: 29 | item (str): epoch / iter 30 | cur_counter (int): current counter 31 | ckpt_data (dict): checkpoint data dictionary 32 | 33 | - models: network model states 34 | - optimzier: optimizer state 35 | - train_state: extra information 36 | - epoch 37 | - step 38 | is_best (bool): model with best validation loss 39 | """ 40 | models = ckpt_data['models'] 41 | optimizer = ckpt_data['optimizer'] 42 | train_state = ckpt_data['train_state'] 43 | 44 | # Save current checkpoint 45 | save_folder = os.path.join( 46 | self.result_dir, "models", 47 | "{}_{}".format(item, cur_cnt) 48 | ) 49 | mkdir_if_not_exists(save_folder) 50 | 51 | print("==> Save checkpoint at {} {}".format(item, cur_cnt)) 52 | self.save_model(save_folder, models) 53 | self.save_optimizer(save_folder, optimizer) 54 | self.save_train_state(save_folder, train_state) 55 | 56 | # Save best model 57 | if is_best: 58 | save_folder = os.path.join( 59 | self.result_dir, "models", "best" 60 | ) 61 | mkdir_if_not_exists(save_folder) 62 | print("==> Save best model.") 63 | self.save_model(save_folder, models) 64 | self.save_optimizer(save_folder, optimizer) 65 | self.save_train_state(save_folder, train_state) 66 | with open(os.path.join(save_folder, "best.txt"), 'w') as f: 67 | line = "{}: {}".format(item, cur_cnt) 68 | f.writelines(line) 69 | 70 | def save_model(self, save_folder, models): 71 | """Save model checkpoints 72 | Args: 73 | save_folder (str): directory for 
saving models 74 | models (dict): model dictionary 75 | """ 76 | for model_name, model in models.items(): 77 | ckpt_path = os.path.join(save_folder, "{}.pth".format(model_name)) 78 | torch.save(model.state_dict(), ckpt_path) 79 | 80 | def save_optimizer(self, save_folder, optimizer): 81 | """Save optimizer data 82 | Args: 83 | save_folder (str): directory for saving models 84 | optimizer (torch.optim): torch optimizer data 85 | """ 86 | ckpt_path = os.path.join(save_folder, "optimizer.pth") 87 | torch.save(optimizer.state_dict(), ckpt_path) 88 | 89 | def save_train_state(self, save_folder, train_state): 90 | """Save optimizer data 91 | Args: 92 | save_folder (str): directory for saving models 93 | train_state (dict): extra training state information 94 | """ 95 | ckpt_path = os.path.join(save_folder, "train_state.pth") 96 | torch.save(train_state, ckpt_path) -------------------------------------------------------------------------------- /dfvo/libs/deep_models/depth/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/best-of-acrv/toposlam/3ec1dda210722d86bf77f101dca57ba27baa5833/dfvo/libs/deep_models/depth/__init__.py -------------------------------------------------------------------------------- /dfvo/libs/deep_models/depth/monodepth2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/best-of-acrv/toposlam/3ec1dda210722d86bf77f101dca57ba27baa5833/dfvo/libs/deep_models/depth/monodepth2/__init__.py -------------------------------------------------------------------------------- /dfvo/libs/deep_models/depth/monodepth2/depth_decoder.py: -------------------------------------------------------------------------------- 1 | # Copyright Niantic 2019. Patent Pending. All rights reserved. 2 | # 3 | # This software is licensed under the terms of the Monodepth2 licence 4 | # which allows for non-commercial use only, the full terms of which are made 5 | # available in the LICENSE file. 
6 | 7 | from __future__ import absolute_import, division, print_function 8 | 9 | from collections import OrderedDict 10 | import numpy as np 11 | import torch 12 | import torch.nn as nn 13 | 14 | from .layers import * 15 | 16 | 17 | class DepthDecoder(nn.Module): 18 | def __init__(self, num_ch_enc, scales=range(4), num_output_channels=1, use_skips=True): 19 | super(DepthDecoder, self).__init__() 20 | 21 | self.num_output_channels = num_output_channels 22 | self.use_skips = use_skips 23 | self.upsample_mode = 'nearest' 24 | self.scales = scales 25 | 26 | self.num_ch_enc = num_ch_enc 27 | self.num_ch_dec = np.array([16, 32, 64, 128, 256]) 28 | 29 | # decoder 30 | self.convs = OrderedDict() 31 | for i in range(4, -1, -1): 32 | # upconv_0 33 | num_ch_in = self.num_ch_enc[-1] if i == 4 else self.num_ch_dec[i + 1] 34 | num_ch_out = self.num_ch_dec[i] 35 | self.convs[("upconv", i, 0)] = ConvBlock(num_ch_in, num_ch_out) 36 | 37 | # upconv_1 38 | num_ch_in = self.num_ch_dec[i] 39 | if self.use_skips and i > 0: 40 | num_ch_in += self.num_ch_enc[i - 1] 41 | num_ch_out = self.num_ch_dec[i] 42 | self.convs[("upconv", i, 1)] = ConvBlock(num_ch_in, num_ch_out) 43 | 44 | for s in self.scales: 45 | self.convs[("dispconv", s)] = Conv3x3(self.num_ch_dec[s], self.num_output_channels) 46 | 47 | self.decoder = nn.ModuleList(list(self.convs.values())) 48 | self.sigmoid = nn.Sigmoid() 49 | 50 | def forward(self, input_features): 51 | self.outputs = {} 52 | 53 | # decoder 54 | x = input_features[-1] 55 | for i in range(4, -1, -1): 56 | x = self.convs[("upconv", i, 0)](x) 57 | x = [upsample(x)] 58 | if self.use_skips and i > 0: 59 | x += [input_features[i - 1]] 60 | x = torch.cat(x, 1) 61 | x = self.convs[("upconv", i, 1)](x) 62 | if i in self.scales: 63 | self.outputs[("disp", i)] = self.sigmoid(self.convs[("dispconv", i)](x)) 64 | 65 | return self.outputs 66 | -------------------------------------------------------------------------------- /dfvo/libs/deep_models/depth/monodepth2/monodepth2.py: -------------------------------------------------------------------------------- 1 | '''''' 2 | ''' 3 | @Author: Huangying Zhan (huangying.zhan.work@gmail.com) 4 | @Date: 2020-05-19 5 | @Copyright: Copyright (C) Huangying Zhan 2020. All rights reserved. Please refer to the license file. 6 | @LastEditTime: 2020-07-06 7 | @LastEditors: Huangying Zhan 8 | @Description: This is the interface for Monodepth2 depth network 9 | ''' 10 | 11 | import numpy as np 12 | import os 13 | import sys 14 | import torch 15 | 16 | from .depth_decoder import DepthDecoder 17 | from .layers import disp_to_depth 18 | from .resnet_encoder import ResnetEncoder 19 | from ..deep_depth import DeepDepth 20 | 21 | 22 | class Monodepth2DepthNet(DeepDepth): 23 | """This is the interface for Monodepth2 depth network 24 | """ 25 | def __init__(self, *args, **kwargs): 26 | super(Monodepth2DepthNet, self).__init__(*args, **kwargs) 27 | 28 | self.enable_finetune = False 29 | 30 | def initialize_network_model(self, weight_path, dataset, finetune): 31 | """initialize network and load pretrained model 32 | 33 | Args: 34 | weight_path (str): a directory stores the pretrained models. 
35 | - **encoder.pth**: encoder model 36 | - **depth.pth**: depth decoder model 37 | dataset (str): dataset setup for min/max depth [kitti, tum] 38 | finetune (bool): finetune model on the run if True 39 | """ 40 | # initilize network 41 | self.encoder = ResnetEncoder(18, False) 42 | self.depth_decoder = DepthDecoder( 43 | num_ch_enc=self.encoder.num_ch_enc, scales=range(4)) 44 | 45 | print("==> Initialize Depth-CNN with [{}]".format(weight_path)) 46 | # loading pretrained model (encoder) 47 | encoder_path = os.path.join(weight_path, 'encoder.pth') 48 | loaded_dict_enc = torch.load(encoder_path, map_location=self.device) 49 | filtered_dict_enc = {k: v for k, v in loaded_dict_enc.items() if k in self.encoder.state_dict()} 50 | self.encoder.load_state_dict(filtered_dict_enc) 51 | self.encoder.to(self.device) 52 | 53 | # loading pretrained model (depth-decoder) 54 | depth_decoder_path = os.path.join(weight_path, 'depth.pth') 55 | loaded_dict = torch.load(depth_decoder_path, map_location=self.device) 56 | self.depth_decoder.load_state_dict(loaded_dict) 57 | self.depth_decoder.to(self.device) 58 | 59 | # concatenate encoders and decoders 60 | self.model = torch.nn.Sequential(self.encoder, self.depth_decoder) 61 | 62 | if finetune: 63 | self.encoder.train() 64 | self.depth_decoder.train() 65 | else: 66 | self.encoder.eval() 67 | self.depth_decoder.eval() 68 | 69 | # image size 70 | self.feed_height = loaded_dict_enc['height'] 71 | self.feed_width = loaded_dict_enc['width'] 72 | 73 | # dataset parameters 74 | if 'kitti' in dataset: 75 | self.min_depth = 0.1 76 | self.max_depth = 100 77 | self.stereo_baseline_multiplier = 5.4 78 | elif 'tum' in dataset: 79 | self.min_depth = 0.1 80 | self.max_depth = 10 81 | self.stereo_baseline_multiplier = 1 82 | elif 'robotcar' in dataset: 83 | self.min_depth = 0.1 84 | self.max_depth = 100 85 | self.stereo_baseline_multiplier = 5.4 86 | else: 87 | self.min_depth = 0.1 88 | self.max_depth = 100 89 | self.stereo_baseline_multiplier = 1 90 | 91 | def inference(self, img): 92 | """Depth prediction 93 | 94 | Args: 95 | img (tensor, [Nx3HxW]): image 96 | 97 | Returns: 98 | a dictionary containing depths and disparities at different scales, resized back to input scale 99 | 100 | - **depth** (dict): depth predictions, each element is **scale-N** (tensor, [Nx1xHxW]): depth predictions at scale-N 101 | - **disp** (dict): disparity predictions, each element is **scale-N** (tensor, [Nx1xHxW]): disparity predictions at scale-N 102 | """ 103 | _, _, original_height, original_width = img.shape 104 | 105 | # Prediction 106 | features = self.encoder(img) 107 | pred_disps = self.depth_decoder(features) 108 | 109 | outputs = {'depth': {}, 'disp': {}} 110 | for s in self.depth_scales: 111 | disp = pred_disps[('disp', s)] 112 | disp_resized = torch.nn.functional.interpolate( 113 | disp, (original_height, original_width), mode='bilinear', align_corners=False) 114 | 115 | scaled_disp, _ = disp_to_depth(disp_resized, self.min_depth, self.max_depth) 116 | outputs['depth'][s] = 1. 
/ scaled_disp # monodepth2 assumes 0.1 unit baseline 117 | outputs['disp'][s] = scaled_disp 118 | 119 | return outputs 120 | 121 | def inference_depth(self, img): 122 | """Depth prediction 123 | 124 | Args: 125 | img (tensor, [Nx3HxW]): image 126 | 127 | Returns: 128 | depth (tensor, [Nx1xHxW]): depth prediction at highest resolution 129 | """ 130 | if self.enable_finetune: 131 | predictions = self.inference(img) 132 | else: 133 | predictions = self.inference_no_grad(img) 134 | self.pred_depths = predictions['depth'] 135 | self.pred_disps = predictions['disp'] 136 | 137 | # summarize depth predictions for DF-VO 138 | depth = self.pred_depths[0].clone() * self.stereo_baseline_multiplier # monodepth2 assumes 0.1 unit baseline 139 | return depth 140 | -------------------------------------------------------------------------------- /dfvo/libs/deep_models/depth/monodepth2/resnet_encoder.py: -------------------------------------------------------------------------------- 1 | # Copyright Niantic 2019. Patent Pending. All rights reserved. 2 | # 3 | # This software is licensed under the terms of the Monodepth2 licence 4 | # which allows for non-commercial use only, the full terms of which are made 5 | # available in the LICENSE file. 6 | 7 | from __future__ import absolute_import, division, print_function 8 | 9 | import numpy as np 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torchvision.models as models 14 | import torch.utils.model_zoo as model_zoo 15 | 16 | 17 | class ResNetMultiImageInput(models.ResNet): 18 | """Constructs a resnet model with varying number of input images. 19 | Adapted from https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py 20 | """ 21 | def __init__(self, block, layers, num_classes=1000, num_input_images=1): 22 | super(ResNetMultiImageInput, self).__init__(block, layers) 23 | self.inplanes = 64 24 | self.conv1 = nn.Conv2d( 25 | num_input_images * 3, 64, kernel_size=7, stride=2, padding=3, bias=False) 26 | self.bn1 = nn.BatchNorm2d(64) 27 | self.relu = nn.ReLU(inplace=True) 28 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 29 | self.layer1 = self._make_layer(block, 64, layers[0]) 30 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 31 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 32 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 33 | 34 | for m in self.modules(): 35 | if isinstance(m, nn.Conv2d): 36 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 37 | elif isinstance(m, nn.BatchNorm2d): 38 | nn.init.constant_(m.weight, 1) 39 | nn.init.constant_(m.bias, 0) 40 | 41 | 42 | def resnet_multiimage_input(num_layers, pretrained=False, num_input_images=1): 43 | """Constructs a ResNet model. 44 | Args: 45 | num_layers (int): Number of resnet layers. 
Must be 18 or 50 46 | pretrained (bool): If True, returns a model pre-trained on ImageNet 47 | num_input_images (int): Number of frames stacked as input 48 | """ 49 | assert num_layers in [18, 50], "Can only run with 18 or 50 layer resnet" 50 | blocks = {18: [2, 2, 2, 2], 50: [3, 4, 6, 3]}[num_layers] 51 | block_type = {18: models.resnet.BasicBlock, 50: models.resnet.Bottleneck}[num_layers] 52 | model = ResNetMultiImageInput(block_type, blocks, num_input_images=num_input_images) 53 | 54 | if pretrained: 55 | loaded = model_zoo.load_url(models.resnet.model_urls['resnet{}'.format(num_layers)]) 56 | loaded['conv1.weight'] = torch.cat( 57 | [loaded['conv1.weight']] * num_input_images, 1) / num_input_images 58 | model.load_state_dict(loaded) 59 | return model 60 | 61 | 62 | class ResnetEncoder(nn.Module): 63 | """Pytorch module for a resnet encoder 64 | """ 65 | def __init__(self, num_layers, pretrained, num_input_images=1): 66 | super(ResnetEncoder, self).__init__() 67 | 68 | self.num_ch_enc = np.array([64, 64, 128, 256, 512]) 69 | 70 | resnets = {18: models.resnet18, 71 | 34: models.resnet34, 72 | 50: models.resnet50, 73 | 101: models.resnet101, 74 | 152: models.resnet152} 75 | 76 | if num_layers not in resnets: 77 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers)) 78 | 79 | if num_input_images > 1: 80 | self.encoder = resnet_multiimage_input(num_layers, pretrained, num_input_images) 81 | else: 82 | self.encoder = resnets[num_layers](pretrained) 83 | 84 | if num_layers > 34: 85 | self.num_ch_enc[1:] *= 4 86 | 87 | def forward(self, input_image): 88 | self.features = [] 89 | x = (input_image - 0.45) / 0.225 90 | x = self.encoder.conv1(x) 91 | x = self.encoder.bn1(x) 92 | self.features.append(self.encoder.relu(x)) 93 | self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1]))) 94 | self.features.append(self.encoder.layer2(self.features[-1])) 95 | self.features.append(self.encoder.layer3(self.features[-1])) 96 | self.features.append(self.encoder.layer4(self.features[-1])) 97 | 98 | return self.features 99 | -------------------------------------------------------------------------------- /dfvo/libs/deep_models/flow/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/best-of-acrv/toposlam/3ec1dda210722d86bf77f101dca57ba27baa5833/dfvo/libs/deep_models/flow/__init__.py -------------------------------------------------------------------------------- /dfvo/libs/deep_models/flow/hd3/hd3losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from .models.hd3_ops import * 5 | 6 | 7 | class LossCalculator(object): 8 | 9 | def __init__(self, task): 10 | assert task in ['flow', 'stereo'] 11 | self.task = task 12 | self.dim = 1 if task == 'stereo' else 2 13 | 14 | def __call__(self, ms_prob, ms_pred, gt, corr_range, ds=6): 15 | B, C, H, W = gt.size() 16 | lv = len(ms_prob) 17 | criterion = nn.KLDivLoss(reduction='batchmean').cuda() 18 | losses = {} 19 | kld_loss = 0 20 | for l in range(lv): 21 | scaled_gt, valid_mask = downsample_flow(gt, 1 / 2**(ds - l)) 22 | if self.task == 'stereo': 23 | scaled_gt = scaled_gt[:, 0, :, :].unsqueeze(1) 24 | if l > 0: 25 | scaled_gt = scaled_gt - F.interpolate( 26 | ms_pred[l - 1], 27 | scale_factor=2, 28 | mode='bilinear', 29 | align_corners=True) 30 | scaled_gt = scaled_gt / 2**(ds - l) 31 | gt_dist = vector2density(scaled_gt, 
corr_range[l], 32 | self.dim) * valid_mask 33 | kld_loss += 4**(ds - l) / (H * W) * criterion( 34 | F.log_softmax(ms_prob[l], dim=1), gt_dist.detach()) 35 | 36 | losses['total'] = kld_loss 37 | for loss_type, loss_value in losses.items(): 38 | losses[loss_type] = loss_value.reshape(1) 39 | return losses 40 | 41 | 42 | def EndPointError(output, gt): 43 | # output: [B, 1/2, H, W], stereo or flow prediction 44 | # gt: [B, C, H, W], 2D ground-truth annotation which may contain a mask 45 | # NOTE: To benchmark the result, please ensure the ground-truth keeps 46 | # its ORIGINAL RESOLUTION. 47 | if output.size(1) == 1: # stereo 48 | output = disp2flow(output) 49 | output = resize_dense_vector(output, gt.size(2), gt.size(3)) 50 | error = torch.norm(output - gt[:, :2, :, :], 2, 1, keepdim=False) 51 | if gt.size(1) == 3: 52 | mask = (gt[:, 2, :, :] > 0).float() 53 | else: 54 | mask = torch.ones_like(error) 55 | epe = (error * mask).sum() / mask.sum() 56 | return epe.reshape(1) 57 | -------------------------------------------------------------------------------- /dfvo/libs/deep_models/flow/hd3/hd3model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from .models.hd3net import HD3Net 4 | # from .hd3losses import * 5 | # from utils.visualizer import get_visualization 6 | 7 | 8 | class HD3Model(nn.Module): 9 | 10 | def __init__(self, task, encoder, decoder, corr_range=None, context=False): 11 | super(HD3Model, self).__init__() 12 | self.ds = 6 # default downsample ratio of the coarsest level 13 | self.task = task 14 | self.encoder = encoder 15 | self.decoder = decoder 16 | self.corr_range = corr_range 17 | self.context = context 18 | # self.criterion = LossCalculator(task) 19 | # self.eval_epe = EndPointError 20 | self.hd3net = HD3Net(task, encoder, decoder, corr_range, context, 21 | self.ds) 22 | 23 | def forward(self, 24 | img_list, 25 | label_list=None, 26 | get_vect=True, 27 | get_prob=False, 28 | get_loss=False, 29 | get_epe=False, 30 | get_vis=False): 31 | result = {} 32 | 33 | ms_prob, ms_vect = self.hd3net(torch.cat(img_list, 1)) 34 | if get_vect: 35 | result['vect'] = ms_vect[-1] 36 | # if get_prob: 37 | # result['prob'] = ms_prob[-1] 38 | # if get_loss: 39 | # result['loss'] = self.criterion(ms_prob, ms_vect, label_list[0], 40 | # self.corr_range, self.ds) 41 | # if get_epe: 42 | # scale_factor = 1 / 2**(self.ds - len(ms_vect) + 1) 43 | # result['epe'] = self.eval_epe(ms_vect[-1] * scale_factor, 44 | # label_list[0]) 45 | # if get_vis: 46 | # result['vis'] = get_visualization(img_list, label_list, ms_vect, 47 | # ms_prob, self.ds) 48 | 49 | return result 50 | -------------------------------------------------------------------------------- /dfvo/libs/deep_models/flow/hd3/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/best-of-acrv/toposlam/3ec1dda210722d86bf77f101dca57ba27baa5833/dfvo/libs/deep_models/flow/hd3/models/__init__.py -------------------------------------------------------------------------------- /dfvo/libs/deep_models/flow/hd3/models/decoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | BatchNorm = nn.BatchNorm2d 5 | 6 | 7 | class PreActBlock(nn.Module): 8 | '''Pre-activation version of the BasicBlock.''' 9 | expansion = 1 10 | 11 | def __init__(self, in_planes, planes, stride=1, normalize=True): 12 | super(PreActBlock, 
self).__init__() 13 | if normalize: 14 | self.bn1 = BatchNorm(in_planes) 15 | self.conv1 = nn.Conv2d( 16 | in_planes, 17 | planes, 18 | kernel_size=3, 19 | stride=stride, 20 | padding=1, 21 | bias=False) 22 | self.bn2 = BatchNorm(planes) 23 | self.conv2 = nn.Conv2d( 24 | planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 25 | self.relu = nn.ReLU(inplace=True) 26 | 27 | if stride != 1 or in_planes != self.expansion * planes: 28 | self.shortcut = nn.Sequential( 29 | nn.Conv2d( 30 | in_planes, 31 | self.expansion * planes, 32 | kernel_size=1, 33 | stride=stride, 34 | bias=False)) 35 | 36 | def forward(self, x): 37 | out = self.relu(self.bn1(x)) if hasattr(self, 'bn1') else x 38 | shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x 39 | out = self.conv1(out) 40 | out = self.conv2(self.relu(self.bn2(out))) 41 | out += shortcut 42 | return out 43 | 44 | 45 | class ResBlock(nn.Module): 46 | expansion = 1 47 | 48 | def __init__(self, in_planes, planes, stride=1): 49 | super(ResBlock, self).__init__() 50 | self.conv1 = nn.Conv2d( 51 | in_planes, 52 | planes, 53 | kernel_size=3, 54 | stride=stride, 55 | padding=1, 56 | bias=False) 57 | self.bn1 = BatchNorm(planes) 58 | self.conv2 = nn.Conv2d( 59 | planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 60 | self.bn2 = BatchNorm(planes) 61 | self.relu = nn.ReLU(inplace=True) 62 | 63 | self.shortcut = nn.Sequential() 64 | if stride != 1 or in_planes != self.expansion * planes: 65 | self.shortcut = nn.Sequential( 66 | nn.Conv2d( 67 | in_planes, 68 | self.expansion * planes, 69 | kernel_size=1, 70 | stride=stride, 71 | bias=False), BatchNorm(self.expansion * planes)) 72 | 73 | def forward(self, x): 74 | out = self.relu(self.bn1(self.conv1(x))) 75 | out = self.bn2(self.conv2(out)) 76 | out += self.shortcut(x) 77 | out = self.relu(out) 78 | return out 79 | 80 | 81 | class ResnetDecoder(nn.Module): 82 | 83 | def __init__(self, inplane, outplane): 84 | super(ResnetDecoder, self).__init__() 85 | self.block1 = PreActBlock(inplane, outplane, normalize=False) 86 | self.block2 = PreActBlock(outplane, outplane, normalize=True) 87 | 88 | def forward(self, x): 89 | x = self.block1(x) 90 | out = self.block2(x) 91 | return out 92 | 93 | 94 | class HDADecoder(nn.Module): 95 | 96 | def __init__(self, inplane, outplane): 97 | super(HDADecoder, self).__init__() 98 | self.block1 = PreActBlock(inplane, outplane, normalize=False) 99 | self.block2 = PreActBlock(outplane, outplane, normalize=True) 100 | self.root = nn.Sequential( 101 | BatchNorm(outplane * 2), nn.ReLU(inplace=True), 102 | nn.Conv2d( 103 | outplane * 2, outplane, kernel_size=1, stride=1, bias=False)) 104 | 105 | def forward(self, x): 106 | y1 = self.block1(x) 107 | y2 = self.block2(y1) 108 | out = self.root(torch.cat([y1, y2], 1)) 109 | return out 110 | -------------------------------------------------------------------------------- /dfvo/libs/deep_models/flow/hd3/models/dla_up.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import torch 4 | from torch import nn 5 | from . import dla 6 | 7 | BatchNorm = nn.BatchNorm2d 8 | 9 | 10 | class Identity(nn.Module): 11 | 12 | def __init__(self): 13 | super(Identity, self).__init__() 14 | 15 | def forward(self, x): 16 | return x 17 | 18 | 19 | def fill_up_weights(up): 20 | w = up.weight.data 21 | f = math.ceil(w.size(2) / 2) 22 | c = (2 * f - 1 - f % 2) / (2. 
* f) 23 | for i in range(w.size(2)): 24 | for j in range(w.size(3)): 25 | w[0, 0, i, j] = \ 26 | (1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c)) 27 | for c in range(1, w.size(0)): 28 | w[c, 0, :, :] = w[0, 0, :, :] 29 | 30 | 31 | class IDAUp(nn.Module): 32 | 33 | def __init__(self, node_kernel, out_dim, channels, up_factors): 34 | super(IDAUp, self).__init__() 35 | self.channels = channels 36 | self.out_dim = out_dim 37 | for i, c in enumerate(channels): 38 | if c == out_dim: 39 | proj = Identity() 40 | else: 41 | proj = nn.Sequential( 42 | nn.Conv2d(c, out_dim, kernel_size=1, stride=1, bias=False), 43 | BatchNorm(out_dim), nn.ReLU(inplace=True)) 44 | f = int(up_factors[i]) 45 | if f == 1: 46 | up = Identity() 47 | else: 48 | up = nn.ConvTranspose2d( 49 | out_dim, 50 | out_dim, 51 | f * 2, 52 | stride=f, 53 | padding=f // 2, 54 | output_padding=0, 55 | groups=out_dim, 56 | bias=False) 57 | fill_up_weights(up) 58 | setattr(self, 'proj_' + str(i), proj) 59 | setattr(self, 'up_' + str(i), up) 60 | 61 | for i in range(1, len(channels)): 62 | node = nn.Sequential( 63 | nn.Conv2d( 64 | out_dim * 2, 65 | out_dim, 66 | kernel_size=node_kernel, 67 | stride=1, 68 | padding=node_kernel // 2, 69 | bias=False), BatchNorm(out_dim), nn.ReLU(inplace=True)) 70 | setattr(self, 'node_' + str(i), node) 71 | 72 | for m in self.modules(): 73 | classname = m.__class__.__name__ 74 | if isinstance(m, nn.Conv2d): 75 | m.weight.data.normal_(0, 0.02) 76 | elif classname.find('BatchNorm') != -1: 77 | m.weight.data.fill_(1) 78 | m.bias.data.zero_() 79 | 80 | def forward(self, layers): 81 | assert len(self.channels) == len(layers), \ 82 | '{} vs {} layers'.format(len(self.channels), len(layers)) 83 | layers = list(layers) 84 | for i, l in enumerate(layers): 85 | upsample = getattr(self, 'up_' + str(i)) 86 | project = getattr(self, 'proj_' + str(i)) 87 | layers[i] = upsample(project(l)) 88 | x = layers[0] 89 | y = [] 90 | for i in range(1, len(layers)): 91 | node = getattr(self, 'node_' + str(i)) 92 | x = node(torch.cat([x, layers[i]], 1)) 93 | y.append(x) 94 | return x, y 95 | 96 | 97 | class DLAUp(nn.Module): 98 | 99 | def __init__(self, channels, scales=(1, 2, 4, 8, 16), in_channels=None): 100 | super(DLAUp, self).__init__() 101 | if in_channels is None: 102 | in_channels = channels 103 | self.channels = channels 104 | channels = list(channels) 105 | scales = np.array(scales, dtype=int) 106 | for i in range(len(channels) - 1): 107 | j = -i - 2 108 | setattr( 109 | self, 'ida_{}'.format(i), 110 | IDAUp(3, channels[j], in_channels[j:], 111 | scales[j:] // scales[j])) 112 | scales[j + 1:] = scales[j] 113 | in_channels[j + 1:] = [channels[j] for _ in channels[j + 1:]] 114 | 115 | def forward(self, layers): 116 | layers = list(layers) 117 | assert len(layers) > 1 118 | ms_feat = [layers[-1]] 119 | for i in range(len(layers) - 1): 120 | ida = getattr(self, 'ida_{}'.format(i)) 121 | x, y = ida(layers[-i - 2:]) # y : aggregation nodes 122 | layers[-i - 1:] = y 123 | ms_feat.append(x) 124 | return ms_feat # x 125 | 126 | 127 | class DLAUpEncoder(nn.Module): 128 | 129 | def __init__(self, planes): 130 | super(DLAUpEncoder, self).__init__() 131 | self.first_level = 1 132 | self.base = dla.dla34(planes) 133 | scales = [2**i for i in range(len(planes[self.first_level:]))] 134 | self.dla_up = DLAUp(planes[self.first_level:], scales=scales) 135 | 136 | def forward(self, x): 137 | x = self.base(x) 138 | y = self.dla_up(x[self.first_level:]) 139 | return y[::-1] 140 | 141 | 142 | def DLAEncoder(planes): 143 | model = 
DLAUpEncoder(planes) 144 | return model 145 | 146 | 147 | def test(): 148 | net = dla34up([16, 32, 64, 128, 256, 512, 512]) 149 | y = net(torch.randn(1, 3, 384, 448)) 150 | for t in y: 151 | print(t.size()) 152 | 153 | 154 | if __name__ == '__main__': 155 | test() 156 | -------------------------------------------------------------------------------- /dfvo/libs/deep_models/flow/hd3/models/vgg.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | BatchNorm = nn.BatchNorm2d 4 | 5 | 6 | class BasicBlock(nn.Module): 7 | 8 | def __init__(self, in_planes, out_planes): 9 | super(BasicBlock, self).__init__() 10 | self.conv1 = nn.Conv2d( 11 | in_planes, 12 | out_planes, 13 | kernel_size=3, 14 | stride=2, 15 | padding=1, 16 | dilation=1, 17 | bias=False) 18 | self.bn1 = BatchNorm(out_planes) 19 | self.conv2 = nn.Conv2d( 20 | out_planes, 21 | out_planes, 22 | kernel_size=3, 23 | stride=1, 24 | padding=1, 25 | dilation=1, 26 | bias=False) 27 | self.bn2 = BatchNorm(out_planes) 28 | self.conv3 = nn.Conv2d( 29 | out_planes, 30 | out_planes, 31 | kernel_size=3, 32 | stride=1, 33 | padding=1, 34 | dilation=1, 35 | bias=False) 36 | self.bn3 = BatchNorm(out_planes) 37 | self.relu = nn.ReLU(inplace=True) 38 | 39 | def forward(self, x): 40 | out = self.conv1(x) 41 | out = self.bn1(out) 42 | out = self.relu(out) 43 | 44 | out = self.conv2(out) 45 | out = self.bn2(out) 46 | out = self.relu(out) 47 | 48 | out = self.conv3(out) 49 | out = self.bn3(out) 50 | out = self.relu(out) 51 | 52 | return out 53 | 54 | 55 | class VGG(nn.Module): 56 | 57 | def __init__(self, block, planes): 58 | super(VGG, self).__init__() 59 | self.levels = len(planes) 60 | channels = [3] + planes 61 | for i in range(self.levels): 62 | setattr(self, 'block_{}'.format(i), 63 | block(channels[i], channels[i + 1])) 64 | 65 | for m in self.modules(): 66 | classname = m.__class__.__name__ 67 | if isinstance(m, nn.Conv2d): 68 | m.weight.data.normal_(0, 0.02) 69 | elif classname.find('BatchNorm') != -1: 70 | m.weight.data.fill_(1) 71 | m.bias.data.zero_() 72 | 73 | def forward(self, x): 74 | out = [] 75 | for i in range(self.levels): 76 | x = getattr(self, 'block_' + str(i))(x) 77 | out.append(x) 78 | return out 79 | 80 | 81 | def VGGEncoder(planes): 82 | return VGG(BasicBlock, planes) 83 | -------------------------------------------------------------------------------- /dfvo/libs/deep_models/flow/lite_flow_net/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/best-of-acrv/toposlam/3ec1dda210722d86bf77f101dca57ba27baa5833/dfvo/libs/deep_models/flow/lite_flow_net/__init__.py -------------------------------------------------------------------------------- /dfvo/libs/deep_models/flow/lite_flow_net/lite_flow.py: -------------------------------------------------------------------------------- 1 | '''''' 2 | ''' 3 | @Author: Huangying Zhan (huangying.zhan.work@gmail.com) 4 | @Date: 2020-05-19 5 | @Copyright: Copyright (C) Huangying Zhan 2020. All rights reserved. Please refer to the license file. 
6 | @LastEditTime: 2020-06-04 7 | @LastEditors: Huangying Zhan 8 | @Description: This is the interface for LiteFlowNet 9 | ''' 10 | 11 | import cv2 12 | import math 13 | import numpy as np 14 | import os 15 | import sys 16 | import torch 17 | import torch.nn.functional as F 18 | 19 | from .lite_flow_net import LiteFlowNet 20 | from ..deep_flow import DeepFlow 21 | 22 | 23 | class LiteFlow(DeepFlow): 24 | """LiteFlow is the interface for LiteFlowNet. 25 | """ 26 | 27 | def __init__(self, *args, **kwargs): 28 | super(LiteFlow, self).__init__(*args, **kwargs) 29 | # FIXME: half-flow issue 30 | self.half_flow = False 31 | 32 | def initialize_network_model(self, weight_path, finetune): 33 | """initialize flow_net model with weight_path 34 | 35 | Args: 36 | weight_path (str): weight path 37 | finetune (bool): finetune model on the run if True 38 | """ 39 | if weight_path is not None: 40 | print("==> Initialize LiteFlowNet with [{}]: ".format(weight_path)) 41 | # Initialize network 42 | self.model = LiteFlowNet().cuda() 43 | 44 | # Load model weights 45 | checkpoint = torch.load(weight_path) 46 | self.model.load_state_dict(checkpoint) 47 | 48 | if finetune: 49 | self.model.train() 50 | else: 51 | self.model.eval() 52 | else: 53 | assert False, "No LiteFlowNet pretrained model is provided." 54 | 55 | def inference(self, img1, img2): 56 | """Predict optical flow for the given pairs 57 | 58 | Args: 59 | img1 (tensor, [Nx3xHxW]): image 1; intensity [0-1] 60 | img2 (tensor, [Nx3xHxW]): image 2; intensity [0-1] 61 | 62 | Returns: 63 | a dictionary containing flows at different scales, resized back to input scale 64 | - **scale-N** (tensor, [Nx2xHxW]): flow from img1 to img2 at scale level-N 65 | """ 66 | # get shape 67 | _, _, h, w = img1.shape 68 | th, tw = self.get_target_size(h, w) 69 | 70 | # forward pass 71 | flow_inputs = [img1, img2] 72 | resized_img_list = [ 73 | F.interpolate( 74 | img, (th, tw), mode='bilinear', align_corners=True) 75 | for img in flow_inputs 76 | ] 77 | output = self.model(resized_img_list) 78 | 79 | # Post-process output 80 | flows = {} 81 | for s in self.flow_scales: 82 | flows[s] = self.resize_dense_flow( 83 | output[s], 84 | h, w) 85 | if self.half_flow: 86 | flows[s] /= 2. 
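        # (Editor's note) At this point `flows` maps each scale in
        # self.flow_scales to an [Nx2xHxW] tensor that resize_dense_flow() has
        # already resized back to the original (h, w) input resolution; when the
        # half-flow workaround above is enabled, the magnitudes are additionally
        # halved before being returned to the caller.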
87 | return flows 88 | 89 | def inference_flow(self, 90 | img1, img2, 91 | forward_backward=False, 92 | dataset='kitti'): 93 | """Estimate flow (1->2) and compute flow consistency 94 | 95 | Args: 96 | img1 (tensor, [Nx3xHxW]): image 1 97 | img2 (tensor [Nx3xHxW]): image 2 98 | foward_backward (bool): forward-backward flow consistency is used if True 99 | dataset (str): dataset type 100 | 101 | Returns: 102 | a dictionary containing 103 | - **forward** (tensor, [Nx2xHxW]) : forward flow 104 | - **backward** (tensor, [Nx2xHxW]) : backward flow 105 | - **flow_diff** (tensor, [NxHxWx1]) : foward-backward flow inconsistency 106 | """ 107 | # flow net inference to get flows 108 | if forward_backward: 109 | input_img1 = torch.cat((img1, img2), dim=0) 110 | input_img2 = torch.cat((img2, img1), dim=0) 111 | else: 112 | input_img1 = img1 113 | input_img2 = img2 114 | 115 | # inference with/without gradient 116 | if self.enable_finetune: 117 | combined_flow_data = self.inference(input_img1, input_img2) 118 | else: 119 | combined_flow_data = self.inference_no_grad(input_img1, input_img2) 120 | 121 | self.forward_flow = {} 122 | self.backward_flow = {} 123 | self.flow_diff = {} 124 | self.px1on2 = {} 125 | for s in self.flow_scales: 126 | self.forward_flow[s] = combined_flow_data[s][0:1] 127 | if forward_backward: 128 | self.backward_flow[s] = combined_flow_data[s][1:2] 129 | 130 | # sampled flow 131 | # Get sampling pixel coordinates 132 | self.px1on2[s] = self.flow_to_pix(self.forward_flow[s]) 133 | 134 | # Forward-Backward flow consistency check 135 | if forward_backward: 136 | # get flow-consistency error map 137 | self.flow_diff[s] = self.forward_backward_consistency( 138 | flow1=self.forward_flow[s], 139 | flow2=self.backward_flow[s], 140 | px1on2=self.px1on2[s]) 141 | 142 | # summarize flow data and flow difference for DF-VO 143 | flows = {} 144 | flows['forward'] = self.forward_flow[1].clone() 145 | if forward_backward: 146 | flows['backward'] = self.backward_flow[1].clone() 147 | flows['flow_diff'] = self.flow_diff[1].clone() 148 | return flows 149 | -------------------------------------------------------------------------------- /dfvo/libs/deep_models/pose/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/best-of-acrv/toposlam/3ec1dda210722d86bf77f101dca57ba27baa5833/dfvo/libs/deep_models/pose/__init__.py -------------------------------------------------------------------------------- /dfvo/libs/deep_models/pose/deep_pose.py: -------------------------------------------------------------------------------- 1 | '''''' 2 | ''' 3 | @Author: Huangying Zhan (huangying.zhan.work@gmail.com) 4 | @Date: 2020-05-19 5 | @Copyright: Copyright (C) Huangying Zhan 2020. All rights reserved. Please refer to the license file. 
6 | @LastEditTime: 2020-06-04 7 | @LastEditors: Huangying Zhan 8 | @Description: This is the Base class for deep pose network interface 9 | ''' 10 | 11 | import torch 12 | 13 | 14 | class DeepPose(): 15 | """DeepPose is the Base class for deep pose network interface 16 | """ 17 | def __init__(self): 18 | # Basic configuration 19 | self.device = torch.device('cuda') 20 | self.enable_finetune = False 21 | 22 | # ========================== Methods need to be implemented ======================= 23 | def initialize_network_model(self, weight_path): 24 | """initialize network and load pretrained model 25 | 26 | Args: 27 | weight_path (str): directory stores pretrained models 28 | - **pose_encoder.pth**: encoder model; 29 | - **pose.pth**: pose decoder model 30 | dataset (str): dataset setup 31 | """ 32 | raise NotImplementedError 33 | 34 | def inference(self, imgs): 35 | """Pose prediction 36 | 37 | Args: 38 | imgs (tensor, Nx2CxHxW): concatenated image pair 39 | 40 | Returns: 41 | pose (tensor, [Nx4x4]): relative pose from img2 to img1 42 | """ 43 | raise NotImplementedError 44 | 45 | def inference_pose(self, img): 46 | """Pose prediction 47 | 48 | Args: 49 | imgs (tensor, Nx2CxHxW): concatenated image pair 50 | 51 | Returns: 52 | pose (tensor, [Nx4x4]): relative pose from img2 to img1 53 | """ 54 | raise NotImplementedError 55 | # ================================================================================= 56 | 57 | @torch.no_grad() 58 | def inference_no_grad(self, imgs): 59 | """Pose prediction 60 | 61 | Args: 62 | imgs (tensor, Nx2CxHxW): concatenated image pair 63 | 64 | Returns: 65 | pose (tensor, [Nx4x4]): relative pose from img2 to img1 66 | """ 67 | return self.inference(imgs) 68 | 69 | def setup_train(self, deep_model, cfg): 70 | """Setup training configurations for online finetuning depth network 71 | 72 | Args: 73 | deep_model (DeepModel): DeepModel interface object 74 | cfg (edict): configuration dictionary for depth finetuning 75 | """ 76 | self.enable_finetune = cfg.enable 77 | 78 | # train parameter 79 | deep_model.parameters_to_train += list(self.model.parameters()) 80 | -------------------------------------------------------------------------------- /dfvo/libs/deep_models/pose/monodepth2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/best-of-acrv/toposlam/3ec1dda210722d86bf77f101dca57ba27baa5833/dfvo/libs/deep_models/pose/monodepth2/__init__.py -------------------------------------------------------------------------------- /dfvo/libs/deep_models/pose/monodepth2/monodepth2.py: -------------------------------------------------------------------------------- 1 | '''''' 2 | ''' 3 | @Author: Huangying Zhan (huangying.zhan.work@gmail.com) 4 | @Date: 2020-05-19 5 | @Copyright: Copyright (C) Huangying Zhan 2020. All rights reserved. Please refer to the license file. 
6 | @LastEditTime: 2020-06-25 7 | @LastEditors: Please set LastEditors 8 | @Description: This is the interface for Monodepth2 pose network 9 | ''' 10 | 11 | import numpy as np 12 | import os 13 | import PIL.Image as pil 14 | import torch 15 | from torchvision import transforms 16 | 17 | from .pose_decoder import PoseDecoder 18 | from ..deep_pose import DeepPose 19 | from dfvo.libs.deep_models.depth.monodepth2.resnet_encoder import ResnetEncoder 20 | from dfvo.libs.deep_models.depth.monodepth2.layers import transformation_from_parameters 21 | 22 | 23 | 24 | class Monodepth2PoseNet(DeepPose): 25 | """This is the interface for Monodepth2 pose network 26 | """ 27 | def __init__(self, *args, **kwargs): 28 | super(Monodepth2PoseNet, self).__init__(*args, **kwargs) 29 | self.enable_finetune = False 30 | 31 | def initialize_network_model(self, weight_path, dataset, finetune): 32 | """initialize network and load pretrained model 33 | 34 | Args: 35 | weight_path (str): directory stores pretrained models 36 | - **pose_encoder.pth**: encoder model; 37 | - **pose.pth**: pose decoder model 38 | dataset (str): dataset setup 39 | finetune (bool): finetune model on the run if True 40 | """ 41 | device = torch.device('cuda') 42 | 43 | # initilize network 44 | self.encoder = ResnetEncoder(18, False, 2) 45 | self.pose_decoder = PoseDecoder( 46 | self.encoder.num_ch_enc, 1, 2) 47 | 48 | print("==> Initialize Pose-CNN with [{}]".format(weight_path)) 49 | # loading pretrained model (encoder) 50 | encoder_path = os.path.join(weight_path, 'pose_encoder.pth') 51 | loaded_dict_enc = torch.load(encoder_path, map_location=device) 52 | filtered_dict_enc = {k: v for k, v in loaded_dict_enc.items() if k in self.encoder.state_dict()} 53 | self.encoder.load_state_dict(filtered_dict_enc) 54 | self.encoder.to(device) 55 | 56 | # loading pretrained model (pose-decoder) 57 | pose_decoder_path = os.path.join(weight_path, 'pose.pth') 58 | loaded_dict = torch.load(pose_decoder_path, map_location=device) 59 | self.pose_decoder.load_state_dict(loaded_dict) 60 | self.pose_decoder.to(device) 61 | 62 | # concatenate encoders and decoders 63 | self.model = torch.nn.Sequential(self.encoder, self.pose_decoder) 64 | 65 | if finetune: 66 | self.encoder.train() 67 | self.pose_decoder.train() 68 | else: 69 | self.encoder.eval() 70 | self.pose_decoder.eval() 71 | 72 | # image size 73 | self.feed_height = 192 74 | self.feed_width = 640 75 | 76 | # dataset parameters 77 | if 'kitti' in dataset: 78 | self.stereo_baseline_multiplier = 5.4 79 | elif 'tum' in dataset: 80 | self.stereo_baseline_multiplier = 1. 81 | elif 'robotcar' in dataset: 82 | self.stereo_baseline_multiplier = 5.4 83 | else: 84 | self.stereo_baseline_multiplier = 1. 
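        # (Editor's note, assumption) The multiplier rescales the translation
        # predicted by the stereo-trained Monodepth2 pose network, which -- as
        # noted in inference_pose() below -- assumes a 0.1-unit stereo baseline.
        # KITTI's actual stereo baseline is roughly 0.54 m, so 0.54 / 0.1 = 5.4
        # restores an approximately metric scale; datasets without such a
        # calibration keep the raw network scale (multiplier 1.0).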
85 | 86 | def inference(self, imgs): 87 | """Pose prediction 88 | 89 | Args: 90 | imgs (tensor, Nx2CxHxW): concatenated image pair 91 | 92 | Returns: 93 | pose (tensor, [Nx4x4]): relative pose from img2 to img1 94 | """ 95 | features = self.encoder(imgs) 96 | axisangle, translation = self.pose_decoder([features]) 97 | pose = transformation_from_parameters(axisangle[:, 0], translation[:, 0], invert=True) 98 | 99 | return pose 100 | 101 | def inference_pose(self, img): 102 | """Pose prediction 103 | 104 | Args: 105 | imgs (tensor, Nx2CxHxW): concatenated image pair 106 | 107 | Returns: 108 | pose (tensor, [Nx4x4]): relative pose from img2 to img1 109 | """ 110 | if self.enable_finetune: 111 | predictions = self.inference(img) 112 | else: 113 | predictions = self.inference_no_grad(img) 114 | self.pred_pose = predictions 115 | 116 | # summarize pose predictions for DF-VO 117 | pose = self.pred_pose[:1].clone() 118 | pose[:, :3, 3] *= self.stereo_baseline_multiplier # monodepth2 assumes 0.1 unit baseline 119 | return pose 120 | -------------------------------------------------------------------------------- /dfvo/libs/deep_models/pose/monodepth2/pose_decoder.py: -------------------------------------------------------------------------------- 1 | # Copyright Niantic 2019. Patent Pending. All rights reserved. 2 | # 3 | # This software is licensed under the terms of the Monodepth2 licence 4 | # which allows for non-commercial use only, the full terms of which are made 5 | # available in the LICENSE file. 6 | 7 | from __future__ import absolute_import, division, print_function 8 | 9 | import torch 10 | import torch.nn as nn 11 | from collections import OrderedDict 12 | 13 | 14 | class PoseDecoder(nn.Module): 15 | def __init__(self, num_ch_enc, num_input_features, num_frames_to_predict_for=None, stride=1): 16 | super(PoseDecoder, self).__init__() 17 | 18 | self.num_ch_enc = num_ch_enc 19 | self.num_input_features = num_input_features 20 | 21 | if num_frames_to_predict_for is None: 22 | num_frames_to_predict_for = num_input_features - 1 23 | self.num_frames_to_predict_for = num_frames_to_predict_for 24 | 25 | self.convs = OrderedDict() 26 | self.convs[("squeeze")] = nn.Conv2d(self.num_ch_enc[-1], 256, 1) 27 | self.convs[("pose", 0)] = nn.Conv2d(num_input_features * 256, 256, 3, stride, 1) 28 | self.convs[("pose", 1)] = nn.Conv2d(256, 256, 3, stride, 1) 29 | self.convs[("pose", 2)] = nn.Conv2d(256, 6 * num_frames_to_predict_for, 1) 30 | 31 | self.relu = nn.ReLU() 32 | 33 | self.net = nn.ModuleList(list(self.convs.values())) 34 | 35 | def forward(self, input_features): 36 | last_features = [f[-1] for f in input_features] 37 | 38 | cat_features = [self.relu(self.convs["squeeze"](f)) for f in last_features] 39 | cat_features = torch.cat(cat_features, 1) 40 | 41 | out = cat_features 42 | for i in range(3): 43 | out = self.convs[("pose", i)](out) 44 | if i != 2: 45 | out = self.relu(out) 46 | 47 | out = out.mean(3).mean(2) 48 | 49 | out = 0.01 * out.view(-1, self.num_frames_to_predict_for, 1, 6) 50 | 51 | axisangle = out[..., :3] 52 | translation = out[..., 3:] 53 | 54 | return axisangle, translation 55 | -------------------------------------------------------------------------------- /dfvo/libs/flowlib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/best-of-acrv/toposlam/3ec1dda210722d86bf77f101dca57ba27baa5833/dfvo/libs/flowlib/__init__.py -------------------------------------------------------------------------------- 
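Editor's note: the snippet below is an illustrative sketch (not part of the repository) of how the pose head defined in pose_decoder.py is driven by Monodepth2PoseNet.inference() above: a ResNet-18 encoder consumes a channel-concatenated image pair and PoseDecoder regresses a 6-DoF motion that transformation_from_parameters() converts into a 4x4 relative pose. Tensor shapes and the 192x640 feed size follow the interface code; the weights here are random.

    import torch
    from dfvo.libs.deep_models.depth.monodepth2.resnet_encoder import ResnetEncoder
    from dfvo.libs.deep_models.depth.monodepth2.layers import transformation_from_parameters
    from dfvo.libs.deep_models.pose.monodepth2.pose_decoder import PoseDecoder

    encoder = ResnetEncoder(18, False, 2).eval()            # 18 layers, no ImageNet weights, 2 input frames
    decoder = PoseDecoder(encoder.num_ch_enc, 1, 2).eval()  # 1 feature stream, predict for 2 frames

    img_pair = torch.rand(1, 6, 192, 640)                   # two RGB frames stacked along the channel axis
    with torch.no_grad():
        feats = encoder(img_pair)
        axisangle, translation = decoder([feats])           # each: [1, 2, 1, 3]
        pose = transformation_from_parameters(
            axisangle[:, 0], translation[:, 0], invert=True)
    print(pose.shape)                                       # torch.Size([1, 4, 4])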
/dfvo/libs/general/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/best-of-acrv/toposlam/3ec1dda210722d86bf77f101dca57ba27baa5833/dfvo/libs/general/__init__.py -------------------------------------------------------------------------------- /dfvo/libs/general/configuration.py: -------------------------------------------------------------------------------- 1 | '''''' 2 | ''' 3 | @Author: Huangying Zhan (huangying.zhan.work@gmail.com) 4 | @Date: 2020-05-19 5 | @Copyright: Copyright (C) Huangying Zhan 2020. All rights reserved. Please refer to the license file. 6 | @LastEditTime: 2020-05-20 7 | @LastEditors: Huangying Zhan 8 | @Description: ConfigLoader contains operations for processing multiple yml files 9 | ''' 10 | 11 | from easydict import EasyDict as edict 12 | import yaml 13 | 14 | def read_yaml(filename): 15 | """Load yaml file as a dictionary item 16 | 17 | Args: 18 | filename (str): yaml file path 19 | 20 | Returns: 21 | cfg (dict): configuration 22 | """ 23 | if filename is not None: 24 | with open(filename, 'r') as f: 25 | return yaml.load(f, Loader=yaml.FullLoader) 26 | else: 27 | return {} 28 | 29 | 30 | class ConfigLoader(): 31 | '''Configuration loader for yml configuration files 32 | ''' 33 | def merge_cfg(self, cfg_files): 34 | """Merge default configuration and custom configuration 35 | 36 | Args: 37 | cfg_files (str): configuration file paths [default, custom] 38 | 39 | Returns: 40 | cfg (edict): merged EasyDict 41 | """ 42 | cfg = {} 43 | for f in cfg_files: 44 | if f is not None: 45 | cfg = self.update_dict(cfg, read_yaml(f)) 46 | return edict(cfg) 47 | 48 | def save_cfg(self, cfg_files, file_path): 49 | """Merge cfg_files and save merged configuration to file_path 50 | 51 | Args: 52 | cfg_files (str): configuration file paths [default, custom] 53 | file_path (str): path of text file for writing the configurations 54 | """ 55 | # read configurations 56 | default = read_yaml(cfg_files[0]) 57 | merged = self.merge_cfg(cfg_files) 58 | 59 | # create file to be written 60 | f = open(file_path, 'w') 61 | 62 | # write header line 63 | line = "# " + "-"*20 + " Setup " + "-"*74 64 | line += "|" + "-"*10 + " Default " + "-"*20 + "\n" 65 | f.writelines(line) 66 | 67 | # write configurations 68 | self.write_cfg(default, merged, f) 69 | f.close() 70 | 71 | def update_dict(self, dict1, dict2): 72 | """Update dict1 according to dict2 73 | 74 | Args: 75 | dict1 (dict): reference dictionary 76 | dict2 (dict): new dictionary 77 | 78 | Returns: 79 | dict1 (dict): updated reference dictionary 80 | """ 81 | for item in dict2: 82 | if dict1.get(item, -1) != -1: 83 | if isinstance(dict1[item], dict): 84 | dict1[item] = self.update_dict(dict1[item], dict2[item]) 85 | else: 86 | dict1[item] = dict2[item] 87 | else: 88 | dict1[item] = dict2[item] 89 | return dict1 90 | 91 | def write_cfg(self, default, merge, file_io, level_cnt=0): 92 | """Write merged configuration to file and show difference 93 | with default configuration 94 | 95 | Args: 96 | default (dict): default configuration dictionary 97 | merge (dict): merged configuration dictionary 98 | file_io (TextIOWrapper): text IO wrapper object 99 | level_cnt (int): dictionary level counter 100 | """ 101 | offset_len = 100 102 | for item in merge: 103 | if isinstance(merge[item], dict): 104 | # go deeper for dict item 105 | line = " "*level_cnt + item + ": " 106 | offset = offset_len - len(line) 107 | line += " "*offset + " # | " 108 | 109 | # check if default has 
this config 110 | if default.get(item, -1) == -1: 111 | default[item] = {} 112 | line += " --NEW-- " 113 | file_io.writelines(line + "\n") 114 | self.write_cfg(default[item], merge[item], file_io, level_cnt+1) 115 | else: 116 | # write current config 117 | line = " " * level_cnt + item + ": " 118 | if merge[item] is not None: 119 | line += str(merge[item]) 120 | else: 121 | line += " " 122 | 123 | offset = offset_len - len(line) 124 | line += " "*offset + " # | " 125 | file_io.writelines(line) 126 | 127 | # write default if default is different from current 128 | if default.get(item, -1) != -1: 129 | line = " " 130 | if merge[item] != default[item]: 131 | line = str(default[item]) 132 | file_io.writelines(line) 133 | else: 134 | line = " --NEW-- " 135 | file_io.writelines(line) 136 | file_io.writelines("\n") 137 | -------------------------------------------------------------------------------- /dfvo/libs/general/kitti_raw_utils.py: -------------------------------------------------------------------------------- 1 | '''''' 2 | ''' 3 | @Author: Huangying Zhan (huangying.zhan.work@gmail.com) 4 | @Date: 2019-09-01 5 | @Copyright: Copyright (C) Huangying Zhan 2020. All rights reserved. Please refer to the license file. 6 | @LastEditTime: 2020-05-20 7 | @LastEditors: Huangying Zhan 8 | @Description: Provides helper methods for loading and parsing KITTI Raw data 9 | ''' 10 | import numpy as np 11 | from collections import namedtuple 12 | import os 13 | 14 | from .kitti_utils import * 15 | 16 | 17 | OxtsPacket = namedtuple('OxtsPacket', 18 | 'lat, lon, alt, ' + 19 | 'roll, pitch, yaw, ' + 20 | 'vn, ve, vf, vl, vu, ' + 21 | 'ax, ay, az, af, al, au, ' + 22 | 'wx, wy, wz, wf, wl, wu, ' + 23 | 'pos_accuracy, vel_accuracy, ' + 24 | 'navstat, numsats, ' + 25 | 'posmode, velmode, orimode') 26 | 27 | 28 | def generate_pose(seq, frame_idx, do_flip): 29 | """Get pose for a frame in a sequence 30 | 31 | Args: 32 | seq (str): sequence oxts_dir directory 33 | frame_idx (int): frame index 34 | do_flip (bool): flip sequence horizontally 35 | 36 | Returns: 37 | pose (array, [4x4]): absolute pose w.r.t frame-0 38 | """ 39 | # Read oxts data 40 | oxts_files = [ 41 | os.path.join(seq, "{:010}.txt".format(0)), 42 | os.path.join(seq, "{:010}.txt".format(frame_idx)) 43 | ] 44 | oxts_packets = [] 45 | for filename in oxts_files: 46 | with open(filename, 'r') as f: 47 | for line in f.readlines(): 48 | line = line.split() 49 | # Last five entries are flags and counts 50 | line[:-5] = [float(x) for x in line[:-5]] 51 | line[-5:] = [int(float(x)) for x in line[-5:]] 52 | 53 | data = OxtsPacket(*line) 54 | oxts_packets.append(data) 55 | 56 | # get absolute pose w.r.t frame-0 57 | gps_poses = poses_from_oxts(oxts_packets) 58 | 59 | # convert from GPS coordinate system to camera coordinate system 60 | # - Camera: x: right, y: down, z: forward 61 | # - GPS/IMU: x: forward, y: left, z: up 62 | T = np.eye(4) 63 | T[:3, :3] = np.array([ 64 | [0, -1, 0], 65 | [0, 0, -1], 66 | [1, 0, 0]] 67 | ) 68 | T_01 = np.linalg.inv(gps_poses[0]) @ gps_poses[1] 69 | # pose = (T @ gps_poses[0]) @ np.linalg.inv(T @ gps_poses[1]) 70 | pose = T @ T_01 @ np.linalg.inv(T) 71 | 72 | if do_flip: 73 | pose[:3, :3] = flip_rotation(pose[:3, :3]) 74 | pose[0, 3] = -pose[0, 3] 75 | 76 | return pose 77 | 78 | 79 | def flip_rotation(R): 80 | """Transform rotation when there is a flipping of image along x-axis 81 | 82 | Args: 83 | R (array, [3x3]): rotation matrix 84 | 85 | Returns: 86 | new_R (array, [3x3]): new rotation matrix 87 | """ 88 | theta_x = 
np.arctan2(R[2,1], R[2,2]) 89 | theta_y = np.arctan2(-R[2,0], np.linalg.norm([R[2,1], R[2,2]])) 90 | theta_z = np.arctan2(R[1,0], R[0,0]) 91 | 92 | R_x = np.asarray([[1, 0, 0], 93 | [0, np.cos(theta_x), -np.sin(theta_x)], 94 | [0, np.sin(theta_x), np.cos(theta_x)]]) 95 | R_y = np.asarray([[np.cos(theta_y), 0, np.sin(theta_y)], 96 | [0, 1, 0], 97 | [-np.sin(theta_y), 0, np.cos(theta_y)]]) 98 | R_z = np.asarray([[np.cos(theta_z), -np.sin(theta_z), 0], 99 | [np.sin(theta_z), np.cos(theta_z), 0], 100 | [0, 0, 1]]) 101 | new_R = np.linalg.inv(R_z) @ np.linalg.inv(R_y) @ R_x 102 | return new_R 103 | 104 | 105 | def poses_from_oxts(oxts_packets): 106 | """Helper method to compute SE(3) pose matrices from OXTS packets. 107 | 108 | Args: 109 | oxts_packets (namedtuple): oxts data 110 | 111 | Returns: 112 | poses (list): list of sensor poses 113 | """ 114 | er = 6378137. # earth radius (approx.) in meters 115 | 116 | # compute scale from first lat value 117 | scale = np.cos(oxts_packets[0].lat * np.pi / 180.) 118 | 119 | t_0 = [] # initial position 120 | poses = [] # list of poses computed from oxts 121 | for packet in oxts_packets: 122 | # Use a Mercator projection to get the translation vector 123 | tx = scale * packet.lon * np.pi * er / 180. 124 | ty = scale * er * \ 125 | np.log(np.tan((90. + packet.lat) * np.pi / 360.)) 126 | tz = packet.alt 127 | t = np.array([tx, ty, tz]) 128 | 129 | # We want the initial position to be the origin, but keep the ENU 130 | # coordinate system 131 | if len(t_0) == 0: 132 | t_0 = t 133 | 134 | # Use the Euler angles to get the rotation matrix 135 | Rx = rotx(packet.roll) 136 | Ry = roty(packet.pitch) 137 | Rz = rotz(packet.yaw) 138 | R = Rz.dot(Ry.dot(Rx)) 139 | 140 | # Combine the translation and rotation into a homogeneous transform 141 | # poses.append(transform_from_rot_trans(R, t - t_0)) 142 | poses.append(transform_from_rot_trans(R, t)) 143 | 144 | return poses -------------------------------------------------------------------------------- /dfvo/libs/general/kitti_utils.py: -------------------------------------------------------------------------------- 1 | """Provides helper methods for loading and parsing KITTI data.""" 2 | 3 | from collections import namedtuple 4 | 5 | import matplotlib.image as mpimg 6 | import numpy as np 7 | 8 | __author__ = "Lee Clement" 9 | __email__ = "lee.clement@robotics.utias.utoronto.ca" 10 | 11 | 12 | def rotx(t): 13 | """Rotation about the x-axis. 14 | 15 | Args: 16 | t (float): angle 17 | 18 | Returns: 19 | R (array, [3x3]): rotation matrix 20 | """ 21 | c = np.cos(t) 22 | s = np.sin(t) 23 | return np.array([[1, 0, 0], 24 | [0, c, -s], 25 | [0, s, c]]) 26 | 27 | 28 | def roty(t): 29 | """Rotation about the y-axis. 30 | 31 | Args: 32 | t (float): angle 33 | 34 | Returns: 35 | R (array, [3x3]): rotation matrix 36 | """ 37 | c = np.cos(t) 38 | s = np.sin(t) 39 | return np.array([[c, 0, s], 40 | [0, 1, 0], 41 | [-s, 0, c]]) 42 | 43 | 44 | def rotz(t): 45 | """Rotation about the z-axis. 46 | 47 | Args: 48 | t (float): angle 49 | 50 | Returns: 51 | R (array, [3x3]): rotation matrix 52 | 53 | """ 54 | c = np.cos(t) 55 | s = np.sin(t) 56 | return np.array([[c, -s, 0], 57 | [s, c, 0], 58 | [0, 0, 1]]) 59 | 60 | 61 | def transform_from_rot_trans(R, t): 62 | """Transforation matrix from rotation matrix and translation vector. 
63 | 64 | Args: 65 | R (array, [3x3]): rotation matrix 66 | t (float): angle 67 | 68 | Returns: 69 | T (array, [4x4]): transformation matrix 70 | """ 71 | R = R.reshape(3, 3) 72 | t = t.reshape(3, 1) 73 | return np.vstack((np.hstack([R, t]), [0, 0, 0, 1])) 74 | 75 | 76 | def read_calib_file(filepath): 77 | """Read in a calibration file and parse into a dictionary. 78 | 79 | Args: 80 | filepath (str): file path 81 | 82 | Return: 83 | data (dict): a dictionary containing intrinsics for different sensors 84 | """ 85 | data = {} 86 | 87 | with open(filepath, 'r') as f: 88 | for line in f.readlines(): 89 | key, value = line.split(':', 1) 90 | # The only non-float values in these files are dates, which 91 | # we don't care about anyway 92 | try: 93 | data[key] = np.array([float(x) for x in value.split()]) 94 | except ValueError: 95 | pass 96 | 97 | return data 98 | 99 | 100 | def load_stereo_pairs(imL_files, imR_files, **kwargs): 101 | """Helper method to read stereo image pairs. 102 | 103 | Args: 104 | imL_files (list): left image file paths 105 | imR_files (list): right image file paths 106 | 107 | Returns: 108 | a list containing tuples 109 | - **left** (HxWxC): left image 110 | - **right** (HxWxC): left image 111 | """ 112 | StereoPair = namedtuple('StereoPair', 'left, right') 113 | 114 | impairs = [] 115 | for imfiles in zip(imL_files, imR_files): 116 | # Convert to uint8 and BGR for OpenCV if requested 117 | imformat = kwargs.get('format', '') 118 | if imformat is 'cv2': 119 | imL = np.uint8(mpimg.imread(imfiles[0]) * 255) 120 | imR = np.uint8(mpimg.imread(imfiles[1]) * 255) 121 | 122 | # Convert RGB to BGR 123 | if len(imL.shape) > 2: 124 | imL = imL[:, :, ::-1] 125 | imR = imR[:, :, ::-1] 126 | 127 | else: 128 | imL = mpimg.imread(imfiles[0]) 129 | imR = mpimg.imread(imfiles[1]) 130 | 131 | impairs.append(StereoPair(imL, imR)) 132 | 133 | return impairs 134 | 135 | 136 | def load_velo_scans(velo_files): 137 | """Helper method to parse velodyne binary files into a list of scans. 138 | 139 | Args: 140 | velo_files (list): velodyne data paths 141 | 142 | Returns: 143 | scan_list (list): list of velodyne scans, each scan is a [Nx4] array 144 | """ 145 | scan_list = [] 146 | for filename in velo_files: 147 | scan = np.fromfile(filename, dtype=np.float32) 148 | scan_list.append(scan.reshape((-1, 4))) 149 | 150 | return scan_list 151 | -------------------------------------------------------------------------------- /dfvo/libs/general/timer.py: -------------------------------------------------------------------------------- 1 | '''''' 2 | ''' 3 | @Author: Huangying Zhan (huangying.zhan.work@gmail.com) 4 | @Date: 2019-09-01 5 | @Copyright: Copyright (C) Huangying Zhan 2020. All rights reserved. Please refer to the license file. 6 | @LastEditTime: 2020-05-21 7 | @LastEditors: Huangying Zhan 8 | @Description: Timer object for counting times. 
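@Example: (editor's sketch, illustrative only; uses the Timer API defined below,
           run_flow_network is a placeholder for any timed work)
    timer = Timer()
    timer.start('flow', group='deep')
    run_flow_network()
    timer.end('flow')
    timer.time_analysis()   # prints the average duration per item, grouped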
9 | ''' 10 | 11 | import numpy as np 12 | from time import time 13 | 14 | 15 | class Timer(): 16 | """Timer class to count time and do time analysis 17 | """ 18 | 19 | def __init__(self, items=None): 20 | """ 21 | Args: 22 | items (list/str): list of items to be counted, each item is a str 23 | """ 24 | self.timers = {} 25 | if items is not None: 26 | self.add(items) 27 | 28 | def add(self, item, group=None): 29 | """add item to the timer 30 | 31 | Args: 32 | item (str/list): item name 33 | group (str): group name of the item 34 | """ 35 | if isinstance(item, list): 36 | for i in item: 37 | self.timers[i] = { 38 | 'name': i, 39 | 'time': 0, 40 | 'is_counting': False, 41 | 'duration': [], 42 | 'group': group 43 | } 44 | elif isinstance(item, str): 45 | self.timers[item] = { 46 | 'name': item, 47 | 'time': 0, 48 | 'is_counting': False, 49 | 'duration': [], 50 | 'group': group 51 | } 52 | else: 53 | assert False, "only list or str is accepted." 54 | 55 | def start(self, item, group=None): 56 | """Start timer for an item 57 | 58 | Args: 59 | item (str): timer name 60 | group (str): group name for the item 61 | """ 62 | if self.timers.get(item, -1) == -1: 63 | self.add(item, group) 64 | 65 | assert not(self.timers[item]['is_counting']), "Timer for {} has started already.".format(item) 66 | 67 | self.timers[item]['is_counting'] = True 68 | self.timers[item]['time'] = time() 69 | 70 | def end(self, item): 71 | """Stop timer for an item 72 | 73 | Args: 74 | item (str): timer name 75 | """ 76 | assert self.timers[item]['is_counting'], "Timer for {} has not started.".format(item) 77 | 78 | duration = time() - self.timers[item]['time'] 79 | self.timers[item]['duration'].append(duration) 80 | self.timers[item]['is_counting'] = False 81 | 82 | def time_analysis(self): 83 | """Time analysis of the items 84 | """ 85 | print("----- time breakdown -----") 86 | # group items according to groups 87 | group_timers = {'single': []} 88 | for key in sorted(self.timers.keys()): 89 | group_name = self.timers[key]['group'] 90 | if group_name is not None: 91 | if group_timers.get(group_name, -1) == -1: 92 | group_timers[group_name] = [] 93 | group_timers[group_name].append(self.timers[key]) 94 | else: 95 | group_timers['single'].append(self.timers[key]) 96 | 97 | # display times 98 | for group_name, members in group_timers.items(): 99 | print("Group [{}]: ".format(group_name)) 100 | group_avg_times = [] 101 | for member in members: 102 | avg_time = np.asarray(member['duration']).mean() 103 | group_avg_times.append(avg_time) 104 | print("\t[{}]: {:.03f}s".format(member['name'], avg_time)) 105 | -------------------------------------------------------------------------------- /dfvo/libs/geometry/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/best-of-acrv/toposlam/3ec1dda210722d86bf77f101dca57ba27baa5833/dfvo/libs/geometry/__init__.py -------------------------------------------------------------------------------- /dfvo/libs/geometry/backprojection.py: -------------------------------------------------------------------------------- 1 | '''''' 2 | ''' 3 | @Author: Huangying Zhan (huangying.zhan.work@gmail.com) 4 | @Date: 2019-09-01 5 | @Copyright: Copyright (C) Huangying Zhan 2020. All rights reserved. Please refer to the license file. 
6 | @LastEditTime: 2020-05-27 7 | @LastEditors: Huangying Zhan 8 | @Description: Backproject layer to backproject a depth image given the camera intrinsics 9 | ''' 10 | 11 | import numpy as np 12 | import torch 13 | import torch.nn as nn 14 | 15 | 16 | class Backprojection(nn.Module): 17 | """Layer to backproject a depth image given the camera intrinsics 18 | """ 19 | 20 | def __init__(self, height, width): 21 | """ 22 | Args: 23 | height (int): image height 24 | width (int): image width 25 | """ 26 | super(Backprojection, self).__init__() 27 | 28 | self.height = height 29 | self.width = width 30 | 31 | # generate regular grid 32 | meshgrid = np.meshgrid(range(self.width), range(self.height), indexing='xy') 33 | id_coords = np.stack(meshgrid, axis=0).astype(np.float32) 34 | id_coords = torch.tensor(id_coords) 35 | 36 | # generate homogeneous pixel coordinates 37 | self.ones = nn.Parameter(torch.ones(1, 1, self.height * self.width), 38 | requires_grad=False) 39 | self.xy = torch.unsqueeze( 40 | torch.stack([id_coords[0].view(-1), id_coords[1].view(-1)], 0) 41 | , 0) 42 | self.xy = torch.cat([self.xy, self.ones], 1) 43 | self.xy = nn.Parameter(self.xy, requires_grad=False) 44 | 45 | def forward(self, depth, inv_K, img_like_out=False): 46 | """Forward pass 47 | 48 | Args: 49 | depth (tensor, [Nx1xHxW]): depth map 50 | inv_K (tensor, [Nx4x4]): inverse camera intrinsics 51 | img_like_out (bool):if True, the output shape is Nx4xHxW; else Nx4x(HxW) 52 | 53 | Returns: 54 | points (tensor, [Nx4x(HxW) or Nx4xHxW]): 3D points in homogeneous coordinates 55 | """ 56 | depth = depth.contiguous() 57 | 58 | xy = self.xy.repeat(depth.shape[0], 1, 1) 59 | ones = self.ones.repeat(depth.shape[0],1,1) 60 | 61 | points = torch.matmul(inv_K[:, :3, :3], xy) 62 | points = depth.view(depth.shape[0], 1, -1) * points 63 | points = torch.cat([points, ones], 1) 64 | 65 | if img_like_out: 66 | points = points.reshape(depth.shape[0], 4, self.height, self.width) 67 | return points 68 | -------------------------------------------------------------------------------- /dfvo/libs/geometry/camera_modules.py: -------------------------------------------------------------------------------- 1 | '''''' 2 | ''' 3 | @Author: Huangying Zhan (huangying.zhan.work@gmail.com) 4 | @Date: 2019-09-01 5 | @Copyright: Copyright (C) Huangying Zhan 2020. All rights reserved. Please refer to the license file. 
6 | @LastEditTime: 2020-05-27 7 | @LastEditors: Huangying Zhan 8 | @Description: Camera related classes 9 | ''' 10 | 11 | import numpy as np 12 | 13 | 14 | class SE3(): 15 | """SE3 object consists rotation and translation components 16 | """ 17 | def __init__(self, np_arr=None): 18 | if np_arr is None: 19 | self._pose = np.eye(4) 20 | else: 21 | self._pose = np_arr 22 | 23 | @property 24 | def pose(self): 25 | """ (array, [4x4]): camera pose 26 | """ 27 | return self._pose 28 | 29 | @pose.setter 30 | def pose(self, value): 31 | self._pose = value 32 | 33 | @property 34 | def inv_pose(self): 35 | """ (array, [4x4]): inverse camera pose 36 | """ 37 | return np.linalg.inv(self._pose) 38 | 39 | @inv_pose.setter 40 | def inv_pose(self, value): 41 | self._pose = np.linalg.inv(value) 42 | 43 | @property 44 | def R(self): 45 | """ (array, [3x4]): rotation matrix 46 | """ 47 | return self._pose[:3, :3] 48 | 49 | @R.setter 50 | def R(self, value): 51 | self._pose[:3, :3] = value 52 | 53 | @property 54 | def t(self): 55 | """ (array, [3x1]): translation vector 56 | """ 57 | return self._pose[:3, 3:] 58 | 59 | @t.setter 60 | def t(self, value): 61 | self._pose[:3, 3:] = value 62 | 63 | 64 | class Intrinsics(): 65 | """Camera intrinsics object 66 | """ 67 | def __init__(self, param=None): 68 | """ 69 | Args: 70 | param (list): [cx, cy, fx, fy] 71 | """ 72 | if param is None: 73 | self._mat = np.zeros((3, 3)) 74 | else: 75 | cx, cy, fx, fy = param 76 | self._mat = np.array([ 77 | [fx, 0, cx], 78 | [0, fy, cy], 79 | [0, 0, 1]]) 80 | 81 | @property 82 | def mat(self): 83 | """ (array, [3x3]): intrinsics matrix """ 84 | return self._mat 85 | 86 | @mat.setter 87 | def mat(self, mat): 88 | self._mat = mat 89 | 90 | @property 91 | def inv_mat(self): 92 | """ (array, [3x3]): inverse intrinsics matrix """ 93 | return np.linalg.inv(self._mat) 94 | 95 | @inv_mat.setter 96 | def inv_mat(self, mat): 97 | self._mat = np.linalg.inv(mat) 98 | 99 | @property 100 | def fx(self): 101 | """ float: focal length in x-direction """ 102 | return self._mat[0, 0] 103 | 104 | @fx.setter 105 | def fx(self, value): 106 | self._mat[0, 0] = value 107 | 108 | @property 109 | def fy(self): 110 | """ float: focal length in y-direction """ 111 | return self._mat[1, 1] 112 | 113 | @fy.setter 114 | def fy(self, value): 115 | self._mat[1, 1] = value 116 | 117 | @property 118 | def cx(self): 119 | """ float: principal point in x-direction """ 120 | return self._mat[0, 2] 121 | 122 | @cx.setter 123 | def cx(self, value): 124 | self._mat[0, 2] = value 125 | 126 | @property 127 | def cy(self): 128 | """ float: principal point in y-direction """ 129 | return self._mat[1, 2] 130 | 131 | @cy.setter 132 | def cy(self, value): 133 | self._mat[1, 2] = value 134 | 135 | 136 | class PinholeCamera(): 137 | """Pinhole camera model 138 | Attributes: 139 | height (int): image height 140 | width (int): image width 141 | SE3 (SE3): camera pose 142 | K (intrinsics): camera intrinsics 143 | """ 144 | def __init__(self, pose=None, K=None): 145 | """ 146 | Args: 147 | pose (array, [4x4]): camera pose 148 | K (list): [cx, cy, fx, fy] 149 | """ 150 | self._height = 0 151 | self._width = 0 152 | self._SE3 = SE3(pose) 153 | self._K = Intrinsics(K) 154 | 155 | @property 156 | def height(self): 157 | """ (int): image height """ 158 | return self._height 159 | 160 | @height.setter 161 | def height(self, value): 162 | self._height = value 163 | 164 | @property 165 | def width(self): 166 | """ (int): image width """ 167 | return self._width 168 | 169 | @width.setter 170 
| def width(self, value): 171 | self._width = value 172 | 173 | @property 174 | def SE3(self): 175 | """ (SE3): pose """ 176 | return self._SE3 177 | 178 | @SE3.setter 179 | def SE3(self, SE3_obj): 180 | self._SE3 = SE3_obj 181 | 182 | @property 183 | def K(self): 184 | """ (Intrinsics): camera intrinsics """ 185 | return self._K 186 | 187 | @K.setter 188 | def K(self, intrinsics): 189 | self._K = intrinsics 190 | -------------------------------------------------------------------------------- /dfvo/libs/geometry/ops_3d.py: -------------------------------------------------------------------------------- 1 | '''''' 2 | ''' 3 | @Author: Huangying Zhan (huangying.zhan.work@gmail.com) 4 | @Date: 2019-09-01 5 | @Copyright: Copyright (C) Huangying Zhan 2020. All rights reserved. Please refer to the license file. 6 | @LastEditTime: 2020-05-27 7 | @LastEditors: Huangying Zhan 8 | @Description: This file contains operations related 3D 9 | ''' 10 | 11 | import cv2 12 | import numpy as np 13 | 14 | 15 | def convert_sparse3D_to_depth(kp, XYZ, height, width): 16 | """Convert sparse 3D keypoint to depth map 17 | 18 | Args: 19 | kp (array, [Nx2]): keypoints 20 | XYZ (array, [3xN]): 3D coorindates for the keypoints 21 | height (int): image height 22 | width (int): image width 23 | 24 | Returns: 25 | depth (array, [HxW]): depth map 26 | """ 27 | # initialize depth map 28 | depth = np.zeros((height, width)) 29 | kp_int = kp.astype(np.int) 30 | 31 | # remove out of region keypoints 32 | y_idx = (kp_int[:, 0] >= 0) * (kp_int[:, 0] < width) 33 | kp_int = kp_int[y_idx] 34 | x_idx = (kp_int[:, 1] >= 0) * (kp_int[:, 1] < height) 35 | kp_int = kp_int[x_idx] 36 | 37 | XYZ = XYZ[:, y_idx] 38 | XYZ = XYZ[:, x_idx] 39 | 40 | depth[kp_int[:, 1], kp_int[:, 0]] = XYZ[2] 41 | return depth 42 | 43 | 44 | def triangulation(kp1, kp2, T_1w, T_2w): 45 | """Triangulation to get 3D points 46 | 47 | Args: 48 | kp1 (array, [Nx2]): keypoint in view 1 (normalized) 49 | kp2 (array, [Nx2]): keypoints in view 2 (normalized) 50 | T_1w (array, [4x4]): pose of view 1 w.r.t i.e. T_1w (from w to 1) 51 | T_2w (array, [4x4]): pose of view 2 w.r.t world, i.e. 
T_2w (from w to 2) 52 | 53 | Returns: 54 | a tuple containing 55 | - **X** (array, [3xN]): 3D coordinates of the keypoints w.r.t world coordinate 56 | - **X1** (array, [3xN]): 3D coordinates of the keypoints w.r.t view1 coordinate 57 | - **X2** (array, [3xN]): 3D coordinates of the keypoints w.r.t view2 coordinate 58 | """ 59 | kp1_3D = np.ones((3, kp1.shape[0])) 60 | kp2_3D = np.ones((3, kp2.shape[0])) 61 | kp1_3D[0], kp1_3D[1] = kp1[:, 0].copy(), kp1[:, 1].copy() 62 | kp2_3D[0], kp2_3D[1] = kp2[:, 0].copy(), kp2[:, 1].copy() 63 | X = cv2.triangulatePoints(T_1w[:3], T_2w[:3], kp1_3D[:2], kp2_3D[:2]) 64 | X /= X[3] 65 | X1 = T_1w[:3] @ X 66 | X2 = T_2w[:3] @ X 67 | return X[:3], X1, X2 68 | 69 | 70 | def unprojection_kp(kp, kp_depth, cam_intrinsics): 71 | """Convert kp to XYZ 72 | 73 | Args: 74 | kp (array, [Nx2]): [x, y] keypoints 75 | kp_depth (array, [Nx2]): keypoint depth 76 | cam_intrinsics (Intrinsics): camera intrinsics 77 | 78 | Returns: 79 | XYZ (array, [Nx3]): 3D coordinates 80 | """ 81 | N = kp.shape[0] 82 | # initialize regular grid 83 | XYZ = np.ones((N, 3, 1)) 84 | XYZ[:, :2, 0] = kp 85 | 86 | inv_K = np.ones((1, 3, 3)) 87 | inv_K[0] = cam_intrinsics.inv_mat 88 | inv_K = np.repeat(inv_K, N, axis=0) 89 | 90 | XYZ = np.matmul(inv_K, XYZ)[:, :, 0] 91 | XYZ[:, 0] = XYZ[:, 0] * kp_depth 92 | XYZ[:, 1] = XYZ[:, 1] * kp_depth 93 | XYZ[:, 2] = XYZ[:, 2] * kp_depth 94 | return XYZ 95 | -------------------------------------------------------------------------------- /dfvo/libs/geometry/pose_graph_optimizer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import g2o 3 | 4 | 5 | class PoseGraphOptimizer(g2o.SparseOptimizer): 6 | def __init__(self): 7 | super().__init__() 8 | solver = g2o.BlockSolverSE3(g2o.LinearSolverCholmodSE3()) 9 | solver = g2o.OptimizationAlgorithmLevenberg(solver) 10 | super().set_algorithm(solver) 11 | 12 | def optimize(self, max_iterations=100): 13 | """Optimize the pose graph 14 | 15 | Args: 16 | max_iteration (int): maximum iteration 17 | """ 18 | super().initialize_optimization() 19 | super().optimize(max_iterations) 20 | 21 | def add_vertex(self, id, pose, fixed=False): 22 | """Add vertex 23 | 24 | Args: 25 | id (int): index 26 | pose (g2o.Isometry3d): T_wc 27 | """ 28 | v_se3 = g2o.VertexSE3() 29 | v_se3.set_id(id) 30 | v_se3.set_estimate(pose) 31 | v_se3.set_fixed(fixed) 32 | super().add_vertex(v_se3) 33 | 34 | def add_edge(self, vertices, measurement, 35 | information=np.identity(6), 36 | robust_kernel=g2o.RobustKernelHuber()): 37 | """Add edge 38 | 39 | Args: 40 | vertices (list): two vertices / vertex index 41 | measurement (g2o.Isometry3d): relative pose T_ij 42 | """ 43 | 44 | edge = g2o.EdgeSE3() 45 | for i, v in enumerate(vertices): 46 | if isinstance(v, int): 47 | v = self.vertex(v) 48 | edge.set_vertex(i, v) 49 | 50 | edge.set_measurement(measurement) # relative pose 51 | edge.set_information(information) 52 | edge.set_robust_kernel(robust_kernel) 53 | super().add_edge(edge) 54 | 55 | def get_pose(self, id): 56 | """Get pose matrix at vertex-id 57 | 58 | Args: 59 | id (int): vertex index 60 | 61 | Returns: 62 | pose (array, [4x4]): pose of vertex 63 | """ 64 | return self.vertex(id).estimate().matrix() 65 | -------------------------------------------------------------------------------- /dfvo/libs/geometry/projection.py: -------------------------------------------------------------------------------- 1 | '''''' 2 | ''' 3 | @Author: Huangying Zhan (huangying.zhan.work@gmail.com) 4 | 
@Date: 2019-09-01 5 | @Copyright: Copyright (C) Huangying Zhan 2020. All rights reserved. Please refer to the license file. 6 | @LastEditTime: 2020-05-27 7 | @LastEditors: Huangying Zhan 8 | @Description: Layer to project 3D points into a camera view given camera intrinsics 9 | ''' 10 | 11 | import torch 12 | import torch.nn as nn 13 | 14 | 15 | class Projection(nn.Module): 16 | """Layer to project 3D points into a camera view given camera intrinsics 17 | """ 18 | def __init__(self, height, width, eps=1e-7): 19 | """ 20 | Args: 21 | height (int): image height 22 | width (int): image width 23 | eps (float): small number to prevent division of zero 24 | """ 25 | super(Projection, self).__init__() 26 | 27 | self.height = height 28 | self.width = width 29 | self.eps = eps 30 | 31 | def forward(self, points3d, K, normalized=True): 32 | """Forward pass 33 | 34 | Args: 35 | points3d (tensor, [Nx4x(HxW)]): 3D points in homogeneous coordinates 36 | K (tensor, [Nx4x4]): camera intrinsics 37 | normalized (bool): 38 | 39 | - True: normalized to [-1, 1] 40 | - False: [0, W-1] and [0, H-1] 41 | 42 | Returns: 43 | xy (tensor, [NxHxWx2]): pixel coordinates 44 | """ 45 | # projection 46 | points2d = torch.matmul(K[:, :3, :], points3d) 47 | 48 | # convert from homogeneous coordinates 49 | xy = points2d[:, :2, :] / (points2d[:, 2:3, :] + self.eps) 50 | xy = xy.view(points3d.shape[0], 2, self.height, self.width) 51 | xy = xy.permute(0, 2, 3, 1) 52 | 53 | # normalization 54 | if normalized: 55 | xy[..., 0] /= self.width - 1 56 | xy[..., 1] /= self.height - 1 57 | xy = (xy - 0.5) * 2 58 | return xy 59 | -------------------------------------------------------------------------------- /dfvo/libs/geometry/reprojection.py: -------------------------------------------------------------------------------- 1 | '''''' 2 | ''' 3 | @Author: Huangying Zhan (huangying.zhan.work@gmail.com) 4 | @Date: 2019-09-01 5 | @Copyright: Copyright (C) Huangying Zhan 2020. All rights reserved. Please refer to the license file. 
6 | @LastEditTime: 2020-05-27 7 | @LastEditors: Huangying Zhan 8 | @Description: Layer to transform pixel coordinates from one view to another view via 9 | backprojection, transformation in 3D, and projection 10 | ''' 11 | 12 | import torch 13 | import torch.nn as nn 14 | 15 | from dfvo.libs.geometry.backprojection import Backprojection 16 | from dfvo.libs.geometry.transformation3d import Transformation3D 17 | from dfvo.libs.geometry.projection import Projection 18 | 19 | 20 | class Reprojection(nn.Module): 21 | """Layer to transform pixel coordinates from one view to another view via 22 | backprojection, transformation in 3D, and projection 23 | """ 24 | def __init__(self, height, width): 25 | """ 26 | Args: 27 | height (int): image height 28 | width (int): image width 29 | """ 30 | super(Reprojection, self).__init__() 31 | 32 | # layers 33 | self.backproj = Backprojection(height, width) 34 | self.transform = Transformation3D() 35 | self.project = Projection(height, width) 36 | 37 | def forward(self, depth, T, K, inv_K, normalized=True): 38 | """Forward pass 39 | 40 | Args: 41 | depth (tensor, [Nx1xHxW]): depth map 42 | T (tensor, [Nx4x4]): transformation matrice 43 | inv_K (tensor, [Nx4x4]): inverse camera intrinsics 44 | K (tensor, [Nx4x4]): camera intrinsics 45 | normalized (bool): 46 | 47 | - True: normalized to [-1, 1] 48 | - False: [0, W-1] and [0, H-1] 49 | 50 | Returns: 51 | xy (NxHxWx2): pixel coordinates 52 | """ 53 | points3d = self.backproj(depth, inv_K) 54 | points3d_trans = self.transform(points3d, T) 55 | xy = self.project(points3d_trans, K, normalized) 56 | return xy 57 | -------------------------------------------------------------------------------- /dfvo/libs/geometry/rigid_flow.py: -------------------------------------------------------------------------------- 1 | '''''' 2 | ''' 3 | @Author: Huangying Zhan (huangying.zhan.work@gmail.com) 4 | @Date: 2020-03-01 5 | @Copyright: Copyright (C) Huangying Zhan 2020. All rights reserved. Please refer to the license file. 
6 | @LastEditTime: 2020-05-28 7 | @LastEditors: Huangying Zhan 8 | @Description: 9 | ''' 10 | 11 | import torch 12 | import torch.nn as nn 13 | 14 | from dfvo.libs.deep_models.depth.monodepth2.layers import PixToFlow 15 | from dfvo.libs.geometry.reprojection import Reprojection 16 | 17 | class RigidFlow(nn.Module): 18 | """Layer to compute rigid flow given depth and camera motion 19 | """ 20 | def __init__(self, height, width): 21 | """ 22 | Args: 23 | height (int): image height 24 | width (int): image width 25 | """ 26 | super(RigidFlow, self).__init__() 27 | # basic configuration 28 | self.height = height 29 | self.width = width 30 | self.device = torch.device('cuda') 31 | 32 | # layer setup 33 | self.pix2flow = PixToFlow(1, self.height, self.width) 34 | self.pix2flow.to(self.device) 35 | 36 | self.reprojection = Reprojection(self.height, self.width) 37 | 38 | def forward(self, depth, T, K, inv_K, normalized=True): 39 | """Forward pass 40 | 41 | Args: 42 | depth (tensor, [Nx1xHxW]): depth map 43 | T (tensor, [Nx4x4]): transformation matrice 44 | inv_K (tensor, [Nx4x4]): inverse camera intrinsics 45 | K (tensor, [Nx4x4]): camera intrinsics 46 | normalized (bool): 47 | 48 | - True: normalized to [-1, 1] 49 | - False: [0, W-1] and [0, H-1] 50 | 51 | Returns: 52 | flow (NxHxWx2): rigid flow 53 | """ 54 | xy = self.reprojection(depth, T, K, inv_K, normalized) 55 | flow = self.pix2flow(xy) 56 | 57 | return flow 58 | 59 | -------------------------------------------------------------------------------- /dfvo/libs/geometry/transformation3d.py: -------------------------------------------------------------------------------- 1 | '''''' 2 | ''' 3 | @Author: Huangying Zhan (huangying.zhan.work@gmail.com) 4 | @Date: 2019-09-01 5 | @Copyright: Copyright (C) Huangying Zhan 2020. All rights reserved. Please refer to the license file. 6 | @LastEditTime: 2020-05-27 7 | @LastEditors: Huangying Zhan 8 | @Description: Layer to transform 3D points given transformation matrice 9 | ''' 10 | 11 | import torch 12 | import torch.nn as nn 13 | 14 | 15 | class Transformation3D(nn.Module): 16 | """Layer to transform 3D points given transformation matrice 17 | """ 18 | def __init__(self): 19 | super(Transformation3D, self).__init__() 20 | 21 | def forward(self, points, T): 22 | """Forward pass 23 | 24 | Args: 25 | points (tensor, [Nx4x(HxW)]): 3D points in homogeneous coordinates 26 | T (tensor, [Nx4x4]): transformation matrice 27 | 28 | Returns: 29 | transformed_points (tensor, [Nx4x(HxW)]): 3D points in homogeneous coordinates 30 | """ 31 | transformed_points = torch.matmul(T, points) 32 | return transformed_points -------------------------------------------------------------------------------- /dfvo/libs/matching/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/best-of-acrv/toposlam/3ec1dda210722d86bf77f101dca57ba27baa5833/dfvo/libs/matching/__init__.py -------------------------------------------------------------------------------- /dfvo/libs/matching/keypoint_sampler.py: -------------------------------------------------------------------------------- 1 | '''''' 2 | ''' 3 | @Author: Huangying Zhan (huangying.zhan.work@gmail.com) 4 | @Date: 2020-03-01 5 | @Copyright: Copyright (C) Huangying Zhan 2020. All rights reserved. Please refer to the license file. 
6 | LastEditTime: 2020-09-24 7 | LastEditors: Huangying Zhan 8 | @Description: KeypointSampler is an interface for keypoint sampling 9 | ''' 10 | 11 | 12 | import numpy as np 13 | 14 | from .kp_selection import * 15 | from dfvo.libs.general.utils import image_grid 16 | from dfvo.libs.geometry.camera_modules import SE3 17 | 18 | class KeypointSampler(): 19 | """KeypointSampler is an interface for keypoint sampling 20 | """ 21 | def __init__(self, cfg): 22 | """ 23 | Args: 24 | cfg (edict): configuration dictionary 25 | """ 26 | self.cfg = cfg 27 | self.kps = {} 28 | 29 | # generate uniform kp list 30 | if self.cfg.kp_selection.sampled_kp.enable: 31 | self.kps['uniform'] = self.generate_kp_samples( 32 | img_h=self.cfg.image.height, 33 | img_w=self.cfg.image.width, 34 | crop=self.cfg.crop.flow_crop, 35 | N=self.cfg.kp_selection.sampled_kp.num_kp 36 | ) 37 | 38 | def get_feat_track_methods(self, method_idx): 39 | """Get feature tracking method 40 | 41 | Args: 42 | method_idx (int): feature tracking method index 43 | 44 | Returns: 45 | feat_track_method (str): feature tracking method 46 | """ 47 | feat_track_methods = { 48 | 1: "deep_flow", 49 | } 50 | return feat_track_methods[method_idx] 51 | 52 | def generate_kp_samples(self, img_h, img_w, crop, N): 53 | """generate uniform keypoint samples according to image height, width 54 | and cropping scheme 55 | 56 | Args: 57 | img_h (int): image height 58 | img_w (int): image width 59 | crop (list): normalized cropping ratio, [[y0, y1],[x0, x1]] 60 | N (int): number of keypoint 61 | 62 | Returns: 63 | kp_list (array, [N]): keypoint list 64 | """ 65 | # get cropped image shape 66 | y0, y1 = crop[0] 67 | y0, y1 = int(y0 * img_h), int(y1 * img_h) 68 | x0, x1 = crop[1] 69 | x0, x1 = int(x0 * img_w), int(x1 * img_w) 70 | 71 | # uniform sampling keypoints 72 | total_num = (x1-x0) * (y1-y0) - 1 73 | kp_list = np.linspace(0, total_num, N, dtype=np.int) 74 | return kp_list 75 | 76 | def kp_selection(self, cur_data, ref_data): 77 | """Choose valid kp from a series of operations 78 | 79 | Args: 80 | cur_data (dict): data of current frame (view-2) 81 | ref_data (dict): data of reference frame (view-1) 82 | 83 | Returns: 84 | outputs (dict): a dictionary containing some of the following items 85 | 86 | - **kp1_best** (array, [Nx2]): keypoints on view-1 87 | - **kp2_best** (array, [Nx2]): keypoints on view-2 88 | - **kp1_list** (array, [Nx2]): keypoints on view-1 89 | - **kp2_list** (array, [Nx2]): keypoints on view-2 90 | - **kp1_depth** (array, [Nx2]): keypoints in view-1 91 | - **kp2_depth** (array, [Nx2]): keypoints in view-2 92 | - **rigid_flow_mask** (array, [HxW]): rigid-optical flow consistency 93 | 94 | """ 95 | outputs = {} 96 | outputs['good_kp_found'] = True 97 | 98 | # initialization 99 | h, w, _ = cur_data['img'].shape 100 | 101 | kp1 = image_grid(h, w) 102 | kp1 = np.expand_dims(kp1, 0) 103 | tmp_flow_data = np.transpose(np.expand_dims(ref_data['flow'], 0), (0, 2, 3, 1)) 104 | kp2 = kp1 + tmp_flow_data 105 | 106 | """ best-N selection """ 107 | if self.cfg.kp_selection.local_bestN.enable: 108 | kp_sel_method = local_bestN 109 | outputs.update( 110 | kp_sel_method( 111 | kp1=kp1, 112 | kp2=kp2, 113 | ref_data=ref_data, 114 | cfg=self.cfg, 115 | outputs=outputs 116 | ) 117 | ) 118 | elif self.cfg.kp_selection.bestN.enable: 119 | kp_sel_method = bestN_flow_kp 120 | outputs.update( 121 | kp_sel_method( 122 | kp1=kp1, 123 | kp2=kp2, 124 | ref_data=ref_data, 125 | cfg=self.cfg, 126 | outputs=outputs 127 | ) 128 | ) 129 | 130 | """ sampled kp selection """ 
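        # (Editor's note) In addition to the best-N keypoints selected above,
        # the branch below gathers flow correspondences at the uniformly
        # pre-sampled pixel indices stored in self.kps['uniform'] (built by
        # generate_kp_samples in __init__). These are returned as
        # kp1_list / kp2_list, which components configured with
        # kp_src: kp_list (e.g. the PnP and scale-recovery options in
        # options/kitti/dfvo_test.yml) can then consume.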
131 | if self.cfg.kp_selection.sampled_kp.enable: 132 | outputs.update( 133 | sampled_kp( 134 | kp1=kp1, 135 | kp2=kp2, 136 | ref_data=ref_data, 137 | kp_list=self.kps['uniform'], 138 | cfg=self.cfg, 139 | outputs=outputs 140 | ) 141 | ) 142 | 143 | return outputs 144 | 145 | def update_kp_data(self, cur_data, ref_data, kp_sel_outputs): 146 | """update cur_data and ref_data with the kp_selection output 147 | 148 | Args: 149 | cur_data (dict): data of current frame 150 | ref_data (dict): data of reference frame 151 | kp_sel_outputs (dict): data of keypoint selection outputs 152 | """ 153 | if self.cfg.kp_selection.local_bestN.enable or self.cfg.kp_selection.bestN.enable: 154 | # save selected kp 155 | ref_data['kp_best'] = kp_sel_outputs['kp1_best'][0] 156 | cur_data['kp_best'] = kp_sel_outputs['kp2_best'][0] 157 | 158 | # save mask 159 | cur_data['fb_flow_mask'] = kp_sel_outputs['fb_flow_mask'] 160 | 161 | if self.cfg.kp_selection.sampled_kp.enable: 162 | ref_data['kp_list'] = kp_sel_outputs['kp1_list'][0] 163 | cur_data['kp_list'] = kp_sel_outputs['kp2_list'][0] 164 | -------------------------------------------------------------------------------- /dfvo/libs/tracker/__init__.py: -------------------------------------------------------------------------------- 1 | from .E_tracker import EssTracker 2 | from .pnp_tracker import PnpTracker -------------------------------------------------------------------------------- /dfvo/libs/tracker/gric.py: -------------------------------------------------------------------------------- 1 | '''''' 2 | ''' 3 | @Author: Huangying Zhan (huangying.zhan.work@gmail.com) 4 | @Date: 2020-03-01 5 | @Copyright: Copyright (C) Huangying Zhan 2020. All rights reserved. Please refer to the license file. 6 | @LastEditTime: 2020-05-27 7 | @LastEditors: Huangying Zhan 8 | @Description: This file contains functions related to GRIC computation 9 | ''' 10 | 11 | import numpy as np 12 | 13 | 14 | def compute_fundamental_residual(F, kp1, kp2): 15 | """ 16 | Compute fundamental matrix residual 17 | 18 | Args: 19 | F (array, [3x3]): Fundamental matrix (from view-1 to view-2) 20 | kp1 (array, [Nx2]): keypoint 1 21 | kp2 (array, [Nx2]): keypoint 2 22 | 23 | Returns: 24 | res (array, [N]): residual 25 | """ 26 | # get homogeneous keypoints (3xN array) 27 | m0 = np.ones((3, kp1.shape[0])) 28 | m0[:2] = np.transpose(kp1, (1,0)) 29 | m1 = np.ones((3, kp2.shape[0])) 30 | m1[:2] = np.transpose(kp2, (1,0)) 31 | 32 | Fm0 = F @ m0 #3xN 33 | Ftm1 = F.T @ m1 #3xN 34 | 35 | m1Fm0 = (np.transpose(Fm0, (1,0)) @ m1).diagonal() 36 | res = m1Fm0**2 / (np.sum(Fm0[:2]**2, axis=0) + np.sum(Ftm1[:2]**2, axis=0)) 37 | return res 38 | 39 | 40 | def compute_homography_residual(H_in, kp1, kp2): 41 | """ 42 | Compute homography matrix residual 43 | 44 | Args: 45 | H (array, [3x3]): homography matrix (Transformation from view-1 to view-2) 46 | kp1 (array, [Nx2]): keypoint 1 47 | kp2 (array, [Nx2]): keypoint 2 48 | 49 | Returns: 50 | res (array, [N]): residual 51 | """ 52 | n = kp1.shape[0] 53 | H = H_in.flatten() 54 | 55 | # get homogeneous keypoints (3xN array) 56 | m0 = np.ones((3, kp1.shape[0])) 57 | m0[:2] = np.transpose(kp1, (1,0)) 58 | m1 = np.ones((3, kp2.shape[0])) 59 | m1[:2] = np.transpose(kp2, (1,0)) 60 | 61 | 62 | G0 = np.zeros((3, n)) 63 | G1 = np.zeros((3, n)) 64 | 65 | G0[0]= H[0] - m1[0] * H[6] 66 | G0[1]= H[1] - m1[0] * H[7] 67 | G0[2]=-m0[0] * H[6] - m0[1] * H[7] - H[8] 68 | 69 | G1[0]= H[3] - m1[1] * H[6] 70 | G1[1]= H[4] - m1[1] * H[7] 71 | G1[2]=-m0[0] * H[6] - m0[1] * H[7] - H[8] 72 | 
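    # (Editor's note, interpretation) G0 and G1 above are the gradients of the
    # two algebraic transfer errors with respect to the point coordinates.
    # Below, each algebraic error is normalised by its gradient magnitude
    # (D1, D2) and the two components are combined using the angle between the
    # gradients, giving a first-order approximation of the geometric transfer
    # error; the resulting residuals are what calc_GRIC consumes for model
    # selection.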
73 | magG0=np.sqrt(G0[0]*G0[0] + G0[1]*G0[1] + G0[2]*G0[2]) 74 | magG1=np.sqrt(G1[0]*G1[0] + G1[1]*G1[1] + G1[2]*G1[2]) 75 | magG0G1=G0[0]*G1[0] + G0[1]*G1[1] 76 | 77 | alpha=np.arccos(magG0G1 /(magG0*magG1)) 78 | 79 | alg = np.zeros((2, n)) 80 | alg[0]= m0[0]*H[0] + m0[1]*H[1] + H[2] - \ 81 | m1[0]*(m0[0]*H[6] + m0[1]*H[7] + H[8]) 82 | 83 | alg[1]= m0[0]*H[3] + m0[1]*H[4] + H[5] - \ 84 | m1[1]*(m0[0]*H[6] + m0[1]*H[7] + H[8]) 85 | 86 | D1=alg[0]/magG0 87 | D2=alg[1]/magG1 88 | 89 | res = (D1*D1 + D2*D2 - 2.0*D1*D2*np.cos(alpha))/np.sin(alpha) 90 | 91 | return res 92 | 93 | 94 | def calc_GRIC(res, sigma, n, model): 95 | """Calculate GRIC 96 | 97 | Args: 98 | res (array, [N]): residual 99 | sigma (float): assumed variance of the error 100 | n (int): number of residuals 101 | model (str): model type 102 | - FMat 103 | - EMat 104 | - HMat 105 | """ 106 | R = 4 107 | sigmasq1 = 1./ sigma**2 108 | 109 | K = { 110 | "FMat": 7, 111 | "EMat": 5, 112 | "HMat": 8, 113 | }[model] 114 | D = { 115 | "FMat": 3, 116 | "EMat": 3, 117 | "HMat": 2, 118 | }[model] 119 | 120 | lam3RD=2.0 * (R-D) 121 | 122 | sum_ = 0 123 | for i in range(n): 124 | tmp=res[i] * sigmasq1 125 | if tmp<=lam3RD: 126 | sum_ += tmp 127 | else: 128 | sum_ += lam3RD 129 | 130 | sum_ += n * D * np.log(R) + K * np.log(R*n) 131 | 132 | return sum_ 133 | -------------------------------------------------------------------------------- /dfvo/options/kitti/dfvo_test.yml: -------------------------------------------------------------------------------- 1 | # This configuration corresponds to 2 | 3 | # ---------------------------------------------------------------------------- 4 | # Basic setup 5 | # ---------------------------------------------------------------------------- 6 | debug: False 7 | use_multiprocessing: False 8 | seq: 2011_10_03_drive_0047_sync 9 | dataset: kitti_raw 10 | seed: 4869 11 | # result_dir: result/dfvo2/3/1 12 | result_dir: result/tmp/2 13 | image: 14 | height: 370 15 | width: 1226 16 | ext: jpg 17 | 18 | # ---------------------------------------------------------------------------- 19 | # tracking options 20 | # ---------------------------------------------------------------------------- 21 | feature_tracking_method: 1 22 | # 1: deep optical flow 23 | 24 | tracking_method: 3 25 | # 3: hybrid 26 | 27 | compute_2d2d_pose: 28 | ransac: 29 | reproj_thre: 0.2 30 | repeat: 5 31 | validity: 32 | method: flow # [flow+chei, homo_ratio] 33 | thre: 5 34 | kp_src: kp_best # [kp_best, kp_list] 35 | 36 | PnP: 37 | ransac: 38 | iter: 1000 39 | reproj_thre: 1 40 | repeat: 5 41 | kp_src: kp_list # [kp_best, kp_list] 42 | 43 | # translation scale 44 | translation_scale: 45 | ransac: 46 | method: depth_ratio # [abs_diff, depth_ratio] 47 | min_samples: 3 48 | max_trials: 100 49 | stop_prob: 0.99 50 | thre: 0.1 51 | kp_src: kp_list # [kp_best, kp_list] 52 | 53 | # ---------------------------------------------------------------------------- 54 | # Deep Optical Flow 55 | # ---------------------------------------------------------------------------- 56 | deep_flow: 57 | forward_backward: True 58 | # num_kp: 2000 59 | # kp_sel_method: uniform_filtered_bestN # uniform_filtered_bestN # uniform_bestN 60 | network: liteflow 61 | # liteflow: LiteFlowNet 62 | flow_net_weight: model_zoo/optical_flow/LiteFlowNet/network-default.pytorch 63 | batch_size: 1 64 | precomputed_flow: #tmp/result/flow/kitti_odom/liteflow_default/{}/npy 65 | 66 | crop: 67 | depth_crop: [[0.3, 1], [0, 1]] 68 | flow_crop: [[0, 1], [0, 1]] 69 | 70 | # 
---------------------------------------------------------------------------- 71 | # Deep single-view depth 72 | # ---------------------------------------------------------------------------- 73 | depth: 74 | depth_src: # (blank: deep model inference; 0: GT depth) 75 | max_depth: 50 # [50/200] 76 | min_depth: 0 77 | pretrained_model: model_zoo/depth/kitti_odom/stereo/ 78 | # pretrained_model: model_zoo/depth/kitti_odom/mono_sc/ 79 | 80 | # ---------------------------------------------------------------------------- 81 | # Deep two-view pose 82 | # ---------------------------------------------------------------------------- 83 | pose_net: 84 | enable: False 85 | pretrained_model: model_zoo/pose/kitti_odom/stereo/ 86 | 87 | # ---------------------------------------------------------------------------- 88 | # Keypoint selection 89 | # ---------------------------------------------------------------------------- 90 | kp_selection: 91 | uniform_filtered_bestN: 92 | enable: False 93 | num_row: 10 94 | num_col: 10 95 | num_bestN: 2000 96 | score_method: flow # [flow, flow_depth 97 | bestN: 98 | enable: True 99 | num_bestN: 2000 100 | sampled_kp: 101 | enable: True 102 | 103 | depth_consistency: 104 | enable: False 105 | thre: 0.05 106 | flow_consistency: 107 | enable: True 108 | thre: 1 109 | 110 | # ---------------------------------------------------------------------------- 111 | # Visualization 112 | # ---------------------------------------------------------------------------- 113 | visualization: 114 | save_img: True 115 | kp_src: kp_best 116 | flow: 117 | vis_full_flow: True 118 | vis_back_flow: True 119 | vis_flow_diff: True 120 | match: 121 | kp_num: 50 # -1 for using all 122 | vis_temp: 123 | enable: True 124 | vis_side: 125 | enable: True 126 | inlier_plot: True 127 | depth: 128 | use_tracking_depth: False 129 | vis_full_depth: False 130 | vis_full_disp: True 131 | mask: 132 | vis_masks: True 133 | trajectory: 134 | vis_traj: True 135 | draw_scale: 0.8 136 | mono_scale: 1 #5.4; 0.2 137 | vis_gt_traj: True 138 | 139 | directory: 140 | # img_seq_dir: dataset/kitti_odom/odom_data_jpg/ 141 | img_seq_dir: dataset/kitti_raw 142 | gt_pose_dir: dataset/kitti_raw_pose 143 | # depth_dir: /home/hyzhan/harddisk_local/DOM/result/kitti_raw_gt_depth -------------------------------------------------------------------------------- /dfvo/options/kitti/kitti_mono_sc_0.yml: -------------------------------------------------------------------------------- 1 | # This configuration corresponds to Ours (Mono-SC Train.) in the paper. 
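# NOTE: only the options that differ from the defaults are listed here; unspecified settings fall back to the default configuration.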
2 | 3 | # ---------------------------------------------------------------------------- 4 | # Basic setup 5 | # ---------------------------------------------------------------------------- 6 | result_dir: result/mono_sc/0 7 | 8 | # ---------------------------------------------------------------------------- 9 | # Deep single-view depth 10 | # ---------------------------------------------------------------------------- 11 | depth: 12 | depth_src: # (blank: deep model inference; 0: GT depth) 13 | max_depth: 200 14 | min_depth: 0 15 | pretrained_model: model_zoo/depth/kitti_odom/mono_sc/ 16 | 17 | # ---------------------------------------------------------------------------- 18 | # Visualization 19 | # ---------------------------------------------------------------------------- 20 | visualization: 21 | flow: 22 | vis_full_flow: False 23 | vis_back_flow: False 24 | vis_flow_diff: False 25 | match: 26 | kp_num: 50 # -1 for using all 27 | vis_type: 0 28 | # None: None 29 | # 0: draw_match_2_side 30 | # 1: draw_match_temporal 31 | # 2: both 32 | trajectory: 33 | vis_traj: True 34 | draw_scale: 0.8 35 | mono_scale: 0.2 #5.4; 0.2 36 | vis_gt_traj: True 37 | depth: 38 | use_tracking_depth: False 39 | vis_full_depth: False 40 | vis_full_disp: False 41 | -------------------------------------------------------------------------------- /dfvo/options/kitti/kitti_stereo_0.yml: -------------------------------------------------------------------------------- 1 | # This configuration corresponds to Ours (Stereo Train.) in the paper. 2 | 3 | # ---------------------------------------------------------------------------- 4 | # Basic setup 5 | # ---------------------------------------------------------------------------- 6 | result_dir: result/stereo/0 -------------------------------------------------------------------------------- /dfvo/options/kitti/kitti_stereo_1.yml: -------------------------------------------------------------------------------- 1 | # This configuration corresponds to 2 | # the Reference Model described in Sec.V-C in the paper. 
3 | 4 | # ---------------------------------------------------------------------------- 5 | # Basic setup 6 | # ---------------------------------------------------------------------------- 7 | seq: "09" 8 | result_dir: result/tmp/0 9 | image: 10 | height: 192 11 | width: 640 12 | 13 | # ---------------------------------------------------------------------------- 14 | # Visualization 15 | # ---------------------------------------------------------------------------- 16 | visualization: 17 | flow: 18 | vis_full_flow: False 19 | vis_back_flow: False 20 | vis_flow_diff: False 21 | match: 22 | kp_num: 50 # -1 for using all 23 | vis_type: 24 | # None: None 25 | # 0: draw_match_2_side 26 | # 1: draw_match_temporal 27 | # 2: both 28 | trajectory: 29 | vis_traj: True 30 | draw_scale: 0.8 31 | mono_scale: 1 #5.4; 0.2 32 | vis_gt_traj: True 33 | depth: 34 | use_tracking_depth: False 35 | vis_full_depth: False 36 | vis_full_disp: False 37 | -------------------------------------------------------------------------------- /dfvo/options/kitti/sampling_test.yml: -------------------------------------------------------------------------------- 1 | # This configuration corresponds to 2 | 3 | # ---------------------------------------------------------------------------- 4 | # Basic setup 5 | # ---------------------------------------------------------------------------- 6 | debug: False 7 | use_multiprocessing: False 8 | seq: "04" 9 | dataset: kitti_odom # [kitti_odom, kitti_raw] 10 | seed: 4869 11 | result_dir: result/dfvo2/6/0 12 | # result_dir: result/tmp/2 13 | image: 14 | height: 370 15 | width: 1226 16 | ext: jpg 17 | 18 | # ---------------------------------------------------------------------------- 19 | # tracking options 20 | # ---------------------------------------------------------------------------- 21 | tracking_method: 3 22 | # 3: hybrid 23 | 24 | compute_2d2d_pose: 25 | ransac: 26 | reproj_thre: 0.2 27 | repeat: 5 28 | validity: 29 | method: GRIC # [flow+chei, homo_ratio, GRIC] 30 | thre: 0.25 31 | kp_src: kp_best # [kp_best, kp_list] 32 | 33 | PnP: 34 | ransac: 35 | iter: 1000 36 | reproj_thre: 1 37 | repeat: 5 38 | kp_src: kp_best # [kp_best, kp_list] 39 | 40 | # translation scale 41 | translation_scale: 42 | method: single # [single; iterative] 43 | # - dfvo: selcted kps 44 | ransac: 45 | method: depth_ratio # [abs_diff, depth_ratio] 46 | min_samples: 3 47 | max_trials: 100 48 | stop_prob: 0.99 49 | thre: 0.1 50 | kp_src: kp_best # [kp_best, kp_list] 51 | 52 | # ---------------------------------------------------------------------------- 53 | # Deep Optical Flow 54 | # ---------------------------------------------------------------------------- 55 | deep_flow: 56 | forward_backward: True 57 | num_kp: 2000 58 | network: liteflow 59 | # liteflow: LiteFlowNet 60 | flow_net_weight: model_zoo/optical_flow/LiteFlowNet/network-default.pytorch 61 | batch_size: 1 62 | precomputed_flow: #tmp/result/flow/kitti_odom/liteflow_default/{}/npy 63 | 64 | crop: 65 | depth_crop: [[0.3, 1], [0, 1]] 66 | flow_crop: [[0, 1], [0, 1]] 67 | 68 | # ---------------------------------------------------------------------------- 69 | # Deep single-view depth 70 | # ---------------------------------------------------------------------------- 71 | depth: 72 | depth_src: # (blank: deep model inference; 0: GT depth) 73 | max_depth: 50 # [50/200] 74 | min_depth: 0 75 | pretrained_model: model_zoo/depth/kitti_odom/stereo/ 76 | # pretrained_model: model_zoo/depth/kitti_odom/mono_sc/ 77 | 78 | # 
---------------------------------------------------------------------------- 79 | # Deep two-view pose 80 | # ---------------------------------------------------------------------------- 81 | pose_net: 82 | enable: True 83 | pretrained_model: model_zoo/pose/kitti_odom/stereo/ 84 | # pretrained_model: model_zoo/pose/kitti_odom/mono_sc/ 85 | 86 | # ---------------------------------------------------------------------------- 87 | # Keypoint selection 88 | # ---------------------------------------------------------------------------- 89 | kp_selection: 90 | uniform_filtered_bestN: 91 | enable: True 92 | num_row: 10 93 | num_col: 10 94 | num_bestN: 2000 95 | score_method: flow # [flow, flow_depth] 96 | bestN: 97 | enable: False 98 | num_bestN: 2000 99 | sampled_kp: 100 | enable: True 101 | rigid_flow_kp: 102 | enable: False 103 | num_row: 10 104 | num_col: 10 105 | num_bestN: 2000 106 | score_method: flow # [flow, flow_depth] 107 | thre: 3 108 | 109 | 110 | depth_consistency: 111 | enable: False 112 | thre: 0.05 113 | flow_consistency: 114 | enable: True 115 | thre: 0.1 116 | 117 | # ---------------------------------------------------------------------------- 118 | # Visualization 119 | # ---------------------------------------------------------------------------- 120 | visualization: 121 | save_img: False 122 | kp_src: kp_best 123 | flow: 124 | vis_full_flow: False 125 | vis_back_flow: False 126 | vis_flow_diff: False 127 | match: 128 | kp_num: 100 # -1 for using all 129 | vis_temp: 130 | enable: False 131 | vis_side: 132 | enable: False 133 | inlier_plot: False 134 | 135 | depth: 136 | vis_full_disp: False 137 | mask: 138 | vis_masks: False 139 | trajectory: 140 | vis_traj: True 141 | draw_scale: 0.8 142 | mono_scale: 1 #5.4; 0.2 143 | vis_gt_traj: True 144 | 145 | directory: 146 | img_seq_dir: dataset/kitti_odom/odom_data_jpg/ 147 | gt_pose_dir: dataset/kitti_odom/gt_poses/ 148 | depth_dir: ../robust-vo/dataset/kitti_odom/depth/ 149 | # img_seq_dir: dataset/kitti_raw 150 | # gt_pose_dir: dataset/kitti_raw_pose 151 | # depth_dir: /home/hyzhan/harddisk_local/DOM/result/kitti_raw_gt_depth -------------------------------------------------------------------------------- /dfvo/options/kitti/tro_exp/reference.yml: -------------------------------------------------------------------------------- 1 | # This configuration corresponds to Reference Model 2 | 3 | # ---------------------------------------------------------------------------- 4 | # Basic setup 5 | # ---------------------------------------------------------------------------- 6 | debug: False 7 | use_multiprocessing: False 8 | seq: "2011_10_03_drive_0047_sync" 9 | dataset: kitti_raw # [kitti_odom, kitti_raw] 10 | seed: 4869 11 | # result_dir: result/dfvo2/tro_paper/7/0 12 | # result_dir: result/dfvo2/tro_paper/tmp/1/epoch_0 13 | result_dir: result/tmp/0 14 | frame_step: 1 15 | image: 16 | height: 192 17 | width: 640 18 | # height: 370 19 | # width: 1226 20 | ext: jpg 21 | 22 | # ---------------------------------------------------------------------------- 23 | # tracking options 24 | # ---------------------------------------------------------------------------- 25 | tracking_method: 3 26 | # 3: hybrid 27 | # 1: PnP 28 | 29 | compute_2d2d_pose: 30 | ransac: 31 | reproj_thre: 0.2 32 | repeat: 5 33 | validity: 34 | method: GRIC # [flow, homo_ratio, GRIC] 35 | thre: 36 | kp_src: kp_best # [kp_best, kp_list, kp_rigid] 37 | 38 | PnP: 39 | ransac: 40 | iter: 1000 41 | reproj_thre: 1 42 | repeat: 5 43 | kp_src: kp_best # [kp_best, kp_list, kp_rigid] 
44 | 45 | # translation scale 46 | translation_scale: 47 | method: single # [single; iterative] 48 | # - dfvo: selcted kps 49 | ransac: 50 | method: depth_ratio # [abs_diff, depth_ratio] 51 | min_samples: 3 52 | max_trials: 100 53 | stop_prob: 0.99 54 | thre: 0.1 55 | kp_src: kp_best # [kp_best, kp_list, kp_rigid] 56 | 57 | # ---------------------------------------------------------------------------- 58 | # Deep Optical Flow 59 | # ---------------------------------------------------------------------------- 60 | deep_flow: 61 | forward_backward: True 62 | num_kp: 2000 63 | network: liteflow 64 | # liteflow: LiteFlowNet 65 | # flow_net_weight: model_zoo/optical_flow/LiteFlowNet/network-default.pytorch 66 | # flow_net_weight: model_zoo/optical_flow/UnLiteFlowNet/kitti_odom/mono_640x192/flow.pth 67 | flow_net_weight: /home/hyzhan/harddisk_local/DF-VO/robust-vo/deep_depth/monodepth2/checkpoint/kitti/flow/exp_2/0/09/M_640x192/models/weights_0/flow.pth 68 | batch_size: 1 69 | precomputed_flow: #tmp/result/flow/kitti_odom/liteflow_default/{}/npy 70 | 71 | crop: 72 | depth_crop: [[0.3, 1], [0, 1]] 73 | flow_crop: [[0, 1], [0, 1]] 74 | 75 | # ---------------------------------------------------------------------------- 76 | # Deep single-view depth 77 | # ---------------------------------------------------------------------------- 78 | depth: 79 | depth_src: # (blank: deep model inference; 0: GT depth) 80 | max_depth: 50 # [10/50/200] 81 | min_depth: 0 82 | pretrained_model: model_zoo/depth/kitti_odom/stereo/ 83 | # pretrained_model: model_zoo/depth/kitti_odom/mono_sc/ 84 | # pretrained_model: model_zoo/depth/kitti_odom/mono/ 85 | 86 | # ---------------------------------------------------------------------------- 87 | # Deep two-view pose 88 | # ---------------------------------------------------------------------------- 89 | pose_net: 90 | enable: False 91 | pretrained_model: model_zoo/pose/kitti_odom/stereo/ 92 | # pretrained_model: model_zoo/pose/kitti_odom/mono_sc/ 93 | # pretrained_model: model_zoo/pose/kitti_odom/mono/ 94 | 95 | # ---------------------------------------------------------------------------- 96 | # Keypoint selection 97 | # ---------------------------------------------------------------------------- 98 | kp_selection: 99 | uniform_filtered_bestN: 100 | enable: True 101 | num_row: 10 102 | num_col: 10 103 | num_bestN: 2000 104 | score_method: flow # [flow, flow_depth] 105 | bestN: 106 | enable: False 107 | num_bestN: 2000 108 | sampled_kp: 109 | enable: False 110 | rigid_flow_kp: 111 | enable: False 112 | num_row: 10 113 | num_col: 10 114 | num_bestN: 2000 115 | score_method: flow # [flow, flow_depth] 116 | thre: 3 117 | 118 | 119 | depth_consistency: 120 | enable: False 121 | thre: 0.05 122 | flow_consistency: 123 | enable: True 124 | thre: 0.1 125 | 126 | # ---------------------------------------------------------------------------- 127 | # Visualization 128 | # ---------------------------------------------------------------------------- 129 | visualization: 130 | save_img: True 131 | kp_src: kp_best 132 | flow: 133 | vis_full_flow: True 134 | vis_back_flow: True 135 | vis_flow_diff: True 136 | match: 137 | kp_num: 100 # -1 for using all 138 | vis_temp: 139 | enable: True 140 | vis_side: 141 | enable: True 142 | inlier_plot: False 143 | 144 | depth: 145 | vis_full_disp: True 146 | use_tracking_depth: False 147 | mask: 148 | vis_masks: True 149 | trajectory: 150 | vis_traj: True 151 | draw_scale: 0.6 152 | mono_scale: 1 #5.4; 0.2 153 | vis_gt_traj: True 154 | 155 | directory: 
156 | # img_seq_dir: dataset/kitti_odom/odom_data_jpg/ 157 | # gt_pose_dir: dataset/kitti_odom/gt_poses/ 158 | # depth_dir: ../robust-vo/dataset/kitti_odom/depth/ 159 | img_seq_dir: dataset/kitti_raw 160 | gt_pose_dir: dataset/kitti_raw 161 | depth_dir: /home/hyzhan/harddisk_local/DOM/result/kitti_raw_gt_depth -------------------------------------------------------------------------------- /dfvo/options/unit_test/kitti_1.yml: -------------------------------------------------------------------------------- 1 | # This configuration corresponds to Reference Model 2 | 3 | # ---------------------------------------------------------------------------- 4 | # Basic setup 5 | # ---------------------------------------------------------------------------- 6 | debug: False 7 | use_multiprocessing: False 8 | seq: 2011_10_03_drive_0047_sync 9 | dataset: kitti_raw # [kitti_odom, kitti_raw] 10 | seed: 4869 11 | result_dir: result/tmp/0 12 | frame_step: 1 13 | image: 14 | height: 192 15 | width: 640 16 | # height: 370 17 | # width: 1226 18 | ext: jpg 19 | 20 | # ---------------------------------------------------------------------------- 21 | # tracking options 22 | # ---------------------------------------------------------------------------- 23 | tracking_method: 3 24 | # 3: hybrid 25 | # 1: PnP 26 | 27 | compute_2d2d_pose: 28 | ransac: 29 | reproj_thre: 0.2 30 | repeat: 5 31 | validity: 32 | method: GRIC # [flow, homo_ratio, GRIC] 33 | thre: 34 | kp_src: kp_best # [kp_best, kp_list, kp_rigid] 35 | 36 | PnP: 37 | ransac: 38 | iter: 1000 39 | reproj_thre: 1 40 | repeat: 5 41 | kp_src: kp_best # [kp_best, kp_list, kp_rigid] 42 | 43 | # translation scale 44 | translation_scale: 45 | method: single # [single; iterative] 46 | # - dfvo: selcted kps 47 | ransac: 48 | method: depth_ratio # [abs_diff, depth_ratio] 49 | min_samples: 3 50 | max_trials: 100 51 | stop_prob: 0.99 52 | thre: 0.1 53 | kp_src: kp_best # [kp_best, kp_list, kp_rigid] 54 | 55 | # ---------------------------------------------------------------------------- 56 | # Deep Optical Flow 57 | # ---------------------------------------------------------------------------- 58 | deep_flow: 59 | forward_backward: True 60 | num_kp: 2000 61 | network: liteflow 62 | # liteflow: LiteFlowNet 63 | flow_net_weight: model_zoo/optical_flow/LiteFlowNet/network-default.pytorch 64 | # flow_net_weight: model_zoo/optical_flow/UnLiteFlowNet/kitti_odom/mono_640x192/flow.pth 65 | # flow_net_weight: /home/hyzhan/harddisk_local/DF-VO/robust-vo/deep_depth/monodepth2/checkpoint/kitti/flow/exp_2/0/09/M_640x192/models/weights_0/flow.pth 66 | batch_size: 1 67 | precomputed_flow: #tmp/result/flow/kitti_odom/liteflow_default/{}/npy 68 | 69 | crop: 70 | depth_crop: [[0.3, 1], [0, 1]] 71 | flow_crop: [[0, 1], [0, 1]] 72 | 73 | # ---------------------------------------------------------------------------- 74 | # Deep single-view depth 75 | # ---------------------------------------------------------------------------- 76 | depth: 77 | depth_src: # (blank: deep model inference; 0: GT depth) 78 | max_depth: 50 # [10/50/200] 79 | min_depth: 0 80 | pretrained_model: model_zoo/depth/kitti_odom/stereo/ 81 | # pretrained_model: model_zoo/depth/kitti_odom/mono_sc/ 82 | # pretrained_model: model_zoo/depth/kitti_odom/mono/ 83 | 84 | # ---------------------------------------------------------------------------- 85 | # Deep two-view pose 86 | # ---------------------------------------------------------------------------- 87 | pose_net: 88 | enable: False 89 | pretrained_model: 
model_zoo/pose/kitti_odom/stereo/ 90 | # pretrained_model: model_zoo/pose/kitti_odom/mono_sc/ 91 | # pretrained_model: model_zoo/pose/kitti_odom/mono/ 92 | 93 | # ---------------------------------------------------------------------------- 94 | # Keypoint selection 95 | # ---------------------------------------------------------------------------- 96 | kp_selection: 97 | uniform_filtered_bestN: 98 | enable: True 99 | num_row: 10 100 | num_col: 10 101 | num_bestN: 2000 102 | score_method: flow # [flow, flow_depth] 103 | bestN: 104 | enable: False 105 | num_bestN: 2000 106 | sampled_kp: 107 | enable: False 108 | rigid_flow_kp: 109 | enable: False 110 | num_row: 10 111 | num_col: 10 112 | num_bestN: 2000 113 | score_method: flow # [flow, flow_depth] 114 | thre: 3 115 | 116 | 117 | depth_consistency: 118 | enable: False 119 | thre: 0.05 120 | flow_consistency: 121 | enable: True 122 | thre: 0.1 123 | 124 | # ---------------------------------------------------------------------------- 125 | # Visualization 126 | # ---------------------------------------------------------------------------- 127 | visualization: 128 | enable: False 129 | save_img: True 130 | kp_src: kp_best 131 | flow: 132 | vis_full_flow: True 133 | vis_back_flow: True 134 | vis_flow_diff: True 135 | match: 136 | kp_num: 100 # -1 for using all 137 | vis_temp: 138 | enable: True 139 | vis_side: 140 | enable: True 141 | inlier_plot: False 142 | 143 | depth: 144 | vis_full_disp: True 145 | use_tracking_depth: False 146 | mask: 147 | vis_masks: True 148 | trajectory: 149 | vis_traj: True 150 | draw_scale: 0.6 151 | mono_scale: 1 #5.4; 0.2 152 | vis_gt_traj: True 153 | 154 | directory: 155 | img_seq_dir: dataset/kitti_raw_rv 156 | gt_pose_dir: #dataset/kitti_raw_rv 157 | depth_dir: #/home/hyzhan/harddisk_local/DOM/result/kitti_raw_gt_depth -------------------------------------------------------------------------------- /dfvo/options/unit_test/kitti_2.yml: -------------------------------------------------------------------------------- 1 | # This configuration corresponds to Reference Model 2 | 3 | # ---------------------------------------------------------------------------- 4 | # Basic setup 5 | # ---------------------------------------------------------------------------- 6 | debug: False 7 | use_multiprocessing: False 8 | seq: 2011_10_03_drive_0047_sync 9 | dataset: kitti_raw # [kitti_odom, kitti_raw] 10 | seed: 4869 11 | result_dir: result/tmp/0 12 | frame_step: 1 13 | image: 14 | # height: 192 15 | # width: 640 16 | height: 370 17 | width: 1226 18 | ext: jpg 19 | 20 | # ---------------------------------------------------------------------------- 21 | # tracking options 22 | # ---------------------------------------------------------------------------- 23 | tracking_method: 3 24 | # 3: hybrid 25 | # 1: PnP 26 | 27 | compute_2d2d_pose: 28 | ransac: 29 | reproj_thre: 0.2 30 | repeat: 5 31 | validity: 32 | method: GRIC # [flow, homo_ratio, GRIC] 33 | thre: 34 | kp_src: kp_best # [kp_best, kp_list, kp_rigid] 35 | 36 | PnP: 37 | ransac: 38 | iter: 1000 39 | reproj_thre: 1 40 | repeat: 5 41 | kp_src: kp_best # [kp_best, kp_list, kp_rigid] 42 | 43 | # translation scale 44 | translation_scale: 45 | method: iterative # [single; iterative] 46 | # - dfvo: selcted kps 47 | ransac: 48 | method: depth_ratio # [abs_diff, depth_ratio] 49 | min_samples: 3 50 | max_trials: 100 51 | stop_prob: 0.99 52 | thre: 0.1 53 | kp_src: kp_depth # [kp_best, kp_list, kp_rigid] 54 | 55 | # 
---------------------------------------------------------------------------- 56 | # Deep Optical Flow 57 | # ---------------------------------------------------------------------------- 58 | deep_flow: 59 | half_flow: False 60 | online_finetune: 61 | enable: False 62 | lr: 0.000001 63 | num_frames: 50 64 | loss: 65 | flow_consistency: 0.005 66 | flow_smoothness: 0.1 67 | forward_backward: True 68 | num_kp: 2000 69 | network: liteflow 70 | # liteflow: LiteFlowNet 71 | flow_net_weight: model_zoo/optical_flow/LiteFlowNet/network-default.pytorch 72 | # flow_net_weight: model_zoo/optical_flow/LiteFlowNet/network-default-doubled.pytorch 73 | # flow_net_weight: model_zoo/optical_flow/LiteFlowNet/network-kitti.pytorch 74 | # flow_net_weight: model_zoo/optical_flow/UnLiteFlowNet/kitti_odom/mono_640x192/flow.pth 75 | # flow_net_weight: /home/hyzhan/harddisk_local/DF-VO/robust-vo/deep_depth/monodepth2/checkpoint/kitti/flow/exp_2/0/10/M_640x192/models/weights_5/flow.pth 76 | batch_size: 1 77 | precomputed_flow: #tmp/result/flow/kitti_odom/liteflow_default/{}/npy 78 | 79 | crop: 80 | depth_crop: [[0.3, 1], [0, 1]] 81 | flow_crop: [[0, 1], [0, 1]] 82 | 83 | # ---------------------------------------------------------------------------- 84 | # Deep single-view depth 85 | # ---------------------------------------------------------------------------- 86 | depth: 87 | depth_src: # (blank: deep model inference; 0: GT depth) 88 | max_depth: 50 # [10/50/200] 89 | min_depth: 0 90 | pretrained_model: model_zoo/depth/kitti_odom/stereo/ 91 | # pretrained_model: model_zoo/depth/kitti_odom/mono_sc/ 92 | # pretrained_model: model_zoo/depth/kitti_odom/mono/ 93 | 94 | # ---------------------------------------------------------------------------- 95 | # Deep two-view pose 96 | # ---------------------------------------------------------------------------- 97 | pose_net: 98 | enable: False 99 | pretrained_model: model_zoo/pose/kitti_odom/stereo/ 100 | # pretrained_model: model_zoo/pose/kitti_odom/mono_sc/ 101 | # pretrained_model: model_zoo/pose/kitti_odom/mono/ 102 | 103 | # ---------------------------------------------------------------------------- 104 | # Keypoint selection 105 | # ---------------------------------------------------------------------------- 106 | kp_selection: 107 | uniform_filtered_bestN: 108 | enable: True 109 | num_row: 10 110 | num_col: 10 111 | num_bestN: 2000 112 | score_method: flow # [flow, flow_depth, flow_ratio] 113 | bestN: 114 | enable: False 115 | num_bestN: 2000 116 | sampled_kp: 117 | enable: False 118 | rigid_flow_kp: 119 | enable: True 120 | num_row: 10 121 | num_col: 10 122 | num_bestN: 2000 123 | score_method: flow # [flow, flow_depth] 124 | thre: 3 125 | 126 | 127 | depth_consistency: 128 | enable: False 129 | thre: 0.05 130 | flow_consistency: 131 | enable: True 132 | thre: 0.1 133 | 134 | # ---------------------------------------------------------------------------- 135 | # Visualization 136 | # ---------------------------------------------------------------------------- 137 | visualization: 138 | enable: False 139 | save_img: True 140 | kp_src: kp_best 141 | flow: 142 | vis_full_flow: True 143 | vis_back_flow: True 144 | vis_flow_diff: True 145 | match: 146 | kp_num: 100 # -1 for using all 147 | vis_temp: 148 | enable: True 149 | vis_side: 150 | enable: True 151 | inlier_plot: False 152 | 153 | depth: 154 | vis_full_disp: True 155 | use_tracking_depth: False 156 | mask: 157 | vis_masks: True 158 | trajectory: 159 | vis_traj: True 160 | draw_scale: 0.6 161 | mono_scale: 1 #5.4; 
0.2 162 | vis_gt_traj: True 163 | 164 | directory: 165 | # img_seq_dir: dataset/kitti_odom/odom_data_jpg/ 166 | # gt_pose_dir: dataset/kitti_odom/gt_poses/ 167 | # depth_dir: #../robust-vo/dataset/kitti_odom/depth/ 168 | img_seq_dir: dataset/kitti_raw 169 | gt_pose_dir: dataset/kitti_raw 170 | depth_dir: /home/hyzhan/harddisk_local/DOM/result/kitti_raw_gt_depth -------------------------------------------------------------------------------- /dfvo/options/unit_test/kitti_3.yml: -------------------------------------------------------------------------------- 1 | # This configuration corresponds to Reference Model 2 | 3 | # ---------------------------------------------------------------------------- 4 | # Basic setup 5 | # ---------------------------------------------------------------------------- 6 | debug: False 7 | use_multiprocessing: False 8 | seq: "10" 9 | dataset: kitti_odom # [kitti_odom, kitti_raw] 10 | seed: 4869 11 | frame_step: 1 12 | image: 13 | height: 192 14 | width: 640 15 | # height: 370 16 | # width: 1226 17 | ext: jpg 18 | 19 | directory: 20 | result_dir: result/tmp/0 21 | img_seq_dir: dataset/kitti_odom/odom_data_jpg/ 22 | # img_seq_dir: dataset/kitti_odom/kitti_odom_rv/ 23 | gt_pose_dir: dataset/kitti_odom/gt_poses/ 24 | depth_dir: #../robust-vo/dataset/kitti_odom/depth/ 25 | # img_seq_dir: dataset/kitti_raw 26 | # gt_pose_dir: dataset/kitti_raw 27 | # depth_dir: /home/hyzhan/harddisk_local/DOM/result/kitti_raw_gt_depth 28 | 29 | # ---------------------------------------------------------------------------- 30 | # tracking options 31 | # ---------------------------------------------------------------------------- 32 | tracking_method: PnP 33 | # 3: hybrid 34 | # 1: PnP 35 | 36 | e_tracker: 37 | ransac: 38 | reproj_thre: 0.2 39 | repeat: 3 40 | validity: 41 | method: GRIC # [flow, homo_ratio, GRIC] 42 | thre: 43 | kp_src: kp_best # [kp_best, kp_list, kp_rigid] 44 | 45 | pnp_tracker: 46 | ransac: 47 | iter: 1000 48 | reproj_thre: 1 49 | repeat: 3 50 | kp_src: kp_best # [kp_best, kp_list, kp_rigid] 51 | 52 | # translation scale 53 | scale_recovery: 54 | method: simple # [single; iterative] 55 | # - dfvo: selcted kps 56 | ransac: 57 | method: depth_ratio # [abs_diff, depth_ratio] 58 | min_samples: 3 59 | max_trials: 100 60 | stop_prob: 0.99 61 | thre: 0.1 62 | kp_src: kp_best # [kp_best, kp_list, kp_rigid] 63 | 64 | # ---------------------------------------------------------------------------- 65 | # Deep Optical Flow 66 | # ---------------------------------------------------------------------------- 67 | deep_flow: 68 | half_flow: False 69 | online_finetune: 70 | enable: False 71 | lr: 0.000001 72 | num_frames: 50 73 | loss: 74 | flow_consistency: 0.005 75 | flow_smoothness: 0.1 76 | forward_backward: True 77 | num_kp: 2000 78 | network: liteflow 79 | # liteflow: LiteFlowNet 80 | # flow_net_weight: model_zoo/optical_flow/LiteFlowNet/network-default.pytorch 81 | # flow_net_weight: model_zoo/optical_flow/LiteFlowNet/network-default-doubled.pytorch 82 | # flow_net_weight: model_zoo/optical_flow/LiteFlowNet/network-kitti.pytorch 83 | # flow_net_weight: model_zoo/optical_flow/UnLiteFlowNet/kitti_odom/mono_640x192/flow.pth 84 | # flow_net_weight: /home/hyzhan/harddisk_local/DF-VO/robust-vo/deep_depth/monodepth2/checkpoint/kitti/flow/exp_2/0/10/M_640x192/models/weights_5/flow.pth 85 | batch_size: 1 86 | precomputed_flow: #tmp/result/flow/kitti_odom/liteflow_default/{}/npy 87 | 88 | crop: 89 | depth_crop: [[0.3, 1], [0, 1]] 90 | flow_crop: [[0, 1], [0, 1]] 91 | 92 | # 
---------------------------------------------------------------------------- 93 | # Deep single-view depth 94 | # ---------------------------------------------------------------------------- 95 | depth: 96 | depth_src: # (blank: deep model inference; 0: GT depth) 97 | max_depth: 50 # [10/50/200] 98 | min_depth: 0 99 | pretrained_model: model_zoo/depth/kitti_odom/stereo/ 100 | # pretrained_model: model_zoo/depth/kitti_odom/mono_sc/ 101 | # pretrained_model: model_zoo/depth/kitti_odom/mono/ 102 | 103 | # ---------------------------------------------------------------------------- 104 | # Deep two-view pose 105 | # ---------------------------------------------------------------------------- 106 | pose_net: 107 | enable: False 108 | pretrained_model: model_zoo/pose/kitti_odom/stereo/ 109 | # pretrained_model: model_zoo/pose/kitti_odom/mono_sc/ 110 | # pretrained_model: model_zoo/pose/kitti_odom/mono/ 111 | 112 | # ---------------------------------------------------------------------------- 113 | # Keypoint selection 114 | # ---------------------------------------------------------------------------- 115 | kp_selection: 116 | local_bestN: 117 | enable: True 118 | num_row: 10 119 | num_col: 10 120 | num_bestN: 2000 121 | score_method: flow # [flow, flow_depth, flow_ratio] 122 | thre: 0.1 123 | bestN: 124 | enable: False 125 | num_bestN: 2000 126 | sampled_kp: 127 | enable: False 128 | rigid_flow_kp: 129 | enable: False 130 | num_row: 10 131 | num_col: 10 132 | num_bestN: 2000 133 | score_method: flow # [flow, flow_depth] 134 | rigid_flow_thre: 3 135 | optical_flow_thre: 0.1 136 | 137 | 138 | depth_consistency: 139 | enable: False 140 | thre: 0.05 141 | flow_consistency: 142 | enable: True 143 | thre: 0.1 144 | 145 | # ---------------------------------------------------------------------------- 146 | # Visualization 147 | # ---------------------------------------------------------------------------- 148 | visualization: 149 | enable: True 150 | save_img: True 151 | kp_src: kp_best 152 | flow: 153 | vis_forward_flow: True 154 | vis_backward_flow: True 155 | vis_flow_diff: True 156 | kp_match: 157 | kp_num: 100 # -1 for using all 158 | vis_temp: 159 | enable: True 160 | vis_side: 161 | enable: True 162 | inlier_plot: False 163 | 164 | depth: 165 | # vis_full_disp: True 166 | depth_disp: disp 167 | use_tracking_depth: False 168 | mask: 169 | vis_masks: True 170 | trajectory: 171 | vis_traj: True 172 | draw_scale: 0.6 173 | mono_scale: 1 #5.4; 0.2 174 | vis_gt_traj: True 175 | 176 | -------------------------------------------------------------------------------- /dfvo/options/unit_test/tum_0.yml: -------------------------------------------------------------------------------- 1 | # This configuration corresponds to Reference Model 2 | 3 | # ---------------------------------------------------------------------------- 4 | # Basic setup 5 | # ---------------------------------------------------------------------------- 6 | debug: False 7 | use_multiprocessing: False 8 | seq: "rgbd_dataset_freiburg3_long_office_household" 9 | dataset: tum-1 # [kitti_odom, kitti_raw, tum-1/2/3] 10 | seed: 4869 11 | result_dir: result/tmp/0 12 | frame_step: 1 13 | image: 14 | height: 480 15 | width: 640 16 | # height: 370 17 | # width: 1226 18 | ext: png 19 | 20 | # ---------------------------------------------------------------------------- 21 | # tracking options 22 | # ---------------------------------------------------------------------------- 23 | tracking_method: 3 24 | # 3: hybrid 25 | # 1: PnP 26 | 27 | 
compute_2d2d_pose: 28 | ransac: 29 | reproj_thre: 0.2 30 | repeat: 5 31 | validity: 32 | method: GRIC # [flow, homo_ratio, GRIC] 33 | thre: 34 | kp_src: kp_best # [kp_best, kp_list, kp_rigid] 35 | 36 | PnP: 37 | ransac: 38 | iter: 1000 39 | reproj_thre: 1 40 | repeat: 5 41 | kp_src: kp_best # [kp_best, kp_list, kp_rigid] 42 | 43 | # translation scale 44 | translation_scale: 45 | method: single # [single; iterative] 46 | # - dfvo: selcted kps 47 | ransac: 48 | method: depth_ratio # [abs_diff, depth_ratio] 49 | min_samples: 3 50 | max_trials: 100 51 | stop_prob: 0.99 52 | thre: 0.1 53 | kp_src: kp_best # [kp_best, kp_list, kp_rigid] 54 | 55 | # ---------------------------------------------------------------------------- 56 | # Deep Optical Flow 57 | # ---------------------------------------------------------------------------- 58 | deep_flow: 59 | forward_backward: True 60 | num_kp: 2000 61 | network: liteflow 62 | # liteflow: LiteFlowNet 63 | flow_net_weight: model_zoo/optical_flow/LiteFlowNet/network-default.pytorch 64 | # flow_net_weight: model_zoo/optical_flow/UnLiteFlowNet/kitti_odom/mono_640x192/flow.pth 65 | # flow_net_weight: /home/hyzhan/harddisk_local/DF-VO/robust-vo/deep_depth/monodepth2/checkpoint/kitti/flow/exp_2/0/09/M_640x192/models/weights_0/flow.pth 66 | batch_size: 1 67 | precomputed_flow: #tmp/result/flow/kitti_odom/liteflow_default/{}/npy 68 | 69 | crop: 70 | depth_crop: [[0.3, 1], [0, 1]] 71 | flow_crop: [[0, 1], [0, 1]] 72 | 73 | # ---------------------------------------------------------------------------- 74 | # Deep single-view depth 75 | # ---------------------------------------------------------------------------- 76 | depth: 77 | depth_src: # (blank: deep model inference; 0: GT depth) 78 | max_depth: 50 # [10/50/200] 79 | min_depth: 0 80 | # pretrained_model: model_zoo/depth/kitti_odom/stereo/ 81 | # pretrained_model: model_zoo/depth/nyuv2/supervised/ 82 | pretrained_model: model_zoo/depth/nyuv2/self_supervised/ 83 | 84 | # ---------------------------------------------------------------------------- 85 | # Deep two-view pose 86 | # ---------------------------------------------------------------------------- 87 | pose_net: 88 | enable: False 89 | pretrained_model: model_zoo/pose/kitti_odom/stereo/ 90 | # pretrained_model: model_zoo/pose/kitti_odom/mono_sc/ 91 | # pretrained_model: model_zoo/pose/kitti_odom/mono/ 92 | 93 | # ---------------------------------------------------------------------------- 94 | # Keypoint selection 95 | # ---------------------------------------------------------------------------- 96 | kp_selection: 97 | uniform_filtered_bestN: 98 | enable: True 99 | num_row: 10 100 | num_col: 10 101 | num_bestN: 2000 102 | score_method: flow # [flow, flow_depth] 103 | bestN: 104 | enable: False 105 | num_bestN: 2000 106 | sampled_kp: 107 | enable: False 108 | rigid_flow_kp: 109 | enable: False 110 | num_row: 10 111 | num_col: 10 112 | num_bestN: 2000 113 | score_method: flow # [flow, flow_depth] 114 | thre: 3 115 | 116 | 117 | depth_consistency: 118 | enable: False 119 | thre: 0.05 120 | flow_consistency: 121 | enable: True 122 | thre: 0.1 123 | 124 | # ---------------------------------------------------------------------------- 125 | # Visualization 126 | # ---------------------------------------------------------------------------- 127 | visualization: 128 | save_img: True 129 | kp_src: kp_best 130 | flow: 131 | vis_full_flow: True 132 | vis_back_flow: True 133 | vis_flow_diff: True 134 | match: 135 | kp_num: 100 # -1 for using all 136 | vis_temp: 137 
| enable: True 138 | vis_side: 139 | enable: True 140 | inlier_plot: False 141 | 142 | depth: 143 | vis_full_disp: True 144 | use_tracking_depth: False 145 | mask: 146 | vis_masks: True 147 | trajectory: 148 | vis_traj: True 149 | draw_scale: 100 150 | mono_scale: 1.8 #5.4; 0.2 151 | vis_gt_traj: True 152 | 153 | directory: 154 | img_seq_dir: dataset/tum/rgbd_slam 155 | gt_pose_dir: dataset/tum/rgbd_slam 156 | depth_dir: dataset/tum/rgbd_slam -------------------------------------------------------------------------------- /dfvo/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/best-of-acrv/toposlam/3ec1dda210722d86bf77f101dca57ba27baa5833/dfvo/tools/__init__.py -------------------------------------------------------------------------------- /dfvo/tools/evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/best-of-acrv/toposlam/3ec1dda210722d86bf77f101dca57ba27baa5833/dfvo/tools/evaluation/__init__.py -------------------------------------------------------------------------------- /dfvo/tools/evaluation/odometry/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/best-of-acrv/toposlam/3ec1dda210722d86bf77f101dca57ba27baa5833/dfvo/tools/evaluation/odometry/__init__.py -------------------------------------------------------------------------------- /dfvo/tools/evaluation/odometry/eval_odom.py: -------------------------------------------------------------------------------- 1 | '''''' 2 | ''' 3 | @Author: Huangying Zhan (huangying.zhan.work@gmail.com) 4 | @Date: 2019-09-01 5 | @Copyright: Copyright (C) Huangying Zhan 2020. All rights reserved. Please refer to the license file. 6 | @LastEditTime: 2020-05-27 7 | @LastEditors: Huangying Zhan 8 | @Description: This program evaluate KITTI odometry result 9 | ''' 10 | 11 | import argparse 12 | 13 | from dfvo.tools.evaluation.odometry.kitti_odometry import KittiEvalOdom 14 | 15 | 16 | def argument_parsing(): 17 | """Argument parsing 18 | 19 | Returns: 20 | args (args): arguments 21 | """ 22 | parser = argparse.ArgumentParser(description='KITTI Odometry evaluation') 23 | parser.add_argument('--result', type=str, required=True, 24 | help="Result directory") 25 | parser.add_argument('--gt', type=str, 26 | default="results/gt/", 27 | help="GT Pose directory containing gt pose txt files") 28 | parser.add_argument('--align', type=str, 29 | choices=['scale', 'scale_7dof', '7dof', '6dof'], 30 | default=None, 31 | help="alignment type") 32 | parser.add_argument('--seqs', 33 | nargs="+", 34 | help="sequences to be evaluated", 35 | default=None) 36 | args = parser.parse_args() 37 | 38 | return args 39 | 40 | 41 | if __name__ == '__main__': 42 | # argument parsing 43 | args = argument_parsing() 44 | 45 | # initialize evaluation tool 46 | eval_tool = KittiEvalOdom() 47 | 48 | continue_flag = input("Evaluate result in [{}]? 
[y/n]".format(args.result)) 49 | if continue_flag == "y": 50 | eval_tool.eval( 51 | args.gt, 52 | args.result, 53 | alignment=args.align, 54 | seqs=args.seqs, 55 | ) 56 | else: 57 | print("Double check the path!") 58 | -------------------------------------------------------------------------------- /dfvo/tools/evaluation/robotcar/get_gt_poses.py: -------------------------------------------------------------------------------- 1 | '''''' 2 | ''' 3 | @Author: Huangying Zhan (huangying.zhan.work@gmail.com) 4 | @Date: 1970-01-01 5 | @Copyright: Copyright (C) Huangying Zhan 2020. All rights reserved. Please refer to the license file. 6 | @LastEditTime: 2020-07-08 7 | @Description: Get GT poses (KITTI format) for robotcar 8 | ''' 9 | 10 | import numpy as np 11 | import os 12 | 13 | from sdk_python.interpolate_poses import interpolate_vo_poses 14 | from dfvo.libs.general.utils import save_traj, mkdir_if_not_exists 15 | 16 | for seq in [ 17 | "2014-05-06-12-54-54", 18 | "2014-05-06-13-09-52", 19 | "2014-05-06-13-14-58", 20 | "2014-05-06-13-17-51", 21 | "2014-05-14-13-46-12", 22 | "2014-05-14-13-50-20", 23 | "2014-05-14-13-53-47", 24 | "2014-05-14-13-59-05", 25 | "2014-06-25-16-22-15",]: 26 | 27 | # seq = "2014-05-06-13-14-58" 28 | dataset_dir = "dataset/robotcar" 29 | time_offset = 20 30 | 31 | result_dir = "dataset/robotcar/gt_poses_20" 32 | 33 | # Load data 34 | timestamp_txt = os.path.join(dataset_dir, seq, "stereo.timestamps") 35 | timestamps = np.loadtxt(timestamp_txt)[:, 0].astype(np.int) 36 | origin_timestamp = list(timestamps) 37 | 38 | raw_vo_path = os.path.join(dataset_dir, seq, "vo/vo.csv") 39 | 40 | poses = interpolate_vo_poses(raw_vo_path, origin_timestamp, origin_timestamp[time_offset]) 41 | 42 | # transformation 43 | T = np.array([ 44 | [0, 1, 0, 0], 45 | [0, 0, 1, 0], 46 | [1, 0, 0, 0], 47 | [0, 0, 0, 1] 48 | ]) 49 | 50 | poses_dict = {} 51 | for i in range(time_offset, len(poses)): 52 | # poses_dict[i-time_offset] = np.asarray(poses[i]) 53 | poses_dict[i-time_offset] = T @ np.asarray(poses[i]) @ np.linalg.inv(T) 54 | 55 | mkdir_if_not_exists(result_dir) 56 | save_traj(os.path.join(result_dir, "{}.txt".format(seq)), poses_dict) -------------------------------------------------------------------------------- /dfvo/tools/evaluation/tum_tool/associate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # Software License Agreement (BSD License) 3 | # 4 | # Copyright (c) 2013, Juergen Sturm, TUM 5 | # All rights reserved. 6 | # 7 | # Redistribution and use in source and binary forms, with or without 8 | # modification, are permitted provided that the following conditions 9 | # are met: 10 | # 11 | # * Redistributions of source code must retain the above copyright 12 | # notice, this list of conditions and the following disclaimer. 13 | # * Redistributions in binary form must reproduce the above 14 | # copyright notice, this list of conditions and the following 15 | # disclaimer in the documentation and/or other materials provided 16 | # with the distribution. 17 | # * Neither the name of TUM nor the names of its 18 | # contributors may be used to endorse or promote products derived 19 | # from this software without specific prior written permission. 20 | # 21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 25 | # COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 | # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 29 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 31 | # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 | # POSSIBILITY OF SUCH DAMAGE. 33 | # 34 | # Requirements: 35 | # sudo apt-get install python-argparse 36 | 37 | """ 38 | The Kinect provides the color and depth images in an un-synchronized way. This means that the set of time stamps from the color images do not intersect with those of the depth images. Therefore, we need some way of associating color images to depth images. 39 | 40 | For this purpose, you can use the ''associate.py'' script. It reads the time stamps from the rgb.txt file and the depth.txt file, and joins them by finding the best matches. 41 | """ 42 | 43 | import argparse 44 | import sys 45 | import os 46 | import numpy 47 | 48 | 49 | def read_file_list(filename): 50 | """ 51 | Reads a trajectory from a text file. 52 | 53 | File format: 54 | The file format is "stamp d1 d2 d3 ...", where stamp denotes the time stamp (to be matched) 55 | and "d1 d2 d3.." is arbitary data (e.g., a 3D position and 3D orientation) associated to this timestamp. 56 | 57 | Input: 58 | filename -- File name 59 | 60 | Output: 61 | dict -- dictionary of (stamp,data) tuples 62 | 63 | """ 64 | file = open(filename) 65 | data = file.read() 66 | lines = data.replace(","," ").replace("\t"," ").split("\n") 67 | list = [[v.strip() for v in line.split(" ") if v.strip()!=""] for line in lines if len(line)>0 and line[0]!="#"] 68 | list = [(float(l[0]),l[1:]) for l in list if len(l)>1] 69 | return dict(list) 70 | 71 | def associate(first_list, second_list,offset,max_difference): 72 | """ 73 | Associate two dictionaries of (stamp,data). As the time stamps never match exactly, we aim 74 | to find the closest match for every input tuple. 
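Matching is greedy: candidate pairs within max_difference are sorted by absolute time difference and accepted best-first, so each timestamp is matched at most once.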
75 | 76 | Input: 77 | first_list -- first dictionary of (stamp,data) tuples 78 | second_list -- second dictionary of (stamp,data) tuples 79 | offset -- time offset between both dictionaries (e.g., to model the delay between the sensors) 80 | max_difference -- search radius for candidate generation 81 | 82 | Output: 83 | matches -- list of matched tuples ((stamp1,data1),(stamp2,data2)) 84 | 85 | """ 86 | first_keys = list(first_list.keys()) 87 | second_keys = list(second_list.keys()) 88 | potential_matches = [(abs(a - (b + offset)), a, b) 89 | for a in first_keys 90 | for b in second_keys 91 | if abs(a - (b + offset)) < max_difference] 92 | potential_matches.sort() 93 | matches = [] 94 | for diff, a, b in potential_matches: 95 | if a in first_keys and b in second_keys: 96 | first_keys.remove(a) 97 | second_keys.remove(b) 98 | matches.append((a, b)) 99 | 100 | matches.sort() 101 | return matches 102 | 103 | if __name__ == '__main__': 104 | 105 | # parse command line 106 | parser = argparse.ArgumentParser(description=''' 107 | This script takes two data files with timestamps and associates them 108 | ''') 109 | parser.add_argument('first_file', help='first text file (format: timestamp data)') 110 | parser.add_argument('second_file', help='second text file (format: timestamp data)') 111 | parser.add_argument('--first_only', help='only output associated lines from first file', action='store_true') 112 | parser.add_argument('--offset', help='time offset added to the timestamps of the second file (default: 0.0)',default=0.0) 113 | parser.add_argument('--max_difference', help='maximally allowed time difference for matching entries (default: 0.02)',default=0.02) 114 | args = parser.parse_args() 115 | 116 | first_list = read_file_list(args.first_file) 117 | second_list = read_file_list(args.second_file) 118 | 119 | matches = associate(first_list, second_list,float(args.offset),float(args.max_difference)) 120 | 121 | if args.first_only: 122 | for a,b in matches: 123 | print("%f %s"%(a," ".join(first_list[a]))) 124 | else: 125 | for a,b in matches: 126 | print("%f %s %f %s"%(a," ".join(first_list[a]),b-float(args.offset)," ".join(second_list[b]))) 127 | 128 | 129 | -------------------------------------------------------------------------------- /dfvo/tools/generate_flow_prediction.py: -------------------------------------------------------------------------------- 1 | '''''' 2 | ''' 3 | @Author: Huangying Zhan (huangying.zhan.work@gmail.com) 4 | @Date: 2020-05-07 5 | @Copyright: Copyright (C) Huangying Zhan 2020. All rights reserved. Please refer to the license file. 
6 | @LastEditTime: 2020-05-28 7 | @LastEditors: Huangying Zhan 8 | @Description: This program generates optical flow prediction for KITTI Flow 2012/2015 9 | ''' 10 | 11 | import argparse 12 | import cv2 13 | from glob import glob 14 | import numpy as np 15 | import os 16 | import scipy.misc 17 | import torch 18 | from tqdm import tqdm 19 | 20 | from dfvo.libs.deep_models.flow.lite_flow_net.lite_flow import LiteFlow 21 | from dfvo.libs.general.utils import * 22 | 23 | 24 | def argument_parsing(): 25 | """Argument parsing 26 | 27 | Returns: 28 | args (args): arguments 29 | """ 30 | parser = argparse.ArgumentParser(description='Generate optical flow predictions for KITTI Flow 2012/2015') 31 | parser.add_argument("--result", type=str, required=True, 32 | help="Result output directory, RESULT/data will be created") 33 | parser.add_argument("--dataset", type=str, required=True, 34 | choices=["kitti2012", "kitti2015"], 35 | help="Dataset choice: kitti2012, kitti2015") 36 | parser.add_argument("--test", action="store_true", 37 | help="Test testing split. If not set, training split") 38 | parser.add_argument("--model", type=str, required=True, 39 | help="Model weight path") 40 | parser.add_argument("--flow_mask_thre" , type=float, 41 | default=None, 42 | help="Forward-backward flow consistency mask threshold. If non-zero, mask is used") 43 | args = parser.parse_args() 44 | return args 45 | 46 | 47 | def initialize_deep_flow_model(h, w, weight): 48 | """Initialize optical flow network 49 | 50 | Args: 51 | h (int): image height 52 | w (int): image width 53 | 54 | Returns: 55 | flow_net (nn.Module): optical flow network 56 | """ 57 | flow_net = LiteFlow(h, w) 58 | flow_net.initialize_network_model( 59 | weight_path=weight 60 | ) 61 | return flow_net 62 | 63 | 64 | def read_image(path): 65 | """read image data and convert to RGB 66 | 67 | Args: 68 | path (str): image path 69 | 70 | Returns: 71 | img (array, [HxWx3]): image data 72 | """ 73 | img = cv2.imread(path, 1) 74 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 75 | return img 76 | 77 | 78 | def get_img_idxs(dataset, is_test): 79 | """Get image paths 80 | 81 | Args: 82 | dataset (str): dataset type 83 | 84 | - kitti2012: All kitti-2012 image 85 | - kitti2015: All kitti-2015 image 86 | is_test (bool): Use testing set if true 87 | 88 | Returns: 89 | img_idx (list): Indexs of test image 90 | """ 91 | if dataset == "kitti2012": 92 | if is_test: 93 | raise NotImplementedError 94 | else: 95 | return [i for i in range(194)] 96 | 97 | elif dataset == "kitti2015": 98 | if is_test: 99 | raise NotImplementedError 100 | else: 101 | return [i for i in range(200)] 102 | 103 | 104 | if __name__ == '__main__': 105 | # Basic setup 106 | ref_h = 370 107 | ref_w = 1226 108 | 109 | # argument parsing 110 | args = argument_parsing() 111 | 112 | # Create result directory 113 | dirs = {} 114 | dirs['result'] = args.result 115 | mkdir_if_not_exists(os.path.join(dirs['result'], "data")) 116 | 117 | # Get dataset directory 118 | dirs['img_data'] = { 119 | "kitti2012": "dataset/kitti_flow_2012/{}/colored_0", 120 | "kitti2015": "dataset/kitti_flow_2015/{}/image_2", 121 | }[args.dataset] 122 | 123 | if args.test: 124 | dirs['img_data'] = dirs['img_data'].format("testing") 125 | else: 126 | dirs['img_data'] = dirs['img_data'].format("training") 127 | 128 | img_idxs = get_img_idxs(args.dataset, args.test) 129 | 130 | # initalize network 131 | flow_net = initialize_deep_flow_model(ref_h, ref_w, args.model) 132 | 133 | 134 | for i in tqdm(img_idxs): 135 | # get image paths 136 | 
img1_path = os.path.join(dirs['img_data'] , "{:06}_10.png".format(i)) 137 | img2_path = os.path.join(dirs['img_data'] , "{:06}_11.png".format(i)) 138 | 139 | # load image 140 | img1 = read_image(img1_path) 141 | img2 = read_image(img2_path) 142 | h, w, _ = img1.shape 143 | 144 | # resize image 145 | img1 = cv2.resize(img1, (ref_w, ref_h)) 146 | img2 = cv2.resize(img2, (ref_w, ref_h)) 147 | 148 | cur_imgs = [np.transpose((img1)/255, (2, 0, 1))] 149 | ref_imgs = [np.transpose((img2)/255, (2, 0, 1))] 150 | ref_imgs = np.asarray(ref_imgs) 151 | cur_imgs = np.asarray(cur_imgs) 152 | 153 | ''' prediction ''' 154 | flows = {} 155 | # Flow inference 156 | batch_flows = flow_net.inference_flow( 157 | img1=cur_imgs[0:1], 158 | img2=ref_imgs[0:1], 159 | flow_dir=None, 160 | forward_backward=True, 161 | dataset="kitti") 162 | 163 | flows = batch_flows['forward'] 164 | 165 | # resie flows back to original size 166 | flows = flow_net.resize_dense_flow(torch.from_numpy(flows), h, w) 167 | flows = flows.detach().cpu().numpy()[0] 168 | 169 | ''' Save result ''' 170 | _, h, w = flows.shape 171 | flows3 = np.ones((h, w, 3)) 172 | 173 | if args.flow_mask_thre is not None: 174 | resized_mask = cv2.resize(batch_flows['flow_diff'][0,:,:,0], (w, h)) 175 | flow_mask = (resized_mask < args.flow_mask_thre) * 1 176 | flows3[:, :, 0] = flow_mask 177 | flows3[:, :, 2] = flows[0] * 64 + 2**15 178 | flows3[:, :, 1] = flows[1] * 64 + 2**15 179 | flows3 = flows3.astype(np.uint16) 180 | 181 | 182 | out_png = os.path.join(dirs['result'], 'data', '{:06}_10.png'.format(i)) 183 | cv2.imwrite(out_png, flows3) 184 | 185 | -------------------------------------------------------------------------------- /dfvo/tools/generate_kitti_raw_pose.py: -------------------------------------------------------------------------------- 1 | '''''' 2 | ''' 3 | @Author: Huangying Zhan (huangying.zhan.work@gmail.com) 4 | @Date: 2020-05-20 5 | @Copyright: Copyright (C) Huangying Zhan 2020. All rights reserved. Please refer to the license file. 
6 | @LastEditTime: 2020-05-27 7 | @LastEditors: Huangying Zhan 8 | @Description: This program generates ground truth pose of KITTI Raw dataset 9 | ''' 10 | 11 | import argparse 12 | import os 13 | 14 | from dfvo.libs.general.utils import load_poses_from_oxts, save_traj 15 | from dfvo.libs.general.utils import mkdir_if_not_exists 16 | 17 | 18 | def argument_parsing(): 19 | """Argument parsing 20 | 21 | Returns: 22 | args (args): arguments 23 | """ 24 | parser = argparse.ArgumentParser(description='Ground truth pose generation for KITTI Raw dataset') 25 | parser.add_argument('--result_dir', type=str, 26 | default="dataset/kitti_raw_pose", 27 | help="Result directory") 28 | parser.add_argument('--data_dir', type=str, 29 | default="dataset/kitti_raw", 30 | help="Raw dataset directory") 31 | parser.add_argument('--seqs', 32 | nargs="+", 33 | help="sequences to be processed", 34 | default=None) 35 | args = parser.parse_args() 36 | return args 37 | 38 | 39 | if __name__ == '__main__': 40 | # argument parsing 41 | args = argument_parsing() 42 | 43 | for seq in args.seqs: 44 | # get gps data dir 45 | gps_info_dir = os.path.join( 46 | args.data_dir, 47 | seq[:10], 48 | seq, 49 | "oxts/data" 50 | ) 51 | 52 | # load poses 53 | gt_poses = load_poses_from_oxts(gps_info_dir) 54 | 55 | # save poses 56 | traj_txt = os.path.join(args.result_dir, "{}.txt".format(seq)) 57 | save_traj(traj_txt, gt_poses, format="kitti") 58 | -------------------------------------------------------------------------------- /dfvo/tools/undistort_robotcar.py: -------------------------------------------------------------------------------- 1 | # ''' 2 | # @Author: Huangying Zhan (huangying.zhan.work@gmail.com) 3 | # @Date: 1970-01-01 4 | # @Copyright: Copyright (C) Huangying Zhan 2020. All rights reserved. Please refer to the license file. 5 | # @LastEditTime: 2020-07-02 6 | # @LastEditors: Huangying Zhan 7 | # @Description: This tool undistort Oxford Robotcar sequences 8 | # ''' 9 | # 10 | # import argparse 11 | # 12 | # from dfvo.tools.evaluation.robotcar.sdk_python.image import load_image 13 | # from dfvo.tools.evaluation.robotcar.sdk_python.camera_model import CameraModel 14 | # 15 | # 16 | # 17 | # def argument_parsing(): 18 | # """Argument parsing 19 | # 20 | # Returns: 21 | # args (args): arguments 22 | # """ 23 | # parser = argparse.ArgumentParser(description='KITTI Odometry evaluation') 24 | # parser.add_argument('--data_dir', type=str, 25 | # default="dataset/robotcar/raw_data/", 26 | # help="GT Pose directory containing gt pose txt files") 27 | # parser.add_argument('--result', type=str, required=True, 28 | # default="dataset/robotcar/" 29 | # help="Result directory") 30 | # parser.add_argument('--seqs', 31 | # nargs="+", 32 | # help="sequences to be undistorted", 33 | # default=None) 34 | # args = parser.parse_args() 35 | # 36 | # return args 37 | # 38 | # 39 | # if __name__ == '__main__': 40 | # # argument parsing 41 | # args = argument_parsing() 42 | # 43 | # # initialize evaluation tool 44 | # eval_tool = KittiEvalOdom() 45 | # 46 | # continue_flag = input("Evaluate result in [{}]? 
[y/n]".format(args.result)) 47 | # if continue_flag == "y": 48 | # eval_tool.eval( 49 | # args.gt, 50 | # args.result, 51 | # alignment=args.align, 52 | # seqs=args.seqs, 53 | # ) 54 | # else: -------------------------------------------------------------------------------- /drawer/utils.py: -------------------------------------------------------------------------------- 1 | import signal, getch 2 | 3 | 4 | def interrupted(signum, frame): 5 | return 6 | 7 | 8 | def input(): 9 | try: 10 | foo = getch.getch() 11 | return foo 12 | except: 13 | return 14 | 15 | 16 | def waitKey(timeout): 17 | """ 18 | the same function as cv2.waitKey() 19 | :param timeout: 20 | :return: 21 | """ 22 | # set alarmd 23 | signal.signal(signal.SIGALRM, interrupted) 24 | signal.setitimer(signal.ITIMER_REAL, timeout / 1000) 25 | s = input() 26 | # disable the alarm after success 27 | signal.setitimer(signal.ITIMER_REAL, 0) 28 | 29 | return s 30 | -------------------------------------------------------------------------------- /envs/README: -------------------------------------------------------------------------------- 1 | Besides the required packages in the requirements.yaml, the following two libraries are also required to manually installed. 2 | 3 | 1. "pangolin" for visualization, following the install instructions here: https://github.com/uoip/pangolin 4 | 5 | 2. "g2opy" for pose graph optimization, following the install instructions here: https://github.com/uoip/g2opy 6 | -------------------------------------------------------------------------------- /envs/min_requirements.yml: -------------------------------------------------------------------------------- 1 | name: toposlam 2 | channels: 3 | - pytorch 4 | - anaconda 5 | - conda-forge 6 | - default 7 | dependencies: 8 | - _libgcc_mutex=0.1=conda_forge 9 | - _openmp_mutex=4.5=1_llvm 10 | - ca-certificates=2020.10.14=0 11 | - certifi=2020.6.20=py36_0 12 | - cffi=1.14.5=py36hc120d54_0 13 | - cudatoolkit=10.0.130=hf841e97_8 14 | - cudnn=7.6.5=cuda10.0_0 15 | - cupy=6.0.0=py36hc0ce245_0 16 | - faiss=1.6.3=py36hbcec7e0_3_cuda 17 | - fastrlock=0.5=py36he6710b0_0 18 | - freetype=2.10.4=h0708190_1 19 | - jpeg=9d=h36c2ea0_0 20 | - lcms2=2.12=hddcbb42_0 21 | - ld_impl_linux-64=2.35.1=hea4e1c9_2 22 | - libblas=3.9.0=8_openblas 23 | - libcblas=3.9.0=8_openblas 24 | - libfaiss=1.6.3=hb17eacc_3_cuda 25 | - libffi=3.3=h58526e2_2 26 | - libgcc-ng=9.3.0=h2828fa1_18 27 | - libgfortran-ng=9.3.0=hff62375_18 28 | - libgfortran5=9.3.0=hff62375_18 29 | - libgomp=9.3.0=h2828fa1_18 30 | - liblapack=3.9.0=8_openblas 31 | - libopenblas=0.3.12=pthreads_h4812303_1 32 | - libpng=1.6.37=h21135ba_2 33 | - libstdcxx-ng=9.3.0=h6de172a_18 34 | - libtiff=4.2.0=hdc55705_0 35 | - libwebp-base=1.2.0=h7f98852_0 36 | - llvm-openmp=11.0.1=h4bd325d_0 37 | - lz4-c=1.9.3=h9c3ff4c_0 38 | - mkl=2020.4=h726a3e6_304 39 | - nccl=1.3.5=cuda10.0_0 40 | - ncurses=6.2=h58526e2_4 41 | - ninja=1.10.2=h4bd325d_0 42 | - numpy=1.19.5=py36h2aa4a07_1 43 | - olefile=0.46=pyh9f0ad1d_1 44 | - openssl=1.1.1h=h7b6447c_0 45 | - pillow=8.1.1=py36ha6010c0_0 46 | - pip=21.0.1=pyhd8ed1ab_0 47 | - pycparser=2.20=pyh9f0ad1d_2 48 | - python=3.6.12=hcff3b4d_2 49 | - python_abi=3.6=1_cp36m 50 | - pytorch=1.0.1=py3.6_cuda10.0.130_cudnn7.4.2_2 51 | - readline=8.0=he28a2e2_2 52 | - setuptools=49.6.0=py36h5fab9bb_3 53 | - six=1.15.0=pyh9f0ad1d_0 54 | - sqlite=3.34.0=h74cdb3f_0 55 | - tk=8.6.10=h21135ba_1 56 | - torchvision=0.2.2=py_3 57 | - wheel=0.36.2=pyhd3deb0d_0 58 | - xz=5.2.5=h516909a_1 59 | - zlib=1.2.11=h516909a_1010 60 | - 
zstd=1.4.8=ha95c52a_1 61 | - pip: 62 | - cycler==0.10.0 63 | - easydict==1.9 64 | - getch==1.0 65 | - kiwisolver==1.3.1 66 | - matplotlib==3.0.3 67 | - opencv-python==3.4.3.18 68 | - pyopengl==3.1.0 69 | - pyparsing==2.4.7 70 | - python-dateutil==2.8.1 71 | - pyyaml==5.1 72 | - scikit-learn==0.20.3 73 | - scipy==1.2.1 74 | - tqdm==4.58.0 75 | -------------------------------------------------------------------------------- /lcd/.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /lcd/.idea/loop_closure_detect.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 12 | -------------------------------------------------------------------------------- /lcd/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /lcd/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /lcd/.idea/workspace.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 11 | 12 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 33 | 34 | 35 | 36 | 37 | 56 | 57 | 58 | 77 | 78 | 79 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 1597480121258 113 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | file://$USER_HOME$/anaconda3/envs/SPR/lib/python3.6/site-packages/torch/nn/modules/module.py 124 | 541 125 | 127 | 128 | 129 | 130 | -------------------------------------------------------------------------------- /lcd/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/best-of-acrv/toposlam/3ec1dda210722d86bf77f101dca57ba27baa5833/lcd/__init__.py -------------------------------------------------------------------------------- /lcd/extract_deep_vlad_feature.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision.models as models 4 | from lcd.netvlad import NetVLAD 5 | 6 | 7 | class ExtractDeepVladFeature(nn.Module): 8 | def __init__(self, net_vlad_ckp): 9 | 10 | super(ExtractDeepVladFeature, self).__init__() 11 | 12 | # model for feature extraction, consisting of encoder and netvlad two parts 13 | self.model = nn.Module() 14 | 15 | # vgg16 encoder 16 | encoder = models.vgg16(pretrained=False) 17 | 18 | # capture only feature part and remove last relu and maxpool 19 | layers = list(encoder.features.children())[:-2] 20 | 21 | # do not need to update parameters 22 | for l in layers[:-5]: 23 | for p in l.parameters(): 24 | p.requires_grad = False 25 | 26 | encoder = nn.Sequential(*layers) 27 | 28 | # add the vgg16 encoder to the model 29 | self.model.add_module('encoder', encoder) 30 | 31 | # NetVlad 32 | netvlad = NetVLAD() 33 | 34 | # add netvlad to the model 35 | self.model.add_module('pool', netvlad) 36 | 37 | # load pre-trained weights into the model 38 | # ckp = 'model_zoo/net_vlad/checkpoint.pth.tar' 39 | check_point = torch.load(net_vlad_ckp) 40 | print('==> Initialize NetVlad with [{}]'.format(net_vlad_ckp)) 41 | 
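        # The checkpoint is expected to store its weights under 'state_dict',
        # with keys matching the submodules registered above ('encoder.*' for
        # the VGG16 layers and 'pool.*' for the NetVLAD layer).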
self.model.load_state_dict(check_point['state_dict']) 42 | 43 | self.model.eval() 44 | 45 | def forward(self, x): 46 | with torch.no_grad(): 47 | x = self.model.encoder(x) 48 | x = self.model.pool(x) 49 | 50 | return x 51 | -------------------------------------------------------------------------------- /lcd/hmm_utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | The code is based on the following paper: 3 | "Scalable place recognition under appearance change for autonomous driving." Doan, Anh-Dzung, et al. ICCV 2019 4 | ''' 5 | 6 | from __future__ import absolute_import, division, print_function 7 | import numpy as np 8 | from scipy import sparse 9 | 10 | 11 | def graph_update(node_num, image_ids, trans_mat, feature, image_id, max_time_step): 12 | ''' 13 | This function assumes that all the added images are consecutive video frames and that they are added in temporal order. 14 | 15 | Input: 16 | node_num: the number of previously added image features; this is also the number of features in 17 | the database, obtained from the FAISS index structure via its 'ntotal' 18 | attribute 19 | 20 | image_ids: a list of length N1, corresponding to the ids of the images (features) in the 'database' 21 | 22 | trans_mat: a 2D matrix of N1 x N1, the transition matrix of all the features in the 'database', 23 | defined in Sec. 5.1. dtype: scipy sparse matrix 24 | 25 | feature: row vector(s) of size n x D, the new deep vlad feature(s) to be added to the 'database', 26 | extracted from the image(s) to be added, dtype: numpy array 27 | 28 | image_id: a list of length n, the corresponding id(s) of the image(s) to be added, dtype: list 29 | 30 | max_time_step: the maximum step size for affinity matrix computation, defined in Sec. 
5.1, dtype: int 31 | 32 | Output: 33 | image_ids_updated: the updated image id list of length N2, where N2 = N1 + n 34 | trans_mat_updated: the updated transition matrix of size N2 x N2 35 | ''' 36 | 37 | N1 = node_num # N1 features in the database before update 38 | n = feature.shape[0] # n features to be added to the database 39 | N2 = N1 + n # there will be N2 features in the database after update 40 | 41 | # update state transition matrix 42 | start_row = max([0, N1 - max_time_step]) # from which row the node transition probabilities need to be updated 43 | 44 | trans_mat_keep = trans_mat[0: start_row, :] 45 | zeros_block = sparse.csr_matrix(np.zeros([trans_mat_keep.shape[0], N2 - trans_mat_keep.shape[1]])) 46 | trans_mat_keep = sparse.hstack([trans_mat_keep, zeros_block]) # extend the column number of the kept block to N2 47 | 48 | trans_mat_new = update_state_transition_matrix(start_row, N2, max_time_step) 49 | trans_mat_updated = sparse.vstack([trans_mat_keep, trans_mat_new]) 50 | trans_mat_updated = sparse.csr_matrix(trans_mat_updated) 51 | 52 | # update image_ids 53 | image_ids_updated = image_ids + image_id 54 | 55 | return image_ids_updated, trans_mat_updated 56 | 57 | 58 | def update_state_transition_matrix(start_row, total_row, max_time_step): 59 | ''' 60 | This function incrementally updates the state transition matrix with the newly added images (features) 61 | Input: 62 | start_row: the index of the first row whose node transition probabilities need to be updated 63 | total_row: the total number of rows in the transition matrix after update 64 | max_time_step: the maximum step size to construct the edges in the graph 65 | 66 | Output: 67 | trans_mat_new: a matrix of size (total_row - start_row) x total_row, each row sums to 1 68 | ''' 69 | 70 | gamma = 5 71 | gamma = gamma**2 72 | 73 | trans_mat_new = np.zeros([total_row - start_row, total_row]) 74 | for row in range(total_row - start_row): 75 | col_idx = list(range(np.max([0, row + start_row - max_time_step]), 76 | np.min([total_row - 1, row + start_row + max_time_step]) + 1)) 77 | col_value = np.array(col_idx) 78 | trans_mat_new[row, col_idx] = np.exp(- (row - col_value + start_row)**2 / gamma) 79 | 80 | # normalize each row to sum to 1 81 | trans_mat_new = trans_mat_new.T 82 | trans_mat_new /= np.sum(trans_mat_new, 0) 83 | trans_mat_new = trans_mat_new.T 84 | 85 | return sparse.csr_matrix(trans_mat_new) 86 | 87 | 88 | def do_filter(trans_mat, obs_model, last_belief, filtering_count, belief_init_step): 89 | 90 | [node_num, input_len] = obs_model.shape 91 | 92 | AT = sparse.csr_matrix(trans_mat).T 93 | belief_all = np.zeros([node_num, input_len]) 94 | belief = last_belief 95 | 96 | for i in range(input_len): 97 | if filtering_count == 0: # re-initialize the belief 98 | init_dist = np.ones([node_num, 1]) / node_num 99 | belief = normalize_belief(init_dist.flatten() * obs_model[:, 0]) 100 | belief_all[:, i] = belief 101 | 102 | filtering_count += 1 103 | else: 104 | belief = normalize_belief(AT.dot(belief) * obs_model[:, i]) 105 | belief_all[:, i] = belief 106 | 107 | filtering_count += 1 108 | 109 | if filtering_count == belief_init_step: # set the filtering count to zero for belief re-initialization 110 | filtering_count = 0 111 | 112 | return filtering_count, belief_all 113 | 114 | 115 | def normalize_belief(belief): 116 | ''' input should be a vector ''' 117 | 118 | z = belief.sum() 119 | belief = belief / z if z != 0 else 0 120 | 121 | return belief 122 | 123 | 124 | def normalize(img): 125 | ''' this function 
is used to normalize an image stored as a numpy array ''' 126 | 127 | # this normalizes the input image with its mean and standard deviation 128 | c, h, w = img.shape 129 | mean = np.mean(img.reshape(c, -1), 1) 130 | std = np.std(img.reshape(c, -1), 1) 131 | mean = np.expand_dims(np.expand_dims(mean, axis=1), axis=1) 132 | std = np.expand_dims(np.expand_dims(std, axis=1), axis=1) 133 | 134 | return (img - mean) / std 135 | 136 | # # this normalizes the input image with its maximum and minimum values; the output is in the range [0, 1] 137 | # c, h, w = img.shape 138 | # img_max = np.max(img.reshape(c, -1), 1) 139 | # img_min = np.min(img.reshape(c, -1), 1) 140 | # img_max = np.expand_dims(img_max, [1, 2]) 141 | # img_min = np.expand_dims(img_min, [1, 2]) 142 | # 143 | # return 2 * ((img - img_min) / (img_max - img_min) - 0.5) 144 | -------------------------------------------------------------------------------- /lcd/netvlad.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class NetVLAD(nn.Module): 7 | """NetVLAD layer implementation""" 8 | 9 | def __init__(self, num_clusters=64, dim=512, normalize_input=True): 10 | """ 11 | Args: 12 | num_clusters : int 13 | The number of clusters 14 | dim : int 15 | Dimension of descriptors 16 | alpha : float 17 | Parameter of initialization (not used in this implementation). 18 | normalize_input : bool 19 | If true, descriptor-wise L2 normalization is applied to input. 20 | """ 21 | super(NetVLAD, self).__init__() 22 | self.num_clusters = num_clusters 23 | self.dim = dim 24 | self.normalize_input = normalize_input 25 | self.conv = nn.Conv2d(dim, num_clusters, kernel_size=(1, 1), bias=False) 26 | self.conv.bias = None 27 | self.centroids = nn.Parameter(torch.rand(num_clusters, dim)) 28 | 29 | def forward(self, x): 30 | N, C = x.shape[:2] 31 | 32 | # across descriptor dim 33 | x = F.normalize(x, p=2, dim=1) 34 | 35 | # soft-assignment 36 | soft_assign = self.conv(x).view(N, self.num_clusters, -1) 37 | soft_assign = F.softmax(soft_assign, dim=1) 38 | 39 | x_flatten = x.view(N, C, -1) 40 | 41 | # calculate residuals to each cluster 42 | vlad = torch.zeros([N, self.num_clusters, C], dtype=x.dtype, layout=x.layout, device=x.device) 43 | for k in range(self.num_clusters): # slower than non-looped, but lower memory usage 44 | residual = x_flatten.unsqueeze(0).permute(1, 0, 2, 3) - \ 45 | self.centroids[k:k + 1, :].expand(x_flatten.size(-1), -1, -1).permute(1, 2, 0).unsqueeze(0) 46 | residual *= soft_assign[:, k:k + 1, :].unsqueeze(2) 47 | vlad[:, k:k + 1, :] = residual.sum(dim=-1) 48 | 49 | vlad = F.normalize(vlad, p=2, dim=2) # intra-normalization 50 | vlad = vlad.view(x.size(0), -1) # flatten 51 | vlad = F.normalize(vlad, p=2, dim=1) # L2 normalize 52 | 53 | return vlad 54 | -------------------------------------------------------------------------------- /loader/.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /loader/.idea/loader.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | -------------------------------------------------------------------------------- /loader/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 
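# Illustrative sketch (an assumption for demonstration, not a file from the
# repository): applying the NetVLAD layer defined in lcd/netvlad.py above to a
# batch of convolutional feature maps, using the default num_clusters=64, dim=512.

import torch
from lcd.netvlad import NetVLAD

vlad_layer = NetVLAD(num_clusters=64, dim=512)
feats = torch.rand(2, 512, 30, 40)    # N x C x H x W feature maps (e.g. VGG16 conv5 output)
desc = vlad_layer(feats)              # global descriptor of shape [2, 64 * 512]
assert desc.shape == (2, 64 * 512)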
-------------------------------------------------------------------------------- /loader/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /loader/.idea/workspace.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 11 | 12 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 33 | 34 | 35 | 36 | 37 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 1601470819277 69 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /loader/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/best-of-acrv/toposlam/3ec1dda210722d86bf77f101dca57ba27baa5833/loader/__init__.py -------------------------------------------------------------------------------- /loader/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | 4 | 5 | class Dataset(): 6 | def __init__(self, img_dir, calib_dir, height, width, ext): 7 | self.img_dir = img_dir 8 | self.calib_dir = calib_dir 9 | self.height = height 10 | self.width = width 11 | self.ext = ext 12 | 13 | self.img_id_list = [] 14 | 15 | def get_id_list(self): 16 | """ 17 | get all the image id in time order 18 | """ 19 | img_names = sorted(os.listdir(self.img_dir)) 20 | name_len = len(img_names[0]) 21 | ext_len = len(self.ext) 22 | id_len = name_len - ext_len - 1 23 | for i in range(len(img_names)): 24 | img_name = img_names[i] 25 | self.img_id_list.append(img_name[:id_len]) 26 | 27 | return self.img_id_list 28 | 29 | def get_image(self, img_id): 30 | img_path = os.path.join(self.img_dir, img_id) + '.' 
+ self.ext 31 | img = cv2.imread(img_path) 32 | img = cv2.resize(img, (self.width, self.height)) 33 | 34 | return img 35 | 36 | def get_intrinsics_param(self): 37 | """ 38 | Get the camera intrinsic parameters 39 | :return: camera intrinsic parameters 40 | """ 41 | 42 | raise NotImplementedError 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /loader/kitti.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from loader.dataset import Dataset 4 | from loader.utils import load_kitti_odom_intrinsics 5 | 6 | 7 | class KITTIOdom(Dataset): 8 | def __init__(self, img_dir, calib_dir, height, width, ext): 9 | super(KITTIOdom, self).__init__(img_dir, calib_dir, height, width, ext) 10 | 11 | def get_intrinsics_param(self): 12 | K = load_kitti_odom_intrinsics(os.path.join(self.calib_dir, "calib.txt"), self.height, self.width)[2] 13 | 14 | return K 15 | -------------------------------------------------------------------------------- /loader/utils.py: -------------------------------------------------------------------------------- 1 | 2 | def load_kitti_odom_intrinsics(file_name, new_h, new_w): 3 | """Load KITTI odometry data intrinsics 4 | 5 | Args: 6 | file_name (str): txt file path 7 | 8 | Returns: 9 | intrinsics (dict): each element contains [cx, cy, fx, fy] 10 | """ 11 | raw_img_h = 370.0 12 | raw_img_w = 1226.0 13 | intrinsics = {} 14 | with open(file_name, 'r') as f: 15 | s = f.readlines() 16 | for cnt, line in enumerate(s): 17 | line_split = [float(i) for i in line.split(" ")[1:]] 18 | intrinsics[cnt] = [ 19 | line_split[2] / raw_img_w * new_w, 20 | line_split[6] / raw_img_h * new_h, 21 | line_split[0] / raw_img_w * new_w, 22 | line_split[5] / raw_img_h * new_h, 23 | ] 24 | return intrinsics 25 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | import os 4 | 5 | from slam import SLAM 6 | from dfvo.libs.general.configuration import ConfigLoader 7 | 8 | 9 | config_loader = ConfigLoader() 10 | 11 | 12 | def read_cfgs(): 13 | """Parse arguments and load configurations 14 | 15 | Returns 16 | ------- 17 | args : args 18 | arguments 19 | cfg : edict 20 | configuration dictionary 21 | """ 22 | ''' Argument Parsing ''' 23 | parser = argparse.ArgumentParser(description='SLAM system') 24 | parser.add_argument("-d", "--default_configuration", type=str, 25 | default="cfg/default.yml", 26 | help="default configuration file") 27 | parser.add_argument("-c", "--configuration", type=str, 28 | default=None, 29 | help="custom configuration file") 30 | parser.add_argument("-r", "--data_root", type=str, default="./data", 31 | help="path containing image sequence directory") 32 | parser.add_argument("-s", "--seq", type=str, default="09", 33 | help="which (kitti) image sequence to perform VO on") 34 | parser.add_argument("-e", "--ext", type=str, default="png", 35 | help="file extension of the images") 36 | 37 | args = parser.parse_args() 38 | 39 | ''' Read configuration ''' 40 | # read default and custom config, merge cfgs 41 | config_files = [args.default_configuration, args.configuration] 42 | cfg = config_loader.merge_cfg(config_files) 43 | 44 | return args, cfg 45 | 46 | 47 | if __name__ == '__main__': 48 | # Read config 49 | args, cfg = read_cfgs() 50 | 51 | # use images in the "image_2" folder for VO 52 | img_dir = os.path.join(args.data_root, args.seq, 'image_2') 53 | 54
| # folder path that contains the calibration file 55 | calib_dir = os.path.join(args.data_root, args.seq) 56 | 57 | # use CUDA 58 | device = torch.device("cuda") 59 | 60 | """ perform VO """ 61 | slam = SLAM(img_dir, calib_dir, cfg, device) 62 | slam.main() 63 | 64 | 65 | 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /misc/topo_slam.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/best-of-acrv/toposlam/3ec1dda210722d86bf77f101dca57ba27baa5833/misc/topo_slam.png --------------------------------------------------------------------------------
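# Usage sketch (assumes a KITTI odometry layout under ./data, i.e.
# ./data/09/image_2 for the images and ./data/09/calib.txt for the calibration
# file, which is what main.py and loader/kitti.py expect):
#
#     python main.py -d cfg/default.yml -r ./data -s 09 -e png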