├── .gitignore ├── LICENSE ├── README.md ├── config ├── __init__.py ├── config.py ├── dataset │ └── PITT.yaml └── network │ ├── spherevlad.yaml │ ├── spherevlad2.yaml │ └── spherevlad_visual.yaml ├── dataloader ├── __init__.py ├── data_augmentation.py ├── pittsburgh.py ├── triplet_dataloader.py └── utils.py ├── eval ├── __init__.py ├── eval_utils.py └── evaluate_pitt.py ├── generating_sph └── gene_pitt.py ├── models ├── __init__.py ├── loop_closure │ ├── __init__.py │ ├── aggregator │ │ ├── __init__.py │ │ └── netvlad.py │ ├── lidar │ │ ├── __init__.py │ │ ├── spherevlad.py │ │ └── spherevlad2.py │ ├── losses │ │ ├── __init__.py │ │ └── lazy_quadruplet_loss.py │ ├── transformer │ │ ├── __init__.py │ │ └── attention.py │ └── visual │ │ ├── __init__.py │ │ └── spherevlad.py └── robotLCD.py ├── pics ├── spherevlad.png └── spherevlad2.png ├── requirements.txt ├── train_lcd.py └── utils ├── __init__.py ├── equirectRotate ├── EquirectRotate.py ├── __init__.py └── utils.py ├── geometry ├── __init__.py ├── equi_trans.py └── so3_rotate.py ├── log_print.py ├── logger.py └── pointProcess.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # 10 | cache/ 11 | third/ 12 | data/results 13 | data/dataset 14 | log/ 15 | 16 | # Distribution / packaging 17 | .Python 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | share/python-wheels/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | MANIFEST 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Unit test / coverage reports 47 | htmlcov/ 48 | .tox/ 49 | .nox/ 50 | .coverage 51 | .coverage.* 52 | .cache 53 | nosetests.xml 54 | coverage.xml 55 | *.cover 56 | *.py,cover 57 | .hypothesis/ 58 | .pytest_cache/ 59 | cover/ 60 | 61 | # Translations 62 | *.mo 63 | *.pot 64 | 65 | # Django stuff: 66 | *.log 67 | local_settings.py 68 | db.sqlite3 69 | db.sqlite3-journal 70 | 71 | # Flask stuff: 72 | instance/ 73 | .webassets-cache 74 | 75 | # Scrapy stuff: 76 | .scrapy 77 | 78 | # Sphinx documentation 79 | docs/_build/ 80 | 81 | # PyBuilder 82 | .pybuilder/ 83 | target/ 84 | 85 | # Jupyter Notebook 86 | .ipynb_checkpoints 87 | 88 | # IPython 89 | profile_default/ 90 | ipython_config.py 91 | 92 | # pyenv 93 | # For a library or package, you might want to ignore these files since the code is 94 | # intended to run in multiple environments; otherwise, check them in: 95 | # .python-version 96 | 97 | # pipenv 98 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 99 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 100 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 101 | # install all needed dependencies. 102 | #Pipfile.lock 103 | 104 | # poetry 105 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 106 | # This is especially recommended for binary packages to ensure reproducibility, and is more 107 | # commonly ignored for libraries. 
108 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 109 | #poetry.lock 110 | 111 | # pdm 112 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 113 | #pdm.lock 114 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 115 | # in version control. 116 | # https://pdm.fming.dev/#use-with-ide 117 | .pdm.toml 118 | 119 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 120 | __pypackages__/ 121 | 122 | # Celery stuff 123 | celerybeat-schedule 124 | celerybeat.pid 125 | 126 | # SageMath parsed files 127 | *.sage.py 128 | 129 | # Environments 130 | .env 131 | .venv 132 | env/ 133 | venv/ 134 | ENV/ 135 | env.bak/ 136 | venv.bak/ 137 | 138 | # Spyder project settings 139 | .spyderproject 140 | .spyproject 141 | 142 | # Rope project settings 143 | .ropeproject 144 | 145 | # mkdocs documentation 146 | /site 147 | 148 | # mypy 149 | .mypy_cache/ 150 | .dmypy.json 151 | dmypy.json 152 | 153 | # Pyre type checker 154 | .pyre/ 155 | 156 | # pytype static type analyzer 157 | .pytype/ 158 | 159 | # Cython debug symbols 160 | cython_debug/ 161 | 162 | # PyCharm 163 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 164 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 165 | # and can be added to the global gitignore or merged into this file. For a more nuclear 166 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 167 | #.idea/ 168 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2023, METASLAM 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SphereVLAD Series: Viewpoint-invariant Place Recognition 2 | **1. PyTorch implementation of *SphereVLAD* and *SphereVLAD++*, proposed in the papers:**<br>
3 | [SeqSphereVLAD: Sequence Matching Enhanced Orientation-invariant Place Recognition](https://www.ri.cmu.edu/app/uploads/2020/11/0583.pdf)
4 | Peng Yin, Fuying Wang, Anton Egorov, Jiafan Hou, Ji Zhang, Howie Choset
5 | [SphereVLAD++: Attention-based and Signal-enhanced Viewpoint Invariant Descriptor](https://arxiv.org/abs/2207.02958)
6 | Shiqi Zhao, Peng Yin*, Ge Yi, Sebastian Scherer 7 | 8 | **2. Core library for our iSimLoc, AutoMerge and BioSLAM works:**
9 | [iSimLoc: Visual Global Localization for Previously Unseen Environments With Simulated Images](https://ieeexplore.ieee.org/document/10035430)
10 | Peng Yin; Ivan Cisneros; Shiqi Zhao; Ji Zhang; Howie Choset; Sebastian Scherer
11 | [AutoMerge: A Framework for Map Assembling and Smoothing in City-Scale Environments](https://ieeexplore.ieee.org/document/10203034)
12 | Peng Yin; Shiqi Zhao; Haowen Lai; Ruohai Ge; Ji Zhang; Howie Choset; Sebastian Scherer
13 | [BioSLAM: A Bioinspired Lifelong Memory System for General Place Recognition](https://ieeexplore.ieee.org/document/10242249)
14 | Peng Yin; Abulikemu Abuduweili; Shiqi Zhao; Lingyun Xu; Changliu Liu; Sebastian Scherer 15 | 16 | ### Network Structure 17 | ![](pics/spherevlad.png) 18 | ![](pics/spherevlad2.png) 19 | 20 | ### Environment Dependency 21 | 22 | **System** 23 | 24 | The codes are tested in:
25 | * Ubuntu 20.04 + CUDA 11.6 + Python 3.8.10<br>
26 | * Ubuntu 18.04 + CUDA 10.2 + Python 3.6.9<br>
27 | 
28 | **Python Environment Dependency** 
29 | 
30 | * [s2cnn](https://github.com/jonkhler/s2cnn) 
31 | * torch==1.13.0+cu116 torchaudio==0.13.0+cu116 torchvision==0.14.0+cu116 
32 | * cupy-cuda11x==11.6.0 
33 | 
34 | ### Installation 
35 | It is highly recommended to create a virtual environment with **conda** or **virtualenv**. 
36 | 
37 | ```bash 
38 | # create virtual env 
39 | virtualenv /path_to_env/spherevlad -p /usr/bin/python3 
40 | source /path_to_env/spherevlad/bin/activate 
41 | # or 
42 | conda create -n spherevlad python==3.8 
43 | conda activate spherevlad 
44 | 
45 | # install pytorch and cupy 
46 | pip3 install torch==1.13.0+cu116 torchvision==0.14.0+cu116 torchaudio==0.13.0 --extra-index-url https://download.pytorch.org/whl/cu116 
47 | pip3 install cupy-cuda11x==11.6.0 
48 | 
49 | # install s2cnn 
50 | pip3 install pynvrtc joblib 
51 | mkdir third && cd third 
52 | git clone https://github.com/jonkhler/s2cnn.git 
53 | cd s2cnn 
54 | python setup.py install 
55 | 
56 | # install all other packages 
57 | pip3 install -r requirements.txt 
58 | ``` 
59 | 
60 | ### Lidar Place Recognition 
61 | 
62 | **Data Structure** 
63 | 
64 | ``` 
65 | ├── data 
66 | ├── results 
67 | │ └── model_name 
68 | │ ├── DATA.yaml 
69 | │ ├── MODEL.yaml 
70 | │ ├── log.txt 
71 | │ └── pth 
72 | │ ├── model_xxx.pth 
73 | │ ├── ... 
74 | └── dataset 
75 | ├── PITT 
76 | │ ├── DATA 
77 | │ │ ├── Train1 
78 | │ │ │ ├── cloudGlobal.pcd 
79 | │ │ │ └── poses.txt 
80 | │ │ ├── ... 
81 | │ │ └── Trainxx 
82 | ``` 
83 | 
84 | **Note** 
85 | 
86 | Modify the ```PYTHONPATH``` environment variable to include the absolute path to the project root folder: 
87 | ``` 
88 | export PYTHONPATH=$PYTHONPATH:/path_to_spherevlad/SphereVLAD 
89 | ``` 
90 | 
91 | **Data Preparation** 
92 | 
93 | Download the sample data from [Dropbox](https://www.dropbox.com/scl/fi/45g8uns8gfmzmsxytjg2n/DATA.zip?rlkey=hs62zaaiub71f9g26ldhpyuir&dl=0) and put the unzipped folder in data/dataset/PITT. The resulting layout should match the one shown in the **Data Structure** section. 
94 | ```bash 
95 | mkdir data 
96 | python generating_sph/gene_pitt.py --config-network=config/network/spherevlad.yaml --config-dataset=config/dataset/PITT.yaml 
97 | ``` 
98 | 
99 | **Training on Alita Urban Dataset** 
100 | 
101 | ```bash 
102 | # spherevlad 
103 | python3 train_lcd.py --config-network=config/network/spherevlad.yaml --config-dataset=config/dataset/PITT.yaml 
104 | # spherevlad2 
105 | python3 train_lcd.py --config-network=config/network/spherevlad2.yaml --config-dataset=config/dataset/PITT.yaml 
106 | ``` 
107 | 
108 | * If there are issues with s2cnn, please download the [version](https://www.dropbox.com/scl/fi/hvc809dymwwq4g3s4stoa/s2cnn.zip?rlkey=awal9vpauog6xfr3l6z4t5uam&dl=0) we have modified and tested. 
109 | 
110 | * The generated weights will be saved at data/results/SphereVLAD-{DatasetName}-{DD}-{MM}-{YYYY}-{TIME}; the folder also contains the log and the configuration of the model and dataset. 
111 | 
112 | **Testing on Alita Urban Dataset** 
113 | 
114 | Pretrained weights on the Alita Urban Dataset: [Dropbox](https://www.dropbox.com/scl/fi/wkidjpayjuepa124c22rv/SphereVLAD.zip?rlkey=ce50l1ypznlc131ftbwj30o6y&dl=0). Please unzip it and put it in data/results. 
115 | 
116 | Please change the trajectory numbers in *line 45* and *line 68* of **eval/evaluate_pitt.py** first. 
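By default only trajectory 21 is evaluated (the loop reads `for traj_num in range(21, 22):` at those lines). Below is a minimal sketch of that edit, reusing the surrounding variables from `eval/evaluate_pitt.py` (`valPitt`, `trans_noise`, `rot_noise`, `noise`) and the trajectory split suggested by the commented-out `TEST_LIST` in `config/dataset/PITT.yaml`; adapt it to whichever trajectories you actually generated.

```python
# Sketch of the edit inside val() in eval/evaluate_pitt.py: evaluate several
# test trajectories instead of only trajectory 21 (range(21, 22)).
test_trajectories = [21, 22, 23, 24, 25]  # hypothetical split; match your generated train_* folders

total_recalls, total_time = [], 0
for traj_num in test_trajectories:
    recalls, _, running_time = valPitt.get_features_recall(traj_num, trans_noise, rot_noise, noise)
    total_recalls.append(recalls)
    total_time += running_time
```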
117 | ```bash 118 | # spherevlad 119 | python eval/evaluate_pitt.py --model-path=data/results/SphereVLAD --epoch=666 --noise=1 --type=recall --trans-noise=2 --rot-noise=180 --log=False 120 | # spherevlad2 121 | python eval/evaluate_pitt.py --model-path=data/results/SphereVLAD2 --epoch=666 --noise=1 --type=recall --trans-noise=2 --rot-noise=180 --log=False 122 | ``` 123 | * --model-path: the folder contains the configuration and trained weights 124 | * --epoch: the epoch of trained weights 125 | 126 | ### Hints on training and test on other dataset 127 | * Modify the dataloader/pittsburgh.py to fit the data structure of your own dataset 128 | * Modify the config/dataset/PITT.yaml to change the configuration of the dataset 129 | * Add new evaluation methods in eval/eval_utils.py and import it in train_lcd.py 130 | 131 | ### Citation 132 | ``` 133 | @INPROCEEDINGS{9341727, 134 | author={Yin, Peng and Wang, Fuying and Egorov, Anton and Hou, Jiafan and Zhang, Ji and Choset, Howie}, 135 | booktitle={2020 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)}, 136 | title={SeqSphereVLAD: Sequence Matching Enhanced Orientation-invariant Place Recognition}, 137 | year={2020}, 138 | volume={}, 139 | number={}, 140 | pages={5024-5029}, 141 | doi={10.1109/IROS45743.2020.9341727}} 142 | ``` 143 | ``` 144 | @ARTICLE{9956017, 145 | author={Zhao, Shiqi and Yin, Peng and Yi, Ge and Scherer, Sebastian}, 146 | journal={IEEE Robotics and Automation Letters}, 147 | title={SphereVLAD++: Attention-Based and Signal-Enhanced Viewpoint Invariant Descriptor}, 148 | year={2023}, 149 | volume={8}, 150 | number={1}, 151 | pages={256-263}, 152 | doi={10.1109/LRA.2022.3223555}} 153 | ``` 154 | ``` 155 | @article{yin2022alita, 156 | title={Alita: A large-scale incremental dataset for long-term autonomy}, 157 | author={Yin, Peng and Zhao, Shiqi and Ge, Ruohai and Cisneros, Ivan and Fu, Ruijie and Zhang, Ji and Choset, Howie and Scherer, Sebastian}, 158 | journal={arXiv preprint arXiv:2205.10737}, 159 | year={2022} 160 | } 161 | ``` 162 | ``` 163 | @ARTICLE{10035430, 164 | author={Yin, Peng and Cisneros, Ivan and Zhao, Shiqi and Zhang, Ji and Choset, Howie and Scherer, Sebastian}, 165 | journal={IEEE Transactions on Robotics}, 166 | title={iSimLoc: Visual Global Localization for Previously Unseen Environments With Simulated Images}, 167 | year={2023}, 168 | volume={39}, 169 | number={3}, 170 | pages={1893-1909}, 171 | doi={10.1109/TRO.2023.3238201}} 172 | 173 | ``` 174 | ``` 175 | @ARTICLE{10203034, 176 | author={Yin, Peng and Zhao, Shiqi and Lai, Haowen and Ge, Ruohai and Zhang, Ji and Choset, Howie and Scherer, Sebastian}, 177 | journal={IEEE Transactions on Robotics}, 178 | title={AutoMerge: A Framework for Map Assembling and Smoothing in City-Scale Environments}, 179 | year={2023}, 180 | volume={}, 181 | number={}, 182 | pages={1-19}, 183 | doi={10.1109/TRO.2023.3290448}} 184 | ``` 185 | ``` 186 | @ARTICLE{10242249, 187 | author={Yin, Peng and Abuduweili, Abulikemu and Zhao, Shiqi and Xu, Lingyun and Liu, Changliu and Scherer, Sebastian}, 188 | journal={IEEE Transactions on Robotics}, 189 | title={BioSLAM: A Bioinspired Lifelong Memory System for General Place Recognition}, 190 | year={2023}, 191 | volume={}, 192 | number={}, 193 | pages={1-20}, 194 | doi={10.1109/TRO.2023.3306615}} 195 | ``` 196 | -------------------------------------------------------------------------------- /config/__init__.py: -------------------------------------------------------------------------------- 1 | from .config 
import _C as cfg -------------------------------------------------------------------------------- /config/config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from yacs.config import CfgNode as CN 3 | 4 | _C = CN() 5 | 6 | #! ----------------------------------------------------------------------------- 7 | #! NEPTUNE 8 | #! ----------------------------------------------------------------------------- 9 | _C.PROJECT_NAME = '' 10 | _C.API_TOKEN = '' 11 | 12 | #! ----------------------------------------------------------------------------- 13 | #! INPUT 14 | #! ----------------------------------------------------------------------------- 15 | _C.DATA = CN() 16 | _C.DATA.DATASET_NAME = "" 17 | _C.DATA.BENCHMARK_DIR = "" # The parent folder of dataset 18 | _C.DATA.DATASET_TYPE = "baseline" # For Oxford Dataset 19 | # Projection for LiDAR point cloud 20 | # Spherical projection 21 | _C.DATA.SPH_PROJ = CN() 22 | _C.DATA.SPH_PROJ.RADIUS = 50.0 # Radius of ball query in submap generation 23 | _C.DATA.SPH_PROJ.IMAGE_SIZE = [64, 64] # Define the size of generated panorama 24 | _C.DATA.SPH_PROJ.FOV = [-90, 90] # Define the fov when genenrating panorama 25 | _C.DATA.SPH_PROJ.VIS_THRESH = 3.0 # Not used for now 26 | _C.DATA.SPH_PROJ.VIS_RADIUS = 3 # Not used for now 27 | _C.DATA.SPH_PROJ.OCCLUSION = 16 # Not used for now 28 | # BEV projection 29 | _C.DATA.BEV_PROJ = CN() 30 | _C.DATA.BEV_PROJ.IMAGE_SIZE = [64, 64] # Define the size of generated BEV 31 | _C.DATA.BEV_PROJ.Z_RANGE = [-100, 100] # Confine the points used to generate BEV 32 | _C.DATA.BEV_PROJ.MAX_DIS = 30 # The x and y max range of the point cloud, eg: a 33 | # point cloud x in [-100,100], this value will be 34 | # 100 35 | # Image 36 | _C.DATA.SPH_IM = CN() 37 | _C.DATA.SPH_IM.IMAGE_SIZE = [224, 224] 38 | _C.DATA.IM = CN() 39 | _C.DATA.IM.IMAGE_SIZE = [224, 224] 40 | # Distance settings for positive and negative frame 41 | _C.DATA.POSITIVES_RADIUS = 10.0 42 | _C.DATA.NEGATIVES_RADIUS = 50.0 43 | _C.DATA.TRAJ_RADIUS = 5.0e+3 44 | # Data spliting 45 | _C.DATA.TRAIN_LIST = [] 46 | _C.DATA.VAL_LIST = [] 47 | _C.DATA.TEST_LIST = [] 48 | # Pickles 49 | _C.DATA.TRAIN_PICKLE = 'train.pickle' 50 | _C.DATA.VAL_PICKLE = 'val.pickle' 51 | 52 | #! ----------------------------------------------------------------------------- 53 | #! DATASET 54 | #! ----------------------------------------------------------------------------- 55 | # Alita Campus dataset 56 | _C.DATA.ALITA_CAMPUS = CN() 57 | _C.DATA.ALITA_CAMPUS.DIRECTION = None 58 | _C.DATA.ALITA_CAMPUS.COLLECTED_TIME = None 59 | 60 | #! ----------------------------------------------------------------------------- 61 | #! MODEL 62 | #! 
----------------------------------------------------------------------------- 63 | _C.MODEL = CN() 64 | _C.MODEL.NAME = "" 65 | _C.MODEL.TYPE = "Lidar" # "Lidar", "LiSPH", "Image" 66 | 67 | # S2CNN 68 | _C.MODEL.S2CNN = CN() 69 | _C.MODEL.S2CNN.INPUT_CHANNEL_NUM = 1 70 | 71 | # NetVLAD 72 | _C.MODEL.NETVLAD = CN() 73 | _C.MODEL.NETVLAD.ENCODER = 'vgg16' 74 | _C.MODEL.NETVLAD.CLUSTER_NUM = 64 75 | _C.MODEL.NETVLAD.FEATURE_DIM = 128 76 | _C.MODEL.NETVLAD.NORMALIZE_INPUT = True 77 | _C.MODEL.NETVLAD.OUTPUT_DIM = None 78 | _C.MODEL.NETVLAD.GATE = False 79 | 80 | # PointNetVLAD 81 | _C.MODEL.POINTNETVLAD = CN() 82 | _C.MODEL.POINTNETVLAD.MAX_SAMPLES = 4096 83 | _C.MODEL.POINTNETVLAD.GLOBAL_FEAT = True 84 | _C.MODEL.POINTNETVLAD.FEATURE_TRANSFORM = True 85 | _C.MODEL.POINTNETVLAD.MAX_POOL = False 86 | _C.MODEL.POINTNETVLAD.CLUSTER_SIZE = 64 87 | _C.MODEL.POINTNETVLAD.OUTPUT_DIM = 256 88 | _C.MODEL.POINTNETVLAD.CONTEXT_GATING = True 89 | _C.MODEL.POINTNETVLAD.ADD_BATCH_NORM = True 90 | 91 | # AutoMerge 92 | _C.MODEL.AUTOMERGE = CN() 93 | _C.MODEL.AUTOMERGE.FEATURE_SIZE = 6 94 | _C.MODEL.AUTOMERGE.SELF_ATTEN = 'add' 95 | _C.MODEL.AUTOMERGE.CROSS_ATTEN = 'reweight' 96 | 97 | #! ----------------------------------------------------------------------------- 98 | #! LOSS 99 | #! ----------------------------------------------------------------------------- 100 | _C.LOSS = CN() 101 | _C.LOSS.NAME = "" 102 | _C.LOSS.MARGIN0 = 0.5 103 | _C.LOSS.MARGIN1 = 0.2 104 | _C.LOSS.MARGIN2 = 0.3 105 | 106 | #! ----------------------------------------------------------------------------- 107 | #! TRAINING CONFIGURATION 108 | #! ----------------------------------------------------------------------------- 109 | _C.TRAINING = CN() 110 | # Training or Test 111 | _C.TRAINING.IS_TRAIN = False 112 | # Number of workers 113 | _C.TRAINING.NUM_WORKERS = 8 # Number of workers when training 114 | # Batch 115 | _C.TRAINING.BATCH = CN() 116 | _C.TRAINING.BATCH.BATCH_SIZE = 2 # Batch size per gpu 117 | # Data Augment 118 | _C.TRAINING.BATCH.FRAME_TRANSFORM = 0 # several mode 119 | _C.TRAINING.BATCH.BATCH_TRANSFORM = False # Data argmentation for the whole batch 120 | # Number of positive and negative in a batch 121 | _C.TRAINING.BATCH.POSITIVES_PER_QUERY = 2 122 | _C.TRAINING.BATCH.NEGATIVES_PER_QUERY = 18 123 | # Optimizer 124 | _C.TRAINING.OPTIMIZER = CN() 125 | _C.TRAINING.OPTIMIZER.NAME = "Adam" 126 | _C.TRAINING.OPTIMIZER.INIT_LEARNING_RATE = 1e-4 127 | _C.TRAINING.OPTIMIZER.WEIGHT_DECAY = 0.001 # Not used for now 128 | # Scheduler 129 | _C.TRAINING.SCHEDULER = CN() 130 | _C.TRAINING.SCHEDULER.NAME = None 131 | _C.TRAINING.SCHEDULER.MILESTONES = [10] # For MultiStepLR 132 | _C.TRAINING.SCHEDULER.STEP_SIZE = None # For StepLR 133 | _C.TRAINING.SCHEDULER.GAMMA = 0.1 # For StepLR & MultiStepLR 134 | # GPU Index for Parallel Training 135 | _C.TRAINING.GPU = CN() 136 | _C.TRAINING.GPU.IDS = None # Choose to use which gpus 137 | # Resume 138 | _C.TRAINING.RESUME = False 139 | # Epoch 140 | _C.TRAINING.EPOCH = 20 141 | 142 | #! ----------------------------------------------------------------------------- 143 | #! TRAINED WEIGHT 144 | #! ----------------------------------------------------------------------------- 145 | _C.WEIGHT = CN() 146 | _C.WEIGHT.LOAD_ADDRESS = "" 147 | _C.WEIGHT.SAVE_ADDRESS = "" 148 | _C.WEIGHT.LOAD_OPTIMIZER = True 149 | _C.WEIGHT.FREEZE_WEIGHT = False 150 | 151 | #! ----------------------------------------------------------------------------- 152 | #! OUTPUT 153 | #! 
----------------------------------------------------------------------------- 154 | _C.OUTPUT = CN() 155 | _C.OUTPUT.DIR = "data/results" 156 | -------------------------------------------------------------------------------- /config/dataset/PITT.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | DATASET_NAME: "PITT" # should be same with the folder name 3 | BENCHMARK_DIR: "data/dataset/" 4 | # Projections 5 | SPH_PROJ: 6 | RADIUS: 20.0 # query radius for data generation 7 | IMAGE_SIZE: [64, 64] 8 | FOV: [-90, 90] 9 | BEV_PROJ: 10 | IMAGE_SIZE: [224, 224] 11 | Z_RANGE: [-10, 10] 12 | # Data spliting 13 | # TRAIN_LIST: [1,2,3,4,5,6,7,8,9,10,16,17,18,19,20] 14 | # VAL_LIST: [21,22,23,24,25] 15 | # TEST_LIST: [21,22,23,24,25] 16 | TRAIN_LIST: [1] 17 | VAL_LIST: [21] 18 | TEST_LIST: [21] -------------------------------------------------------------------------------- /config/network/spherevlad.yaml: -------------------------------------------------------------------------------- 1 | PROJECT_NAME: '' 2 | DATA: 3 | POSITIVES_RADIUS: 8.0 4 | NEGATIVES_RADIUS: 16.0 5 | MODEL: 6 | NAME: "SphereVLAD" 7 | TYPE: "LiSPH" 8 | S2CNN: 9 | INPUT_CHANNEL_NUM: 1 10 | NETVLAD: 11 | CLUSTER_NUM: 32 12 | FEATURE_DIM: 16 13 | NORMALIZE_INPUT: True 14 | OUTPUT_DIM: None 15 | GATE: False 16 | 17 | LOSS: 18 | NAME: "LazyQuadrupletLoss" 19 | MARGIN0: 0.5 20 | MARGIN1: 0.2 21 | 22 | TRAINING: 23 | NUM_WORKERS: 8 24 | BATCH: 25 | BATCH_SIZE: 6 26 | FRAME_TRANSFORM: 0 27 | BATCH_TRANSFORM: False 28 | # Select GPUs 29 | GPU: 30 | IDS: [0,1] 31 | # Optimizer 32 | OPTIMIZER: 33 | NAME: "Adam" 34 | INIT_LEARNING_RATE: 1e-4 35 | # Scheduler 36 | SCHEDULER: 37 | NAME: "MultiStepLR" 38 | MILESTONES: [10] 39 | GAMMA: 0.1 40 | # Epoch 41 | EPOCH: 20 42 | # Resume? 43 | RESUME: False 44 | 45 | # Save & Load weight 46 | WEIGHT: 47 | LOAD_ADDRESS: None 48 | SAVE_ADDRESS: None 49 | 50 | # where logs and weights will be saved at 51 | OUTPUT: 52 | DIR: "data/results" 53 | -------------------------------------------------------------------------------- /config/network/spherevlad2.yaml: -------------------------------------------------------------------------------- 1 | PROJECT_NAME: '' 2 | DATA: 3 | POSITIVES_RADIUS: 8.0 4 | NEGATIVES_RADIUS: 16.0 5 | MODEL: 6 | NAME: "SphereVLAD2" 7 | TYPE: "LiSPH" 8 | S2CNN: 9 | INPUT_CHANNEL_NUM: 1 10 | NETVLAD: 11 | CLUSTER_NUM: 32 12 | FEATURE_DIM: 16 13 | NORMALIZE_INPUT: True 14 | OUTPUT_DIM: None 15 | GATE: False 16 | 17 | LOSS: 18 | NAME: "LazyQuadrupletLoss" 19 | MARGIN0: 0.5 20 | MARGIN1: 0.2 21 | 22 | TRAINING: 23 | NUM_WORKERS: 8 24 | BATCH: 25 | BATCH_SIZE: 6 26 | FRAME_TRANSFORM: 0 27 | BATCH_TRANSFORM: False 28 | # Select GPUs 29 | GPU: 30 | IDS: [0,1] 31 | # Optimizer 32 | OPTIMIZER: 33 | NAME: "Adam" 34 | INIT_LEARNING_RATE: 1e-4 35 | # Scheduler 36 | SCHEDULER: 37 | NAME: "MultiStepLR" 38 | MILESTONES: [10] 39 | GAMMA: 0.1 40 | # Epoch 41 | EPOCH: 30 42 | # Resume? 
43 | RESUME: True 44 | 45 | # Save & Load weight 46 | WEIGHT: 47 | LOAD_ADDRESS: "" 48 | LOAD_OPTIMIZER: False 49 | SAVE_ADDRESS: None 50 | 51 | # where logs and weights will be saved at 52 | OUTPUT: 53 | DIR: "data/results" 54 | -------------------------------------------------------------------------------- /config/network/spherevlad_visual.yaml: -------------------------------------------------------------------------------- 1 | PROJECT_NAME: '' 2 | DATA: 3 | POSITIVES_RADIUS: 8.0 4 | NEGATIVES_RADIUS: 16.0 5 | MODEL: 6 | NAME: "SphereVLAD" 7 | TYPE: "Image" 8 | S2CNN: 9 | INPUT_CHANNEL_NUM: 3 10 | NETVLAD: 11 | CLUSTER_NUM: 32 12 | FEATURE_DIM: 16 13 | NORMALIZE_INPUT: True 14 | OUTPUT_DIM: None 15 | GATE: False 16 | 17 | LOSS: 18 | NAME: "LazyQuadrupletLoss" 19 | MARGIN0: 0.5 20 | MARGIN1: 0.2 21 | 22 | TRAINING: 23 | NUM_WORKERS: 8 24 | BATCH: 25 | BATCH_SIZE: 1 26 | FRAME_TRANSFORM: 0 27 | BATCH_TRANSFORM: False 28 | # Select GPUs 29 | GPU: 30 | IDS: [0,1] 31 | # Optimizer 32 | OPTIMIZER: 33 | NAME: "Adam" 34 | INIT_LEARNING_RATE: 1e-4 35 | # Scheduler 36 | SCHEDULER: 37 | NAME: "StepLR" 38 | MILESTONES: None 39 | STEP_SIZE: 5 40 | GAMMA: 0.1 41 | # Epoch 42 | EPOCH: 30 43 | # Resume? 44 | RESUME: False 45 | 46 | # Save & Load weight 47 | WEIGHT: 48 | LOAD_ADDRESS: None 49 | SAVE_ADDRESS: None 50 | 51 | # where logs and weights will be saved at 52 | OUTPUT: 53 | DIR: "data/results" 54 | -------------------------------------------------------------------------------- /dataloader/__init__.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import DataLoader 2 | 3 | from .utils import make_collate_fn 4 | from utils import log_print 5 | 6 | def make_data_loader(config, gpu_ids, is_train): 7 | ''' 8 | Args: 9 | config: parameter configurations 10 | gpu_ids: gpu indexes for training 11 | is_train: training or testing 12 | Returns: 13 | dataloader(type:class) 14 | ''' 15 | if is_train: 16 | log_print("Using dataset: %s" % config.DATA.DATASET_NAME, "g") 17 | 18 | if config.DATA.DATASET_NAME == "PITT": 19 | from .pittsburgh import PittsburghDataset 20 | dataloader = PittsburghDataset(config, is_train) 21 | else: 22 | raise ValueError(f"Unrecognized Dataset Name {config.DATA.DATASET_NAME}") 23 | 24 | #TODO collate functions 25 | if config.TRAINING.BATCH.BATCH_TRANSFORM: 26 | collate_fn = make_collate_fn(config) 27 | else: 28 | collate_fn = None 29 | # collate_fn=collate_fn, 30 | 31 | loader = DataLoader(dataloader, 32 | batch_size=config.TRAINING.BATCH.BATCH_SIZE*len(gpu_ids), 33 | num_workers=config.TRAINING.NUM_WORKERS if is_train else 4, 34 | pin_memory=True, 35 | shuffle=True if is_train else False, 36 | drop_last=True) 37 | return loader -------------------------------------------------------------------------------- /dataloader/data_augmentation.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: Shiqi Zhao 3 | data_augmentation 4 | 5 | Codes of Data Augmentation for Point Cloud is adopted from MinkLoc++ 6 | Original Code: https://github.com/jac99/MinkLocMultimodal 7 | ''' 8 | 9 | import random 10 | import math 11 | import numpy as np 12 | from scipy.linalg import expm, norm 13 | from PIL import Image 14 | 15 | import torch 16 | import torchvision.transforms as transforms 17 | 18 | from utils.geometry import get_projection_grid, rand_rotation_matrix, rotate_grid, project_2d_on_sphere 19 | 20 | 21 | class Augment_Point_Data(): 22 | def __init__(self, mode=0, is_train=False): 23 
| if is_train: 24 | if mode == 0: 25 | transform = [transforms.ToTensor()] 26 | elif mode == 1: 27 | transform = [transforms.ToTensor(), 28 | JitterPoints(sigma=0.001, clip=0.002), 29 | RemoveRandomPoints(r=(0.0, 0.1)), 30 | RandomTranslation(max_delta=0.01), 31 | RemoveRandomBlock(p=0.4), 32 | RandomRotation(axis=np.array([0,0,1]), 33 | max_theta=15, 34 | max_theta2=None)] 35 | else: 36 | raise NotImplementedError(f'Uncognized data augmentation mode') 37 | else: 38 | transform = [transforms.ToTensor()] 39 | self.transform = transforms.Compose(transform) 40 | 41 | def __call__(self, input): 42 | output = self.transform(input) 43 | return output 44 | 45 | 46 | class Augment_RGB_Data(): 47 | def __init__(self, mode=0, is_train=False): 48 | if is_train: 49 | if mode == 0: 50 | transform = [transforms.ToTensor(), 51 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])] 52 | elif mode == 1: 53 | transform = [transforms.ToTensor(), 54 | transforms.Resize([224, 224]), 55 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 56 | transforms.RandomErasing(scale=(0.1, 0.4)), 57 | transforms.RandomRotation(degrees=5), 58 | transforms.RandomHorizontalFlip(p=0.5)] 59 | else: 60 | raise NotImplementedError(f'Uncognized data augmentation mode') 61 | else: 62 | transform = [transforms.ToTensor(), 63 | transforms.Resize([224, 224]), 64 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])] 65 | self.transform = transforms.Compose(transform) 66 | 67 | def __call__(self, input): 68 | output = self.transform(input) 69 | return output 70 | 71 | 72 | class Augment_SPH_Data(): 73 | def __init__(self, img_size, mode=0, is_train=False): 74 | if is_train: 75 | if mode == 0: 76 | transform = [transforms.ToTensor(), 77 | transforms.Normalize([0.5], [0.5])] 78 | elif mode == 1: 79 | transform = [SphRandomRotate(p=1, img_size=img_size), 80 | transforms.ToTensor(), 81 | transforms.Normalize([0.5], [0.5])] 82 | else: 83 | raise NotImplementedError(f'Uncognized data augmentation mode') 84 | else: 85 | transform = [transforms.ToTensor(), 86 | transforms.Resize(img_size, Image.BICUBIC), 87 | transforms.Normalize([0.5], [0.5])] 88 | self.transform = transforms.Compose(transform) 89 | 90 | def __call__(self, input): 91 | output = self.transform(input) 92 | return output 93 | 94 | 95 | class SphRandomRotate: 96 | '''Random rotate spherical projection of point cloud''' 97 | def __init__(self, p, img_size): 98 | assert 0 < p <= 1, 'probability must be in (0, 1] range)!' 
99 | self.p = p 100 | self.grid = get_projection_grid(b=(int)(img_size[0]/2.0)) 101 | 102 | def __call__(self, sph_img): 103 | #* conver from PIL.Image into numpy.array 104 | signals = np.expand_dims(np.array(sph_img), axis=0) 105 | #* generate random rotation along all three axis 106 | rot = rand_rotation_matrix(deflection=1) 107 | #* generate random rotation along z-axis 108 | # rot_angle = 2*np.pi*random.uniform(1) 109 | # cosval = np.cos(rot_angle) 110 | # sinval = np.sin(rot_angle) 111 | # rot = np.array([[cosval, -sinval, 0], 112 | # [sinval, cosval, 0], 113 | # [0, 0, 1]]) 114 | rotated_grid = rotate_grid(rot, self.grid) 115 | rot_sph_img = project_2d_on_sphere(signals, rotated_grid, projection_origin=[0,0,0.00001]) 116 | rot_sph_img = np.squeeze(rot_sph_img) 117 | return rot_sph_img 118 | 119 | 120 | class RandomFlip: 121 | def __init__(self, p): 122 | # p = [p_x, p_y, p_z] probability of flipping each axis 123 | assert len(p) == 3 124 | assert 0 < sum(p) <= 1, 'sum(p) must be in (0, 1] range, is: {}'.format(sum(p)) 125 | self.p = p 126 | self.p_cum_sum = np.cumsum(p) 127 | 128 | def __call__(self, coords): 129 | r = random.random() 130 | if r <= self.p_cum_sum[0]: 131 | # Flip the first axis 132 | coords[..., 0] = -coords[..., 0] 133 | elif r <= self.p_cum_sum[1]: 134 | # Flip the second axis 135 | coords[..., 1] = -coords[..., 1] 136 | elif r <= self.p_cum_sum[2]: 137 | # Flip the third axis 138 | coords[..., 2] = -coords[..., 2] 139 | 140 | return coords 141 | 142 | 143 | class RandomRotation: 144 | def __init__(self, axis=None, max_theta=180, max_theta2=15): 145 | self.axis = axis 146 | self.max_theta = max_theta # Rotation around axis 147 | self.max_theta2 = max_theta2 # Smaller rotation in random direction 148 | 149 | def _M(self, axis, theta): 150 | return expm(np.cross(np.eye(3), axis / norm(axis) * theta)).astype(np.float32) 151 | 152 | def __call__(self, coords): 153 | if self.axis is not None: 154 | axis = self.axis 155 | else: 156 | axis = np.random.rand(3) - 0.5 157 | R = self._M(axis, (np.pi * self.max_theta / 180) * 2 * (np.random.rand(1) - 0.5)) 158 | if self.max_theta2 is None: 159 | coords = coords @ R 160 | else: 161 | R_n = self._M(np.random.rand(3) - 0.5, (np.pi * self.max_theta2 / 180) * 2 * (np.random.rand(1) - 0.5)) 162 | coords = coords @ R @ R_n 163 | 164 | return coords 165 | 166 | 167 | class RandomTranslation: 168 | def __init__(self, max_delta=0.05): 169 | self.max_delta = max_delta 170 | 171 | def __call__(self, coords): 172 | trans = self.max_delta * np.random.randn(1, 3) 173 | return coords + trans.astype(np.float32) 174 | 175 | 176 | class RandomScale: 177 | def __init__(self, min, max): 178 | self.scale = max - min 179 | self.bias = min 180 | 181 | def __call__(self, coords): 182 | s = self.scale * np.random.rand(1) + self.bias 183 | return coords * s.astype(np.float32) 184 | 185 | 186 | class RandomShear: 187 | def __init__(self, delta=0.1): 188 | self.delta = delta 189 | 190 | def __call__(self, coords): 191 | T = np.eye(3) + self.delta * np.random.randn(3, 3) 192 | return coords @ T.astype(np.float32) 193 | 194 | 195 | class JitterPoints: 196 | def __init__(self, sigma=0.01, clip=None, p=1.): 197 | assert 0 < p <= 1. 198 | assert sigma > 0. 199 | 200 | self.sigma = sigma 201 | self.clip = clip 202 | self.p = p 203 | 204 | def __call__(self, e): 205 | """ Randomly jitter points. jittering is per point. 
206 | Input: 207 | BxNx3 array, original batch of point clouds 208 | Return: 209 | BxNx3 array, jittered batch of point clouds 210 | """ 211 | 212 | sample_shape = (e.shape[0],) 213 | if self.p < 1.: 214 | # Create a mask for points to jitter 215 | m = torch.distributions.categorical.Categorical(probs=torch.tensor([1 - self.p, self.p])) 216 | mask = m.sample(sample_shape=sample_shape) 217 | else: 218 | mask = torch.ones(sample_shape, dtype=torch.int64 ) 219 | 220 | mask = mask == 1 221 | jitter = self.sigma * torch.randn_like(e[mask]) 222 | 223 | if self.clip is not None: 224 | jitter = torch.clamp(jitter, min=-self.clip, max=self.clip) 225 | 226 | e[mask] = e[mask] + jitter 227 | return e 228 | 229 | 230 | class RemoveRandomPoints: 231 | def __init__(self, r): 232 | if type(r) is list or type(r) is tuple: 233 | assert len(r) == 2 234 | assert 0 <= r[0] <= 1 235 | assert 0 <= r[1] <= 1 236 | self.r_min = float(r[0]) 237 | self.r_max = float(r[1]) 238 | else: 239 | assert 0 <= r <= 1 240 | self.r_min = None 241 | self.r_max = float(r) 242 | 243 | def __call__(self, e): 244 | n = len(e) 245 | if self.r_min is None: 246 | r = self.r_max 247 | else: 248 | # Randomly select removal ratio 249 | r = random.uniform(self.r_min, self.r_max) 250 | 251 | mask = np.random.choice(range(n), size=int(n*r), replace=False) # select elements to remove 252 | e[mask] = torch.zeros_like(e[mask]) 253 | return e 254 | 255 | 256 | class RemoveRandomBlock: 257 | """ 258 | Randomly remove part of the point cloud. Similar to PyTorch RandomErasing but operating on 3D point clouds. 259 | Erases fronto-parallel cuboid. 260 | Instead of erasing we set coords of removed points to (0, 0, 0) to retain the same number of points 261 | """ 262 | def __init__(self, p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3)): 263 | self.p = p 264 | self.scale = scale 265 | self.ratio = ratio 266 | 267 | def get_params(self, coords): 268 | # Find point cloud 3D bounding box 269 | flattened_coords = coords.view(-1, 3) 270 | min_coords, _ = torch.min(flattened_coords, dim=0) 271 | max_coords, _ = torch.max(flattened_coords, dim=0) 272 | span = max_coords - min_coords 273 | area = span[0] * span[1] 274 | erase_area = random.uniform(self.scale[0], self.scale[1]) * area 275 | aspect_ratio = random.uniform(self.ratio[0], self.ratio[1]) 276 | 277 | h = math.sqrt(erase_area * aspect_ratio) 278 | w = math.sqrt(erase_area / aspect_ratio) 279 | 280 | x = min_coords[0] + random.uniform(0, 1) * (span[0] - w) 281 | y = min_coords[1] + random.uniform(0, 1) * (span[1] - h) 282 | 283 | return x, y, w, h 284 | 285 | def __call__(self, coords): 286 | if random.random() < self.p: 287 | x, y, w, h = self.get_params(coords) # Fronto-parallel cuboid to remove 288 | mask = (x < coords[..., 0]) & (coords[..., 0] < x+w) & (y < coords[..., 1]) & (coords[..., 1] < y+h) 289 | coords[mask] = torch.zeros_like(coords[mask]) 290 | return coords -------------------------------------------------------------------------------- /dataloader/pittsburgh.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: Peng Yin, Shiqi Zhao 3 | train.py 4 | 5 | Dataset wrapper for Pittsburgh Dataset modified based on PointNetVLAD 6 | Original Code: https://github.com/mikacuy/pointnetvlad 7 | ''' 8 | import os 9 | import random 10 | from glob import glob 11 | 12 | import pickle 13 | import numpy as np 14 | import open3d as o3d 15 | import pandas as pd 16 | from tqdm import tqdm 17 | from PIL import Image 18 | from sklearn.neighbors import KDTree 19 
| 20 | from .triplet_dataloader import TripletDataLoader 21 | from utils import log_print, pc_normalize 22 | 23 | 24 | class PittsburghDataset(TripletDataLoader): 25 | """Dataloader Wrapper for Pittsburgh Dataset""" 26 | def __init__(self, config, is_train): 27 | super().__init__(config, is_train) 28 | self.dataset_dir = os.path.join(config.DATA.BENCHMARK_DIR, str(config.DATA.DATASET_NAME)) 29 | if config.MODEL.TYPE not in ["Lidar", "LiSPH"]: 30 | raise ValueError("Pittsburgh dataset only provides Lidar data!") 31 | if is_train: 32 | self.generate_pickles() 33 | self.queries = self.get_queries_dict(os.path.join(self.dataset_dir, config.DATA.TRAIN_PICKLE)) 34 | log_print("Number of training tuples: %d" % len(self.queries), "y") 35 | else: 36 | self.queries = self.get_queries_dict(os.path.join(self.dataset_dir, config.DATA.VAL_PICKLE)) 37 | log_print("Number of val tuples: %d" % len(self.queries), "y") 38 | self.file_idxs = np.arange(0, len(self.queries.keys())) 39 | 40 | def load_file_func(self, filename): 41 | if self.config.MODEL.TYPE == "Lidar" or self.branch == "Lidar": 42 | pcd = np.asarray(o3d.io.read_point_cloud(filename + ".pcd").points) 43 | if pcd.shape[0] != 4096: 44 | raise ValueError('{}.pcd does not have sufficient points'.format(filename)) 45 | pcd = pc_normalize(pcd) 46 | output = self.lidar_data_aug(pcd) 47 | elif self.config.MODEL.TYPE == "LiSPH" or self.branch == "LiSPH": 48 | sph_img = Image.open(filename + "_sph.png") 49 | output = self.lisph_data_aug(sph_img) 50 | return output 51 | 52 | def generate_pickles(self): 53 | if self.config.TRAINING.IS_TRAIN: 54 | pair_train = os.path.join( 55 | self.dataset_dir, self.config.DATA.TRAIN_PICKLE) 56 | pair_test = os.path.join( 57 | self.dataset_dir, self.config.DATA.VAL_PICKLE) 58 | #! Load pickles if exist 59 | if os.path.exists(pair_train) and os.path.exists(pair_test): 60 | log_print("Load previous pickles", "b") 61 | return 62 | # ANCHOR generate train/val query for 63 | pair_train = self.get_df(self.dataset_dir, 'train', is_shuffle=False) 64 | pair_train = pair_train.sample(frac=1).reset_index(drop=True) 65 | pair_val = self.get_df(self.dataset_dir, 'val', is_shuffle=False) 66 | pair_val = pair_val.sample(frac=1).reset_index(drop=True) 67 | 68 | self.construct_query_dict(pair_train, self.config.DATA.TRAIN_PICKLE) 69 | self.construct_query_dict(pair_val, self.config.DATA.VAL_PICKLE) 70 | log_print("Generated pickles!\n", "g") 71 | 72 | def get_from_folder(self, data_path, index): 73 | 74 | all_file_id = [] 75 | pose_data = glob(data_path+'/*_pose.npy') 76 | for file_name in pose_data: 77 | all_file_id.append(file_name.split('_pose')[-2]) 78 | all_file_id.sort() 79 | all_data_df = pd.DataFrame(all_file_id, columns=["file"]) 80 | all_data_df["pcd_position_x"] = all_data_df["file"].apply( 81 | lambda x: np.load(x + '_pose.npy')[0]) 82 | all_data_df["pcd_position_y"] = all_data_df["file"].apply( 83 | lambda x: np.load(x + '_pose.npy')[1]) 84 | all_data_df["pcd_position_z"] = all_data_df["file"].apply( 85 | lambda x: np.load(x + '_pose.npy')[2]) 86 | all_data_df["date"] = all_data_df["file"].apply( 87 | lambda x: x.split('/')[-2]) 88 | all_data_df.reset_index(drop=True, inplace=True) 89 | return all_data_df 90 | 91 | def get_df(self, dataset_dir, type='train', is_shuffle=False): 92 | file_df = pd.DataFrame() 93 | file_dirs = [] 94 | if type == 'train': 95 | for i in self.config.DATA.TRAIN_LIST: 96 | file_dirs.append("{}/train_{}/".format(dataset_dir, i)) 97 | elif type == 'val': 98 | for i in self.config.DATA.VAL_LIST: 99 | 
file_dirs.append("{}/val_{}/".format(dataset_dir, i)) 100 | elif type == 'test': 101 | for i in self.config.DATA.TEST_LIST: 102 | file_dirs.append("{}/test_{}/".format(dataset_dir, i)) 103 | file_dirs.sort() 104 | for index, folder in enumerate(file_dirs): 105 | data_df = self.get_from_folder(folder, index) 106 | file_df = pd.concat([file_df, data_df], ignore_index=True) 107 | if is_shuffle: 108 | file_df.sample(frac=1).reset_index(drop=True) 109 | 110 | return file_df 111 | 112 | def construct_query_dict(self, data_df, filename): 113 | data_df.reset_index(drop=True, inplace=True) 114 | 115 | tree = KDTree( 116 | data_df[["pcd_position_x", "pcd_position_y", "pcd_position_z"]]) 117 | ind_nn = tree.query_radius(data_df[["pcd_position_x", "pcd_position_y", "pcd_position_z"]], 118 | r=self.config.DATA.POSITIVES_RADIUS) 119 | ind_r = tree.query_radius(data_df[["pcd_position_x", "pcd_position_y", "pcd_position_z"]], 120 | r=self.config.DATA.NEGATIVES_RADIUS) 121 | ind_traj = tree.query_radius(data_df[["pcd_position_x", "pcd_position_y", "pcd_position_z"]], 122 | r=self.config.DATA.TRAJ_RADIUS) 123 | 124 | queries = {} 125 | for i in tqdm(range(len(ind_nn)), total=len(ind_nn), desc='construct queries', leave=False): 126 | query = data_df.iloc[i]["file"] 127 | positives = np.setdiff1d(ind_nn[i], [i]).tolist() 128 | negatives = np.setdiff1d(ind_traj[i], ind_r[i]).tolist() 129 | 130 | random.shuffle(negatives) 131 | random.shuffle(positives) 132 | 133 | queries[i] = {"query": query, "positives": positives, "negatives": negatives} 134 | 135 | with open(os.path.join(self.dataset_dir, filename), 'wb') as handle: 136 | pickle.dump(queries, handle, protocol=pickle.HIGHEST_PROTOCOL) 137 | -------------------------------------------------------------------------------- /dataloader/triplet_dataloader.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: Shiqi Zhao, Peng Yin 3 | TripletDataLoader 4 | ''' 5 | 6 | import os 7 | import pickle 8 | import random 9 | from abc import abstractmethod 10 | 11 | import torch 12 | import numpy as np 13 | from torch.utils.data import Dataset 14 | 15 | from .data_augmentation import Augment_Point_Data, Augment_RGB_Data, Augment_SPH_Data 16 | 17 | random.seed(12345) 18 | 19 | 20 | class TripletDataLoader(Dataset): 21 | """Dataloader for tripletLoss or qradrupletLoss""" 22 | def __init__(self, config, is_train): 23 | self.config = config 24 | self.is_train = is_train 25 | self.data_count = 0 26 | self.branch = None 27 | 28 | if config.MODEL.TYPE == "Lidar": 29 | self.lidar_data_aug = Augment_Point_Data(config.TRAINING.BATCH.FRAME_TRANSFORM, is_train) 30 | elif config.MODEL.TYPE == "LiSPH": 31 | self.lisph_data_aug = Augment_SPH_Data(config.DATA.SPH_PROJ.IMAGE_SIZE, 32 | config.TRAINING.BATCH.FRAME_TRANSFORM, 33 | is_train) 34 | elif config.MODEL.TYPE == "Image": 35 | self.image_data_aug = Augment_RGB_Data(config.TRAINING.BATCH.FRAME_TRANSFORM, is_train) 36 | else: 37 | raise ValueError("Wrong Model Type, Please Check config/config.py *MODEL* Section") 38 | 39 | def __len__(self): 40 | return len(self.queries) 41 | 42 | def __getitem__(self, idx): 43 | q_tuple = self.get_data() 44 | return q_tuple 45 | 46 | def get_data(self): 47 | q_tuple = [] 48 | 49 | while(1): 50 | # if try data_count times still can not found a proper batch, resample 51 | if self.data_count > (len(self.file_idxs)-self.config.TRAINING.BATCH.BATCH_SIZE): 52 | self.shuffle_query() 53 | continue 54 | batch_keys = self.file_idxs[self.data_count] 55 | # 
select the batch with qualified number of positives and negatives for training 56 | if (len(self.queries[batch_keys]["positives"]) < self.config.TRAINING.BATCH.POSITIVES_PER_QUERY) or (len(self.queries[batch_keys]["negatives"]) < self.config.TRAINING.BATCH.NEGATIVES_PER_QUERY): 57 | self.data_count+=1 58 | continue 59 | q_tuple = self.get_tuple(self.queries[batch_keys], hard_neg=[], other_neg=True) 60 | self.data_count+=1 61 | break 62 | 63 | return q_tuple 64 | 65 | def shuffle_query(self): 66 | random.shuffle(self.file_idxs) 67 | self.data_count = 0 68 | 69 | def get_tuple(self, dict_value, hard_neg=None, other_neg=False): 70 | ''' 71 | hard_neg: bool, add hard negative? 72 | other_neg: bool, add other negatives?(quadra loss fuction required) 73 | ''' 74 | if hard_neg is None: 75 | hard_neg = [] 76 | possible_negs = [] 77 | 78 | num_pos = self.config.TRAINING.BATCH.POSITIVES_PER_QUERY 79 | num_neg = self.config.TRAINING.BATCH.NEGATIVES_PER_QUERY 80 | random.shuffle(dict_value["positives"]) 81 | pos_files = [] 82 | for i in range(num_pos): 83 | pos_files.append(self.queries[dict_value["positives"][i]]["query"]) 84 | 85 | neg_files = [] 86 | neg_indices = [] 87 | if len(hard_neg) == 0: 88 | random.shuffle(dict_value["negatives"]) 89 | for i in range(num_neg): 90 | neg_files.append(self.queries[dict_value["negatives"][i]]["query"]) 91 | neg_indices.append(dict_value["negatives"][i]) 92 | else: 93 | random.shuffle(dict_value["negatives"]) 94 | for i in hard_neg: 95 | neg_files.append(self.queries[i]["query"]) 96 | neg_indices.append(i) 97 | j = 0 98 | while len(neg_files) < num_neg: 99 | if not dict_value["negatives"][j] in hard_neg: 100 | neg_files.append(self.queries[dict_value["negatives"][j]]["query"]) 101 | neg_indices.append(dict_value["negatives"][j]) 102 | j += 1 103 | if other_neg: 104 | # get neighbors of negatives and query 105 | neighbors = [] 106 | for pos in dict_value["positives"]: 107 | neighbors.append(pos) 108 | for neg in neg_indices: 109 | for pos in self.queries[neg]["positives"]: 110 | neighbors.append(pos) 111 | possible_negs = list(set(self.queries.keys()) - set(neighbors)) 112 | random.shuffle(possible_negs) 113 | 114 | query = self.load_file_func(dict_value["query"]) # Nx3 115 | query = np.expand_dims(query, axis=0) 116 | positives = self.load_files_func(pos_files) 117 | negatives = self.load_files_func(neg_files) 118 | 119 | output = [query, positives, negatives] 120 | if other_neg: 121 | neg2 = self.load_file_func(self.queries[possible_negs[0]]["query"]) 122 | neg2 = np.expand_dims(neg2, axis=0) 123 | output.append(neg2) 124 | return output 125 | 126 | @abstractmethod 127 | def load_file_func(self, filename): 128 | pass 129 | 130 | def load_files_func(self, filenames): 131 | pcs = [] 132 | for filename in zip(filenames): 133 | pc = self.load_file_func(filename[0]) 134 | pcs.append(pc) 135 | pcs = torch.stack(pcs, axis=0) 136 | return pcs 137 | 138 | @staticmethod 139 | def get_queries_dict(filename): 140 | with open(filename, 'rb') as handle: 141 | queries = pickle.load(handle) 142 | return queries 143 | -------------------------------------------------------------------------------- /dataloader/utils.py: -------------------------------------------------------------------------------- 1 | def make_collate_fn(config): 2 | """Generate Collate_functions 3 | 4 | Args: 5 | config (class): parameter configurations 6 | """ 7 | def collate_fn(batch): 8 | """Collate_functions 9 | 10 | Args: 11 | batch (list): input batch for training 12 | """ 13 | if 
config.TRAINING.IS_TRAIN and config.TRAINING.BATCH.BATCH_TRANSFORM: 14 | pass 15 | return batch 16 | return collate_fn -------------------------------------------------------------------------------- /eval/__init__.py: -------------------------------------------------------------------------------- 1 | from .eval_utils import EvaluationPitts -------------------------------------------------------------------------------- /eval/eval_utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: Shiqi Zhao 3 | evaluate_utils.py 4 | ''' 5 | import tqdm 6 | from glob import glob 7 | from copy import copy, deepcopy 8 | 9 | import torch 10 | import numpy as np 11 | import pandas as pd 12 | import open3d as o3d 13 | from sklearn.neighbors import KDTree 14 | from PIL import Image 15 | from scipy.spatial.transform import Rotation as R 16 | 17 | from utils import pc_normalize, pcd_downsample, LaserProjection, log_print 18 | from dataloader.data_augmentation import Augment_Point_Data, Augment_SPH_Data, Augment_RGB_Data 19 | 20 | 21 | class EvaluationPitts(): 22 | def __init__(self, config, model, device, number_neighbors=20) -> None: 23 | self.config = config 24 | self.number_neighbors = number_neighbors 25 | self.model = model 26 | self.device = device 27 | if config.MODEL.TYPE == "Lidar": 28 | self.trans_lidar = Augment_Point_Data(is_train=False) 29 | elif config.MODEL.TYPE == "LiSPH": 30 | self.trans_lidar_sph = Augment_SPH_Data(img_size=config.DATA.SPH_PROJ.IMAGE_SIZE, 31 | is_train=False) 32 | self.projection = LaserProjection(device=device, 33 | top_size=config.DATA.BEV_PROJ.IMAGE_SIZE, 34 | z_range=config.DATA.BEV_PROJ.Z_RANGE, 35 | sph_size=config.DATA.SPH_PROJ.IMAGE_SIZE, 36 | fov_range=config.DATA.SPH_PROJ.FOV) 37 | else: 38 | raise NotImplementedError('Please Try Predefined Types') 39 | 40 | def get_features_recall(self, traj_num, trans_IDX, rot_IDX, rand_idx=0): 41 | database = [] 42 | query = [] 43 | running_time = 0 44 | 45 | # Set global map 46 | if self.config.DATA.DATASET_NAME == 'PITT': 47 | global_map = o3d.io.read_point_cloud('{}/{}/DATA/Train{}/cloudGlobal.pcd'.format( 48 | self.config.DATA.BENCHMARK_DIR, self.config.DATA.DATASET_NAME,traj_num)) 49 | bbox_pnv = o3d.geometry.AxisAlignedBoundingBox( 50 | np.array([-20, -20, 0.8]), 51 | np.array([ 20, 20, 100.0])) 52 | map_tree = o3d.geometry.KDTreeFlann(global_map) 53 | 54 | # Set filelist for test 55 | file_list = sorted(glob('{}/{}/train_{}/*_sph.png'.format(self.config.DATA.BENCHMARK_DIR, self.config.DATA.DATASET_NAME, traj_num))) 56 | file_list = file_list[::10] 57 | 58 | # load database 59 | for file_name in tqdm.tqdm(file_list): 60 | if self.config.MODEL.TYPE == "LiSPH": 61 | queries_img = Image.open(file_name) 62 | frame = [self.trans_lidar_sph(queries_img).to(self.device, dtype=torch.float)] 63 | embedding, t_gpu = self.model.infer_frame(frame, t=True) 64 | database.append(embedding) 65 | running_time += t_gpu 66 | 67 | # load the test query 68 | for file_name in tqdm.tqdm(file_list): 69 | # preprocess generate test query 70 | pose = np.load('{}_pose6d.npy'.format( 71 | file_name.split('_sph.png')[0])) 72 | 73 | # * Add fixed transformation 74 | if rand_idx == 0: 75 | trans_idx = trans_IDX + np.random.random(1)[0]-0.5 # 0.5m noise 76 | rot_idx = rot_IDX + 5*(np.random.random(1)[0]-0.5) # 2.5° noise 77 | # * Add random transformation 78 | else: 79 | trans_idx = 2 * trans_IDX * (np.random.random(1)[0]-0.5) 80 | rot_idx = 2 * rot_IDX * (np.random.random(1)[0]-0.5) 81 | pose[0] += 
trans_idx 82 | pose[1] += trans_idx 83 | frame = [] 84 | # generate sphrical images 85 | [k, p_idx, _] = map_tree.search_radius_vector_3d( 86 | pose[:3], self.config.DATA.SPH_PROJ.RADIUS) 87 | pcd_data = np.asarray(global_map.points)[p_idx, :] 88 | pose[5] += rot_idx * np.pi/180 89 | pcd_data -= pose[:3] 90 | pcd = o3d.geometry.PointCloud() 91 | pcd.points = o3d.utility.Vector3dVector(pcd_data) 92 | rot = R.from_rotvec(np.array([pose[3], pose[4], pose[5]])) 93 | if self.config.MODEL.TYPE in ["LiSPH"]: 94 | trans_matrix = np.eye(4) 95 | trans_matrix[0:3, 0:3] = rot.inv().as_matrix() 96 | pcd.transform(trans_matrix) 97 | pcd = pcd.voxel_down_sample(0.5) 98 | sph_img = self.projection.do_sph_projection(np.asarray(pcd.points)) 99 | im = Image.fromarray((sph_img * 255).astype(np.uint8)) 100 | frame.append(self.trans_lidar_sph(im).to(self.device, dtype=torch.float)) 101 | # load test query into network 102 | query.append(self.model.infer_frame(frame)) 103 | log_print(f'{self.config.DATA.DATASET_NAME} {traj_num}:', 'b') 104 | print("Descriptor Generation Done!") 105 | print("Genrate Speed {}s/frame".format(running_time/len(file_list))) 106 | 107 | _,recall_nums,_,one_percent_recall = self.get_recall(np.array(database), np.array(query), num_neighbors=self.number_neighbors) 108 | log_print(f'Top one percent recall is {one_percent_recall}') 109 | 110 | return recall_nums, one_percent_recall, running_time 111 | 112 | def get_recall(self, database_feature, queries_feature, num_neighbors=30): 113 | database_output = database_feature 114 | queries_output = queries_feature 115 | 116 | database_nbrs = KDTree(database_output) 117 | # num_neighbors = (int)(queries_feature.shape[0]*0.01) 118 | recall = [0] * num_neighbors 119 | 120 | top1_similarity_score = [] 121 | one_percent_retrieved = 0 122 | threshold = max(int(round(len(database_output) / 100.0)), 1) 123 | 124 | num_evaluated = 0 125 | topk_dict = {} 126 | top_recalls = np.zeros(num_neighbors+2) 127 | 128 | for i in range(len(queries_output)): 129 | 130 | true_neighbors = [i] 131 | if len(true_neighbors) == 0: 132 | continue 133 | num_evaluated += 1 134 | 135 | distances, indices = database_nbrs.query(np.array([queries_output[i]]), k=num_neighbors) 136 | # indices = np.setdiff1d(indices[0], [i]) 137 | indices = indices[0] 138 | 139 | for j in range(0, len(indices)): 140 | if indices[j] in true_neighbors: 141 | if (j == 0): 142 | similarity = np.dot(queries_output[i], database_output[indices[j]]) 143 | top1_similarity_score.append(similarity) 144 | recall[j] += 1 145 | break 146 | 147 | if len(list(set(indices[0:threshold]).intersection(set(true_neighbors)))) > 0: 148 | one_percent_retrieved += 1 149 | 150 | for recall_num in range(num_neighbors): 151 | if len(list(set(indices[0:recall_num+1]).intersection(set(true_neighbors)))) > 0: 152 | top_recalls[recall_num] += 1 153 | 154 | top_recalls[-2] = one_percent_retrieved 155 | top_recalls[-1] = num_evaluated 156 | one_percent_recall = (one_percent_retrieved / float(num_evaluated)) * 100 157 | top_one_recall = (top_recalls[0] / float(num_evaluated)) * 100 158 | top_five_recall = (top_recalls[4] / float(num_evaluated)) * 100 159 | top_ten_recall = (top_recalls[9] / float(num_evaluated)) * 100 160 | recall = (np.cumsum(recall) / float(num_evaluated)) * 100 161 | 162 | return (top_one_recall, top_five_recall, top_ten_recall), \ 163 | top_recalls, \ 164 | top1_similarity_score, one_percent_recall 165 | 166 | @staticmethod 167 | def apply_noise(pcd, mu=0, sigma=0.1): 168 | noisy_pcd = deepcopy(pcd) 169 | 
points = np.asarray(noisy_pcd.points) 170 | points += np.clip(np.random.normal(mu, sigma, size=points.shape), -1, 1) 171 | noisy_pcd.points = o3d.utility.Vector3dVector(points) 172 | return noisy_pcd -------------------------------------------------------------------------------- /eval/evaluate_pitt.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: Shiqi Zhao 3 | evaluate_pitt.py 4 | ''' 5 | import os 6 | import argparse 7 | import numpy as np 8 | from config import cfg 9 | 10 | from models import set_lcd_model 11 | from eval.eval_utils import EvaluationPitts 12 | from utils import log_print 13 | 14 | 15 | def para_args(): 16 | parser = argparse.ArgumentParser(description="Network configurations!") 17 | parser.add_argument("--model-path", default="data/results/default_model", metavar="FILE", 18 | help="path to config file", type=str) 19 | parser.add_argument("--dataset", default="PITT", type=str) 20 | parser.add_argument("--epoch", default="62", type=int) 21 | parser.add_argument("--noise", default=0, type=int, help="apply noise during test") 22 | parser.add_argument("--type", default="rot", type=str, choices=["recall", "rot"], help="choose do top recall or rot analysis") 23 | parser.add_argument("--trans-noise", type=int, required=True) 24 | parser.add_argument("--rot-noise", type=int, required=True) 25 | parser.add_argument("--log", type=bool, required=False, default=False) 26 | args = parser.parse_args() 27 | return args 28 | 29 | 30 | def val(config, type, noise, trans_noise, rot_noise, log): 31 | #! Log 32 | if log: 33 | save_dir = 'log/{}/{}/'.format(config.MODEL.NAME, config.DATA.DATASET_NAME) 34 | if not os.path.exists(save_dir): 35 | os.makedirs(save_dir) 36 | #! Define Model 37 | model, gpu_conf = set_lcd_model(config) 38 | [_, device, gpu_ids] = gpu_conf 39 | #! Define Evaluation Class 40 | valPitt = EvaluationPitts(config, model, device) 41 | #! 
Evaluation 42 | if type == "recall": 43 | total_recalls = [] 44 | total_time = 0 45 | for traj_num in range(21, 22): 46 | recalls, _, running_time = valPitt.get_features_recall(traj_num, trans_noise, rot_noise, noise) 47 | total_recalls.append(recalls) 48 | total_time += running_time 49 | total_recalls = np.array(total_recalls).sum(axis=0) 50 | log_print('Total Top One Recall is {}'.format(total_recalls[0]/total_recalls[-1]), 'r') 51 | log_print('Total Running Time is {}'.format(total_time/total_recalls[-1]), 'r') 52 | if log: 53 | stats = total_recalls[:-1]/total_recalls[-1] 54 | save_file = save_dir + 'recall.txt' 55 | file_tosave = open(save_file, 'w+') 56 | file_tosave.write('Average recall @1 is: {}\n'.format(stats[0])) 57 | file_tosave.write('Average recall @1% is: {}\n'.format(stats[-1])) 58 | file_tosave.write('\n') 59 | for index, item in enumerate(stats[:-1]): 60 | file_tosave.write("Average recall @{} is: {}\n".format(index+1, item)) 61 | file_tosave.write('\t\n\t\n') 62 | file_tosave.close() 63 | elif type == "rot": 64 | for trans in [1,2,3]: 65 | for rot in [30,60,90,120,150,180]: 66 | total_recalls = [] 67 | total_time = 0 68 | for traj_num in range(21, 22): 69 | recalls, _, running_time = valPitt.get_features_recall(traj_num, trans, rot, noise) 70 | total_recalls.append(recalls) 71 | total_time += running_time 72 | total_recalls = np.array(total_recalls).sum(axis=0) 73 | log_print('Total Top One Recall is {}'.format(total_recalls[0]/total_recalls[-1]), 'r') 74 | log_print('Total Running Time is {}'.format(total_time/total_recalls[-1]), 'r') 75 | if log: 76 | stats = total_recalls[:-1]/total_recalls[-1] 77 | save_file = save_dir + f'rot_{trans}_{rot}.txt' 78 | file_tosave = open(save_file, 'w+') 79 | file_tosave.write('Average recall @1 is: {}\n'.format(stats[0])) 80 | file_tosave.write('Average recall @1% is: {}\n'.format(stats[-1])) 81 | file_tosave.write('\n') 82 | for index, item in enumerate(stats[:-1]): 83 | file_tosave.write("Average recall @{} is: {}\n".format(index+1, item)) 84 | file_tosave.write('\t\n\t\n') 85 | file_tosave.close() 86 | 87 | 88 | if __name__ == "__main__": 89 | args = para_args() 90 | cfg.merge_from_file("{}/DATA.yaml".format(args.model_path)) 91 | cfg.merge_from_file("{}/MODEL.yaml".format(args.model_path)) 92 | cfg.DATA.DATASET_NAME = args.dataset 93 | cfg.TRAINING.IS_TRAIN = False 94 | cfg.TRAINING.GPU.IDS = [0] # default to use only one gpu when testing 95 | cfg.WEIGHT.LOAD_ADDRESS = "{}/pth/model_{}.pth".format(args.model_path, args.epoch) 96 | cfg.freeze() 97 | val(cfg, args.type, args.noise, args.trans_noise, args.rot_noise, args.log) -------------------------------------------------------------------------------- /generating_sph/gene_pitt.py: -------------------------------------------------------------------------------- 1 | # 2 | # Created on Sun Mar 07 2021 3 | # 4 | # The MIT License (MIT) 5 | # Copyright (c) 2021 Max Yin 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software 7 | # and associated documentation files (the "Software"), to deal in the Software without restriction, 8 | # including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | # and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, 10 | # subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all copies or substantial 13 | # portions of the Software. 
14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 16 | # TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 18 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | # 20 | 21 | import os 22 | import sys 23 | import numpy as np 24 | from glob import glob 25 | import open3d as o3d 26 | import argparse 27 | from torch import square 28 | from tqdm import tqdm 29 | from PIL import Image 30 | from copy import deepcopy 31 | from scipy.spatial.transform import Rotation as R 32 | 33 | 34 | from config import cfg 35 | from utils import log_print, LaserProjection, pcd_downsample 36 | 37 | 38 | #!============================================================================================# 39 | #! Parameters for Dataset and Network 40 | #!============================================================================================# 41 | 42 | 43 | def para_args(): 44 | parser = argparse.ArgumentParser(description="Network configurations!") 45 | parser.add_argument("--config-network", default="../config/network/spherevlad.yaml", metavar="FILE", 46 | help="path to config file", type=str) 47 | parser.add_argument("--config-dataset", default="../config/dataset/PITT.yaml", metavar="FILE", 48 | help="path to config file", type=str) 49 | args = parser.parse_args() 50 | return args 51 | 52 | def find_boundry(num): 53 | pose_list = glob(f'/data_hdd_1/SphereVLAD++/dataset/KITTI360/train_{num}/*_pose6d.npy') 54 | boundry = [] 55 | num_list = [int(a.split('_pose6d.npy')[0].split('/')[-1]) for a in pose_list] 56 | num_list = sorted(num_list) 57 | for i in range(max(num_list)+1): 58 | if (i in num_list) and (i+1 not in num_list): 59 | boundry.append(i) 60 | if (i in num_list) and (i-1 not in num_list): 61 | boundry.append(i) 62 | return boundry 63 | 64 | def generate_data(cfg, index, mode, slice_size=50, interval=1): 65 | 66 | path_base = '{}{}'.format(cfg.DATA.BENCHMARK_DIR, cfg.DATA.DATASET_NAME) 67 | out_path = '{}/{}_{}'.format(path_base, mode, index) 68 | os.system('rm -rf {}'.format(out_path)) 69 | os.mkdir('{}'.format(out_path)) 70 | 71 | log_print("Load data {}".format(out_path), 'b') 72 | 73 | #! Define range to image projection 74 | projection = LaserProjection(None, 75 | top_size=cfg.DATA.BEV_PROJ.IMAGE_SIZE, 76 | z_range=cfg.DATA.BEV_PROJ.Z_RANGE, 77 | sph_size=cfg.DATA.SPH_PROJ.IMAGE_SIZE, 78 | fov_range=cfg.DATA.SPH_PROJ.FOV) 79 | 80 | trj_6D = np.loadtxt("{}/DATA/Train{}/poses.txt".format(path_base, index)) 81 | 82 | #! 
Obtain Projections 83 | map_pcd = o3d.io.read_point_cloud( 84 | "{}/DATA/Train{}/cloudGlobal.pcd".format(path_base, index)) 85 | pcd_tree = o3d.geometry.KDTreeFlann(map_pcd) 86 | 87 | #* generate submaps 88 | last_pose = np.array([9.9e9, 9.9e9, 9.9e9]) 89 | count = 0 90 | 91 | for im_idx, pose in tqdm(enumerate(trj_6D), total=len(trj_6D)): 92 | # select interval 93 | if np.linalg.norm(pose[:3] - last_pose, ord=2) < interval: 94 | continue 95 | last_pose = pose[:3] 96 | 97 | # generate ball query pointcloud 98 | [k, p_idx, _] = pcd_tree.search_radius_vector_3d( 99 | pose[:3], cfg.DATA.SPH_PROJ.RADIUS) 100 | pcd_data = np.asarray(map_pcd.points)[p_idx, :] 101 | pcd_data -= pose[:3] 102 | if pcd_data.shape[0] == 0: 103 | continue 104 | 105 | # save pose files 106 | np.save( 107 | "{}/{:06d}_pose.npy".format(out_path, count+1), pose[:3]) 108 | np.save( 109 | "{}/{:06d}_pose6d.npy".format(out_path, count+1), pose) 110 | 111 | # generate data for spherical view 112 | pcd = o3d.geometry.PointCloud() 113 | pcd.points = o3d.utility.Vector3dVector(pcd_data) 114 | trans_matrix = np.eye(4) 115 | rot = R.from_rotvec(np.array([pose[3], pose[4], pose[5]])) 116 | trans_matrix[0:3, 0:3] = rot.inv().as_matrix() 117 | pcd.transform(trans_matrix) 118 | pcd = pcd.voxel_down_sample(0.5) 119 | sph_img = projection.do_sph_projection(np.array(pcd.points)) 120 | im = Image.fromarray((sph_img * 255).astype(np.uint8)) 121 | im.save("{}/{:06d}_sph.png".format(out_path, count+1)) 122 | 123 | count += 1 124 | 125 | if __name__ == "__main__": 126 | args = para_args() 127 | cfg.merge_from_file(args.config_network) 128 | cfg.merge_from_file(args.config_dataset) 129 | cfg.freeze() 130 | gen_data = generate_data 131 | 132 | # ANCHOR prepare train data 133 | for index in list(set(cfg.DATA.TRAIN_LIST + cfg.DATA.TEST_LIST)): 134 | gen_data(cfg, index, 'train') 135 | for index in cfg.DATA.VAL_LIST: 136 | gen_data(cfg, index, 'val') 137 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .robotLCD import LcdNet 5 | 6 | 7 | def set_lcd_model(cfg, logger=None, neptune=None): 8 | #* set up GPU usage 9 | use_cuda = torch.cuda.is_available() 10 | gpu_ids = [] 11 | if use_cuda: 12 | gpu_c = torch.cuda.device_count() 13 | if cfg.TRAINING.GPU.IDS is not None: 14 | if len(cfg.TRAINING.GPU.IDS) <= gpu_c: 15 | gpu_ids = cfg.TRAINING.GPU.IDS 16 | else: 17 | raise ValueError("Incorrect GPU Index, Please Check!") 18 | else: 19 | gpu_ids = np.arange(gpu_c).tolist() 20 | cuda = "cuda" 21 | device = torch.device(cuda if use_cuda else 'cpu') 22 | lcd = LcdNet(cfg, logger, use_cuda, device, neptune, gpu_ids) 23 | return lcd, [use_cuda, device, gpu_ids] -------------------------------------------------------------------------------- /models/loop_closure/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def make_models(config): 5 | if config.MODEL.NAME == "SphereVLAD": 6 | if config.MODEL.TYPE == "LiSPH": 7 | from .lidar.spherevlad import SphereVLAD 8 | elif config.MODEL.TYPE == "Image": 9 | from .visual.spherevlad import SphereVLAD 10 | model = SphereVLAD(config) 11 | elif config.MODEL.NAME == "SphereVLAD2": 12 | from .lidar.spherevlad2 import SphereVLAD2 13 | model = SphereVLAD2(config) 14 | else: 15 | raise ValueError("Wrong Model Name!") 16 | 17 | if config.TRAINING.IS_TRAIN and 
config.WEIGHT.FREEZE_WEIGHT: 18 | pass 19 | return model 20 | -------------------------------------------------------------------------------- /models/loop_closure/aggregator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MetaSLAM/SphereVLAD/559d5f46b0f8217874273c78e622f28999200482/models/loop_closure/aggregator/__init__.py -------------------------------------------------------------------------------- /models/loop_closure/aggregator/netvlad.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | class NetVLAD(nn.Module): 8 | """NetVLAD layer implementation""" 9 | 10 | def __init__(self, num_clusters=64, dim=128, alpha=100.0, 11 | normalize_input=True, output_dim=None, gate=False): 12 | """ 13 | Args: 14 | num_clusters : int 15 | The number of clusters 16 | dim : int 17 | Dimension of descriptors 18 | alpha : float 19 | Parameter of initialization. Larger value is harder assignment. 20 | normalize_input : bool 21 | If true, descriptor-wise L2 normalization is applied to input. 22 | """ 23 | super(NetVLAD, self).__init__() 24 | self.num_clusters = num_clusters 25 | self.dim = dim 26 | self.alpha = alpha 27 | self.normalize_input = normalize_input 28 | self.conv = nn.Conv2d(dim, num_clusters, kernel_size=(1, 1), bias=True) 29 | self.centroids = nn.Parameter(torch.rand(num_clusters, dim)) 30 | self._init_params() 31 | 32 | if output_dim is not None: 33 | self.nn_output = nn.Linear(in_features=1024, out_features=output_dim) 34 | self.bn_output = nn.BatchNorm1d(output_dim) 35 | self.output_dim = output_dim 36 | 37 | if gate: 38 | self.context_gating = GatingContext(output_dim) 39 | self.gate = gate 40 | 41 | def _init_params(self): 42 | self.conv.weight = nn.Parameter( 43 | (2.0 * self.alpha * self.centroids).unsqueeze(-1).unsqueeze(-1) 44 | ) 45 | self.conv.bias = nn.Parameter( 46 | - self.alpha * self.centroids.norm(dim=1) 47 | ) 48 | 49 | def forward(self, x): 50 | if self.normalize_input: 51 | x = F.normalize(x, p=2, dim=1) # across descriptor dim 52 | 53 | N, C = x.shape[:2] 54 | 55 | # soft-assignment 56 | soft_assign = self.conv(x).view(N, self.num_clusters, -1) 57 | soft_assign = F.softmax(soft_assign, dim=1) 58 | x_flatten = x.view(N, C, -1) 59 | 60 | # calculate residuals to each clusters 61 | residual = x_flatten.expand(self.num_clusters, -1, -1, -1).permute(1, 0, 2, 3) - \ 62 | self.centroids.expand(x_flatten.size(-1), -1, -1).permute(1, 2, 0).unsqueeze(0) 63 | residual *= soft_assign.unsqueeze(2) 64 | 65 | vlad = residual.sum(dim=-1) 66 | vlad = F.normalize(vlad, p=2, dim=2) # intra-normalization 67 | vlad = vlad.view(x.size(0), -1) # flatten 68 | vlad = F.normalize(vlad, p=2, dim=1) # L2 normalize 69 | 70 | if self.output_dim is not None: 71 | vlad = self.nn_output(vlad) 72 | vlad = self.bn_output(vlad) 73 | 74 | if self.gate: 75 | vlad = self.context_gating(vlad) 76 | 77 | return vlad 78 | 79 | 80 | class GatingContext(nn.Module): 81 | def __init__(self, dim, add_batch_norm=True): 82 | super(GatingContext, self).__init__() 83 | self.dim = dim 84 | self.add_batch_norm = add_batch_norm 85 | self.gating_weights = nn.Parameter( 86 | torch.randn(dim, dim) * 1 / math.sqrt(dim)) 87 | self.sigmoid = nn.Sigmoid() 88 | 89 | if add_batch_norm: 90 | self.gating_biases = None 91 | self.bn1 = nn.BatchNorm1d(dim) 92 | else: 93 | self.gating_biases = nn.Parameter( 94 | 
torch.randn(dim) * 1 / math.sqrt(dim)) 95 | self.bn1 = None 96 | 97 | def forward(self, x): 98 | gates = torch.matmul(x, self.gating_weights) 99 | 100 | if self.add_batch_norm: 101 | gates = self.bn1(gates) 102 | else: 103 | gates = gates + self.gating_biases 104 | 105 | gates = self.sigmoid(gates) 106 | 107 | activation = x * gates 108 | 109 | return activation 110 | -------------------------------------------------------------------------------- /models/loop_closure/lidar/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MetaSLAM/SphereVLAD/559d5f46b0f8217874273c78e622f28999200482/models/loop_closure/lidar/__init__.py -------------------------------------------------------------------------------- /models/loop_closure/lidar/spherevlad.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: Shiqi Zhao 3 | SphereVLAD 4 | ''' 5 | import numpy as np 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | from s2cnn import SO3Convolution 10 | from s2cnn import S2Convolution 11 | from s2cnn import so3_near_identity_grid 12 | from s2cnn import s2_near_identity_grid 13 | 14 | from ..aggregator.netvlad import NetVLAD 15 | 16 | 17 | class SphereVLAD(nn.Module): 18 | ''' 19 | sphvlad with bn 20 | ''' 21 | def __init__(self, config): 22 | super(SphereVLAD, self).__init__() 23 | 24 | bandwidth = (int)(config.DATA.SPH_PROJ.IMAGE_SIZE[0]/2.0) 25 | 26 | grid_s2 = s2_near_identity_grid(n_alpha=6, max_beta=np.pi/16, n_beta=1) 27 | grid_so3_1 = so3_near_identity_grid( 28 | n_alpha=6, max_beta=np.pi / 16, n_beta=1, max_gamma=2*np.pi, n_gamma=6) 29 | grid_so3_2 = so3_near_identity_grid( 30 | n_alpha=6, max_beta=np.pi / 8, n_beta=1, max_gamma=2*np.pi, n_gamma=6) 31 | grid_so3_3 = so3_near_identity_grid( 32 | n_alpha=6, max_beta=np.pi / 4, n_beta=1, max_gamma=2*np.pi, n_gamma=6) 33 | grid_so3_4 = so3_near_identity_grid( 34 | n_alpha=6, max_beta=np.pi / 2, n_beta=1, max_gamma=2*np.pi, n_gamma=6) 35 | 36 | self.conv1 = nn.Sequential( 37 | S2Convolution(nfeature_in=1, nfeature_out=8, 38 | b_in=bandwidth, b_out=bandwidth//2, grid=grid_s2), 39 | nn.BatchNorm3d(8), 40 | nn.ReLU()) 41 | 42 | self.conv2 = nn.Sequential( 43 | SO3Convolution(nfeature_in=8, nfeature_out=8, 44 | b_in=bandwidth//2, b_out=bandwidth//4, grid=grid_so3_1), 45 | nn.BatchNorm3d(8), 46 | nn.ReLU()) 47 | 48 | self.conv3 = nn.Sequential( 49 | SO3Convolution(nfeature_in=8, nfeature_out=16, 50 | b_in=bandwidth//4, b_out=bandwidth//8, grid=grid_so3_2), 51 | nn.BatchNorm3d(16), 52 | nn.ReLU()) 53 | 54 | self.conv4 = nn.Sequential( 55 | SO3Convolution(nfeature_in=16, nfeature_out=16, 56 | b_in=bandwidth//8, b_out=bandwidth//8, grid=grid_so3_3), 57 | nn.BatchNorm3d(16), 58 | nn.ReLU()) 59 | 60 | self.vlad = NetVLAD(num_clusters=config.MODEL.NETVLAD.CLUSTER_NUM, 61 | dim=config.MODEL.NETVLAD.FEATURE_DIM, 62 | normalize_input=config.MODEL.NETVLAD.NORMALIZE_INPUT, 63 | output_dim=config.MODEL.NETVLAD.OUTPUT_DIM, 64 | gate=config.MODEL.NETVLAD.GATE) 65 | 66 | 67 | def forward(self, x): 68 | # encoder 69 | x = self.conv1(x) 70 | x = self.conv2(x) 71 | x = self.conv3(x) 72 | x = self.conv4(x) 73 | # reshape 74 | x = x.view(x.shape[0], x.shape[1], x.shape[2], x.shape[3]*x.shape[4]) 75 | # vlad 76 | x = self.vlad(x) 77 | return x -------------------------------------------------------------------------------- /models/loop_closure/lidar/spherevlad2.py: 
-------------------------------------------------------------------------------- 1 | ''' 2 | Author: Shiqi Zhao 3 | SphereVLAD++ 4 | ''' 5 | import numpy as np 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | from s2cnn import SO3Convolution 10 | from s2cnn import S2Convolution 11 | from s2cnn import so3_near_identity_grid 12 | from s2cnn import s2_near_identity_grid 13 | 14 | from ..aggregator.netvlad import NetVLAD 15 | from ..transformer.attention import Spatial_Attn 16 | 17 | 18 | class SphereVLAD2(nn.Module): 19 | ''' 20 | sphvlad2 with bn 21 | ''' 22 | def __init__(self, config): 23 | super(SphereVLAD2, self).__init__() 24 | 25 | bandwidth = (int)(config.DATA.SPH_PROJ.IMAGE_SIZE[0]/2.0) 26 | 27 | grid_s2 = s2_near_identity_grid(n_alpha=6, max_beta=np.pi/16, n_beta=1) 28 | grid_so3_1 = so3_near_identity_grid( 29 | n_alpha=6, max_beta=np.pi / 16, n_beta=1, max_gamma=2*np.pi, n_gamma=6) 30 | grid_so3_2 = so3_near_identity_grid( 31 | n_alpha=6, max_beta=np.pi / 8, n_beta=1, max_gamma=2*np.pi, n_gamma=6) 32 | grid_so3_3 = so3_near_identity_grid( 33 | n_alpha=6, max_beta=np.pi / 4, n_beta=1, max_gamma=2*np.pi, n_gamma=6) 34 | grid_so3_4 = so3_near_identity_grid( 35 | n_alpha=6, max_beta=np.pi / 2, n_beta=1, max_gamma=2*np.pi, n_gamma=6) 36 | 37 | self.conv1 = nn.Sequential( 38 | S2Convolution(nfeature_in=1, nfeature_out=8, 39 | b_in=bandwidth, b_out=bandwidth//2, grid=grid_s2), 40 | nn.BatchNorm3d(8), 41 | nn.ReLU()) 42 | 43 | self.conv2 = nn.Sequential( 44 | SO3Convolution(nfeature_in=8, nfeature_out=8, 45 | b_in=bandwidth//2, b_out=bandwidth//4, grid=grid_so3_1), 46 | nn.BatchNorm3d(8), 47 | nn.ReLU()) 48 | 49 | self.conv3 = nn.Sequential( 50 | SO3Convolution(nfeature_in=8, nfeature_out=16, 51 | b_in=bandwidth//4, b_out=bandwidth//8, grid=grid_so3_2), 52 | nn.BatchNorm3d(16), 53 | nn.ReLU()) 54 | 55 | self.conv4 = nn.Sequential( 56 | SO3Convolution(nfeature_in=16, nfeature_out=16, 57 | b_in=bandwidth//8, b_out=bandwidth//8, grid=grid_so3_3), 58 | nn.BatchNorm3d(16), 59 | nn.ReLU()) 60 | 61 | self.attention = Spatial_Attn(in_dim=config.MODEL.NETVLAD.FEATURE_DIM) 62 | 63 | self.vlad = NetVLAD(num_clusters=config.MODEL.NETVLAD.CLUSTER_NUM, 64 | dim=config.MODEL.NETVLAD.FEATURE_DIM, 65 | normalize_input=config.MODEL.NETVLAD.NORMALIZE_INPUT, 66 | output_dim=config.MODEL.NETVLAD.OUTPUT_DIM, 67 | gate=config.MODEL.NETVLAD.GATE) 68 | 69 | 70 | def forward(self, x): 71 | # encoder 72 | x = self.conv1(x) 73 | x = self.conv2(x) 74 | x = self.conv3(x) 75 | x = self.conv4(x) 76 | # reshape and normalize across the descriptor dimension 77 | x = x.view(x.shape[0], x.shape[1], x.shape[2], x.shape[3]*x.shape[4]) 78 | x = F.normalize(x, p=2, dim=1) 79 | # spatial self attention 80 | x, _ = self.attention(x) 81 | # vlad 82 | x = self.vlad(x) 83 | return x -------------------------------------------------------------------------------- /models/loop_closure/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .lazy_quadruplet_loss import LazyTripletLoss, LazyQuadrupletLoss 2 | 3 | def make_losses(config): 4 | if config.LOSS.NAME == "LazyQuadrupletLoss": 5 | loss = LazyQuadrupletLoss( 6 | margin_dis=config.LOSS.MARGIN0, 7 | margin_sec=config.LOSS.MARGIN1) 8 | elif config.LOSS.NAME == "LazyTripletLoss": 9 | loss = LazyTripletLoss(margin=config.LOSS.MARGIN0) 10 | else: 11 | raise NotImplementedError(f"Unrecognized Loss Function {config.LOSS.NAME}!") 12 | return loss 
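Minimal usage sketch of make_losses (illustrative only, not part of the repository files): the yacs fields below are the ones the factory reads; the margin values, batch size, and 256-D descriptor size are made-up numbers; the 1 query + 2 positives + 18 negatives + 1 random "other" negative layout along dim 1 mirrors the slicing hard-coded in LazyQuadrupletLoss.forward in the next file.
import torch
from yacs.config import CfgNode as CN
from models.loop_closure.losses import make_losses

# Hypothetical config; make_losses only reads LOSS.NAME, LOSS.MARGIN0 and LOSS.MARGIN1.
cfg = CN()
cfg.LOSS = CN()
cfg.LOSS.NAME = "LazyQuadrupletLoss"
cfg.LOSS.MARGIN0 = 0.5   # margin of the triplet term (illustrative value)
cfg.LOSS.MARGIN1 = 0.2   # margin of the second-order term (illustrative value)

criterion = make_losses(cfg)
# descriptors per sample: 1 query, 2 positives, 18 negatives, 1 random "other" negative
feats = torch.randn(4, 22, 256)
loss, (trip_loss, second_loss) = criterion(feats)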
-------------------------------------------------------------------------------- /models/loop_closure/losses/lazy_quadruplet_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def _best_pos_distance(query, pos_vecs): 5 | ''' 6 | Find the minimum distance between the query feature and the positive features. 7 | Args: 8 | query (tensor: batch_size x 1 x feature_dim): Output feature of query 9 | pos_vecs (tensor: batch_size x num_pos x feature_dim): Output feature of positives 10 | 11 | Returns: 12 | best_pos (tensor: batch_size): Minimum distance of each query to positives. 13 | ''' 14 | if len(pos_vecs.shape) > 2: 15 | if len(query.shape) <= 2: 16 | query_copy = query.unsqueeze(1).repeat(1, pos_vecs.shape[1], 1) 17 | best_pos = torch.min(torch.norm( 18 | query_copy - pos_vecs, dim=2), dim=1) 19 | else: 20 | best_pos = torch.min(torch.norm(query - pos_vecs, dim=2), dim=1) 21 | best_pos = best_pos[0] 22 | else: 23 | best_pos = torch.norm(query - pos_vecs, dim=1) 24 | 25 | best_pos = best_pos.view(query.shape[0], -1) 26 | return best_pos 27 | 28 | 29 | def _best_neg_distance(query, neg_vecs): 30 | """ Calculate the distance between the query and the negative queries 31 | Args: 32 | query ([batch, 1, 8192]): Query information 33 | neg_vecs ([batch, 2, 8192]): Negative Query information 34 | Returns: 35 | [batch, 2, 1]: [distance] 36 | """ 37 | 38 | if len(query.shape) > 2: 39 | best_neg = torch.norm(query - neg_vecs, dim=2) 40 | else: 41 | query_copy = query.unsqueeze(1).repeat(1, neg_vecs.shape[1], 1) 42 | best_neg = torch.norm(query_copy - neg_vecs, dim=2) 43 | 44 | return best_neg 45 | 46 | 47 | class LazyTripletLoss(torch.nn.Module): 48 | ''' 49 | Lazy variant of the triplet loss 50 | ''' 51 | 52 | def __init__(self, margin=0.2): 53 | super(LazyTripletLoss, self).__init__() 54 | self.margin = margin 55 | 56 | def forward(self, q_vec, pos_vecs, neg_vecs): 57 | 58 | batch_size = q_vec.shape[0] 59 | num_neg = neg_vecs.shape[1] 60 | 61 | # query to positive queries 62 | best_pos = _best_pos_distance(q_vec, pos_vecs) 63 | q_pos = best_pos.reshape(-1, 1).repeat(1, num_neg) 64 | q_neg = _best_neg_distance(q_vec, neg_vecs) 65 | loss = q_pos - q_neg + self.margin 66 | loss = torch.clamp(loss, min=0.0) 67 | loss = torch.mean(loss) 68 | 69 | return loss 70 | 71 | 72 | class LazySecondLoss(torch.nn.Module): 73 | ''' 74 | Lazy variant of the second loss term used in the quadruplet loss 75 | ''' 76 | 77 | def __init__(self, margin=0.2): 78 | super(LazySecondLoss, self).__init__() 79 | self.margin = margin 80 | 81 | def forward(self, q_vec, pos_vecs, neg_vecs, other_neg): 82 | 83 | batch_size = q_vec.shape[0] 84 | num_neg = neg_vecs.shape[1] 85 | 86 | # query to positive queries 87 | best_pos = _best_pos_distance(q_vec, pos_vecs) 88 | other_neg_copy = other_neg.repeat(1, neg_vecs.shape[1], 1) 89 | 90 | q_pos = best_pos.repeat(1, neg_vecs.shape[1]) 91 | # print(neg_vecs.shape, other_neg_copy.shape) 92 | # print(best_pos.shape, q_vec.shape, pos_vecs.shape) 93 | q_neg = _best_neg_distance(neg_vecs, other_neg_copy) 94 | 95 | # print(q_pos.shape, q_neg.shape) 96 | loss = q_pos - q_neg + self.margin 97 | loss = torch.clamp(loss, min=0.0) 98 | loss = torch.mean(loss) 99 | 100 | return loss 101 | 102 | 103 | class LazyQuadrupletLoss(torch.nn.Module): 104 | ''' 105 | Lazy Quadruplet Loss 106 | Args: 107 | q_vec (tensor: batch_size x 1 x feature_dim): query feature 108 | pos_vecs (tensor: batch_size x num_pos x feature_dim): positive feature 109 | neg_vecs (tensor: batch_size x num_neg x feature_dim): 
negative feature 110 | other_neg (tensor: batch_size x num_neg x feature_dim): random negative feature 111 | m0 (float32): margin for triplet loss 112 | m1 (float32): margin for second loss 113 | Returns: quadruplet loss, triplet loss, second loss 114 | ''' 115 | 116 | def __init__(self, margin_dis, margin_sec): 117 | super(LazyQuadrupletLoss, self).__init__() 118 | self.trip_loss = LazyTripletLoss(margin_dis) 119 | self.second_loss = LazySecondLoss(margin_sec) 120 | 121 | def forward(self, x): 122 | q_vec = x[:, 0:1, :] 123 | pos_vecs = x[:, 1:3, :] 124 | neg_vecs = x[:, 3:21, :] 125 | neg_other = x[:, 21:, :] 126 | 127 | # |q_vec - neg_vecs| >= |q_vec - pos_vecs| + margin_trans 128 | trip_loss = self.trip_loss(q_vec, pos_vecs, neg_vecs) 129 | # |q_vec - other_neg| >= |q_vec - neg_vecs| + margin_sec 130 | second_loss = self.second_loss(q_vec, pos_vecs, neg_vecs, neg_other) 131 | loss = trip_loss + second_loss 132 | 133 | return loss, (trip_loss, second_loss) 134 | -------------------------------------------------------------------------------- /models/loop_closure/transformer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MetaSLAM/SphereVLAD/559d5f46b0f8217874273c78e622f28999200482/models/loop_closure/transformer/__init__.py -------------------------------------------------------------------------------- /models/loop_closure/transformer/attention.py: -------------------------------------------------------------------------------- 1 | """ 2 | Spatial_Attn is modified based on 3 | https://github.com/heykeetae/Self-Attention-GAN/blob/master/sagan_models.py 4 | """ 5 | 6 | import torch 7 | import torch.nn as nn 8 | 9 | 10 | class Spatial_Attn(nn.Module): 11 | """ Self attention Layer""" 12 | def __init__(self, in_dim): 13 | super(Spatial_Attn, self).__init__() 14 | self.chanel_in = in_dim 15 | if in_dim == 16: 16 | out_dim = in_dim // 2 17 | elif in_dim == 64: 18 | out_dim = in_dim * 2 19 | else: 20 | out_dim = in_dim // 8 21 | self.query_conv = nn.Conv2d(in_channels=in_dim, out_channels=out_dim, kernel_size=1) 22 | self.key_conv = nn.Conv2d(in_channels=in_dim, out_channels=out_dim, kernel_size=1) 23 | self.value_conv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim, kernel_size=1) 24 | self.gamma = nn.Parameter(torch.zeros(1)) 25 | 26 | self.softmax = nn.Softmax(dim=-1) 27 | 28 | def forward(self,x): 29 | """ 30 | inputs : 31 | x : input feature maps( B X C X W X H) 32 | returns : 33 | out : self attention value + input feature 34 | attention: B X N X N (N is Width*Height) 35 | """ 36 | m_batchsize,C,width,height = x.size() 37 | proj_query = self.query_conv(x).view(m_batchsize,-1,width*height).permute(0,2,1) # B X N X C 38 | proj_key = self.key_conv(x).view(m_batchsize,-1,width*height) # B X C x N 39 | energy = torch.bmm(proj_query,proj_key) # transpose check 40 | attention = self.softmax(energy) # B X N X N 41 | proj_value = self.value_conv(x).view(m_batchsize,-1,width*height) # B X C X N 42 | 43 | out = torch.bmm(proj_value, attention.permute(0,2,1)) 44 | out = out.view(m_batchsize, C, width, height) 45 | 46 | out = self.gamma*out + x 47 | return out, attention 48 | 49 | 50 | class Channel_Attn(nn.Module): 51 | """ Self channel attention Layer""" 52 | def __init__(self, in_dim, type): 53 | super(Channel_Attn, self).__init__() 54 | if type == "add": 55 | self.softmax = nn.Softmax(dim=-1) 56 | self.gamma = nn.Parameter(torch.zeros(1)) 57 | elif type == "reweight": 58 | self.sigmoid = nn.Sigmoid() 59 | 
self.pool = nn.AdaptiveAvgPool2d((in_dim, 1)) 60 | self.fc = nn.Linear(in_dim, in_dim) 61 | self.type = type 62 | 63 | def forward(self,x): 64 | """ 65 | inputs : 66 | x : input feature maps( B X C X N) 67 | returns : 68 | out : attention weight + input feature(element-wise) 69 | attention: B X C X C (C is channel number) 70 | """ 71 | m_batchsize,C,N = x.size() 72 | energy = torch.bmm(x, x.permute(0,2,1)) # transpose check 73 | 74 | if self.type == "add": 75 | attention = self.softmax(energy) # B X C X C 76 | value = torch.bmm(attention.permute(0,2,1), x) # B*C*N 77 | out = self.gamma*value + x 78 | elif self.type == "reweight": 79 | weight = self.pool(energy) 80 | weight = torch.bmm(energy, weight) 81 | weight = self.fc(x.squeeze(-1)) 82 | weight = self.sigmoid(weight) 83 | out = x.view(m_batchsize, C)*weight 84 | out = out.view(m_batchsize, -1) 85 | return out -------------------------------------------------------------------------------- /models/loop_closure/visual/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MetaSLAM/SphereVLAD/559d5f46b0f8217874273c78e622f28999200482/models/loop_closure/visual/__init__.py -------------------------------------------------------------------------------- /models/loop_closure/visual/spherevlad.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: Shiqi Zhao 3 | SphereVLAD 4 | ''' 5 | import numpy as np 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | from s2cnn import SO3Convolution 10 | from s2cnn import S2Convolution 11 | from s2cnn import so3_near_identity_grid 12 | from s2cnn import s2_near_identity_grid 13 | 14 | from ..aggregator.netvlad import NetVLAD 15 | 16 | 17 | class SphereVLAD(nn.Module): 18 | ''' 19 | sphvlad with bn 20 | ''' 21 | def __init__(self, config): 22 | super(SphereVLAD, self).__init__() 23 | 24 | bandwidth = (int)(config.DATA.SPH_IM.IMAGE_SIZE[0]/2.0) 25 | 26 | grid_s2 = s2_near_identity_grid(n_alpha=6, max_beta=np.pi/16, n_beta=1) 27 | grid_so3_1 = so3_near_identity_grid( 28 | n_alpha=6, max_beta=np.pi / 16, n_beta=1, max_gamma=2*np.pi, n_gamma=6) 29 | grid_so3_2 = so3_near_identity_grid( 30 | n_alpha=6, max_beta=np.pi / 8, n_beta=1, max_gamma=2*np.pi, n_gamma=6) 31 | grid_so3_3 = so3_near_identity_grid( 32 | n_alpha=6, max_beta=np.pi / 4, n_beta=1, max_gamma=2*np.pi, n_gamma=6) 33 | grid_so3_4 = so3_near_identity_grid( 34 | n_alpha=6, max_beta=np.pi / 2, n_beta=1, max_gamma=2*np.pi, n_gamma=6) 35 | 36 | self.conv1 = nn.Sequential( 37 | S2Convolution(nfeature_in=3, nfeature_out=8, 38 | b_in=bandwidth, b_out=bandwidth//2, grid=grid_s2), 39 | nn.BatchNorm3d(8), 40 | nn.ReLU()) 41 | 42 | self.conv2 = nn.Sequential( 43 | SO3Convolution(nfeature_in=8, nfeature_out=8, 44 | b_in=bandwidth//2, b_out=bandwidth//4, grid=grid_so3_1), 45 | nn.BatchNorm3d(8), 46 | nn.ReLU()) 47 | 48 | self.conv3 = nn.Sequential( 49 | SO3Convolution(nfeature_in=8, nfeature_out=16, 50 | b_in=bandwidth//4, b_out=bandwidth//8, grid=grid_so3_2), 51 | nn.BatchNorm3d(16), 52 | nn.ReLU()) 53 | 54 | self.conv4 = nn.Sequential( 55 | SO3Convolution(nfeature_in=16, nfeature_out=16, 56 | b_in=bandwidth//8, b_out=bandwidth//8, grid=grid_so3_3), 57 | nn.BatchNorm3d(16), 58 | nn.ReLU()) 59 | 60 | self.vlad = NetVLAD(num_clusters=config.MODEL.NETVLAD.CLUSTER_NUM, 61 | dim=config.MODEL.NETVLAD.FEATURE_DIM, 62 | normalize_input=config.MODEL.NETVLAD.NORMALIZE_INPUT, 63 | output_dim=config.MODEL.NETVLAD.OUTPUT_DIM, 64 | 
gate=config.MODEL.NETVLAD.GATE) 65 | 66 | 67 | def forward(self, x): 68 | # encoder 69 | x = self.conv1(x) 70 | x = self.conv2(x) 71 | x = self.conv3(x) 72 | x = self.conv4(x) 73 | # reshape 74 | x = x.view(x.shape[0], x.shape[1], x.shape[2], x.shape[3]*x.shape[4]) 75 | # vlad 76 | x = self.vlad(x) 77 | return x -------------------------------------------------------------------------------- /models/robotLCD.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('../') 3 | sys.path.append('../../') 4 | 5 | import time 6 | 7 | import torch 8 | import torch.nn as nn 9 | 10 | from models.loop_closure import make_models 11 | from models.loop_closure.losses import make_losses 12 | from utils import log_print 13 | 14 | 15 | #!=======================================================================# 16 | #! Training # 17 | #!=======================================================================# 18 | class LcdNet(object): 19 | def __init__(self, config, logger, use_cuda, device, neptune=None, gpu_ids=None): 20 | super(LcdNet, self).__init__() 21 | 22 | self.neptune = neptune 23 | self.config = config 24 | self.logger = logger 25 | 26 | #! Let's define different models 27 | self.model = make_models(config) 28 | log_print("Running {}".format(config.MODEL.NAME), 'r') 29 | log_print("#params of model: {}".format(sum([x.numel() 30 | for x in self.model.parameters()])), 'b') 31 | log_print("#trainable params of model: {}".format(sum([x.numel() 32 | for x in self.model.parameters() if x.requires_grad])), 'b') 33 | 34 | #! Data Parallel 35 | if use_cuda: 36 | if gpu_ids is not None and len(gpu_ids)>1 : 37 | print("Let's use", len(gpu_ids), "GPUs!") 38 | self.model = nn.DataParallel(self.model, device_ids=gpu_ids) 39 | self.model = self.model.to(device) 40 | 41 | #! Loss Function 42 | self.criterion = make_losses(config).to(device) 43 | 44 | #! Optimizer and Scheduler 45 | if config.TRAINING.OPTIMIZER.NAME == 'Adam': 46 | self.optimizer = torch.optim.Adam( 47 | filter(lambda p: p.requires_grad, self.model.parameters()), 48 | lr=config.TRAINING.OPTIMIZER.INIT_LEARNING_RATE) 49 | else: 50 | raise NotImplementedError(f"Unrecognized Optimizer {config.TRAINING.OPTIMIZER.NAME}") 51 | 52 | if config.TRAINING.SCHEDULER.NAME == None: 53 | self.scheduler = None 54 | elif config.TRAINING.SCHEDULER.NAME == "StepLR": 55 | self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, 56 | step_size=config.TRAINING.SCHEDULER.STEP_SIZE, 57 | gamma=config.TRAINING.SCHEDULER.GAMMA) 58 | elif config.TRAINING.SCHEDULER.NAME == "MultiStepLR": 59 | self.scheduler = torch.optim.lr_scheduler.MultiStepLR(self.optimizer, 60 | milestones=config.TRAINING.SCHEDULER.MILESTONES, 61 | gamma=config.TRAINING.SCHEDULER.GAMMA) 62 | else: 63 | raise NotImplementedError(f"Unrecognized Scheduler {config.TRAINING.SCHEDULER.NAME}") 64 | 65 | #! 
Resume Training & Testing 66 | if config.TRAINING.IS_TRAIN: 67 | if config.TRAINING.RESUME: 68 | self.epoch = self.load_checkpoint(config.WEIGHT.LOAD_ADDRESS, config.TRAINING.RESUME) + 1 69 | else: 70 | self.epoch = 1 71 | else: 72 | self.load_checkpoint(config.WEIGHT.LOAD_ADDRESS) 73 | 74 | def train_lcd(self, x): 75 | """[summary] 76 | Args: 77 | x ([type]): [description] 78 | """ 79 | 80 | self.model.train() 81 | data = torch.cat(x, dim=1) 82 | B = data.shape[0] 83 | N = data.shape[1] 84 | lidar_data = data.view(B*N, -1, data.shape[3], data.shape[4]) 85 | self.optimizer.zero_grad() 86 | feature_lidar = self.model(lidar_data).view(B, N, -1) 87 | 88 | loss_lidar, losses = self.criterion(feature_lidar) 89 | if self.neptune is not None: 90 | self.neptune['Sphere/training_lidar_loss'].append(loss_lidar.item()) 91 | self.neptune['Sphere/training_lidar_trip'].append(losses[0].item()) 92 | self.neptune['Sphere/training_lidar_secd'].append(losses[1].item()) 93 | loss_lidar.backward() 94 | self.optimizer.step() 95 | return loss_lidar.item() 96 | 97 | def eval_lcd(self, x): 98 | self.model.eval() 99 | data = torch.cat(x, dim=1) 100 | B = data.shape[0] 101 | N = data.shape[1] 102 | lidar_data = data.view(B*N, -1, data.shape[3], data.shape[4]) 103 | feature_lidar = self.model(lidar_data).view(B, N, -1) 104 | with torch.no_grad(): 105 | loss, (trip, secd) = self.criterion(feature_lidar) 106 | if self.neptune is not None: 107 | self.neptune['Sphere/eval_lidar_loss'].append(loss.item()) 108 | self.neptune['Sphere/eval_lidar_trip'].append(trip.item()) 109 | self.neptune['Sphere/eval_lidar_secd'].append(secd.item()) 110 | 111 | def adjust_learning_rate(self): 112 | if self.scheduler != None: 113 | self.scheduler.step() 114 | 115 | def save_checkpoint(self, epoch): 116 | state_dict = {"state_dict": self.model.state_dict(), 117 | "optimizer": self.optimizer.state_dict(), 118 | "epoch": epoch} 119 | torch.save(state_dict, 120 | '{}/pth/model_{}.pth'.format(self.config.OUTPUT.DIR, epoch)) 121 | 122 | def load_checkpoint(self, weight_path, resume=False): 123 | checkpoint = torch.load(weight_path) 124 | # load model parameters 125 | try: 126 | model_dict = checkpoint["state_dict"] 127 | except: 128 | model_dict = checkpoint # for pointnetvlad 129 | try: 130 | self.model.load_state_dict(model_dict) 131 | except: 132 | from collections import OrderedDict 133 | model_dict = OrderedDict() 134 | for key, value in checkpoint["state_dict"].items(): 135 | new_key = key.split('module.')[-1] 136 | model_dict[new_key] = value 137 | self.model.load_state_dict(model_dict) 138 | log_print("Load models from {}!".format(weight_path), 'g') 139 | # load optimizer parameters 140 | if resume: 141 | self.optimizer.load_state_dict(checkpoint["optimizer"]) 142 | log_print("Load optimizer parameters from {}!".format(weight_path), 'g') 143 | return checkpoint["epoch"] 144 | else: 145 | return 146 | 147 | 148 | #!=======================================================================# 149 | #! 
Inference # 150 | #!=======================================================================# 151 | def infer_frame(self, x, t=False): 152 | if self.config.MODEL.TYPE in ["Lidar", "LiSPH"]: 153 | return self.infer_lidar(*x, t) 154 | elif self.config.MODEL.TYPE == "Image": 155 | return self.infer_image(*x, t) 156 | 157 | def infer_image(self, x, t=False): 158 | self.model.eval() 159 | x = x.view(1, 3, x.shape[-2], x.shape[-1]) 160 | if t == True: 161 | t1 = time.time() 162 | with torch.no_grad(): 163 | x = self.model(x).view(-1) 164 | if t == True: 165 | t2 = time.time() 166 | t_delta = t2-t1 167 | return x.detach().cpu().numpy(), t_delta 168 | return x.detach().cpu().numpy() 169 | 170 | def infer_lidar(self, x, t=False): 171 | self.model.eval() 172 | x = x.view(1, self.config.MODEL.S2CNN.INPUT_CHANNEL_NUM, x.shape[-2], x.shape[-1]) 173 | if t == True: 174 | t1 = time.time() 175 | with torch.no_grad(): 176 | x = self.model(x).view(-1) 177 | if t == True: 178 | t2 = time.time() 179 | t_delta = t2-t1 180 | return x.detach().cpu().numpy(), t_delta 181 | return x.detach().cpu().numpy() 182 | -------------------------------------------------------------------------------- /pics/spherevlad.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MetaSLAM/SphereVLAD/559d5f46b0f8217874273c78e622f28999200482/pics/spherevlad.png -------------------------------------------------------------------------------- /pics/spherevlad2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MetaSLAM/SphereVLAD/559d5f46b0f8217874273c78e622f28999200482/pics/spherevlad2.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | tqdm 4 | open3d 5 | scikit-image 6 | Pillow 7 | pandas 8 | yacs 9 | neptune 10 | lie_learn -------------------------------------------------------------------------------- /train_lcd.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: Peng Yin, Shiqi Zhao 3 | train.py 4 | ''' 5 | 6 | import os 7 | import sys 8 | import argparse 9 | import numpy as np 10 | import time 11 | import datetime 12 | import torch 13 | import torch.nn as nn 14 | from tqdm import tqdm 15 | 16 | from config import cfg 17 | from models import set_lcd_model 18 | from dataloader import make_data_loader 19 | from eval import EvaluationPitts 20 | from utils import setup_logger, log_print 21 | 22 | #!============================================================================================# 23 | #! Parameters for Dataset and Network 24 | #!============================================================================================# 25 | 26 | 27 | def para_args(): 28 | parser = argparse.ArgumentParser(description="Network configurations!") 29 | parser.add_argument("--config-network", default="config/network/spherevlad.yaml", metavar="FILE", 30 | help="path to config file", type=str) 31 | parser.add_argument("--config-dataset", default="config/dataset/PITT.yaml", metavar="FILE", 32 | help="path to config file", type=str) 33 | args = parser.parse_args() 34 | return args 35 | 36 | 37 | def train(config, logger, neptune): 38 | 39 | #! Define Model 40 | lcd, gpu_conf = set_lcd_model(config, logger, neptune) 41 | [_, device, gpu_ids] = gpu_conf 42 | 43 | #! 
Define Dataloader 44 | train_loader = make_data_loader(config, gpu_ids, is_train=True) 45 | if config.DATA.DATASET_NAME in ["PITT", "Campus"]: 46 | eval_loader = make_data_loader(config, gpu_ids, is_train=False) 47 | else: 48 | eval_loader = [] 49 | log_print("train batch with {}, eval batch with {}".format( 50 | len(train_loader), len(eval_loader)), 'g') 51 | 52 | #! Define Tester 53 | if config.DATA.DATASET_NAME == "PITT": 54 | tester = EvaluationPitts(config, lcd, device) 55 | 56 | #! Main loop 57 | prev_time = time.time() 58 | best_recall = 0 59 | for epoch in range(lcd.epoch, config.TRAINING.EPOCH+1): 60 | 61 | log_print("Train epoch {}".format(epoch), "g") 62 | 63 | #! Do Training 64 | for i, batch in enumerate(train_loader): 65 | 66 | # * Determine approximate time left 67 | batches_done = epoch * len(train_loader) + i 68 | batches_left = config.TRAINING.EPOCH * \ 69 | len(train_loader) - batches_done 70 | time_left = datetime.timedelta( 71 | seconds = batches_left * (time.time() - prev_time)) 72 | prev_time = time.time() 73 | data = [x.to(device, dtype=torch.float) for x in batch] 74 | loss_lidar = lcd.train_lcd(data) 75 | sys.stdout.write( 76 | "\r[Epoch %d/%d] [Batch %d/%d] [LiDAR loss: %f], ETA: %s" 77 | % (epoch, config.TRAINING.EPOCH, i, len(train_loader), loss_lidar, time_left) 78 | ) 79 | 80 | #! Do Evaluation 81 | if config.DATA.DATASET_NAME in ["PITT", "Campus"]: 82 | for i, batch in tqdm(enumerate(eval_loader), total=len(eval_loader)): 83 | data = [x.to(device, dtype=torch.float) for x in batch] 84 | lcd.eval_lcd(data) 85 | 86 | #! Do Test 87 | if config.DATA.DATASET_NAME in ["PITT", "Campus"]: 88 | test_stats = [] 89 | for traj_num in config.DATA.TEST_LIST: 90 | if config.DATA.DATASET_NAME == "PITT": 91 | recall, _, _ = tester.get_features_recall(traj_num, 2, 0, 0) 92 | else: 93 | recall, _, _ = tester.infer(traj_num) 94 | test_stats.append(recall) 95 | test_stats = np.array(test_stats).sum(axis=0) 96 | curr_recall = test_stats[0]/test_stats[-1] 97 | if curr_recall > best_recall: 98 | lcd.save_checkpoint(666) 99 | logger.info(f"[Epoch {epoch}/{config.TRAINING.EPOCH}] Recall@1 {curr_recall}") 100 | log_print(f"[Epoch {epoch}/{config.TRAINING.EPOCH}] Recall@1 {curr_recall}", 'b') 101 | 102 | #! Adjust Learning Rate 103 | lcd.adjust_learning_rate() 104 | 105 | #! 
Save Model 106 | if epoch % 2 == 0: 107 | lcd.save_checkpoint(epoch) 108 | 109 | 110 | if __name__ == "__main__": 111 | args = para_args() 112 | cfg.merge_from_file(args.config_network) 113 | cfg.merge_from_file(args.config_dataset) 114 | cfg.TRAINING.IS_TRAIN = True 115 | logger, neptune, cfg = setup_logger( 116 | "Loop closure benchmark", cfg, args, is_train=True) 117 | cfg.freeze() 118 | train(cfg, logger, neptune) 119 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | pointProcess.py: functions about point processing 3 | logger.py: training log 4 | log_print.py: print info with different color and font 5 | ''' 6 | from .pointProcess import pcd_downsample, pc_normalize, LaserProjection 7 | from .log_print import log_print 8 | from .logger import setup_logger 9 | -------------------------------------------------------------------------------- /utils/equirectRotate/EquirectRotate.py: -------------------------------------------------------------------------------- 1 | from .utils import * 2 | 3 | 4 | class EquirectRotate: 5 | """ 6 | @:param height: height of image to rotate 7 | @:param width: width of image to rotate 8 | @:param rotation: x, y, z degree to rotate 9 | """ 10 | 11 | def __init__(self, height: int, width: int, rotation: tuple): 12 | assert height * 2 == width 13 | self.height = height 14 | self.width = width 15 | 16 | out_img = np.zeros((height, width)) # (H, W) 17 | 18 | # mapping equirect coordinate into LatLon coordinate system 19 | self.out_LonLat = Pixel2LatLon(out_img) # (H, W, (lat, lon)) 20 | 21 | # mapping LatLon coordinate into xyz(sphere) coordinate system 22 | self.out_xyz = LatLon2Sphere(self.out_LonLat) # (H, W, (x, y, z)) 23 | 24 | self.src_xyz = np.zeros_like(self.out_xyz) # (H, W, (x, y, z)) 25 | 26 | # make pair of xyz coordinate between src_xyz and out_xyz. 27 | # src_xyz @ R = out_xyz 28 | # src_xyz = out_xyz @ R^t 29 | self.R = getRotMatrix(np.array(rotation)) 30 | Rt = np.transpose(self.R) # we should fill out the output image, so use R^t. 31 | for i in range(self.height): 32 | for j in range(self.width): 33 | self.src_xyz[i][j] = self.out_xyz[i][j] @ Rt 34 | 35 | # mapping xyz(sphere) coordinate into LatLon coordinate system 36 | self.src_LonLat = Sphere2LatLon(self.src_xyz) # (H, W, 2) 37 | 38 | # mapping LatLon coordinate into equirect coordinate system 39 | self.src_Pixel = LatLon2Pixel(self.src_LonLat) # (H, W, 2) 40 | 41 | def rotate(self, image): 42 | """ 43 | :param image: (H, W, C) 44 | :return: rotated (H, W, C) 45 | """ 46 | assert image.shape[:2] == (self.height, self.width) 47 | 48 | rotated_img = np.zeros_like(image) # (H, W, C) 49 | for i in range(self.height): 50 | for j in range(self.width): 51 | pixel = self.src_Pixel[i][j] 52 | rotated_img[i][j] = image[pixel[0]][pixel[1]] 53 | return rotated_img 54 | 55 | def setInverse(self, isInverse=False): 56 | if not isInverse: 57 | return 58 | 59 | # re-generate coordinate pairing 60 | self.R = np.transpose(self.R) 61 | Rt = np.transpose(self.R) # we should fill out the output image, so use R^t. 
62 | for i in range(self.height): 63 | for j in range(self.width): 64 | self.src_xyz[i][j] = self.out_xyz[i][j] @ Rt 65 | 66 | # mapping xyz(sphere) coordinate into LatLon coordinate system 67 | self.src_LonLat = Sphere2LatLon(self.src_xyz) # (H, W, 2) 68 | 69 | # mapping LatLon coordinate into equirect coordinate system 70 | self.src_Pixel = LatLon2Pixel(self.src_LonLat) # (H, W, 2) 71 | 72 | 73 | def pointRotate(h, w, index, rotation): 74 | """ 75 | :param i, j: index of pixel in equirectangular 76 | :param rotation: (yaw, pitch, roll) in degree 77 | :return: rotated index of pixel 78 | """ 79 | i, j = index 80 | assert (0 <= i < h) and (0 <= j < w) 81 | 82 | # convert pixel index to LatLon 83 | Lat = (0.5 - i / h) * np.pi 84 | Lon = (j / w - 0.5) * 2 * np.pi 85 | 86 | # convert LatLon to xyz 87 | x = np.cos(Lat) * np.cos(Lon) 88 | y = np.cos(Lat) * np.sin(Lon) 89 | z = np.sin(Lat) 90 | xyz = np.array([x, y, z]) 91 | R = getRotMatrix(np.array(rotation)) 92 | rotated_xyz = xyz @ R 93 | 94 | _Lat = np.pi / 2 - np.arccos(rotated_xyz[2]) 95 | _Lon = np.arctan2(rotated_xyz[1], rotated_xyz[0]) 96 | 97 | _i = h * (0.5 - _Lat / np.pi) % h 98 | _j = w * (0.5 + _Lon / (2 * np.pi)) % w 99 | 100 | return _i, _j 101 | -------------------------------------------------------------------------------- /utils/equirectRotate/__init__.py: -------------------------------------------------------------------------------- 1 | from .EquirectRotate import EquirectRotate 2 | from .EquirectRotate import pointRotate 3 | -------------------------------------------------------------------------------- /utils/equirectRotate/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def getRotMatrix(rotation): 4 | """ 5 | :param rotation: (yaw, pitch, roll) in degree 6 | :return: general rotational matrix 7 | refer this: https://en.wikipedia.org/wiki/Rotation_matrix#General_rotations 8 | """ 9 | yaw, pitch, roll = (rotation / 180) * np.pi 10 | 11 | Rz = np.array([[np.cos(yaw), -np.sin(yaw), 0], 12 | [np.sin(yaw), np.cos(yaw), 0], 13 | [0, 0, 1]]) 14 | Ry = np.array([[np.cos(pitch), 0, np.sin(pitch)], 15 | [0, 1, 0], 16 | [-np.sin(pitch), 0, np.cos(pitch)]]) 17 | Rx = np.array([[1, 0, 0], 18 | [0, np.cos(roll), -np.sin(roll)], 19 | [0, np.sin(roll), np.cos(roll)]]) 20 | 21 | return Rz @ Ry @ Rx 22 | 23 | 24 | def Pixel2LatLon(equirect): 25 | # LatLon (H, W, (lat, lon)) 26 | h, w = equirect.shape 27 | 28 | Lat = (0.5 - np.arange(0, h)/h) * np.pi 29 | Lon = (np.arange(0, w) / w - 0.5) * 2 * np.pi 30 | 31 | Lat = np.tile(Lat[:, np.newaxis], w) 32 | Lon = np.tile(Lon, (h, 1)) 33 | 34 | return np.dstack((Lat, Lon)) 35 | 36 | 37 | def LatLon2Sphere(LatLon): 38 | Lat = LatLon[:, :, 0] 39 | Lon = LatLon[:, :, 1] 40 | x = np.cos(Lat) * np.cos(Lon) 41 | y = np.cos(Lat) * np.sin(Lon) 42 | z = np.sin(Lat) 43 | 44 | return np.dstack((x, y, z)) 45 | 46 | 47 | def Sphere2LatLon(xyz): 48 | Lat = np.pi / 2 - np.arccos(xyz[:, :, 2]) 49 | Lon = np.arctan2(xyz[:, :, 1], xyz[:, :, 0]) 50 | 51 | return np.dstack((Lat, Lon)) 52 | 53 | 54 | def LatLon2Pixel(LatLon): 55 | h, w, _ = LatLon.shape 56 | Lat = LatLon[:, :, 0] 57 | Lon = LatLon[:, :, 1] 58 | i = (h * (0.5 - Lat / np.pi)) % h 59 | j = (w * (0.5 + Lon / (2 * np.pi))) % w 60 | 61 | return np.dstack((i, j)).astype('int') -------------------------------------------------------------------------------- /utils/geometry/__init__.py: -------------------------------------------------------------------------------- 1 | from 
.so3_rotate import get_projection_grid, rand_rotation_matrix, rotate_grid, project_2d_on_sphere -------------------------------------------------------------------------------- /utils/geometry/equi_trans.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from equilib import equi2pers, equi2equi 3 | 4 | class EquiTrans(object): 5 | """Class that contains Equi Trans with x,y,z,r""" 6 | 7 | def __init__(self, pers, fov_x, rot): 8 | self.w_pers = pers[0] 9 | self.h_pers = pers[1] 10 | self.fov_x = fov_x 11 | self.rot = rot #{'roll': 0.,'pitch': 0, 'yaw': 0.} 12 | 13 | def trans(self, equi_img): 14 | equi_img = np.asarray(equi_img) 15 | equi_img = np.transpose(equi_img, (2, 0, 1)) 16 | pers_img = equi2pers( 17 | equi=equi_img, 18 | rot=self.rot, 19 | w_pers=self.w_pers, 20 | h_pers=self.h_pers, 21 | fov_x=self.fov_x, 22 | skew=0.0, 23 | sampling_method="default", 24 | mode="bilinear",) 25 | pers_img = np.transpose(pers_img, (1, 2, 0)) 26 | return pers_img 27 | 28 | class Equi2Equi(object): 29 | 30 | def __init__(self, pers, rot): 31 | self.w_out = pers[0] 32 | self.h_out = pers[1] 33 | # self.rot={'roll': 0.,'pitch': 0, 'yaw': 0.} 34 | self.rot = rot 35 | 36 | def trans(self, equi_img): 37 | equi_img = np.asarray(equi_img) 38 | equi_img = np.transpose(equi_img, (2, 0, 1)) 39 | img_out = equi2equi(equi_img, self.rot, w_out=self.w_out, h_out=self.h_out) 40 | img_out = np.transpose(img_out, (1, 2, 0)) 41 | return img_out 42 | 43 | -------------------------------------------------------------------------------- /utils/geometry/so3_rotate.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from PIL import Image 4 | 5 | import lie_learn.spaces.S2 as S2 6 | from scipy.spatial.transform import Rotation as R 7 | from s2cnn import so3_rotation, so3_integrate 8 | 9 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 10 | 11 | NORTHPOLE_EPSILON = 1e-3 12 | 13 | def rand_rotation_matrix(deflection=1.0, randnums=None): 14 | """ 15 | Creates a random rotation matrix. 16 | 17 | deflection: the magnitude of the rotation. For 0, no rotation; for 1, completely random 18 | rotation. Small deflection => small perturbation. 19 | randnums: 3 random numbers in the range [0, 1]. If `None`, they will be auto-generated. 20 | 21 | # http://blog.lostinmyterminal.com/python/2015/05/12/random-rotation-matrix.html 22 | """ 23 | 24 | if randnums is None: 25 | randnums = np.random.uniform(size=(3,)) 26 | 27 | theta, phi, z = randnums 28 | 29 | theta = theta * 2.0*deflection*np.pi # Rotation about the pole (Z). 30 | phi = phi * 2.0*deflection*np.pi # For direction of pole deflection. 31 | z = z * 2.0*0.00001*deflection # For magnitude of pole deflection. 32 | 33 | # Compute a vector V used for distributing points over the sphere 34 | # via the reflection I - V Transpose(V). This formulation of V 35 | # will guarantee that if x[1] and x[2] are uniformly distributed, 36 | # the reflected points will be uniform on the sphere. Note that V 37 | # has length sqrt(2) to eliminate the 2 in the Householder matrix. 38 | 39 | r = np.sqrt(z) 40 | V = ( 41 | np.sin(phi) * r, 42 | np.cos(phi) * r, 43 | np.sqrt(2.0 - z) 44 | ) 45 | 46 | st = np.sin(theta) 47 | ct = np.cos(theta) 48 | 49 | R = np.array(((ct, st, 0), (-st, ct, 0), (0, 0, 1))) 50 | 51 | # Construct the rotation matrix ( V Transpose(V) - I ) R. 
52 | 53 | M = (np.outer(V, V) - np.eye(3)).dot(R) 54 | return M 55 | 56 | 57 | def rotate_grid(rot, grid): 58 | x, y, z = grid 59 | xyz = np.array((x, y, z)) 60 | x_r, y_r, z_r = np.einsum('ij,jab->iab', rot, xyz) 61 | return x_r, y_r, z_r 62 | 63 | 64 | def get_projection_grid(b, grid_type="Driscoll-Healy"): 65 | ''' returns the spherical grid in euclidean 66 | coordinates, where the sphere's center is moved 67 | to (0, 0, 1)''' 68 | theta, phi = S2.meshgrid(b=b, grid_type=grid_type) 69 | theta -= np.pi/2 70 | x_ = np.cos(theta) * np.cos(phi) 71 | y_ = np.cos(theta) * np.sin(phi) 72 | z_ = np.sin(theta) 73 | 74 | return x_, y_, z_ 75 | 76 | 77 | def project_sphere_on_xy_plane(grid, projection_origin): 78 | ''' returns xy coordinates on the plane 79 | obtained from projecting each point of 80 | the spherical grid along the ray from 81 | the projection origin through the sphere ''' 82 | 83 | sx, sy, sz = projection_origin 84 | x, y, z = grid 85 | 86 | yaw = np.arctan2(y, x) 87 | pitch = np.arcsin(z) 88 | 89 | rx = 0.5 * (yaw / np.pi + 1.0) 90 | ry = (pitch + np.pi/2) /np.pi 91 | 92 | return ry, rx 93 | 94 | 95 | def sample_within_bounds(signal, x, y, bounds): 96 | ''' ''' 97 | xmin, xmax, ymin, ymax = bounds 98 | 99 | idxs = (xmin <= x) & (x < xmax) & (ymin <= y) & (y < ymax) 100 | 101 | if len(signal.shape) > 2: 102 | sample = np.zeros((signal.shape[0], x.shape[0], x.shape[1])) 103 | sample[:, idxs] = signal[:, x[idxs], y[idxs]] 104 | else: 105 | sample = np.zeros((x.shape[0], x.shape[1])) 106 | sample[idxs] = signal[x[idxs], y[idxs]] 107 | return sample 108 | 109 | 110 | def sample_bilinear(signal, rx, ry): 111 | ''' ''' 112 | 113 | signal_dim_x = signal.shape[1] 114 | signal_dim_y = signal.shape[2] 115 | 116 | rx *= signal_dim_x 117 | ry *= signal_dim_y 118 | 119 | # discretize sample position 120 | ix = rx.astype(int) 121 | iy = ry.astype(int) 122 | 123 | # obtain four sample coordinates 124 | ix0 = ix - 1 125 | iy0 = iy - 1 126 | ix1 = ix + 1 127 | iy1 = iy + 1 128 | 129 | bounds = (0, signal_dim_x, 0, signal_dim_y) 130 | 131 | # sample signal at each four positions 132 | signal_00 = sample_within_bounds(signal, ix0, iy0, bounds) 133 | signal_10 = sample_within_bounds(signal, ix1, iy0, bounds) 134 | signal_01 = sample_within_bounds(signal, ix0, iy1, bounds) 135 | signal_11 = sample_within_bounds(signal, ix1, iy1, bounds) 136 | 137 | # linear interpolation in x-direction 138 | fx1 = (ix1-rx) * signal_00 + (rx-ix0) * signal_10 139 | fx2 = (ix1-rx) * signal_01 + (rx-ix0) * signal_11 140 | 141 | # linear interpolation in y-direction 142 | return (iy1 - ry) * fx1 + (ry - iy0) * fx2 143 | 144 | 145 | def project_2d_on_sphere(signal, grid, projection_origin=None): 146 | ''' ''' 147 | if projection_origin is None: 148 | projection_origin = (0, 0, 2 + NORTHPOLE_EPSILON) 149 | 150 | rx, ry = project_sphere_on_xy_plane(grid, projection_origin) 151 | sample = sample_bilinear(signal, rx, ry)/4.0 152 | 153 | return sample 154 | 155 | if __name__=='__main__': 156 | image = Image.open("test.png") 157 | image_name = "raw.png" 158 | image.save(image_name) 159 | signals = np.transpose(np.array(image).astype(np.float64), (2, 0, 1)) 160 | grid = get_projection_grid(b=32) 161 | rot = rand_rotation_matrix(deflection=1) 162 | rotated_grid = rotate_grid(rot, grid) 163 | rot_image = project_2d_on_sphere(signals, rotated_grid, projection_origin=[0,0,0.0001]) 164 | 165 | rot_image = np.transpose(rot_image, (1, 2, 0)) 166 | image_name = "raw_rot.png" 167 | image = Image.fromarray(rot_image) 168 | 
image.save(image_name) -------------------------------------------------------------------------------- /utils/log_print.py: -------------------------------------------------------------------------------- 1 | """ Init file for utils""" 2 | 3 | 4 | def red(sthg): 5 | ''' red color ''' 6 | return "\033[01;31m{0}\033[00m".format(str(sthg)) 7 | 8 | 9 | def green(sthg): 10 | ''' green color ''' 11 | return "\033[01;32m{0}\033[00m".format(str(sthg)) 12 | 13 | 14 | def lblue(sthg): 15 | ''' green color ''' 16 | return "\033[01;36m{0}\033[00m".format(str(sthg)) 17 | 18 | 19 | def pink(sthg): 20 | ''' pink color ''' 21 | return "\033[01;35m{0}\033[00m".format(str(sthg)) 22 | 23 | 24 | def blue(sthg): 25 | ''' blue color ''' 26 | return "\033[01;34m{0}\033[00m".format(str(sthg)) 27 | 28 | 29 | def yellow(sthg): 30 | ''' yellow color ''' 31 | return "\033[01;93m{0}\033[00m".format(str(sthg)) 32 | 33 | 34 | def log_print(sthg, color='', logger=None): 35 | ''' Print colored stream, color options:r, g, b, lb, y, p. ''' 36 | if color == "r": 37 | print(red(sthg)) 38 | elif color == "g": 39 | print(green(sthg)) 40 | elif color == "b": 41 | print(blue(sthg)) 42 | elif color == "lb": 43 | print(lblue(sthg)) 44 | elif color == "y": 45 | print(yellow(sthg)) 46 | elif color == "p": 47 | print(pink(sthg)) 48 | else: 49 | print(sthg) 50 | if logger != None: 51 | logger.info(sthg) 52 | -------------------------------------------------------------------------------- /utils/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import logging 4 | from datetime import datetime 5 | import neptune 6 | 7 | def setup_logger(name, cfg, args, is_train): 8 | logger = logging.getLogger(name) 9 | logger.setLevel(logging.DEBUG) 10 | 11 | now = datetime.now() 12 | dt_string = now.strftime("%d-%m-%Y-%H-%M-%S") 13 | if is_train: 14 | Output_dir = os.path.join(cfg.OUTPUT.DIR, "{}-{}-{}".format(cfg.MODEL.NAME, cfg.DATA.DATASET_NAME, dt_string)) 15 | else: 16 | Output_dir = os.path.join(cfg.OUTPUT.DIR, "Inference-{}-{}-{}".format(cfg.MODEL.NAME, cfg.DATA.DATASET_NAME, dt_string)) 17 | 18 | formatter = logging.Formatter("%(asctime)s %(name)s %(levelname)s: %(message)s") 19 | 20 | if not os.path.exists(Output_dir): 21 | os.makedirs(Output_dir) 22 | os.makedirs("{}/img".format(Output_dir)) 23 | os.makedirs("{}/pth".format(Output_dir)) 24 | 25 | fh = logging.FileHandler(os.path.join(Output_dir, "log.txt")) 26 | fh.setLevel(logging.DEBUG) 27 | fh.setFormatter(formatter) 28 | logger.addHandler(fh) 29 | 30 | logger.info(args) 31 | 32 | logger.info("Loaded network configuration file {}".format(args.config_network)) 33 | logger.info("Loaded dataset configuration file {}".format(args.config_dataset)) 34 | 35 | os.system('cp {} {}/MODEL.yaml'.format(args.config_network, Output_dir)) 36 | os.system('cp {} {}/DATA.yaml'.format(args.config_dataset, Output_dir)) 37 | 38 | cfg.OUTPUT.DIR = Output_dir 39 | 40 | # neptune_run = neptune.init_run(project=cfg.PROJECT_NAME, 41 | # api_token=cfg.API_TOKEN) 42 | # params = {"Learning_rate": cfg.TRAINING.OPTIMIZER.INIT_LEARNING_RATE, 43 | # "Optimizer": cfg.TRAINING.OPTIMIZER.NAME, 44 | # "Scheduler": cfg.TRAINING.SCHEDULER.NAME, 45 | # "Positive_Dis": cfg.DATA.POSITIVES_RADIUS, 46 | # "Negtive_Dis": cfg.DATA.NEGATIVES_RADIUS, 47 | # "Loss": cfg.LOSS.NAME, 48 | # "Loss_Margin0": cfg.LOSS.MARGIN0, 49 | # "Loss_Margin1": cfg.LOSS.MARGIN1, 50 | # "Batch_Size": cfg.TRAINING.BATCH.BATCH_SIZE} 51 | # neptune_run["parameters"] = params 52 | 
53 | 
54 |     return logger, neptune_run, cfg
55 | 
--------------------------------------------------------------------------------
/utils/pointProcess.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | import open3d as o3d
4 | from copy import copy, deepcopy
5 | 
6 | 
7 | def pc_normalize(pc):
8 |     centroid = np.mean(pc, axis=0)
9 |     pc = pc - centroid
10 |     m = np.max(np.sqrt(np.sum(pc**2, axis=1)))
11 |     pc = pc / m
12 |     return pc
13 | 
14 | 
15 | def pcd_downsample(initPcd, desiredNumOfPoint, leftVoxelSize, rightVoxelSize):
16 |     """
17 |     Downsample a point cloud to exactly desiredNumOfPoint points by binary-searching the voxel size.
18 |     Modified from https://blog.csdn.net/SJTUzhou/article/details/122927787
19 |     """
20 |     assert leftVoxelSize > rightVoxelSize, "leftVoxelSize should be larger than rightVoxelSize"
21 |     assert len(initPcd.points) >= desiredNumOfPoint, "desiredNumOfPoint should be less than or equal to the number of points in the given point cloud."
22 |     if len(initPcd.points) == desiredNumOfPoint:
23 |         return initPcd
24 | 
25 |     pcd = deepcopy(initPcd)
26 |     pcd = pcd.voxel_down_sample(leftVoxelSize)
27 |     assert len(pcd.points) <= desiredNumOfPoint, "Please specify a larger leftVoxelSize."
28 |     pcd = deepcopy(initPcd)
29 |     pcd = pcd.voxel_down_sample(rightVoxelSize)
30 |     assert len(pcd.points) >= desiredNumOfPoint, "Please specify a smaller rightVoxelSize."
31 | 
32 |     pcd = deepcopy(initPcd)
33 |     midVoxelSize = (leftVoxelSize + rightVoxelSize) / 2.
34 |     pcd = pcd.voxel_down_sample(midVoxelSize)
35 |     while len(pcd.points) != desiredNumOfPoint:
36 |         if len(pcd.points) < desiredNumOfPoint:
37 |             leftVoxelSize = copy(midVoxelSize)
38 |         else:
39 |             rightVoxelSize = copy(midVoxelSize)
40 |         midVoxelSize = (leftVoxelSize + rightVoxelSize) / 2.
41 |         pcd = deepcopy(initPcd)
42 |         pcd = pcd.voxel_down_sample(midVoxelSize)
43 | 
44 |     return pcd
45 | 
46 | 
47 | class LaserProjection(object):
48 |     """Project a point cloud (x, y, z) into top-down (BEV) and spherical range images."""
49 | 
50 |     def __init__(self, device, top_size=[64, 64], z_range=[-100.0, 100.0], sph_size=[64, 64], fov_range=[-90, 90], max_dis=30, sph_ocu=16, visib_thresh=3.0, visib_radius=25):
51 |         #! For top down view
52 |         self.proj_H = int(top_size[0] / 2)
53 |         self.proj_W = int(top_size[1] / 2)
54 |         self.proj_Z_min = z_range[0]
55 |         self.proj_Z_max = z_range[1]
56 | 
57 |         self.device = device
58 | 
59 |         #! For spherical view
60 |         self.sph_H = sph_size[0]
61 |         self.sph_W = sph_size[1]
62 |         self.sph_down = fov_range[0]
63 |         self.sph_up = fov_range[1]
64 | 
65 |         #! Set sph occlusion factor
66 |         self.sph_ocu = sph_ocu
67 | 
68 |         #! For active range
69 |         self.max_dis = max_dis
70 | 
71 |         self.visib_thresh = visib_thresh
72 |         self.visib_radius = visib_radius
73 | 
74 |     def proj(self, pt):
75 |         sph_proj = self.do_sph_projection(pt)
76 |         sph_proj = sph_proj.reshape([1, self.sph_H, self.sph_W])
77 |         sph_proj = torch.from_numpy(sph_proj).to(
78 |             self.device, dtype=torch.float)
79 |         return sph_proj
80 | 
81 |     def proj_img(self, pt):
82 |         sph_proj = self.do_sph_projection(pt)
83 |         sph_proj = sph_proj.reshape([self.sph_H, self.sph_W, 1])
84 |         return sph_proj
85 | 
86 |     def do_top_projection(self, points):
87 |         """ Project a point cloud into a bird's-eye-view (BEV) height image
88 |         """
89 | 
90 |         # get scan components
91 |         scan_x = points[:, 0]
92 |         scan_y = points[:, 1]
93 |         scan_z = points[:, 2]
94 | 
95 |         # get projections in image coords
96 |         proj_x = scan_x/self.max_dis
97 |         proj_y = scan_y/self.max_dis
98 | 
99 |         # scale to image size using angular resolution
100 |         proj_x = (proj_x + 1.0)*self.proj_W  # in [0.0, 2W]
101 |         proj_y = (proj_y + 1.0)*self.proj_H  # in [0.0, 2H]
102 | 
103 |         # round and clamp for use as index
104 |         proj_x = np.floor(proj_x)
105 |         proj_x = np.minimum(2*self.proj_W - 1, proj_x)
106 |         proj_x = np.maximum(0, proj_x).astype(np.int32)  # in [0, 2W-1]
107 | 
108 |         proj_y = np.floor(proj_y)
109 |         proj_y = np.minimum(2*self.proj_H - 1, proj_y)
110 |         proj_y = np.maximum(0, proj_y).astype(np.int32)  # in [0, 2H-1]
111 | 
112 |         data_grid = np.zeros((2*self.proj_H, 2*self.proj_W), dtype='float64')
113 |         data_grid[proj_y, proj_x] = scan_z
114 | 
115 |         data_norm = (data_grid - data_grid.min()) / \
116 |             (data_grid.max() - data_grid.min())
117 | 
118 |         return data_norm
119 | 
120 |     def do_sph_projection(self, points):
121 |         """ Project a point cloud into a spherical (range) image.
122 |             Each point is mapped to a pixel via its yaw and pitch, and the
123 |             pixel stores the point's depth; points are written from far to
124 |             near so that closer returns overwrite farther ones.
125 |         """
126 | 
127 |         # laser parameters
128 |         fov_up = self.sph_up / 180.0 * np.pi  # field of view up in rad
129 |         fov_down = self.sph_down / 180.0 * np.pi  # field of view down in rad
130 |         fov = abs(fov_down) + abs(fov_up)  # get field of view total in rad
131 | 
132 |         # get depth of all points
133 |         depth = np.linalg.norm(points, 2, axis=1)
134 | 
135 |         # get scan components
136 |         scan_x = points[:, 0]
137 |         scan_y = points[:, 1]
138 |         scan_z = points[:, 2]
139 | 
140 |         # get angles of all points
141 |         yaw = -np.arctan2(scan_y, scan_x)
142 |         pitch = np.arcsin(scan_z / depth)
143 | 
144 |         # get projections in image coords
145 |         proj_x = 0.5 * (yaw / np.pi + 1.0)  # in [0.0, 1.0]
146 |         proj_y = 1.0 - (pitch + abs(fov_down)) / fov  # in [0.0, 1.0]
147 | 
148 |         # scale to image size using angular resolution
149 |         # sph_H = self.sph_H*self.sph_ocu
150 |         # sph_W = self.sph_W*self.sph_ocu
151 |         sph_H = self.sph_H
152 |         sph_W = self.sph_W
153 |         proj_x *= sph_W  # in [0.0, W] 128
154 |         proj_y *= sph_H  # in [0.0, H] 64
155 | 
156 |         # round and clamp for use as index
157 |         proj_x = np.floor(proj_x)
158 |         proj_x = np.minimum(sph_W - 1, proj_x)
159 |         proj_x = np.maximum(0, proj_x).astype(np.int32)  # in [0, W-1]
160 | 
161 |         proj_y = np.floor(proj_y)
162 |         proj_y = np.minimum(sph_H - 1, proj_y)
163 |         proj_y = np.maximum(0, proj_y).astype(np.int32)  # in [0, H-1]
164 | 
165 |         data_grid = np.zeros((sph_H, sph_W), dtype='float32')
166 | 
167 |         # NOTE Maybe the constraints come from far buildings rather than nearby objects
168 |         # NOTE For indoor datasets, constraints from nearby objects will reduce the accuracy
169 | 
170 |         indices = np.argsort(depth)[::-1]  # write far-to-near so closer points overwrite farther ones
171 |         data_grid[proj_y[indices], proj_x[indices]] = depth[indices]
172 |         # data_grid[proj_y, proj_x] = depth
173 | 
174 |         # sph_grid = skimage.measure.block_reduce(data_grid, (self.sph_ocu, self.sph_ocu), np.max)
175 |         # sph_grid = 1./(sph_grid + 1)
176 |         sph_norm = (data_grid - data_grid.min()) / \
177 |             (data_grid.max() - data_grid.min())
178 | 
179 |         # depth_img = torch.from_numpy(data_grid).cuda()
180 |         # projected_points = visibility.visibility2(depth_img, torch.FloatTensor([self.sph_down, self.sph_up]).cuda(), torch.zeros_like(depth_img, device='cuda'), depth_img.shape[1], depth_img.shape[0], self.visib_thresh, self.visib_radius)
181 |         # data_out = projected_points.cpu().numpy()
182 |         # data_out[data_out == 0] = 100
183 |         # data_out = skimage.measure.block_reduce(data_out, (self.sph_ocu, self.sph_ocu), np.min)
184 |         # data_index = (data_out == 100)
185 |         # data_out[data_index] = 0
186 |         # # data_out = 1./(data_out + 1)
187 |         # data_norm = (data_out - data_out.min()) / (data_out.max() - data_out.min())
188 |         # ## NOTE define -1 for all negative points
189 |         # data_norm[data_index] = -1
190 |         return sph_norm
191 | 
192 |     def do_multi_sph_projection(self, points):  # stack near/mid/far range bands as 3 channels
193 |         depth = np.linalg.norm(points, 2, axis=1)
194 |         index_r20 = np.where(depth <= 20)
195 |         index_r35 = np.intersect1d(np.where(depth > 20), np.where(depth <= 35))
196 |         index_r50 = np.where(depth > 35)
197 | 
198 |         sphere_r20 = self.do_sph_projection(points[index_r20])
199 |         sphere_r35 = self.do_sph_projection(points[index_r35])
200 |         sphere_r50 = self.do_sph_projection(points[index_r50])
201 | 
202 |         return np.stack((sphere_r20, sphere_r35, sphere_r50), axis=0)
--------------------------------------------------------------------------------
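The snippet below is a minimal, hypothetical usage sketch of the utilities in utils/pointProcess.py; it is not a file from this repository, and the input path ("scan.pcd") and the 4096-point / voxel-size parameters are assumptions chosen only for illustration.

# Illustrative sketch: downsample a scan, then build a spherical range image.
import numpy as np
import torch
import open3d as o3d

from utils.pointProcess import LaserProjection, pcd_downsample

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Binary-search the voxel size until the cloud has exactly 4096 points.
pcd = o3d.io.read_point_cloud("scan.pcd")  # hypothetical input file
pcd = pcd_downsample(pcd, desiredNumOfPoint=4096, leftVoxelSize=2.0, rightVoxelSize=0.01)
points = np.asarray(pcd.points)

# Project the points into a 64x64 spherical range image on the target device.
projector = LaserProjection(device, sph_size=[64, 64], fov_range=[-90, 90])
sph_tensor = projector.proj(points)  # float tensor of shape (1, 64, 64)
print(sph_tensor.shape)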