├── .gitignore ├── LICENSE ├── README.md ├── config ├── __init__.py ├── config.py ├── dataset │ └── PITT.yaml └── network │ ├── spherevlad.yaml │ ├── spherevlad2.yaml │ └── spherevlad_visual.yaml ├── dataloader ├── __init__.py ├── data_augmentation.py ├── pittsburgh.py ├── triplet_dataloader.py └── utils.py ├── eval ├── __init__.py ├── eval_utils.py └── evaluate_pitt.py ├── generating_sph └── gene_pitt.py ├── models ├── __init__.py ├── loop_closure │ ├── __init__.py │ ├── aggregator │ │ ├── __init__.py │ │ └── netvlad.py │ ├── lidar │ │ ├── __init__.py │ │ ├── spherevlad.py │ │ └── spherevlad2.py │ ├── losses │ │ ├── __init__.py │ │ └── lazy_quadruplet_loss.py │ ├── transformer │ │ ├── __init__.py │ │ └── attention.py │ └── visual │ │ ├── __init__.py │ │ └── spherevlad.py └── robotLCD.py ├── pics ├── spherevlad.png └── spherevlad2.png ├── requirements.txt ├── train_lcd.py └── utils ├── __init__.py ├── equirectRotate ├── EquirectRotate.py ├── __init__.py └── utils.py ├── geometry ├── __init__.py ├── equi_trans.py └── so3_rotate.py ├── log_print.py ├── logger.py └── pointProcess.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # 10 | cache/ 11 | third/ 12 | data/results 13 | data/dataset 14 | log/ 15 | 16 | # Distribution / packaging 17 | .Python 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | share/python-wheels/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | MANIFEST 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Unit test / coverage reports 47 | htmlcov/ 48 | .tox/ 49 | .nox/ 50 | .coverage 51 | .coverage.* 52 | .cache 53 | nosetests.xml 54 | coverage.xml 55 | *.cover 56 | *.py,cover 57 | .hypothesis/ 58 | .pytest_cache/ 59 | cover/ 60 | 61 | # Translations 62 | *.mo 63 | *.pot 64 | 65 | # Django stuff: 66 | *.log 67 | local_settings.py 68 | db.sqlite3 69 | db.sqlite3-journal 70 | 71 | # Flask stuff: 72 | instance/ 73 | .webassets-cache 74 | 75 | # Scrapy stuff: 76 | .scrapy 77 | 78 | # Sphinx documentation 79 | docs/_build/ 80 | 81 | # PyBuilder 82 | .pybuilder/ 83 | target/ 84 | 85 | # Jupyter Notebook 86 | .ipynb_checkpoints 87 | 88 | # IPython 89 | profile_default/ 90 | ipython_config.py 91 | 92 | # pyenv 93 | # For a library or package, you might want to ignore these files since the code is 94 | # intended to run in multiple environments; otherwise, check them in: 95 | # .python-version 96 | 97 | # pipenv 98 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 99 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 100 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 101 | # install all needed dependencies. 102 | #Pipfile.lock 103 | 104 | # poetry 105 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 106 | # This is especially recommended for binary packages to ensure reproducibility, and is more 107 | # commonly ignored for libraries. 
108 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 109 | #poetry.lock 110 | 111 | # pdm 112 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 113 | #pdm.lock 114 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 115 | # in version control. 116 | # https://pdm.fming.dev/#use-with-ide 117 | .pdm.toml 118 | 119 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 120 | __pypackages__/ 121 | 122 | # Celery stuff 123 | celerybeat-schedule 124 | celerybeat.pid 125 | 126 | # SageMath parsed files 127 | *.sage.py 128 | 129 | # Environments 130 | .env 131 | .venv 132 | env/ 133 | venv/ 134 | ENV/ 135 | env.bak/ 136 | venv.bak/ 137 | 138 | # Spyder project settings 139 | .spyderproject 140 | .spyproject 141 | 142 | # Rope project settings 143 | .ropeproject 144 | 145 | # mkdocs documentation 146 | /site 147 | 148 | # mypy 149 | .mypy_cache/ 150 | .dmypy.json 151 | dmypy.json 152 | 153 | # Pyre type checker 154 | .pyre/ 155 | 156 | # pytype static type analyzer 157 | .pytype/ 158 | 159 | # Cython debug symbols 160 | cython_debug/ 161 | 162 | # PyCharm 163 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 164 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 165 | # and can be added to the global gitignore or merged into this file. For a more nuclear 166 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 167 | #.idea/ 168 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2023, METASLAM 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SphereVLAD Series: Viewpoint-invariant Place Recognition 2 | **1. PyTorch implementation of *SphereVLAD* and *SphereVLAD++*, proposed in the papers:**<br>
3 | [SeqSphereVLAD: Sequence Matching Enhanced Orientation-invariant Place Recognition](https://www.ri.cmu.edu/app/uploads/2020/11/0583.pdf)
4 | Peng Yin, Fuying Wang, Anton Egorov, Jiafan Hou, Ji Zhang, Howie Choset
5 | [SphereVLAD++: Attention-based and Signal-enhanced Viewpoint Invariant Descriptor](https://arxiv.org/abs/2207.02958)
6 | Shiqi Zhao, Peng Yin*, Ge Yi, Sebastian Scherer 7 | 8 | **2. Core library for our iSimLoc, AutoMerge and BioSLAM works:**
9 | [iSimLoc: Visual Global Localization for Previously Unseen Environments With Simulated Images](https://ieeexplore.ieee.org/document/10035430)
10 | Peng Yin; Ivan Cisneros; Shiqi Zhao; Ji Zhang; Howie Choset; Sebastian Scherer
11 | [AutoMerge: A Framework for Map Assembling and Smoothing in City-Scale Environments](https://ieeexplore.ieee.org/document/10203034)
12 | Peng Yin; Shiqi Zhao; Haowen Lai; Ruohai Ge; Ji Zhang; Howie Choset; Sebastian Scherer
13 | [BioSLAM: A Bioinspired Lifelong Memory System for General Place Recognition](https://ieeexplore.ieee.org/document/10242249)
14 | Peng Yin; Abulikemu Abuduweili; Shiqi Zhao; Lingyun Xu; Changliu Liu; Sebastian Scherer 15 | 16 | ### Network Structure 17 | ![](pics/spherevlad.png) 18 | ![](pics/spherevlad2.png) 19 | 20 | ### Environment Dependency 21 | 22 | **System** 23 | 24 | The codes are tested in:
25 | * Ubuntu 20.04 + CUDA 11.6 + Python 3.8.10<br>
26 | * Ubuntu 18.04 + CUDA 10.2 + Python 3.6.9<br>
27 | 
28 | **Python Environment Dependency** 
29 | 
30 | * [s2cnn](https://github.com/jonkhler/s2cnn) 
31 | * torch==1.13.0+cu116 torchaudio==0.13.0+cu116 torchvision==0.14.0+cu116 
32 | * cupy-cuda11x==11.6.0 
33 | 
34 | ### Installation 
35 | It is highly recommended to create a virtual environment with **conda** or **virtualenv**. 
36 | 
37 | ```bash 
38 | # create virtual env 
39 | virtualenv /path_to_env/spherevlad -p /usr/bin/python3 
40 | source /path_to_env/spherevlad/bin/activate 
41 | # or 
42 | conda create -n spherevlad python==3.8 
43 | conda activate spherevlad 
44 | 
45 | # install pytorch and cupy 
46 | pip3 install torch==1.13.0+cu116 torchvision==0.14.0+cu116 torchaudio==0.13.0 --extra-index-url https://download.pytorch.org/whl/cu116 
47 | pip3 install cupy-cuda11x==11.6.0 
48 | 
49 | # install s2cnn 
50 | pip3 install pynvrtc joblib 
51 | mkdir third && cd third 
52 | git clone https://github.com/jonkhler/s2cnn.git 
53 | cd s2cnn 
54 | python setup.py install 
55 | 
56 | # install all other packages 
57 | pip3 install -r requirements.txt 
58 | ``` 
59 | 
60 | ### Lidar Place Recognition 
61 | 
62 | **Data Structure** 
63 | 
64 | ``` 
65 | ├── data 
66 | ├── results 
67 | │ └── model_name 
68 | │ ├── DATA.yaml 
69 | │ ├── MODEL.yaml 
70 | │ ├── log.txt 
71 | │ └── pth 
72 | │ ├── model_xxx.pth 
73 | │ ├── ... 
74 | └── dataset 
75 | ├── PITT 
76 | │ ├── DATA 
77 | │ │ ├── Train1 
78 | │ │ │ ├── cloudGlobal.pcd 
79 | │ │ │ └── poses.txt 
80 | │ │ ├── ... 
81 | │ │ └── Trainxx 
82 | ``` 
83 | 
84 | **Note** 
85 | 
86 | Modify the ```PYTHONPATH``` environment variable to include the absolute path to the project root folder: 
87 | ``` 
88 | export PYTHONPATH=$PYTHONPATH:/path_to_spherevlad/SphereVLAD 
89 | ``` 
90 | 
91 | **Data Preparation** 
92 | 
93 | Download the sample data from [Dropbox](https://www.dropbox.com/scl/fi/45g8uns8gfmzmsxytjg2n/DATA.zip?rlkey=hs62zaaiub71f9g26ldhpyuir&dl=0) and put the unzipped folder in data/dataset/PITT. The resulting layout should match the one shown in the **Data Structure** section. 
94 | ```bash 
95 | mkdir data 
96 | python generating_sph/gene_pitt.py --config-network=config/network/spherevlad.yaml --config-dataset=config/dataset/PITT.yaml 
97 | ``` 
98 | 
99 | **Training on Alita Urban Dataset** 
100 | 
101 | ```bash 
102 | # spherevlad 
103 | python3 train_lcd.py --config-network=config/network/spherevlad.yaml --config-dataset=config/dataset/PITT.yaml 
104 | # spherevlad2 
105 | python3 train_lcd.py --config-network=config/network/spherevlad2.yaml --config-dataset=config/dataset/PITT.yaml 
106 | ``` 
107 | 
108 | * If there are issues with s2cnn, please download the [version](https://www.dropbox.com/scl/fi/hvc809dymwwq4g3s4stoa/s2cnn.zip?rlkey=awal9vpauog6xfr3l6z4t5uam&dl=0) we have modified and tested. 
109 | 
110 | * The generated weights will be saved at data/results/SphereVLAD-{DatasetName}-{DD}-{MM}-{YYYY}-{TIME}; the folder also contains the log and the configuration of the model and dataset. 
111 | 
112 | **Testing on Alita Urban Dataset** 
113 | 
114 | Pretrained weights on the Alita Urban Dataset: [Dropbox](https://www.dropbox.com/scl/fi/wkidjpayjuepa124c22rv/SphereVLAD.zip?rlkey=ce50l1ypznlc131ftbwj30o6y&dl=0). Please unzip it and put it in data/results. 
115 | 
116 | Please change the trajectory numbers in *line 45* and *line 68* of **eval/evaluate_pitt.py** first. 
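By default only trajectory 21 is evaluated (the loop reads `for traj_num in range(21, 22):` at those lines). Below is a minimal sketch of that edit, reusing the surrounding variables from `eval/evaluate_pitt.py` (`valPitt`, `trans_noise`, `rot_noise`, `noise`) and the trajectory split suggested by the commented-out `TEST_LIST` in `config/dataset/PITT.yaml`; adapt it to whichever trajectories you actually generated.

```python
# Sketch of the edit inside val() in eval/evaluate_pitt.py: evaluate several
# test trajectories instead of only trajectory 21 (range(21, 22)).
test_trajectories = [21, 22, 23, 24, 25]  # hypothetical split; match your generated train_* folders

total_recalls, total_time = [], 0
for traj_num in test_trajectories:
    recalls, _, running_time = valPitt.get_features_recall(traj_num, trans_noise, rot_noise, noise)
    total_recalls.append(recalls)
    total_time += running_time
```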
117 | ```bash 118 | # spherevlad 119 | python eval/evaluate_pitt.py --model-path=data/results/SphereVLAD --epoch=666 --noise=1 --type=recall --trans-noise=2 --rot-noise=180 --log=False 120 | # spherevlad2 121 | python eval/evaluate_pitt.py --model-path=data/results/SphereVLAD2 --epoch=666 --noise=1 --type=recall --trans-noise=2 --rot-noise=180 --log=False 122 | ``` 123 | * --model-path: the folder contains the configuration and trained weights 124 | * --epoch: the epoch of trained weights 125 | 126 | ### Hints on training and test on other dataset 127 | * Modify the dataloader/pittsburgh.py to fit the data structure of your own dataset 128 | * Modify the config/dataset/PITT.yaml to change the configuration of the dataset 129 | * Add new evaluation methods in eval/eval_utils.py and import it in train_lcd.py 130 | 131 | ### Citation 132 | ``` 133 | @INPROCEEDINGS{9341727, 134 | author={Yin, Peng and Wang, Fuying and Egorov, Anton and Hou, Jiafan and Zhang, Ji and Choset, Howie}, 135 | booktitle={2020 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)}, 136 | title={SeqSphereVLAD: Sequence Matching Enhanced Orientation-invariant Place Recognition}, 137 | year={2020}, 138 | volume={}, 139 | number={}, 140 | pages={5024-5029}, 141 | doi={10.1109/IROS45743.2020.9341727}} 142 | ``` 143 | ``` 144 | @ARTICLE{9956017, 145 | author={Zhao, Shiqi and Yin, Peng and Yi, Ge and Scherer, Sebastian}, 146 | journal={IEEE Robotics and Automation Letters}, 147 | title={SphereVLAD++: Attention-Based and Signal-Enhanced Viewpoint Invariant Descriptor}, 148 | year={2023}, 149 | volume={8}, 150 | number={1}, 151 | pages={256-263}, 152 | doi={10.1109/LRA.2022.3223555}} 153 | ``` 154 | ``` 155 | @article{yin2022alita, 156 | title={Alita: A large-scale incremental dataset for long-term autonomy}, 157 | author={Yin, Peng and Zhao, Shiqi and Ge, Ruohai and Cisneros, Ivan and Fu, Ruijie and Zhang, Ji and Choset, Howie and Scherer, Sebastian}, 158 | journal={arXiv preprint arXiv:2205.10737}, 159 | year={2022} 160 | } 161 | ``` 162 | ``` 163 | @ARTICLE{10035430, 164 | author={Yin, Peng and Cisneros, Ivan and Zhao, Shiqi and Zhang, Ji and Choset, Howie and Scherer, Sebastian}, 165 | journal={IEEE Transactions on Robotics}, 166 | title={iSimLoc: Visual Global Localization for Previously Unseen Environments With Simulated Images}, 167 | year={2023}, 168 | volume={39}, 169 | number={3}, 170 | pages={1893-1909}, 171 | doi={10.1109/TRO.2023.3238201}} 172 | 173 | ``` 174 | ``` 175 | @ARTICLE{10203034, 176 | author={Yin, Peng and Zhao, Shiqi and Lai, Haowen and Ge, Ruohai and Zhang, Ji and Choset, Howie and Scherer, Sebastian}, 177 | journal={IEEE Transactions on Robotics}, 178 | title={AutoMerge: A Framework for Map Assembling and Smoothing in City-Scale Environments}, 179 | year={2023}, 180 | volume={}, 181 | number={}, 182 | pages={1-19}, 183 | doi={10.1109/TRO.2023.3290448}} 184 | ``` 185 | ``` 186 | @ARTICLE{10242249, 187 | author={Yin, Peng and Abuduweili, Abulikemu and Zhao, Shiqi and Xu, Lingyun and Liu, Changliu and Scherer, Sebastian}, 188 | journal={IEEE Transactions on Robotics}, 189 | title={BioSLAM: A Bioinspired Lifelong Memory System for General Place Recognition}, 190 | year={2023}, 191 | volume={}, 192 | number={}, 193 | pages={1-20}, 194 | doi={10.1109/TRO.2023.3306615}} 195 | ``` 196 | -------------------------------------------------------------------------------- /config/__init__.py: -------------------------------------------------------------------------------- 1 | from .config 
import _C as cfg -------------------------------------------------------------------------------- /config/config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from yacs.config import CfgNode as CN 3 | 4 | _C = CN() 5 | 6 | #! ----------------------------------------------------------------------------- 7 | #! NEPTUNE 8 | #! ----------------------------------------------------------------------------- 9 | _C.PROJECT_NAME = '' 10 | _C.API_TOKEN = '' 11 | 12 | #! ----------------------------------------------------------------------------- 13 | #! INPUT 14 | #! ----------------------------------------------------------------------------- 15 | _C.DATA = CN() 16 | _C.DATA.DATASET_NAME = "" 17 | _C.DATA.BENCHMARK_DIR = "" # The parent folder of dataset 18 | _C.DATA.DATASET_TYPE = "baseline" # For Oxford Dataset 19 | # Projection for LiDAR point cloud 20 | # Spherical projection 21 | _C.DATA.SPH_PROJ = CN() 22 | _C.DATA.SPH_PROJ.RADIUS = 50.0 # Radius of ball query in submap generation 23 | _C.DATA.SPH_PROJ.IMAGE_SIZE = [64, 64] # Define the size of generated panorama 24 | _C.DATA.SPH_PROJ.FOV = [-90, 90] # Define the fov when genenrating panorama 25 | _C.DATA.SPH_PROJ.VIS_THRESH = 3.0 # Not used for now 26 | _C.DATA.SPH_PROJ.VIS_RADIUS = 3 # Not used for now 27 | _C.DATA.SPH_PROJ.OCCLUSION = 16 # Not used for now 28 | # BEV projection 29 | _C.DATA.BEV_PROJ = CN() 30 | _C.DATA.BEV_PROJ.IMAGE_SIZE = [64, 64] # Define the size of generated BEV 31 | _C.DATA.BEV_PROJ.Z_RANGE = [-100, 100] # Confine the points used to generate BEV 32 | _C.DATA.BEV_PROJ.MAX_DIS = 30 # The x and y max range of the point cloud, eg: a 33 | # point cloud x in [-100,100], this value will be 34 | # 100 35 | # Image 36 | _C.DATA.SPH_IM = CN() 37 | _C.DATA.SPH_IM.IMAGE_SIZE = [224, 224] 38 | _C.DATA.IM = CN() 39 | _C.DATA.IM.IMAGE_SIZE = [224, 224] 40 | # Distance settings for positive and negative frame 41 | _C.DATA.POSITIVES_RADIUS = 10.0 42 | _C.DATA.NEGATIVES_RADIUS = 50.0 43 | _C.DATA.TRAJ_RADIUS = 5.0e+3 44 | # Data spliting 45 | _C.DATA.TRAIN_LIST = [] 46 | _C.DATA.VAL_LIST = [] 47 | _C.DATA.TEST_LIST = [] 48 | # Pickles 49 | _C.DATA.TRAIN_PICKLE = 'train.pickle' 50 | _C.DATA.VAL_PICKLE = 'val.pickle' 51 | 52 | #! ----------------------------------------------------------------------------- 53 | #! DATASET 54 | #! ----------------------------------------------------------------------------- 55 | # Alita Campus dataset 56 | _C.DATA.ALITA_CAMPUS = CN() 57 | _C.DATA.ALITA_CAMPUS.DIRECTION = None 58 | _C.DATA.ALITA_CAMPUS.COLLECTED_TIME = None 59 | 60 | #! ----------------------------------------------------------------------------- 61 | #! MODEL 62 | #! 
----------------------------------------------------------------------------- 63 | _C.MODEL = CN() 64 | _C.MODEL.NAME = "" 65 | _C.MODEL.TYPE = "Lidar" # "Lidar", "LiSPH", "Image" 66 | 67 | # S2CNN 68 | _C.MODEL.S2CNN = CN() 69 | _C.MODEL.S2CNN.INPUT_CHANNEL_NUM = 1 70 | 71 | # NetVLAD 72 | _C.MODEL.NETVLAD = CN() 73 | _C.MODEL.NETVLAD.ENCODER = 'vgg16' 74 | _C.MODEL.NETVLAD.CLUSTER_NUM = 64 75 | _C.MODEL.NETVLAD.FEATURE_DIM = 128 76 | _C.MODEL.NETVLAD.NORMALIZE_INPUT = True 77 | _C.MODEL.NETVLAD.OUTPUT_DIM = None 78 | _C.MODEL.NETVLAD.GATE = False 79 | 80 | # PointNetVLAD 81 | _C.MODEL.POINTNETVLAD = CN() 82 | _C.MODEL.POINTNETVLAD.MAX_SAMPLES = 4096 83 | _C.MODEL.POINTNETVLAD.GLOBAL_FEAT = True 84 | _C.MODEL.POINTNETVLAD.FEATURE_TRANSFORM = True 85 | _C.MODEL.POINTNETVLAD.MAX_POOL = False 86 | _C.MODEL.POINTNETVLAD.CLUSTER_SIZE = 64 87 | _C.MODEL.POINTNETVLAD.OUTPUT_DIM = 256 88 | _C.MODEL.POINTNETVLAD.CONTEXT_GATING = True 89 | _C.MODEL.POINTNETVLAD.ADD_BATCH_NORM = True 90 | 91 | # AutoMerge 92 | _C.MODEL.AUTOMERGE = CN() 93 | _C.MODEL.AUTOMERGE.FEATURE_SIZE = 6 94 | _C.MODEL.AUTOMERGE.SELF_ATTEN = 'add' 95 | _C.MODEL.AUTOMERGE.CROSS_ATTEN = 'reweight' 96 | 97 | #! ----------------------------------------------------------------------------- 98 | #! LOSS 99 | #! ----------------------------------------------------------------------------- 100 | _C.LOSS = CN() 101 | _C.LOSS.NAME = "" 102 | _C.LOSS.MARGIN0 = 0.5 103 | _C.LOSS.MARGIN1 = 0.2 104 | _C.LOSS.MARGIN2 = 0.3 105 | 106 | #! ----------------------------------------------------------------------------- 107 | #! TRAINING CONFIGURATION 108 | #! ----------------------------------------------------------------------------- 109 | _C.TRAINING = CN() 110 | # Training or Test 111 | _C.TRAINING.IS_TRAIN = False 112 | # Number of workers 113 | _C.TRAINING.NUM_WORKERS = 8 # Number of workers when training 114 | # Batch 115 | _C.TRAINING.BATCH = CN() 116 | _C.TRAINING.BATCH.BATCH_SIZE = 2 # Batch size per gpu 117 | # Data Augment 118 | _C.TRAINING.BATCH.FRAME_TRANSFORM = 0 # several mode 119 | _C.TRAINING.BATCH.BATCH_TRANSFORM = False # Data argmentation for the whole batch 120 | # Number of positive and negative in a batch 121 | _C.TRAINING.BATCH.POSITIVES_PER_QUERY = 2 122 | _C.TRAINING.BATCH.NEGATIVES_PER_QUERY = 18 123 | # Optimizer 124 | _C.TRAINING.OPTIMIZER = CN() 125 | _C.TRAINING.OPTIMIZER.NAME = "Adam" 126 | _C.TRAINING.OPTIMIZER.INIT_LEARNING_RATE = 1e-4 127 | _C.TRAINING.OPTIMIZER.WEIGHT_DECAY = 0.001 # Not used for now 128 | # Scheduler 129 | _C.TRAINING.SCHEDULER = CN() 130 | _C.TRAINING.SCHEDULER.NAME = None 131 | _C.TRAINING.SCHEDULER.MILESTONES = [10] # For MultiStepLR 132 | _C.TRAINING.SCHEDULER.STEP_SIZE = None # For StepLR 133 | _C.TRAINING.SCHEDULER.GAMMA = 0.1 # For StepLR & MultiStepLR 134 | # GPU Index for Parallel Training 135 | _C.TRAINING.GPU = CN() 136 | _C.TRAINING.GPU.IDS = None # Choose to use which gpus 137 | # Resume 138 | _C.TRAINING.RESUME = False 139 | # Epoch 140 | _C.TRAINING.EPOCH = 20 141 | 142 | #! ----------------------------------------------------------------------------- 143 | #! TRAINED WEIGHT 144 | #! ----------------------------------------------------------------------------- 145 | _C.WEIGHT = CN() 146 | _C.WEIGHT.LOAD_ADDRESS = "" 147 | _C.WEIGHT.SAVE_ADDRESS = "" 148 | _C.WEIGHT.LOAD_OPTIMIZER = True 149 | _C.WEIGHT.FREEZE_WEIGHT = False 150 | 151 | #! ----------------------------------------------------------------------------- 152 | #! OUTPUT 153 | #! 
----------------------------------------------------------------------------- 154 | _C.OUTPUT = CN() 155 | _C.OUTPUT.DIR = "data/results" 156 | -------------------------------------------------------------------------------- /config/dataset/PITT.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | DATASET_NAME: "PITT" # should be same with the folder name 3 | BENCHMARK_DIR: "data/dataset/" 4 | # Projections 5 | SPH_PROJ: 6 | RADIUS: 20.0 # query radius for data generation 7 | IMAGE_SIZE: [64, 64] 8 | FOV: [-90, 90] 9 | BEV_PROJ: 10 | IMAGE_SIZE: [224, 224] 11 | Z_RANGE: [-10, 10] 12 | # Data spliting 13 | # TRAIN_LIST: [1,2,3,4,5,6,7,8,9,10,16,17,18,19,20] 14 | # VAL_LIST: [21,22,23,24,25] 15 | # TEST_LIST: [21,22,23,24,25] 16 | TRAIN_LIST: [1] 17 | VAL_LIST: [21] 18 | TEST_LIST: [21] -------------------------------------------------------------------------------- /config/network/spherevlad.yaml: -------------------------------------------------------------------------------- 1 | PROJECT_NAME: '' 2 | DATA: 3 | POSITIVES_RADIUS: 8.0 4 | NEGATIVES_RADIUS: 16.0 5 | MODEL: 6 | NAME: "SphereVLAD" 7 | TYPE: "LiSPH" 8 | S2CNN: 9 | INPUT_CHANNEL_NUM: 1 10 | NETVLAD: 11 | CLUSTER_NUM: 32 12 | FEATURE_DIM: 16 13 | NORMALIZE_INPUT: True 14 | OUTPUT_DIM: None 15 | GATE: False 16 | 17 | LOSS: 18 | NAME: "LazyQuadrupletLoss" 19 | MARGIN0: 0.5 20 | MARGIN1: 0.2 21 | 22 | TRAINING: 23 | NUM_WORKERS: 8 24 | BATCH: 25 | BATCH_SIZE: 6 26 | FRAME_TRANSFORM: 0 27 | BATCH_TRANSFORM: False 28 | # Select GPUs 29 | GPU: 30 | IDS: [0,1] 31 | # Optimizer 32 | OPTIMIZER: 33 | NAME: "Adam" 34 | INIT_LEARNING_RATE: 1e-4 35 | # Scheduler 36 | SCHEDULER: 37 | NAME: "MultiStepLR" 38 | MILESTONES: [10] 39 | GAMMA: 0.1 40 | # Epoch 41 | EPOCH: 20 42 | # Resume? 43 | RESUME: False 44 | 45 | # Save & Load weight 46 | WEIGHT: 47 | LOAD_ADDRESS: None 48 | SAVE_ADDRESS: None 49 | 50 | # where logs and weights will be saved at 51 | OUTPUT: 52 | DIR: "data/results" 53 | -------------------------------------------------------------------------------- /config/network/spherevlad2.yaml: -------------------------------------------------------------------------------- 1 | PROJECT_NAME: '' 2 | DATA: 3 | POSITIVES_RADIUS: 8.0 4 | NEGATIVES_RADIUS: 16.0 5 | MODEL: 6 | NAME: "SphereVLAD2" 7 | TYPE: "LiSPH" 8 | S2CNN: 9 | INPUT_CHANNEL_NUM: 1 10 | NETVLAD: 11 | CLUSTER_NUM: 32 12 | FEATURE_DIM: 16 13 | NORMALIZE_INPUT: True 14 | OUTPUT_DIM: None 15 | GATE: False 16 | 17 | LOSS: 18 | NAME: "LazyQuadrupletLoss" 19 | MARGIN0: 0.5 20 | MARGIN1: 0.2 21 | 22 | TRAINING: 23 | NUM_WORKERS: 8 24 | BATCH: 25 | BATCH_SIZE: 6 26 | FRAME_TRANSFORM: 0 27 | BATCH_TRANSFORM: False 28 | # Select GPUs 29 | GPU: 30 | IDS: [0,1] 31 | # Optimizer 32 | OPTIMIZER: 33 | NAME: "Adam" 34 | INIT_LEARNING_RATE: 1e-4 35 | # Scheduler 36 | SCHEDULER: 37 | NAME: "MultiStepLR" 38 | MILESTONES: [10] 39 | GAMMA: 0.1 40 | # Epoch 41 | EPOCH: 30 42 | # Resume? 
43 | RESUME: True 44 | 45 | # Save & Load weight 46 | WEIGHT: 47 | LOAD_ADDRESS: "" 48 | LOAD_OPTIMIZER: False 49 | SAVE_ADDRESS: None 50 | 51 | # where logs and weights will be saved at 52 | OUTPUT: 53 | DIR: "data/results" 54 | -------------------------------------------------------------------------------- /config/network/spherevlad_visual.yaml: -------------------------------------------------------------------------------- 1 | PROJECT_NAME: '' 2 | DATA: 3 | POSITIVES_RADIUS: 8.0 4 | NEGATIVES_RADIUS: 16.0 5 | MODEL: 6 | NAME: "SphereVLAD" 7 | TYPE: "Image" 8 | S2CNN: 9 | INPUT_CHANNEL_NUM: 3 10 | NETVLAD: 11 | CLUSTER_NUM: 32 12 | FEATURE_DIM: 16 13 | NORMALIZE_INPUT: True 14 | OUTPUT_DIM: None 15 | GATE: False 16 | 17 | LOSS: 18 | NAME: "LazyQuadrupletLoss" 19 | MARGIN0: 0.5 20 | MARGIN1: 0.2 21 | 22 | TRAINING: 23 | NUM_WORKERS: 8 24 | BATCH: 25 | BATCH_SIZE: 1 26 | FRAME_TRANSFORM: 0 27 | BATCH_TRANSFORM: False 28 | # Select GPUs 29 | GPU: 30 | IDS: [0,1] 31 | # Optimizer 32 | OPTIMIZER: 33 | NAME: "Adam" 34 | INIT_LEARNING_RATE: 1e-4 35 | # Scheduler 36 | SCHEDULER: 37 | NAME: "StepLR" 38 | MILESTONES: None 39 | STEP_SIZE: 5 40 | GAMMA: 0.1 41 | # Epoch 42 | EPOCH: 30 43 | # Resume? 44 | RESUME: False 45 | 46 | # Save & Load weight 47 | WEIGHT: 48 | LOAD_ADDRESS: None 49 | SAVE_ADDRESS: None 50 | 51 | # where logs and weights will be saved at 52 | OUTPUT: 53 | DIR: "data/results" 54 | -------------------------------------------------------------------------------- /dataloader/__init__.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import DataLoader 2 | 3 | from .utils import make_collate_fn 4 | from utils import log_print 5 | 6 | def make_data_loader(config, gpu_ids, is_train): 7 | ''' 8 | Args: 9 | config: parameter configurations 10 | gpu_ids: gpu indexes for training 11 | is_train: training or testing 12 | Returns: 13 | dataloader(type:class) 14 | ''' 15 | if is_train: 16 | log_print("Using dataset: %s" % config.DATA.DATASET_NAME, "g") 17 | 18 | if config.DATA.DATASET_NAME == "PITT": 19 | from .pittsburgh import PittsburghDataset 20 | dataloader = PittsburghDataset(config, is_train) 21 | else: 22 | raise ValueError(f"Unrecognized Dataset Name {config.DATA.DATASET_NAME}") 23 | 24 | #TODO collate functions 25 | if config.TRAINING.BATCH.BATCH_TRANSFORM: 26 | collate_fn = make_collate_fn(config) 27 | else: 28 | collate_fn = None 29 | # collate_fn=collate_fn, 30 | 31 | loader = DataLoader(dataloader, 32 | batch_size=config.TRAINING.BATCH.BATCH_SIZE*len(gpu_ids), 33 | num_workers=config.TRAINING.NUM_WORKERS if is_train else 4, 34 | pin_memory=True, 35 | shuffle=True if is_train else False, 36 | drop_last=True) 37 | return loader -------------------------------------------------------------------------------- /dataloader/data_augmentation.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: Shiqi Zhao 3 | data_augmentation 4 | 5 | Codes of Data Augmentation for Point Cloud is adopted from MinkLoc++ 6 | Original Code: https://github.com/jac99/MinkLocMultimodal 7 | ''' 8 | 9 | import random 10 | import math 11 | import numpy as np 12 | from scipy.linalg import expm, norm 13 | from PIL import Image 14 | 15 | import torch 16 | import torchvision.transforms as transforms 17 | 18 | from utils.geometry import get_projection_grid, rand_rotation_matrix, rotate_grid, project_2d_on_sphere 19 | 20 | 21 | class Augment_Point_Data(): 22 | def __init__(self, mode=0, is_train=False): 23 
| if is_train: 24 | if mode == 0: 25 | transform = [transforms.ToTensor()] 26 | elif mode == 1: 27 | transform = [transforms.ToTensor(), 28 | JitterPoints(sigma=0.001, clip=0.002), 29 | RemoveRandomPoints(r=(0.0, 0.1)), 30 | RandomTranslation(max_delta=0.01), 31 | RemoveRandomBlock(p=0.4), 32 | RandomRotation(axis=np.array([0,0,1]), 33 | max_theta=15, 34 | max_theta2=None)] 35 | else: 36 | raise NotImplementedError(f'Uncognized data augmentation mode') 37 | else: 38 | transform = [transforms.ToTensor()] 39 | self.transform = transforms.Compose(transform) 40 | 41 | def __call__(self, input): 42 | output = self.transform(input) 43 | return output 44 | 45 | 46 | class Augment_RGB_Data(): 47 | def __init__(self, mode=0, is_train=False): 48 | if is_train: 49 | if mode == 0: 50 | transform = [transforms.ToTensor(), 51 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])] 52 | elif mode == 1: 53 | transform = [transforms.ToTensor(), 54 | transforms.Resize([224, 224]), 55 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 56 | transforms.RandomErasing(scale=(0.1, 0.4)), 57 | transforms.RandomRotation(degrees=5), 58 | transforms.RandomHorizontalFlip(p=0.5)] 59 | else: 60 | raise NotImplementedError(f'Uncognized data augmentation mode') 61 | else: 62 | transform = [transforms.ToTensor(), 63 | transforms.Resize([224, 224]), 64 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])] 65 | self.transform = transforms.Compose(transform) 66 | 67 | def __call__(self, input): 68 | output = self.transform(input) 69 | return output 70 | 71 | 72 | class Augment_SPH_Data(): 73 | def __init__(self, img_size, mode=0, is_train=False): 74 | if is_train: 75 | if mode == 0: 76 | transform = [transforms.ToTensor(), 77 | transforms.Normalize([0.5], [0.5])] 78 | elif mode == 1: 79 | transform = [SphRandomRotate(p=1, img_size=img_size), 80 | transforms.ToTensor(), 81 | transforms.Normalize([0.5], [0.5])] 82 | else: 83 | raise NotImplementedError(f'Uncognized data augmentation mode') 84 | else: 85 | transform = [transforms.ToTensor(), 86 | transforms.Resize(img_size, Image.BICUBIC), 87 | transforms.Normalize([0.5], [0.5])] 88 | self.transform = transforms.Compose(transform) 89 | 90 | def __call__(self, input): 91 | output = self.transform(input) 92 | return output 93 | 94 | 95 | class SphRandomRotate: 96 | '''Random rotate spherical projection of point cloud''' 97 | def __init__(self, p, img_size): 98 | assert 0 < p <= 1, 'probability must be in (0, 1] range)!' 
99 | self.p = p 100 | self.grid = get_projection_grid(b=(int)(img_size[0]/2.0)) 101 | 102 | def __call__(self, sph_img): 103 | #* conver from PIL.Image into numpy.array 104 | signals = np.expand_dims(np.array(sph_img), axis=0) 105 | #* generate random rotation along all three axis 106 | rot = rand_rotation_matrix(deflection=1) 107 | #* generate random rotation along z-axis 108 | # rot_angle = 2*np.pi*random.uniform(1) 109 | # cosval = np.cos(rot_angle) 110 | # sinval = np.sin(rot_angle) 111 | # rot = np.array([[cosval, -sinval, 0], 112 | # [sinval, cosval, 0], 113 | # [0, 0, 1]]) 114 | rotated_grid = rotate_grid(rot, self.grid) 115 | rot_sph_img = project_2d_on_sphere(signals, rotated_grid, projection_origin=[0,0,0.00001]) 116 | rot_sph_img = np.squeeze(rot_sph_img) 117 | return rot_sph_img 118 | 119 | 120 | class RandomFlip: 121 | def __init__(self, p): 122 | # p = [p_x, p_y, p_z] probability of flipping each axis 123 | assert len(p) == 3 124 | assert 0 < sum(p) <= 1, 'sum(p) must be in (0, 1] range, is: {}'.format(sum(p)) 125 | self.p = p 126 | self.p_cum_sum = np.cumsum(p) 127 | 128 | def __call__(self, coords): 129 | r = random.random() 130 | if r <= self.p_cum_sum[0]: 131 | # Flip the first axis 132 | coords[..., 0] = -coords[..., 0] 133 | elif r <= self.p_cum_sum[1]: 134 | # Flip the second axis 135 | coords[..., 1] = -coords[..., 1] 136 | elif r <= self.p_cum_sum[2]: 137 | # Flip the third axis 138 | coords[..., 2] = -coords[..., 2] 139 | 140 | return coords 141 | 142 | 143 | class RandomRotation: 144 | def __init__(self, axis=None, max_theta=180, max_theta2=15): 145 | self.axis = axis 146 | self.max_theta = max_theta # Rotation around axis 147 | self.max_theta2 = max_theta2 # Smaller rotation in random direction 148 | 149 | def _M(self, axis, theta): 150 | return expm(np.cross(np.eye(3), axis / norm(axis) * theta)).astype(np.float32) 151 | 152 | def __call__(self, coords): 153 | if self.axis is not None: 154 | axis = self.axis 155 | else: 156 | axis = np.random.rand(3) - 0.5 157 | R = self._M(axis, (np.pi * self.max_theta / 180) * 2 * (np.random.rand(1) - 0.5)) 158 | if self.max_theta2 is None: 159 | coords = coords @ R 160 | else: 161 | R_n = self._M(np.random.rand(3) - 0.5, (np.pi * self.max_theta2 / 180) * 2 * (np.random.rand(1) - 0.5)) 162 | coords = coords @ R @ R_n 163 | 164 | return coords 165 | 166 | 167 | class RandomTranslation: 168 | def __init__(self, max_delta=0.05): 169 | self.max_delta = max_delta 170 | 171 | def __call__(self, coords): 172 | trans = self.max_delta * np.random.randn(1, 3) 173 | return coords + trans.astype(np.float32) 174 | 175 | 176 | class RandomScale: 177 | def __init__(self, min, max): 178 | self.scale = max - min 179 | self.bias = min 180 | 181 | def __call__(self, coords): 182 | s = self.scale * np.random.rand(1) + self.bias 183 | return coords * s.astype(np.float32) 184 | 185 | 186 | class RandomShear: 187 | def __init__(self, delta=0.1): 188 | self.delta = delta 189 | 190 | def __call__(self, coords): 191 | T = np.eye(3) + self.delta * np.random.randn(3, 3) 192 | return coords @ T.astype(np.float32) 193 | 194 | 195 | class JitterPoints: 196 | def __init__(self, sigma=0.01, clip=None, p=1.): 197 | assert 0 < p <= 1. 198 | assert sigma > 0. 199 | 200 | self.sigma = sigma 201 | self.clip = clip 202 | self.p = p 203 | 204 | def __call__(self, e): 205 | """ Randomly jitter points. jittering is per point. 
206 | Input: 207 | BxNx3 array, original batch of point clouds 208 | Return: 209 | BxNx3 array, jittered batch of point clouds 210 | """ 211 | 212 | sample_shape = (e.shape[0],) 213 | if self.p < 1.: 214 | # Create a mask for points to jitter 215 | m = torch.distributions.categorical.Categorical(probs=torch.tensor([1 - self.p, self.p])) 216 | mask = m.sample(sample_shape=sample_shape) 217 | else: 218 | mask = torch.ones(sample_shape, dtype=torch.int64 ) 219 | 220 | mask = mask == 1 221 | jitter = self.sigma * torch.randn_like(e[mask]) 222 | 223 | if self.clip is not None: 224 | jitter = torch.clamp(jitter, min=-self.clip, max=self.clip) 225 | 226 | e[mask] = e[mask] + jitter 227 | return e 228 | 229 | 230 | class RemoveRandomPoints: 231 | def __init__(self, r): 232 | if type(r) is list or type(r) is tuple: 233 | assert len(r) == 2 234 | assert 0 <= r[0] <= 1 235 | assert 0 <= r[1] <= 1 236 | self.r_min = float(r[0]) 237 | self.r_max = float(r[1]) 238 | else: 239 | assert 0 <= r <= 1 240 | self.r_min = None 241 | self.r_max = float(r) 242 | 243 | def __call__(self, e): 244 | n = len(e) 245 | if self.r_min is None: 246 | r = self.r_max 247 | else: 248 | # Randomly select removal ratio 249 | r = random.uniform(self.r_min, self.r_max) 250 | 251 | mask = np.random.choice(range(n), size=int(n*r), replace=False) # select elements to remove 252 | e[mask] = torch.zeros_like(e[mask]) 253 | return e 254 | 255 | 256 | class RemoveRandomBlock: 257 | """ 258 | Randomly remove part of the point cloud. Similar to PyTorch RandomErasing but operating on 3D point clouds. 259 | Erases fronto-parallel cuboid. 260 | Instead of erasing we set coords of removed points to (0, 0, 0) to retain the same number of points 261 | """ 262 | def __init__(self, p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3)): 263 | self.p = p 264 | self.scale = scale 265 | self.ratio = ratio 266 | 267 | def get_params(self, coords): 268 | # Find point cloud 3D bounding box 269 | flattened_coords = coords.view(-1, 3) 270 | min_coords, _ = torch.min(flattened_coords, dim=0) 271 | max_coords, _ = torch.max(flattened_coords, dim=0) 272 | span = max_coords - min_coords 273 | area = span[0] * span[1] 274 | erase_area = random.uniform(self.scale[0], self.scale[1]) * area 275 | aspect_ratio = random.uniform(self.ratio[0], self.ratio[1]) 276 | 277 | h = math.sqrt(erase_area * aspect_ratio) 278 | w = math.sqrt(erase_area / aspect_ratio) 279 | 280 | x = min_coords[0] + random.uniform(0, 1) * (span[0] - w) 281 | y = min_coords[1] + random.uniform(0, 1) * (span[1] - h) 282 | 283 | return x, y, w, h 284 | 285 | def __call__(self, coords): 286 | if random.random() < self.p: 287 | x, y, w, h = self.get_params(coords) # Fronto-parallel cuboid to remove 288 | mask = (x < coords[..., 0]) & (coords[..., 0] < x+w) & (y < coords[..., 1]) & (coords[..., 1] < y+h) 289 | coords[mask] = torch.zeros_like(coords[mask]) 290 | return coords -------------------------------------------------------------------------------- /dataloader/pittsburgh.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: Peng Yin, Shiqi Zhao 3 | train.py 4 | 5 | Dataset wrapper for Pittsburgh Dataset modified based on PointNetVLAD 6 | Original Code: https://github.com/mikacuy/pointnetvlad 7 | ''' 8 | import os 9 | import random 10 | from glob import glob 11 | 12 | import pickle 13 | import numpy as np 14 | import open3d as o3d 15 | import pandas as pd 16 | from tqdm import tqdm 17 | from PIL import Image 18 | from sklearn.neighbors import KDTree 19 
| 20 | from .triplet_dataloader import TripletDataLoader 21 | from utils import log_print, pc_normalize 22 | 23 | 24 | class PittsburghDataset(TripletDataLoader): 25 | """Dataloader Wrapper for Pittsburgh Dataset""" 26 | def __init__(self, config, is_train): 27 | super().__init__(config, is_train) 28 | self.dataset_dir = os.path.join(config.DATA.BENCHMARK_DIR, str(config.DATA.DATASET_NAME)) 29 | if config.MODEL.TYPE not in ["Lidar", "LiSPH"]: 30 | raise ValueError("Pittsburgh dataset only provides Lidar data!") 31 | if is_train: 32 | self.generate_pickles() 33 | self.queries = self.get_queries_dict(os.path.join(self.dataset_dir, config.DATA.TRAIN_PICKLE)) 34 | log_print("Number of training tuples: %d" % len(self.queries), "y") 35 | else: 36 | self.queries = self.get_queries_dict(os.path.join(self.dataset_dir, config.DATA.VAL_PICKLE)) 37 | log_print("Number of val tuples: %d" % len(self.queries), "y") 38 | self.file_idxs = np.arange(0, len(self.queries.keys())) 39 | 40 | def load_file_func(self, filename): 41 | if self.config.MODEL.TYPE == "Lidar" or self.branch == "Lidar": 42 | pcd = np.asarray(o3d.io.read_point_cloud(filename + ".pcd").points) 43 | if pcd.shape[0] != 4096: 44 | raise ValueError('{}.pcd does not have sufficient points'.format(filename)) 45 | pcd = pc_normalize(pcd) 46 | output = self.lidar_data_aug(pcd) 47 | elif self.config.MODEL.TYPE == "LiSPH" or self.branch == "LiSPH": 48 | sph_img = Image.open(filename + "_sph.png") 49 | output = self.lisph_data_aug(sph_img) 50 | return output 51 | 52 | def generate_pickles(self): 53 | if self.config.TRAINING.IS_TRAIN: 54 | pair_train = os.path.join( 55 | self.dataset_dir, self.config.DATA.TRAIN_PICKLE) 56 | pair_test = os.path.join( 57 | self.dataset_dir, self.config.DATA.VAL_PICKLE) 58 | #! Load pickles if exist 59 | if os.path.exists(pair_train) and os.path.exists(pair_test): 60 | log_print("Load previous pickles", "b") 61 | return 62 | # ANCHOR generate train/val query for 63 | pair_train = self.get_df(self.dataset_dir, 'train', is_shuffle=False) 64 | pair_train = pair_train.sample(frac=1).reset_index(drop=True) 65 | pair_val = self.get_df(self.dataset_dir, 'val', is_shuffle=False) 66 | pair_val = pair_val.sample(frac=1).reset_index(drop=True) 67 | 68 | self.construct_query_dict(pair_train, self.config.DATA.TRAIN_PICKLE) 69 | self.construct_query_dict(pair_val, self.config.DATA.VAL_PICKLE) 70 | log_print("Generated pickles!\n", "g") 71 | 72 | def get_from_folder(self, data_path, index): 73 | 74 | all_file_id = [] 75 | pose_data = glob(data_path+'/*_pose.npy') 76 | for file_name in pose_data: 77 | all_file_id.append(file_name.split('_pose')[-2]) 78 | all_file_id.sort() 79 | all_data_df = pd.DataFrame(all_file_id, columns=["file"]) 80 | all_data_df["pcd_position_x"] = all_data_df["file"].apply( 81 | lambda x: np.load(x + '_pose.npy')[0]) 82 | all_data_df["pcd_position_y"] = all_data_df["file"].apply( 83 | lambda x: np.load(x + '_pose.npy')[1]) 84 | all_data_df["pcd_position_z"] = all_data_df["file"].apply( 85 | lambda x: np.load(x + '_pose.npy')[2]) 86 | all_data_df["date"] = all_data_df["file"].apply( 87 | lambda x: x.split('/')[-2]) 88 | all_data_df.reset_index(drop=True, inplace=True) 89 | return all_data_df 90 | 91 | def get_df(self, dataset_dir, type='train', is_shuffle=False): 92 | file_df = pd.DataFrame() 93 | file_dirs = [] 94 | if type == 'train': 95 | for i in self.config.DATA.TRAIN_LIST: 96 | file_dirs.append("{}/train_{}/".format(dataset_dir, i)) 97 | elif type == 'val': 98 | for i in self.config.DATA.VAL_LIST: 99 | 
file_dirs.append("{}/val_{}/".format(dataset_dir, i)) 100 | elif type == 'test': 101 | for i in self.config.DATA.TEST_LIST: 102 | file_dirs.append("{}/test_{}/".format(dataset_dir, i)) 103 | file_dirs.sort() 104 | for index, folder in enumerate(file_dirs): 105 | data_df = self.get_from_folder(folder, index) 106 | file_df = pd.concat([file_df, data_df], ignore_index=True) 107 | if is_shuffle: 108 | file_df.sample(frac=1).reset_index(drop=True) 109 | 110 | return file_df 111 | 112 | def construct_query_dict(self, data_df, filename): 113 | data_df.reset_index(drop=True, inplace=True) 114 | 115 | tree = KDTree( 116 | data_df[["pcd_position_x", "pcd_position_y", "pcd_position_z"]]) 117 | ind_nn = tree.query_radius(data_df[["pcd_position_x", "pcd_position_y", "pcd_position_z"]], 118 | r=self.config.DATA.POSITIVES_RADIUS) 119 | ind_r = tree.query_radius(data_df[["pcd_position_x", "pcd_position_y", "pcd_position_z"]], 120 | r=self.config.DATA.NEGATIVES_RADIUS) 121 | ind_traj = tree.query_radius(data_df[["pcd_position_x", "pcd_position_y", "pcd_position_z"]], 122 | r=self.config.DATA.TRAJ_RADIUS) 123 | 124 | queries = {} 125 | for i in tqdm(range(len(ind_nn)), total=len(ind_nn), desc='construct queries', leave=False): 126 | query = data_df.iloc[i]["file"] 127 | positives = np.setdiff1d(ind_nn[i], [i]).tolist() 128 | negatives = np.setdiff1d(ind_traj[i], ind_r[i]).tolist() 129 | 130 | random.shuffle(negatives) 131 | random.shuffle(positives) 132 | 133 | queries[i] = {"query": query, "positives": positives, "negatives": negatives} 134 | 135 | with open(os.path.join(self.dataset_dir, filename), 'wb') as handle: 136 | pickle.dump(queries, handle, protocol=pickle.HIGHEST_PROTOCOL) 137 | -------------------------------------------------------------------------------- /dataloader/triplet_dataloader.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: Shiqi Zhao, Peng Yin 3 | TripletDataLoader 4 | ''' 5 | 6 | import os 7 | import pickle 8 | import random 9 | from abc import abstractmethod 10 | 11 | import torch 12 | import numpy as np 13 | from torch.utils.data import Dataset 14 | 15 | from .data_augmentation import Augment_Point_Data, Augment_RGB_Data, Augment_SPH_Data 16 | 17 | random.seed(12345) 18 | 19 | 20 | class TripletDataLoader(Dataset): 21 | """Dataloader for tripletLoss or qradrupletLoss""" 22 | def __init__(self, config, is_train): 23 | self.config = config 24 | self.is_train = is_train 25 | self.data_count = 0 26 | self.branch = None 27 | 28 | if config.MODEL.TYPE == "Lidar": 29 | self.lidar_data_aug = Augment_Point_Data(config.TRAINING.BATCH.FRAME_TRANSFORM, is_train) 30 | elif config.MODEL.TYPE == "LiSPH": 31 | self.lisph_data_aug = Augment_SPH_Data(config.DATA.SPH_PROJ.IMAGE_SIZE, 32 | config.TRAINING.BATCH.FRAME_TRANSFORM, 33 | is_train) 34 | elif config.MODEL.TYPE == "Image": 35 | self.image_data_aug = Augment_RGB_Data(config.TRAINING.BATCH.FRAME_TRANSFORM, is_train) 36 | else: 37 | raise ValueError("Wrong Model Type, Please Check config/config.py *MODEL* Section") 38 | 39 | def __len__(self): 40 | return len(self.queries) 41 | 42 | def __getitem__(self, idx): 43 | q_tuple = self.get_data() 44 | return q_tuple 45 | 46 | def get_data(self): 47 | q_tuple = [] 48 | 49 | while(1): 50 | # if try data_count times still can not found a proper batch, resample 51 | if self.data_count > (len(self.file_idxs)-self.config.TRAINING.BATCH.BATCH_SIZE): 52 | self.shuffle_query() 53 | continue 54 | batch_keys = self.file_idxs[self.data_count] 55 | # 
select the batch with qualified number of positives and negatives for training 56 | if (len(self.queries[batch_keys]["positives"]) < self.config.TRAINING.BATCH.POSITIVES_PER_QUERY) or (len(self.queries[batch_keys]["negatives"]) < self.config.TRAINING.BATCH.NEGATIVES_PER_QUERY): 57 | self.data_count+=1 58 | continue 59 | q_tuple = self.get_tuple(self.queries[batch_keys], hard_neg=[], other_neg=True) 60 | self.data_count+=1 61 | break 62 | 63 | return q_tuple 64 | 65 | def shuffle_query(self): 66 | random.shuffle(self.file_idxs) 67 | self.data_count = 0 68 | 69 | def get_tuple(self, dict_value, hard_neg=None, other_neg=False): 70 | ''' 71 | hard_neg: bool, add hard negative? 72 | other_neg: bool, add other negatives?(quadra loss fuction required) 73 | ''' 74 | if hard_neg is None: 75 | hard_neg = [] 76 | possible_negs = [] 77 | 78 | num_pos = self.config.TRAINING.BATCH.POSITIVES_PER_QUERY 79 | num_neg = self.config.TRAINING.BATCH.NEGATIVES_PER_QUERY 80 | random.shuffle(dict_value["positives"]) 81 | pos_files = [] 82 | for i in range(num_pos): 83 | pos_files.append(self.queries[dict_value["positives"][i]]["query"]) 84 | 85 | neg_files = [] 86 | neg_indices = [] 87 | if len(hard_neg) == 0: 88 | random.shuffle(dict_value["negatives"]) 89 | for i in range(num_neg): 90 | neg_files.append(self.queries[dict_value["negatives"][i]]["query"]) 91 | neg_indices.append(dict_value["negatives"][i]) 92 | else: 93 | random.shuffle(dict_value["negatives"]) 94 | for i in hard_neg: 95 | neg_files.append(self.queries[i]["query"]) 96 | neg_indices.append(i) 97 | j = 0 98 | while len(neg_files) < num_neg: 99 | if not dict_value["negatives"][j] in hard_neg: 100 | neg_files.append(self.queries[dict_value["negatives"][j]]["query"]) 101 | neg_indices.append(dict_value["negatives"][j]) 102 | j += 1 103 | if other_neg: 104 | # get neighbors of negatives and query 105 | neighbors = [] 106 | for pos in dict_value["positives"]: 107 | neighbors.append(pos) 108 | for neg in neg_indices: 109 | for pos in self.queries[neg]["positives"]: 110 | neighbors.append(pos) 111 | possible_negs = list(set(self.queries.keys()) - set(neighbors)) 112 | random.shuffle(possible_negs) 113 | 114 | query = self.load_file_func(dict_value["query"]) # Nx3 115 | query = np.expand_dims(query, axis=0) 116 | positives = self.load_files_func(pos_files) 117 | negatives = self.load_files_func(neg_files) 118 | 119 | output = [query, positives, negatives] 120 | if other_neg: 121 | neg2 = self.load_file_func(self.queries[possible_negs[0]]["query"]) 122 | neg2 = np.expand_dims(neg2, axis=0) 123 | output.append(neg2) 124 | return output 125 | 126 | @abstractmethod 127 | def load_file_func(self, filename): 128 | pass 129 | 130 | def load_files_func(self, filenames): 131 | pcs = [] 132 | for filename in zip(filenames): 133 | pc = self.load_file_func(filename[0]) 134 | pcs.append(pc) 135 | pcs = torch.stack(pcs, axis=0) 136 | return pcs 137 | 138 | @staticmethod 139 | def get_queries_dict(filename): 140 | with open(filename, 'rb') as handle: 141 | queries = pickle.load(handle) 142 | return queries 143 | -------------------------------------------------------------------------------- /dataloader/utils.py: -------------------------------------------------------------------------------- 1 | def make_collate_fn(config): 2 | """Generate Collate_functions 3 | 4 | Args: 5 | config (class): parameter configurations 6 | """ 7 | def collate_fn(batch): 8 | """Collate_functions 9 | 10 | Args: 11 | batch (list): input batch for training 12 | """ 13 | if 
config.TRAINING.IS_TRAIN and config.TRAINING.BATCH.BATCH_TRANSFORM: 14 | pass 15 | return batch 16 | return collate_fn -------------------------------------------------------------------------------- /eval/__init__.py: -------------------------------------------------------------------------------- 1 | from .eval_utils import EvaluationPitts -------------------------------------------------------------------------------- /eval/eval_utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: Shiqi Zhao 3 | evaluate_utils.py 4 | ''' 5 | import tqdm 6 | from glob import glob 7 | from copy import copy, deepcopy 8 | 9 | import torch 10 | import numpy as np 11 | import pandas as pd 12 | import open3d as o3d 13 | from sklearn.neighbors import KDTree 14 | from PIL import Image 15 | from scipy.spatial.transform import Rotation as R 16 | 17 | from utils import pc_normalize, pcd_downsample, LaserProjection, log_print 18 | from dataloader.data_augmentation import Augment_Point_Data, Augment_SPH_Data, Augment_RGB_Data 19 | 20 | 21 | class EvaluationPitts(): 22 | def __init__(self, config, model, device, number_neighbors=20) -> None: 23 | self.config = config 24 | self.number_neighbors = number_neighbors 25 | self.model = model 26 | self.device = device 27 | if config.MODEL.TYPE == "Lidar": 28 | self.trans_lidar = Augment_Point_Data(is_train=False) 29 | elif config.MODEL.TYPE == "LiSPH": 30 | self.trans_lidar_sph = Augment_SPH_Data(img_size=config.DATA.SPH_PROJ.IMAGE_SIZE, 31 | is_train=False) 32 | self.projection = LaserProjection(device=device, 33 | top_size=config.DATA.BEV_PROJ.IMAGE_SIZE, 34 | z_range=config.DATA.BEV_PROJ.Z_RANGE, 35 | sph_size=config.DATA.SPH_PROJ.IMAGE_SIZE, 36 | fov_range=config.DATA.SPH_PROJ.FOV) 37 | else: 38 | raise NotImplementedError('Please Try Predefined Types') 39 | 40 | def get_features_recall(self, traj_num, trans_IDX, rot_IDX, rand_idx=0): 41 | database = [] 42 | query = [] 43 | running_time = 0 44 | 45 | # Set global map 46 | if self.config.DATA.DATASET_NAME == 'PITT': 47 | global_map = o3d.io.read_point_cloud('{}/{}/DATA/Train{}/cloudGlobal.pcd'.format( 48 | self.config.DATA.BENCHMARK_DIR, self.config.DATA.DATASET_NAME,traj_num)) 49 | bbox_pnv = o3d.geometry.AxisAlignedBoundingBox( 50 | np.array([-20, -20, 0.8]), 51 | np.array([ 20, 20, 100.0])) 52 | map_tree = o3d.geometry.KDTreeFlann(global_map) 53 | 54 | # Set filelist for test 55 | file_list = sorted(glob('{}/{}/train_{}/*_sph.png'.format(self.config.DATA.BENCHMARK_DIR, self.config.DATA.DATASET_NAME, traj_num))) 56 | file_list = file_list[::10] 57 | 58 | # load database 59 | for file_name in tqdm.tqdm(file_list): 60 | if self.config.MODEL.TYPE == "LiSPH": 61 | queries_img = Image.open(file_name) 62 | frame = [self.trans_lidar_sph(queries_img).to(self.device, dtype=torch.float)] 63 | embedding, t_gpu = self.model.infer_frame(frame, t=True) 64 | database.append(embedding) 65 | running_time += t_gpu 66 | 67 | # load the test query 68 | for file_name in tqdm.tqdm(file_list): 69 | # preprocess generate test query 70 | pose = np.load('{}_pose6d.npy'.format( 71 | file_name.split('_sph.png')[0])) 72 | 73 | # * Add fixed transformation 74 | if rand_idx == 0: 75 | trans_idx = trans_IDX + np.random.random(1)[0]-0.5 # 0.5m noise 76 | rot_idx = rot_IDX + 5*(np.random.random(1)[0]-0.5) # 2.5° noise 77 | # * Add random transformation 78 | else: 79 | trans_idx = 2 * trans_IDX * (np.random.random(1)[0]-0.5) 80 | rot_idx = 2 * rot_IDX * (np.random.random(1)[0]-0.5) 81 | pose[0] += 
trans_idx 82 | pose[1] += trans_idx 83 | frame = [] 84 | # generate sphrical images 85 | [k, p_idx, _] = map_tree.search_radius_vector_3d( 86 | pose[:3], self.config.DATA.SPH_PROJ.RADIUS) 87 | pcd_data = np.asarray(global_map.points)[p_idx, :] 88 | pose[5] += rot_idx * np.pi/180 89 | pcd_data -= pose[:3] 90 | pcd = o3d.geometry.PointCloud() 91 | pcd.points = o3d.utility.Vector3dVector(pcd_data) 92 | rot = R.from_rotvec(np.array([pose[3], pose[4], pose[5]])) 93 | if self.config.MODEL.TYPE in ["LiSPH"]: 94 | trans_matrix = np.eye(4) 95 | trans_matrix[0:3, 0:3] = rot.inv().as_matrix() 96 | pcd.transform(trans_matrix) 97 | pcd = pcd.voxel_down_sample(0.5) 98 | sph_img = self.projection.do_sph_projection(np.asarray(pcd.points)) 99 | im = Image.fromarray((sph_img * 255).astype(np.uint8)) 100 | frame.append(self.trans_lidar_sph(im).to(self.device, dtype=torch.float)) 101 | # load test query into network 102 | query.append(self.model.infer_frame(frame)) 103 | log_print(f'{self.config.DATA.DATASET_NAME} {traj_num}:', 'b') 104 | print("Descriptor Generation Done!") 105 | print("Genrate Speed {}s/frame".format(running_time/len(file_list))) 106 | 107 | _,recall_nums,_,one_percent_recall = self.get_recall(np.array(database), np.array(query), num_neighbors=self.number_neighbors) 108 | log_print(f'Top one percent recall is {one_percent_recall}') 109 | 110 | return recall_nums, one_percent_recall, running_time 111 | 112 | def get_recall(self, database_feature, queries_feature, num_neighbors=30): 113 | database_output = database_feature 114 | queries_output = queries_feature 115 | 116 | database_nbrs = KDTree(database_output) 117 | # num_neighbors = (int)(queries_feature.shape[0]*0.01) 118 | recall = [0] * num_neighbors 119 | 120 | top1_similarity_score = [] 121 | one_percent_retrieved = 0 122 | threshold = max(int(round(len(database_output) / 100.0)), 1) 123 | 124 | num_evaluated = 0 125 | topk_dict = {} 126 | top_recalls = np.zeros(num_neighbors+2) 127 | 128 | for i in range(len(queries_output)): 129 | 130 | true_neighbors = [i] 131 | if len(true_neighbors) == 0: 132 | continue 133 | num_evaluated += 1 134 | 135 | distances, indices = database_nbrs.query(np.array([queries_output[i]]), k=num_neighbors) 136 | # indices = np.setdiff1d(indices[0], [i]) 137 | indices = indices[0] 138 | 139 | for j in range(0, len(indices)): 140 | if indices[j] in true_neighbors: 141 | if (j == 0): 142 | similarity = np.dot(queries_output[i], database_output[indices[j]]) 143 | top1_similarity_score.append(similarity) 144 | recall[j] += 1 145 | break 146 | 147 | if len(list(set(indices[0:threshold]).intersection(set(true_neighbors)))) > 0: 148 | one_percent_retrieved += 1 149 | 150 | for recall_num in range(num_neighbors): 151 | if len(list(set(indices[0:recall_num+1]).intersection(set(true_neighbors)))) > 0: 152 | top_recalls[recall_num] += 1 153 | 154 | top_recalls[-2] = one_percent_retrieved 155 | top_recalls[-1] = num_evaluated 156 | one_percent_recall = (one_percent_retrieved / float(num_evaluated)) * 100 157 | top_one_recall = (top_recalls[0] / float(num_evaluated)) * 100 158 | top_five_recall = (top_recalls[4] / float(num_evaluated)) * 100 159 | top_ten_recall = (top_recalls[9] / float(num_evaluated)) * 100 160 | recall = (np.cumsum(recall) / float(num_evaluated)) * 100 161 | 162 | return (top_one_recall, top_five_recall, top_ten_recall), \ 163 | top_recalls, \ 164 | top1_similarity_score, one_percent_recall 165 | 166 | @staticmethod 167 | def apply_noise(pcd, mu=0, sigma=0.1): 168 | noisy_pcd = deepcopy(pcd) 169 | 
points = np.asarray(noisy_pcd.points) 170 | points += np.clip(np.random.normal(mu, sigma, size=points.shape), -1, 1) 171 | noisy_pcd.points = o3d.utility.Vector3dVector(points) 172 | return noisy_pcd -------------------------------------------------------------------------------- /eval/evaluate_pitt.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: Shiqi Zhao 3 | evaluate_pitt.py 4 | ''' 5 | import os 6 | import argparse 7 | import numpy as np 8 | from config import cfg 9 | 10 | from models import set_lcd_model 11 | from eval.eval_utils import EvaluationPitts 12 | from utils import log_print 13 | 14 | 15 | def para_args(): 16 | parser = argparse.ArgumentParser(description="Network configurations!") 17 | parser.add_argument("--model-path", default="data/results/default_model", metavar="FILE", 18 | help="path to config file", type=str) 19 | parser.add_argument("--dataset", default="PITT", type=str) 20 | parser.add_argument("--epoch", default="62", type=int) 21 | parser.add_argument("--noise", default=0, type=int, help="apply noise during test") 22 | parser.add_argument("--type", default="rot", type=str, choices=["recall", "rot"], help="choose do top recall or rot analysis") 23 | parser.add_argument("--trans-noise", type=int, required=True) 24 | parser.add_argument("--rot-noise", type=int, required=True) 25 | parser.add_argument("--log", type=bool, required=False, default=False) 26 | args = parser.parse_args() 27 | return args 28 | 29 | 30 | def val(config, type, noise, trans_noise, rot_noise, log): 31 | #! Log 32 | if log: 33 | save_dir = 'log/{}/{}/'.format(config.MODEL.NAME, config.DATA.DATASET_NAME) 34 | if not os.path.exists(save_dir): 35 | os.makedirs(save_dir) 36 | #! Define Model 37 | model, gpu_conf = set_lcd_model(config) 38 | [_, device, gpu_ids] = gpu_conf 39 | #! Define Evaluation Class 40 | valPitt = EvaluationPitts(config, model, device) 41 | #! 
Evaluation 42 | if type == "recall": 43 | total_recalls = [] 44 | total_time = 0 45 | for traj_num in range(21, 22): 46 | recalls, _, running_time = valPitt.get_features_recall(traj_num, trans_noise, rot_noise, noise) 47 | total_recalls.append(recalls) 48 | total_time += running_time 49 | total_recalls = np.array(total_recalls).sum(axis=0) 50 | log_print('Total Top One Recall is {}'.format(total_recalls[0]/total_recalls[-1]), 'r') 51 | log_print('Total Running Time is {}'.format(total_time/total_recalls[-1]), 'r') 52 | if log: 53 | stats = total_recalls[:-1]/total_recalls[-1] 54 | save_file = save_dir + 'recall.txt' 55 | file_tosave = open(save_file, 'w+') 56 | file_tosave.write('Average recall @1 is: {}\n'.format(stats[0])) 57 | file_tosave.write('Average recall @1% is: {}\n'.format(stats[-1])) 58 | file_tosave.write('\n') 59 | for index, item in enumerate(stats[:-1]): 60 | file_tosave.write("Average recall @{} is: {}\n".format(index+1, item)) 61 | file_tosave.write('\t\n\t\n') 62 | file_tosave.close() 63 | elif type == "rot": 64 | for trans in [1,2,3]: 65 | for rot in [30,60,90,120,150,180]: 66 | total_recalls = [] 67 | total_time = 0 68 | for traj_num in range(21, 22): 69 | recalls, _, running_time = valPitt.get_features_recall(traj_num, trans, rot, noise) 70 | total_recalls.append(recalls) 71 | total_time += running_time 72 | total_recalls = np.array(total_recalls).sum(axis=0) 73 | log_print('Total Top One Recall is {}'.format(total_recalls[0]/total_recalls[-1]), 'r') 74 | log_print('Total Running Time is {}'.format(total_time/total_recalls[-1]), 'r') 75 | if log: 76 | stats = total_recalls[:-1]/total_recalls[-1] 77 | save_file = save_dir + f'rot_{trans}_{rot}.txt' 78 | file_tosave = open(save_file, 'w+') 79 | file_tosave.write('Average recall @1 is: {}\n'.format(stats[0])) 80 | file_tosave.write('Average recall @1% is: {}\n'.format(stats[-1])) 81 | file_tosave.write('\n') 82 | for index, item in enumerate(stats[:-1]): 83 | file_tosave.write("Average recall @{} is: {}\n".format(index+1, item)) 84 | file_tosave.write('\t\n\t\n') 85 | file_tosave.close() 86 | 87 | 88 | if __name__ == "__main__": 89 | args = para_args() 90 | cfg.merge_from_file("{}/DATA.yaml".format(args.model_path)) 91 | cfg.merge_from_file("{}/MODEL.yaml".format(args.model_path)) 92 | cfg.DATA.DATASET_NAME = args.dataset 93 | cfg.TRAINING.IS_TRAIN = False 94 | cfg.TRAINING.GPU.IDS = [0] # default to use only one gpu when testing 95 | cfg.WEIGHT.LOAD_ADDRESS = "{}/pth/model_{}.pth".format(args.model_path, args.epoch) 96 | cfg.freeze() 97 | val(cfg, args.type, args.noise, args.trans_noise, args.rot_noise, args.log) -------------------------------------------------------------------------------- /generating_sph/gene_pitt.py: -------------------------------------------------------------------------------- 1 | # 2 | # Created on Sun Mar 07 2021 3 | # 4 | # The MIT License (MIT) 5 | # Copyright (c) 2021 Max Yin 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software 7 | # and associated documentation files (the "Software"), to deal in the Software without restriction, 8 | # including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | # and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, 10 | # subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all copies or substantial 13 | # portions of the Software. 
14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 16 | # TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 18 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | # 20 | 21 | import os 22 | import sys 23 | import numpy as np 24 | from glob import glob 25 | import open3d as o3d 26 | import argparse 27 | from torch import square 28 | from tqdm import tqdm 29 | from PIL import Image 30 | from copy import deepcopy 31 | from scipy.spatial.transform import Rotation as R 32 | 33 | 34 | from config import cfg 35 | from utils import log_print, LaserProjection, pcd_downsample 36 | 37 | 38 | #!============================================================================================# 39 | #! Parameters for Dataset and Network 40 | #!============================================================================================# 41 | 42 | 43 | def para_args(): 44 | parser = argparse.ArgumentParser(description="Network configurations!") 45 | parser.add_argument("--config-network", default="../config/network/spherevlad.yaml", metavar="FILE", 46 | help="path to config file", type=str) 47 | parser.add_argument("--config-dataset", default="../config/dataset/PITT.yaml", metavar="FILE", 48 | help="path to config file", type=str) 49 | args = parser.parse_args() 50 | return args 51 | 52 | def find_boundry(num): 53 | pose_list = glob(f'/data_hdd_1/SphereVLAD++/dataset/KITTI360/train_{num}/*_pose6d.npy') 54 | boundry = [] 55 | num_list = [int(a.split('_pose6d.npy')[0].split('/')[-1]) for a in pose_list] 56 | num_list = sorted(num_list) 57 | for i in range(max(num_list)+1): 58 | if (i in num_list) and (i+1 not in num_list): 59 | boundry.append(i) 60 | if (i in num_list) and (i-1 not in num_list): 61 | boundry.append(i) 62 | return boundry 63 | 64 | def generate_data(cfg, index, mode, slice_size=50, interval=1): 65 | 66 | path_base = '{}{}'.format(cfg.DATA.BENCHMARK_DIR, cfg.DATA.DATASET_NAME) 67 | out_path = '{}/{}_{}'.format(path_base, mode, index) 68 | os.system('rm -rf {}'.format(out_path)) 69 | os.mkdir('{}'.format(out_path)) 70 | 71 | log_print("Load data {}".format(out_path), 'b') 72 | 73 | #! Define range to image projection 74 | projection = LaserProjection(None, 75 | top_size=cfg.DATA.BEV_PROJ.IMAGE_SIZE, 76 | z_range=cfg.DATA.BEV_PROJ.Z_RANGE, 77 | sph_size=cfg.DATA.SPH_PROJ.IMAGE_SIZE, 78 | fov_range=cfg.DATA.SPH_PROJ.FOV) 79 | 80 | trj_6D = np.loadtxt("{}/DATA/Train{}/poses.txt".format(path_base, index)) 81 | 82 | #! 
Obtain Projections 83 | map_pcd = o3d.io.read_point_cloud( 84 | "{}/DATA/Train{}/cloudGlobal.pcd".format(path_base, index)) 85 | pcd_tree = o3d.geometry.KDTreeFlann(map_pcd) 86 | 87 | #* generate submaps 88 | last_pose = np.array([9.9e9, 9.9e9, 9.9e9]) 89 | count = 0 90 | 91 | for im_idx, pose in tqdm(enumerate(trj_6D), total=len(trj_6D)): 92 | # select interval 93 | if np.linalg.norm(pose[:3] - last_pose, ord=2) < interval: 94 | continue 95 | last_pose = pose[:3] 96 | 97 | # generate ball query pointcloud 98 | [k, p_idx, _] = pcd_tree.search_radius_vector_3d( 99 | pose[:3], cfg.DATA.SPH_PROJ.RADIUS) 100 | pcd_data = np.asarray(map_pcd.points)[p_idx, :] 101 | pcd_data -= pose[:3] 102 | if pcd_data.shape[0] == 0: 103 | continue 104 | 105 | # save pose files 106 | np.save( 107 | "{}/{:06d}_pose.npy".format(out_path, count+1), pose[:3]) 108 | np.save( 109 | "{}/{:06d}_pose6d.npy".format(out_path, count+1), pose) 110 | 111 | # generate data for spherical view 112 | pcd = o3d.geometry.PointCloud() 113 | pcd.points = o3d.utility.Vector3dVector(pcd_data) 114 | trans_matrix = np.eye(4) 115 | rot = R.from_rotvec(np.array([pose[3], pose[4], pose[5]])) 116 | trans_matrix[0:3, 0:3] = rot.inv().as_matrix() 117 | pcd.transform(trans_matrix) 118 | pcd = pcd.voxel_down_sample(0.5) 119 | sph_img = projection.do_sph_projection(np.array(pcd.points)) 120 | im = Image.fromarray((sph_img * 255).astype(np.uint8)) 121 | im.save("{}/{:06d}_sph.png".format(out_path, count+1)) 122 | 123 | count += 1 124 | 125 | if __name__ == "__main__": 126 | args = para_args() 127 | cfg.merge_from_file(args.config_network) 128 | cfg.merge_from_file(args.config_dataset) 129 | cfg.freeze() 130 | gen_data = generate_data 131 | 132 | # ANCHOR prepare train data 133 | for index in list(set(cfg.DATA.TRAIN_LIST + cfg.DATA.TEST_LIST)): 134 | gen_data(cfg, index, 'train') 135 | for index in cfg.DATA.VAL_LIST: 136 | gen_data(cfg, index, 'val') 137 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .robotLCD import LcdNet 5 | 6 | 7 | def set_lcd_model(cfg, logger=None, neptune=None): 8 | #* set up GPU usage 9 | use_cuda = torch.cuda.is_available() 10 | gpu_ids = [] 11 | if use_cuda: 12 | gpu_c = torch.cuda.device_count() 13 | if cfg.TRAINING.GPU.IDS is not None: 14 | if len(cfg.TRAINING.GPU.IDS) <= gpu_c: 15 | gpu_ids = cfg.TRAINING.GPU.IDS 16 | else: 17 | raise ValueError("Incorrect GPU Index, Please Check!") 18 | else: 19 | gpu_ids = np.arange(gpu_c).tolist() 20 | cuda = "cuda" 21 | device = torch.device(cuda if use_cuda else 'cpu') 22 | lcd = LcdNet(cfg, logger, use_cuda, device, neptune, gpu_ids) 23 | return lcd, [use_cuda, device, gpu_ids] -------------------------------------------------------------------------------- /models/loop_closure/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def make_models(config): 5 | if config.MODEL.NAME == "SphereVLAD": 6 | if config.MODEL.TYPE == "LiSPH": 7 | from .lidar.spherevlad import SphereVLAD 8 | elif config.MODEL.TYPE == "Image": 9 | from .visual.spherevlad import SphereVLAD 10 | model = SphereVLAD(config) 11 | elif config.MODEL.NAME == "SphereVLAD2": 12 | from .lidar.spherevlad2 import SphereVLAD2 13 | model = SphereVLAD2(config) 14 | else: 15 | raise ValueError("Wrong Model Name!") 16 | 17 | if config.TRAINING.IS_TRAIN and 
config.WEIGHT.FREEZE_WEIGHT: 18 | pass 19 | return model 20 | -------------------------------------------------------------------------------- /models/loop_closure/aggregator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MetaSLAM/SphereVLAD/559d5f46b0f8217874273c78e622f28999200482/models/loop_closure/aggregator/__init__.py -------------------------------------------------------------------------------- /models/loop_closure/aggregator/netvlad.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | class NetVLAD(nn.Module): 8 | """NetVLAD layer implementation""" 9 | 10 | def __init__(self, num_clusters=64, dim=128, alpha=100.0, 11 | normalize_input=True, output_dim=None, gate=False): 12 | """ 13 | Args: 14 | num_clusters : int 15 | The number of clusters 16 | dim : int 17 | Dimension of descriptors 18 | alpha : float 19 | Parameter of initialization. Larger value is harder assignment. 20 | normalize_input : bool 21 | If true, descriptor-wise L2 normalization is applied to input. 22 | """ 23 | super(NetVLAD, self).__init__() 24 | self.num_clusters = num_clusters 25 | self.dim = dim 26 | self.alpha = alpha 27 | self.normalize_input = normalize_input 28 | self.conv = nn.Conv2d(dim, num_clusters, kernel_size=(1, 1), bias=True) 29 | self.centroids = nn.Parameter(torch.rand(num_clusters, dim)) 30 | self._init_params() 31 | 32 | if output_dim is not None: 33 | self.nn_output = nn.Linear(in_features=1024, out_features=output_dim) 34 | self.bn_output = nn.BatchNorm1d(output_dim) 35 | self.output_dim = output_dim 36 | 37 | if gate: 38 | self.context_gating = GatingContext(output_dim) 39 | self.gate = gate 40 | 41 | def _init_params(self): 42 | self.conv.weight = nn.Parameter( 43 | (2.0 * self.alpha * self.centroids).unsqueeze(-1).unsqueeze(-1) 44 | ) 45 | self.conv.bias = nn.Parameter( 46 | - self.alpha * self.centroids.norm(dim=1) 47 | ) 48 | 49 | def forward(self, x): 50 | if self.normalize_input: 51 | x = F.normalize(x, p=2, dim=1) # across descriptor dim 52 | 53 | N, C = x.shape[:2] 54 | 55 | # soft-assignment 56 | soft_assign = self.conv(x).view(N, self.num_clusters, -1) 57 | soft_assign = F.softmax(soft_assign, dim=1) 58 | x_flatten = x.view(N, C, -1) 59 | 60 | # calculate residuals to each clusters 61 | residual = x_flatten.expand(self.num_clusters, -1, -1, -1).permute(1, 0, 2, 3) - \ 62 | self.centroids.expand(x_flatten.size(-1), -1, -1).permute(1, 2, 0).unsqueeze(0) 63 | residual *= soft_assign.unsqueeze(2) 64 | 65 | vlad = residual.sum(dim=-1) 66 | vlad = F.normalize(vlad, p=2, dim=2) # intra-normalization 67 | vlad = vlad.view(x.size(0), -1) # flatten 68 | vlad = F.normalize(vlad, p=2, dim=1) # L2 normalize 69 | 70 | if self.output_dim is not None: 71 | vlad = self.nn_output(vlad) 72 | vlad = self.bn_output(vlad) 73 | 74 | if self.gate: 75 | vlad = self.context_gating(vlad) 76 | 77 | return vlad 78 | 79 | 80 | class GatingContext(nn.Module): 81 | def __init__(self, dim, add_batch_norm=True): 82 | super(GatingContext, self).__init__() 83 | self.dim = dim 84 | self.add_batch_norm = add_batch_norm 85 | self.gating_weights = nn.Parameter( 86 | torch.randn(dim, dim) * 1 / math.sqrt(dim)) 87 | self.sigmoid = nn.Sigmoid() 88 | 89 | if add_batch_norm: 90 | self.gating_biases = None 91 | self.bn1 = nn.BatchNorm1d(dim) 92 | else: 93 | self.gating_biases = nn.Parameter( 94 | 
torch.randn(dim) * 1 / math.sqrt(dim)) 95 | self.bn1 = None 96 | 97 | def forward(self, x): 98 | gates = torch.matmul(x, self.gating_weights) 99 | 100 | if self.add_batch_norm: 101 | gates = self.bn1(gates) 102 | else: 103 | gates = gates + self.gating_biases 104 | 105 | gates = self.sigmoid(gates) 106 | 107 | activation = x * gates 108 | 109 | return activation 110 | -------------------------------------------------------------------------------- /models/loop_closure/lidar/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MetaSLAM/SphereVLAD/559d5f46b0f8217874273c78e622f28999200482/models/loop_closure/lidar/__init__.py -------------------------------------------------------------------------------- /models/loop_closure/lidar/spherevlad.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: Shiqi Zhao 3 | SphereVLAD 4 | ''' 5 | import numpy as np 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | from s2cnn import SO3Convolution 10 | from s2cnn import S2Convolution 11 | from s2cnn import so3_near_identity_grid 12 | from s2cnn import s2_near_identity_grid 13 | 14 | from ..aggregator.netvlad import NetVLAD 15 | 16 | 17 | class SphereVLAD(nn.Module): 18 | ''' 19 | sphvlad with bn 20 | ''' 21 | def __init__(self, config): 22 | super(SphereVLAD, self).__init__() 23 | 24 | bandwidth = (int)(config.DATA.SPH_PROJ.IMAGE_SIZE[0]/2.0) 25 | 26 | grid_s2 = s2_near_identity_grid(n_alpha=6, max_beta=np.pi/16, n_beta=1) 27 | grid_so3_1 = so3_near_identity_grid( 28 | n_alpha=6, max_beta=np.pi / 16, n_beta=1, max_gamma=2*np.pi, n_gamma=6) 29 | grid_so3_2 = so3_near_identity_grid( 30 | n_alpha=6, max_beta=np.pi / 8, n_beta=1, max_gamma=2*np.pi, n_gamma=6) 31 | grid_so3_3 = so3_near_identity_grid( 32 | n_alpha=6, max_beta=np.pi / 4, n_beta=1, max_gamma=2*np.pi, n_gamma=6) 33 | grid_so3_4 = so3_near_identity_grid( 34 | n_alpha=6, max_beta=np.pi / 2, n_beta=1, max_gamma=2*np.pi, n_gamma=6) 35 | 36 | self.conv1 = nn.Sequential( 37 | S2Convolution(nfeature_in=1, nfeature_out=8, 38 | b_in=bandwidth, b_out=bandwidth//2, grid=grid_s2), 39 | nn.BatchNorm3d(8), 40 | nn.ReLU()) 41 | 42 | self.conv2 = nn.Sequential( 43 | SO3Convolution(nfeature_in=8, nfeature_out=8, 44 | b_in=bandwidth//2, b_out=bandwidth//4, grid=grid_so3_1), 45 | nn.BatchNorm3d(8), 46 | nn.ReLU()) 47 | 48 | self.conv3 = nn.Sequential( 49 | SO3Convolution(nfeature_in=8, nfeature_out=16, 50 | b_in=bandwidth//4, b_out=bandwidth//8, grid=grid_so3_2), 51 | nn.BatchNorm3d(16), 52 | nn.ReLU()) 53 | 54 | self.conv4 = nn.Sequential( 55 | SO3Convolution(nfeature_in=16, nfeature_out=16, 56 | b_in=bandwidth//8, b_out=bandwidth//8, grid=grid_so3_3), 57 | nn.BatchNorm3d(16), 58 | nn.ReLU()) 59 | 60 | self.vlad = NetVLAD(num_clusters=config.MODEL.NETVLAD.CLUSTER_NUM, 61 | dim=config.MODEL.NETVLAD.FEATURE_DIM, 62 | normalize_input=config.MODEL.NETVLAD.NORMALIZE_INPUT, 63 | output_dim=config.MODEL.NETVLAD.OUTPUT_DIM, 64 | gate=config.MODEL.NETVLAD.GATE) 65 | 66 | 67 | def forward(self, x): 68 | # encoder 69 | x = self.conv1(x) 70 | x = self.conv2(x) 71 | x = self.conv3(x) 72 | x = self.conv4(x) 73 | # reshape 74 | x = x.view(x.shape[0], x.shape[1], x.shape[2], x.shape[3]*x.shape[4]) 75 | # vlad 76 | x = self.vlad(x) 77 | return x -------------------------------------------------------------------------------- /models/loop_closure/lidar/spherevlad2.py: 
-------------------------------------------------------------------------------- 1 | ''' 2 | Author: Shiqi Zhao 3 | SphereVLAD++ 4 | ''' 5 | import numpy as np 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | from s2cnn import SO3Convolution 10 | from s2cnn import S2Convolution 11 | from s2cnn import so3_near_identity_grid 12 | from s2cnn import s2_near_identity_grid 13 | 14 | from ..aggregator.netvlad import NetVLAD 15 | from ..transformer.attention import Spatial_Attn 16 | 17 | 18 | class SphereVLAD2(nn.Module): 19 | ''' 20 | sphvlad2 with bn 21 | ''' 22 | def __init__(self, config): 23 | super(SphereVLAD2, self).__init__() 24 | 25 | bandwidth = (int)(config.DATA.SPH_PROJ.IMAGE_SIZE[0]/2.0) 26 | 27 | grid_s2 = s2_near_identity_grid(n_alpha=6, max_beta=np.pi/16, n_beta=1) 28 | grid_so3_1 = so3_near_identity_grid( 29 | n_alpha=6, max_beta=np.pi / 16, n_beta=1, max_gamma=2*np.pi, n_gamma=6) 30 | grid_so3_2 = so3_near_identity_grid( 31 | n_alpha=6, max_beta=np.pi / 8, n_beta=1, max_gamma=2*np.pi, n_gamma=6) 32 | grid_so3_3 = so3_near_identity_grid( 33 | n_alpha=6, max_beta=np.pi / 4, n_beta=1, max_gamma=2*np.pi, n_gamma=6) 34 | grid_so3_4 = so3_near_identity_grid( 35 | n_alpha=6, max_beta=np.pi / 2, n_beta=1, max_gamma=2*np.pi, n_gamma=6) 36 | 37 | self.conv1 = nn.Sequential( 38 | S2Convolution(nfeature_in=1, nfeature_out=8, 39 | b_in=bandwidth, b_out=bandwidth//2, grid=grid_s2), 40 | nn.BatchNorm3d(8), 41 | nn.ReLU()) 42 | 43 | self.conv2 = nn.Sequential( 44 | SO3Convolution(nfeature_in=8, nfeature_out=8, 45 | b_in=bandwidth//2, b_out=bandwidth//4, grid=grid_so3_1), 46 | nn.BatchNorm3d(8), 47 | nn.ReLU()) 48 | 49 | self.conv3 = nn.Sequential( 50 | SO3Convolution(nfeature_in=8, nfeature_out=16, 51 | b_in=bandwidth//4, b_out=bandwidth//8, grid=grid_so3_2), 52 | nn.BatchNorm3d(16), 53 | nn.ReLU()) 54 | 55 | self.conv4 = nn.Sequential( 56 | SO3Convolution(nfeature_in=16, nfeature_out=16, 57 | b_in=bandwidth//8, b_out=bandwidth//8, grid=grid_so3_3), 58 | nn.BatchNorm3d(16), 59 | nn.ReLU()) 60 | 61 | self.attention = Spatial_Attn(in_dim=config.MODEL.NETVLAD.FEATURE_DIM) 62 | 63 | self.vlad = NetVLAD(num_clusters=config.MODEL.NETVLAD.CLUSTER_NUM, 64 | dim=config.MODEL.NETVLAD.FEATURE_DIM, 65 | normalize_input=config.MODEL.NETVLAD.NORMALIZE_INPUT, 66 | output_dim=config.MODEL.NETVLAD.OUTPUT_DIM, 67 | gate=config.MODEL.NETVLAD.GATE) 68 | 69 | 70 | def forward(self, x): 71 | # encoder 72 | x = self.conv1(x) 73 | x = self.conv2(x) 74 | x = self.conv3(x) 75 | x = self.conv4(x) 76 | # reshape and normalize across the descriptor dimension 77 | x = x.view(x.shape[0], x.shape[1], x.shape[2], x.shape[3]*x.shape[4]) 78 | x = F.normalize(x, p=2, dim=1) 79 | # spatial self attention 80 | x, _ = self.attention(x) 81 | # vlad 82 | x = self.vlad(x) 83 | return x -------------------------------------------------------------------------------- /models/loop_closure/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .lazy_quadruplet_loss import LazyTripletLoss, LazyQuadrupletLoss 2 | 3 | def make_losses(config): 4 | if config.LOSS.NAME == "LazyQuadrupletLoss": 5 | loss = LazyQuadrupletLoss( 6 | margin_dis=config.LOSS.MARGIN0, 7 | margin_sec=config.LOSS.MARGIN1) 8 | elif config.LOSS.NAME == "LazyTripletLoss": 9 | loss = LazyTripletLoss(margin=config.LOSS.MARGIN0) 10 | else: 11 | raise NotImplementedError(f"Unrecognized Loss Function {config.LOSS.NAME}!") 12 | return loss 
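Minimal usage sketch of make_losses (illustrative only, not part of the repository files): the yacs fields below are the ones the factory reads; the margin values, batch size, and 256-D descriptor size are made-up numbers; the 1 query + 2 positives + 18 negatives + 1 random "other" negative layout along dim 1 mirrors the slicing hard-coded in LazyQuadrupletLoss.forward in the next file.
import torch
from yacs.config import CfgNode as CN
from models.loop_closure.losses import make_losses

# Hypothetical config; make_losses only reads LOSS.NAME, LOSS.MARGIN0 and LOSS.MARGIN1.
cfg = CN()
cfg.LOSS = CN()
cfg.LOSS.NAME = "LazyQuadrupletLoss"
cfg.LOSS.MARGIN0 = 0.5   # margin of the triplet term (illustrative value)
cfg.LOSS.MARGIN1 = 0.2   # margin of the second-order term (illustrative value)

criterion = make_losses(cfg)
# descriptors per sample: 1 query, 2 positives, 18 negatives, 1 random "other" negative
feats = torch.randn(4, 22, 256)
loss, (trip_loss, second_loss) = criterion(feats)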
-------------------------------------------------------------------------------- /models/loop_closure/losses/lazy_quadruplet_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def _best_pos_distance(query, pos_vecs): 5 | ''' 6 | Find the minimum distance between the query feature and the positive features. 7 | Args: 8 | query (tensor: batch_size x 1 x feature_dim): Output feature of query 9 | pos_vecs (tensor: batch_size x num_pos x feature_dim): Output feature of positives 10 | 11 | Returns: 12 | best_pos (tensor: batch_size): Minimum distance of each query to positives. 13 | ''' 14 | if len(pos_vecs.shape) > 2: 15 | if len(query.shape) <= 2: 16 | query_copy = query.unsqueeze(1).repeat(1, pos_vecs.shape[1], 1) 17 | best_pos = torch.min(torch.norm( 18 | query_copy - pos_vecs, dim=2), dim=1) 19 | else: 20 | best_pos = torch.min(torch.norm(query - pos_vecs, dim=2), dim=1) 21 | best_pos = best_pos[0] 22 | else: 23 | best_pos = torch.norm(query - pos_vecs, dim=1) 24 | 25 | best_pos = best_pos.view(query.shape[0], -1) 26 | return best_pos 27 | 28 | 29 | def _best_neg_distance(query, neg_vecs): 30 | """ Calculate the distance between the query and the negative queries 31 | Args: 32 | query ([batch, 1, 8192]): Query information 33 | neg_vecs ([batch, 2, 8192]): Negative Query information 34 | Returns: 35 | [batch, 2, 1]: [distance] 36 | """ 37 | 38 | if len(query.shape) > 2: 39 | best_neg = torch.norm(query - neg_vecs, dim=2) 40 | else: 41 | query_copy = query.unsqueeze(1).repeat(1, neg_vecs.shape[1], 1) 42 | best_neg = torch.norm(query_copy - neg_vecs, dim=2) 43 | 44 | return best_neg 45 | 46 | 47 | class LazyTripletLoss(torch.nn.Module): 48 | ''' 49 | Lazy variant of the triplet loss 50 | ''' 51 | 52 | def __init__(self, margin=0.2): 53 | super(LazyTripletLoss, self).__init__() 54 | self.margin = margin 55 | 56 | def forward(self, q_vec, pos_vecs, neg_vecs): 57 | 58 | batch_size = q_vec.shape[0] 59 | num_neg = neg_vecs.shape[1] 60 | 61 | # query to positive queries 62 | best_pos = _best_pos_distance(q_vec, pos_vecs) 63 | q_pos = best_pos.reshape(-1, 1).repeat(1, num_neg) 64 | q_neg = _best_neg_distance(q_vec, neg_vecs) 65 | loss = q_pos - q_neg + self.margin 66 | loss = torch.clamp(loss, min=0.0) 67 | loss = torch.mean(loss) 68 | 69 | return loss 70 | 71 | 72 | class LazySecondLoss(torch.nn.Module): 73 | ''' 74 | Lazy variant of the second loss term used in the quadruplet loss 75 | ''' 76 | 77 | def __init__(self, margin=0.2): 78 | super(LazySecondLoss, self).__init__() 79 | self.margin = margin 80 | 81 | def forward(self, q_vec, pos_vecs, neg_vecs, other_neg): 82 | 83 | batch_size = q_vec.shape[0] 84 | num_neg = neg_vecs.shape[1] 85 | 86 | # query to positive queries 87 | best_pos = _best_pos_distance(q_vec, pos_vecs) 88 | other_neg_copy = other_neg.repeat(1, neg_vecs.shape[1], 1) 89 | 90 | q_pos = best_pos.repeat(1, neg_vecs.shape[1]) 91 | # print(neg_vecs.shape, other_neg_copy.shape) 92 | # print(best_pos.shape, q_vec.shape, pos_vecs.shape) 93 | q_neg = _best_neg_distance(neg_vecs, other_neg_copy) 94 | 95 | # print(q_pos.shape, q_neg.shape) 96 | loss = q_pos - q_neg + self.margin 97 | loss = torch.clamp(loss, min=0.0) 98 | loss = torch.mean(loss) 99 | 100 | return loss 101 | 102 | 103 | class LazyQuadrupletLoss(torch.nn.Module): 104 | ''' 105 | Lazy Quadruplet Loss 106 | Args: 107 | q_vec (tensor: batch_size x 1 x feature_dim): query feature 108 | pos_vecs (tensor: batch_size x num_pos x feature_dim): positive feature 109 | neg_vecs (tensor: batch_size x num_neg x feature_dim): 
negative feature 110 | other_neg (tensor: batch_size x num_neg x feature_dim): random negative feature 111 | m0 (float32): margin for triplet loss 112 | m1 (float32): margin for second loss 113 | Returns: quadruplet loss, triplet loss, second loss 114 | ''' 115 | 116 | def __init__(self, margin_dis, margin_sec): 117 | super(LazyQuadrupletLoss, self).__init__() 118 | self.trip_loss = LazyTripletLoss(margin_dis) 119 | self.second_loss = LazySecondLoss(margin_sec) 120 | 121 | def forward(self, x): 122 | q_vec = x[:, 0:1, :] 123 | pos_vecs = x[:, 1:3, :] 124 | neg_vecs = x[:, 3:21, :] 125 | neg_other = x[:, 21:, :] 126 | 127 | # |q_vec - neg_vecs| >= |q_vec - pos_vecs| + margin_trans 128 | trip_loss = self.trip_loss(q_vec, pos_vecs, neg_vecs) 129 | # |q_vec - other_neg| >= |q_vec - neg_vecs| + margin_sec 130 | second_loss = self.second_loss(q_vec, pos_vecs, neg_vecs, neg_other) 131 | loss = trip_loss + second_loss 132 | 133 | return loss, (trip_loss, second_loss) 134 | -------------------------------------------------------------------------------- /models/loop_closure/transformer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MetaSLAM/SphereVLAD/559d5f46b0f8217874273c78e622f28999200482/models/loop_closure/transformer/__init__.py -------------------------------------------------------------------------------- /models/loop_closure/transformer/attention.py: -------------------------------------------------------------------------------- 1 | """ 2 | Spatial_Attn is modified based on 3 | https://github.com/heykeetae/Self-Attention-GAN/blob/master/sagan_models.py 4 | """ 5 | 6 | import torch 7 | import torch.nn as nn 8 | 9 | 10 | class Spatial_Attn(nn.Module): 11 | """ Self attention Layer""" 12 | def __init__(self, in_dim): 13 | super(Spatial_Attn, self).__init__() 14 | self.chanel_in = in_dim 15 | if in_dim == 16: 16 | out_dim = in_dim // 2 17 | elif in_dim == 64: 18 | out_dim = in_dim * 2 19 | else: 20 | out_dim = in_dim // 8 21 | self.query_conv = nn.Conv2d(in_channels=in_dim, out_channels=out_dim, kernel_size=1) 22 | self.key_conv = nn.Conv2d(in_channels=in_dim, out_channels=out_dim, kernel_size=1) 23 | self.value_conv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim, kernel_size=1) 24 | self.gamma = nn.Parameter(torch.zeros(1)) 25 | 26 | self.softmax = nn.Softmax(dim=-1) 27 | 28 | def forward(self,x): 29 | """ 30 | inputs : 31 | x : input feature maps( B X C X W X H) 32 | returns : 33 | out : self attention value + input feature 34 | attention: B X N X N (N is Width*Height) 35 | """ 36 | m_batchsize,C,width,height = x.size() 37 | proj_query = self.query_conv(x).view(m_batchsize,-1,width*height).permute(0,2,1) # B X N X C 38 | proj_key = self.key_conv(x).view(m_batchsize,-1,width*height) # B X C x N 39 | energy = torch.bmm(proj_query,proj_key) # transpose check 40 | attention = self.softmax(energy) # B X N X N 41 | proj_value = self.value_conv(x).view(m_batchsize,-1,width*height) # B X C X N 42 | 43 | out = torch.bmm(proj_value, attention.permute(0,2,1)) 44 | out = out.view(m_batchsize, C, width, height) 45 | 46 | out = self.gamma*out + x 47 | return out, attention 48 | 49 | 50 | class Channel_Attn(nn.Module): 51 | """ Self channel attention Layer""" 52 | def __init__(self, in_dim, type): 53 | super(Channel_Attn, self).__init__() 54 | if type == "add": 55 | self.softmax = nn.Softmax(dim=-1) 56 | self.gamma = nn.Parameter(torch.zeros(1)) 57 | elif type == "reweight": 58 | self.sigmoid = nn.Sigmoid() 59 | 
self.pool = nn.AdaptiveAvgPool2d((in_dim, 1)) 60 | self.fc = nn.Linear(in_dim, in_dim) 61 | self.type = type 62 | 63 | def forward(self,x): 64 | """ 65 | inputs : 66 | x : input feature maps( B X C X N) 67 | returns : 68 | out : attention weight + input feature(element-wise) 69 | attention: B X C X C (C is channel number) 70 | """ 71 | m_batchsize,C,N = x.size() 72 | energy = torch.bmm(x, x.permute(0,2,1)) # transpose check 73 | 74 | if self.type == "add": 75 | attention = self.softmax(energy) # B X C X C 76 | value = torch.bmm(attention.permute(0,2,1), x) # B*C*N 77 | out = self.gamma*value + x 78 | elif self.type == "reweight": 79 | weight = self.pool(energy) 80 | weight = torch.bmm(energy, weight) 81 | weight = self.fc(x.squeeze(-1)) 82 | weight = self.sigmoid(weight) 83 | out = x.view(m_batchsize, C)*weight 84 | out = out.view(m_batchsize, -1) 85 | return out -------------------------------------------------------------------------------- /models/loop_closure/visual/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MetaSLAM/SphereVLAD/559d5f46b0f8217874273c78e622f28999200482/models/loop_closure/visual/__init__.py -------------------------------------------------------------------------------- /models/loop_closure/visual/spherevlad.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: Shiqi Zhao 3 | SphereVLAD 4 | ''' 5 | import numpy as np 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | from s2cnn import SO3Convolution 10 | from s2cnn import S2Convolution 11 | from s2cnn import so3_near_identity_grid 12 | from s2cnn import s2_near_identity_grid 13 | 14 | from ..aggregator.netvlad import NetVLAD 15 | 16 | 17 | class SphereVLAD(nn.Module): 18 | ''' 19 | sphvlad with bn 20 | ''' 21 | def __init__(self, config): 22 | super(SphereVLAD, self).__init__() 23 | 24 | bandwidth = (int)(config.DATA.SPH_IM.IMAGE_SIZE[0]/2.0) 25 | 26 | grid_s2 = s2_near_identity_grid(n_alpha=6, max_beta=np.pi/16, n_beta=1) 27 | grid_so3_1 = so3_near_identity_grid( 28 | n_alpha=6, max_beta=np.pi / 16, n_beta=1, max_gamma=2*np.pi, n_gamma=6) 29 | grid_so3_2 = so3_near_identity_grid( 30 | n_alpha=6, max_beta=np.pi / 8, n_beta=1, max_gamma=2*np.pi, n_gamma=6) 31 | grid_so3_3 = so3_near_identity_grid( 32 | n_alpha=6, max_beta=np.pi / 4, n_beta=1, max_gamma=2*np.pi, n_gamma=6) 33 | grid_so3_4 = so3_near_identity_grid( 34 | n_alpha=6, max_beta=np.pi / 2, n_beta=1, max_gamma=2*np.pi, n_gamma=6) 35 | 36 | self.conv1 = nn.Sequential( 37 | S2Convolution(nfeature_in=3, nfeature_out=8, 38 | b_in=bandwidth, b_out=bandwidth//2, grid=grid_s2), 39 | nn.BatchNorm3d(8), 40 | nn.ReLU()) 41 | 42 | self.conv2 = nn.Sequential( 43 | SO3Convolution(nfeature_in=8, nfeature_out=8, 44 | b_in=bandwidth//2, b_out=bandwidth//4, grid=grid_so3_1), 45 | nn.BatchNorm3d(8), 46 | nn.ReLU()) 47 | 48 | self.conv3 = nn.Sequential( 49 | SO3Convolution(nfeature_in=8, nfeature_out=16, 50 | b_in=bandwidth//4, b_out=bandwidth//8, grid=grid_so3_2), 51 | nn.BatchNorm3d(16), 52 | nn.ReLU()) 53 | 54 | self.conv4 = nn.Sequential( 55 | SO3Convolution(nfeature_in=16, nfeature_out=16, 56 | b_in=bandwidth//8, b_out=bandwidth//8, grid=grid_so3_3), 57 | nn.BatchNorm3d(16), 58 | nn.ReLU()) 59 | 60 | self.vlad = NetVLAD(num_clusters=config.MODEL.NETVLAD.CLUSTER_NUM, 61 | dim=config.MODEL.NETVLAD.FEATURE_DIM, 62 | normalize_input=config.MODEL.NETVLAD.NORMALIZE_INPUT, 63 | output_dim=config.MODEL.NETVLAD.OUTPUT_DIM, 64 | 
gate=config.MODEL.NETVLAD.GATE) 65 | 66 | 67 | def forward(self, x): 68 | # encoder 69 | x = self.conv1(x) 70 | x = self.conv2(x) 71 | x = self.conv3(x) 72 | x = self.conv4(x) 73 | # reshape 74 | x = x.view(x.shape[0], x.shape[1], x.shape[2], x.shape[3]*x.shape[4]) 75 | # vlad 76 | x = self.vlad(x) 77 | return x -------------------------------------------------------------------------------- /models/robotLCD.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('../') 3 | sys.path.append('../../') 4 | 5 | import time 6 | 7 | import torch 8 | import torch.nn as nn 9 | 10 | from models.loop_closure import make_models 11 | from models.loop_closure.losses import make_losses 12 | from utils import log_print 13 | 14 | 15 | #!=======================================================================# 16 | #! Training # 17 | #!=======================================================================# 18 | class LcdNet(object): 19 | def __init__(self, config, logger, use_cuda, device, neptune=None, gpu_ids=None): 20 | super(LcdNet, self).__init__() 21 | 22 | self.neptune = neptune 23 | self.config = config 24 | self.logger = logger 25 | 26 | #! Let's define different models 27 | self.model = make_models(config) 28 | log_print("Running {}".format(config.MODEL.NAME), 'r') 29 | log_print("#params of model: {}".format(sum([x.numel() 30 | for x in self.model.parameters()])), 'b') 31 | log_print("#trainable params of model: {}".format(sum([x.numel() 32 | for x in self.model.parameters() if x.requires_grad])), 'b') 33 | 34 | #! Data Parallel 35 | if use_cuda: 36 | if gpu_ids is not None and len(gpu_ids)>1 : 37 | print("Let's use", len(gpu_ids), "GPUs!") 38 | self.model = nn.DataParallel(self.model, device_ids=gpu_ids) 39 | self.model = self.model.to(device) 40 | 41 | #! Loss Function 42 | self.criterion = make_losses(config).to(device) 43 | 44 | #! Optimizer and Scheduler 45 | if config.TRAINING.OPTIMIZER.NAME == 'Adam': 46 | self.optimizer = torch.optim.Adam( 47 | filter(lambda p: p.requires_grad, self.model.parameters()), 48 | lr=config.TRAINING.OPTIMIZER.INIT_LEARNING_RATE) 49 | else: 50 | raise NotImplementedError(f"Unrecognized Optimizer {config.TRAINING.OPTIMIZER.NAME}") 51 | 52 | if config.TRAINING.SCHEDULER.NAME == None: 53 | self.scheduler = None 54 | elif config.TRAINING.SCHEDULER.NAME == "StepLR": 55 | self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, 56 | step_size=config.TRAINING.SCHEDULER.STEP_SIZE, 57 | gamma=config.TRAINING.SCHEDULER.GAMMA) 58 | elif config.TRAINING.SCHEDULER.NAME == "MultiStepLR": 59 | self.scheduler = torch.optim.lr_scheduler.MultiStepLR(self.optimizer, 60 | milestones=config.TRAINING.SCHEDULER.MILESTONES, 61 | gamma=config.TRAINING.SCHEDULER.GAMMA) 62 | else: 63 | raise NotImplementedError(f"Unrecognized Scheduler {config.TRAINING.SCHEDULER.NAME}") 64 | 65 | #! 
Resume Training & Testing 66 | if config.TRAINING.IS_TRAIN: 67 | if config.TRAINING.RESUME: 68 | self.epoch = self.load_checkpoint(config.WEIGHT.LOAD_ADDRESS, config.TRAINING.RESUME) + 1 69 | else: 70 | self.epoch = 1 71 | else: 72 | self.load_checkpoint(config.WEIGHT.LOAD_ADDRESS) 73 | 74 | def train_lcd(self, x): 75 | """[summary] 76 | Args: 77 | x ([type]): [description] 78 | """ 79 | 80 | self.model.train() 81 | data = torch.cat(x, dim=1) 82 | B = data.shape[0] 83 | N = data.shape[1] 84 | lidar_data = data.view(B*N, -1, data.shape[3], data.shape[4]) 85 | self.optimizer.zero_grad() 86 | feature_lidar = self.model(lidar_data).view(B, N, -1) 87 | 88 | loss_lidar, losses = self.criterion(feature_lidar) 89 | if self.neptune is not None: 90 | self.neptune['Sphere/training_lidar_loss'].append(loss_lidar.item()) 91 | self.neptune['Sphere/training_lidar_trip'].append(losses[0].item()) 92 | self.neptune['Sphere/training_lidar_secd'].append(losses[1].item()) 93 | loss_lidar.backward() 94 | self.optimizer.step() 95 | return loss_lidar.item() 96 | 97 | def eval_lcd(self, x): 98 | self.model.eval() 99 | data = torch.cat(x, dim=1) 100 | B = data.shape[0] 101 | N = data.shape[1] 102 | lidar_data = data.view(B*N, -1, data.shape[3], data.shape[4]) 103 | feature_lidar = self.model(lidar_data).view(B, N, -1) 104 | with torch.no_grad(): 105 | loss, (trip, secd) = self.criterion(feature_lidar) 106 | if self.neptune is not None: 107 | self.neptune['Sphere/eval_lidar_loss'].append(loss.item()) 108 | self.neptune['Sphere/eval_lidar_trip'].append(trip.item()) 109 | self.neptune['Sphere/eval_lidar_secd'].append(secd.item()) 110 | 111 | def adjust_learning_rate(self): 112 | if self.scheduler != None: 113 | self.scheduler.step() 114 | 115 | def save_checkpoint(self, epoch): 116 | state_dict = {"state_dict": self.model.state_dict(), 117 | "optimizer": self.optimizer.state_dict(), 118 | "epoch": epoch} 119 | torch.save(state_dict, 120 | '{}/pth/model_{}.pth'.format(self.config.OUTPUT.DIR, epoch)) 121 | 122 | def load_checkpoint(self, weight_path, resume=False): 123 | checkpoint = torch.load(weight_path) 124 | # load model parameters 125 | try: 126 | model_dict = checkpoint["state_dict"] 127 | except: 128 | model_dict = checkpoint # for pointnetvlad 129 | try: 130 | self.model.load_state_dict(model_dict) 131 | except: 132 | from collections import OrderedDict 133 | model_dict = OrderedDict() 134 | for key, value in checkpoint["state_dict"].items(): 135 | new_key = key.split('module.')[-1] 136 | model_dict[new_key] = value 137 | self.model.load_state_dict(model_dict) 138 | log_print("Load models from {}!".format(weight_path), 'g') 139 | # load optimizer parameters 140 | if resume: 141 | self.optimizer.load_state_dict(checkpoint["optimizer"]) 142 | log_print("Load optimizer parameters from {}!".format(weight_path), 'g') 143 | return checkpoint["epoch"] 144 | else: 145 | return 146 | 147 | 148 | #!=======================================================================# 149 | #! 
Inference # 150 | #!=======================================================================# 151 | def infer_frame(self, x, t=False): 152 | if self.config.MODEL.TYPE in ["Lidar", "LiSPH"]: 153 | return self.infer_lidar(*x, t) 154 | elif self.config.MODEL.TYPE == "Image": 155 | return self.infer_image(*x, t) 156 | 157 | def infer_image(self, x, t=False): 158 | self.model.eval() 159 | x = x.view(1, 3, x.shape[-2], x.shape[-1]) 160 | if t == True: 161 | t1 = time.time() 162 | with torch.no_grad(): 163 | x = self.model(x).view(-1) 164 | if t == True: 165 | t2 = time.time() 166 | t_delta = t2-t1 167 | return x.detach().cpu().numpy(), t_delta 168 | return x.detach().cpu().numpy() 169 | 170 | def infer_lidar(self, x, t=False): 171 | self.model.eval() 172 | x = x.view(1, self.config.MODEL.S2CNN.INPUT_CHANNEL_NUM, x.shape[-2], x.shape[-1]) 173 | if t == True: 174 | t1 = time.time() 175 | with torch.no_grad(): 176 | x = self.model(x).view(-1) 177 | if t == True: 178 | t2 = time.time() 179 | t_delta = t2-t1 180 | return x.detach().cpu().numpy(), t_delta 181 | return x.detach().cpu().numpy() 182 | -------------------------------------------------------------------------------- /pics/spherevlad.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MetaSLAM/SphereVLAD/559d5f46b0f8217874273c78e622f28999200482/pics/spherevlad.png -------------------------------------------------------------------------------- /pics/spherevlad2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MetaSLAM/SphereVLAD/559d5f46b0f8217874273c78e622f28999200482/pics/spherevlad2.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | tqdm 4 | open3d 5 | scikit-image 6 | Pillow 7 | pandas 8 | yacs 9 | neptune 10 | lie_learn -------------------------------------------------------------------------------- /train_lcd.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author: Peng Yin, Shiqi Zhao 3 | train.py 4 | ''' 5 | 6 | import os 7 | import sys 8 | import argparse 9 | import numpy as np 10 | import time 11 | import datetime 12 | import torch 13 | import torch.nn as nn 14 | from tqdm import tqdm 15 | 16 | from config import cfg 17 | from models import set_lcd_model 18 | from dataloader import make_data_loader 19 | from eval import EvaluationPitts 20 | from utils import setup_logger, log_print 21 | 22 | #!============================================================================================# 23 | #! Parameters for Dataset and Network 24 | #!============================================================================================# 25 | 26 | 27 | def para_args(): 28 | parser = argparse.ArgumentParser(description="Network configurations!") 29 | parser.add_argument("--config-network", default="config/network/spherevlad.yaml", metavar="FILE", 30 | help="path to config file", type=str) 31 | parser.add_argument("--config-dataset", default="config/dataset/PITT.yaml", metavar="FILE", 32 | help="path to config file", type=str) 33 | args = parser.parse_args() 34 | return args 35 | 36 | 37 | def train(config, logger, neptune): 38 | 39 | #! Define Model 40 | lcd, gpu_conf = set_lcd_model(config, logger, neptune) 41 | [_, device, gpu_ids] = gpu_conf 42 | 43 | #! 
Define Dataloader 44 | train_loader = make_data_loader(config, gpu_ids, is_train=True) 45 | if config.DATA.DATASET_NAME in ["PITT", "Campus"]: 46 | eval_loader = make_data_loader(config, gpu_ids, is_train=False) 47 | else: 48 | eval_loader = [] 49 | log_print("train batch with {}, eval batch with {}".format( 50 | len(train_loader), len(eval_loader)), 'g') 51 | 52 | #! Define Tester 53 | if config.DATA.DATASET_NAME == "PITT": 54 | tester = EvaluationPitts(config, lcd, device) 55 | 56 | #! Main loop 57 | prev_time = time.time() 58 | best_recall = 0 59 | for epoch in range(lcd.epoch, config.TRAINING.EPOCH+1): 60 | 61 | log_print("Train epoch {}".format(epoch), "g") 62 | 63 | #! Do Training 64 | for i, batch in enumerate(train_loader): 65 | 66 | # * Determine approximate time left 67 | batches_done = epoch * len(train_loader) + i 68 | batches_left = config.TRAINING.EPOCH * \ 69 | len(train_loader) - batches_done 70 | time_left = datetime.timedelta( 71 | seconds = batches_left * (time.time() - prev_time)) 72 | prev_time = time.time() 73 | data = [x.to(device, dtype=torch.float) for x in batch] 74 | loss_lidar = lcd.train_lcd(data) 75 | sys.stdout.write( 76 | "\r[Epoch %d/%d] [Batch %d/%d] [LiDAR loss: %f], ETA: %s" 77 | % (epoch, config.TRAINING.EPOCH, i, len(train_loader), loss_lidar, time_left) 78 | ) 79 | 80 | #! Do Evaluation 81 | if config.DATA.DATASET_NAME in ["PITT", "Campus"]: 82 | for i, batch in tqdm(enumerate(eval_loader), total=len(eval_loader)): 83 | data = [x.to(device, dtype=torch.float) for x in batch] 84 | lcd.eval_lcd(data) 85 | 86 | #! Do Test 87 | if config.DATA.DATASET_NAME in ["PITT", "Campus"]: 88 | test_stats = [] 89 | for traj_num in config.DATA.TEST_LIST: 90 | if config.DATA.DATASET_NAME == "PITT": 91 | recall, _, _ = tester.get_features_recall(traj_num, 2, 0, 0) 92 | else: 93 | recall, _, _ = tester.infer(traj_num) 94 | test_stats.append(recall) 95 | test_stats = np.array(test_stats).sum(axis=0) 96 | curr_recall = test_stats[0]/test_stats[-1] 97 | if curr_recall > best_recall: 98 | lcd.save_checkpoint(666) 99 | logger.info(f"[Epoch {epoch}/{config.TRAINING.EPOCH}] Recall@1 {curr_recall}") 100 | log_print(f"[Epoch {epoch}/{config.TRAINING.EPOCH}] Recall@1 {curr_recall}", 'b') 101 | 102 | #! Adjust Learning Rate 103 | lcd.adjust_learning_rate() 104 | 105 | #! 
Save Model 106 | if epoch % 2 == 0: 107 | lcd.save_checkpoint(epoch) 108 | 109 | 110 | if __name__ == "__main__": 111 | args = para_args() 112 | cfg.merge_from_file(args.config_network) 113 | cfg.merge_from_file(args.config_dataset) 114 | cfg.TRAINING.IS_TRAIN = True 115 | logger, neptune, cfg = setup_logger( 116 | "Loop closure benchmark", cfg, args, is_train=True) 117 | cfg.freeze() 118 | train(cfg, logger, neptune) 119 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | pointProcess.py: functions about point processing 3 | logger.py: training log 4 | log_print.py: print info with different color and font 5 | ''' 6 | from .pointProcess import pcd_downsample, pc_normalize, LaserProjection 7 | from .log_print import log_print 8 | from .logger import setup_logger 9 | -------------------------------------------------------------------------------- /utils/equirectRotate/EquirectRotate.py: -------------------------------------------------------------------------------- 1 | from .utils import * 2 | 3 | 4 | class EquirectRotate: 5 | """ 6 | @:param height: height of image to rotate 7 | @:param width: width of image to rotate 8 | @:param rotation: x, y, z degree to rotate 9 | """ 10 | 11 | def __init__(self, height: int, width: int, rotation: tuple): 12 | assert height * 2 == width 13 | self.height = height 14 | self.width = width 15 | 16 | out_img = np.zeros((height, width)) # (H, W) 17 | 18 | # mapping equirect coordinate into LatLon coordinate system 19 | self.out_LonLat = Pixel2LatLon(out_img) # (H, W, (lat, lon)) 20 | 21 | # mapping LatLon coordinate into xyz(sphere) coordinate system 22 | self.out_xyz = LatLon2Sphere(self.out_LonLat) # (H, W, (x, y, z)) 23 | 24 | self.src_xyz = np.zeros_like(self.out_xyz) # (H, W, (x, y, z)) 25 | 26 | # make pair of xyz coordinate between src_xyz and out_xyz. 27 | # src_xyz @ R = out_xyz 28 | # src_xyz = out_xyz @ R^t 29 | self.R = getRotMatrix(np.array(rotation)) 30 | Rt = np.transpose(self.R) # we should fill out the output image, so use R^t. 31 | for i in range(self.height): 32 | for j in range(self.width): 33 | self.src_xyz[i][j] = self.out_xyz[i][j] @ Rt 34 | 35 | # mapping xyz(sphere) coordinate into LatLon coordinate system 36 | self.src_LonLat = Sphere2LatLon(self.src_xyz) # (H, W, 2) 37 | 38 | # mapping LatLon coordinate into equirect coordinate system 39 | self.src_Pixel = LatLon2Pixel(self.src_LonLat) # (H, W, 2) 40 | 41 | def rotate(self, image): 42 | """ 43 | :param image: (H, W, C) 44 | :return: rotated (H, W, C) 45 | """ 46 | assert image.shape[:2] == (self.height, self.width) 47 | 48 | rotated_img = np.zeros_like(image) # (H, W, C) 49 | for i in range(self.height): 50 | for j in range(self.width): 51 | pixel = self.src_Pixel[i][j] 52 | rotated_img[i][j] = image[pixel[0]][pixel[1]] 53 | return rotated_img 54 | 55 | def setInverse(self, isInverse=False): 56 | if not isInverse: 57 | return 58 | 59 | # re-generate coordinate pairing 60 | self.R = np.transpose(self.R) 61 | Rt = np.transpose(self.R) # we should fill out the output image, so use R^t. 
62 | for i in range(self.height): 63 | for j in range(self.width): 64 | self.src_xyz[i][j] = self.out_xyz[i][j] @ Rt 65 | 66 | # mapping xyz(sphere) coordinate into LatLon coordinate system 67 | self.src_LonLat = Sphere2LatLon(self.src_xyz) # (H, W, 2) 68 | 69 | # mapping LatLon coordinate into equirect coordinate system 70 | self.src_Pixel = LatLon2Pixel(self.src_LonLat) # (H, W, 2) 71 | 72 | 73 | def pointRotate(h, w, index, rotation): 74 | """ 75 | :param i, j: index of pixel in equirectangular 76 | :param rotation: (yaw, pitch, roll) in degree 77 | :return: rotated index of pixel 78 | """ 79 | i, j = index 80 | assert (0 <= i < h) and (0 <= j < w) 81 | 82 | # convert pixel index to LatLon 83 | Lat = (0.5 - i / h) * np.pi 84 | Lon = (j / w - 0.5) * 2 * np.pi 85 | 86 | # convert LatLon to xyz 87 | x = np.cos(Lat) * np.cos(Lon) 88 | y = np.cos(Lat) * np.sin(Lon) 89 | z = np.sin(Lat) 90 | xyz = np.array([x, y, z]) 91 | R = getRotMatrix(np.array(rotation)) 92 | rotated_xyz = xyz @ R 93 | 94 | _Lat = np.pi / 2 - np.arccos(rotated_xyz[2]) 95 | _Lon = np.arctan2(rotated_xyz[1], rotated_xyz[0]) 96 | 97 | _i = h * (0.5 - _Lat / np.pi) % h 98 | _j = w * (0.5 + _Lon / (2 * np.pi)) % w 99 | 100 | return _i, _j 101 | -------------------------------------------------------------------------------- /utils/equirectRotate/__init__.py: -------------------------------------------------------------------------------- 1 | from .EquirectRotate import EquirectRotate 2 | from .EquirectRotate import pointRotate 3 | -------------------------------------------------------------------------------- /utils/equirectRotate/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def getRotMatrix(rotation): 4 | """ 5 | :param rotation: (yaw, pitch, roll) in degree 6 | :return: general rotational matrix 7 | refer this: https://en.wikipedia.org/wiki/Rotation_matrix#General_rotations 8 | """ 9 | yaw, pitch, roll = (rotation / 180) * np.pi 10 | 11 | Rz = np.array([[np.cos(yaw), -np.sin(yaw), 0], 12 | [np.sin(yaw), np.cos(yaw), 0], 13 | [0, 0, 1]]) 14 | Ry = np.array([[np.cos(pitch), 0, np.sin(pitch)], 15 | [0, 1, 0], 16 | [-np.sin(pitch), 0, np.cos(pitch)]]) 17 | Rx = np.array([[1, 0, 0], 18 | [0, np.cos(roll), -np.sin(roll)], 19 | [0, np.sin(roll), np.cos(roll)]]) 20 | 21 | return Rz @ Ry @ Rx 22 | 23 | 24 | def Pixel2LatLon(equirect): 25 | # LatLon (H, W, (lat, lon)) 26 | h, w = equirect.shape 27 | 28 | Lat = (0.5 - np.arange(0, h)/h) * np.pi 29 | Lon = (np.arange(0, w) / w - 0.5) * 2 * np.pi 30 | 31 | Lat = np.tile(Lat[:, np.newaxis], w) 32 | Lon = np.tile(Lon, (h, 1)) 33 | 34 | return np.dstack((Lat, Lon)) 35 | 36 | 37 | def LatLon2Sphere(LatLon): 38 | Lat = LatLon[:, :, 0] 39 | Lon = LatLon[:, :, 1] 40 | x = np.cos(Lat) * np.cos(Lon) 41 | y = np.cos(Lat) * np.sin(Lon) 42 | z = np.sin(Lat) 43 | 44 | return np.dstack((x, y, z)) 45 | 46 | 47 | def Sphere2LatLon(xyz): 48 | Lat = np.pi / 2 - np.arccos(xyz[:, :, 2]) 49 | Lon = np.arctan2(xyz[:, :, 1], xyz[:, :, 0]) 50 | 51 | return np.dstack((Lat, Lon)) 52 | 53 | 54 | def LatLon2Pixel(LatLon): 55 | h, w, _ = LatLon.shape 56 | Lat = LatLon[:, :, 0] 57 | Lon = LatLon[:, :, 1] 58 | i = (h * (0.5 - Lat / np.pi)) % h 59 | j = (w * (0.5 + Lon / (2 * np.pi))) % w 60 | 61 | return np.dstack((i, j)).astype('int') -------------------------------------------------------------------------------- /utils/geometry/__init__.py: -------------------------------------------------------------------------------- 1 | from 
.so3_rotate import get_projection_grid, rand_rotation_matrix, rotate_grid, project_2d_on_sphere -------------------------------------------------------------------------------- /utils/geometry/equi_trans.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from equilib import equi2pers, equi2equi 3 | 4 | class EquiTrans(object): 5 | """Class that contains Equi Trans with x,y,z,r""" 6 | 7 | def __init__(self, pers, fov_x, rot): 8 | self.w_pers = pers[0] 9 | self.h_pers = pers[1] 10 | self.fov_x = fov_x 11 | self.rot = rot #{'roll': 0.,'pitch': 0, 'yaw': 0.} 12 | 13 | def trans(self, equi_img): 14 | equi_img = np.asarray(equi_img) 15 | equi_img = np.transpose(equi_img, (2, 0, 1)) 16 | pers_img = equi2pers( 17 | equi=equi_img, 18 | rot=self.rot, 19 | w_pers=self.w_pers, 20 | h_pers=self.h_pers, 21 | fov_x=self.fov_x, 22 | skew=0.0, 23 | sampling_method="default", 24 | mode="bilinear",) 25 | pers_img = np.transpose(pers_img, (1, 2, 0)) 26 | return pers_img 27 | 28 | class Equi2Equi(object): 29 | 30 | def __init__(self, pers, rot): 31 | self.w_out = pers[0] 32 | self.h_out = pers[1] 33 | # self.rot={'roll': 0.,'pitch': 0, 'yaw': 0.} 34 | self.rot = rot 35 | 36 | def trans(self, equi_img): 37 | equi_img = np.asarray(equi_img) 38 | equi_img = np.transpose(equi_img, (2, 0, 1)) 39 | img_out = equi2equi(equi_img, self.rot, w_out=self.w_out, h_out=self.h_out) 40 | img_out = np.transpose(img_out, (1, 2, 0)) 41 | return img_out 42 | 43 | -------------------------------------------------------------------------------- /utils/geometry/so3_rotate.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from PIL import Image 4 | 5 | import lie_learn.spaces.S2 as S2 6 | from scipy.spatial.transform import Rotation as R 7 | from s2cnn import so3_rotation, so3_integrate 8 | 9 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 10 | 11 | NORTHPOLE_EPSILON = 1e-3 12 | 13 | def rand_rotation_matrix(deflection=1.0, randnums=None): 14 | """ 15 | Creates a random rotation matrix. 16 | 17 | deflection: the magnitude of the rotation. For 0, no rotation; for 1, completely random 18 | rotation. Small deflection => small perturbation. 19 | randnums: 3 random numbers in the range [0, 1]. If `None`, they will be auto-generated. 20 | 21 | # http://blog.lostinmyterminal.com/python/2015/05/12/random-rotation-matrix.html 22 | """ 23 | 24 | if randnums is None: 25 | randnums = np.random.uniform(size=(3,)) 26 | 27 | theta, phi, z = randnums 28 | 29 | theta = theta * 2.0*deflection*np.pi # Rotation about the pole (Z). 30 | phi = phi * 2.0*deflection*np.pi # For direction of pole deflection. 31 | z = z * 2.0*0.00001*deflection # For magnitude of pole deflection. 32 | 33 | # Compute a vector V used for distributing points over the sphere 34 | # via the reflection I - V Transpose(V). This formulation of V 35 | # will guarantee that if x[1] and x[2] are uniformly distributed, 36 | # the reflected points will be uniform on the sphere. Note that V 37 | # has length sqrt(2) to eliminate the 2 in the Householder matrix. 38 | 39 | r = np.sqrt(z) 40 | V = ( 41 | np.sin(phi) * r, 42 | np.cos(phi) * r, 43 | np.sqrt(2.0 - z) 44 | ) 45 | 46 | st = np.sin(theta) 47 | ct = np.cos(theta) 48 | 49 | R = np.array(((ct, st, 0), (-st, ct, 0), (0, 0, 1))) 50 | 51 | # Construct the rotation matrix ( V Transpose(V) - I ) R. 
52 | 53 | M = (np.outer(V, V) - np.eye(3)).dot(R) 54 | return M 55 | 56 | 57 | def rotate_grid(rot, grid): 58 | x, y, z = grid 59 | xyz = np.array((x, y, z)) 60 | x_r, y_r, z_r = np.einsum('ij,jab->iab', rot, xyz) 61 | return x_r, y_r, z_r 62 | 63 | 64 | def get_projection_grid(b, grid_type="Driscoll-Healy"): 65 | ''' returns the spherical grid in euclidean 66 | coordinates, where the sphere's center is moved 67 | to (0, 0, 1)''' 68 | theta, phi = S2.meshgrid(b=b, grid_type=grid_type) 69 | theta -= np.pi/2 70 | x_ = np.cos(theta) * np.cos(phi) 71 | y_ = np.cos(theta) * np.sin(phi) 72 | z_ = np.sin(theta) 73 | 74 | return x_, y_, z_ 75 | 76 | 77 | def project_sphere_on_xy_plane(grid, projection_origin): 78 | ''' returns xy coordinates on the plane 79 | obtained from projecting each point of 80 | the spherical grid along the ray from 81 | the projection origin through the sphere ''' 82 | 83 | sx, sy, sz = projection_origin 84 | x, y, z = grid 85 | 86 | yaw = np.arctan2(y, x) 87 | pitch = np.arcsin(z) 88 | 89 | rx = 0.5 * (yaw / np.pi + 1.0) 90 | ry = (pitch + np.pi/2) /np.pi 91 | 92 | return ry, rx 93 | 94 | 95 | def sample_within_bounds(signal, x, y, bounds): 96 | ''' ''' 97 | xmin, xmax, ymin, ymax = bounds 98 | 99 | idxs = (xmin <= x) & (x < xmax) & (ymin <= y) & (y < ymax) 100 | 101 | if len(signal.shape) > 2: 102 | sample = np.zeros((signal.shape[0], x.shape[0], x.shape[1])) 103 | sample[:, idxs] = signal[:, x[idxs], y[idxs]] 104 | else: 105 | sample = np.zeros((x.shape[0], x.shape[1])) 106 | sample[idxs] = signal[x[idxs], y[idxs]] 107 | return sample 108 | 109 | 110 | def sample_bilinear(signal, rx, ry): 111 | ''' ''' 112 | 113 | signal_dim_x = signal.shape[1] 114 | signal_dim_y = signal.shape[2] 115 | 116 | rx *= signal_dim_x 117 | ry *= signal_dim_y 118 | 119 | # discretize sample position 120 | ix = rx.astype(int) 121 | iy = ry.astype(int) 122 | 123 | # obtain four sample coordinates 124 | ix0 = ix - 1 125 | iy0 = iy - 1 126 | ix1 = ix + 1 127 | iy1 = iy + 1 128 | 129 | bounds = (0, signal_dim_x, 0, signal_dim_y) 130 | 131 | # sample signal at each four positions 132 | signal_00 = sample_within_bounds(signal, ix0, iy0, bounds) 133 | signal_10 = sample_within_bounds(signal, ix1, iy0, bounds) 134 | signal_01 = sample_within_bounds(signal, ix0, iy1, bounds) 135 | signal_11 = sample_within_bounds(signal, ix1, iy1, bounds) 136 | 137 | # linear interpolation in x-direction 138 | fx1 = (ix1-rx) * signal_00 + (rx-ix0) * signal_10 139 | fx2 = (ix1-rx) * signal_01 + (rx-ix0) * signal_11 140 | 141 | # linear interpolation in y-direction 142 | return (iy1 - ry) * fx1 + (ry - iy0) * fx2 143 | 144 | 145 | def project_2d_on_sphere(signal, grid, projection_origin=None): 146 | ''' ''' 147 | if projection_origin is None: 148 | projection_origin = (0, 0, 2 + NORTHPOLE_EPSILON) 149 | 150 | rx, ry = project_sphere_on_xy_plane(grid, projection_origin) 151 | sample = sample_bilinear(signal, rx, ry)/4.0 152 | 153 | return sample 154 | 155 | if __name__=='__main__': 156 | image = Image.open("test.png") 157 | image_name = "raw.png" 158 | image.save(image_name) 159 | signals = np.transpose(np.array(image).astype(np.float64), (2, 0, 1)) 160 | grid = get_projection_grid(b=32) 161 | rot = rand_rotation_matrix(deflection=1) 162 | rotated_grid = rotate_grid(rot, grid) 163 | rot_image = project_2d_on_sphere(signals, rotated_grid, projection_origin=[0,0,0.0001]) 164 | 165 | rot_image = np.transpose(rot_image, (1, 2, 0)) 166 | image_name = "raw_rot.png" 167 | image = Image.fromarray(rot_image) 168 | 
image.save(image_name) -------------------------------------------------------------------------------- /utils/log_print.py: -------------------------------------------------------------------------------- 1 | """ Init file for utils""" 2 | 3 | 4 | def red(sthg): 5 | ''' red color ''' 6 | return "\033[01;31m{0}\033[00m".format(str(sthg)) 7 | 8 | 9 | def green(sthg): 10 | ''' green color ''' 11 | return "\033[01;32m{0}\033[00m".format(str(sthg)) 12 | 13 | 14 | def lblue(sthg): 15 | ''' green color ''' 16 | return "\033[01;36m{0}\033[00m".format(str(sthg)) 17 | 18 | 19 | def pink(sthg): 20 | ''' pink color ''' 21 | return "\033[01;35m{0}\033[00m".format(str(sthg)) 22 | 23 | 24 | def blue(sthg): 25 | ''' blue color ''' 26 | return "\033[01;34m{0}\033[00m".format(str(sthg)) 27 | 28 | 29 | def yellow(sthg): 30 | ''' yellow color ''' 31 | return "\033[01;93m{0}\033[00m".format(str(sthg)) 32 | 33 | 34 | def log_print(sthg, color='', logger=None): 35 | ''' Print colored stream, color options:r, g, b, lb, y, p. ''' 36 | if color == "r": 37 | print(red(sthg)) 38 | elif color == "g": 39 | print(green(sthg)) 40 | elif color == "b": 41 | print(blue(sthg)) 42 | elif color == "lb": 43 | print(lblue(sthg)) 44 | elif color == "y": 45 | print(yellow(sthg)) 46 | elif color == "p": 47 | print(pink(sthg)) 48 | else: 49 | print(sthg) 50 | if logger != None: 51 | logger.info(sthg) 52 | -------------------------------------------------------------------------------- /utils/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import logging 4 | from datetime import datetime 5 | import neptune 6 | 7 | def setup_logger(name, cfg, args, is_train): 8 | logger = logging.getLogger(name) 9 | logger.setLevel(logging.DEBUG) 10 | 11 | now = datetime.now() 12 | dt_string = now.strftime("%d-%m-%Y-%H-%M-%S") 13 | if is_train: 14 | Output_dir = os.path.join(cfg.OUTPUT.DIR, "{}-{}-{}".format(cfg.MODEL.NAME, cfg.DATA.DATASET_NAME, dt_string)) 15 | else: 16 | Output_dir = os.path.join(cfg.OUTPUT.DIR, "Inference-{}-{}-{}".format(cfg.MODEL.NAME, cfg.DATA.DATASET_NAME, dt_string)) 17 | 18 | formatter = logging.Formatter("%(asctime)s %(name)s %(levelname)s: %(message)s") 19 | 20 | if not os.path.exists(Output_dir): 21 | os.makedirs(Output_dir) 22 | os.makedirs("{}/img".format(Output_dir)) 23 | os.makedirs("{}/pth".format(Output_dir)) 24 | 25 | fh = logging.FileHandler(os.path.join(Output_dir, "log.txt")) 26 | fh.setLevel(logging.DEBUG) 27 | fh.setFormatter(formatter) 28 | logger.addHandler(fh) 29 | 30 | logger.info(args) 31 | 32 | logger.info("Loaded network configuration file {}".format(args.config_network)) 33 | logger.info("Loaded dataset configuration file {}".format(args.config_dataset)) 34 | 35 | os.system('cp {} {}/MODEL.yaml'.format(args.config_network, Output_dir)) 36 | os.system('cp {} {}/DATA.yaml'.format(args.config_dataset, Output_dir)) 37 | 38 | cfg.OUTPUT.DIR = Output_dir 39 | 40 | # neptune_run = neptune.init_run(project=cfg.PROJECT_NAME, 41 | # api_token=cfg.API_TOKEN) 42 | # params = {"Learning_rate": cfg.TRAINING.OPTIMIZER.INIT_LEARNING_RATE, 43 | # "Optimizer": cfg.TRAINING.OPTIMIZER.NAME, 44 | # "Scheduler": cfg.TRAINING.SCHEDULER.NAME, 45 | # "Positive_Dis": cfg.DATA.POSITIVES_RADIUS, 46 | # "Negtive_Dis": cfg.DATA.NEGATIVES_RADIUS, 47 | # "Loss": cfg.LOSS.NAME, 48 | # "Loss_Margin0": cfg.LOSS.MARGIN0, 49 | # "Loss_Margin1": cfg.LOSS.MARGIN1, 50 | # "Batch_Size": cfg.TRAINING.BATCH.BATCH_SIZE} 51 | # neptune_run["parameters"] = params 52 | 
53 | 
54 |     return logger, neptune_run, cfg
55 | 
--------------------------------------------------------------------------------
/utils/pointProcess.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | import open3d as o3d
4 | from copy import copy, deepcopy
5 | 
6 | 
7 | def pc_normalize(pc):
8 |     centroid = np.mean(pc, axis=0)
9 |     pc = pc - centroid
10 |     m = np.max(np.sqrt(np.sum(pc**2, axis=1)))
11 |     pc = pc / m
12 |     return pc
13 | 
14 | 
15 | def pcd_downsample(initPcd, desiredNumOfPoint, leftVoxelSize, rightVoxelSize):
16 |     """
17 |     Downsample a point cloud to exactly desiredNumOfPoint points by binary-searching the voxel size.
18 |     Modified from https://blog.csdn.net/SJTUzhou/article/details/122927787
19 |     """
20 |     assert leftVoxelSize > rightVoxelSize, "leftVoxelSize should be larger than rightVoxelSize"
21 |     assert len(initPcd.points) >= desiredNumOfPoint, "desiredNumOfPoint should be less than or equal to the number of points in the given point cloud."
22 |     if len(initPcd.points) == desiredNumOfPoint:
23 |         return initPcd
24 | 
25 |     pcd = deepcopy(initPcd)
26 |     pcd = pcd.voxel_down_sample(leftVoxelSize)
27 |     assert len(pcd.points) <= desiredNumOfPoint, "Please specify a larger leftVoxelSize."
28 |     pcd = deepcopy(initPcd)
29 |     pcd = pcd.voxel_down_sample(rightVoxelSize)
30 |     assert len(pcd.points) >= desiredNumOfPoint, "Please specify a smaller rightVoxelSize."
31 | 
32 |     pcd = deepcopy(initPcd)
33 |     midVoxelSize = (leftVoxelSize + rightVoxelSize) / 2.
34 |     pcd = pcd.voxel_down_sample(midVoxelSize)
35 |     while len(pcd.points) != desiredNumOfPoint:
36 |         if len(pcd.points) < desiredNumOfPoint:
37 |             leftVoxelSize = copy(midVoxelSize)
38 |         else:
39 |             rightVoxelSize = copy(midVoxelSize)
40 |         midVoxelSize = (leftVoxelSize + rightVoxelSize) / 2.
41 |         pcd = deepcopy(initPcd)
42 |         pcd = pcd.voxel_down_sample(midVoxelSize)
43 | 
44 |     return pcd
45 | 
46 | 
47 | class LaserProjection(object):
48 |     """Project a point cloud (x, y, z) into top-down (BEV) and spherical range images."""
49 | 
50 |     def __init__(self, device, top_size=[64, 64], z_range=[-100.0, 100.0], sph_size=[64, 64], fov_range=[-90, 90], max_dis=30, sph_ocu=16, visib_thresh=3.0, visib_radius=25):
51 |         #! For top down view
52 |         self.proj_H = int(top_size[0] / 2)
53 |         self.proj_W = int(top_size[1] / 2)
54 |         self.proj_Z_min = z_range[0]
55 |         self.proj_Z_max = z_range[1]
56 | 
57 |         self.device = device
58 | 
59 |         #! For spherical view
60 |         self.sph_H = sph_size[0]
61 |         self.sph_W = sph_size[1]
62 |         self.sph_down = fov_range[0]
63 |         self.sph_up = fov_range[1]
64 | 
65 |         #! Set sph occlusion factor
66 |         self.sph_ocu = sph_ocu
67 | 
68 |         #! For active range
69 |         self.max_dis = max_dis
70 | 
71 |         self.visib_thresh = visib_thresh
72 |         self.visib_radius = visib_radius
73 | 
74 |     def proj(self, pt):
75 |         sph_proj = self.do_sph_projection(pt)
76 |         sph_proj = sph_proj.reshape([1, self.sph_H, self.sph_W])
77 |         sph_proj = torch.from_numpy(sph_proj).to(
78 |             self.device, dtype=torch.float)
79 |         return sph_proj
80 | 
81 |     def proj_img(self, pt):
82 |         sph_proj = self.do_sph_projection(pt)
83 |         sph_proj = sph_proj.reshape([self.sph_H, self.sph_W, 1])
84 |         return sph_proj
85 | 
86 |     def do_top_projection(self, points):
87 |         """ Project a point cloud into a bird's-eye-view (BEV) height image
88 |         """
89 | 
90 |         # get scan components
91 |         scan_x = points[:, 0]
92 |         scan_y = points[:, 1]
93 |         scan_z = points[:, 2]
94 | 
95 |         # get projections in image coords
96 |         proj_x = scan_x/self.max_dis
97 |         proj_y = scan_y/self.max_dis
98 | 
99 |         # scale to image size using angular resolution
100 |         proj_x = (proj_x + 1.0)*self.proj_W  # in [0.0, 2W]
101 |         proj_y = (proj_y + 1.0)*self.proj_H  # in [0.0, 2H]
102 | 
103 |         # round and clamp for use as index
104 |         proj_x = np.floor(proj_x)
105 |         proj_x = np.minimum(2*self.proj_W - 1, proj_x)
106 |         proj_x = np.maximum(0, proj_x).astype(np.int32)  # in [0, 2W-1]
107 | 
108 |         proj_y = np.floor(proj_y)
109 |         proj_y = np.minimum(2*self.proj_H - 1, proj_y)
110 |         proj_y = np.maximum(0, proj_y).astype(np.int32)  # in [0, 2H-1]
111 | 
112 |         data_grid = np.zeros((2*self.proj_H, 2*self.proj_W), dtype='float64')
113 |         data_grid[proj_y, proj_x] = scan_z
114 | 
115 |         data_norm = (data_grid - data_grid.min()) / \
116 |             (data_grid.max() - data_grid.min())
117 | 
118 |         return data_norm
119 | 
120 |     def do_sph_projection(self, points):
121 |         """ Project a point cloud into a spherical (range) image.
122 |             Each point is mapped to a pixel via its yaw and pitch, and the
123 |             pixel stores the point's depth; points are written from far to
124 |             near so that closer returns overwrite farther ones.
125 |         """
126 | 
127 |         # laser parameters
128 |         fov_up = self.sph_up / 180.0 * np.pi  # field of view up in rad
129 |         fov_down = self.sph_down / 180.0 * np.pi  # field of view down in rad
130 |         fov = abs(fov_down) + abs(fov_up)  # get field of view total in rad
131 | 
132 |         # get depth of all points
133 |         depth = np.linalg.norm(points, 2, axis=1)
134 | 
135 |         # get scan components
136 |         scan_x = points[:, 0]
137 |         scan_y = points[:, 1]
138 |         scan_z = points[:, 2]
139 | 
140 |         # get angles of all points
141 |         yaw = -np.arctan2(scan_y, scan_x)
142 |         pitch = np.arcsin(scan_z / depth)
143 | 
144 |         # get projections in image coords
145 |         proj_x = 0.5 * (yaw / np.pi + 1.0)  # in [0.0, 1.0]
146 |         proj_y = 1.0 - (pitch + abs(fov_down)) / fov  # in [0.0, 1.0]
147 | 
148 |         # scale to image size using angular resolution
149 |         # sph_H = self.sph_H*self.sph_ocu
150 |         # sph_W = self.sph_W*self.sph_ocu
151 |         sph_H = self.sph_H
152 |         sph_W = self.sph_W
153 |         proj_x *= sph_W  # in [0.0, W] 128
154 |         proj_y *= sph_H  # in [0.0, H] 64
155 | 
156 |         # round and clamp for use as index
157 |         proj_x = np.floor(proj_x)
158 |         proj_x = np.minimum(sph_W - 1, proj_x)
159 |         proj_x = np.maximum(0, proj_x).astype(np.int32)  # in [0, W-1]
160 | 
161 |         proj_y = np.floor(proj_y)
162 |         proj_y = np.minimum(sph_H - 1, proj_y)
163 |         proj_y = np.maximum(0, proj_y).astype(np.int32)  # in [0, H-1]
164 | 
165 |         data_grid = np.zeros((sph_H, sph_W), dtype='float32')
166 | 
167 |         # NOTE Maybe the constraints come from far buildings rather than nearby objects
168 |         # NOTE For indoor datasets, constraints from nearby objects will reduce the accuracy
169 | 
170 |         indices = np.argsort(depth)[::-1]  # write far-to-near so closer points overwrite farther ones
171 |         data_grid[proj_y[indices], proj_x[indices]] = depth[indices]
172 |         # data_grid[proj_y, proj_x] = depth
173 | 
174 |         # sph_grid = skimage.measure.block_reduce(data_grid, (self.sph_ocu, self.sph_ocu), np.max)
175 |         # sph_grid = 1./(sph_grid + 1)
176 |         sph_norm = (data_grid - data_grid.min()) / \
177 |             (data_grid.max() - data_grid.min())
178 | 
179 |         # depth_img = torch.from_numpy(data_grid).cuda()
180 |         # projected_points = visibility.visibility2(depth_img, torch.FloatTensor([self.sph_down, self.sph_up]).cuda(), torch.zeros_like(depth_img, device='cuda'), depth_img.shape[1], depth_img.shape[0], self.visib_thresh, self.visib_radius)
181 |         # data_out = projected_points.cpu().numpy()
182 |         # data_out[data_out == 0] = 100
183 |         # data_out = skimage.measure.block_reduce(data_out, (self.sph_ocu, self.sph_ocu), np.min)
184 |         # data_index = (data_out == 100)
185 |         # data_out[data_index] = 0
186 |         # # data_out = 1./(data_out + 1)
187 |         # data_norm = (data_out - data_out.min()) / (data_out.max() - data_out.min())
188 |         # ## NOTE define -1 for all negative points
189 |         # data_norm[data_index] = -1
190 |         return sph_norm
191 | 
192 |     def do_multi_sph_projection(self, points):  # stack near/mid/far range bands as 3 channels
193 |         depth = np.linalg.norm(points, 2, axis=1)
194 |         index_r20 = np.where(depth <= 20)
195 |         index_r35 = np.intersect1d(np.where(depth > 20), np.where(depth <= 35))
196 |         index_r50 = np.where(depth > 35)
197 | 
198 |         sphere_r20 = self.do_sph_projection(points[index_r20])
199 |         sphere_r35 = self.do_sph_projection(points[index_r35])
200 |         sphere_r50 = self.do_sph_projection(points[index_r50])
201 | 
202 |         return np.stack((sphere_r20, sphere_r35, sphere_r50), axis=0)
--------------------------------------------------------------------------------
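The snippet below is a minimal, hypothetical usage sketch of the utilities in utils/pointProcess.py; it is not a file from this repository, and the input path ("scan.pcd") and the 4096-point / voxel-size parameters are assumptions chosen only for illustration.

# Illustrative sketch: downsample a scan, then build a spherical range image.
import numpy as np
import torch
import open3d as o3d

from utils.pointProcess import LaserProjection, pcd_downsample

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Binary-search the voxel size until the cloud has exactly 4096 points.
pcd = o3d.io.read_point_cloud("scan.pcd")  # hypothetical input file
pcd = pcd_downsample(pcd, desiredNumOfPoint=4096, leftVoxelSize=2.0, rightVoxelSize=0.01)
points = np.asarray(pcd.points)

# Project the points into a 64x64 spherical range image on the target device.
projector = LaserProjection(device, sph_size=[64, 64], fov_range=[-90, 90])
sph_tensor = projector.proj(points)  # float tensor of shape (1, 64, 64)
print(sph_tensor.shape)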