├── libs ├── __init__.py ├── pointops │ ├── __init__.py │ ├── src │ │ ├── __init__.py │ │ ├── knnquery │ │ │ ├── __init__.py │ │ │ ├── knnquery_cuda_kernel.h │ │ │ ├── knnquery_cuda.cpp │ │ │ └── knnquery_cuda_kernel.cu │ │ ├── ballquery │ │ │ ├── ballquery_cuda_kernel.h │ │ │ ├── ballquery_cuda.cpp │ │ │ └── ballquery_cuda_kernel.cu │ │ ├── grouping_int │ │ │ ├── grouping_int_cuda_kernel.h │ │ │ ├── grouping_int_cuda.cpp │ │ │ └── grouping_int_cuda_kernel.cu │ │ ├── cuda_utils.h │ │ ├── sampling │ │ │ ├── sampling_cuda_kernel.h │ │ │ ├── sampling_cuda.cpp │ │ │ └── sampling_cuda_kernel.cu │ │ ├── grouping │ │ │ ├── grouping_cuda_kernel.h │ │ │ ├── grouping_cuda.cpp │ │ │ └── grouping_cuda_kernel.cu │ │ ├── featuredistribute │ │ │ ├── featuredistribute_cuda_kernel.h │ │ │ ├── featuredistribute_cuda.cpp │ │ │ └── featuredistribute_cuda_kernel.cu │ │ ├── labelstat │ │ │ ├── labelstat_cuda_kernel.h │ │ │ ├── labelstat_cuda.cpp │ │ │ └── labelstat_cuda_kernel.cu │ │ ├── interpolation │ │ │ ├── interpolation_cuda_kernel.h │ │ │ └── interpolation_cuda.cpp │ │ └── pointops_api.cpp │ ├── functions │ │ └── __init__.py │ └── setup.py ├── KNN_CUDA │ ├── .gitignore │ ├── requirements.txt │ ├── ninja │ ├── makefile │ ├── setup.py │ ├── knn_cuda │ │ ├── csrc │ │ │ └── cuda │ │ │ │ └── knn.cpp │ │ └── __init__.py │ ├── README.md │ └── tests │ │ └── test_knn_cuda.py ├── emd_module │ ├── Readme.md │ ├── setup.py │ ├── emd.cpp │ └── emd_module.py └── chamfer_dist │ ├── setup.py │ ├── test.py │ ├── chamfer_cuda.cpp │ └── __init__.py ├── utils ├── __init__.py ├── model_util │ ├── __init__.py │ ├── pool.py │ ├── position_embedding.py │ ├── transformer.py │ └── attention.py ├── cmp_stat.py └── visualization.py ├── datasets ├── __init__.py ├── query_pos_neg_dataset.proto ├── query_pos_neg_dataset_pb2.py └── point_clouds_utils.py ├── place_recognition ├── __init__.py ├── Minkloc3D_V2 │ ├── __init__.py │ ├── misc │ │ ├── __init__.py │ │ ├── quantization.py │ │ └── point_clouds.py │ ├── models │ │ ├── __init__.py │ │ ├── minkloc3dv2.txt │ │ ├── layers │ │ │ ├── pooling_wrapper.py │ │ │ ├── eca_block.py │ │ │ ├── pooling.py │ │ │ └── netvlad.py │ │ ├── model_factory.py │ │ ├── losses │ │ │ ├── loss_utils.py │ │ │ ├── truncated_smoothap.py │ │ │ └── loss.py │ │ ├── minkloc.py │ │ ├── minkfpn.py │ │ └── resnet.py │ └── config │ │ └── config_baseline.txt ├── patch_aug_net │ ├── __init__.py │ └── models │ │ ├── __init__.py │ │ └── pointnet_autoencoder.py └── pptnet_origin │ ├── __init__.py │ └── models │ ├── __init__.py │ └── loupe.py ├── losses ├── contrastive_loss.py ├── loss_utils.py ├── focal_loss.py ├── pose_est_loss.py ├── multi_similarity_loss.py └── truncated_smoothap.py ├── configs ├── pointnet_vlad.yaml ├── patch_aug_net.yaml └── pptnet_origin.yaml ├── .gitignore └── README.md /libs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /libs/pointops/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /utils/model_util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /libs/pointops/src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /place_recognition/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /libs/pointops/functions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /libs/pointops/src/knnquery/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /place_recognition/Minkloc3D_V2/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /place_recognition/patch_aug_net/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /place_recognition/pptnet_origin/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /place_recognition/Minkloc3D_V2/misc/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /place_recognition/Minkloc3D_V2/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /place_recognition/patch_aug_net/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /place_recognition/pptnet_origin/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /libs/KNN_CUDA/.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | _ext 3 | __pycache__ 4 | dist 5 | -------------------------------------------------------------------------------- /libs/KNN_CUDA/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scikit_learn 3 | torch>=1.1.0 4 | -------------------------------------------------------------------------------- /libs/KNN_CUDA/ninja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WHU-USI3DV/PatchAugNet/HEAD/libs/KNN_CUDA/ninja -------------------------------------------------------------------------------- /place_recognition/Minkloc3D_V2/models/minkloc3dv2.txt: -------------------------------------------------------------------------------- 1 | [MODEL] 2 | model=MinkLoc 3 | planes=64,128,64,32 4 | layers=1,1,1,1 5 | 
num_top_down=2 6 | conv0_kernel_size=5 7 | feature_size=256 8 | block=ECABasicBlock 9 | pooling=GeM 10 | 11 | coordinates=cartesian 12 | quantization_step=0.01 13 | 14 | normalize_embeddings=False 15 | 16 | -------------------------------------------------------------------------------- /libs/KNN_CUDA/makefile: -------------------------------------------------------------------------------- 1 | .PHONY : build reqs install clean 2 | NINJA := $(shell command -v ninja 2> /dev/null) 3 | 4 | 5 | build : reqs 6 | python3 setup.py bdist_wheel 7 | 8 | reqs : 9 | ifndef NINJA 10 | sudo cp ./ninja /usr/bin 11 | endif 12 | pip3 install -r requirements.txt 13 | 14 | install : 15 | pip3 install --upgrade dist/*.whl 16 | 17 | clean : 18 | -rm -rf build dist/* *.egg-info 19 | -------------------------------------------------------------------------------- /place_recognition/Minkloc3D_V2/config/config_baseline.txt: -------------------------------------------------------------------------------- 1 | [DEFAULT] 2 | dataset_folder = /home/ericxhzou/Code/benchmark_datasets 3 | 4 | [TRAIN] 5 | num_workers = 8 6 | batch_size = 2048 7 | batch_split_size = 64 8 | val_batch_size = 256 9 | 10 | lr = 1e-3 11 | epochs = 400 12 | scheduler_milestones = 250, 350 13 | aug_mode = 0 14 | set_aug_mode = 0 15 | weight_decay = 1e-4 16 | loss = TruncatedSmoothAP 17 | tau1 = 0.01 18 | positives_per_query = 4 19 | 20 | train_file = training_queries_hankou.pickle 21 | val_file = test_queries_hankou.pickle 22 | -------------------------------------------------------------------------------- /libs/emd_module/Readme.md: -------------------------------------------------------------------------------- 1 | 7 | 1. install 8 | 9 | ``` 10 | cd emd_module 11 | python setup.py install 12 | ``` 13 | 14 | 2. 
use 15 | 16 | ``` 17 | from emd_module.emd_module import emdModule 18 | 19 | def get_emd_loss(self, pred, gt, eps=1.0, iters=512): 20 | """ 21 | pred and gt are (B, N, 3) 22 | """ 23 | dis, _ = self.emd(pred, gt, eps, iters) 24 | dis = torch.mean(torch.sqrt(dis), dim=1) 25 | return torch.mean(dis) 26 | ``` 27 | -------------------------------------------------------------------------------- /libs/KNN_CUDA/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup, find_packages 3 | from knn_cuda import __version__ 4 | 5 | 6 | with open('requirements.txt') as f: 7 | required = f.read().splitlines() 8 | 9 | setup( 10 | name='KNN_CUDA', 11 | version=__version__, 12 | description='PyTorch version of KNN with CUDA support.', 13 | author='Shuaipeng Li', 14 | author_email='sli@mail.bnu.edu.cn', 15 | packages=find_packages(), 16 | package_data={ 17 | 'knn_cuda': ["csrc/cuda/knn.cu", "csrc/cuda/knn.cpp"] 18 | }, 19 | install_requires=required 20 | ) 21 | 22 | -------------------------------------------------------------------------------- /libs/chamfer_dist/setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: Haozhe Xie 3 | # @Date: 2019-08-07 20:54:24 4 | # @Last Modified by: Haozhe Xie 5 | # @Last Modified time: 2019-12-10 10:04:25 6 | # @Email: cshzxie@gmail.com 7 | 8 | from setuptools import setup 9 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 10 | 11 | setup(name='chamfer', 12 | version='2.0.0', 13 | ext_modules=[ 14 | CUDAExtension('chamfer', [ 15 | 'chamfer_cuda.cpp', 16 | 'chamfer.cu', 17 | ]), 18 | ], 19 | cmdclass={'build_ext': BuildExtension}) 20 | -------------------------------------------------------------------------------- /libs/pointops/src/knnquery/knnquery_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _KNNQUERY_CUDA_KERNEL 2 | #define _KNNQUERY_CUDA_KERNEL 3 | 4 | #include <torch/serialize/tensor.h> 5 | #include <vector> 6 | #include <cuda_runtime_api.h> 7 | 8 | void knnquery_cuda(int b, int n, int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void knnquery_cuda_launcher(int b, int n, int m, int nsample, const float *xyz, const float *new_xyz, int *idx, float *dist2, cudaStream_t stream); 15 | 16 | #ifdef __cplusplus 17 | } 18 | #endif 19 | 20 | #endif -------------------------------------------------------------------------------- /libs/emd_module/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | #import sys 3 | #print(sys.path) 4 | #sys.path = ['', '/opt/software/anaconda3/lib/python37.zip', '/opt/software/anaconda3/lib/python3.7', '/opt/software/anaconda3/lib/python3.7/lib-dynload','/opt/software/anaconda3/lib/python3.7/site-packages','/home/TrueC/.local/lib/python3.7/site-packages', '/home/TrueC/Git/VideoSuperResolution'] 5 | 6 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 7 | 8 | setup( 9 | name='emd', 10 | ext_modules=[ 11 | CUDAExtension('emd', [ 12 | 'emd.cpp', 13 | 'emd_cuda.cu', 14 | ]), 15 | ], 16 | cmdclass={ 17 | 'build_ext': BuildExtension 18 | }) -------------------------------------------------------------------------------- /libs/pointops/src/ballquery/ballquery_cuda_kernel.h: 
-------------------------------------------------------------------------------- 1 | #ifndef _BALLQUERY_CUDA_KERNEL 2 | #define _BALLQUERY_CUDA_KERNEL 3 | #include <torch/serialize/tensor.h> 4 | #include <vector> 5 | #include <cuda_runtime_api.h> 6 | 7 | void ballquery_cuda(int b, int n, int m, float radius, int nsample, at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor idx_tensor); 8 | 9 | void ballquery_cuda_fast(int b, int n, int m, float radius, int nsample, at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor idx_tensor); 10 | 11 | #ifdef __cplusplus 12 | extern "C" { 13 | #endif 14 | 15 | void ballquery_cuda_launcher(int b, int n, int m, float radius, int nsample, const float *xyz, const float *new_xyz, int *idx); 16 | 17 | void ballquery_cuda_launcher_fast(int b, int n, int m, float radius, int nsample, const float *new_xyz, const float *xyz, int *idx, cudaStream_t stream); 18 | 19 | #ifdef __cplusplus 20 | } 21 | #endif 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /losses/contrastive_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | 5 | 6 | # Custom Contrastive Loss 7 | class ContrastiveLoss(nn.Module): 8 | """ 9 | Contrastive loss function. 10 | Based on: http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf 11 | label: 1 means negative, 0 means positive !!! 12 | """ 13 | def __init__(self, margin=0.5): 14 | super(ContrastiveLoss, self).__init__() 15 | self.margin = margin 16 | 17 | def forward(self, output1, output2, label): 18 | """ label: 1 means negative, 0 means positive """ 19 | euclidean_distance = F.pairwise_distance(output1, output2) 20 | loss_contrastive = torch.mean((1 - label) * torch.pow(euclidean_distance, 2) + # torch.clamp below clips values under min to min 21 | label * torch.pow(torch.clamp(self.margin - euclidean_distance, min=0.0), 2)) 22 | return loss_contrastive -------------------------------------------------------------------------------- /libs/pointops/src/grouping_int/grouping_int_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _GROUPING_INT_CUDA_KERNEL 2 | #define _GROUPING_INT_CUDA_KERNEL 3 | #include <torch/serialize/tensor.h> 4 | #include <vector> 5 | #include <cuda_runtime_api.h> 6 | 7 | void grouping_int_forward_cuda(int b, int c, int n, int m, int nsample, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out); 8 | 9 | void grouping_int_forward_cuda_fast(int b, int c, int n, int m, int nsample, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor); 10 | 11 | #ifdef __cplusplus 12 | extern "C" { 13 | #endif 14 | 15 | void grouping_int_forward_cuda_launcher(int b, int c, int n, int m, int nsample, const long int *points, const int *idx, long int *out); 16 | 17 | void grouping_int_forward_cuda_launcher_fast(int b, int c, int n, int npoints, int nsample, const long int *points, const int *idx, long int *out); 18 | 19 | #ifdef __cplusplus 20 | } 21 | #endif 22 | #endif 23 | -------------------------------------------------------------------------------- /libs/pointops/src/cuda_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef _CUDA_UTILS_H 2 | #define _CUDA_UTILS_H 3 | 4 | #include <cmath> 5 | 6 | #define TOTAL_THREADS 1024 7 | 8 | #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 9 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") 10 | #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x) 
11 | 12 | #define THREADS_PER_BLOCK 256 13 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 14 | 15 | inline int opt_n_threads(int work_size) { 16 | const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0); 17 | return max(min(1 << pow_2, TOTAL_THREADS), 1); 18 | } 19 | 20 | inline dim3 opt_block_config(int x, int y) { 21 | const int x_threads = opt_n_threads(x); 22 | const int y_threads = max(min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1); 23 | dim3 block_config(x_threads, y_threads, 1); 24 | return block_config; 25 | } 26 | 27 | #endif -------------------------------------------------------------------------------- /libs/pointops/src/grouping_int/grouping_int_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/serialize/tensor.h> 2 | #include <vector> 3 | #include <ATen/cuda/CUDAContext.h> 4 | //#include <THC/THC.h> 5 | 6 | #include "grouping_int_cuda_kernel.h" 7 | 8 | //extern THCState *state; 9 | 10 | void grouping_int_forward_cuda(int b, int c, int n, int m, int nsample, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor) 11 | { 12 | const long int *points = points_tensor.data<long int>(); 13 | const int *idx = idx_tensor.data<int>(); 14 | long int *out = out_tensor.data<long int>(); 15 | grouping_int_forward_cuda_launcher(b, c, n, m, nsample, points, idx, out); 16 | } 17 | 18 | void grouping_int_forward_cuda_fast(int b, int c, int n, int m, int nsample, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor) 19 | { 20 | const long int *points = points_tensor.data<long int>(); 21 | const int *idx = idx_tensor.data<int>(); 22 | long int *out = out_tensor.data<long int>(); 23 | grouping_int_forward_cuda_launcher_fast(b, c, n, m, nsample, points, idx, out); 24 | } -------------------------------------------------------------------------------- /libs/pointops/src/sampling/sampling_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _SAMPLING_CUDA_KERNEL 2 | #define _SAMPLING_CUDA_KERNEL 3 | #include <torch/serialize/tensor.h> 4 | #include <vector> 5 | #include <cuda_runtime_api.h> 6 | 7 | void gathering_forward_cuda(int b, int c, int n, int m, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor); 8 | void gathering_backward_cuda(int b, int c, int n, int m, at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor); 9 | void furthestsampling_cuda(int b, int n, int m, at::Tensor points_tensor, at::Tensor temp_tensor, at::Tensor idx_tensor); 10 | 11 | #ifdef __cplusplus 12 | extern "C" { 13 | #endif 14 | 15 | void gathering_forward_cuda_launcher(int b, int c, int n, int m, const float *points, const int *idx, float *out); 16 | void gathering_backward_cuda_launcher(int b, int c, int n, int m, const float *grad_out, const int *idx, float *grad_points); 17 | void furthestsampling_cuda_launcher(int b, int n, int m, const float *dataset, float *temp, int *idxs); 18 | 19 | #ifdef __cplusplus 20 | } 21 | #endif 22 | #endif 23 | -------------------------------------------------------------------------------- /datasets/query_pos_neg_dataset.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | package p2m.base_type; 3 | 4 | message DataRecord { 5 | string name = 1; 6 | double x = 2; 7 | double y = 3; 8 | }; 9 | 10 | message DataRecords { 11 | repeated DataRecord records = 1; 12 | } 13 | 14 | message Uint32Pair { 15 | uint32 idx1 = 1; 16 | repeated uint32 near_indices2 = 2; 17 | repeated uint32 far_indices2 = 3; 18 | repeated uint32 bad_far_indices2 = 4; 19 | } 20 | 21 | message QueryPosOverlapIndices { 22 | 
uint32 positive_idx = 2; 23 | repeated Uint32Pair overlap_indices = 3; 24 | repeated Uint32Pair inv_overlap_indices = 4; 25 | } 26 | 27 | message QueryOverlapIndices { 28 | uint32 query_idx = 1; 29 | repeated QueryPosOverlapIndices qp_overlap_indices = 2; 30 | } 31 | 32 | message QueryPosNegTuple { 33 | repeated uint32 positive_indices = 1; 34 | repeated uint32 negative_indices = 2; 35 | }; 36 | 37 | message DataSet { 38 | repeated string trip_names = 1; 39 | repeated DataRecord records = 2; 40 | repeated uint32 records_size_list = 3; 41 | map<uint32, QueryPosNegTuple> tuples = 4; 42 | }; -------------------------------------------------------------------------------- /libs/chamfer_dist/test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: Haozhe Xie 3 | # @Date: 2019-12-10 10:38:01 4 | # @Last Modified by: Haozhe Xie 5 | # @Last Modified time: 2019-12-26 14:21:36 6 | # @Email: cshzxie@gmail.com 7 | # 8 | # Note: 9 | # - Replace float -> double, kFloat -> kDouble in chamfer.cu 10 | 11 | import os 12 | import sys 13 | import torch 14 | import unittest 15 | 16 | 17 | from torch.autograd import gradcheck 18 | 19 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir))) 20 | from libs.chamfer_dist import ChamferFunction 21 | 22 | 23 | class ChamferDistanceTestCase(unittest.TestCase): 24 | def test_chamfer_dist(self): 25 | x = torch.rand(4, 64, 3).double() 26 | y = torch.rand(4, 128, 3).double() 27 | x.requires_grad = True 28 | y.requires_grad = True 29 | print(gradcheck(ChamferFunction.apply, [x.cuda(), y.cuda()])) 30 | 31 | 32 | 33 | if __name__ == '__main__': 34 | # unittest.main() 35 | import pdb 36 | x = torch.rand(32,128,3) 37 | y = torch.rand(32,128,3) 38 | pdb.set_trace() 39 | -------------------------------------------------------------------------------- /libs/pointops/src/knnquery/knnquery_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/serialize/tensor.h> 2 | #include <ATen/cuda/CUDAContext.h> 3 | //#include <THC/THC.h> 4 | #include <c10/cuda/CUDAStream.h> 5 | 6 | #include "knnquery_cuda_kernel.h" 7 | 8 | //extern THCState *state; 9 | 10 | #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 11 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") 12 | #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x) 13 | 14 | 15 | void knnquery_cuda(int b, int n, int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor) 16 | { 17 | CHECK_INPUT(new_xyz_tensor); 18 | CHECK_INPUT(xyz_tensor); 19 | 20 | const float *new_xyz = new_xyz_tensor.data<float>(); 21 | const float *xyz = xyz_tensor.data<float>(); 22 | int *idx = idx_tensor.data<int>(); 23 | float *dist2 = dist2_tensor.data<float>(); 24 | 25 | //cudaStream_t stream = THCState_getCurrentStream(state); 26 | cudaStream_t stream = c10::cuda::getCurrentCUDAStream(); 27 | 28 | knnquery_cuda_launcher(b, n, m, nsample, xyz, new_xyz, idx, dist2, stream); 29 | } 30 | -------------------------------------------------------------------------------- /libs/pointops/src/grouping/grouping_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _GROUPING_CUDA_KERNEL 2 | #define _GROUPING_CUDA_KERNEL 3 | #include <torch/serialize/tensor.h> 4 | #include <vector> 5 | #include <cuda_runtime_api.h> 6 | 7 | void grouping_forward_cuda(int b, int c, int n, int m, int nsample, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out); 8 | void grouping_backward_cuda(int b, int c, 
int n, int m, int nsample, at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor); 9 | 10 | void grouping_forward_cuda_fast(int b, int c, int n, int npoints, int nsample, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor); 11 | 12 | #ifdef __cplusplus 13 | extern "C" { 14 | #endif 15 | 16 | void grouping_forward_cuda_launcher(int b, int c, int n, int m, int nsample, const float *points, const int *idx, float *out); 17 | void grouping_backward_cuda_launcher(int b, int c, int n, int m, int nsample, const float *grad_out, const int *idx, float *grad_points); 18 | 19 | void grouping_forward_cuda_launcher_fast(int b, int c, int n, int npoints, int nsample, const float *points, const int *idx, float *out); 20 | 21 | #ifdef __cplusplus 22 | } 23 | #endif 24 | #endif 25 | -------------------------------------------------------------------------------- /utils/model_util/pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class GeMPooling(nn.Module): 6 | def __init__(self, norm, output_size=1, eps=1e-6): 7 | super(GeMPooling, self).__init__() 8 | assert norm > 0 9 | self.p = float(norm) 10 | self.output_size = output_size 11 | self.eps = eps 12 | 13 | def forward(self, x): 14 | x = x.clamp(min=self.eps).pow(self.p)  # GeM: clamp for numerical stability, then raise to the p-th power 15 | return F.adaptive_avg_pool2d(x, self.output_size).pow(1. / self.p)  # average, then take the 1/p-th root 16 | 17 | def __repr__(self): 18 | return self.__class__.__name__ + '(' \ 19 | + str(self.p) + ', ' \ 20 | + 'output_size=' + str(self.output_size) + ')' 21 | 22 | 23 | def get_pool(pool_name): 24 | if pool_name == 'avg': 25 | return nn.AdaptiveAvgPool2d((1, 1)) 26 | elif pool_name == 'max': 27 | return nn.AdaptiveMaxPool2d((1, 1)) 28 | elif pool_name == 'gem': 29 | return GeMPooling(norm=3) 30 | else: 31 | raise AttributeError('unsupported pooling type') 32 | 33 | 34 | if __name__ == '__main__': 35 | feed = torch.randn(3, 10, 8) 36 | my_pool = get_pool('gem') 37 | row_out = my_pool(feed[...,None]) 38 | feed = feed.transpose(1,2)[...,None] 39 | col_out = my_pool(feed) -------------------------------------------------------------------------------- /libs/pointops/src/featuredistribute/featuredistribute_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _FEATUREDISTRIBUTE_CUDA_KERNEL 2 | #define _FEATUREDISTRIBUTE_CUDA_KERNEL 3 | #include <torch/serialize/tensor.h> 4 | #include <vector> 5 | #include <cuda_runtime_api.h> 6 | 7 | void featuredistribute_cuda(int b, int n, int m, at::Tensor max_xyz_tensor, at::Tensor xyz_tensor, at::Tensor distribute_idx_tensor); 8 | void featuregather_forward_cuda(int b, int n, int m, int c, at::Tensor max_feature_tensor, at::Tensor distribute_idx_tensor, at::Tensor distribute_feature_tensor); 9 | void featuregather_backward_cuda(int b, int n, int m, int c, at::Tensor grad_distribute_feature_tensor, at::Tensor distribute_idx_tensor, at::Tensor grad_max_feature_tensor); 10 | 11 | #ifdef __cplusplus 12 | extern "C" { 13 | #endif 14 | 15 | void featuredistribute_cuda_launcher(int b, int n, int m, const float *max_xyz, const float *xyz, int *distribute_idx, cudaStream_t stream); 16 | void featuregather_forward_cuda_launcher(int b, int n, int m, int c, const float *max_feature, const int *distribute_idx, float *distribute_feature, cudaStream_t stream); 17 | void featuregather_backward_cuda_launcher(int b, int n, int m, int c, const float *grad_distribute_feature, const int *distribute_idx, float *grad_max_feature, 
cudaStream_t stream); 18 | 19 | #ifdef __cplusplus 20 | } 21 | #endif 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /utils/model_util/position_embedding.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | 6 | class SinusoidalPositionalEmbedding(nn.Module): 7 | def __init__(self, d_model): 8 | super(SinusoidalPositionalEmbedding, self).__init__() 9 | if d_model % 2 != 0: 10 | raise ValueError(f'Sinusoidal positional encoding with odd d_model: {d_model}') 11 | self.d_model = d_model 12 | div_indices = torch.arange(0, d_model, 2).float() 13 | div_term = torch.exp(div_indices * (-np.log(10000.0) / d_model)) 14 | self.register_buffer('div_term', div_term) 15 | 16 | def forward(self, emb_indices): 17 | r"""Sinusoidal Positional Embedding. 18 | 19 | Args: 20 | emb_indices: torch.Tensor (*) 21 | 22 | Returns: 23 | embeddings: torch.Tensor (*, D) 24 | """ 25 | input_shape = emb_indices.shape 26 | omegas = emb_indices.view(-1, 1, 1) * self.div_term.view(1, -1, 1) # (-1, d_model/2, 1) 27 | sin_embeddings = torch.sin(omegas) 28 | cos_embeddings = torch.cos(omegas) 29 | embeddings = torch.cat([sin_embeddings, cos_embeddings], dim=2) # (-1, d_model/2, 2) 30 | embeddings = embeddings.view(*input_shape, self.d_model) # (*, d_model) 31 | embeddings = embeddings.detach() 32 | return embeddings 33 | -------------------------------------------------------------------------------- /libs/pointops/src/sampling/sampling_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/serialize/tensor.h> 2 | #include <vector> 3 | #include <ATen/cuda/CUDAContext.h> 4 | //#include <THC/THC.h> 5 | #include "sampling_cuda_kernel.h" 6 | 7 | //extern THCState *state; 8 | 9 | void gathering_forward_cuda(int b, int c, int n, int m, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor) 10 | { 11 | const float *points = points_tensor.data<float>(); 12 | const int *idx = idx_tensor.data<int>(); 13 | float *out = out_tensor.data<float>(); 14 | gathering_forward_cuda_launcher(b, c, n, m, points, idx, out); 15 | } 16 | 17 | void gathering_backward_cuda(int b, int c, int n, int m, at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor) 18 | { 19 | 20 | const float *grad_out = grad_out_tensor.data<float>(); 21 | const int *idx = idx_tensor.data<int>(); 22 | float *grad_points = grad_points_tensor.data<float>(); 23 | gathering_backward_cuda_launcher(b, c, n, m, grad_out, idx, grad_points); 24 | } 25 | 26 | void furthestsampling_cuda(int b, int n, int m, at::Tensor points_tensor, at::Tensor temp_tensor, at::Tensor idx_tensor) 27 | { 28 | const float *points = points_tensor.data<float>(); 29 | float *temp = temp_tensor.data<float>(); 30 | int *idx = idx_tensor.data<int>(); 31 | furthestsampling_cuda_launcher(b, n, m, points, temp, idx); 32 | } 33 | -------------------------------------------------------------------------------- /libs/pointops/setup.py: -------------------------------------------------------------------------------- 1 | #python3 setup.py install 2 | from setuptools import setup 3 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 4 | 5 | setup( 6 | name='pointops', 7 | ext_modules=[ 8 | CUDAExtension('pointops_cuda', [ 9 | 'src/pointops_api.cpp', 10 | 'src/ballquery/ballquery_cuda.cpp', 11 | 'src/ballquery/ballquery_cuda_kernel.cu', 12 | 'src/knnquery/knnquery_cuda.cpp', 13 | 'src/knnquery/knnquery_cuda_kernel.cu', 14 | 'src/grouping/grouping_cuda.cpp', 15 | 
'src/grouping/grouping_cuda_kernel.cu', 16 | 'src/grouping_int/grouping_int_cuda.cpp', 17 | 'src/grouping_int/grouping_int_cuda_kernel.cu', 18 | 'src/interpolation/interpolation_cuda.cpp', 19 | 'src/interpolation/interpolation_cuda_kernel.cu', 20 | 'src/sampling/sampling_cuda.cpp', 21 | 'src/sampling/sampling_cuda_kernel.cu', 22 | 23 | 'src/labelstat/labelstat_cuda.cpp', 24 | 'src/labelstat/labelstat_cuda_kernel.cu', 25 | 26 | 'src/featuredistribute/featuredistribute_cuda.cpp', 27 | 'src/featuredistribute/featuredistribute_cuda_kernel.cu' 28 | ], 29 | extra_compile_args={'cxx': ['-g'], 30 | 'nvcc': ['-O2']}) 31 | ], 32 | cmdclass={'build_ext': BuildExtension}) 33 | -------------------------------------------------------------------------------- /libs/pointops/src/grouping/grouping_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/serialize/tensor.h> 2 | #include <vector> 3 | #include <ATen/cuda/CUDAContext.h> 4 | //#include <THC/THC.h> 5 | 6 | #include "grouping_cuda_kernel.h" 7 | 8 | //extern THCState *state; 9 | 10 | void grouping_forward_cuda(int b, int c, int n, int m, int nsample, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor) 11 | { 12 | const float *points = points_tensor.data<float>(); 13 | const int *idx = idx_tensor.data<int>(); 14 | float *out = out_tensor.data<float>(); 15 | grouping_forward_cuda_launcher(b, c, n, m, nsample, points, idx, out); 16 | } 17 | 18 | void grouping_backward_cuda(int b, int c, int n, int m, int nsample, at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor) 19 | { 20 | float *grad_points = grad_points_tensor.data<float>(); 21 | const int *idx = idx_tensor.data<int>(); 22 | const float *grad_out = grad_out_tensor.data<float>(); 23 | grouping_backward_cuda_launcher(b, c, n, m, nsample, grad_out, idx, grad_points); 24 | } 25 | 26 | void grouping_forward_cuda_fast(int b, int c, int n, int npoints, int nsample, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor) { 27 | 28 | const float *points = points_tensor.data<float>(); 29 | const int *idx = idx_tensor.data<int>(); 30 | float *out = out_tensor.data<float>(); 31 | grouping_forward_cuda_launcher_fast(b, c, n, npoints, nsample, points, idx, out); 32 | } -------------------------------------------------------------------------------- /libs/pointops/src/ballquery/ballquery_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/serialize/tensor.h> 2 | #include <ATen/cuda/CUDAContext.h> 3 | //#include <THC/THC.h> 4 | #include <c10/cuda/CUDAStream.h> 5 | 6 | #include "ballquery_cuda_kernel.h" 7 | 8 | //extern THCState *state; 9 | 10 | #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 11 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") 12 | #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x) 13 | 14 | void ballquery_cuda(int b, int n, int m, float radius, int nsample, at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor idx_tensor) 15 | { 16 | const float *new_xyz = new_xyz_tensor.data<float>(); 17 | const float *xyz = xyz_tensor.data<float>(); 18 | int *idx = idx_tensor.data<int>(); 19 | 20 | ballquery_cuda_launcher(b, n, m, radius, nsample, new_xyz, xyz, idx); 21 | } 22 | 23 | 24 | void ballquery_cuda_fast(int b, int n, int m, float radius, int nsample, at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor idx_tensor) 25 | { 26 | CHECK_INPUT(new_xyz_tensor); 27 | CHECK_INPUT(xyz_tensor); 28 | 29 | const float *new_xyz = new_xyz_tensor.data<float>(); 30 | const float *xyz = xyz_tensor.data<float>(); 31 | int *idx = idx_tensor.data<int>(); 32 | 33 | //cudaStream_t stream = 
THCState_getCurrentStream(state); 34 | cudaStream_t stream = c10::cuda::getCurrentCUDAStream(); 35 | 36 | ballquery_cuda_launcher_fast(b, n, m, radius, nsample, new_xyz, xyz, idx, stream); 37 | } 38 | -------------------------------------------------------------------------------- /place_recognition/Minkloc3D_V2/models/layers/pooling_wrapper.py: -------------------------------------------------------------------------------- 1 | from place_recognition.Minkloc3D_V2.models.layers.pooling import MAC, SPoC, GeM, NetVLADWrapper 2 | import torch.nn as nn 3 | import MinkowskiEngine as ME 4 | 5 | 6 | class PoolingWrapper(nn.Module): 7 | def __init__(self, pool_method, in_dim, output_dim): 8 | super().__init__() 9 | 10 | self.pool_method = pool_method 11 | self.in_dim = in_dim 12 | self.output_dim = output_dim 13 | 14 | if pool_method == 'MAC': 15 | # Global max pooling 16 | assert in_dim == output_dim 17 | self.pooling = MAC(input_dim=in_dim) 18 | elif pool_method == 'SPoC': 19 | # Global average pooling 20 | assert in_dim == output_dim 21 | self.pooling = SPoC(input_dim=in_dim) 22 | elif pool_method == 'GeM': 23 | # Generalized mean pooling 24 | assert in_dim == output_dim 25 | self.pooling = GeM(input_dim=in_dim) 26 | elif self.pool_method == 'netvlad': 27 | # NetVLAD 28 | self.pooling = NetVLADWrapper(feature_size=in_dim, output_dim=output_dim, gating=False) 29 | elif self.pool_method == 'netvladgc': 30 | # NetVLAD with Gating Context 31 | self.pooling = NetVLADWrapper(feature_size=in_dim, output_dim=output_dim, gating=True) 32 | else: 33 | raise NotImplementedError('Unknown pooling method: {}'.format(pool_method)) 34 | 35 | def forward(self, x: ME.SparseTensor): 36 | return self.pooling(x) 37 | -------------------------------------------------------------------------------- /libs/pointops/src/labelstat/labelstat_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _LABELSTAT_CUDA_KERNEL 2 | #define _LABELSTAT_CUDA_KERNEL 3 | #include <torch/serialize/tensor.h> 4 | #include <vector> 5 | #include <cuda_runtime_api.h> 6 | 7 | void labelstat_and_ballquery_cuda_fast(int b, int n, int m, float radius, int nsample, int nclass, 8 | at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor label_stat_tensor, at::Tensor idx_tensor, at::Tensor new_label_stat_tensor); 9 | 10 | void labelstat_ballrange_cuda_fast(int b, int n, int m, float radius, int nclass, 11 | at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor label_stat_tensor, at::Tensor new_label_stat_tensor); 12 | 13 | void labelstat_idx_cuda_fast(int b, int n, int m, int nsample, int nclass, 14 | at::Tensor label_stat_tensor, at::Tensor idx_tensor, at::Tensor new_label_stat_tensor); 15 | 16 | #ifdef __cplusplus 17 | extern "C" { 18 | #endif 19 | 20 | void labelstat_and_ballquery_cuda_launcher_fast(int b, int n, int m, float radius, int nsample, int nclass, \ 21 | const float *new_xyz, const float *xyz, const int *label_stat, int *idx, int *new_label_stat, cudaStream_t stream); 22 | 23 | void labelstat_ballrange_cuda_launcher_fast(int b, int n, int m, float radius, int nclass, \ 24 | const float *new_xyz, const float *xyz, const int *label_stat, int *new_label_stat, cudaStream_t stream); 25 | 26 | void labelstat_idx_cuda_launcher_fast(int b, int n, int m, int nsample, int nclass, \ 27 | const int *label_stat, const int *idx, int *new_label_stat, cudaStream_t stream); 28 | 29 | #ifdef __cplusplus 30 | } 31 | #endif 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- 
/libs/emd_module/emd.cpp: -------------------------------------------------------------------------------- 1 | // EMD approximation module (based on auction algorithm) 2 | // author: Minghua Liu 3 | #include <torch/extension.h> 4 | #include <vector> 5 | 6 | int emd_cuda_forward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor dist, at::Tensor assignment, at::Tensor price, 7 | at::Tensor assignment_inv, at::Tensor bid, at::Tensor bid_increments, at::Tensor max_increments, 8 | at::Tensor unass_idx, at::Tensor unass_cnt, at::Tensor unass_cnt_sum, at::Tensor cnt_tmp, at::Tensor max_idx, float eps, int iters); 9 | 10 | int emd_cuda_backward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor gradxyz, at::Tensor graddist, at::Tensor idx); 11 | 12 | 13 | 14 | int emd_forward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor dist, at::Tensor assignment, at::Tensor price, 15 | at::Tensor assignment_inv, at::Tensor bid, at::Tensor bid_increments, at::Tensor max_increments, 16 | at::Tensor unass_idx, at::Tensor unass_cnt, at::Tensor unass_cnt_sum, at::Tensor cnt_tmp, at::Tensor max_idx, float eps, int iters) { 17 | return emd_cuda_forward(xyz1, xyz2, dist, assignment, price, assignment_inv, bid, bid_increments, max_increments, unass_idx, unass_cnt, unass_cnt_sum, cnt_tmp, max_idx, eps, iters); 18 | } 19 | 20 | int emd_backward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor gradxyz, at::Tensor graddist, at::Tensor idx) { 21 | 22 | return emd_cuda_backward(xyz1, xyz2, gradxyz, graddist, idx); 23 | } 24 | 25 | 26 | 27 | 28 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 29 | m.def("forward", &emd_forward, "emd forward (CUDA)"); 30 | m.def("backward", &emd_backward, "emd backward (CUDA)"); 31 | } -------------------------------------------------------------------------------- /libs/chamfer_dist/chamfer_cuda.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * @Author: Haozhe Xie 3 | * @Date: 2019-08-07 20:54:24 4 | * @Last Modified by: Haozhe Xie 5 | * @Last Modified time: 2019-12-10 10:33:50 6 | * @Email: cshzxie@gmail.com 7 | */ 8 | 9 | #include <torch/extension.h> 10 | #include <vector> 11 | 12 | std::vector<torch::Tensor> chamfer_cuda_forward(torch::Tensor xyz1, 13 | torch::Tensor xyz2); 14 | 15 | std::vector<torch::Tensor> chamfer_cuda_backward(torch::Tensor xyz1, 16 | torch::Tensor xyz2, 17 | torch::Tensor idx1, 18 | torch::Tensor idx2, 19 | torch::Tensor grad_dist1, 20 | torch::Tensor grad_dist2); 21 | 22 | std::vector<torch::Tensor> chamfer_forward(torch::Tensor xyz1, 23 | torch::Tensor xyz2) { 24 | return chamfer_cuda_forward(xyz1, xyz2); 25 | } 26 | 27 | std::vector<torch::Tensor> chamfer_backward(torch::Tensor xyz1, 28 | torch::Tensor xyz2, 29 | torch::Tensor idx1, 30 | torch::Tensor idx2, 31 | torch::Tensor grad_dist1, 32 | torch::Tensor grad_dist2) { 33 | return chamfer_cuda_backward(xyz1, xyz2, idx1, idx2, grad_dist1, grad_dist2); 34 | } 35 | 36 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 37 | m.def("forward", &chamfer_forward, "Chamfer forward (CUDA)"); 38 | m.def("backward", &chamfer_backward, "Chamfer backward (CUDA)"); 39 | } 40 | -------------------------------------------------------------------------------- /configs/pointnet_vlad.yaml: -------------------------------------------------------------------------------- 1 | # for_debug 2 | FOR_DEBUG: True # True or False 3 | 4 | MANUAL_SEED: 123 5 | TRAIN_GPU: 6 | - 0 7 | 8 | DATA_TYPE: "baseline" # baseline or refine 9 | 10 | model_type: "pointnet_vlad" 11 | use_patch_recon: False 12 | use_patch_feature_contrast: False 13 | use_hard_negative_patch_mining: False 14 | weight_place_recognition: 1.0 15 | weight_patch_recon: 0.25 16 
| weight_patch_feature_contrast: 0.25 17 | 18 | ARCH: "pptnet" 19 | GLOBAL_FEAT: True 20 | FEATURE_TRANSFORM: True 21 | MAX_POOL: False 22 | GROUP: 8 23 | 24 | AGGREGATION: 'spvlad' 25 | LAST_NORMALIZATION: False 26 | NUM_POINTS: 4096 27 | 28 | FEATURE_OUTPUT_DIM: 256 29 | 30 | EXP_DIR: /home/ericxhzou/Code/ppt-net-plus/exp 31 | 32 | # TRAIN 33 | TRAIN_BATCH_SIZE: 2 34 | TRAIN_POSITIVES_PER_QUERY: 2 35 | TRAIN_NEGATIVES_PER_QUERY: 18 36 | 37 | DECAY_STEP: 200000 38 | DECAY_RATE: 0.7 39 | 40 | # BASE_LEARNING_RATE: 0.00001 41 | BASE_LEARNING_RATE: 0.000005 42 | MOMENTUM: 0.9 43 | OPTIMIZER: 'adam' 44 | MAX_EPOCH: 20 45 | 46 | LEARNING_RATE_DECAY: 'step' 47 | 48 | # the margin is too small 49 | MARGIN_1: 0.5 50 | MARGIN_2: 0.2 51 | 52 | BN_INIT_DECAY: 0.5 53 | BN_DECAY_DECAY_RATE: 0.5 54 | BN_DECAY_CLIP: 0.99 55 | 56 | TRAIN_FILE_ROOT: '/test/work2020/pointnetvlad/generating_queries/pickle_data' 57 | TEST_FILE_ROOT: '/test/work2020/pointnetvlad/generating_queries/pickle_data' 58 | 59 | # LOSS 60 | LOSS_FUNCTION: 'quadruplet' 61 | LOSS_LAZY: True 62 | TRIPLET_USE_BEST_POSITIVES: False 63 | LOSS_IGNORE_ZERO_BATCH: False 64 | 65 | # EVAL6 66 | EVAL_BATCH_SIZE: 2 67 | EVAL_POSITIVES_PER_QUERY: 4 68 | EVAL_NEGATIVES_PER_QUERY: 12 69 | 70 | # ----------- switch data for evaluation ---------- 71 | # WHU Data: hankou, campus 72 | # Oxford RobotCar: oxford 73 | # 3-Inhouse: university, residential, business 74 | dataset: 'oxford' -------------------------------------------------------------------------------- /libs/KNN_CUDA/knn_cuda/csrc/cuda/knn.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | #include <ATen/cuda/CUDAContext.h> 3 | #include <vector> 4 | 5 | #define CHECK_CONTIGUOUS(x) AT_ASSERTM(x.is_contiguous(), #x " must be contiguous") 6 | #define CHECK_TYPE(x, t) AT_ASSERTM(x.dtype() == t, #x " must be " #t) 7 | #define CHECK_CUDA(x) AT_ASSERTM(x.device().type() == at::Device::Type::CUDA, #x " must be on CUDA") 8 | #define CHECK_INPUT(x, t) CHECK_CONTIGUOUS(x); CHECK_TYPE(x, t); CHECK_CUDA(x) 9 | 10 | 11 | void knn_device( 12 | float* ref_dev, 13 | int ref_nb, 14 | float* query_dev, 15 | int query_nb, 16 | int dim, 17 | int k, 18 | float* dist_dev, 19 | long* ind_dev, 20 | cudaStream_t stream 21 | ); 22 | 23 | std::vector<at::Tensor> knn( 24 | at::Tensor & ref, 25 | at::Tensor & query, 26 | const int k 27 | ){ 28 | 29 | CHECK_INPUT(ref, at::kFloat); 30 | CHECK_INPUT(query, at::kFloat); 31 | int dim = ref.size(0); 32 | int ref_nb = ref.size(1); 33 | int query_nb = query.size(1); 34 | float * ref_dev = ref.data<float>(); 35 | float * query_dev = query.data<float>(); 36 | auto dist = at::empty({ref_nb, query_nb}, query.options().dtype(at::kFloat)); 37 | auto ind = at::empty({k, query_nb}, query.options().dtype(at::kLong)); 38 | float * dist_dev = dist.data<float>(); 39 | long * ind_dev = ind.data<long>(); 40 | 41 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 42 | 43 | knn_device( 44 | ref_dev, 45 | ref_nb, 46 | query_dev, 47 | query_nb, 48 | dim, 49 | k, 50 | dist_dev, 51 | ind_dev, 52 | stream 53 | ); 54 | 55 | return {dist.slice(0, 0, k), ind}; 56 | } 57 | 58 | 59 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 60 | m.def("knn", &knn, "KNN cuda version"); 61 | } 62 | 63 | -------------------------------------------------------------------------------- /libs/pointops/src/interpolation/interpolation_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _INTERPOLATION_CUDA_KERNEL 2 | #define _INTERPOLATION_CUDA_KERNEL 3 | #include <torch/serialize/tensor.h> 4 | #include <vector> 5 | #include <cuda_runtime_api.h>
6 | 7 | void nearestneighbor_cuda(int b, int n, int m, at::Tensor unknown_tensor, at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor); 8 | void interpolation_forward_cuda(int b, int c, int m, int n, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor out_tensor); 9 | void interpolation_backward_cuda(int b, int c, int n, int m, at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_points_tensor); 10 | 11 | void nearestneighbor_cuda_fast(int b, int n, int m, at::Tensor unknown_tensor, at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor); 12 | void interpolation_forward_cuda_fast(int b, int c, int m, int n, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor out_tensor); 13 | 14 | #ifdef __cplusplus 15 | extern "C" { 16 | #endif 17 | 18 | void nearestneighbor_cuda_launcher(int b, int n, int m, const float *unknown, const float *known, float *dist2, int *idx); 19 | void interpolation_forward_cuda_launcher(int b, int c, int m, int n, const float *points, const int *idx, const float *weight, float *out); 20 | void interpolation_backward_cuda_launcher(int b, int c, int n, int m, const float *grad_out, const int *idx, const float *weight, float *grad_points); 21 | 22 | void nearestneighbor_cuda_launcher_fast(int b, int n, int m, const float *unknown, const float *known, float *dist2, int *idx); 23 | void interpolation_forward_cuda_launcher_fast(int b, int c, int m, int n, const float *points, const int *idx, const float *weight, float *out); 24 | 25 | #ifdef __cplusplus 26 | } 27 | #endif 28 | #endif 29 | -------------------------------------------------------------------------------- /configs/patch_aug_net.yaml: -------------------------------------------------------------------------------- 1 | MANUAL_SEED: 123 2 | TRAIN_GPU: 3 | - 0 4 | 5 | model_type: "patch_aug_net" 6 | use_patch_recon: True 7 | use_patch_feature_contrast: True 8 | use_hard_negative_patch_mining: True 9 | AGGREGATION_TYPE: 2 # 1~5, Max Pooling=3, APFA1=1, APFA2(ours)=2 10 | weight_place_recognition: 1.0 11 | weight_patch_recon: 0.25 12 | weight_patch_feature_contrast: 0.25 13 | 14 | GROUP: 8 15 | AGGREGATION: 'spvlad' 16 | NUM_POINTS: 4096 17 | 18 | FEATURE_OUTPUT_DIM: 256 19 | 20 | FEATURE_SIZE: 21 | - 256 22 | - 256 23 | - 256 24 | 25 | MAX_SAMPLES: 26 | - 128 27 | - 1024 28 | - 4096 29 | 30 | CLUSTER_SIZE: 31 | - 4 32 | - 16 33 | - 64 34 | 35 | OUTPUT_DIM: 36 | - 256 37 | - 256 38 | - 256 39 | 40 | USE_ORIGIN_PC_IN_FP: True 41 | USE_SPA_ATT_AFTER_FP: True 42 | GATING: False 43 | 44 | SAMPLING: 45 | - 1024 46 | - 128 47 | - 16 48 | 49 | KNN: 50 | - 20 51 | - 20 52 | - 20 53 | 54 | KNN_DILATION: 2 55 | 56 | EXP_DIR: /home/ericxhzou/Code/ppt-net-plus/exp 57 | 58 | # TRAIN 59 | DATA_AUGMENTATION: True 60 | TRAIN_BATCH_SIZE: 4 # i.e. 
num of query 61 | TRAIN_POSITIVES_PER_QUERY: 2 62 | TRAIN_NEGATIVES_PER_QUERY: 14 63 | 64 | DECAY_STEP: 200000 65 | DECAY_RATE: 0.7 66 | 67 | # BASE_LEARNING_RATE: 0.00001 / 0.0005 68 | BASE_LEARNING_RATE: 0.0005 69 | MOMENTUM: 0.9 70 | OPTIMIZER: 'adam' 71 | MAX_EPOCH: 30 72 | 73 | LEARNING_RATE_DECAY: 'step' 74 | 75 | # the margin is too small 76 | MARGIN_1: 0.5 77 | MARGIN_2: 0.2 78 | 79 | # LOSS 80 | LOSS_FUNCTION: 'quadruplet' 81 | LOSS_LAZY: True 82 | TRIPLET_USE_BEST_POSITIVES: False 83 | LOSS_IGNORE_ZERO_BATCH: False 84 | 85 | # EVAL6 86 | EVAL_BATCH_SIZE: 1 87 | EVAL_POSITIVES_PER_QUERY: 4 88 | EVAL_NEGATIVES_PER_QUERY: 10 89 | 90 | # ----------- switch data for evaluation ---------- 91 | # WHU Data: hankou, campus 92 | # Oxford RobotCar: oxford 93 | # 3-Inhouse: university, residential, business 94 | dataset: 'oxford' 95 | -------------------------------------------------------------------------------- /place_recognition/Minkloc3D_V2/models/model_factory.py: -------------------------------------------------------------------------------- 1 | # Warsaw University of Technology 2 | 3 | import torch.nn as nn 4 | 5 | from place_recognition.Minkloc3D_V2.models.minkloc import MinkLoc 6 | from place_recognition.Minkloc3D_V2.misc.utils import ModelParams 7 | from MinkowskiEngine.modules.resnet_block import BasicBlock, Bottleneck 8 | from place_recognition.Minkloc3D_V2.models.layers.eca_block import ECABasicBlock 9 | from place_recognition.Minkloc3D_V2.models.minkfpn import MinkFPN 10 | from place_recognition.Minkloc3D_V2.models.layers.pooling_wrapper import PoolingWrapper 11 | 12 | 13 | def model_factory(model_params: ModelParams): 14 | in_channels = 1 15 | 16 | if model_params.model == 'MinkLoc': 17 | block_module = create_resnet_block(model_params.block) 18 | backbone = MinkFPN(in_channels=in_channels, out_channels=model_params.feature_size, 19 | num_top_down=model_params.num_top_down, conv0_kernel_size=model_params.conv0_kernel_size, 20 | block=block_module, layers=model_params.layers, planes=model_params.planes) 21 | pooling = PoolingWrapper(pool_method=model_params.pooling, in_dim=model_params.feature_size, 22 | output_dim=model_params.output_dim) 23 | model = MinkLoc(backbone=backbone, pooling=pooling, normalize_embeddings=model_params.normalize_embeddings) 24 | else: 25 | raise NotImplementedError('Model not implemented: {}'.format(model_params.model)) 26 | 27 | return model 28 | 29 | 30 | def create_resnet_block(block_name: str) -> nn.Module: 31 | if block_name == 'BasicBlock': 32 | block_module = BasicBlock 33 | elif block_name == 'Bottleneck': 34 | block_module = Bottleneck 35 | elif block_name == 'ECABasicBlock': 36 | block_module = ECABasicBlock 37 | else: 38 | raise NotImplementedError('Unsupported network block: {}'.format(block_name)) 39 | 40 | return block_module 41 | -------------------------------------------------------------------------------- /configs/pptnet_origin.yaml: -------------------------------------------------------------------------------- 1 | # for_debug 2 | FOR_DEBUG: True # True or False 3 | 4 | MANUAL_SEED: 123 5 | TRAIN_GPU: 6 | - 0 7 | 8 | DATA_TYPE: "baseline" # baseline or refine 9 | 10 | model_type: "pptnet" # pptnet, pptnet_l2_norm 11 | use_patch_recon: False 12 | use_patch_feature_contrast: False 13 | use_hard_negative_patch_mining: False 14 | weight_place_recognition: 1.0 15 | weight_patch_recon: 0.25 16 | weight_patch_feature_contrast: 0.25 17 | 18 | ARCH: "pptnet" 19 | GLOBAL_FEAT: True 20 | FEATURE_TRANSFORM: True 21 | MAX_POOL: False 22 | 
GROUP: 8 23 | 24 | AGGREGATION: 'spvlad' 25 | LAST_NORMALIZATION: False 26 | NUM_POINTS: 4096 27 | 28 | FEATURE_OUTPUT_DIM: 256 29 | 30 | FEATURE_SIZE: 31 | - 256 32 | - 256 33 | - 256 34 | - 256 35 | 36 | MAX_SAMPLES: 37 | - 64 38 | - 256 39 | - 1024 40 | - 4096 41 | 42 | CLUSTER_SIZE: 43 | - 1 44 | - 4 45 | - 16 46 | - 64 47 | 48 | OUTPUT_DIM: 49 | - 256 50 | - 256 51 | - 256 52 | - 256 53 | 54 | GATING: True 55 | 56 | SAMPLING: 57 | - 1024 58 | - 256 59 | - 64 60 | - 16 61 | 62 | KNN: 63 | - 20 64 | - 20 65 | - 20 66 | - 20 67 | 68 | DATASET_FOLDER: '/test/datasets/benchmark_datasets/' 69 | EXP_DIR: /home/ericxhzou/Code/ppt-net-plus/exp 70 | 71 | # TRAIN 72 | TRAIN_BATCH_SIZE: 1 73 | TRAIN_POSITIVES_PER_QUERY: 2 74 | TRAIN_NEGATIVES_PER_QUERY: 14 75 | 76 | DECAY_STEP: 200000 77 | DECAY_RATE: 0.7 78 | 79 | # BASE_LEARNING_RATE: 0.00001 80 | BASE_LEARNING_RATE: 0.0005 81 | MOMENTUM: 0.9 82 | OPTIMIZER: 'adam' 83 | MAX_EPOCH: 30 84 | 85 | LEARNING_RATE_DECAY: 'step' 86 | 87 | # the margin is too small 88 | MARGIN_1: 0.5 89 | MARGIN_2: 0.2 90 | 91 | BN_INIT_DECAY: 0.5 92 | BN_DECAY_DECAY_RATE: 0.5 93 | BN_DECAY_CLIP: 0.99 94 | 95 | # LOSS 96 | LOSS_FUNCTION: 'quadruplet' 97 | LOSS_LAZY: True 98 | TRIPLET_USE_BEST_POSITIVES: False 99 | LOSS_IGNORE_ZERO_BATCH: False 100 | 101 | # EVAL6 102 | EVAL_BATCH_SIZE: 1 103 | EVAL_POSITIVES_PER_QUERY: 4 104 | EVAL_NEGATIVES_PER_QUERY: 10 105 | 106 | # ----------- switch data for evaluation ---------- 107 | # WHU Data: hankou, campus 108 | # Oxford RobotCar: oxford 109 | # 3-Inhouse: university, residential, business 110 | dataset: 'oxford' -------------------------------------------------------------------------------- /losses/loss_utils.py: -------------------------------------------------------------------------------- 1 | # Functions and classes used by different loss functions 2 | import numpy as np 3 | import torch 4 | from torch import Tensor 5 | 6 | EPS = 1e-5 7 | 8 | 9 | def metrics_mean(l): 10 | # Compute the mean and return as Python number 11 | metrics = {} 12 | for e in l: 13 | for metric_name in e: 14 | if metric_name not in metrics: 15 | metrics[metric_name] = [] 16 | metrics[metric_name].append(e[metric_name]) 17 | 18 | for metric_name in metrics: 19 | metrics[metric_name] = np.mean(np.array(metrics[metric_name])) 20 | 21 | return metrics 22 | 23 | 24 | def squared_euclidean_distance(x: Tensor, y: Tensor) -> Tensor: 25 | ''' 26 | Compute squared Euclidean distance 27 | Input: x is Nxd matrix 28 | y is Mxd matrix 29 | Output: dist is a NxM matrix where dist[i,j] is the square norm between x[i,:] and y[j,:] 30 | i.e. 
dist[i,j] = ||x[i,:]-y[j,:]||^2 31 | Source: https://discuss.pytorch.org/t/efficient-distance-matrix-computation/9065/3 32 | ''' 33 | x_norm = (x ** 2).sum(1).view(-1, 1) 34 | y_t = torch.transpose(y, 0, 1) 35 | y_norm = (y ** 2).sum(1).view(1, -1) 36 | dist = x_norm + y_norm - 2.0 * torch.mm(x, y_t) 37 | return torch.clamp(dist, 0.0, np.inf) 38 | 39 | 40 | def sigmoid(tensor: Tensor, temp: float) -> Tensor: 41 | """ temperature controlled sigmoid 42 | takes as input a torch tensor (tensor) and passes it through a sigmoid, controlled by temperature: temp 43 | """ 44 | exponent = -tensor / temp 45 | # clamp the input tensor for stability 46 | exponent = torch.clamp(exponent, min=-50, max=50) 47 | y = 1.0 / (1.0 + torch.exp(exponent)) 48 | return y 49 | 50 | 51 | def compute_aff(x: Tensor, similarity: str = 'cosine') -> Tensor: 52 | """computes the affinity matrix between an input vector and itself""" 53 | if similarity == 'cosine': 54 | x = torch.mm(x, x.t()) 55 | elif similarity == 'euclidean': 56 | x = x.unsqueeze(0) 57 | x = torch.cdist(x, x, p=2) 58 | x = x.squeeze(0) 59 | # The greater the distance the smaller affinity 60 | x = -x 61 | else: 62 | raise NotImplementedError(f"Incorrect similarity measure: {similarity}") 63 | return x -------------------------------------------------------------------------------- /libs/pointops/src/pointops_api.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | #include <torch/serialize/tensor.h> 3 | 4 | #include "ballquery/ballquery_cuda_kernel.h" 5 | #include "grouping/grouping_cuda_kernel.h" 6 | #include "grouping_int/grouping_int_cuda_kernel.h" 7 | #include "sampling/sampling_cuda_kernel.h" 8 | #include "interpolation/interpolation_cuda_kernel.h" 9 | #include "knnquery/knnquery_cuda_kernel.h" 10 | 11 | #include "labelstat/labelstat_cuda_kernel.h" 12 | #include "featuredistribute/featuredistribute_cuda_kernel.h" 13 | 14 | 15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 16 | m.def("ballquery_cuda", &ballquery_cuda_fast, "ballquery_cuda_fast"); // name in python, cpp function address, docs 17 | 18 | m.def("knnquery_cuda", &knnquery_cuda, "knnquery_cuda"); 19 | 20 | m.def("grouping_forward_cuda", &grouping_forward_cuda_fast, "grouping_forward_cuda_fast"); 21 | m.def("grouping_backward_cuda", &grouping_backward_cuda, "grouping_backward_cuda"); 22 | 23 | m.def("grouping_int_forward_cuda", &grouping_int_forward_cuda_fast, "grouping_int_forward_cuda_fast"); 24 | 25 | m.def("gathering_forward_cuda", &gathering_forward_cuda, "gathering_forward_cuda"); 26 | m.def("gathering_backward_cuda", &gathering_backward_cuda, "gathering_backward_cuda"); 27 | m.def("furthestsampling_cuda", &furthestsampling_cuda, "furthestsampling_cuda"); 28 | 29 | m.def("nearestneighbor_cuda", &nearestneighbor_cuda_fast, "nearestneighbor_cuda_fast"); 30 | m.def("interpolation_forward_cuda", &interpolation_forward_cuda_fast, "interpolation_forward_cuda_fast"); 31 | m.def("interpolation_backward_cuda", &interpolation_backward_cuda, "interpolation_backward_cuda"); 32 | 33 | m.def("labelstat_idx_cuda", &labelstat_idx_cuda_fast, "labelstat_idx_cuda_fast"); 34 | m.def("labelstat_ballrange_cuda", &labelstat_ballrange_cuda_fast, "labelstat_ballrange_cuda_fast"); 35 | m.def("labelstat_and_ballquery_cuda", &labelstat_and_ballquery_cuda_fast, "labelstat_and_ballquery_cuda_fast"); 36 | 37 | m.def("featuredistribute_cuda", &featuredistribute_cuda, "featuredistribute_cuda"); 38 | m.def("featuregather_forward_cuda", &featuregather_forward_cuda, "featuregather_forward_cuda"); 39
| m.def("featuregather_backward_cuda", &featuregather_backward_cuda, "featuregather_backward_cuda"); 40 | } 41 | -------------------------------------------------------------------------------- /place_recognition/Minkloc3D_V2/models/losses/loss_utils.py: -------------------------------------------------------------------------------- 1 | # Functions and classes used by different loss functions 2 | import numpy as np 3 | import torch 4 | from torch import Tensor 5 | 6 | EPS = 1e-5 7 | 8 | 9 | def metrics_mean(l): 10 | # Compute the mean and return as Python number 11 | metrics = {} 12 | for e in l: 13 | for metric_name in e: 14 | if metric_name not in metrics: 15 | metrics[metric_name] = [] 16 | metrics[metric_name].append(e[metric_name]) 17 | 18 | for metric_name in metrics: 19 | metrics[metric_name] = np.mean(np.array(metrics[metric_name])) 20 | 21 | return metrics 22 | 23 | 24 | def squared_euclidean_distance(x: Tensor, y: Tensor) -> Tensor: 25 | ''' 26 | Compute squared Euclidean distance 27 | Input: x is Nxd matrix 28 | y is Mxd matirx 29 | Output: dist is a NxM matrix where dist[i,j] is the square norm between x[i,:] and y[j,:] 30 | i.e. dist[i,j] = ||x[i,:]-y[j,:]||^2 31 | Source: https://discuss.pytorch.org/t/efficient-distance-matrix-computation/9065/3 32 | ''' 33 | x_norm = (x ** 2).sum(1).view(-1, 1) 34 | y_t = torch.transpose(y, 0, 1) 35 | y_norm = (y ** 2).sum(1).view(1, -1) 36 | dist = x_norm + y_norm - 2.0 * torch.mm(x, y_t) 37 | return torch.clamp(dist, 0.0, np.inf) 38 | 39 | 40 | def sigmoid(tensor: Tensor, temp: float) -> Tensor: 41 | """ temperature controlled sigmoid 42 | takes as input a torch tensor (tensor) and passes it through a sigmoid, controlled by temperature: temp 43 | """ 44 | exponent = -tensor / temp 45 | # clamp the input tensor for stability 46 | exponent = torch.clamp(exponent, min=-50, max=50) 47 | y = 1.0 / (1.0 + torch.exp(exponent)) 48 | return y 49 | 50 | 51 | def compute_aff(x: Tensor, similarity: str = 'cosine') -> Tensor: 52 | """computes the affinity matrix between an input vector and itself""" 53 | if similarity == 'cosine': 54 | x = torch.mm(x, x.t()) 55 | elif similarity == 'euclidean': 56 | x = x.unsqueeze(0) 57 | x = torch.cdist(x, x, p=2) 58 | x = x.squeeze(0) 59 | # The greater the distance the smaller affinity 60 | x = -x 61 | else: 62 | raise NotImplementedError(f"Incorrect similarity measure: {similarity}") 63 | return x -------------------------------------------------------------------------------- /place_recognition/Minkloc3D_V2/misc/quantization.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from typing import List 3 | from abc import ABC, abstractmethod 4 | import torch 5 | import MinkowskiEngine as ME 6 | 7 | 8 | class Quantizer(ABC): 9 | @abstractmethod 10 | def __call__(self, pc): 11 | pass 12 | 13 | 14 | class PolarQuantizer(Quantizer): 15 | def __init__(self, quant_step: List[float]): 16 | assert len(quant_step) == 3, '3 quantization steps expected: for sector (in degrees), ring and z-coordinate (in meters)' 17 | self.quant_step = torch.tensor(quant_step, dtype=torch.float) 18 | self.theta_range = int(360. 
// self.quant_step[0])
19 | 
20 | 
21 | def __call__(self, pc):
22 | # Convert to polar coordinates and quantize with different step size for each coordinate
23 | # pc: (N, 3) point cloud with Cartesian coordinates (X, Y, Z)
24 | assert pc.shape[1] == 3
25 | 
26 | # theta is an angle in degrees in 0..360 range
27 | theta = 180. + torch.atan2(pc[:, 1], pc[:, 0]) * 180./np.pi
28 | # dist is a distance from a coordinate origin
29 | dist = torch.sqrt(pc[:, 0]**2 + pc[:, 1]**2)
30 | z = pc[:, 2]
31 | polar_pc = torch.stack([theta, dist, z], dim=1)
32 | # Scale each coordinate so that after quantization with step 1. we get the required quantization step in each dim
33 | polar_pc = polar_pc / self.quant_step
34 | quantized_polar_pc, ndx = ME.utils.sparse_quantize(polar_pc, quantization_size=1., return_index=True)
35 | # Return quantized coordinates and indices of selected elements
36 | return quantized_polar_pc, ndx
37 | 
38 | 
39 | class CartesianQuantizer(Quantizer):
40 | def __init__(self, quant_step: float):
41 | self.quant_step = quant_step
42 | 
43 | def __call__(self, pc):
44 | # Quantize Cartesian coordinates with a single step size
45 | # pc: (N, 3) point cloud with Cartesian coordinates (X, Y, Z)
46 | assert pc.shape[1] == 3
47 | quantized_pc, ndx = ME.utils.sparse_quantize(pc, quantization_size=self.quant_step, return_index=True)
48 | # Return quantized coordinates and index of selected elements
49 | return quantized_pc, ndx
50 | 
51 | 
--------------------------------------------------------------------------------
/libs/KNN_CUDA/knn_cuda/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | import torch.nn as nn
4 | from torch.utils.cpp_extension import load
5 | 
6 | 
7 | __version__ = "0.2"
8 | 
9 | 
10 | def load_cpp_ext(ext_name):
11 | root_dir = os.path.join(os.path.split(__file__)[0])
12 | ext_csrc = os.path.join(root_dir, "csrc")
13 | ext_path = os.path.join(ext_csrc, "_ext", ext_name)
14 | os.makedirs(ext_path, exist_ok=True)
15 | assert torch.cuda.is_available(), "torch.cuda.is_available() is False."
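# torch.utils.cpp_extension.load() below JIT-compiles the listed .cpp/.cu sources
# with ninja on the first import and caches the built extension in ext_path, so
# subsequent imports skip the compilation step.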
16 | ext_sources = [ 17 | os.path.join(ext_csrc, "cuda", "{}.cpp".format(ext_name)), 18 | os.path.join(ext_csrc, "cuda", "{}.cu".format(ext_name)) 19 | ] 20 | extra_cuda_cflags = [ 21 | "-DCUDA_HAS_FP16=1", 22 | "-D__CUDA_NO_HALF_OPERATORS__", 23 | "-D__CUDA_NO_HALF_CONVERSIONS__", 24 | "-D__CUDA_NO_HALF2_OPERATORS__", 25 | ] 26 | ext = load( 27 | name=ext_name, 28 | sources=ext_sources, 29 | extra_cflags=["-O2"], 30 | build_directory=ext_path, 31 | extra_cuda_cflags=extra_cuda_cflags, 32 | verbose=False, 33 | with_cuda=True 34 | ) 35 | return ext 36 | 37 | 38 | _knn = load_cpp_ext("knn") 39 | 40 | 41 | def knn(ref, query, k): 42 | d, i = _knn.knn(ref, query, k) 43 | i -= 1 44 | return d, i 45 | 46 | 47 | def _T(t, mode=False): 48 | if mode: 49 | return t.transpose(0, 1).contiguous() 50 | else: 51 | return t 52 | 53 | 54 | class KNN(nn.Module): 55 | 56 | def __init__(self, k, transpose_mode=False): 57 | super(KNN, self).__init__() 58 | self.k = k 59 | self._t = transpose_mode 60 | 61 | def forward(self, ref, query): 62 | assert ref.size(0) == query.size(0), "ref.shape={} != query.shape={}".format(ref.shape, query.shape) 63 | with torch.no_grad(): 64 | batch_size = ref.size(0) 65 | D, I = [], [] 66 | for bi in range(batch_size): 67 | r, q = _T(ref[bi], self._t), _T(query[bi], self._t) 68 | d, i = knn(r.float(), q.float(), self.k) 69 | d, i = _T(d, self._t), _T(i, self._t) 70 | D.append(d) 71 | I.append(i) 72 | D = torch.stack(D, dim=0) 73 | I = torch.stack(I, dim=0) 74 | return D, I 75 | 76 | -------------------------------------------------------------------------------- /libs/KNN_CUDA/README.md: -------------------------------------------------------------------------------- 1 | # KNN_CUDA 2 | 3 | + ref: [kNN-CUDA](https://github.com/vincentfpgarcia/kNN-CUDA) 4 | + ref: [pytorch knn cuda](https://github.com/chrischoy/pytorch_knn_cuda) 5 | + author: [sli@mail.bnu.edu.cn](sli@mail.bnu.edu.cn) 6 | 7 | 8 | #### Modifications 9 | + Aten support 10 | + pytorch v1.0+ support 11 | + pytorch c++ extention 12 | 13 | #### Performance 14 | 15 | + dim = 5 16 | + k = 100 17 | + ref = 224 18 | + query = 224 19 | + Intel(R) Core(TM) i7-7700HQ CPU @ 2.80GHz 20 | + NVIDIA GeForce 940MX 21 | 22 | | Loop | sklearn | CUDA | Memory | 23 | | :---: | :---: | :---: | :---: | 24 | | 100 | 2.34 ms | 0.06 ms | 652/1024 | 25 | | 1000 | 2.30 ms | 1.40 ms | 652/1024 | 26 | 27 | 28 | #### Install 29 | 30 | 31 | + from source 32 | 33 | ```bash 34 | git clone https://github.com/unlimblue/KNN_CUDA.git 35 | cd KNN_CUDA 36 | make && make install 37 | ``` 38 | 39 | + from wheel 40 | 41 | ```bash 42 | pip install --upgrade https://github.com/unlimblue/KNN_CUDA/releases/download/0.2/KNN_CUDA-0.2-py3-none-any.whl 43 | ``` 44 | And then, make sure [`ninja`](https://ninja-build.org/) has been installed: 45 | 1. see [https://pytorch.org/tutorials/advanced/cpp_extension.html](https://pytorch.org/tutorials/advanced/cpp_extension.html) 46 | 2. **or just**: 47 | ```bash 48 | wget -P /usr/bin https://github.com/unlimblue/KNN_CUDA/raw/master/ninja 49 | ``` 50 | 51 | + for windows 52 | 53 | You should use branch `windows`: 54 | 55 | ```bash 56 | git clone --branch windows https://github.com/unlimblue/KNN_CUDA.git 57 | cd C:\\PATH_TO_KNN_CUDA 58 | make 59 | make install 60 | ``` 61 | 62 | #### Usage 63 | 64 | ```python 65 | import torch 66 | 67 | # Make sure your CUDA is available. 
68 | assert torch.cuda.is_available() 69 | 70 | from knn_cuda import KNN 71 | """ 72 | if transpose_mode is True, 73 | ref is Tensor [bs x nr x dim] 74 | query is Tensor [bs x nq x dim] 75 | 76 | return 77 | dist is Tensor [bs x nq x k] 78 | indx is Tensor [bs x nq x k] 79 | else 80 | ref is Tensor [bs x dim x nr] 81 | query is Tensor [bs x dim x nq] 82 | 83 | return 84 | dist is Tensor [bs x k x nq] 85 | indx is Tensor [bs x k x nq] 86 | """ 87 | 88 | knn = KNN(k=10, transpose_mode=True) 89 | 90 | ref = torch.rand(32, 1000, 5).cuda() 91 | query = torch.rand(32, 50, 5).cuda() 92 | 93 | dist, indx = knn(ref, query) # 32 x 50 x 10 94 | ``` 95 | -------------------------------------------------------------------------------- /place_recognition/Minkloc3D_V2/models/minkloc.py: -------------------------------------------------------------------------------- 1 | # Author: Jacek Komorowski 2 | # Warsaw University of Technology 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | import MinkowskiEngine as ME 8 | 9 | from place_recognition.Minkloc3D_V2.models.layers.pooling_wrapper import PoolingWrapper 10 | 11 | 12 | class MinkLoc(torch.nn.Module): 13 | def __init__(self, backbone: nn.Module, pooling: PoolingWrapper, normalize_embeddings: bool = False): 14 | super().__init__() 15 | self.backbone = backbone 16 | self.pooling = pooling 17 | self.normalize_embeddings = normalize_embeddings 18 | self.stats = {} 19 | 20 | def forward(self, batch): 21 | x = ME.SparseTensor(batch['features'], coordinates=batch['coords']) 22 | x = self.backbone(x) 23 | # x is (num_points, n_features) tensor 24 | assert x.shape[1] == self.pooling.in_dim, f'Backbone output tensor has: {x.shape[1]} channels. ' \ 25 | f'Expected: {self.pooling.in_dim}' 26 | x = self.pooling(x) 27 | if hasattr(self.pooling, 'stats'): 28 | self.stats.update(self.pooling.stats) 29 | 30 | #x = x.flatten(1) 31 | assert x.dim() == 2, f'Expected 2-dimensional tensor (batch_size,output_dim). Got {x.dim()} dimensions.' 32 | assert x.shape[1] == self.pooling.output_dim, f'Output tensor has: {x.shape[1]} channels. 
' \ 33 | f'Expected: {self.pooling.output_dim}' 34 | 35 | if self.normalize_embeddings: 36 | x = F.normalize(x, dim=1) 37 | 38 | # x is (batch_size, output_dim) tensor 39 | return x 40 | 41 | def print_info(self): 42 | print('Model class: MinkLoc') 43 | n_params = sum([param.nelement() for param in self.parameters()]) 44 | print(f'Total parameters: {n_params}') 45 | n_params = sum([param.nelement() for param in self.backbone.parameters()]) 46 | print(f'Backbone: {type(self.backbone).__name__} #parameters: {n_params}') 47 | n_params = sum([param.nelement() for param in self.pooling.parameters()]) 48 | print(f'Pooling method: {self.pooling.pool_method} #parameters: {n_params}') 49 | print('# channels from the backbone: {}'.format(self.pooling.in_dim)) 50 | print('# output channels : {}'.format(self.pooling.output_dim)) 51 | print(f'Embedding normalization: {self.normalize_embeddings}') 52 | -------------------------------------------------------------------------------- /place_recognition/Minkloc3D_V2/models/layers/eca_block.py: -------------------------------------------------------------------------------- 1 | # Implementation of Efficient Channel Attention ECA block 2 | 3 | import numpy as np 4 | import torch.nn as nn 5 | 6 | import MinkowskiEngine as ME 7 | 8 | from MinkowskiEngine.modules.resnet_block import BasicBlock, Bottleneck 9 | 10 | 11 | class ECALayer(nn.Module): 12 | def __init__(self, channels, gamma=2, b=1): 13 | super().__init__() 14 | t = int(abs((np.log2(channels) + b) / gamma)) 15 | k_size = t if t % 2 else t + 1 16 | self.avg_pool = ME.MinkowskiGlobalPooling() 17 | self.conv = nn.Conv1d(1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False) 18 | self.sigmoid = nn.Sigmoid() 19 | self.broadcast_mul = ME.MinkowskiBroadcastMultiplication() 20 | 21 | def forward(self, x: ME.SparseTensor): 22 | # feature descriptor on the global spatial information 23 | y_sparse = self.avg_pool(x) 24 | 25 | # Apply 1D convolution along the channel dimension 26 | y = self.conv(y_sparse.F.unsqueeze(1)).squeeze(1) 27 | # y is (batch_size, channels) tensor 28 | 29 | y = self.sigmoid(y) 30 | # y is (batch_size, channels) tensor 31 | 32 | y_sparse = ME.SparseTensor(y, coordinate_manager=y_sparse.coordinate_manager, 33 | coordinate_map_key=y_sparse.coordinate_map_key) 34 | # y must be features reduced to the origin 35 | return self.broadcast_mul(x, y_sparse) 36 | 37 | 38 | class ECABasicBlock(BasicBlock): 39 | def __init__(self, 40 | inplanes, 41 | planes, 42 | stride=1, 43 | dilation=1, 44 | downsample=None, 45 | dimension=3): 46 | super(ECABasicBlock, self).__init__( 47 | inplanes, 48 | planes, 49 | stride=stride, 50 | dilation=dilation, 51 | downsample=downsample, 52 | dimension=dimension) 53 | self.eca = ECALayer(planes, gamma=2, b=1) 54 | 55 | def forward(self, x): 56 | residual = x 57 | 58 | out = self.conv1(x) 59 | out = self.norm1(out) 60 | out = self.relu(out) 61 | 62 | out = self.conv2(out) 63 | out = self.norm2(out) 64 | out = self.eca(out) 65 | 66 | if self.downsample is not None: 67 | residual = self.downsample(x) 68 | 69 | out += residual 70 | out = self.relu(out) 71 | 72 | return out 73 | -------------------------------------------------------------------------------- /libs/KNN_CUDA/tests/test_knn_cuda.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from sklearn.neighbors import KDTree 4 | from knn_cuda import KNN 5 | 6 | 7 | def t2n(t): 8 | return t.detach().cpu().numpy() 9 | 10 | 11 | def 
run_kdtree(ref, query, k): 12 | bs = ref.shape[0] 13 | D, I = [], [] 14 | for j in range(bs): 15 | tree = KDTree(ref[j], leaf_size=100) 16 | d, i = tree.query(query[j], k=k) 17 | D.append(d) 18 | I.append(i) 19 | D = np.stack(D) 20 | I = np.stack(I) 21 | return D, I 22 | 23 | 24 | def run_knnCuda(ref, query, k): 25 | ref = torch.from_numpy(ref).float().cuda() 26 | query = torch.from_numpy(query).float().cuda() 27 | knn = KNN(k, transpose_mode=True) 28 | d, i = knn(ref, query) 29 | return t2n(d), t2n(i) 30 | 31 | 32 | def compare(k, dim, n1, n2=-1): 33 | if n2 < 0: 34 | n2 = n1 35 | for _ in range(5): 36 | ref = np.random.random((2, n1, dim)) 37 | query = np.random.random((2, n2, dim)) 38 | 39 | kd_dist, kd_idices = run_kdtree(ref, query, k) 40 | kn_dist, kn_idices = run_knnCuda(ref, query, k) 41 | 42 | # diff = (kd_idices - kn_idices) != 0 43 | # print(kd_dist[diff]) 44 | # print(kn_dist[diff]) 45 | 46 | np.testing.assert_almost_equal(kd_dist, kn_dist, decimal=3) 47 | # np.testing.assert_array_equal(kd_idices, kn_idices) 48 | 49 | 50 | class TestKNNCuda: 51 | 52 | def test_knn_cuda_performance(self, benchmark): 53 | dim = 5 54 | k = 100 55 | ref = np.random.random((1, 224, dim)) 56 | query = np.random.random((1, 224, dim)) 57 | benchmark(run_knnCuda, ref, query, k) 58 | 59 | def test_knn_cuda_400_5_1000(self): 60 | compare(400, 5, 1000) 61 | 62 | def test_knn_cuda_400_5_100(self): 63 | compare(10, 5, 100) 64 | 65 | def test_knn_cuda_400_5_10(self): 66 | compare(2, 5, 10) 67 | 68 | def test_knn_cuda_400_5_1001(self): 69 | compare(400, 5, 1001) 70 | 71 | def test_knn_cuda_400_5_101(self): 72 | compare(10, 5, 101) 73 | 74 | def test_knn_cuda_400_5_11(self): 75 | compare(2, 5, 11) 76 | 77 | def test_knn_cuda_400_5_300000_50(self): 78 | compare(400, 5, 30000, 50) 79 | 80 | def test_knn_cuda_400_5_300001_50(self): 81 | compare(400, 5, 30001, 50) 82 | 83 | def test_knn_cuda_400_5_10000(self): 84 | compare(400, 5, 10000) 85 | 86 | def test_knn_cuda_400_5_10001(self): 87 | compare(400, 5, 10001) 88 | -------------------------------------------------------------------------------- /libs/pointops/src/interpolation/interpolation_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | //#include 5 | #include "interpolation_cuda_kernel.h" 6 | 7 | //extern THCState *state; 8 | 9 | void nearestneighbor_cuda(int b, int n, int m, at::Tensor unknown_tensor, at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor) 10 | { 11 | const float *unknown = unknown_tensor.data(); 12 | const float *known = known_tensor.data(); 13 | float *dist2 = dist2_tensor.data(); 14 | int *idx = idx_tensor.data(); 15 | nearestneighbor_cuda_launcher(b, n, m, unknown, known, dist2, idx); 16 | } 17 | 18 | void interpolation_forward_cuda(int b, int c, int m, int n, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor out_tensor) 19 | { 20 | const float *points = points_tensor.data(); 21 | const float *weight = weight_tensor.data(); 22 | float *out = out_tensor.data(); 23 | const int *idx = idx_tensor.data(); 24 | interpolation_forward_cuda_launcher(b, c, m, n, points, idx, weight, out); 25 | } 26 | 27 | void interpolation_backward_cuda(int b, int c, int n, int m, at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_points_tensor) 28 | { 29 | const float *grad_out = grad_out_tensor.data(); 30 | const float *weight = weight_tensor.data(); 31 | float *grad_points = 
grad_points_tensor.data<float>();
32 | const int *idx = idx_tensor.data<int>();
33 | interpolation_backward_cuda_launcher(b, c, n, m, grad_out, idx, weight, grad_points);
34 | }
35 | 
36 | void nearestneighbor_cuda_fast(int b, int n, int m, at::Tensor unknown_tensor, at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor) {
37 | const float *unknown = unknown_tensor.data<float>();
38 | const float *known = known_tensor.data<float>();
39 | float *dist2 = dist2_tensor.data<float>();
40 | int *idx = idx_tensor.data<int>();
41 | nearestneighbor_cuda_launcher_fast(b, n, m, unknown, known, dist2, idx);
42 | }
43 | 
44 | void interpolation_forward_cuda_fast(int b, int c, int m, int n, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor out_tensor) {
45 | 
46 | const float *points = points_tensor.data<float>();
47 | const float *weight = weight_tensor.data<float>();
48 | float *out = out_tensor.data<float>();
49 | const int *idx = idx_tensor.data<int>();
50 | interpolation_forward_cuda_launcher_fast(b, c, m, n, points, idx, weight, out);
51 | }
--------------------------------------------------------------------------------
/libs/pointops/src/knnquery/knnquery_cuda_kernel.cu:
--------------------------------------------------------------------------------
1 | #include "../cuda_utils.h"
2 | #include "knnquery_cuda_kernel.h"
3 | 
4 | // input: xyz (b, n, 3) new_xyz (b, m, 3)
5 | // output: idx (b, m, nsample) dist2 (b, m, nsample)
6 | __global__ void knnquery_cuda_kernel(int b, int n, int m, int nsample, const float *__restrict__ xyz, const float *__restrict__ new_xyz, int *__restrict__ idx, float *__restrict__ dist2) {
7 | int bs_idx = blockIdx.y;
8 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
9 | if (bs_idx >= b || pt_idx >= m) return;
10 | 
11 | new_xyz += bs_idx * m * 3 + pt_idx * 3;
12 | xyz += bs_idx * n * 3;
13 | idx += bs_idx * m * nsample + pt_idx * nsample;
14 | dist2 += bs_idx * m * nsample + pt_idx * nsample; // offset dist2 like idx so each thread writes its own slice
15 | float new_x = new_xyz[0];
16 | float new_y = new_xyz[1];
17 | float new_z = new_xyz[2];
18 | 
19 | //double* best = new double[nsample];
20 | //int* besti = new int[nsample];
21 | double best[200];
22 | int besti[200];
23 | for(int i = 0; i < nsample; i++){
24 | best[i] = 1e40;
25 | besti[i] = 0;
26 | }
27 | for(int k = 0; k < n; k++){
28 | float x = xyz[k * 3 + 0];
29 | float y = xyz[k * 3 + 1];
30 | float z = xyz[k * 3 + 2];
31 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z);
32 | for(int j = 0; j < nsample; j++){
33 | if(d2 < best[j]){
34 | for(int i = nsample - 1; i > j; i--){
35 | best[i] = best[i - 1];
36 | besti[i] = besti[i - 1];
37 | }
38 | best[j] = d2;
39 | besti[j] = k;
40 | break;
41 | }
42 | }
43 | }
44 | for(int i = 0; i < nsample; i++){
45 | idx[i] = besti[i];
46 | dist2[i] = best[i];
47 | }
48 | //delete []best;
49 | //delete []besti;
50 | }
51 | 
52 | 
53 | void knnquery_cuda_launcher(int b, int n, int m, int nsample, const float *xyz, const float *new_xyz, int *idx, float *dist2, cudaStream_t stream) {
54 | // param new_xyz: (B, m, 3)
55 | // param xyz: (B, n, 3)
56 | // param idx: (B, m, nsample)
57 | 
58 | cudaError_t err;
59 | 
60 | dim3 blocks(DIVUP(m, THREADS_PER_BLOCK), b); // blockIdx.x(col), blockIdx.y(row)
61 | dim3 threads(THREADS_PER_BLOCK);
62 | 
63 | knnquery_cuda_kernel<<<blocks, threads, 0, stream>>>(b, n, m, nsample, xyz, new_xyz, idx, dist2);
64 | // cudaDeviceSynchronize(); // for using printf in kernel function
65 | 
66 | err = cudaGetLastError();
67 | if (cudaSuccess != err) {
68 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
69 | exit(-1);
70 | }
71 | }
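Two details of the kernel above are easy to miss: the returned distances are *squared* L2 (no sqrt is taken), and the fixed `best[200]`/`besti[200]` buffers silently cap `nsample` at 200. A minimal pure-PyTorch sketch of the same contract, useful as a CPU cross-check (an illustration only, not the project's wrapper API):

```python
import torch

def knn_reference(xyz: torch.Tensor, new_xyz: torch.Tensor, nsample: int):
    # xyz: (b, n, 3) reference points, new_xyz: (b, m, 3) query points.
    # Returns idx (b, m, nsample) int32 and dist2 (b, m, nsample) squared L2,
    # sorted ascending -- the same layout knnquery_cuda_kernel produces.
    d2 = torch.cdist(new_xyz, xyz, p=2.0) ** 2                     # (b, m, n)
    dist2, idx = torch.topk(d2, k=nsample, dim=-1, largest=False)  # ascending
    return idx.to(torch.int32), dist2
```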
--------------------------------------------------------------------------------
/libs/pointops/src/featuredistribute/featuredistribute_cuda.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/serialize/tensor.h>
2 | #include <ATen/cuda/CUDAContext.h>
3 | //#include <THC/THC.h>
4 | #include <c10/cuda/CUDAStream.h>
5 | 
6 | #include "featuredistribute_cuda_kernel.h"
7 | 
8 | //extern THCState *state;
9 | 
10 | #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
11 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
12 | #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x)
13 | 
14 | 
15 | void featuredistribute_cuda(int b, int n, int m, at::Tensor max_xyz_tensor, at::Tensor xyz_tensor, at::Tensor distribute_idx_tensor)
16 | {
17 | CHECK_INPUT(max_xyz_tensor);
18 | CHECK_INPUT(xyz_tensor);
19 | 
20 | const float *max_xyz = max_xyz_tensor.data<float>();
21 | const float *xyz = xyz_tensor.data<float>();
22 | int *distribute_idx = distribute_idx_tensor.data<int>();
23 | 
24 | //cudaStream_t stream = THCState_getCurrentStream(state);
25 | cudaStream_t stream = c10::cuda::getCurrentCUDAStream();
26 | 
27 | featuredistribute_cuda_launcher(b, n, m, max_xyz, xyz, distribute_idx, stream);
28 | }
29 | 
30 | 
31 | void featuregather_forward_cuda(int b, int n, int m, int c, at::Tensor max_feature_tensor, at::Tensor distribute_idx_tensor, at::Tensor distribute_feature_tensor)
32 | {
33 | CHECK_INPUT(max_feature_tensor);
34 | CHECK_INPUT(distribute_idx_tensor);
35 | 
36 | const float *max_feature = max_feature_tensor.data<float>();
37 | const int *distribute_idx = distribute_idx_tensor.data<int>();
38 | float *distribute_feature = distribute_feature_tensor.data<float>();
39 | 
40 | //cudaStream_t stream = THCState_getCurrentStream(state);
41 | cudaStream_t stream = c10::cuda::getCurrentCUDAStream();
42 | 
43 | featuregather_forward_cuda_launcher(b, n, m, c, max_feature, distribute_idx, distribute_feature, stream);
44 | }
45 | 
46 | 
47 | void featuregather_backward_cuda(int b, int n, int m, int c, at::Tensor grad_distribute_feature_tensor, at::Tensor distribute_idx_tensor, at::Tensor grad_max_feature_tensor)
48 | {
49 | CHECK_INPUT(grad_distribute_feature_tensor);
50 | CHECK_INPUT(distribute_idx_tensor);
51 | 
52 | const float *grad_distribute_feature = grad_distribute_feature_tensor.data<float>();
53 | const int *distribute_idx = distribute_idx_tensor.data<int>();
54 | float *grad_max_feature = grad_max_feature_tensor.data<float>();
55 | 
56 | //cudaStream_t stream = THCState_getCurrentStream(state);
57 | cudaStream_t stream = c10::cuda::getCurrentCUDAStream();
58 | 
59 | featuregather_backward_cuda_launcher(b, n, m, c, grad_distribute_feature, distribute_idx, grad_max_feature, stream);
60 | }
--------------------------------------------------------------------------------
/libs/pointops/src/grouping_int/grouping_int_cuda_kernel.cu:
--------------------------------------------------------------------------------
1 | #include "../cuda_utils.h"
2 | #include "grouping_int_cuda_kernel.h"
3 | 
4 | // input: points(b, c, n) idx(b, m, nsample)
5 | // output: out(b, c, m, nsample)
6 | __global__ void grouping_int_forward_cuda_kernel(int b, int c, int n, int m, int nsample, const long int *points, const int *idx, long int *out)
7 | {
8 | int batch_index = blockIdx.x;
9 | points += batch_index * n * c;
10 | idx += batch_index * m * nsample;
11 | out += batch_index * m * nsample * c;
12 | const int index = threadIdx.y * blockDim.x + threadIdx.x;
13 | const int stride = blockDim.y * blockDim.x;
14 | for (int i = index; i < c * m; i += stride)
15 | {
16
| const int l = i / m; 17 | const int j = i % m; 18 | for (int k = 0; k < nsample; ++k) 19 | { 20 | int ii = idx[j * nsample + k]; 21 | out[(l * m + j) * nsample + k] = points[l * n + ii]; 22 | } 23 | } 24 | } 25 | 26 | 27 | void grouping_int_forward_cuda_launcher(int b, int c, int n, int m, int nsample, const long int *points, const int *idx, long int *out) 28 | { 29 | grouping_int_forward_cuda_kernel<<>>(b, c, n, m, nsample, points, idx, out); 30 | } 31 | 32 | 33 | __global__ void grouping_int_forward_cuda_kernel_fast(int b, int c, int n, int npoints, int nsample, const long int *__restrict__ points, const int *__restrict__ idx, long int *__restrict__ out) 34 | { 35 | int bs_idx = blockIdx.z; 36 | int c_idx = blockIdx.y; 37 | int index = blockIdx.x * blockDim.x + threadIdx.x; 38 | int pt_idx = index / nsample; 39 | if (bs_idx >= b || c_idx >= c || pt_idx >= npoints) return; 40 | 41 | int sample_idx = index % nsample; 42 | 43 | idx += bs_idx * npoints * nsample + pt_idx * nsample + sample_idx; 44 | int in_idx = bs_idx * c * n + c_idx * n + idx[0]; 45 | int out_idx = bs_idx * c * npoints * nsample + c_idx * npoints * nsample + pt_idx * nsample + sample_idx; 46 | 47 | out[out_idx] = points[in_idx]; 48 | } 49 | 50 | 51 | void grouping_int_forward_cuda_launcher_fast(int b, int c, int n, int npoints, int nsample, const long int *points, const int *idx, long int *out) 52 | { 53 | cudaError_t err; 54 | 55 | dim3 blocks(DIVUP(npoints * nsample, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row) 56 | dim3 threads(THREADS_PER_BLOCK); 57 | 58 | grouping_int_forward_cuda_kernel_fast<<>>(b, c, n, npoints, nsample, points, idx, out); 59 | // cudaDeviceSynchronize(); // for using printf in kernel function 60 | err = cudaGetLastError(); 61 | if (cudaSuccess != err) { 62 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 63 | exit(-1); 64 | } 65 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | 134 | # pytype static type analyzer 135 | .pytype/ 136 | 137 | # Cython debug symbols 138 | cython_debug/ 139 | 140 | # add by ericxhzou 141 | exp/ 142 | generating_queries/pickle_data/ 143 | .idea/ 144 | libs/__pycache__ 145 | models/__pycache__ 146 | utils/__pycache__ 147 | -------------------------------------------------------------------------------- /libs/pointops/src/labelstat/labelstat_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | //#include 4 | #include 5 | 6 | #include "labelstat_cuda_kernel.h" 7 | 8 | //extern THCState *state; 9 | 10 | #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 11 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") 12 | #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x) 13 | 14 | void labelstat_idx_cuda_fast(int b, int n, int m, int nsample, int nclass, 15 | at::Tensor label_stat_tensor, at::Tensor idx_tensor, at::Tensor new_label_stat_tensor) 16 | { 17 | CHECK_INPUT(label_stat_tensor); 18 | CHECK_INPUT(idx_tensor); 19 | 20 | const int *label_stat = label_stat_tensor.data(); 21 | const int *idx = idx_tensor.data(); 22 | int *new_label_stat = new_label_stat_tensor.data(); 23 | 24 | //cudaStream_t stream = THCState_getCurrentStream(state); 25 | cudaStream_t stream = c10::cuda::getCurrentCUDAStream(); 26 | 27 | labelstat_idx_cuda_launcher_fast(b, n, m, nsample, nclass, label_stat, idx, new_label_stat, stream); 28 | } 29 | 30 | 
void labelstat_ballrange_cuda_fast(int b, int n, int m, float radius, int nclass,
31 | at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor label_stat_tensor, at::Tensor new_label_stat_tensor)
32 | {
33 | CHECK_INPUT(new_xyz_tensor);
34 | CHECK_INPUT(xyz_tensor);
35 | CHECK_INPUT(label_stat_tensor);
36 | 
37 | const float *new_xyz = new_xyz_tensor.data<float>();
38 | const float *xyz = xyz_tensor.data<float>();
39 | const int *label_stat = label_stat_tensor.data<int>();
40 | int *new_label_stat = new_label_stat_tensor.data<int>();
41 | 
42 | //cudaStream_t stream = THCState_getCurrentStream(state);
43 | cudaStream_t stream = c10::cuda::getCurrentCUDAStream();
44 | 
45 | labelstat_ballrange_cuda_launcher_fast(b, n, m, radius, nclass, new_xyz, xyz, label_stat, new_label_stat, stream);
46 | }
47 | 
48 | void labelstat_and_ballquery_cuda_fast(int b, int n, int m, float radius, int nsample, int nclass,
49 | at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor label_stat_tensor, at::Tensor idx_tensor, at::Tensor new_label_stat_tensor)
50 | {
51 | CHECK_INPUT(new_xyz_tensor);
52 | CHECK_INPUT(xyz_tensor);
53 | CHECK_INPUT(label_stat_tensor);
54 | CHECK_INPUT(idx_tensor);
55 | 
56 | const float *new_xyz = new_xyz_tensor.data<float>();
57 | const float *xyz = xyz_tensor.data<float>();
58 | const int *label_stat = label_stat_tensor.data<int>();
59 | int *idx = idx_tensor.data<int>();
60 | int *new_label_stat = new_label_stat_tensor.data<int>();
61 | 
62 | //cudaStream_t stream = THCState_getCurrentStream(state);
63 | cudaStream_t stream = c10::cuda::getCurrentCUDAStream();
64 | 
65 | labelstat_and_ballquery_cuda_launcher_fast(b, n, m, radius, nsample, nclass, new_xyz, xyz, label_stat, idx, new_label_stat, stream);
66 | }
67 | 
--------------------------------------------------------------------------------
/libs/chamfer_dist/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author: Thibault GROUEIX
3 | # @Date: 2019-08-07 20:54:24
4 | # @Last Modified by: Haozhe Xie
5 | # @Last Modified time: 2019-12-18 15:06:25
6 | # @Email: cshzxie@gmail.com
7 | 
8 | import torch
9 | 
10 | import chamfer
11 | 
12 | 
13 | class ChamferFunction(torch.autograd.Function):
14 | @staticmethod
15 | def forward(ctx, xyz1, xyz2):
16 | dist1, dist2, idx1, idx2 = chamfer.forward(xyz1, xyz2)
17 | ctx.save_for_backward(xyz1, xyz2, idx1, idx2)
18 | 
19 | return dist1, dist2
20 | 
21 | @staticmethod
22 | def backward(ctx, grad_dist1, grad_dist2):
23 | xyz1, xyz2, idx1, idx2 = ctx.saved_tensors
24 | grad_xyz1, grad_xyz2 = chamfer.backward(xyz1, xyz2, idx1, idx2, grad_dist1, grad_dist2)
25 | return grad_xyz1, grad_xyz2
26 | 
27 | 
28 | class ChamferDistanceL2(torch.nn.Module):
29 | ''' Chamfer Distance L2
30 | '''
31 | def __init__(self, ignore_zeros=False):
32 | super().__init__()
33 | self.ignore_zeros = ignore_zeros
34 | 
35 | def forward(self, xyz1, xyz2):
36 | batch_size = xyz1.size(0)
37 | if batch_size == 1 and self.ignore_zeros:
38 | non_zeros1 = torch.sum(xyz1, dim=2).ne(0)
39 | non_zeros2 = torch.sum(xyz2, dim=2).ne(0)
40 | xyz1 = xyz1[non_zeros1].unsqueeze(dim=0)
41 | xyz2 = xyz2[non_zeros2].unsqueeze(dim=0)
42 | 
43 | dist1, dist2 = ChamferFunction.apply(xyz1, xyz2)
44 | return torch.mean(dist1) + torch.mean(dist2)
45 | 
46 | class ChamferDistanceL2_split(torch.nn.Module):
47 | ''' Chamfer Distance L2 (returns the two directed terms separately)
48 | '''
49 | def __init__(self, ignore_zeros=False):
50 | super().__init__()
51 | self.ignore_zeros = ignore_zeros
52 | 
53 | def forward(self, xyz1, xyz2):
54 | batch_size = xyz1.size(0)
55 | if batch_size == 1 and self.ignore_zeros:
56 | non_zeros1 = torch.sum(xyz1, dim=2).ne(0)
57 | non_zeros2 = torch.sum(xyz2, dim=2).ne(0)
58 | xyz1 = xyz1[non_zeros1].unsqueeze(dim=0)
59 | xyz2 = xyz2[non_zeros2].unsqueeze(dim=0)
60 | 
61 | dist1, dist2 = ChamferFunction.apply(xyz1, xyz2)
62 | return torch.mean(dist1), torch.mean(dist2)
63 | 
64 | class ChamferDistanceL1(torch.nn.Module):
65 | ''' Chamfer Distance L1
66 | '''
67 | def __init__(self, ignore_zeros=False):
68 | super().__init__()
69 | self.ignore_zeros = ignore_zeros
70 | 
71 | def forward(self, xyz1, xyz2):
72 | batch_size = xyz1.size(0)
73 | if batch_size == 1 and self.ignore_zeros:
74 | non_zeros1 = torch.sum(xyz1, dim=2).ne(0)
75 | non_zeros2 = torch.sum(xyz2, dim=2).ne(0)
76 | xyz1 = xyz1[non_zeros1].unsqueeze(dim=0)
77 | xyz2 = xyz2[non_zeros2].unsqueeze(dim=0)
78 | 
79 | dist1, dist2 = ChamferFunction.apply(xyz1, xyz2)
80 | # import pdb
81 | # pdb.set_trace()
82 | dist1 = torch.sqrt(dist1)
83 | dist2 = torch.sqrt(dist2)
84 | return (torch.mean(dist1) + torch.mean(dist2))/2
85 | 
86 | 
--------------------------------------------------------------------------------
/losses/focal_loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | 
4 | 
5 | class BinaryFocalLoss(nn.Module):
6 | """
7 | Reference: https://github.com/lonePatient/TorchBlocks
8 | """
9 | 
10 | def __init__(self, gamma=2.0, alpha=0.25, epsilon=1.e-9, with_logit=False):
11 | super(BinaryFocalLoss, self).__init__()
12 | self.gamma = gamma
13 | self.alpha = alpha
14 | self.epsilon = epsilon
15 | self.with_logit = with_logit
16 | 
17 | def forward(self, input, target):
18 | """
19 | Args:
20 | input: model's output, shape of [batch_size, num_cls]
21 | target: ground truth labels, shape of [batch_size]
22 | Returns:
23 | shape of [batch_size]
24 | """
25 | multi_hot_key = target
26 | logits = input
27 | # If the model has not already applied a sigmoid, apply it here
28 | if self.with_logit:
29 | logits = torch.sigmoid(logits)
30 | zero_hot_key = 1 - multi_hot_key
31 | loss = -self.alpha * multi_hot_key * torch.pow((1 - logits), self.gamma) * (logits + self.epsilon).log()
32 | loss += -(1 - self.alpha) * zero_hot_key * torch.pow(logits, self.gamma) * (1 - logits + self.epsilon).log()
33 | return loss.mean()
34 | 
35 | 
36 | class FocalLoss(nn.Module):
37 | """
38 | Reference: https://github.com/lonePatient/TorchBlocks
39 | """
40 | 
41 | def __init__(self, gamma=2.0, alpha=1, epsilon=1.e-9, device=None):
42 | super(FocalLoss, self).__init__()
43 | self.gamma = gamma
44 | if isinstance(alpha, list):
45 | self.alpha = torch.tensor(alpha, device=device)
46 | else:
47 | self.alpha = alpha
48 | self.epsilon = epsilon
49 | 
50 | def forward(self, input, target):
51 | """
52 | Args:
53 | input: model's output, shape of [batch_size, num_cls]
54 | target: ground truth labels, shape of [batch_size]
55 | Returns:
56 | shape of [batch_size]
57 | """
58 | num_labels = input.size(-1)
59 | idx = target.view(-1, 1).long()
60 | one_hot_key = torch.zeros(idx.size(0), num_labels, dtype=torch.float32, device=idx.device)
61 | one_hot_key = one_hot_key.scatter_(1, idx, 1)
62 | one_hot_key[:, 0] = 0 # ignore 0 index.
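# The lines below implement FL(p) = -alpha * one_hot * (1 - p)^gamma * log(p + eps):
# softmax turns the logits into class probabilities, the one-hot mask keeps only
# the target-class term, and (1 - p)^gamma down-weights well-classified examples.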
63 | logits = torch.softmax(input, dim=-1) 64 | loss = -self.alpha * one_hot_key * torch.pow((1 - logits), self.gamma) * (logits + self.epsilon).log() 65 | loss = loss.sum(1) 66 | return loss.mean() 67 | 68 | 69 | if __name__ == '__main__': 70 | # 71 | m = nn.Sigmoid() 72 | loss = BinaryFocalLoss() 73 | input = torch.randn(4, 10, 256, requires_grad=True) 74 | target = torch.randn(4, 10, 256, requires_grad=True) 75 | output = loss(m(input), target) 76 | print("loss:", output) 77 | output.backward() 78 | # 79 | loss = FocalLoss(alpha=[0.1, 0.2, 0.3, 0.15, 0.25]) 80 | input = torch.randn(3, 5, requires_grad=True) 81 | target = torch.empty(3, dtype=torch.long).random_(5) 82 | output = loss(input, target) 83 | print(output) 84 | output.backward() -------------------------------------------------------------------------------- /place_recognition/Minkloc3D_V2/models/layers/pooling.py: -------------------------------------------------------------------------------- 1 | # Pooling methods code based on: https://github.com/filipradenovic/cnnimageretrieval-pytorch 2 | 3 | import torch 4 | import torch.nn as nn 5 | import MinkowskiEngine as ME 6 | 7 | from place_recognition.Minkloc3D_V2.models.layers.netvlad import NetVLADLoupe 8 | 9 | 10 | class MAC(nn.Module): 11 | def __init__(self, input_dim): 12 | super().__init__() 13 | self.input_dim = input_dim 14 | # Same output number of channels as input number of channels 15 | self.output_dim = self.input_dim 16 | self.f = ME.MinkowskiGlobalMaxPooling() 17 | 18 | def forward(self, x: ME.SparseTensor): 19 | x = self.f(x) 20 | return x.F # Return (batch_size, n_features) tensor 21 | 22 | 23 | class SPoC(nn.Module): 24 | def __init__(self, input_dim): 25 | super().__init__() 26 | self.input_dim = input_dim 27 | # Same output number of channels as input number of channels 28 | self.output_dim = self.input_dim 29 | self.f = ME.MinkowskiGlobalAvgPooling() 30 | 31 | def forward(self, x: ME.SparseTensor): 32 | x = self.f(x) 33 | return x.F # Return (batch_size, n_features) tensor 34 | 35 | 36 | class GeM(nn.Module): 37 | def __init__(self, input_dim, p=3, eps=1e-6): 38 | super(GeM, self).__init__() 39 | self.input_dim = input_dim 40 | # Same output number of channels as input number of channels 41 | self.output_dim = self.input_dim 42 | self.p = nn.Parameter(torch.ones(1) * p) 43 | self.eps = eps 44 | self.f = ME.MinkowskiGlobalAvgPooling() 45 | 46 | def forward(self, x: ME.SparseTensor): 47 | # This implicitly applies ReLU on x (clamps negative values) 48 | #temp = ME.SparseTensor(x.F.clamp(min=self.eps).pow(self.p), coordinates=x.C) 49 | temp = ME.SparseTensor(x.F.clamp(min=self.eps).pow(self.p), 50 | coordinate_manager = x.coordinate_manager, 51 | coordinate_map_key = x.coordinate_map_key) 52 | temp = self.f(temp) # Apply ME.MinkowskiGlobalAvgPooling 53 | return temp.F.pow(1./self.p) # Return (batch_size, n_features) tensor 54 | 55 | 56 | class NetVLADWrapper(nn.Module): 57 | def __init__(self, feature_size, output_dim, gating=True): 58 | super().__init__() 59 | self.feature_size = feature_size 60 | self.output_dim = output_dim 61 | self.net_vlad = NetVLADLoupe(feature_size=feature_size, cluster_size=64, output_dim=output_dim, gating=gating, 62 | add_batch_norm=True) 63 | 64 | def forward(self, x: ME.SparseTensor): 65 | # x is (batch_size, C, H, W) 66 | assert x.F.shape[1] == self.feature_size 67 | features = x.decomposed_features 68 | # features is a list of (n_points, feature_size) tensors with variable number of points 69 | batch_size = len(features) 70 | 
features = torch.nn.utils.rnn.pad_sequence(features, batch_first=True) 71 | # features is (batch_size, n_points, feature_size) tensor padded with zeros 72 | 73 | x = self.net_vlad(features) 74 | assert x.shape[0] == batch_size 75 | assert x.shape[1] == self.output_dim 76 | return x # Return (batch_size, output_dim) tensor 77 | -------------------------------------------------------------------------------- /losses/pose_est_loss.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import numpy as np 4 | import torch 5 | from torch import nn 6 | import torch.nn.functional as F 7 | 8 | 9 | class OTLoss(nn.Module): 10 | def __init__(self): 11 | super(OTLoss, self).__init__() 12 | 13 | def forward(self, scores, pairs, unpair0, unpair1, use_unpair=True): 14 | ploss = 0 15 | uloss = 0 16 | nvalid = 0 17 | for i in range(scores.shape[0]): 18 | logscore = -scores[i, :, :] 19 | if len(pairs[i]) == 0: # negative group has no point pairs! 20 | continue 21 | nvalid = nvalid + 1 22 | if len(pairs[i]) > 0: 23 | ploss += torch.mean(logscore[pairs[i][:, 0], pairs[i][:, 1]]) 24 | if len(unpair0[i]) > 0 and use_unpair: 25 | uloss += torch.mean(logscore[unpair0[i], -1]) 26 | if len(unpair1[i]) > 0 and use_unpair: 27 | uloss += torch.mean(logscore[-1, unpair1[i]]) 28 | loss = ploss + uloss 29 | if nvalid > 0: 30 | loss = loss / nvalid 31 | return loss 32 | 33 | 34 | class PPSLoss(nn.Module): 35 | def __init__(self, margin=0.5): 36 | super(PPSLoss, self).__init__() 37 | self.margin = margin 38 | 39 | def forward(self, src_feat, tgt_feat, neg_idxs): 40 | """ src_feat: b x m x d, tgt_feat: b x m x d, 41 | pairs: list of ndarray(* x (2+num_keep))) 42 | """ 43 | if tgt_feat is None: 44 | return 0.0 45 | src_feat = F.normalize(src_feat, dim=-1) 46 | tgt_feat = F.normalize(tgt_feat, dim=-1) 47 | a_vec, p_vec, n_vec = [], [], [] 48 | for i in range(src_feat.shape[0]): 49 | neg_idxs_i = neg_idxs[i] 50 | if len(neg_idxs_i) == 0: 51 | continue 52 | a_vec_i = src_feat[i][neg_idxs_i[:, 0]] # * x d 53 | p_vec_i = tgt_feat[i][neg_idxs_i[:, 1]] # * x d 54 | n_vec_i = [] 55 | for j in range(2, neg_idxs_i.shape[-1]): 56 | n_vec_ij = tgt_feat[i][neg_idxs_i[:, j]][:, None, :] # * x 1 x d 57 | n_vec_i.append(n_vec_ij) 58 | n_vec_i = torch.cat(n_vec_i, dim=1) # * x num_keep x d 59 | an_euc_dist = F.pairwise_distance(a_vec_i[:, None, :], n_vec_i) # * x num_keep 60 | n_min_ind = torch.min(an_euc_dist, dim=-1)[1] # * 61 | new_n_vec_i = [] 62 | for j in range(n_min_ind.shape[0]): 63 | n_vec_ij = n_vec_i[j, n_min_ind[j], :][None, ...] 
# 1 x 1 x d 64 | new_n_vec_i.append(n_vec_ij) 65 | new_n_vec_i = torch.cat(new_n_vec_i, dim=0) # * x d 66 | a_vec.append(a_vec_i) 67 | p_vec.append(p_vec_i) 68 | n_vec.append(new_n_vec_i) 69 | if len(a_vec) == 0: 70 | return 0.0 71 | a_vec = torch.cat(a_vec, dim=0) # * x d 72 | p_vec = torch.cat(p_vec, dim=0) # * x d 73 | n_vec = torch.cat(n_vec, dim=0) # * x d 74 | loss = 0.0 75 | if a_vec.shape[0] > 0: 76 | # a - p 77 | euc_dist = F.pairwise_distance(a_vec, p_vec) 78 | ap_loss = torch.mean(torch.pow(euc_dist, 2)) 79 | loss += ap_loss 80 | # a - n 81 | euc_dist = F.pairwise_distance(a_vec, n_vec) 82 | an_loss = torch.mean(torch.pow(torch.clamp(self.margin - euc_dist, min=0.0), 2)) 83 | loss += an_loss 84 | return loss -------------------------------------------------------------------------------- /utils/cmp_stat.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | 4 | import numpy as np 5 | 6 | 7 | def cmp_stats(ref_pkl, query_pkl, RRE_thresh=1.0, RTE_thresh=0.5, log_better=True, log_worse=False): 8 | # check input 9 | if not os.path.exists(ref_pkl) or not os.path.exists(query_pkl): 10 | return None 11 | with open(ref_pkl, 'rb') as handle: 12 | ref_aidxs, ref_RREs, ref_RTEs, ref_run_time = pickle.load(handle) 13 | if ref_aidxs is None or len(ref_aidxs) == 0: 14 | return None 15 | with open(query_pkl, 'rb') as handle: 16 | que_aidxs, que_RREs, que_RTEs, que_run_time = pickle.load(handle) 17 | if que_aidxs is None or len(que_aidxs) == 0: 18 | return None 19 | # format data 20 | ref_data = dict() 21 | for i in range(len(ref_aidxs)): 22 | if ref_aidxs[i] in que_aidxs: 23 | ref_data[ref_aidxs[i]] = { 24 | 'RRE': ref_RREs[i], 25 | 'RTE': ref_RTEs[i], 26 | 'run_time': ref_run_time[i] 27 | } 28 | que_data = dict() 29 | for i in range(len(que_aidxs)): 30 | if que_aidxs[i] in ref_aidxs: 31 | que_data[que_aidxs[i]] = { 32 | 'RRE': que_RREs[i], 33 | 'RTE': que_RTEs[i], 34 | 'run_time': que_run_time[i] 35 | } 36 | # compare 37 | keys = list(ref_data.keys()) 38 | keys.sort() 39 | delta_RREs, delta_RTEs, delta_run_times = [], [], [] 40 | for key in keys: 41 | ref, que = ref_data[key], que_data[key] 42 | delta_RREs.append(que['RRE'] - ref['RRE']) 43 | delta_RTEs.append(que['RTE'] - ref['RTE']) 44 | delta_run_times.append(que['run_time'] - ref['run_time']) 45 | is_worse = que['RRE'] - ref['RRE'] > RRE_thresh and que['RTE'] - ref['RTE'] > RTE_thresh 46 | is_better = ref['RRE'] - que['RRE'] > RRE_thresh and ref['RTE'] - que['RTE'] > RTE_thresh 47 | if is_better: 48 | if not log_better: 49 | continue 50 | print('a_idx: {}, delta RRE(deg): {:.3f}, delta RTE(m): {:.3f}, delta run time(ms): {:.2f}, state: better'.format( 51 | key, que['RRE'] - ref['RRE'], que['RTE'] - ref['RTE'], que['run_time'] - ref['run_time'])) 52 | elif is_worse: 53 | if not log_worse: 54 | continue 55 | print('a_idx: {}, delta RRE(deg): {:.3f}, delta RTE(m): {:.3f}, delta run time(ms): {:.2f}, state: worse'.format( 56 | key, que['RRE'] - ref['RRE'], que['RTE'] - ref['RTE'], que['run_time'] - ref['run_time'])) 57 | else: 58 | continue 59 | print('--------------------Summary--------------------') 60 | delta_RRE = np.mean(delta_RREs) 61 | delta_RTE = np.mean(delta_RTEs) 62 | delta_run_time = np.mean(delta_run_times) 63 | print('mean delta RRE(deg): {:.3f}, mean delta RTE(m): {:.3f}, mean delta run time(ms): {:.2f}'.format( 64 | delta_RRE, delta_RTE, delta_run_time)) 65 | 66 | 67 | if __name__ == '__main__': 68 | ref_pkl = 
'/home/ericxhzou/Code/ppt-net-plus/exp/pose_est/events/2023-08-06T22-15-27_deepl_nn_dist/2023-08-10T07-53-10_deepl_with_ransac_hankou/pose_est_res_deepl_infer_eval_individual_top_k/stat.pickle' 69 | query_pkl = '/home/ericxhzou/Code/ppt-net-plus/exp/pose_est/events/2023-08-06T22-15-27_deepl_nn_dist/2023-08-10T07-53-10_deepl_with_ransac_hankou/pose_est_res_deepl_infer_eval_group_top_k/stat.pickle' 70 | cmp_stats(ref_pkl, query_pkl, RRE_thresh=5.0, RTE_thresh=2.0, log_better=True, log_worse=False) 71 | print() 72 | print('********************************************************************************') 73 | cmp_stats(ref_pkl, query_pkl, RRE_thresh=5.0, RTE_thresh=2.0, log_better=False, log_worse=True) -------------------------------------------------------------------------------- /libs/pointops/src/ballquery/ballquery_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "ballquery_cuda_kernel.h" 3 | 4 | // input: new_xyz(b, m, 3) xyz(b, n, 3) 5 | // output: idx(b, m, nsample) 6 | __global__ void ballquery_cuda_kernel(int b, int n, int m, float radius, int nsample, const float *new_xyz, const float *xyz, int *idx) 7 | { 8 | int batch_index = blockIdx.x; 9 | xyz += batch_index * n * 3; 10 | new_xyz += batch_index * m * 3; 11 | idx += m * nsample * batch_index; 12 | int index = threadIdx.x; 13 | int stride = blockDim.x; 14 | 15 | float radius2 = radius * radius; 16 | for (int j = index; j < m; j += stride) 17 | { 18 | float new_x = new_xyz[j * 3 + 0]; 19 | float new_y = new_xyz[j * 3 + 1]; 20 | float new_z = new_xyz[j * 3 + 2]; 21 | for (int k = 0, cnt = 0; k < n && cnt < nsample; ++k) 22 | { 23 | float x = xyz[k * 3 + 0]; 24 | float y = xyz[k * 3 + 1]; 25 | float z = xyz[k * 3 + 2]; 26 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z); 27 | if (d2 < radius2) 28 | { 29 | if (cnt == 0) 30 | { 31 | for (int l = 0; l < nsample; ++l) 32 | idx[j * nsample + l] = k; 33 | } 34 | idx[j * nsample + cnt] = k; 35 | ++cnt; 36 | } 37 | } 38 | } 39 | } 40 | 41 | void ballquery_cuda_launcher(int b, int n, int m, float radius, int nsample, const float *new_xyz, const float *xyz, int *idx) 42 | { 43 | ballquery_cuda_kernel<<>>(b, n, m, radius, nsample, new_xyz, xyz, idx); 44 | } 45 | 46 | 47 | __global__ void ballquery_cuda_kernel_fast(int b, int n, int m, float radius, int nsample, const float *__restrict__ new_xyz, const float *__restrict__ xyz, int *__restrict__ idx) { 48 | int bs_idx = blockIdx.y; 49 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 50 | if (bs_idx >= b || pt_idx >= m) return; 51 | 52 | new_xyz += bs_idx * m * 3 + pt_idx * 3; 53 | xyz += bs_idx * n * 3; 54 | idx += bs_idx * m * nsample + pt_idx * nsample; 55 | 56 | float radius2 = radius * radius; 57 | float new_x = new_xyz[0]; 58 | float new_y = new_xyz[1]; 59 | float new_z = new_xyz[2]; 60 | 61 | int cnt = 0; 62 | for (int k = 0; k < n; ++k) { 63 | float x = xyz[k * 3 + 0]; 64 | float y = xyz[k * 3 + 1]; 65 | float z = xyz[k * 3 + 2]; 66 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z); 67 | if (d2 < radius2){ 68 | if (cnt == 0){ 69 | for (int l = 0; l < nsample; ++l) { 70 | idx[l] = k; 71 | } 72 | } 73 | idx[cnt] = k; 74 | ++cnt; 75 | if (cnt >= nsample){ 76 | break; 77 | } 78 | } 79 | } 80 | } 81 | 82 | 83 | void ballquery_cuda_launcher_fast(int b, int n, int m, float radius, int nsample, const float *new_xyz, const float *xyz, int *idx, cudaStream_t 
stream) {
84 | // param new_xyz: (B, m, 3)
85 | // param xyz: (B, n, 3)
86 | // param idx: (B, m, nsample)
87 | 
88 | cudaError_t err;
89 | 
90 | dim3 blocks(DIVUP(m, THREADS_PER_BLOCK), b); // blockIdx.x(col), blockIdx.y(row)
91 | dim3 threads(THREADS_PER_BLOCK);
92 | 
93 | ballquery_cuda_kernel_fast<<<blocks, threads, 0, stream>>>(b, n, m, radius, nsample, new_xyz, xyz, idx);
94 | // cudaDeviceSynchronize(); // for using printf in kernel function
95 | 
96 | err = cudaGetLastError();
97 | if (cudaSuccess != err) {
98 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
99 | exit(-1);
100 | }
101 | }
102 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## PatchAugNet
2 | * For details, please refer to [Project Main Page](https://whu-usi3dv.github.io/PatchAugNet/)
3 | 
4 | ### Benchmark Datasets
5 | * Oxford dataset (for baseline config training/testing: PointNetVLAD, PPT-Net, Minkloc3DV2)
6 | * NUS (in-house) Datasets (for testing: PointNetVLAD, PPT-Net, Minkloc3DV2)
7 | * university sector (U.S.)
8 | * residential area (R.A.)
9 | * business district (B.D.)
10 | * Self-Collected Datasets (we will publish them in the future; welcome to follow our work!)
11 | * wuhan hankou (for training/testing: PointNetVLAD, PPT-Net, Minkloc3DV2, PatchAugNet)
12 | * whu campus (for testing: PointNetVLAD, PPT-Net, Minkloc3DV2, PatchAugNet)
13 | 
14 | ### Project Code
15 | #### Pre-requisites
16 | Docker image:
17 | ```
18 | docker pull zouxh22135/pc_loc:v1
19 | ```
20 | 
21 | #### Dataset set-up
22 | * Download the zip file of the Oxford RobotCar and 3-Inhouse benchmark datasets found [here](https://drive.google.com/open?id=1H9Ep76l8KkUpwILY-13owsEMbVCYTmyx) and extract the folder.
23 | * Generate pickle files: we store the positive and negative point clouds for each anchor in pickle files used by our training and evaluation code. The files only need to be generated once; generation may take a few minutes.
24 | * Note: please check the dataset info in 'datasets/dataset_info.py'
25 | * Datasets are defined in 'datasets/dataset_info.py'; you can switch datasets with the '--dataset' argument:
26 | * oxford
27 | * university, residential, business
28 | * hankou, campus
29 | ```
30 | # For Oxford RobotCar / 3-Inhouse Datasets
31 | python datasets/place_recognition_dataset.py
32 | 
33 | # For the Self-Collected Dataset (we will publish it in the future; welcome to follow our work!)
34 | python datasets/scene_dataset.py
35 | ```
36 | 
37 | #### Place Recognition: Training and Evaluation
38 | * Build the third-party libraries
39 | ```
40 | cd libs/pointops && python setup.py install && cd ../../
41 | cd libs/chamfer_dist && python setup.py install && cd ../../
42 | cd libs/emd_module && python setup.py install && cd ../../
43 | cd libs/KNN_CUDA && python setup.py install && cd ../../
44 | ```
45 | 
46 | * Train / Evaluate PointNetVLAD / PPT-Net / Minkloc3DV2 / PatchAugNet
47 | ```
48 | # Train PointNetVLAD / PPT-Net / Minkloc3D V2 / PatchAugNet on Oxford
49 | python place_recognition/train_place_recognition.py --config configs/[pointnet_vlad / pptnet_origin / patch_aug_net].yaml --train_dataset oxford --test_dataset oxford
50 | 
51 | # Evaluate PointNetVLAD / PPT-Net / Minkloc3D V2 / PatchAugNet on Oxford, and save top k
52 | python place_recognition/evaluate.py --model_type [model type] --weight [weight pth file] --dataset oxford --exp_dir [exp_dir]
53 | 
54 | Note: model types include [pointnet_vlad / pptnet / pptnet_l2_norm / minkloc3d_v2 / patch_aug_net]
55 | datasets include [oxford / university / residential / business / hankou / campus]
56 | ```
57 | 
58 | * Train Minkloc3D V2, see [Minkloc3DV2](https://github.com/jac99/MinkLoc3Dv2)
59 | 
60 | * Model pretrained on the Self-Collected Dataset: https://drive.google.com/drive/folders/1w5Yekh-Yq2SjQmrAsVRWAWtB7xHletmK?usp=drive_link
61 | 
62 | #### TODO
63 | * Optimize the code.
64 | 
65 | #### Citation
66 | If you find the code or trained models useful, please consider citing:
67 | ```
68 | @article{zou2023patchaugnet,
69 | title={PatchAugNet: Patch feature augmentation-based heterogeneous point cloud place recognition in large-scale street scenes},
70 | author={Zou, Xianghong and Li, Jianping and Wang, Yuan and Liang, Fuxun and Wu, Weitong and Wang, Haiping and Yang, Bisheng and Dong, Zhen},
71 | journal={ISPRS Journal of Photogrammetry and Remote Sensing},
72 | volume={206},
73 | pages={273--292},
74 | year={2023},
75 | publisher={Elsevier}
76 | }
77 | ```
78 | 
79 | #### Acknowledgement
80 | Our code refers to [PointNetVLAD](https://github.com/mikacuy/pointnetvlad), [PPT-Net](https://github.com/fpthink/PPT-Net), [Minkloc3DV2](https://github.com/jac99/MinkLoc3Dv2).
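For reference, evaluation on these benchmarks reports recall@k over the retrieved top-k database candidates. A minimal sketch of that metric (an illustration only, with hypothetical variable names; it assumes L2-normalized global descriptors and is not the repository's evaluate.py):

```python
import numpy as np

def recall_at_k(query_desc, db_desc, gt_positives, k=25):
    # query_desc: (nq, d), db_desc: (ndb, d), both L2-normalized.
    # gt_positives[i]: set of database indices that are true matches for query i.
    sims = query_desc @ db_desc.T                 # cosine similarity = inner product
    topk = np.argsort(-sims, axis=1)[:, :k]       # indices of the k best candidates
    hits = [bool(set(topk[i]) & gt_positives[i]) for i in range(len(query_desc))]
    return float(np.mean(hits))
```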
81 | -------------------------------------------------------------------------------- /libs/pointops/src/grouping/grouping_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "grouping_cuda_kernel.h" 3 | 4 | // input: points(b, c, n) idx(b, m, nsample) 5 | // output: out(b, c, m, nsample) 6 | __global__ void grouping_forward_cuda_kernel(int b, int c, int n, int m, int nsample, const float *points, const int *idx, float *out) 7 | { 8 | int batch_index = blockIdx.x; 9 | points += batch_index * n * c; 10 | idx += batch_index * m * nsample; 11 | out += batch_index * m * nsample * c; 12 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 13 | const int stride = blockDim.y * blockDim.x; 14 | for (int i = index; i < c * m; i += stride) 15 | { 16 | const int l = i / m; 17 | const int j = i % m; 18 | for (int k = 0; k < nsample; ++k) 19 | { 20 | int ii = idx[j * nsample + k]; 21 | out[(l * m + j) * nsample + k] = points[l * n + ii]; 22 | } 23 | } 24 | } 25 | 26 | // input: grad_out(b, c, m, nsample), idx(b, m, nsample) 27 | // output: grad_points(b, c, n) 28 | __global__ void grouping_backward_cuda_kernel(int b, int c, int n, int m, int nsample, const float *grad_out, const int *idx, float *grad_points) 29 | { 30 | int batch_index = blockIdx.x; 31 | grad_out += batch_index * m * nsample * c; 32 | idx += batch_index * m * nsample; 33 | grad_points += batch_index * n * c; 34 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 35 | const int stride = blockDim.y * blockDim.x; 36 | for (int i = index; i < c * m; i += stride) 37 | { 38 | const int l = i / m; 39 | const int j = i % m; 40 | for (int k = 0; k < nsample; ++k) 41 | { 42 | int ii = idx[j * nsample + k]; 43 | atomicAdd(grad_points + l * n + ii, grad_out[(l * m + j) * nsample + k]); 44 | } 45 | } 46 | } 47 | 48 | void grouping_forward_cuda_launcher(int b, int c, int n, int m, int nsample, const float *points, const int *idx, float *out) 49 | { 50 | grouping_forward_cuda_kernel<<>>(b, c, n, m, nsample, points, idx, out); 51 | } 52 | 53 | void grouping_backward_cuda_launcher(int b, int c, int n, int m, int nsample, const float *grad_out, const int *idx, float *grad_points) 54 | { 55 | grouping_backward_cuda_kernel<<>>(b, c, n, m, nsample, grad_out, idx, grad_points); 56 | } 57 | 58 | // input: points(b, c, n) idx(b, npoints, nsample) 59 | // output: out(b, c, npoints, nsample) 60 | __global__ void grouping_forward_cuda_kernel_fast(int b, int c, int n, int npoints, int nsample, const float *__restrict__ points, const int *__restrict__ idx, float *__restrict__ out) { 61 | int bs_idx = blockIdx.z; 62 | int c_idx = blockIdx.y; 63 | int index = blockIdx.x * blockDim.x + threadIdx.x; 64 | int pt_idx = index / nsample; 65 | if (bs_idx >= b || c_idx >= c || pt_idx >= npoints) return; 66 | 67 | int sample_idx = index % nsample; 68 | 69 | idx += bs_idx * npoints * nsample + pt_idx * nsample + sample_idx; 70 | int in_idx = bs_idx * c * n + c_idx * n + idx[0]; 71 | int out_idx = bs_idx * c * npoints * nsample + c_idx * npoints * nsample + pt_idx * nsample + sample_idx; 72 | 73 | out[out_idx] = points[in_idx]; 74 | } 75 | 76 | // input: points(b, c, n) idx(b, npoints, nsample) 77 | // output: out(b, c, npoints, nsample) 78 | void grouping_forward_cuda_launcher_fast(int b, int c, int n, int npoints, int nsample, const float *points, const int *idx, float *out) { 79 | 80 | cudaError_t err; 81 | 82 | dim3 blocks(DIVUP(npoints * nsample, THREADS_PER_BLOCK), c, 
b); // blockIdx.x(col), blockIdx.y(row)
83 | dim3 threads(THREADS_PER_BLOCK);
84 | 
85 | grouping_forward_cuda_kernel_fast<<<blocks, threads>>>(b, c, n, npoints, nsample, points, idx, out);
86 | // cudaDeviceSynchronize(); // for using printf in kernel function
87 | err = cudaGetLastError();
88 | if (cudaSuccess != err) {
89 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
90 | exit(-1);
91 | }
92 | }
93 | 
94 | 
95 | 
--------------------------------------------------------------------------------
/utils/model_util/transformer.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 | """
3 | Mainly copy-paste from https://github.com/facebookresearch/detr/blob/master/models/transformer.py
4 | """
5 | """
6 | DETR Transformer class.
7 | 
8 | Copy-paste from torch.nn.Transformer with modifications:
9 | * positional encodings are passed in MHattention
10 | * extra LN at the end of encoder is removed
11 | * decoder returns a stack of activations from all decoding layers
12 | """
13 | 
14 | import torch.nn.functional as F
15 | from torch import nn
16 | from utils.model_util.linear_multihead_attention import LinearMultiheadAttention
17 | 
18 | 
19 | class TransformerEncoderLayer(nn.Module):
20 | 
21 | def __init__(self,
22 | d_model,
23 | nhead,
24 | dim_feedforward,
25 | mha_dropout,
26 | ffn_dropout,
27 | activation,
28 | normalize_before,
29 | attn_mode='full', # 'linear' or 'full' attention
30 | seq_len=None, # sequence length for linear attention, i.e. num of input tokens
31 | proj_k=128, # the projected dimension 'k' in the Linformer paper. Default: 128
32 | param_sharing=None # parameter sharing mode: 'layerwise' or 'none' ('headwise' is not implemented). Default: none.
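# (note) with attn_mode='linear', LinearMultiheadAttention follows Linformer:
# the length-seq_len key/value sequences are projected down to proj_k, reducing
# self-attention cost from O(seq_len^2) to O(seq_len * proj_k).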
33 | ): 34 | super().__init__() 35 | 36 | if attn_mode == 'full': 37 | self.mha = nn.MultiheadAttention(d_model, nhead, dropout=mha_dropout) 38 | else: 39 | self.mha = LinearMultiheadAttention(d_model, nhead, dropout=mha_dropout, 40 | seq_len=seq_len, proj_k=proj_k, 41 | param_sharing=param_sharing) 42 | 43 | # Implementation of Feedforward model 44 | self.linear1 = nn.Linear(d_model, dim_feedforward) 45 | self.dropout = nn.Dropout(ffn_dropout) 46 | self.linear2 = nn.Linear(dim_feedforward, d_model) 47 | 48 | self.norm1 = nn.LayerNorm(d_model) 49 | self.norm2 = nn.LayerNorm(d_model) 50 | self.dropout1 = nn.Dropout(ffn_dropout) 51 | self.dropout2 = nn.Dropout(ffn_dropout) 52 | 53 | self.activation = _get_activation_fn(activation) 54 | self.normalize_before = normalize_before 55 | 56 | def forward_post(self, src, tgt): 57 | # q comes from src, k and v from tgt; for self-attention, src and tgt are the same tensor 58 | q, k, v = src, tgt, tgt 59 | 60 | # MHA 61 | src2 = self.mha(query=q, key=k, value=v)[0] 62 | src = src + self.dropout1(src2) 63 | src = self.norm1(src) 64 | 65 | # FFN 66 | src2 = self.linear2(self.dropout(self.activation(self.linear1(src)))) 67 | src = src + self.dropout2(src2) 68 | src = self.norm2(src) 69 | return src 70 | 71 | def forward_pre(self, src, tgt): 72 | src2 = self.norm1(src) 73 | q, k, v = src2, src2, src2 74 | 75 | # MHA 76 | src2 = self.mha(query=q, key=k, value=v)[0] 77 | src = src + self.dropout1(src2) 78 | 79 | # FFN 80 | src2 = self.norm2(src) 81 | src2 = self.linear2(self.dropout(self.activation(self.linear1(src2)))) 82 | src = src + self.dropout2(src2) 83 | return src 84 | 85 | def forward(self, src, tgt): 86 | 87 | if self.normalize_before: 88 | return self.forward_pre(src, tgt) 89 | else: 90 | return self.forward_post(src, tgt) 91 | 92 | def _get_activation_fn(activation): 93 | """Return an activation function given a string""" 94 | if activation == "relu": 95 | return F.relu 96 | if activation == "gelu": 97 | return F.gelu 98 | if activation == "glu": 99 | return F.glu 100 | if activation == "elu": 101 | return F.elu 102 | raise RuntimeError(F"activation should be relu/gelu/glu/elu, not {activation}.") 103 | -------------------------------------------------------------------------------- /libs/emd_module/emd_module.py: -------------------------------------------------------------------------------- 1 | # EMD approximation module (based on auction algorithm) 2 | # memory complexity: O(n) 3 | # time complexity: O(n^2 * iter) 4 | # author: Minghua Liu 5 | 6 | # Input: 7 | # xyz1, xyz2: [#batch, #points, 3] 8 | # where xyz1 is the predicted point cloud and xyz2 is the ground truth point cloud 9 | # two point clouds should have the same size and be normalized to [0, 1] 10 | # #points should be a multiple of 1024 11 | # #batch should be no greater than 512 12 | # eps is a parameter which balances the error rate and the speed of convergence 13 | # iters is the number of iterations 14 | # we only calculate gradient for xyz1 15 | 16 | # Output: 17 | # dist: [#batch, #points], sqrt(dist) -> L2 distance 18 | # assignment: [#batch, #points], index of the matched point in the ground truth point cloud 19 | # the result is an approximation and the assignment is not guaranteed to be a bijection 20 | 21 | import time 22 | import numpy as np 23 | import torch 24 | from torch import nn 25 | from torch.autograd import Function 26 | import emd 27 | 28 | 29 | class emdFunction(Function): 30 | @staticmethod 31 | def forward(ctx, xyz1, xyz2, eps, iters): 32 | batchsize, n, _ = xyz1.size() 33 | _, m, _ = xyz2.size()
34 | 35 | assert (n == m) 36 | assert (xyz1.size()[0] == xyz2.size()[0]) 37 | # assert(n % 1024 == 0) 38 | assert (batchsize <= 512) 39 | 40 | xyz1 = xyz1.contiguous().float().cuda() 41 | xyz2 = xyz2.contiguous().float().cuda() 42 | dist = torch.zeros(batchsize, n, device='cuda').contiguous() 43 | assignment = torch.zeros(batchsize, n, device='cuda', dtype=torch.int32).contiguous() - 1 44 | assignment_inv = torch.zeros(batchsize, m, device='cuda', dtype=torch.int32).contiguous() - 1 45 | price = torch.zeros(batchsize, m, device='cuda').contiguous() 46 | bid = torch.zeros(batchsize, n, device='cuda', dtype=torch.int32).contiguous() 47 | bid_increments = torch.zeros(batchsize, n, device='cuda').contiguous() 48 | max_increments = torch.zeros(batchsize, m, device='cuda').contiguous() 49 | unass_idx = torch.zeros(batchsize * n, device='cuda', dtype=torch.int32).contiguous() 50 | max_idx = torch.zeros(batchsize * m, device='cuda', dtype=torch.int32).contiguous() 51 | unass_cnt = torch.zeros(512, dtype=torch.int32, device='cuda').contiguous() 52 | unass_cnt_sum = torch.zeros(512, dtype=torch.int32, device='cuda').contiguous() 53 | cnt_tmp = torch.zeros(512, dtype=torch.int32, device='cuda').contiguous() 54 | 55 | emd.forward(xyz1, xyz2, dist, assignment, price, assignment_inv, bid, bid_increments, max_increments, unass_idx, 56 | unass_cnt, unass_cnt_sum, cnt_tmp, max_idx, eps, iters) 57 | 58 | ctx.save_for_backward(xyz1, xyz2, assignment) 59 | return dist, assignment 60 | 61 | @staticmethod 62 | def backward(ctx, graddist, gradidx): 63 | xyz1, xyz2, assignment = ctx.saved_tensors 64 | graddist = graddist.contiguous() 65 | 66 | gradxyz1 = torch.zeros(xyz1.size(), device='cuda').contiguous() 67 | gradxyz2 = torch.zeros(xyz2.size(), device='cuda').contiguous() 68 | 69 | emd.backward(xyz1, xyz2, gradxyz1, graddist, assignment) 70 | return gradxyz1, gradxyz2, None, None 71 | 72 | 73 | class emdModule(nn.Module): 74 | def __init__(self): 75 | super(emdModule, self).__init__() 76 | 77 | def forward(self, input1, input2, eps, iters): 78 | return emdFunction.apply(input1, input2, eps, iters) 79 | 80 | 81 | def test_emd(): 82 | x1 = torch.rand(20, 8192, 3).cuda() 83 | x2 = torch.rand(20, 8192, 3).cuda() 84 | emd = emdModule() 85 | start_time = time.perf_counter() 86 | dis, assignment = emd(x1, x2, 0.05, 3000) 87 | print("Input_size: ", x1.shape) 88 | print("Runtime: %lfs" % (time.perf_counter() - start_time)) 89 | print("EMD: %lf" % np.sqrt(dis.cpu().numpy()).mean()) 90 | print("|set(assignment)|: %d" % assignment.unique().numel()) 91 | assignment = np.expand_dims(assignment.cpu().numpy(), -1) 92 | x1, x2 = x1.cpu().numpy(), x2.cpu().numpy() # move everything to numpy before np.take_along_axis; the original mixed torch CUDA tensors and numpy here, which fails at runtime 93 | x2 = np.take_along_axis(x2, assignment, axis=1) 94 | d = (x1 - x2) * (x1 - x2) 95 | print("Verified EMD: %lf" % np.sqrt(d.sum(-1)).mean()) 96 | 97 | # test_emd() 98 | -------------------------------------------------------------------------------- /utils/model_util/attention.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.nn import init 4 | 5 | 6 | class SEAttention(nn.Module): 7 | 8 | def __init__(self, channel=512, reduction=16): 9 | super().__init__() 10 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 11 | self.fc = nn.Sequential( 12 | nn.Linear(channel, channel // reduction, bias=False), 13 | nn.ReLU(inplace=True), 14 | nn.Linear(channel // reduction, channel, bias=False), 15 | nn.Sigmoid() 16 | ) 17 | 18 | def init_weights(self): 19 | for m in self.modules(): 20 | if isinstance(m, 
nn.Conv2d): 21 | init.kaiming_normal_(m.weight, mode='fan_out') 22 | if m.bias is not None: 23 | init.constant_(m.bias, 0) 24 | elif isinstance(m, nn.BatchNorm2d): 25 | init.constant_(m.weight, 1) 26 | init.constant_(m.bias, 0) 27 | elif isinstance(m, nn.Linear): 28 | init.normal_(m.weight, std=0.001) 29 | if m.bias is not None: 30 | init.constant_(m.bias, 0) 31 | 32 | def forward(self, x): 33 | b, c, _, _ = x.size() 34 | y = self.avg_pool(x).view(b, c) 35 | y = self.fc(y).view(b, c, 1, 1) 36 | return x * y.expand_as(x) 37 | 38 | 39 | class ChannelAttentionModule(nn.Module): 40 | """ this function is used to achieve the channel attention module in CBAM paper""" 41 | def __init__(self, C, ratio=8): # getting from the CBAM paper, ratio=16 42 | super(ChannelAttentionModule, self).__init__() 43 | self.mlp = nn.Sequential( 44 | nn.Conv1d(in_channels=C, out_channels=C // ratio, kernel_size=1, bias=False), 45 | nn.ReLU(), 46 | nn.Conv1d(in_channels=C // ratio, out_channels=C, kernel_size=1, bias=False) 47 | ) 48 | self.sigmoid = nn.Sigmoid() 49 | 50 | def forward(self, x): 51 | """ x: B x C x N""" 52 | out1 = torch.mean(x, dim=-1, keepdim=True) # b, c, 1 53 | out1 = self.mlp(out1) # b, c, 1 54 | out2 = nn.AdaptiveMaxPool1d(1)(x) # b, c, 1 55 | out2 = self.mlp(out2) # b, c, 1 56 | out = self.sigmoid(out1 + out2) 57 | return out * x 58 | 59 | 60 | class SpatialAttentionModule(nn.Module): 61 | """ this function is used to achieve the spatial attention module in CBAM paper""" 62 | def __init__(self): 63 | super(SpatialAttentionModule, self).__init__() 64 | self.conv1 = nn.Conv1d(in_channels=2, out_channels=1, kernel_size=1, bias=False) 65 | self.bn = nn.BatchNorm1d(1, eps=1e-5, momentum=0.01, affine=True) 66 | self.relu = nn.ReLU() 67 | #self.sigmoid = nn.Sigmoid() 68 | self.softmax = nn.Softmax(dim=-1) 69 | 70 | def forward(self, x, return_attn=False): 71 | """ x: B x C x N""" 72 | out1 = torch.mean(x, dim=1, keepdim=True) # B,1,N 73 | out2, _ = torch.max(x, dim=1, keepdim=True) # B,1,N 74 | out = torch.cat([out2, out1], dim=1) # B,2,N 75 | 76 | out = self.conv1(out) # B,1,N 77 | out = self.bn(out) # B,1,N 78 | out = self.relu(out) # B,1,N 79 | 80 | #att = self.sigmoid(out) # B, 1, N 81 | att = self.softmax(out) 82 | res = att * x 83 | if return_attn: 84 | res = res, att 85 | return res 86 | 87 | 88 | class CBAMAttentionModule(nn.Module): 89 | def __init__(self, C, ratio=8): 90 | super(CBAMAttentionModule, self).__init__() 91 | self.channel_attn = ChannelAttentionModule(C, ratio) 92 | self.spatial_attn = SpatialAttentionModule() 93 | 94 | def forward(self, x, return_att=False): 95 | x = self.channel_attn(x) 96 | return self.spatial_attn(x, return_att) 97 | 98 | 99 | if __name__ == '__main__': 100 | # SE 101 | input = torch.randn(50, 512, 49) 102 | input = input.unsqueeze(-1) 103 | se = SEAttention(channel=512, reduction=8) 104 | output = se(input) 105 | output = output.squeeze(-1) 106 | print(output.shape) 107 | # Spatial attention 108 | input = input.squeeze(-1) 109 | spatial_attn = SpatialAttentionModule() 110 | output = spatial_attn(input) 111 | print(output.shape) 112 | -------------------------------------------------------------------------------- /place_recognition/Minkloc3D_V2/models/layers/netvlad.py: -------------------------------------------------------------------------------- 1 | """ 2 | PointNet code taken from PointNetVLAD Pytorch implementation. 
3 | """ 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.utils.data 8 | import torch.nn.functional as F 9 | import math 10 | 11 | """ 12 | NOTE: The toolbox can only pool lists of features of the same length. It was specifically optimized to efficiently 13 | do so. One way to handle multiple lists of features of variable length is to create, via a data augmentation 14 | technique, a tensor of shape: 'batch_size'x'max_samples'x'feature_size'. Where 'max_samples' would be the maximum 15 | number of feature per list. Then for each list, you would fill the tensor with 0 values. 16 | """ 17 | 18 | class NetVLADLoupe(nn.Module): 19 | def __init__(self, feature_size, cluster_size, output_dim, gating=True, add_batch_norm=True): 20 | super().__init__() 21 | self.feature_size = feature_size 22 | self.output_dim = output_dim 23 | self.gating = gating 24 | self.add_batch_norm = add_batch_norm 25 | self.cluster_size = cluster_size 26 | self.softmax = nn.Softmax(dim=-1) 27 | self.cluster_weights = nn.Parameter(torch.randn(feature_size, cluster_size) * 1 / math.sqrt(feature_size)) 28 | self.cluster_weights2 = nn.Parameter(torch.randn(1, feature_size, cluster_size) * 1 / math.sqrt(feature_size)) 29 | self.hidden1_weights = nn.Parameter( 30 | torch.randn(cluster_size * feature_size, output_dim) * 1 / math.sqrt(feature_size)) 31 | 32 | if add_batch_norm: 33 | self.cluster_biases = None 34 | self.bn1 = nn.BatchNorm1d(cluster_size) 35 | else: 36 | self.cluster_biases = nn.Parameter(torch.randn(cluster_size) * 1 / math.sqrt(feature_size)) 37 | self.bn1 = None 38 | 39 | self.bn2 = nn.BatchNorm1d(output_dim) 40 | 41 | if gating: 42 | self.context_gating = GatingContext(output_dim, add_batch_norm=add_batch_norm) 43 | 44 | def forward(self, x): 45 | # Expects (batch_size, num_points, channels) tensor 46 | assert x.dim() == 3 47 | num_points = x.shape[1] 48 | activation = torch.matmul(x, self.cluster_weights) 49 | if self.add_batch_norm: 50 | # activation = activation.transpose(1,2).contiguous() 51 | activation = activation.view(-1, self.cluster_size) 52 | activation = self.bn1(activation) 53 | activation = activation.view(-1, num_points, self.cluster_size) 54 | # activation = activation.transpose(1,2).contiguous() 55 | else: 56 | activation = activation + self.cluster_biases 57 | activation = self.softmax(activation) 58 | activation = activation.view((-1, num_points, self.cluster_size)) 59 | 60 | a_sum = activation.sum(-2, keepdim=True) 61 | a = a_sum * self.cluster_weights2 62 | 63 | activation = torch.transpose(activation, 2, 1) 64 | x = x.view((-1, num_points, self.feature_size)) 65 | vlad = torch.matmul(activation, x) 66 | vlad = torch.transpose(vlad, 2, 1) 67 | vlad = vlad - a 68 | 69 | vlad = F.normalize(vlad, dim=1, p=2) 70 | vlad = vlad.reshape((-1, self.cluster_size * self.feature_size)) 71 | vlad = F.normalize(vlad, dim=1, p=2) 72 | 73 | vlad = torch.matmul(vlad, self.hidden1_weights) 74 | 75 | vlad = self.bn2(vlad) 76 | 77 | if self.gating: 78 | vlad = self.context_gating(vlad) 79 | 80 | return vlad 81 | 82 | 83 | class GatingContext(nn.Module): 84 | def __init__(self, dim, add_batch_norm=True): 85 | super(GatingContext, self).__init__() 86 | self.dim = dim 87 | self.add_batch_norm = add_batch_norm 88 | self.gating_weights = nn.Parameter( 89 | torch.randn(dim, dim) * 1 / math.sqrt(dim)) 90 | self.sigmoid = nn.Sigmoid() 91 | 92 | if add_batch_norm: 93 | self.gating_biases = None 94 | self.bn1 = nn.BatchNorm1d(dim) 95 | else: 96 | self.gating_biases = nn.Parameter( 97 | torch.randn(dim) * 1 
/ math.sqrt(dim)) 98 | self.bn1 = None 99 | 100 | def forward(self, x): 101 | gates = torch.matmul(x, self.gating_weights) 102 | 103 | if self.add_batch_norm: 104 | gates = self.bn1(gates) 105 | else: 106 | gates = gates + self.gating_biases 107 | 108 | gates = self.sigmoid(gates) 109 | 110 | activation = x * gates 111 | 112 | return activation 113 | -------------------------------------------------------------------------------- /place_recognition/Minkloc3D_V2/models/minkfpn.py: -------------------------------------------------------------------------------- 1 | # Warsaw University of Technology 2 | 3 | import torch.nn as nn 4 | import MinkowskiEngine as ME 5 | from MinkowskiEngine.modules.resnet_block import BasicBlock 6 | from place_recognition.Minkloc3D_V2.models.resnet import ResNetBase 7 | 8 | 9 | class MinkFPN(ResNetBase): 10 | # Feature Pyramid Network (FPN) architecture implementation using Minkowski ResNet building blocks 11 | def __init__(self, in_channels, out_channels, num_top_down=1, conv0_kernel_size=5, block=BasicBlock, 12 | layers=(1, 1, 1), planes=(32, 64, 64)): 13 | assert len(layers) == len(planes) 14 | assert 1 <= len(layers) 15 | assert 0 <= num_top_down <= len(layers) 16 | self.num_bottom_up = len(layers) 17 | self.num_top_down = num_top_down 18 | self.conv0_kernel_size = conv0_kernel_size 19 | self.block = block 20 | self.layers = layers 21 | self.planes = planes 22 | self.lateral_dim = out_channels 23 | self.init_dim = planes[0] 24 | ResNetBase.__init__(self, in_channels, out_channels, D=3) 25 | 26 | def network_initialization(self, in_channels, out_channels, D): 27 | assert len(self.layers) == len(self.planes) 28 | assert len(self.planes) == self.num_bottom_up 29 | 30 | self.convs = nn.ModuleList() # Bottom-up convolutional blocks with stride=2 31 | self.bn = nn.ModuleList() # Bottom-up BatchNorms 32 | self.blocks = nn.ModuleList() # Bottom-up blocks 33 | self.tconvs = nn.ModuleList() # Top-down transposed convolutions 34 | self.conv1x1 = nn.ModuleList() # 1x1 convolutions in lateral connections 35 | 36 | # The first convolution is a special case, with kernel size = 5 37 | self.inplanes = self.planes[0] 38 | self.conv0 = ME.MinkowskiConvolution(in_channels, self.inplanes, kernel_size=self.conv0_kernel_size, 39 | dimension=D) 40 | self.bn0 = ME.MinkowskiBatchNorm(self.inplanes) 41 | 42 | for plane, layer in zip(self.planes, self.layers): 43 | self.convs.append(ME.MinkowskiConvolution(self.inplanes, self.inplanes, kernel_size=2, stride=2, dimension=D)) 44 | self.bn.append(ME.MinkowskiBatchNorm(self.inplanes)) 45 | self.blocks.append(self._make_layer(self.block, plane, layer)) 46 | 47 | # Lateral connections 48 | for i in range(self.num_top_down): 49 | self.conv1x1.append(ME.MinkowskiConvolution(self.planes[-1 - i], self.lateral_dim, kernel_size=1, 50 | stride=1, dimension=D)) 51 | self.tconvs.append(ME.MinkowskiConvolutionTranspose(self.lateral_dim, self.lateral_dim, kernel_size=2, 52 | stride=2, dimension=D)) 53 | # There's one more lateral connection than top-down TConv blocks 54 | if self.num_top_down < self.num_bottom_up: 55 | # Lateral connection from Conv block 1 or above 56 | self.conv1x1.append(ME.MinkowskiConvolution(self.planes[-1 - self.num_top_down], self.lateral_dim, kernel_size=1, 57 | stride=1, dimension=D)) 58 | else: 59 | # Lateral connection from Conv0 block 60 | self.conv1x1.append(ME.MinkowskiConvolution(self.planes[0], self.lateral_dim, kernel_size=1, 61 | stride=1, dimension=D)) 62 | 63 | self.relu = ME.MinkowskiReLU(inplace=True) 64 | 65 | def 
forward(self, x): 66 | # *** BOTTOM-UP PASS *** 67 | # The first bottom-up convolution is a special case (bigger kernel) 68 | feature_maps = [] 69 | x = self.conv0(x) 70 | x = self.bn0(x) 71 | x = self.relu(x) 72 | if self.num_top_down == self.num_bottom_up: 73 | feature_maps.append(x) 74 | 75 | # BOTTOM-UP PASS 76 | for ndx, (conv, bn, block) in enumerate(zip(self.convs, self.bn, self.blocks)): 77 | x = conv(x) # Downsample (conv stride=2 with 2x2x2 kernel) 78 | x = bn(x) 79 | x = self.relu(x) 80 | x = block(x) 81 | if self.num_bottom_up - 1 - self.num_top_down <= ndx < len(self.convs) - 1: 82 | feature_maps.append(x) 83 | 84 | assert len(feature_maps) == self.num_top_down 85 | 86 | x = self.conv1x1[0](x) 87 | 88 | # TOP-DOWN PASS 89 | for ndx, tconv in enumerate(self.tconvs): 90 | x = tconv(x) # Upsample using transposed convolution 91 | x = x + self.conv1x1[ndx+1](feature_maps[-ndx - 1]) 92 | 93 | return x 94 | -------------------------------------------------------------------------------- /libs/pointops/src/featuredistribute/featuredistribute_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "featuredistribute_cuda_kernel.h" 3 | 4 | __global__ void featuredistribute_cuda_kernel(int b, int n, int m, const float *max_xyz, const float *xyz, int *distribute_idx) { 5 | int bs_idx = blockIdx.y; 6 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 7 | if (bs_idx >= b || pt_idx >= m) return; 8 | 9 | max_xyz += bs_idx * n * 3; 10 | xyz += bs_idx * m * 3 + pt_idx * 3; 11 | distribute_idx += bs_idx * m + pt_idx; 12 | 13 | float x = xyz[0]; 14 | float y = xyz[1]; 15 | float z = xyz[2]; 16 | 17 | float min_dist2 = 100000; 18 | int min_dist_idx = -1; 19 | for (int k = 0; k < n; ++k) { 20 | float max_x = max_xyz[k * 3 + 0]; 21 | float max_y = max_xyz[k * 3 + 1]; 22 | float max_z = max_xyz[k * 3 + 2]; 23 | float d2 = (max_x - x) * (max_x - x) + (max_y - y) * (max_y - y) + (max_z - z) * (max_z - z); 24 | if (d2 < min_dist2){ 25 | min_dist_idx = k; 26 | min_dist2 = d2; 27 | } 28 | } 29 | distribute_idx[0] = min_dist_idx; 30 | } 31 | 32 | 33 | void featuredistribute_cuda_launcher(int b, int n, int m, const float *max_xyz, const float *xyz, int *distribute_idx, cudaStream_t stream) { 34 | // param max_xyz: (b, n, 3) 35 | // param xyz: (b, m, 3) 36 | // return distribute_idx: (b, m) 37 | 38 | cudaError_t err; 39 | 40 | dim3 blocks(DIVUP(m, THREADS_PER_BLOCK), b); // blockIdx.x(col), blockIdx.y(row) 41 | dim3 threads(THREADS_PER_BLOCK); 42 | 43 | featuredistribute_cuda_kernel<<<blocks, threads, 0, stream>>>(b, n, m, max_xyz, xyz, distribute_idx); // <<<...>>> launch arguments reconstructed (stripped during extraction); passing the launcher's stream with zero shared memory is an assumption 44 | // cudaDeviceSynchronize(); // for using printf in kernel function 45 | 46 | err = cudaGetLastError(); 47 | if (cudaSuccess != err) { 48 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 49 | exit(-1); 50 | } 51 | } 52 | 53 | __global__ void featuregather_forward_cuda_kernel(int b, int n, int m, int c, const float *max_feature, const int *distribute_idx, float *distribute_feature) { 54 | int bs_idx = blockIdx.z; 55 | int c_idx = blockIdx.y; 56 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 57 | if (bs_idx >= b || c_idx >= c || pt_idx >= m) return; 58 | 59 | max_feature += bs_idx * c * n + c_idx * n; 60 | distribute_idx += bs_idx * m + pt_idx; 61 | distribute_feature += bs_idx * c * m + c_idx * m + pt_idx; 62 | 63 | int idx = distribute_idx[0]; 64 | distribute_feature[0] = max_feature[idx]; 65 | } 66 | 67 | 68 | void featuregather_forward_cuda_launcher(int b, int n, int m, int c, const float *max_feature, const int *distribute_idx, float *distribute_feature, cudaStream_t stream){
69 | // param max_feature: (b, c, n) 70 | // param distribute_idx: (b, m) 71 | // return distribute_feature: (b, c, m) 72 | 73 | cudaError_t err; 74 | 75 | dim3 blocks(DIVUP(m, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row) 76 | dim3 threads(THREADS_PER_BLOCK); 77 | 78 | featuregather_forward_cuda_kernel<<<blocks, threads, 0, stream>>>(b, n, m, c, max_feature, distribute_idx, distribute_feature); // <<<...>>> launch arguments reconstructed (stripped during extraction) 79 | // cudaDeviceSynchronize(); // for using printf in kernel function 80 | 81 | err = cudaGetLastError(); 82 | if (cudaSuccess != err) { 83 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 84 | exit(-1); 85 | } 86 | } 87 | 88 | 89 | __global__ void featuregather_backward_cuda_kernel(int b, int n, int m, int c, const float *grad_distribute_feature, const int *distribute_idx, float *grad_max_feature){ 90 | int bs_idx = blockIdx.z; 91 | int c_idx = blockIdx.y; 92 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 93 | if(bs_idx >= b || c_idx >= c || pt_idx >= m) return; 94 | 95 | grad_distribute_feature += bs_idx * c * m + c_idx * m + pt_idx; 96 | distribute_idx += bs_idx * m + pt_idx; 97 | grad_max_feature += bs_idx * c * n + c_idx * n; 98 | 99 | int idx = distribute_idx[0]; 100 | atomicAdd(grad_max_feature + idx, grad_distribute_feature[0]); 101 | } 102 | 103 | 104 | void featuregather_backward_cuda_launcher(int b, int n, int m, int c, const float *grad_distribute_feature, const int *distribute_idx, float *grad_max_feature, cudaStream_t stream){ 105 | // param grad_distribute_feature: (b, c, m) 106 | // param distribute_idx: (b, m) 107 | // return grad_max_feature: (b, c, n) 108 | 109 | cudaError_t err; 110 | 111 | dim3 blocks(DIVUP(m, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row) 112 | dim3 threads(THREADS_PER_BLOCK); 113 | 114 | featuregather_backward_cuda_kernel<<<blocks, threads, 0, stream>>>(b, n, m, c, grad_distribute_feature, distribute_idx, grad_max_feature); // <<<...>>> launch arguments reconstructed (stripped during extraction) 115 | // cudaDeviceSynchronize(); // for using printf in kernel function 116 | 117 | err = cudaGetLastError(); 118 | if (cudaSuccess != err) { 119 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 120 | exit(-1); 121 | } 122 | } -------------------------------------------------------------------------------- /place_recognition/patch_aug_net/models/pointnet_autoencoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class STN3D(nn.Module): 7 | def __init__(self, input_channels=3): 8 | super(STN3D, self).__init__() 9 | self.input_channels = input_channels 10 | self.mlp1 = nn.Sequential( 11 | nn.Conv1d(input_channels, 64, 1), 12 | nn.BatchNorm1d(64), 13 | nn.ReLU(), 14 | nn.Conv1d(64, 128, 1), 15 | nn.BatchNorm1d(128), 16 | nn.ReLU(), 17 | nn.Conv1d(128, 1024, 1), 18 | nn.BatchNorm1d(1024), 19 | nn.ReLU(), 20 | ) 21 | self.mlp2 = nn.Sequential( 22 | nn.Linear(1024, 512), 23 | nn.BatchNorm1d(512), 24 | nn.ReLU(), 25 | nn.Linear(512, 256), 26 | nn.BatchNorm1d(256), 27 | nn.ReLU(), 28 | nn.Linear(256, input_channels * input_channels), 29 | ) 30 | 31 | def forward(self, x): 32 | batch_size = x.size(0) 33 | num_points = x.size(2) 34 | x = self.mlp1(x) 35 | x = F.max_pool1d(x, num_points).squeeze(2) 36 | x = self.mlp2(x) 37 | I = torch.eye(self.input_channels).view(-1).to(x.device) 38 | x = x + I 39 | x = x.view(-1, self.input_channels, self.input_channels) 40 | return x 41 | 42 | 
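# (added sketch) STN3D above predicts one input_channels x input_channels alignment matrix per
# sample, biased towards the identity by the "+ I" term. Note that PointNetEncoder below
# instantiates stn1/stn2 but never applies them in its forward pass; if alignment were wanted,
# applying the predicted transform would look roughly like this (apply_stn is a hypothetical
# helper, not part of the original pipeline):
def apply_stn(x, stn):
    """x: B x C x N point features, stn: an STN3D built for C channels."""
    t = stn(x)  # B x C x C transform
    return torch.bmm(x.transpose(2, 1), t).transpose(2, 1)  # align, keep B x C x N layout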
43 | class PointNetEncoder(nn.Module): 44 | def __init__(self, embedding_size, input_channels=3): 45 | super(PointNetEncoder, self).__init__() 46 | self.input_channels = input_channels 47 | self.stn1 = STN3D(input_channels) 48 | self.stn2 = STN3D(64) 49 | self.mlp1 = nn.Sequential( 50 | nn.Conv1d(input_channels, 64, 1), 51 | nn.BatchNorm1d(64), 52 | nn.ReLU(), 53 | nn.Conv1d(64, 64, 1), 54 | nn.BatchNorm1d(64), 55 | nn.ReLU(), 56 | ) 57 | self.mlp2 = nn.Sequential( 58 | nn.Conv1d(64, 64, 1), 59 | nn.BatchNorm1d(64), 60 | nn.ReLU(), 61 | nn.Conv1d(64, 128, 1), 62 | nn.BatchNorm1d(128), 63 | nn.ReLU(), 64 | nn.Conv1d(128, 1024, 1), 65 | nn.BatchNorm1d(1024), 66 | nn.ReLU(), 67 | ) 68 | self.fc = nn.Linear(1024, embedding_size) 69 | 70 | def forward(self, x): 71 | """ x: B x 1 x N x C 72 | """ 73 | x = x.squeeze(1) 74 | batch_size = x.shape[0] 75 | num_points = x.shape[1] 76 | x = x[:, :, : self.input_channels] 77 | x = x.transpose(2, 1) # transpose to apply 1D convolution 78 | x = self.mlp1(x) 79 | x = self.mlp2(x) 80 | x = F.max_pool1d(x, num_points).squeeze(2) # max pooling 81 | x = self.fc(x) 82 | return x 83 | 84 | 85 | class PointNetDecoder(nn.Module): 86 | def __init__(self, embedding_size, output_channels=3, num_points=1024): 87 | super(PointNetDecoder, self).__init__() 88 | self.num_points = num_points 89 | self.output_channels = output_channels 90 | self.fc1 = nn.Linear(embedding_size, 1024) 91 | self.fc2 = nn.Linear(1024, 1024) 92 | #self.fc22 = nn.Linear(1024, 2048) # add by ericxhzou 93 | #self.fc222 = nn.Linear(2048, 2048) # add by ericxhzou 94 | self.bn1 = nn.BatchNorm1d(1024) 95 | self.bn2 = nn.BatchNorm1d(1024) 96 | #self.bn22 = nn.BatchNorm1d(2048) # add by ericxhzou 97 | #self.bn222 = nn.BatchNorm1d(2048) # add by ericxhzou 98 | self.fc3 = nn.Linear(1024, num_points * output_channels) 99 | 100 | def forward(self, x): 101 | """ x: B x C 102 | """ 103 | batch_size = x.shape[0] 104 | x = F.relu(self.bn1(self.fc1(x))) 105 | x = F.relu(self.bn2(self.fc2(x))) 106 | #x = F.relu(self.bn22(self.fc22(x))) # add by ericxhzou 107 | #x = F.relu(self.bn222(self.fc222(x))) # add by ericxhzou 108 | x = torch.tanh(self.fc3(x)) 109 | x = x.view(batch_size, self.num_points, self.output_channels) 110 | x = x.contiguous() 111 | return x 112 | 113 | 114 | class PointNetAutoencoder(nn.Module): 115 | def __init__( 116 | self, embedding_size, input_channels=3, output_channels=3, output_num_points=1024, normalize=True 117 | ): 118 | super(PointNetAutoencoder, self).__init__() 119 | self.normalize = normalize 120 | self.input_channels = input_channels 121 | self.output_channels = output_channels 122 | self.embedding_size = embedding_size 123 | self.encoder = PointNetEncoder(embedding_size, input_channels) 124 | self.decoder = PointNetDecoder(embedding_size, output_channels, num_points=output_num_points) 125 | 126 | def forward(self, x): 127 | z = self.encode(x) 128 | y = self.decode(z) 129 | return y, z 130 | 131 | def encode(self, x): 132 | z = self.encoder(x) 133 | if self.normalize: 134 | z = F.normalize(z) 135 | return z 136 | 137 | def decode(self, z): 138 | y = self.decoder(z) 139 | return y 140 | -------------------------------------------------------------------------------- /place_recognition/Minkloc3D_V2/misc/point_clouds.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import os 3 | 4 | import numpy as np 5 | import open3d as o3d 6 | 7 | 8 | def draw_registration_result(source, target, transformation): 9 | source_temp = 
copy.deepcopy(source) 10 | target_temp = copy.deepcopy(target) 11 | source_temp.paint_uniform_color([1, 0.706, 0]) 12 | target_temp.paint_uniform_color([0, 0.651, 0.929]) 13 | source_temp.transform(transformation) 14 | o3d.visualization.draw_geometries([source_temp, target_temp], 15 | zoom=0.4459, 16 | front=[0.9288, -0.2951, -0.2242], 17 | lookat=[1.6784, 2.0612, 1.4451], 18 | up=[-0.3402, -0.9189, -0.1996]) 19 | 20 | 21 | def draw_pc(pc): 22 | pc = copy.deepcopy(pc) 23 | pc.paint_uniform_color([1, 0.706, 0]) 24 | o3d.visualization.draw_geometries([pc], 25 | zoom=0.4459, 26 | front=[0.9288, -0.2951, -0.2242], 27 | lookat=[1.6784, 2.0612, 1.4451], 28 | up=[-0.3402, -0.9189, -0.1996]) 29 | 30 | 31 | def icp(anchor_pc, positive_pc, transform: np.ndarray = None, point2plane: bool = False, 32 | inlier_dist_threshold: float = 1.2, max_iteration: int = 200): 33 | # transform: initial alignment transform 34 | if transform is not None: 35 | transform = transform.astype(float) 36 | 37 | voxel_size = 0.1 38 | pcd1 = o3d.geometry.PointCloud() 39 | pcd1.points = o3d.utility.Vector3dVector(anchor_pc) 40 | pcd1 = pcd1.voxel_down_sample(voxel_size=voxel_size) 41 | 42 | pcd2 = o3d.geometry.PointCloud() 43 | pcd2.points = o3d.utility.Vector3dVector(positive_pc) 44 | pcd2 = pcd2.voxel_down_sample(voxel_size=voxel_size) 45 | 46 | if point2plane: 47 | pcd1.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamKNN(knn=20)) 48 | pcd2.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamKNN(knn=20)) 49 | transform_estimation = o3d.pipelines.registration.TransformationEstimationPointToPlane() 50 | else: 51 | transform_estimation = o3d.pipelines.registration.TransformationEstimationPointToPoint() 52 | 53 | if transform is not None: 54 | reg_p2p = o3d.pipelines.registration.registration_icp(pcd1, pcd2, inlier_dist_threshold, transform, 55 | estimation_method=transform_estimation, 56 | criteria=o3d.pipelines.registration.ICPConvergenceCriteria(max_iteration=max_iteration)) 57 | else: 58 | reg_p2p = o3d.pipelines.registration.registration_icp(pcd1, pcd2, inlier_dist_threshold, 59 | estimation_method=transform_estimation, 60 | criteria=o3d.pipelines.registration.ICPConvergenceCriteria(max_iteration=max_iteration)) 61 | 62 | return reg_p2p.transformation, reg_p2p.fitness, reg_p2p.inlier_rmse 63 | 64 | 65 | def make_open3d_feature(data, dim, npts): 66 | feature = o3d.pipelines.registration.Feature() 67 | feature.resize(dim, npts) 68 | feature.data = data.cpu().numpy().astype('d').transpose() 69 | return feature 70 | 71 | 72 | def make_open3d_point_cloud(xyz, color=None): 73 | pcd = o3d.geometry.PointCloud() 74 | pcd.points = o3d.utility.Vector3dVector(xyz) 75 | if color is not None: 76 | pcd.colors = o3d.utility.Vector3dVector(color) 77 | return pcd 78 | 79 | 80 | class PointCloudLoader: 81 | # Generic point cloud loader class 82 | def __init__(self): 83 | # remove_zero_points: remove points with all zero coordinates 84 | # remove_ground_plane: remove points on ground plane level and below 85 | # ground_plane_level: ground plane level 86 | self.remove_zero_points = True 87 | self.remove_ground_plane = True 88 | self.ground_plane_level = None 89 | self.set_properties() 90 | 91 | def set_properties(self): 92 | # Set point cloud properties, such as ground_plane_level. Must be defined in inherited classes. 
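        # (added sketch) A minimal hypothetical subclass; the class name, file format and
        # ground-plane value below are illustrative assumptions, not part of this repo:
        #   class LidarLoader(PointCloudLoader):
        #       def set_properties(self):
        #           self.ground_plane_level = -1.5
        #       def read_pc(self, file_pathname):
        #           return np.fromfile(file_pathname, dtype=np.float32).reshape(-1, 4)[:, :3]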
93 | raise NotImplementedError('set_properties must be defined in inherited classes') 94 | 95 | def __call__(self, file_pathname): 96 | # Reads the point cloud from a disk and preprocess (optional removal of zero points and points on the ground 97 | # plane and below 98 | # file_pathname: relative file path 99 | assert os.path.exists(file_pathname), f"Cannot open point cloud: {file_pathname}" 100 | pc = self.read_pc(file_pathname) 101 | assert pc.shape[1] == 3 102 | 103 | if self.remove_zero_points: 104 | mask = np.all(np.isclose(pc, 0), axis=1) 105 | pc = pc[~mask] 106 | 107 | if self.remove_ground_plane: 108 | mask = pc[:, 2] > self.ground_plane_level 109 | pc = pc[mask] 110 | 111 | return pc 112 | 113 | def read_pc(self, file_pathname): 114 | # Reads the point cloud without pre-processing 115 | raise NotImplementedError("read_pc must be overloaded in an inheriting class") 116 | -------------------------------------------------------------------------------- /place_recognition/Minkloc3D_V2/models/resnet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Chris Choy (chrischoy@ai.stanford.edu). 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | # this software and associated documentation files (the "Software"), to deal in 5 | # the Software without restriction, including without limitation the rights to 6 | # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 7 | # of the Software, and to permit persons to whom the Software is furnished to do 8 | # so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in all 11 | # copies or substantial portions of the Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | # SOFTWARE. 20 | # 21 | # Please cite "4D Spatio-Temporal ConvNets: Minkowski Convolutional Neural 22 | # Networks", CVPR'19 (https://arxiv.org/abs/1904.08755) if you use any part 23 | # of the code. 
24 | 25 | import torch.nn as nn 26 | 27 | import MinkowskiEngine as ME 28 | from MinkowskiEngine.modules.resnet_block import BasicBlock, Bottleneck 29 | 30 | 31 | class ResNetBase(nn.Module): 32 | block = None 33 | layers = () 34 | init_dim = 64 35 | planes = (64, 128, 256, 512) 36 | 37 | def __init__(self, in_channels, out_channels, D=3): 38 | nn.Module.__init__(self) 39 | self.D = D 40 | assert self.block is not None 41 | 42 | self.network_initialization(in_channels, out_channels, D) 43 | self.weight_initialization() 44 | 45 | def network_initialization(self, in_channels, out_channels, D): 46 | self.inplanes = self.init_dim 47 | self.conv1 = ME.MinkowskiConvolution(in_channels, self.inplanes, kernel_size=5, stride=2, dimension=D) 48 | 49 | self.bn1 = ME.MinkowskiBatchNorm(self.inplanes) 50 | self.relu = ME.MinkowskiReLU(inplace=True) 51 | 52 | self.pool = ME.MinkowskiAvgPooling(kernel_size=2, stride=2, dimension=D) 53 | 54 | self.layer1 = self._make_layer(self.block, self.planes[0], self.layers[0], stride=2) 55 | self.layer2 = self._make_layer(self.block, self.planes[1], self.layers[1], stride=2) 56 | self.layer3 = self._make_layer(self.block, self.planes[2], self.layers[2], stride=2) 57 | self.layer4 = self._make_layer(self.block, self.planes[3], self.layers[3], stride=2) 58 | 59 | self.conv5 = ME.MinkowskiConvolution(self.inplanes, self.inplanes, kernel_size=3, stride=3, dimension=D) 60 | self.bn5 = ME.MinkowskiBatchNorm(self.inplanes) 61 | self.glob_avg = ME.MinkowskiGlobalMaxPooling() 62 | self.final = ME.MinkowskiLinear(self.inplanes, out_channels, bias=True) 63 | 64 | def weight_initialization(self): 65 | for m in self.modules(): 66 | if isinstance(m, ME.MinkowskiConvolution): 67 | ME.utils.kaiming_normal_(m.kernel, mode='fan_out', nonlinearity='relu') 68 | 69 | if isinstance(m, ME.MinkowskiBatchNorm): 70 | nn.init.constant_(m.bn.weight, 1) 71 | nn.init.constant_(m.bn.bias, 0) 72 | 73 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1, bn_momentum=0.1): 74 | downsample = None 75 | if stride != 1 or self.inplanes != planes * block.expansion: 76 | downsample = nn.Sequential(ME.MinkowskiConvolution(self.inplanes, planes * block.expansion, 77 | kernel_size=1, stride=stride, dimension=self.D), 78 | ME.MinkowskiBatchNorm(planes * block.expansion)) 79 | layers = [] 80 | layers.append(block(self.inplanes, planes, stride=stride, dilation=dilation, downsample=downsample, 81 | dimension=self.D)) 82 | self.inplanes = planes * block.expansion 83 | for i in range(1, blocks): 84 | layers.append(block(self.inplanes, planes, stride=1, dilation=dilation, dimension=self.D)) 85 | 86 | return nn.Sequential(*layers) 87 | 88 | def forward(self, x): 89 | x = self.conv1(x) 90 | x = self.bn1(x) 91 | x = self.relu(x) 92 | x = self.pool(x) 93 | 94 | x = self.layer1(x) 95 | x = self.layer2(x) 96 | x = self.layer3(x) 97 | x = self.layer4(x) 98 | 99 | x = self.conv5(x) 100 | x = self.bn5(x) 101 | x = self.relu(x) 102 | 103 | x = self.glob_avg(x) 104 | return self.final(x) 105 | 106 | 107 | class ResNet14(ResNetBase): 108 | BLOCK = BasicBlock 109 | LAYERS = (1, 1, 1, 1) 110 | 111 | 112 | class ResNet18(ResNetBase): 113 | BLOCK = BasicBlock 114 | LAYERS = (2, 2, 2, 2) 115 | 116 | 117 | class ResNet34(ResNetBase): 118 | BLOCK = BasicBlock 119 | LAYERS = (3, 4, 6, 3) 120 | 121 | 122 | class ResNet50(ResNetBase): 123 | BLOCK = Bottleneck 124 | LAYERS = (3, 4, 6, 3) 125 | 126 | 127 | class ResNet101(ResNetBase): 128 | BLOCK = Bottleneck 129 | LAYERS = (3, 4, 23, 3) 130 | 
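# (added note) Caveat: ResNetBase reads self.block / self.layers (lowercase), while the
# ResNet14..ResNet101 helpers above define BLOCK / LAYERS (uppercase), so instantiating one of
# them directly would fail the "assert self.block is not None" check. A subclass that satisfies
# the base class as written would be (a sketch, not part of the repo):
#
#   class ResNet14Fixed(ResNetBase):
#       block = BasicBlock
#       layers = (1, 1, 1, 1)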
-------------------------------------------------------------------------------- /losses/multi_similarity_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class MultiSimilarityLoss(nn.Module): 7 | def __init__(self, thresh=0.5, margin=0.3, scale_pos=2.0, scale_neg=40.0, mode='apn'): 8 | super(MultiSimilarityLoss, self).__init__() 9 | self.mode = mode # 'apn' or 'apn_sim' or 'ak' 10 | self.thresh = thresh 11 | self.margin = margin 12 | self.scale_pos = scale_pos 13 | self.scale_neg = scale_neg 14 | 15 | def forward(self, a, b, c): 16 | if self.mode == 'apn': 17 | return self._forward_apn(a, b, c) 18 | if self.mode == 'apn_sim': 19 | return self._forward_apn_sim(a, b, c) 20 | elif self.mode == 'ak': 21 | return self._forward_ak(a, b, c) 22 | else: 23 | return None 24 | 25 | def _forward_apn(self, a_feat, p_feat, n_feat): 26 | """ a/p/n: b x k x d """ 27 | if len(a_feat.shape) == 2: 28 | a_feat = a_feat.unsqueeze(0) 29 | p_feat = p_feat.unsqueeze(0) 30 | n_feat = n_feat.unsqueeze(0) 31 | b, k, d = a_feat.size() 32 | loss = [] 33 | for i in range(b): 34 | pos_sim = F.cosine_similarity(a_feat[i], p_feat[i]) 35 | neg_sim = F.cosine_similarity(a_feat[i], n_feat[i]) 36 | neg_sim = neg_sim[neg_sim + self.margin > min(pos_sim)] 37 | if len(pos_sim) < 1 or len(neg_sim) < 1: 38 | continue 39 | pos_loss = 1.0 / self.scale_pos * torch.log( 40 | 1 + torch.sum(torch.exp(-self.scale_pos * (pos_sim - self.thresh)))) 41 | neg_loss = 1.0 / self.scale_neg * torch.log( 42 | 1 + torch.sum(torch.exp(self.scale_neg * (neg_sim - self.thresh)))) 43 | loss.append(pos_loss + neg_loss) 44 | if len(loss) == 0: 45 | return torch.zeros([], requires_grad=True) 46 | loss = torch.mean(torch.stack(loss)) 47 | return loss 48 | 49 | def _forward_apn_sim(self, pos_sim, neg_sim, place_holder=None): 50 | """ pos_sim: b x p, neg_sim: b x n """ 51 | loss = [] 52 | for i in range(pos_sim.shape[0]): 53 | pos_sim_i = pos_sim[i] 54 | neg_sim_i = neg_sim[i] 55 | neg_sim_i = neg_sim_i[neg_sim_i + self.margin > min(pos_sim_i)] 56 | if len(pos_sim_i) < 1 or len(neg_sim_i) < 1: 57 | continue 58 | pos_loss = 1.0 / self.scale_pos * torch.log( 59 | 1 + torch.sum(torch.exp(-self.scale_pos * (pos_sim_i - self.thresh)))) 60 | neg_loss = 1.0 / self.scale_neg * torch.log( 61 | 1 + torch.sum(torch.exp(self.scale_neg * (neg_sim_i - self.thresh)))) 62 | loss.append(pos_loss + neg_loss) 63 | if len(loss) == 0: 64 | return torch.zeros([], requires_grad=True) 65 | loss = torch.mean(torch.stack(loss)) 66 | return loss 67 | 68 | def _forward_ak(self, a_feat, k_feat, k_label): 69 | """ a_feat: b x 1 x d, k_feat: b x k x d, k_label: b x k """ 70 | if len(a_feat.shape) == 2: 71 | a_feat = a_feat.unsqueeze(0) 72 | k_feat = k_feat.unsqueeze(0) 73 | k_label = k_label.unsqueeze(0) 74 | b, k, d = k_feat.size() 75 | loss = [] 76 | for i in range(b): 77 | # pos 78 | p_idx = (k_label[i] > 0).nonzero().squeeze(1) 79 | if len(p_idx) == 0: 80 | continue 81 | p_feat_i = k_feat[i][p_idx] 82 | a_feat_i = a_feat[i].repeat(p_feat_i.shape[0] ,1) 83 | pos_sim = F.cosine_similarity(a_feat_i, p_feat_i) 84 | # neg 85 | n_idx = (k_label[i] < 1).nonzero().squeeze(1) 86 | if len(n_idx) == 0: 87 | continue 88 | n_feat_i = k_feat[i][n_idx] 89 | a_feat_i = a_feat[i].repeat(n_feat_i.shape[0], 1) 90 | neg_sim = F.cosine_similarity(a_feat_i, n_feat_i) 91 | neg_sim = neg_sim[neg_sim + self.margin > min(pos_sim)] 92 | if len(pos_sim) < 1 or len(neg_sim) < 1: 93 
| continue 94 | pos_loss = 1.0 / self.scale_pos * torch.log( 95 | 1 + torch.sum(torch.exp(-self.scale_pos * (pos_sim - self.thresh)))) 96 | neg_loss = 1.0 / self.scale_neg * torch.log( 97 | 1 + torch.sum(torch.exp(self.scale_neg * (neg_sim - self.thresh)))) 98 | loss.append(pos_loss + neg_loss) 99 | if len(loss) == 0: 100 | return torch.zeros([], requires_grad=True) 101 | loss = torch.mean(torch.stack(loss)) 102 | return loss 103 | 104 | 105 | if __name__ == '__main__': 106 | # forward apn 107 | a_feat = torch.randn(16, 5, 256) # b x k x d 108 | a_feat = F.normalize(a_feat, dim=-1) 109 | p_feat = torch.randn(16, 5, 256) # b x k x d 110 | p_feat = F.normalize(p_feat, dim=-1) 111 | n_feat = torch.randn(16, 5, 256) # b x k x d 112 | n_feat = F.normalize(n_feat, dim=-1) 113 | loss_func = MultiSimilarityLoss(mode='apn') 114 | loss = loss_func(a_feat, p_feat, n_feat) 115 | # forward ak 116 | a_feat = torch.randn(16, 1, 256) # b x 1 x d 117 | a_feat = F.normalize(a_feat, dim=-1) 118 | k_feat = torch.randn(16, 5, 256) # b x k x d 119 | k_feat = F.normalize(k_feat, dim=-1) 120 | k_label = torch.randint(0, 2, (16, 5)).float() 121 | loss_func = MultiSimilarityLoss(mode='ak') 122 | loss = loss_func(a_feat, k_feat, k_label) 123 | print('loss: ', loss) -------------------------------------------------------------------------------- /losses/truncated_smoothap.py: -------------------------------------------------------------------------------- 1 | # Implemented as per "Recall@k Surrogate Loss with Large Batches and Similarity Mixup" paper 2 | # but only the fixed number of the closest positives is considered 3 | 4 | import numpy as np 5 | import torch 6 | 7 | from losses.loss_utils import sigmoid, compute_aff 8 | 9 | 10 | class TruncatedSmoothAP: 11 | def __init__(self, tau1: float = 0.01, similarity: str = 'cosine', positives_per_query: int = 4): 12 | # We reversed the notation compared to the paper (tau1 is sigmoid on similarity differences) 13 | # tau1: sigmoid temperature applied on similarity differences 14 | # positives_per_query: number of positives per query to consider 15 | # negatives_only: if True, the denominator considers positives and negatives; if False, all elements 16 | # (except for the anchor itself) 17 | 18 | self.tau1 = tau1 19 | self.similarity = similarity 20 | self.positives_per_query = positives_per_query 21 | 22 | def __call__(self, embeddings, positives_mask, negatives_mask): 23 | device = embeddings.device 24 | 25 | positives_mask = positives_mask.to(device) 26 | negatives_mask = negatives_mask.to(device) 27 | 28 | # Ranking of the retrieval set 29 | # For each element we ignore elements that are neither positives nor negatives 30 | 31 | # Compute cosine similarity scores 32 | # 1st dimension corresponds to q, 2nd dimension to z 33 | s_qz = compute_aff(embeddings, similarity=self.similarity) 34 | 35 | # Find the positives_per_query closest positives for each query 36 | s_positives = s_qz.detach().clone() 37 | s_positives.masked_fill_(torch.logical_not(positives_mask), np.NINF) 38 | #closest_positives_ndx = torch.argmax(s_positives, dim=1).view(-1, 1) # Indices of closest positives for each query 39 | top_k = self.positives_per_query 40 | if top_k > embeddings.shape[0]: 41 | top_k = embeddings.shape[0] 42 | closest_positives_ndx = torch.topk(s_positives, k=top_k, dim=1, largest=True, sorted=True)[1] 43 | # closest_positives_ndx is (batch_size, positives_per_query) with positives_per_query closest positives 44 | # per each batch element 45 | 46 | n_positives = 
positives_mask.sum(dim=1) # Number of positives for each anchor 47 | 48 | # Compute the rank of each example x with respect to query element q as per Eq. (2) 49 | s_diff = s_qz.unsqueeze(1) - s_qz.gather(1, closest_positives_ndx).unsqueeze(2) 50 | s_sigmoid = sigmoid(s_diff, temp=self.tau1) 51 | 52 | # Compute the numerator in Eq. 2 and 5 - for q compute the ranking of each of its positives with respect to other positives of q 53 | # Filter out z not in Positives 54 | pos_mask = positives_mask.unsqueeze(1) 55 | pos_s_sigmoid = s_sigmoid * pos_mask 56 | 57 | # Filter out z on the same position as the positive (they have value = 0.5, as the similarity difference is zero) 58 | mask = torch.ones_like(pos_s_sigmoid).scatter(2, closest_positives_ndx.unsqueeze(2), 0.) 59 | pos_s_sigmoid = pos_s_sigmoid * mask 60 | 61 | # Compute the rank for each query and its positives_per_query closest positive examples with respect to other positives 62 | r_p = torch.sum(pos_s_sigmoid, dim=2) + 1. 63 | # r_p is (batch_size, positives_per_query) matrix 64 | 65 | # Consider only positives and negatives in the denominator 66 | # Compute the denominator in Eq. 5 - add sum of Indicator function for negatives (or non-positives) 67 | neg_mask = negatives_mask.unsqueeze(1) 68 | neg_s_sigmoid = s_sigmoid * neg_mask 69 | r_omega = r_p + torch.sum(neg_s_sigmoid, dim=2) 70 | 71 | # Compute the R(i, S_p) / R(i, S_omega) ratio in Eq. 2 72 | r = r_p / r_omega 73 | 74 | # Compute metrics: mean ranking of the positive example, recall@1 75 | stats = {} 76 | # Mean number of positives per query 77 | stats['positives_per_query'] = n_positives.float().mean(dim=0).item() 78 | # Mean ranking of selected positive examples (closest positives) 79 | temp = s_diff.detach() > 0 80 | temp = torch.logical_and(temp[:, 0], negatives_mask) # Take the best positive 81 | hard_ranking = temp.sum(dim=1) 82 | stats['best_positive_ranking'] = hard_ranking.float().mean(dim=0).item() 83 | # Recall at 1~top_k (ranks above top_k reuse the top_k value) 84 | stats['recall'] = {} 85 | for i in range(1, self.positives_per_query+1): 86 | if i > top_k: 87 | stats['recall'][i] = stats['recall'][top_k] 88 | else: 89 | stats['recall'][i] = (hard_ranking <= i).float().mean(dim=0).item() 90 | 91 | # r is (N, positives_per_query) tensor 92 | # Zero entries not corresponding to real positives - this happens when the number of true positives is lower than positives_per_query 93 | valid_positives_mask = torch.gather(positives_mask, 1, closest_positives_ndx) # (N, positives_per_query) tensor 94 | masked_r = r * valid_positives_mask 95 | n_valid_positives = valid_positives_mask.sum(dim=1) 96 | 97 | # Filter out rows (queries) without any positive to avoid division by zero 98 | valid_q_mask = n_valid_positives > 0 99 | masked_r = masked_r[valid_q_mask] 100 | 101 | ap = (masked_r.sum(dim=1) / n_valid_positives[valid_q_mask]).mean() 102 | loss = 1. 
- ap 103 | 104 | stats['loss'] = loss.item() 105 | stats['ap'] = ap.item() 106 | stats['avg_embedding_norm'] = embeddings.norm(dim=1).mean().item() 107 | return loss, stats 108 | -------------------------------------------------------------------------------- /place_recognition/Minkloc3D_V2/models/losses/truncated_smoothap.py: -------------------------------------------------------------------------------- 1 | # Implemented as per "Recall@k Surrogate Loss with Large Batches and Similarity Mixup" paper 2 | # but only the fixed number of the closest positives is considered 3 | 4 | import numpy as np 5 | import torch 6 | 7 | from place_recognition.Minkloc3D_V2.models.losses.loss_utils import sigmoid, compute_aff 8 | 9 | 10 | class TruncatedSmoothAP: 11 | def __init__(self, tau1: float = 0.01, similarity: str = 'cosine', positives_per_query: int = 4): 12 | # We reversed the notation compared to the paper (tau1 is sigmoid on similarity differences) 13 | # tau1: sigmoid temperature applied on similarity differences 14 | # positives_per_query: number of positives per query to consider 15 | # negatives_only: if True, the denominator considers positives and negatives; if False, all elements 16 | # (except for the anchor itself) 17 | 18 | self.tau1 = tau1 19 | self.similarity = similarity 20 | self.positives_per_query = positives_per_query 21 | 22 | def __call__(self, embeddings, positives_mask, negatives_mask): 23 | device = embeddings.device 24 | 25 | positives_mask = positives_mask.to(device) 26 | negatives_mask = negatives_mask.to(device) 27 | 28 | # Ranking of the retrieval set 29 | # For each element we ignore elements that are neither positives nor negatives 30 | 31 | # Compute cosine similarity scores 32 | # 1st dimension corresponds to q, 2nd dimension to z 33 | s_qz = compute_aff(embeddings, similarity=self.similarity) 34 | 35 | # Find the positives_per_query closest positives for each query 36 | s_positives = s_qz.detach().clone() 37 | s_positives.masked_fill_(torch.logical_not(positives_mask), np.NINF) 38 | #closest_positives_ndx = torch.argmax(s_positives, dim=1).view(-1, 1) # Indices of closest positives for each query 39 | top_k = self.positives_per_query 40 | if top_k > embeddings.shape[0]: 41 | top_k = embeddings.shape[0] 42 | closest_positives_ndx = torch.topk(s_positives, k=top_k, dim=1, largest=True, sorted=True)[1] 43 | # closest_positives_ndx is (batch_size, positives_per_query) with positives_per_query closest positives 44 | # per each batch element 45 | 46 | n_positives = positives_mask.sum(dim=1) # Number of positives for each anchor 47 | 48 | # Compute the rank of each example x with respect to query element q as per Eq. (2) 49 | s_diff = s_qz.unsqueeze(1) - s_qz.gather(1, closest_positives_ndx).unsqueeze(2) 50 | s_sigmoid = sigmoid(s_diff, temp=self.tau1) 51 | 52 | # Compute the numerator in Eq. 2 and 5 - for q compute the ranking of each of its positives with respect to other positives of q 53 | # Filter out z not in Positives 54 | pos_mask = positives_mask.unsqueeze(1) 55 | pos_s_sigmoid = s_sigmoid * pos_mask 56 | 57 | # Filter out z on the same position as the positive (they have value = 0.5, as the similarity difference is zero) 58 | mask = torch.ones_like(pos_s_sigmoid).scatter(2, closest_positives_ndx.unsqueeze(2), 0.) 59 | pos_s_sigmoid = pos_s_sigmoid * mask 60 | 61 | # Compute the rank for each query and its positives_per_query closest positive examples with respect to other positives 62 | r_p = torch.sum(pos_s_sigmoid, dim=2) + 1. 
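        # (added note) Soft-rank intuition for the line above: for query q and a selected positive i,
        # every other positive z with s(q, z) > s(q, i) drives sigmoid((s(q, z) - s(q, i)) / tau1)
        # towards 1, so r_p approximates 1 + |{z in positives : s(q, z) > s(q, i)}|, i.e. the rank
        # of i among the positives of q.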
63 | # r_p is (batch_size, positives_per_query) matrix 64 | 65 | # Consider only positives and negatives in the denominator 66 | # Compute the denominator in Eq. 5 - add sum of Indicator function for negatives (or non-positives) 67 | neg_mask = negatives_mask.unsqueeze(1) 68 | neg_s_sigmoid = s_sigmoid * neg_mask 69 | r_omega = r_p + torch.sum(neg_s_sigmoid, dim=2) 70 | 71 | # Compute the R(i, S_p) / R(i, S_omega) ratio in Eq. 2 72 | r = r_p / r_omega 73 | 74 | # Compute metrics: mean ranking of the positive example, recall@1 75 | stats = {} 76 | # Mean number of positives per query 77 | stats['positives_per_query'] = n_positives.float().mean(dim=0).item() 78 | # Mean ranking of selected positive examples (closest positives) 79 | temp = s_diff.detach() > 0 80 | temp = torch.logical_and(temp[:, 0], negatives_mask) # Take the best positive 81 | hard_ranking = temp.sum(dim=1) 82 | stats['best_positive_ranking'] = hard_ranking.float().mean(dim=0).item() 83 | # Recall at 1~top_k (ranks above top_k reuse the top_k value) 84 | stats['recall'] = {} 85 | for i in range(1, self.positives_per_query+1): 86 | if i > top_k: 87 | stats['recall'][i] = stats['recall'][top_k] 88 | else: 89 | stats['recall'][i] = (hard_ranking <= i).float().mean(dim=0).item() 90 | 91 | # r is (N, positives_per_query) tensor 92 | # Zero entries not corresponding to real positives - this happens when the number of true positives is lower than positives_per_query 93 | valid_positives_mask = torch.gather(positives_mask, 1, closest_positives_ndx) # (N, positives_per_query) tensor 94 | masked_r = r * valid_positives_mask 95 | n_valid_positives = valid_positives_mask.sum(dim=1) 96 | 97 | # Filter out rows (queries) without any positive to avoid division by zero 98 | valid_q_mask = n_valid_positives > 0 99 | masked_r = masked_r[valid_q_mask] 100 | 101 | ap = (masked_r.sum(dim=1) / n_valid_positives[valid_q_mask]).mean() 102 | loss = 1. - ap 103 | 104 | stats['loss'] = loss.item() 105 | stats['ap'] = ap.item() 106 | stats['avg_embedding_norm'] = embeddings.norm(dim=1).mean().item() 107 | return loss, stats 108 | -------------------------------------------------------------------------------- /datasets/query_pos_neg_dataset_pb2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by the protocol buffer compiler. DO NOT EDIT! 
3 | # source: query_pos_neg_dataset.proto 4 | """Generated protocol buffer code.""" 5 | from google.protobuf import descriptor as _descriptor 6 | from google.protobuf import descriptor_pool as _descriptor_pool 7 | from google.protobuf import message as _message 8 | from google.protobuf import reflection as _reflection 9 | from google.protobuf import symbol_database as _symbol_database 10 | # @@protoc_insertion_point(imports) 11 | 12 | _sym_db = _symbol_database.Default() 13 | 14 | 15 | 16 | 17 | DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1bquery_pos_neg_dataset.proto\x12\rp2m.base_type\"0\n\nDataRecord\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\t\n\x01x\x18\x02 \x01(\x01\x12\t\n\x01y\x18\x03 \x01(\x01\"9\n\x0b\x44\x61taRecords\x12*\n\x07records\x18\x01 \x03(\x0b\x32\x19.p2m.base_type.DataRecord\"a\n\nUint32Pair\x12\x0c\n\x04idx1\x18\x01 \x01(\r\x12\x15\n\rnear_indices2\x18\x02 \x03(\r\x12\x14\n\x0c\x66\x61r_indices2\x18\x03 \x03(\r\x12\x18\n\x10\x62\x61\x64_far_indices2\x18\x04 \x03(\r\"\x9a\x01\n\x16QueryPosOverlapIndices\x12\x14\n\x0cpositive_idx\x18\x02 \x01(\r\x12\x32\n\x0foverlap_indices\x18\x03 \x03(\x0b\x32\x19.p2m.base_type.Uint32Pair\x12\x36\n\x13inv_overlap_indices\x18\x04 \x03(\x0b\x32\x19.p2m.base_type.Uint32Pair\"k\n\x13QueryOverlapIndices\x12\x11\n\tquery_idx\x18\x01 \x01(\r\x12\x41\n\x12qp_overlap_indices\x18\x02 \x03(\x0b\x32%.p2m.base_type.QueryPosOverlapIndices\"F\n\x10QueryPosNegTuple\x12\x18\n\x10positive_indices\x18\x01 \x03(\r\x12\x18\n\x10negative_indices\x18\x02 \x03(\r\"\xe8\x01\n\x07\x44\x61taSet\x12\x12\n\ntrip_names\x18\x01 \x03(\t\x12*\n\x07records\x18\x02 \x03(\x0b\x32\x19.p2m.base_type.DataRecord\x12\x19\n\x11records_size_list\x18\x03 \x03(\r\x12\x32\n\x06tuples\x18\x04 \x03(\x0b\x32\".p2m.base_type.DataSet.TuplesEntry\x1aN\n\x0bTuplesEntry\x12\x0b\n\x03key\x18\x01 \x01(\r\x12.\n\x05value\x18\x02 \x01(\x0b\x32\x1f.p2m.base_type.QueryPosNegTuple:\x02\x38\x01\x62\x06proto3') 18 | 19 | 20 | 21 | _DATARECORD = DESCRIPTOR.message_types_by_name['DataRecord'] 22 | _DATARECORDS = DESCRIPTOR.message_types_by_name['DataRecords'] 23 | _UINT32PAIR = DESCRIPTOR.message_types_by_name['Uint32Pair'] 24 | _QUERYPOSOVERLAPINDICES = DESCRIPTOR.message_types_by_name['QueryPosOverlapIndices'] 25 | _QUERYOVERLAPINDICES = DESCRIPTOR.message_types_by_name['QueryOverlapIndices'] 26 | _QUERYPOSNEGTUPLE = DESCRIPTOR.message_types_by_name['QueryPosNegTuple'] 27 | _DATASET = DESCRIPTOR.message_types_by_name['DataSet'] 28 | _DATASET_TUPLESENTRY = _DATASET.nested_types_by_name['TuplesEntry'] 29 | DataRecord = _reflection.GeneratedProtocolMessageType('DataRecord', (_message.Message,), { 30 | 'DESCRIPTOR' : _DATARECORD, 31 | '__module__' : 'query_pos_neg_dataset_pb2' 32 | # @@protoc_insertion_point(class_scope:p2m.base_type.DataRecord) 33 | }) 34 | _sym_db.RegisterMessage(DataRecord) 35 | 36 | DataRecords = _reflection.GeneratedProtocolMessageType('DataRecords', (_message.Message,), { 37 | 'DESCRIPTOR' : _DATARECORDS, 38 | '__module__' : 'query_pos_neg_dataset_pb2' 39 | # @@protoc_insertion_point(class_scope:p2m.base_type.DataRecords) 40 | }) 41 | _sym_db.RegisterMessage(DataRecords) 42 | 43 | Uint32Pair = _reflection.GeneratedProtocolMessageType('Uint32Pair', (_message.Message,), { 44 | 'DESCRIPTOR' : _UINT32PAIR, 45 | '__module__' : 'query_pos_neg_dataset_pb2' 46 | # @@protoc_insertion_point(class_scope:p2m.base_type.Uint32Pair) 47 | }) 48 | _sym_db.RegisterMessage(Uint32Pair) 49 | 50 | QueryPosOverlapIndices = 
_reflection.GeneratedProtocolMessageType('QueryPosOverlapIndices', (_message.Message,), { 51 | 'DESCRIPTOR' : _QUERYPOSOVERLAPINDICES, 52 | '__module__' : 'query_pos_neg_dataset_pb2' 53 | # @@protoc_insertion_point(class_scope:p2m.base_type.QueryPosOverlapIndices) 54 | }) 55 | _sym_db.RegisterMessage(QueryPosOverlapIndices) 56 | 57 | QueryOverlapIndices = _reflection.GeneratedProtocolMessageType('QueryOverlapIndices', (_message.Message,), { 58 | 'DESCRIPTOR' : _QUERYOVERLAPINDICES, 59 | '__module__' : 'query_pos_neg_dataset_pb2' 60 | # @@protoc_insertion_point(class_scope:p2m.base_type.QueryOverlapIndices) 61 | }) 62 | _sym_db.RegisterMessage(QueryOverlapIndices) 63 | 64 | QueryPosNegTuple = _reflection.GeneratedProtocolMessageType('QueryPosNegTuple', (_message.Message,), { 65 | 'DESCRIPTOR' : _QUERYPOSNEGTUPLE, 66 | '__module__' : 'query_pos_neg_dataset_pb2' 67 | # @@protoc_insertion_point(class_scope:p2m.base_type.QueryPosNegTuple) 68 | }) 69 | _sym_db.RegisterMessage(QueryPosNegTuple) 70 | 71 | DataSet = _reflection.GeneratedProtocolMessageType('DataSet', (_message.Message,), { 72 | 73 | 'TuplesEntry' : _reflection.GeneratedProtocolMessageType('TuplesEntry', (_message.Message,), { 74 | 'DESCRIPTOR' : _DATASET_TUPLESENTRY, 75 | '__module__' : 'query_pos_neg_dataset_pb2' 76 | # @@protoc_insertion_point(class_scope:p2m.base_type.DataSet.TuplesEntry) 77 | }) 78 | , 79 | 'DESCRIPTOR' : _DATASET, 80 | '__module__' : 'query_pos_neg_dataset_pb2' 81 | # @@protoc_insertion_point(class_scope:p2m.base_type.DataSet) 82 | }) 83 | _sym_db.RegisterMessage(DataSet) 84 | _sym_db.RegisterMessage(DataSet.TuplesEntry) 85 | 86 | if _descriptor._USE_C_DESCRIPTORS == False: 87 | 88 | DESCRIPTOR._options = None 89 | _DATASET_TUPLESENTRY._options = None 90 | _DATASET_TUPLESENTRY._serialized_options = b'8\001' 91 | _DATARECORD._serialized_start=46 92 | _DATARECORD._serialized_end=94 93 | _DATARECORDS._serialized_start=96 94 | _DATARECORDS._serialized_end=153 95 | _UINT32PAIR._serialized_start=155 96 | _UINT32PAIR._serialized_end=252 97 | _QUERYPOSOVERLAPINDICES._serialized_start=255 98 | _QUERYPOSOVERLAPINDICES._serialized_end=409 99 | _QUERYOVERLAPINDICES._serialized_start=411 100 | _QUERYOVERLAPINDICES._serialized_end=518 101 | _QUERYPOSNEGTUPLE._serialized_start=520 102 | _QUERYPOSNEGTUPLE._serialized_end=590 103 | _DATASET._serialized_start=593 104 | _DATASET._serialized_end=825 105 | _DATASET_TUPLESENTRY._serialized_start=747 106 | _DATASET_TUPLESENTRY._serialized_end=825 107 | # @@protoc_insertion_point(module_scope) 108 | -------------------------------------------------------------------------------- /datasets/point_clouds_utils.py: -------------------------------------------------------------------------------- 1 | #############################From Spectral Geometric Verification############################# 2 | # This file is adapted from: https://github.com/jac99/Egonn/blob/main/misc/point_clouds.py 3 | 4 | import copy 5 | import os 6 | 7 | import numpy as np 8 | import open3d as o3d 9 | 10 | 11 | def draw_registration_result(source, target, transformation): 12 | source_temp = copy.deepcopy(source) 13 | target_temp = copy.deepcopy(target) 14 | source_temp.paint_uniform_color([1, 0.706, 0]) 15 | target_temp.paint_uniform_color([0, 0.651, 0.929]) 16 | source_temp.transform(transformation) 17 | o3d.visualization.draw_geometries([source_temp, target_temp], 18 | zoom=0.4459, 19 | front=[0.9288, -0.2951, -0.2242], 20 | lookat=[1.6784, 2.0612, 1.4451], 21 | up=[-0.3402, -0.9189, -0.1996]) 22 | 23 | 24 | def 
draw_pc(pc): 25 | pc = copy.deepcopy(pc) 26 | pc.paint_uniform_color([1, 0.706, 0]) 27 | o3d.visualization.draw_geometries([pc], 28 | zoom=0.4459, 29 | front=[0.9288, -0.2951, -0.2242], 30 | lookat=[1.6784, 2.0612, 1.4451], 31 | up=[-0.3402, -0.9189, -0.1996]) 32 | 33 | 34 | def icp(anchor_pc, positive_pc, transform: np.ndarray = None, point2plane: bool = False, 35 | inlier_dist_threshold: float = 1.2, max_iteration: int = 200): 36 | # transform: initial alignment transform 37 | if transform is not None: 38 | transform = transform.astype(float) 39 | 40 | voxel_size = 0.1 41 | pcd1 = o3d.geometry.PointCloud() 42 | pcd1.points = o3d.utility.Vector3dVector(anchor_pc) 43 | pcd1 = pcd1.voxel_down_sample(voxel_size=voxel_size) 44 | 45 | pcd2 = o3d.geometry.PointCloud() 46 | pcd2.points = o3d.utility.Vector3dVector(positive_pc) 47 | pcd2 = pcd2.voxel_down_sample(voxel_size=voxel_size) 48 | 49 | if point2plane: 50 | pcd1.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamKNN(knn=20)) 51 | pcd2.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamKNN(knn=20)) 52 | transform_estimation = o3d.pipelines.registration.TransformationEstimationPointToPlane() 53 | else: 54 | transform_estimation = o3d.pipelines.registration.TransformationEstimationPointToPoint() 55 | 56 | if transform is not None: 57 | reg_p2p = o3d.pipelines.registration.registration_icp(pcd1, pcd2, inlier_dist_threshold, transform, 58 | estimation_method=transform_estimation, 59 | criteria=o3d.pipelines.registration.ICPConvergenceCriteria(max_iteration=max_iteration)) 60 | else: 61 | reg_p2p = o3d.pipelines.registration.registration_icp(pcd1, pcd2, inlier_dist_threshold, 62 | estimation_method=transform_estimation, 63 | criteria=o3d.pipelines.registration.ICPConvergenceCriteria(max_iteration=max_iteration)) 64 | 65 | return reg_p2p.transformation, reg_p2p.fitness, reg_p2p.inlier_rmse 66 | 67 | 68 | def make_open3d_feature(data, dim, npts): 69 | feature = o3d.pipelines.registration.Feature() 70 | feature.resize(dim, npts) 71 | if not isinstance(data, np.ndarray): 72 | feature.data = data.cpu().numpy().astype('d').transpose() 73 | else: 74 | feature.data = data.astype('d').transpose() 75 | return feature 76 | 77 | 78 | def make_open3d_point_cloud(xyz, color=None): 79 | pcd = o3d.geometry.PointCloud() 80 | pcd.points = o3d.utility.Vector3dVector(xyz) 81 | if color is not None: 82 | pcd.colors = o3d.utility.Vector3dVector(color) 83 | return pcd 84 | 85 | def preprocess_pointcloud(pc, remove_zero_points: bool = False, 86 | min_x: float = None, max_x: float = None, 87 | min_y: float = None, max_y: float = None, 88 | min_z: float = None, max_z: float = None): 89 | if remove_zero_points: 90 | mask = np.all(np.isclose(pc, 0.), axis=1) 91 | pc = pc[~mask] 92 | 93 | if min_x is not None: 94 | mask = pc[:, 0] > min_x 95 | pc = pc[mask] 96 | 97 | if max_x is not None: 98 | mask = pc[:, 0] <= max_x 99 | pc = pc[mask] 100 | 101 | if min_y is not None: 102 | mask = pc[:, 1] > min_y 103 | pc = pc[mask] 104 | 105 | if max_y is not None: 106 | mask = pc[:, 1] <= max_y 107 | pc = pc[mask] 108 | 109 | if min_z is not None: 110 | mask = pc[:, 2] > min_z 111 | pc = pc[mask] 112 | 113 | if max_z is not None: 114 | mask = pc[:, 2] <= max_z 115 | pc = pc[mask] 116 | 117 | return pc 118 | 119 | 120 | class PointCloudLoader: 121 | # Generic point cloud loader class 122 | def __init__(self): 123 | # remove_zero_points: remove points with all zero coordinates 124 | # remove_ground_plane: remove points on ground plane level and below 125 | # 
ground_plane_level: ground plane level 126 | self.remove_zero_points = True 127 | self.remove_ground_plane = True 128 | self.ground_plane_level = None 129 | self.set_properties() 130 | 131 | def set_properties(self): 132 | # Set point cloud properties, such as ground_plane_level. Must be defined in inherited classes. 133 | raise NotImplementedError('set_properties must be defined in inherited classes') 134 | 135 | def __call__(self, file_pathname): 136 | # Reads the point cloud from disk and preprocesses it (optional removal of zero points and of points on the ground 137 | # plane and below) 138 | # file_pathname: relative file path 139 | assert os.path.exists(file_pathname), f"Cannot open point cloud: {file_pathname}" 140 | pc = self.read_pc(file_pathname) 141 | assert pc.shape[1] == 3 142 | 143 | if self.remove_zero_points: 144 | mask = np.all(np.isclose(pc, 0), axis=1) 145 | pc = pc[~mask] 146 | 147 | if self.remove_ground_plane: 148 | mask = pc[:, 2] > self.ground_plane_level 149 | pc = pc[mask] 150 | 151 | return pc 152 | 153 | def read_pc(self, file_pathname): 154 | # Reads the point cloud without pre-processing 155 | raise NotImplementedError("read_pc must be overloaded in an inheriting class") 156 | -------------------------------------------------------------------------------- /libs/pointops/src/labelstat/labelstat_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "labelstat_cuda_kernel.h" 3 | 4 | // input: new_xyz(b, m, 3) xyz(b, n, 3) label_stat(b, n, nclass) 5 | // output: idx(b, m, nsample) new_label_stat(b, m, nclass) 6 | __global__ void labelstat_and_ballquery_cuda_kernel_fast(int b, int n, int m, float radius, int nsample, int nclass, 7 | const float *new_xyz, const float *xyz, const int *label_stat, int *idx, int *new_label_stat) { 8 | int bs_idx = blockIdx.y; 9 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 10 | if (bs_idx >= b || pt_idx >= m) return; 11 | 12 | new_xyz += bs_idx * m * 3 + pt_idx * 3; 13 | xyz += bs_idx * n * 3; 14 | idx += bs_idx * m * nsample + pt_idx * nsample; 15 | label_stat += bs_idx * n * nclass; 16 | new_label_stat += bs_idx * m * nclass + pt_idx * nclass; 17 | 18 | for(int i = 0; i < nclass; i++){ 19 | new_label_stat[i] = 0; 20 | } 21 | 22 | float radius2 = radius * radius; 23 | float new_x = new_xyz[0]; 24 | float new_y = new_xyz[1]; 25 | float new_z = new_xyz[2]; 26 | 27 | int cnt = 0; 28 | for (int k = 0; k < n; ++k) { 29 | float x = xyz[k * 3 + 0]; 30 | float y = xyz[k * 3 + 1]; 31 | float z = xyz[k * 3 + 2]; 32 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z); 33 | if (d2 < radius2){ 34 | for(int i = 0; i < nclass; i++){ 35 | new_label_stat[i] += label_stat[k * nclass + i]; 36 | } 37 | if (cnt == 0){ 38 | for (int l = 0; l < nsample; ++l) { 39 | idx[l] = k; 40 | } 41 | } 42 | idx[cnt] = k; 43 | ++cnt; 44 | if (cnt >= nsample){ 45 | break; 46 | } 47 | } 48 | } 49 | } 50 | 51 | void labelstat_and_ballquery_cuda_launcher_fast(int b, int n, int m, float radius, int nsample, int nclass, 52 | const float *new_xyz, const float *xyz, const int *label_stat, int *idx, int *new_label_stat, cudaStream_t stream) { 53 | // param new_xyz: (B, m, 3) 54 | // param xyz: (B, n, 3) 55 | // param idx: (B, m, nsample) 56 | 57 | cudaError_t err; 58 | 59 | dim3 blocks(DIVUP(m, THREADS_PER_BLOCK), b); // blockIdx.x(col), blockIdx.y(row) 60 | dim3 threads(THREADS_PER_BLOCK); 61 | 62 | labelstat_and_ballquery_cuda_kernel_fast<<<blocks, threads, 0, stream>>>(b, 
n, m, radius, nsample, nclass, new_xyz, xyz, label_stat, idx, new_label_stat); 63 | // cudaDeviceSynchronize(); // for using printf in kernel function 64 | 65 | err = cudaGetLastError(); 66 | if (cudaSuccess != err) { 67 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 68 | exit(-1); 69 | } 70 | } 71 | 72 | // input: new_xyz(b, m, 3) xyz(b, n, 3) label_stat(b, n, nclass) 73 | // output: new_label_stat(b, m, nclass) 74 | __global__ void labelstat_ballrange_cuda_kernel_fast(int b, int n, int m, float radius, int nclass, 75 | const float *new_xyz, const float *xyz, const int *label_stat, int *new_label_stat) { 76 | int bs_idx = blockIdx.y; 77 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 78 | if (bs_idx >= b || pt_idx >= m) return; 79 | 80 | new_xyz += bs_idx * m * 3 + pt_idx * 3; 81 | xyz += bs_idx * n * 3; 82 | label_stat += bs_idx * n * nclass; 83 | new_label_stat += bs_idx * m * nclass + pt_idx * nclass; 84 | 85 | for(int i = 0; i < nclass; i++){ 86 | new_label_stat[i] = 0; 87 | } 88 | 89 | float radius2 = radius * radius; 90 | float new_x = new_xyz[0]; 91 | float new_y = new_xyz[1]; 92 | float new_z = new_xyz[2]; 93 | 94 | for (int k = 0; k < n; ++k) { 95 | float x = xyz[k * 3 + 0]; 96 | float y = xyz[k * 3 + 1]; 97 | float z = xyz[k * 3 + 2]; 98 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z); 99 | if (d2 < radius2){ 100 | for(int i = 0; i < nclass; i++){ 101 | new_label_stat[i] += label_stat[k * nclass + i]; 102 | } 103 | } 104 | } 105 | } 106 | 107 | 108 | void labelstat_ballrange_cuda_launcher_fast(int b, int n, int m, float radius, int nclass, 109 | const float *new_xyz, const float *xyz, const int *label_stat, int *new_label_stat, cudaStream_t stream) { 110 | // param new_xyz: (B, m, 3) 111 | // param xyz: (B, n, 3) 112 | // param new_label_stat: (B, m, nclass) 113 | 114 | cudaError_t err; 115 | 116 | dim3 blocks(DIVUP(m, THREADS_PER_BLOCK), b); // blockIdx.x(col), blockIdx.y(row) 117 | dim3 threads(THREADS_PER_BLOCK); 118 | 119 | labelstat_ballrange_cuda_kernel_fast<<<blocks, threads, 0, stream>>>(b, n, m, radius, nclass, new_xyz, xyz, label_stat, new_label_stat); 120 | // cudaDeviceSynchronize(); // for using printf in kernel function 121 | 122 | err = cudaGetLastError(); 123 | if (cudaSuccess != err) { 124 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 125 | exit(-1); 126 | } 127 | } 128 | 129 | // input: idx(b, m, nsample) label_stat(b, n, nclass) 130 | // output: new_label_stat(b, m, nclass) 131 | __global__ void labelstat_idx_cuda_kernel_fast(int b, int n, int m, int nsample, int nclass, 132 | const int *label_stat, const int *idx, int *new_label_stat) { 133 | int bs_idx = blockIdx.y; 134 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 135 | if (bs_idx >= b || pt_idx >= m) return; 136 | 137 | idx += bs_idx * m * nsample + pt_idx * nsample; 138 | label_stat += bs_idx * n * nclass; 139 | new_label_stat += bs_idx * m * nclass + pt_idx * nclass; 140 | 141 | for(int i = 0; i < nclass; i++){ 142 | new_label_stat[i] = 0; 143 | } 144 | 145 | for(int k = 0; k < nsample; k++){ 146 | const int *label_stat_k = label_stat + idx[k] * nclass; 147 | for(int i = 0; i < nclass; i++){ 148 | new_label_stat[i] += label_stat_k[i]; 149 | } 150 | } 151 | } 152 | 153 | 154 | void labelstat_idx_cuda_launcher_fast(int b, int n, int m, int nsample, int nclass, 155 | const int *label_stat, const int *idx, int *new_label_stat, cudaStream_t stream) { 156 | // param label_stat: (B, n, nclass) 157 | // param new_label_stat: (B, m, nclass) 158 | // param idx: 
(B, m, nsample) 159 | 160 | cudaError_t err; 161 | 162 | dim3 blocks(DIVUP(m, THREADS_PER_BLOCK), b); // blockIdx.x(col), blockIdx.y(row) 163 | dim3 threads(THREADS_PER_BLOCK); 164 | 165 | labelstat_idx_cuda_kernel_fast<<<blocks, threads, 0, stream>>>(b, n, m, nsample, nclass, label_stat, idx, new_label_stat); 166 | // cudaDeviceSynchronize(); // for using printf in kernel function 167 | 168 | err = cudaGetLastError(); 169 | if (cudaSuccess != err) { 170 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 171 | exit(-1); 172 | } 173 | } -------------------------------------------------------------------------------- /place_recognition/pptnet_origin/models/loupe.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | class NetVLADBase(nn.Module): 7 | def __init__(self, feature_size, max_samples, cluster_size, output_dim, 8 | gating=True, add_batch_norm=True): 9 | super(NetVLADBase, self).__init__() 10 | self.feature_size = feature_size 11 | self.max_samples = max_samples 12 | self.output_dim = output_dim 13 | self.gating = gating 14 | self.add_batch_norm = add_batch_norm 15 | self.cluster_size = cluster_size 16 | self.softmax = nn.Softmax(dim=-1) 17 | 18 | self.cluster_weights = nn.Parameter( 19 | torch.randn(feature_size, cluster_size) * 1 / math.sqrt(feature_size)) 20 | 21 | self.cluster_weights2 = nn.Parameter( 22 | torch.randn(1, feature_size, cluster_size) * 1 / math.sqrt(feature_size)) 23 | 24 | self.hidden1_weights = nn.Parameter( 25 | torch.randn(feature_size*cluster_size, output_dim)* 1 / math.sqrt(feature_size)) 26 | 27 | if add_batch_norm: 28 | self.cluster_biases = None 29 | self.bn1 = nn.BatchNorm1d(cluster_size) 30 | else: 31 | self.cluster_biases = nn.Parameter(torch.randn(cluster_size) * 1 / math.sqrt(feature_size)) # attention initialization 32 | self.bn1 = None 33 | 34 | self.bn2 = nn.BatchNorm1d(output_dim) 35 | 36 | if gating: 37 | self.context_gating = GatingContext(output_dim, add_batch_norm=add_batch_norm) 38 | 39 | def forward(self, x): 40 | x = x.transpose(1, 3).contiguous() # B x 1024 x N x 1 -> B x 1 x N x 1024 41 | x = x.view((-1, self.max_samples, self.feature_size)) # B x N x 1024 42 | 43 | activation = torch.matmul(x, self.cluster_weights) # B x N x 1024 X 1024 x 64 -> B x N x 64 44 | if self.add_batch_norm: 45 | # activation = activation.transpose(1,2).contiguous() 46 | activation = activation.view(-1, self.cluster_size) # B x N x 64 -> BN x 64 47 | activation = self.bn1(activation) # BN x 64 48 | activation = activation.view(-1, self.max_samples, self.cluster_size) # BN x 64 -> B x N x 64 49 | # activation = activation.transpose(1,2).contiguous() 50 | else: 51 | activation = activation + self.cluster_biases # B x N x 64 + 64 -> B x N x 64 52 | 53 | activation = self.softmax(activation) # B x N x 64 --(dim=-1)--> B x N x 64 54 | # activation = activation[:,:,:64] 55 | activation = activation.view((-1, self.max_samples, self.cluster_size)) # B x N x 64 56 | 57 | a_sum = activation.sum(-2, keepdim=True) # B x N x K --(dim=-2)--> B x 1 x K 58 | a = a_sum * self.cluster_weights2 # B x 1 x K X 1 x C x K -> B x C x K 59 | # element-wise multiply, broadcast mechanism 60 | 61 | 62 | activation = torch.transpose(activation, 2, 1) # B x N x 64 -> B x 64 x N 63 | 64 | x = x.view((-1, self.max_samples, self.feature_size)) # B x N x C -> B x N x C 65 | vlad = torch.matmul(activation, x) # B x K x N X B x N x C -> B x K x C 66 | vlad = 
torch.transpose(vlad, 2, 1) # B x K x C -> B x C x K 67 | vlad = vlad - a # B x C x K - B x C x K -> B x C x K 68 | 69 | vlad = F.normalize(vlad, dim=1, p=2).contiguous() # B x C x K -> B x C x K 70 | vlad = vlad.view((-1, self.cluster_size * self.feature_size)) # B x (C*K) 71 | return vlad 72 | 73 | class SpatialPyramidNetVLAD(nn.Module): 74 | def __init__(self, feature_size, max_samples, cluster_size, output_dim, 75 | gating=True, add_batch_norm=True): 76 | super(SpatialPyramidNetVLAD, self).__init__() 77 | # max_samples[0] = 64 78 | self.vlad0 = NetVLADBase(feature_size[0], max_samples[0], cluster_size[0], output_dim[0], gating, add_batch_norm) 79 | # max_samples[1] = 256 80 | self.vlad1 = NetVLADBase(feature_size[1], max_samples[1], cluster_size[1], output_dim[1], gating, add_batch_norm) 81 | # max_samples[2] = 1024 82 | self.vlad2 = NetVLADBase(feature_size[2], max_samples[2], cluster_size[2], output_dim[2], gating, add_batch_norm) 83 | # max_samples[3] = 4096 84 | self.vlad3 = NetVLADBase(feature_size[3], max_samples[3], cluster_size[3], output_dim[3], gating, add_batch_norm) 85 | 86 | sum_cluster_size = cluster_size[0] + cluster_size[1] + cluster_size[2] + cluster_size[3] 87 | self.hidden_weights = nn.Parameter(torch.randn(feature_size[0]*sum_cluster_size, output_dim[0])* 1 / math.sqrt(feature_size[0])) 88 | 89 | self.bn2 = nn.BatchNorm1d(output_dim[0]) 90 | self.gating = gating 91 | if self.gating: 92 | self.context_gating = GatingContext(output_dim[0], add_batch_norm=add_batch_norm) 93 | 94 | def forward(self, f0, f1, f2, f3): 95 | v0 = self.vlad0(f0) 96 | v1 = self.vlad1(f1) 97 | v2 = self.vlad2(f2) 98 | v3 = self.vlad3(f3) 99 | vlad = torch.cat((v0, v1, v2, v3), dim=-1) 100 | vlad = torch.matmul(vlad, self.hidden_weights) # B x (1024*64) X (1024*64) x 256 -> B x 256 101 | vlad = self.bn2(vlad) # B x 256 -> B x 256 102 | 103 | if self.gating: 104 | vlad = self.context_gating(vlad) # B x 256 -> B x 256 105 | return vlad # B x 256 106 | 107 | class GatingContext(nn.Module): 108 | def __init__(self, dim, add_batch_norm=True): 109 | super(GatingContext, self).__init__() 110 | self.dim = dim 111 | self.add_batch_norm = add_batch_norm 112 | self.gating_weights = nn.Parameter( 113 | torch.randn(dim, dim) * 1 / math.sqrt(dim)) 114 | self.sigmoid = nn.Sigmoid() 115 | 116 | if add_batch_norm: 117 | self.gating_biases = None 118 | self.bn1 = nn.BatchNorm1d(dim) 119 | else: 120 | self.gating_biases = nn.Parameter( 121 | torch.randn(dim) * 1 / math.sqrt(dim)) 122 | self.bn1 = None 123 | 124 | def forward(self, x): 125 | gates = torch.matmul(x, self.gating_weights) # B x 256 X 256 x 256 -> B x 256 126 | 127 | if self.add_batch_norm: 128 | gates = self.bn1(gates) # B x 256 -> B x 256 129 | else: 130 | gates = gates + self.gating_biases # B x 256 + 256 -> B x 256 131 | 132 | gates = self.sigmoid(gates) # B x 256 -> B x 256 133 | 134 | activation = x * gates # B x 256 * B x 256 -> B x 256 135 | 136 | return activation 137 | -------------------------------------------------------------------------------- /place_recognition/Minkloc3D_V2/models/losses/loss.py: -------------------------------------------------------------------------------- 1 | # Warsaw University of Technology 2 | 3 | from pytorch_metric_learning import losses, reducers 4 | from pytorch_metric_learning.distances import LpDistance 5 | from place_recognition.Minkloc3D_V2.misc.utils import TrainingParams 6 | from place_recognition.Minkloc3D_V2.models.losses.loss_utils import * 7 | from 
place_recognition.Minkloc3D_V2.models.losses.truncated_smoothap import TruncatedSmoothAP 8 | 9 | 10 | def make_losses(params: TrainingParams): 11 | if params.loss == 'batchhardtripletmarginloss': 12 | # BatchHard mining with triplet margin loss 13 | # Expects input: embeddings, positives_mask, negatives_mask 14 | loss_fn = BatchHardTripletLossWithMasks(params.margin) 15 | elif params.loss == 'batchhardcontrastiveloss': 16 | loss_fn = BatchHardContrastiveLossWithMasks(params.pos_margin, params.neg_margin) 17 | elif params.loss == 'truncatedsmoothap': 18 | loss_fn = TruncatedSmoothAP(tau1=params.tau1, similarity=params.similarity, 19 | positives_per_query=params.positives_per_query) 20 | else: 21 | print('Unknown loss: {}'.format(params.loss)) 22 | raise NotImplementedError 23 | 24 | return loss_fn 25 | 26 | 27 | class HardTripletMinerWithMasks: 28 | # Hard triplet miner 29 | def __init__(self, distance): 30 | self.distance = distance 31 | # Stats 32 | self.max_pos_pair_dist = None 33 | self.max_neg_pair_dist = None 34 | self.mean_pos_pair_dist = None 35 | self.mean_neg_pair_dist = None 36 | self.min_pos_pair_dist = None 37 | self.min_neg_pair_dist = None 38 | 39 | def __call__(self, embeddings, positives_mask, negatives_mask): 40 | assert embeddings.dim() == 2 41 | d_embeddings = embeddings.detach() 42 | with torch.no_grad(): 43 | hard_triplets = self.mine(d_embeddings, positives_mask, negatives_mask) 44 | return hard_triplets 45 | 46 | def mine(self, embeddings, positives_mask, negatives_mask): 47 | # Based on pytorch-metric-learning implementation 48 | dist_mat = self.distance(embeddings) 49 | (hardest_positive_dist, hardest_positive_indices), a1p_keep = get_max_per_row(dist_mat, positives_mask) 50 | (hardest_negative_dist, hardest_negative_indices), a2n_keep = get_min_per_row(dist_mat, negatives_mask) 51 | a_keep_idx = torch.where(a1p_keep & a2n_keep) 52 | a = torch.arange(dist_mat.size(0)).to(hardest_positive_indices.device)[a_keep_idx] 53 | p = hardest_positive_indices[a_keep_idx] 54 | n = hardest_negative_indices[a_keep_idx] 55 | self.max_pos_pair_dist = torch.max(hardest_positive_dist[a_keep_idx]).item() 56 | self.max_neg_pair_dist = torch.max(hardest_negative_dist[a_keep_idx]).item() 57 | self.mean_pos_pair_dist = torch.mean(hardest_positive_dist[a_keep_idx]).item() 58 | self.mean_neg_pair_dist = torch.mean(hardest_negative_dist[a_keep_idx]).item() 59 | self.min_pos_pair_dist = torch.min(hardest_positive_dist[a_keep_idx]).item() 60 | self.min_neg_pair_dist = torch.min(hardest_negative_dist[a_keep_idx]).item() 61 | return a, p, n 62 | 63 | 64 | def get_max_per_row(mat, mask): 65 | non_zero_rows = torch.any(mask, dim=1) 66 | mat_masked = mat.clone() 67 | mat_masked[~mask] = 0 68 | return torch.max(mat_masked, dim=1), non_zero_rows 69 | 70 | 71 | def get_min_per_row(mat, mask): 72 | non_inf_rows = torch.any(mask, dim=1) 73 | mat_masked = mat.clone() 74 | mat_masked[~mask] = float('inf') 75 | return torch.min(mat_masked, dim=1), non_inf_rows 76 | 77 | 78 | class BatchHardTripletLossWithMasks: 79 | def __init__(self, margin:float): 80 | self.margin = margin 81 | self.distance = LpDistance(normalize_embeddings=False, collect_stats=True) 82 | # We use triplet loss with Euclidean distance 83 | self.miner_fn = HardTripletMinerWithMasks(distance=self.distance) 84 | reducer_fn = reducers.AvgNonZeroReducer(collect_stats=True) 85 | self.loss_fn = losses.TripletMarginLoss(margin=self.margin, swap=True, distance=self.distance, 86 | reducer=reducer_fn, collect_stats=True) 87 | 88 | def 
__call__(self, embeddings, positives_mask, negatives_mask): 89 | hard_triplets = self.miner_fn(embeddings, positives_mask, negatives_mask) 90 | dummy_labels = torch.arange(embeddings.shape[0]).to(embeddings.device) 91 | loss = self.loss_fn(embeddings, dummy_labels, hard_triplets) 92 | 93 | stats = {'loss': loss.item(), 'avg_embedding_norm': self.loss_fn.distance.final_avg_query_norm, 94 | 'num_non_zero_triplets': self.loss_fn.reducer.triplets_past_filter, 95 | 'num_triplets': len(hard_triplets[0]), 96 | 'mean_pos_pair_dist': self.miner_fn.mean_pos_pair_dist, 97 | 'mean_neg_pair_dist': self.miner_fn.mean_neg_pair_dist, 98 | 'max_pos_pair_dist': self.miner_fn.max_pos_pair_dist, 99 | 'max_neg_pair_dist': self.miner_fn.max_neg_pair_dist, 100 | 'min_pos_pair_dist': self.miner_fn.min_pos_pair_dist, 101 | 'min_neg_pair_dist': self.miner_fn.min_neg_pair_dist 102 | } 103 | return loss, stats 104 | 105 | 106 | class BatchHardContrastiveLossWithMasks: 107 | def __init__(self, pos_margin: float, neg_margin: float): 108 | self.pos_margin = pos_margin 109 | self.neg_margin = neg_margin 110 | self.distance = LpDistance(normalize_embeddings=False, collect_stats=True) 111 | self.miner_fn = HardTripletMinerWithMasks(distance=self.distance) 112 | # We use contrastive loss with squared Euclidean distance 113 | reducer_fn = reducers.AvgNonZeroReducer(collect_stats=True) 114 | self.loss_fn = losses.ContrastiveLoss(pos_margin=self.pos_margin, neg_margin=self.neg_margin, 115 | distance=self.distance, reducer=reducer_fn, collect_stats=True) 116 | 117 | def __call__(self, embeddings, positives_mask, negatives_mask): 118 | hard_triplets = self.miner_fn(embeddings, positives_mask, negatives_mask) 119 | dummy_labels = torch.arange(embeddings.shape[0]).to(embeddings.device) 120 | loss = self.loss_fn(embeddings, dummy_labels, hard_triplets) 121 | stats = {'loss': loss.item(), 'avg_embedding_norm': self.loss_fn.distance.final_avg_query_norm, 122 | 'pos_pairs_above_threshold': self.loss_fn.reducer.reducers['pos_loss'].pos_pairs_above_threshold, 123 | 'neg_pairs_above_threshold': self.loss_fn.reducer.reducers['neg_loss'].neg_pairs_above_threshold, 124 | 'pos_loss': self.loss_fn.reducer.reducers['pos_loss'].pos_loss.item(), 125 | 'neg_loss': self.loss_fn.reducer.reducers['neg_loss'].neg_loss.item(), 126 | 'num_pairs': 2*len(hard_triplets[0]), 127 | 'mean_pos_pair_dist': self.miner_fn.mean_pos_pair_dist, 128 | 'mean_neg_pair_dist': self.miner_fn.mean_neg_pair_dist, 129 | 'max_pos_pair_dist': self.miner_fn.max_pos_pair_dist, 130 | 'max_neg_pair_dist': self.miner_fn.max_neg_pair_dist, 131 | 'min_pos_pair_dist': self.miner_fn.min_pos_pair_dist, 132 | 'min_neg_pair_dist': self.miner_fn.min_neg_pair_dist 133 | } 134 | 135 | return loss, stats 136 | -------------------------------------------------------------------------------- /libs/pointops/src/sampling/sampling_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "sampling_cuda_kernel.h" 3 | 4 | // input: points(b, c, n) idx(b, m) 5 | // output: out(b, c, m) 6 | __global__ void gathering_forward_cuda_kernel(int b, int c, int n, int m, const float *points, const int *idx, float *out) 7 | { 8 | for (int i = blockIdx.x; i < b; i += gridDim.x) 9 | { 10 | for (int l = blockIdx.y; l < c; l += gridDim.y) 11 | { 12 | for (int j = threadIdx.x; j < m; j += blockDim.x) 13 | { 14 | int a = idx[i * m + j]; 15 | out[(i * c + l) * m + j] = points[(i * c + l) * n + a]; 16 | } 17 | } 18 | } 19 | } 20 | 21 | 
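// Note (added explanatory comment, not in the original source): gathering_forward_cuda_kernel above
// is a pure gather, out[i][l][j] = points[i][l][idx[i][j]]. The backward kernel below scatters
// grad_out back through the same indices; because one source point idx[i][j] can be selected by
// several output positions j, the colliding writes are accumulated with atomicAdd rather than
// plain stores.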
// input: grad_out(b, c, m) idx(b, m) 22 | // output: grad_points(b, c, n) 23 | __global__ void gathering_backward_cuda_kernel(int b, int c, int n, int m, const float *grad_out, const int *idx, float *grad_points) 24 | { 25 | for (int i = blockIdx.x; i < b; i += gridDim.x) 26 | { 27 | for (int l = blockIdx.y; l < c; l += gridDim.y) 28 | { 29 | for (int j = threadIdx.x; j < m; j += blockDim.x) 30 | { 31 | int a = idx[i * m + j]; 32 | atomicAdd(grad_points + (i * c + l) * n + a, grad_out[(i * c + l) * m + j]); 33 | } 34 | } 35 | } 36 | } 37 | 38 | void gathering_forward_cuda_launcher(int b, int c, int n, int m, const float *points, const int *idx, float *out) 39 | { 40 | gathering_forward_cuda_kernel<<<dim3(b, c, 1), opt_n_threads(m), 0>>>(b, c, n, m, points, idx, out); 41 | } 42 | 43 | void gathering_backward_cuda_launcher(int b, int c, int n, int m, const float *grad_out, const int *idx, float *grad_points) 44 | { 45 | gathering_backward_cuda_kernel<<<dim3(b, c, 1), opt_n_threads(m), 0>>>(b, c, n, m, grad_out, idx, grad_points); 46 | } 47 | 48 | __device__ void __update(float *dists, int *dists_i, 49 | int idx1, int idx2) { 50 | const float v1 = dists[idx1], v2 = dists[idx2]; 51 | const int i1 = dists_i[idx1], i2 = dists_i[idx2]; 52 | dists[idx1] = max(v1, v2); 53 | dists_i[idx1] = v2 > v1 ? i2 : i1; 54 | } 55 | 56 | // Input dataset: (b, n, 3), temp: (b, n) 57 | // Output idxs (b, m) 58 | template <unsigned int block_size> 59 | __global__ void furthestsampling_cuda_kernel(int b, int n, int m, const float *dataset, float *temp, int *idxs) 60 | { 61 | if (m <= 0) 62 | return; 63 | __shared__ float dists[block_size]; 64 | __shared__ int dists_i[block_size]; 65 | 66 | int batch_index = blockIdx.x; 67 | dataset += batch_index * n * 3; 68 | temp += batch_index * n; 69 | idxs += batch_index * m; 70 | int tid = threadIdx.x; 71 | const int stride = block_size; 72 | int old = 0; 73 | if (threadIdx.x == 0) 74 | idxs[0] = old; 75 | 76 | __syncthreads(); 77 | for (int j = 1; j < m; j++) 78 | { 79 | int besti = 0; 80 | float best = -1; 81 | float x1 = dataset[old * 3 + 0]; 82 | float y1 = dataset[old * 3 + 1]; 83 | float z1 = dataset[old * 3 + 2]; 84 | for (int k = tid; k < n; k += stride) 85 | { 86 | float x2, y2, z2; 87 | x2 = dataset[k * 3 + 0]; 88 | y2 = dataset[k * 3 + 1]; 89 | z2 = dataset[k * 3 + 2]; 90 | //float mag = (x2 * x2) + (y2 * y2) + (z2 * z2); 91 | //if (mag <= 1e-3) 92 | // continue; 93 | float d = (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1); 94 | float d2 = min(d, temp[k]); 95 | temp[k] = d2; 96 | besti = d2 > best ? k : besti; 97 | best = d2 > best ? 
d2 : best; 98 | } 99 | dists[tid] = best; 100 | dists_i[tid] = besti; 101 | __syncthreads(); 102 | 103 | if (block_size >= 1024) { 104 | if (tid < 512) { 105 | __update(dists, dists_i, tid, tid + 512); 106 | } 107 | __syncthreads(); 108 | } 109 | if (block_size >= 512) { 110 | if (tid < 256) { 111 | __update(dists, dists_i, tid, tid + 256); 112 | } 113 | __syncthreads(); 114 | } 115 | if (block_size >= 256) { 116 | if (tid < 128) { 117 | __update(dists, dists_i, tid, tid + 128); 118 | } 119 | __syncthreads(); 120 | } 121 | if (block_size >= 128) { 122 | if (tid < 64) { 123 | __update(dists, dists_i, tid, tid + 64); 124 | } 125 | __syncthreads(); 126 | } 127 | if (block_size >= 64) { 128 | if (tid < 32) { 129 | __update(dists, dists_i, tid, tid + 32); 130 | } 131 | __syncthreads(); 132 | } 133 | if (block_size >= 32) { 134 | if (tid < 16) { 135 | __update(dists, dists_i, tid, tid + 16); 136 | } 137 | __syncthreads(); 138 | } 139 | if (block_size >= 16) { 140 | if (tid < 8) { 141 | __update(dists, dists_i, tid, tid + 8); 142 | } 143 | __syncthreads(); 144 | } 145 | if (block_size >= 8) { 146 | if (tid < 4) { 147 | __update(dists, dists_i, tid, tid + 4); 148 | } 149 | __syncthreads(); 150 | } 151 | if (block_size >= 4) { 152 | if (tid < 2) { 153 | __update(dists, dists_i, tid, tid + 2); 154 | } 155 | __syncthreads(); 156 | } 157 | if (block_size >= 2) { 158 | if (tid < 1) { 159 | __update(dists, dists_i, tid, tid + 1); 160 | } 161 | __syncthreads(); 162 | } 163 | 164 | old = dists_i[0]; 165 | if (tid == 0) 166 | idxs[j] = old; 167 | } 168 | } 169 | 170 | void furthestsampling_cuda_launcher(int b, int n, int m, const float *dataset, float *temp, int *idxs) 171 | { 172 | unsigned int n_threads = opt_n_threads(n); 173 | switch (n_threads) { 174 | case 1024: 175 | furthestsampling_cuda_kernel<1024><<<b, n_threads>>>(b, n, m, dataset, temp, idxs); 176 | break; 177 | case 512: 178 | furthestsampling_cuda_kernel<512><<<b, n_threads>>>(b, n, m, dataset, temp, idxs); 179 | break; 180 | case 256: 181 | furthestsampling_cuda_kernel<256><<<b, n_threads>>>(b, n, m, dataset, temp, idxs); 182 | break; 183 | case 128: 184 | furthestsampling_cuda_kernel<128><<<b, n_threads>>>(b, n, m, dataset, temp, idxs); 185 | break; 186 | case 64: 187 | furthestsampling_cuda_kernel<64><<<b, n_threads>>>(b, n, m, dataset, temp, idxs); 188 | break; 189 | case 32: 190 | furthestsampling_cuda_kernel<32><<<b, n_threads>>>(b, n, m, dataset, temp, idxs); 191 | break; 192 | case 16: 193 | furthestsampling_cuda_kernel<16><<<b, n_threads>>>(b, n, m, dataset, temp, idxs); 194 | break; 195 | case 8: 196 | furthestsampling_cuda_kernel<8><<<b, n_threads>>>(b, n, m, dataset, temp, idxs); 197 | break; 198 | case 4: 199 | furthestsampling_cuda_kernel<4><<<b, n_threads>>>(b, n, m, dataset, temp, idxs); 200 | break; 201 | case 2: 202 | furthestsampling_cuda_kernel<2><<<b, n_threads>>>(b, n, m, dataset, temp, idxs); 203 | break; 204 | case 1: 205 | furthestsampling_cuda_kernel<1><<<b, n_threads>>>(b, n, m, dataset, temp, idxs); 206 | break; 207 | default: 208 | furthestsampling_cuda_kernel<512><<<b, n_threads>>>(b, n, m, dataset, temp, idxs); 209 | } 210 | } 211 | -------------------------------------------------------------------------------- /utils/visualization.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import os 3 | 4 | import open3d as o3d 5 | import numpy as np 6 | 7 | from sklearn.manifold import TSNE 8 | from matplotlib import pyplot as plt 9 | 10 | from utils.loading_pointclouds import normalize_point_cloud, load_pc_file 11 | 12 | 13 | def get_color_map(x): 14 | colours = plt.cm.Spectral(x) 15 | return colours[:, :3] 16 | 17 | 18 | def 
mesh_sphere(pcd, voxel_size, sphere_size=0.6): 19 | # Create a mesh sphere 20 | spheres = o3d.geometry.TriangleMesh() 21 | s = o3d.geometry.TriangleMesh.create_sphere(radius=voxel_size * sphere_size) 22 | s.compute_vertex_normals() 23 | 24 | for i, p in enumerate(pcd.points): 25 | si = copy.deepcopy(s) 26 | trans = np.identity(4) 27 | trans[:3, 3] = p 28 | si.transform(trans) 29 | si.paint_uniform_color(pcd.colors[i]) 30 | spheres += si 31 | return spheres 32 | 33 | 34 | def get_colored_point_cloud_feature(pcd, feature, voxel_size): 35 | tsne_results = embed_tsne(feature) 36 | # color = get_color_map(tsne_results) 37 | color = tsne_results 38 | pcd.colors = o3d.utility.Vector3dVector(color) 39 | spheres = mesh_sphere(pcd, voxel_size) 40 | return spheres 41 | 42 | 43 | def embed_tsne(data): 44 | """ 45 | N x D np.array data 46 | """ 47 | tsne = TSNE(n_components=3, verbose=1, perplexity=40, n_iter=300, random_state=0, n_jobs=2) 48 | tsne_results = tsne.fit_transform(data) 49 | tsne_results = np.squeeze(tsne_results) 50 | tsne_min = np.min(tsne_results) 51 | tsne_max = np.max(tsne_results) 52 | return (tsne_results - tsne_min) / (tsne_max - tsne_min) 53 | 54 | 55 | def visualize_feature_embedding(coor, feature): 56 | vis_pcd = o3d.geometry.PointCloud() 57 | vis_pcd.points = o3d.utility.Vector3dVector(coor) 58 | vis_pcd = get_colored_point_cloud_feature(vis_pcd, feature, 0.02) 59 | o3d.visualization.draw_geometries([vis_pcd]) 60 | 61 | 62 | def visualize_multi_feature_embedding(coor_feature_list): 63 | geometries = [] 64 | for coor, feature in coor_feature_list: 65 | vis_pcd = o3d.geometry.PointCloud() 66 | vis_pcd.points = o3d.utility.Vector3dVector(coor) 67 | vis_pcd = get_colored_point_cloud_feature(vis_pcd, feature, 0.02) 68 | geometries.append(vis_pcd) 69 | o3d.visualization.draw_geometries(geometries) 70 | 71 | 72 | def visualize_point_cloud(wnd_name, coor, colors): 73 | vis = o3d.visualization.Visualizer() 74 | # Set the window title 75 | vis.create_window(window_name=wnd_name) 76 | # Set the rendered point size 77 | vis.get_render_option().point_size = 3 78 | # Set the background color to black 79 | opt = vis.get_render_option() 80 | opt.background_color = np.asarray([0, 0, 0]) 81 | # Create the point cloud object 82 | pcd = o3d.open3d.geometry.PointCloud() 83 | # Convert the point cloud data to a type Open3D can use directly 84 | pcd.points = o3d.open3d.utility.Vector3dVector(coor) 85 | pcd.colors = o3d.open3d.utility.Vector3dVector(colors) 86 | # Add the point cloud to the window 87 | vis.add_geometry(pcd) 88 | vis.run() 89 | vis.destroy_window() 90 | 91 | 92 | def vis_cloud_simple(wnd_name, coor_list): 93 | colors = [np.array([1.0, 1.0, 1.0]).reshape([1, 3]), np.array([1.0, 0.0, 0.0]).reshape([1, 3]), 94 | np.array([0.0, 1.0, 0.0]).reshape([1, 3]), np.array([0.0, 0.0, 1.0]).reshape([1, 3]), 95 | np.array([1.0, 1.0, 0.0]).reshape([1, 3]), np.array([0.0, 1.0, 1.0]).reshape([1, 3])] 96 | pcs, feats = [], [] 97 | for i in range(len(coor_list)): 98 | coor = np.array(coor_list[i], dtype=np.float32) 99 | feat = np.repeat(colors[i], coor.shape[0], axis=0) 100 | pcs.append(coor) 101 | feats.append(feat) 102 | vis_pc = np.concatenate(pcs, axis=0) 103 | vis_feat = np.concatenate(feats, axis=0) 104 | visualize_point_cloud(wnd_name, vis_pc, vis_feat) 105 | 106 | 107 | class BadCase: 108 | def __init__(self): 109 | self.query_file = '' 110 | self.pos_files = [] 111 | self.neg_files = [] 112 | 113 | 114 | def load_bad_case(bad_case_file): 115 | bad_cases_top1p, bad_case_top1 = [], [] 116 | load_top1 = False 117 | temp_bad_case = None 118 | for line in open(bad_case_file, 'r'): 119 | line = line.strip() 120 | if line == 
'--------------------BadCases: top1--------------------': 121 | load_top1 = True 122 | elif 'query' in line: 123 | temp_bad_case = BadCase() 124 | temp_bad_case.query_file = line.split(' ')[2] 125 | if load_top1: 126 | bad_case_top1.append(temp_bad_case) 127 | else: 128 | bad_cases_top1p.append(temp_bad_case) 129 | elif 'true' in line: 130 | lines = line.split(' ') 131 | for i in range(2, len(lines)): 132 | temp_bad_case.pos_files.append(lines[i]) 133 | elif 'false' in line: 134 | lines = line.split(' ') 135 | for i in range(2, len(lines)): 136 | temp_bad_case.neg_files.append(lines[i]) 137 | return bad_cases_top1p, bad_case_top1 138 | 139 | 140 | def vis_bad_cases(bad_case_file): 141 | # load bad cases 142 | bad_cases_top1p, bad_case_top1 = load_bad_case(bad_case_file) 143 | # get case dir 144 | colors = [np.array([0.0, 0.0, 1.0]).reshape([1, 3]), # blue 145 | np.array([0.0, 1.0, 0.0]).reshape([1, 3]), # green 146 | np.array([1.0, 0.0, 0.0]).reshape([1, 3])] # red 147 | bad_cases_list = [bad_cases_top1p, bad_case_top1] 148 | wnd_name_list = ['badcase_top1%_', 'badcase_top1_'] 149 | for bad_cases_idx in range(len(bad_cases_list)): 150 | bad_cases = bad_cases_list[bad_cases_idx] 151 | for bad_case_idx in range(len(bad_cases)): 152 | bad_case = bad_cases[bad_case_idx] 153 | files_list = [[bad_case.query_file], bad_case.pos_files, bad_case.neg_files] 154 | pcs, feats = [], [] 155 | for i in range(len(files_list)): 156 | files = files_list[i] 157 | for f_idx in range(len(files)): 158 | pc = load_pc_file(files[f_idx], use_np_load=True) + np.array([f_idx, 0.0, 0.0]).reshape([1, 3]) 159 | coor = np.array(pc, dtype=np.float32) 160 | feat = np.repeat(colors[i], coor.shape[0], axis=0) 161 | pcs.append(coor) 162 | feats.append(feat) 163 | vis_pc = np.concatenate(pcs, axis=0) 164 | vis_feat = np.concatenate(feats, axis=0) 165 | wnd_name = wnd_name_list[bad_cases_idx] + '{}'.format(bad_case_idx) 166 | visualize_point_cloud(wnd_name, vis_pc, vis_feat) 167 | 168 | 169 | if __name__ == '__main__': 170 | # coor_feat_tuples = [] 171 | # query_files = ["/home/ericxhzou/Code/benchmark_datasets/info_campus_all/helmet_submap/pointcloud_25m_70.4deg_30deg/4402874639.bin"] 172 | # for query_file in query_files: 173 | # query_pc = np.load(query_file) 174 | # query_pc = query_pc.reshape([-1, 3]) 175 | # query_pc = normalize_point_cloud(query_pc) 176 | # feat = np.random.random(size=query_pc.shape) 177 | # coor_feat_tuple = tuple([query_pc, feat]) 178 | # coor_feat_tuples.append(coor_feat_tuple) 179 | # visualize_multi_feature_embedding(coor_feat_tuples) 180 | 181 | # query_pc = np.load("/home/ericxhzou/Code/benchmark_datasets/info_campus_all/helmet_submap/pointcloud_25m_70.4deg_30deg/4402874639.bin") 182 | # query_pc = query_pc.reshape([-1, 3]) 183 | # query_feat = np.random.random(size=query_pc.shape) 184 | # top1_ref_pc = np.load("/home/ericxhzou/Code/benchmark_datasets/info_campus_all/map_submap_along_traj/pointcloud_25m_70.4deg_30deg/24_-23_0_3.bin") 185 | # top1_ref_pc = top1_ref_pc.reshape([-1, 3]) 186 | # top1_ref_feat = np.random.random(size=query_pc.shape) + 1.0 187 | # vis_pc = np.concatenate((query_pc, top1_ref_pc), axis=0) 188 | # vis_feat = np.concatenate((query_feat, top1_ref_feat), axis=0) 189 | # # visualize_feature_embedding(vis_pc, vis_feat) 190 | # visualize_point_cloud("query_top1ref", vis_pc, colors=vis_feat) 191 | 192 | vis_bad_cases('/home/ericxhzou/Code/test_res_cmp/1->2baseline-hankou/new_badcase.txt') 193 | --------------------------------------------------------------------------------