├── .gitignore ├── LICENSE.txt ├── README.md ├── assets └── overview.png ├── configs ├── _base_ │ └── default_runtime.py └── semantic_kitti │ └── semseg_mambamos.py ├── exp └── semantic_kitti │ └── mambamos │ └── config.py ├── libs ├── pointgroup_ops │ ├── functions │ │ ├── __init__.py │ │ └── functions.py │ ├── setup.py │ └── src │ │ ├── bfs_cluster.cpp │ │ └── bfs_cluster_kernel.cu ├── pointops │ ├── __init__.py │ ├── build │ │ ├── lib.linux-x86_64-cpython-38 │ │ │ └── pointops │ │ │ │ ├── _C.cpython-38-x86_64-linux-gnu.so │ │ │ │ ├── __init__.py │ │ │ │ ├── aggregation.py │ │ │ │ ├── attention.py │ │ │ │ ├── grouping.py │ │ │ │ ├── interpolation.py │ │ │ │ ├── query.py │ │ │ │ ├── sampling.py │ │ │ │ ├── subtraction.py │ │ │ │ └── utils.py │ │ └── temp.linux-x86_64-cpython-38 │ │ │ ├── .ninja_deps │ │ │ ├── .ninja_log │ │ │ ├── build.ninja │ │ │ └── src │ │ │ ├── aggregation │ │ │ ├── aggregation_cuda.o │ │ │ └── aggregation_cuda_kernel.o │ │ │ ├── attention │ │ │ ├── attention_cuda.o │ │ │ └── attention_cuda_kernel.o │ │ │ ├── ball_query │ │ │ ├── ball_query_cuda.o │ │ │ └── ball_query_cuda_kernel.o │ │ │ ├── grouping │ │ │ ├── grouping_cuda.o │ │ │ └── grouping_cuda_kernel.o │ │ │ ├── interpolation │ │ │ ├── interpolation_cuda.o │ │ │ └── interpolation_cuda_kernel.o │ │ │ ├── knn_query │ │ │ ├── knn_query_cuda.o │ │ │ └── knn_query_cuda_kernel.o │ │ │ ├── pointops_api.o │ │ │ ├── random_ball_query │ │ │ ├── random_ball_query_cuda.o │ │ │ └── random_ball_query_cuda_kernel.o │ │ │ ├── sampling │ │ │ ├── sampling_cuda.o │ │ │ └── sampling_cuda_kernel.o │ │ │ └── subtraction │ │ │ ├── subtraction_cuda.o │ │ │ └── subtraction_cuda_kernel.o │ ├── dist │ │ └── pointops-1.0-py3.8-linux-x86_64.egg │ ├── functions │ │ ├── __init__.py │ │ ├── aggregation.py │ │ ├── attention.py │ │ ├── grouping.py │ │ ├── interpolation.py │ │ ├── query.py │ │ ├── sampling.py │ │ ├── subtraction.py │ │ └── utils.py │ ├── pointops.egg-info │ │ ├── PKG-INFO │ │ ├── SOURCES.txt │ │ ├── dependency_links.txt │ │ ├── requires.txt │ │ └── top_level.txt │ ├── setup.py │ └── src │ │ ├── __init__.py │ │ ├── aggregation │ │ ├── aggregation_cuda.cpp │ │ ├── aggregation_cuda_kernel.cu │ │ └── aggregation_cuda_kernel.h │ │ ├── attention │ │ ├── attention_cuda.cpp │ │ ├── attention_cuda_kernel.cu │ │ └── attention_cuda_kernel.h │ │ ├── ball_query │ │ ├── ball_query_cuda.cpp │ │ ├── ball_query_cuda_kernel.cu │ │ └── ball_query_cuda_kernel.h │ │ ├── cuda_utils.h │ │ ├── grouping │ │ ├── grouping_cuda.cpp │ │ ├── grouping_cuda_kernel.cu │ │ └── grouping_cuda_kernel.h │ │ ├── interpolation │ │ ├── interpolation_cuda.cpp │ │ ├── interpolation_cuda_kernel.cu │ │ └── interpolation_cuda_kernel.h │ │ ├── knn_query │ │ ├── knn_query_cuda.cpp │ │ ├── knn_query_cuda_kernel.cu │ │ └── knn_query_cuda_kernel.h │ │ ├── pointops_api.cpp │ │ ├── random_ball_query │ │ ├── random_ball_query_cuda.cpp │ │ ├── random_ball_query_cuda_kernel.cu │ │ └── random_ball_query_cuda_kernel.h │ │ ├── sampling │ │ ├── sampling_cuda.cpp │ │ ├── sampling_cuda_kernel.cu │ │ └── sampling_cuda_kernel.h │ │ └── subtraction │ │ ├── subtraction_cuda.cpp │ │ ├── subtraction_cuda_kernel.cu │ │ └── subtraction_cuda_kernel.h └── pointops2 │ ├── __init__.py │ ├── functions │ ├── __init__.py │ ├── pointops.py │ ├── pointops2.py │ ├── pointops_ablation.py │ ├── test_attention_op_step1.py │ ├── test_attention_op_step1_v2.py │ ├── test_attention_op_step2.py │ ├── test_relative_pos_encoding_op_step1.py │ ├── test_relative_pos_encoding_op_step1_v2.py │ ├── 
test_relative_pos_encoding_op_step1_v3.py │ ├── test_relative_pos_encoding_op_step2.py │ └── test_relative_pos_encoding_op_step2_v2.py │ ├── setup.py │ └── src │ ├── __init__.py │ ├── aggregation │ ├── aggregation_cuda.cpp │ ├── aggregation_cuda_kernel.cu │ └── aggregation_cuda_kernel.h │ ├── attention │ ├── attention_cuda.cpp │ ├── attention_cuda_kernel.cu │ └── attention_cuda_kernel.h │ ├── attention_v2 │ ├── attention_cuda_kernel_v2.cu │ ├── attention_cuda_kernel_v2.h │ └── attention_cuda_v2.cpp │ ├── cuda_utils.h │ ├── grouping │ ├── grouping_cuda.cpp │ ├── grouping_cuda_kernel.cu │ └── grouping_cuda_kernel.h │ ├── interpolation │ ├── interpolation_cuda.cpp │ ├── interpolation_cuda_kernel.cu │ └── interpolation_cuda_kernel.h │ ├── knnquery │ ├── knnquery_cuda.cpp │ ├── knnquery_cuda_kernel.cu │ └── knnquery_cuda_kernel.h │ ├── pointops_api.cpp │ ├── rpe │ ├── relative_pos_encoding_cuda.cpp │ ├── relative_pos_encoding_cuda_kernel.cu │ └── relative_pos_encoding_cuda_kernel.h │ ├── rpe_v2 │ ├── relative_pos_encoding_cuda_kernel_v2.cu │ ├── relative_pos_encoding_cuda_kernel_v2.h │ └── relative_pos_encoding_cuda_v2.cpp │ ├── sampling │ ├── sampling_cuda.cpp │ ├── sampling_cuda_kernel.cu │ └── sampling_cuda_kernel.h │ └── subtraction │ ├── subtraction_cuda.cpp │ ├── subtraction_cuda_kernel.cu │ └── subtraction_cuda_kernel.h ├── pointcept ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-38.pyc │ └── __init__.cpython-39.pyc ├── datasets │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── builder.cpython-38.pyc │ │ ├── dataloader.cpython-38.pyc │ │ ├── defaults.cpython-38.pyc │ │ ├── semantic_kitti_multi_scans.cpython-38.pyc │ │ ├── transform.cpython-38.pyc │ │ └── utils.cpython-38.pyc │ ├── builder.py │ ├── dataloader.py │ ├── defaults.py │ ├── semantic_kitti_multi_scans.py │ ├── train_split_dynamic_pointnumber.txt │ ├── transform.py │ └── utils.py ├── engines │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── defaults.cpython-38.pyc │ │ ├── defaults.cpython-39.pyc │ │ ├── launch.cpython-38.pyc │ │ ├── test.cpython-38.pyc │ │ └── train.cpython-38.pyc │ ├── defaults.py │ ├── hooks │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── builder.cpython-38.pyc │ │ │ ├── default.cpython-38.pyc │ │ │ ├── evaluator.cpython-38.pyc │ │ │ └── misc.cpython-38.pyc │ │ ├── builder.py │ │ ├── default.py │ │ ├── evaluator.py │ │ └── misc.py │ ├── launch.py │ ├── test.py │ └── train.py ├── models │ ├── MambaMOS │ │ ├── MambaMOS.py │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ └── __init__.cpython-38.pyc │ │ └── mssm.py │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── builder.cpython-38.pyc │ │ ├── default.cpython-38.pyc │ │ └── modules.cpython-38.pyc │ ├── builder.py │ ├── default.py │ ├── losses │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── builder.cpython-38.pyc │ │ │ ├── lovasz.cpython-38.pyc │ │ │ └── misc.cpython-38.pyc │ │ ├── builder.py │ │ ├── lovasz.py │ │ └── misc.py │ ├── modules.py │ └── utils │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── checkpoint.cpython-38.pyc │ │ ├── misc.cpython-38.pyc │ │ └── structure.cpython-38.pyc │ │ ├── checkpoint.py │ │ ├── misc.py │ │ ├── serialization │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── default.cpython-38.pyc │ │ │ ├── hilbert.cpython-38.pyc │ │ │ └── z_order.cpython-38.pyc │ │ ├── default.py │ │ ├── 
hilbert.py │ │ └── z_order.py │ │ └── structure.py └── utils │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-38.pyc │ ├── cache.cpython-38.pyc │ ├── comm.cpython-38.pyc │ ├── config.cpython-38.pyc │ ├── env.cpython-38.pyc │ ├── events.cpython-38.pyc │ ├── logger.cpython-38.pyc │ ├── misc.cpython-38.pyc │ ├── optimizer.cpython-38.pyc │ ├── path.cpython-38.pyc │ ├── registry.cpython-38.pyc │ ├── scheduler.cpython-38.pyc │ └── timer.cpython-38.pyc │ ├── cache.py │ ├── comm.py │ ├── config.py │ ├── env.py │ ├── events.py │ ├── logger.py │ ├── misc.py │ ├── optimizer.py │ ├── path.py │ ├── registry.py │ ├── scheduler.py │ ├── timer.py │ └── visualization.py ├── scripts ├── build_image.sh ├── test.sh └── train.sh └── tools ├── test.py ├── test_s3dis_6fold.py └── train.py /.gitignore: -------------------------------------------------------------------------------- 1 | image/ 2 | __pycache__ 3 | **/build/ 4 | **/*.egg-info/ 5 | **/dist/ 6 | *.so 7 | exp 8 | weights 9 | data 10 | log 11 | outputs/ 12 | .vscode 13 | .idea 14 | */.DS_Store 15 | **/*.out 16 | Dockerfile 17 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Pointcept 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /assets/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/assets/overview.png -------------------------------------------------------------------------------- /configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | weight = None # path to model weight 2 | resume = False # whether to resume training process 3 | evaluate = True # evaluate after each epoch training process 4 | test_only = False # test process 5 | 6 | seed = None # train process will init a random seed and record 7 | save_path = "exp/default" 8 | num_worker = 16 # total worker in all gpu 9 | batch_size = 16 # total batch size in all gpu 10 | batch_size_val = None # auto adapt to bs 1 for each gpu 11 | batch_size_test = None # auto adapt to bs 1 for each gpu 12 | epoch = 100 # total epoch, data loop = epoch // eval_epoch 13 | eval_epoch = 100 # sche total eval & checkpoint epoch 14 | 15 | sync_bn = False 16 | enable_amp = False 17 | empty_cache = False 18 | find_unused_parameters = False 19 | 20 | mix_prob = 0 21 | param_dicts = None # example: param_dicts = [dict(keyword="block", lr_scale=0.1)] 22 | 23 | # hook 24 | hooks = [ 25 | dict(type="CheckpointLoader"), 26 | dict(type="IterationTimer", warmup_iter=2), 27 | dict(type="InformationWriter"), 28 | dict(type="SemSegEvaluator"), 29 | dict(type="CheckpointSaver", save_freq=None), 30 | dict(type="PreciseEvaluator", test_last=False), 31 | ] 32 | 33 | # Trainer 34 | train = dict(type="DefaultTrainer") 35 | 36 | # Tester 37 | test = dict(type="SemSegTester", verbose=True) 38 | -------------------------------------------------------------------------------- /libs/pointgroup_ops/functions/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions import bfs_cluster, ballquery_batch_p, Clustering 2 | -------------------------------------------------------------------------------- /libs/pointgroup_ops/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from sys import argv 3 | from setuptools import setup 4 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 5 | from distutils.sysconfig import get_config_vars 6 | 7 | (opt,) = get_config_vars("OPT") 8 | os.environ["OPT"] = " ".join( 9 | flag for flag in opt.split() if flag != "-Wstrict-prototypes" 10 | ) 11 | 12 | 13 | def _argparse(pattern, argv, is_flag=True, is_list=False): 14 | if is_flag: 15 | found = pattern in argv 16 | if found: 17 | argv.remove(pattern) 18 | return found, argv 19 | else: 20 | arr = [arg for arg in argv if pattern == arg.split("=")[0]] 21 | if is_list: 22 | if len(arr) == 0: # not found 23 | return False, argv 24 | else: 25 | assert "=" in arr[0], f"{arr[0]} requires a value." 26 | argv.remove(arr[0]) 27 | val = arr[0].split("=")[1] 28 | if "," in val: 29 | return val.split(","), argv 30 | else: 31 | return [val], argv 32 | else: 33 | if len(arr) == 0: # not found 34 | return False, argv 35 | else: 36 | assert "=" in arr[0], f"{arr[0]} requires a value." 
37 | argv.remove(arr[0]) 38 | return arr[0].split("=")[1], argv 39 | 40 | 41 | INCLUDE_DIRS, argv = _argparse("--include_dirs", argv, False, is_list=True) 42 | include_dirs = [] 43 | if not (INCLUDE_DIRS is False): 44 | include_dirs += INCLUDE_DIRS 45 | 46 | setup( 47 | name="pointgroup_ops", 48 | packages=["pointgroup_ops"], 49 | package_dir={"pointgroup_ops": "functions"}, 50 | ext_modules=[ 51 | CUDAExtension( 52 | name="pointgroup_ops_cuda", 53 | sources=["src/bfs_cluster.cpp", "src/bfs_cluster_kernel.cu"], 54 | extra_compile_args={"cxx": ["-g"], "nvcc": ["-O2"]}, 55 | ) 56 | ], 57 | include_dirs=[*include_dirs], 58 | cmdclass={"build_ext": BuildExtension}, 59 | ) 60 | -------------------------------------------------------------------------------- /libs/pointgroup_ops/src/bfs_cluster_kernel.cu: -------------------------------------------------------------------------------- 1 | /* 2 | Ball Query with BatchIdx 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 5 | */ 6 | #include <stdio.h> 7 | #include <stdlib.h> 8 | #include <cuda_runtime.h> 9 | 10 | #define TOTAL_THREADS 1024 11 | #define THREADS_PER_BLOCK 512 12 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 13 | 14 | 15 | /* ================================== ballquery_batch_p ================================== */ 16 | __global__ void ballquery_batch_p_cuda_(int n, int meanActive, float radius, const float *xyz, const int *batch_idxs, const int *batch_offsets, int *idx, int *start_len, int *cumsum) { 17 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 18 | if (pt_idx >= n) return; 19 | 20 | start_len += (pt_idx * 2); 21 | int idx_temp[1000]; 22 | 23 | float radius2 = radius * radius; 24 | float o_x = xyz[pt_idx * 3 + 0]; 25 | float o_y = xyz[pt_idx * 3 + 1]; 26 | float o_z = xyz[pt_idx * 3 + 2]; 27 | 28 | int batch_idx = batch_idxs[pt_idx]; 29 | int start = batch_offsets[batch_idx]; 30 | int end = batch_offsets[batch_idx + 1]; 31 | 32 | int cnt = 0; 33 | for(int k = start; k < end; k++){ 34 | float x = xyz[k * 3 + 0]; 35 | float y = xyz[k * 3 + 1]; 36 | float z = xyz[k * 3 + 2]; 37 | float d2 = (o_x - x) * (o_x - x) + (o_y - y) * (o_y - y) + (o_z - z) * (o_z - z); 38 | if(d2 < radius2){ 39 | if(cnt < 1000){ 40 | idx_temp[cnt] = k; 41 | } 42 | else{ 43 | break; 44 | } 45 | ++cnt; 46 | } 47 | } 48 | 49 | start_len[0] = atomicAdd(cumsum, cnt); 50 | start_len[1] = cnt; 51 | 52 | int thre = n * meanActive; 53 | if(start_len[0] >= thre) return; 54 | 55 | idx += start_len[0]; 56 | if(start_len[0] + cnt >= thre) cnt = thre - start_len[0]; 57 | 58 | for(int k = 0; k < cnt; k++){ 59 | idx[k] = idx_temp[k]; 60 | } 61 | } 62 | 63 | 64 | int ballquery_batch_p_cuda(int n, int meanActive, float radius, const float *xyz, const int *batch_idxs, const int *batch_offsets, int *idx, int *start_len, cudaStream_t stream) { 65 | // param xyz: (n, 3) 66 | // param batch_idxs: (n) 67 | // param batch_offsets: (B + 1) 68 | // output idx: (n * meanActive) dim 0 for number of points in the ball, idx in n 69 | // output start_len: (n, 2), int 70 | 71 | cudaError_t err; 72 | 73 | dim3 blocks(DIVUP(n, THREADS_PER_BLOCK)); 74 | dim3 threads(THREADS_PER_BLOCK); 75 | 76 | int cumsum = 0; 77 | int* p_cumsum; 78 | cudaMalloc((void**)&p_cumsum, sizeof(int)); 79 | cudaMemcpy(p_cumsum, &cumsum, sizeof(int), cudaMemcpyHostToDevice); 80 | 81 | ballquery_batch_p_cuda_<<<blocks, threads, 0, stream>>>(n, meanActive, radius, xyz, batch_idxs, batch_offsets, idx, start_len, p_cumsum); 82 | 83 | err = cudaGetLastError(); 84 | if (cudaSuccess != err) { 85 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 86 |
exit(-1); 87 | } 88 | 89 | cudaMemcpy(&cumsum, p_cumsum, sizeof(int), cudaMemcpyDeviceToHost); 90 | return cumsum; 91 | } 92 | -------------------------------------------------------------------------------- /libs/pointops/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions import * 2 | --------------------------------------------------------------------------------
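Usage note: every op re-exported by "from .functions import *" relies on offset-based batching, where each entry of offset is the cumulative point count of a batch item (see batch2offset/offset2batch in functions/utils.py and the docstrings in the functions/ modules below). A minimal, hypothetical usage sketch, assuming the pointops CUDA extension has been built and a GPU is available; the point counts and feature width are arbitrary example values:

import torch
import pointops

# Two point clouds (1000 and 1200 points) packed into one tensor; offset
# stores cumulative per-cloud point counts, as expected by the ops below.
xyz = torch.rand(2200, 3, device="cuda").contiguous()
offset = torch.tensor([1000, 2200], device="cuda", dtype=torch.int32)

# 16 nearest neighbours of every point, restricted to its own point cloud.
idx, dist = pointops.knn_query(16, xyz, offset)   # idx: (2200, 16), dist: (2200, 16)

# Gather each query point's neighbour features.
feat = torch.rand(2200, 32, device="cuda").contiguous()
grouped = pointops.grouping(idx, feat, xyz)       # (2200, 16, 32)

--------------------------------------------------------------------------------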
/libs/pointops/functions/__init__.py: -------------------------------------------------------------------------------- 1 | from .query import knn_query, ball_query, random_ball_query 2 | from .sampling import farthest_point_sampling 3 | from .grouping import grouping, grouping2 4 | from .interpolation import interpolation, interpolation2 5 | from .subtraction import subtraction 6 | from .aggregation import aggregation 7 | from .attention import attention_relation_step, attention_fusion_step 8 | from .utils import ( 9 | query_and_group, 10 | knn_query_and_group, 11 | ball_query_and_group, 12 | batch2offset, 13 | offset2batch, 14 | ) 15 | -------------------------------------------------------------------------------- /libs/pointops/functions/aggregation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from pointops._C import aggregation_forward_cuda, aggregation_backward_cuda 5 | 6 | 7 | class Aggregation(Function): 8 | @staticmethod 9 | def forward(ctx, input, position, weight, idx): 10 | """ 11 | input: input: (n, c), position: (n, nsample, c), weight : (n, nsample, c'), idx: (n, nsample) 12 | output: (n, c) 13 | """ 14 | assert ( 15 | input.is_contiguous() 16 | and position.is_contiguous() 17 | and weight.is_contiguous() 18 | ) 19 | n, nsample, c = position.shape 20 | w_c = weight.shape[-1] 21 | output = torch.cuda.FloatTensor(n, c).zero_() 22 | aggregation_forward_cuda( 23 | n, nsample, c, w_c, input, position, weight, idx, output 24 | ) 25 | ctx.save_for_backward(input, position, weight, idx) 26 | return
output 27 | 28 | @staticmethod 29 | def backward(ctx, grad_output): 30 | """ 31 | input: grad_out: (n, c) 32 | output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight : (n, nsample, c') 33 | """ 34 | input, position, weight, idx = ctx.saved_tensors 35 | n, nsample, c = position.shape 36 | w_c = weight.shape[-1] 37 | grad_input = torch.cuda.FloatTensor(n, c).zero_() 38 | grad_position = torch.cuda.FloatTensor(n, nsample, c).zero_() 39 | grad_weight = torch.cuda.FloatTensor(n, nsample, w_c).zero_() 40 | aggregation_backward_cuda( 41 | n, 42 | nsample, 43 | c, 44 | w_c, 45 | input, 46 | position, 47 | weight, 48 | idx, 49 | grad_output, 50 | grad_input, 51 | grad_position, 52 | grad_weight, 53 | ) 54 | return grad_input, grad_position, grad_weight, None 55 | 56 | 57 | aggregation = Aggregation.apply 58 | -------------------------------------------------------------------------------- /libs/pointops/functions/grouping.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from pointops._C import grouping_forward_cuda, grouping_backward_cuda 5 | 6 | 7 | class Grouping(Function): 8 | @staticmethod 9 | def forward(ctx, input, idx): 10 | """ 11 | input: input: (n, c), idx : (m, nsample) 12 | output: (m, nsample, c) 13 | """ 14 | assert input.is_contiguous() and idx.is_contiguous() 15 | m, nsample, n, c = idx.shape[0], idx.shape[1], input.shape[0], input.shape[1] 16 | output = torch.cuda.FloatTensor(m, nsample, c) 17 | grouping_forward_cuda(m, nsample, c, input, idx, output) 18 | ctx.n = n 19 | ctx.save_for_backward(idx) 20 | return output 21 | 22 | @staticmethod 23 | def backward(ctx, grad_output): 24 | """ 25 | input: grad_out: (m, c, nsample) 26 | output: (n, c), None 27 | """ 28 | n = ctx.n 29 | (idx,) = ctx.saved_tensors 30 | m, nsample, c = grad_output.shape 31 | grad_input = torch.cuda.FloatTensor(n, c).zero_() 32 | grouping_backward_cuda(m, nsample, c, grad_output, idx, grad_input) 33 | return grad_input, None 34 | 35 | 36 | def grouping(idx, feat, xyz, new_xyz=None, with_xyz=False): 37 | if new_xyz is None: 38 | new_xyz = xyz 39 | assert xyz.is_contiguous() and feat.is_contiguous() 40 | m, nsample, c = idx.shape[0], idx.shape[1], feat.shape[1] 41 | xyz = torch.cat([xyz, torch.zeros([1, 3]).to(xyz.device)], dim=0) 42 | feat = torch.cat([feat, torch.zeros([1, c]).to(feat.device)], dim=0) 43 | grouped_feat = feat[idx.view(-1).long(), :].view( 44 | m, nsample, c 45 | ) # (m, num_sample, c) 46 | 47 | if with_xyz: 48 | assert new_xyz.is_contiguous() 49 | mask = torch.sign(idx + 1) 50 | grouped_xyz = xyz[idx.view(-1).long(), :].view( 51 | m, nsample, 3 52 | ) - new_xyz.unsqueeze( 53 | 1 54 | ) # (m, num_sample, 3) 55 | grouped_xyz = torch.einsum( 56 | "n s c, n s -> n s c", grouped_xyz, mask 57 | ) # (m, num_sample, 3) 58 | return torch.cat((grouped_xyz, grouped_feat), -1) 59 | else: 60 | return grouped_feat 61 | 62 | 63 | grouping2 = Grouping.apply 64 | -------------------------------------------------------------------------------- /libs/pointops/functions/interpolation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from pointops._C import interpolation_forward_cuda, interpolation_backward_cuda 5 | from .query import knn_query 6 | 7 | 8 | def interpolation(xyz, new_xyz, feat, offset, new_offset, k=3): 9 | """ 10 | input: coords: (m, 3), new_xyz: (n, 3), color: (m, c), offset: (b), 
new_offset: (b) 11 | output: (n, c) 12 | """ 13 | assert xyz.is_contiguous() and new_xyz.is_contiguous() and feat.is_contiguous() 14 | idx, dist = knn_query(k, xyz, offset, new_xyz, new_offset) # (n, 3), (n, 3) 15 | dist_recip = 1.0 / (dist + 1e-8) # (n, 3) 16 | norm = torch.sum(dist_recip, dim=1, keepdim=True) 17 | weight = dist_recip / norm # (n, 3) 18 | 19 | new_feat = torch.cuda.FloatTensor(new_xyz.shape[0], feat.shape[1]).zero_() 20 | for i in range(k): 21 | new_feat += feat[idx[:, i].long(), :] * weight[:, i].unsqueeze(-1) 22 | return new_feat 23 | 24 | 25 | class Interpolation(Function): 26 | @staticmethod 27 | def forward(ctx, xyz, new_xyz, input, offset, new_offset, k=3): 28 | """ 29 | input: coords: (m, 3), new_xyz: (n, 3), input: (m, c), offset: (b), new_offset: (b) 30 | output: (n, c) 31 | """ 32 | assert xyz.is_contiguous() and new_xyz.is_contiguous() and input.is_contiguous() 33 | idx, dist = knn_query(k, xyz, offset, new_xyz, new_offset) # (n, k), (n, k) 34 | dist_recip = 1.0 / (dist + 1e-8) # (n, k) 35 | norm = torch.sum(dist_recip, dim=1, keepdim=True) 36 | weight = dist_recip / norm # (n, k) 37 | 38 | n, c, m = new_xyz.shape[0], input.shape[1], input.shape[0] 39 | output = torch.cuda.FloatTensor(n, c).zero_() 40 | interpolation_forward_cuda(n, c, k, input, idx, weight, output) 41 | ctx.m, ctx.k = m, k 42 | ctx.save_for_backward(idx, weight) 43 | return output 44 | 45 | @staticmethod 46 | def backward(ctx, grad_output): 47 | """ 48 | input: coords: (m, 3), new_xyz: (n, 3), input: (m, c), offset: (b), new_offset: (b) 49 | output: (n, c) 50 | """ 51 | m, k = ctx.m, ctx.k 52 | idx, weight = ctx.saved_tensors 53 | n, c = grad_output.shape 54 | grad_input = torch.cuda.FloatTensor(m, c).zero_() 55 | interpolation_backward_cuda(n, c, k, grad_output, idx, weight, grad_input) 56 | return None, None, grad_input, None, None, None 57 | 58 | 59 | interpolation2 = Interpolation.apply 60 | -------------------------------------------------------------------------------- /libs/pointops/functions/query.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from pointops._C import knn_query_cuda, random_ball_query_cuda, ball_query_cuda 5 | 6 | 7 | class KNNQuery(Function): 8 | @staticmethod 9 | def forward(ctx, nsample, xyz, offset, new_xyz=None, new_offset=None): 10 | """ 11 | input: coords: (n, 3), new_xyz: (m, 3), offset: (b), new_offset: (b) 12 | output: idx: (m, nsample) -1 is placeholder, dist2: (m, nsample) 13 | """ 14 | if new_xyz is None or new_offset is None: 15 | new_xyz = xyz 16 | new_offset = offset 17 | assert xyz.is_contiguous() and new_xyz.is_contiguous() 18 | m = new_xyz.shape[0] 19 | idx = torch.cuda.IntTensor(m, nsample).zero_() 20 | dist2 = torch.cuda.FloatTensor(m, nsample).zero_() 21 | knn_query_cuda( 22 | m, nsample, xyz, new_xyz, offset.int(), new_offset.int(), idx, dist2 23 | ) 24 | return idx, torch.sqrt(dist2) 25 | 26 | 27 | class RandomBallQuery(Function): 28 | """Random Ball Query. 29 | 30 | Find nearby points in spherical space. 
31 | """ 32 | 33 | @staticmethod 34 | def forward( 35 | ctx, nsample, max_radius, min_radius, xyz, offset, new_xyz=None, new_offset=None 36 | ): 37 | """ 38 | input: coords: (n, 3), new_xyz: (m, 3), offset: (b), new_offset: (b) 39 | output: idx: (m, nsample), dist2: (m, nsample) 40 | """ 41 | if new_xyz is None or new_offset is None: 42 | new_xyz = xyz 43 | new_offset = offset 44 | assert xyz.is_contiguous() and new_xyz.is_contiguous() 45 | assert min_radius < max_radius 46 | 47 | m = new_xyz.shape[0] 48 | order = [] 49 | for k in range(offset.shape[0]): 50 | s_k, e_k = (0, offset[0]) if k == 0 else (offset[k - 1], offset[k]) 51 | order.append( 52 | torch.randperm(e_k - s_k, dtype=torch.int32, device=offset.device) + s_k 53 | ) 54 | order = torch.cat(order, dim=0) 55 | idx = torch.cuda.IntTensor(m, nsample).zero_() 56 | dist2 = torch.cuda.FloatTensor(m, nsample).zero_() 57 | random_ball_query_cuda( 58 | m, 59 | nsample, 60 | min_radius, 61 | max_radius, 62 | order, 63 | xyz, 64 | new_xyz, 65 | offset.int(), 66 | new_offset.int(), 67 | idx, 68 | dist2, 69 | ) 70 | return idx, torch.sqrt(dist2) 71 | 72 | 73 | class BallQuery(Function): 74 | """Ball Query. 75 | 76 | Find nearby points in spherical space. 77 | """ 78 | 79 | @staticmethod 80 | def forward( 81 | ctx, nsample, max_radius, min_radius, xyz, offset, new_xyz=None, new_offset=None 82 | ): 83 | """ 84 | input: coords: (n, 3), new_xyz: (m, 3), offset: (b), new_offset: (b) 85 | output: idx: (m, nsample), dist2: (m, nsample) 86 | """ 87 | if new_xyz is None or new_offset is None: 88 | new_xyz = xyz 89 | new_offset = offset 90 | assert xyz.is_contiguous() and new_xyz.is_contiguous() 91 | assert min_radius < max_radius 92 | 93 | m = new_xyz.shape[0] 94 | idx = torch.cuda.IntTensor(m, nsample).zero_() 95 | dist2 = torch.cuda.FloatTensor(m, nsample).zero_() 96 | ball_query_cuda( 97 | m, 98 | nsample, 99 | min_radius, 100 | max_radius, 101 | xyz, 102 | new_xyz, 103 | offset.int(), 104 | new_offset.int(), 105 | idx, 106 | dist2, 107 | ) 108 | return idx, torch.sqrt(dist2) 109 | 110 | 111 | knn_query = KNNQuery.apply 112 | ball_query = BallQuery.apply 113 | random_ball_query = RandomBallQuery.apply 114 | -------------------------------------------------------------------------------- /libs/pointops/functions/sampling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from pointops._C import farthest_point_sampling_cuda 5 | 6 | 7 | class FarthestPointSampling(Function): 8 | @staticmethod 9 | def forward(ctx, xyz, offset, new_offset): 10 | """ 11 | input: coords: (n, 3), offset: (b), new_offset: (b) 12 | output: idx: (m) 13 | """ 14 | assert xyz.is_contiguous() 15 | n, b, n_max = xyz.shape[0], offset.shape[0], offset[0] 16 | for i in range(1, b): 17 | n_max = max(offset[i] - offset[i - 1], n_max) 18 | idx = torch.cuda.IntTensor(new_offset[b - 1].item()).zero_() 19 | tmp = torch.cuda.FloatTensor(n).fill_(1e10) 20 | farthest_point_sampling_cuda( 21 | b, n_max, xyz, offset.int(), new_offset.int(), tmp, idx 22 | ) 23 | del tmp 24 | return idx 25 | 26 | 27 | farthest_point_sampling = FarthestPointSampling.apply 28 | -------------------------------------------------------------------------------- /libs/pointops/functions/subtraction.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from pointops._C import subtraction_forward_cuda, subtraction_backward_cuda 5 
| 6 | 7 | class Subtraction(Function): 8 | @staticmethod 9 | def forward(ctx, input1, input2, idx): 10 | """ 11 | input: input1: (n, c), input2: (n, c), idx: (n, nsample) 12 | output: (n, nsample, c) 13 | """ 14 | assert input1.is_contiguous() and input2.is_contiguous() 15 | n, c = input1.shape 16 | nsample = idx.shape[-1] 17 | output = torch.cuda.FloatTensor(n, nsample, c).zero_() 18 | subtraction_forward_cuda(n, nsample, c, input1, input2, idx, output) 19 | ctx.save_for_backward(idx) 20 | return output 21 | 22 | @staticmethod 23 | def backward(ctx, grad_output): 24 | """ 25 | input: grad_out: (n, nsample, c) 26 | output: grad_input1: (n, c), grad_input2: (n, c) 27 | """ 28 | (idx,) = ctx.saved_tensors 29 | n, nsample, c = grad_output.shape 30 | grad_input1 = torch.cuda.FloatTensor(n, c).zero_() 31 | grad_input2 = torch.cuda.FloatTensor(n, c).zero_() 32 | subtraction_backward_cuda( 33 | n, nsample, c, idx, grad_output, grad_input1, grad_input2 34 | ) 35 | return grad_input1, grad_input2, None 36 | 37 | 38 | subtraction = Subtraction.apply 39 | -------------------------------------------------------------------------------- /libs/pointops/pointops.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 2.1 2 | Name: pointops 3 | Version: 1.0 4 | Requires-Dist: torch 5 | Requires-Dist: numpy 6 | -------------------------------------------------------------------------------- /libs/pointops/pointops.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | setup.py 2 | functions/__init__.py 3 | functions/aggregation.py 4 | functions/attention.py 5 | functions/grouping.py 6 | functions/interpolation.py 7 | functions/query.py 8 | functions/sampling.py 9 | functions/subtraction.py 10 | functions/utils.py 11 | pointops.egg-info/PKG-INFO 12 | pointops.egg-info/SOURCES.txt 13 | pointops.egg-info/dependency_links.txt 14 | pointops.egg-info/requires.txt 15 | pointops.egg-info/top_level.txt 16 | src/pointops_api.cpp 17 | src/aggregation/aggregation_cuda.cpp 18 | src/aggregation/aggregation_cuda_kernel.cu 19 | src/attention/attention_cuda.cpp 20 | src/attention/attention_cuda_kernel.cu 21 | src/ball_query/ball_query_cuda.cpp 22 | src/ball_query/ball_query_cuda_kernel.cu 23 | src/grouping/grouping_cuda.cpp 24 | src/grouping/grouping_cuda_kernel.cu 25 | src/interpolation/interpolation_cuda.cpp 26 | src/interpolation/interpolation_cuda_kernel.cu 27 | src/knn_query/knn_query_cuda.cpp 28 | src/knn_query/knn_query_cuda_kernel.cu 29 | src/random_ball_query/random_ball_query_cuda.cpp 30 | src/random_ball_query/random_ball_query_cuda_kernel.cu 31 | src/sampling/sampling_cuda.cpp 32 | src/sampling/sampling_cuda_kernel.cu 33 | src/subtraction/subtraction_cuda.cpp 34 | src/subtraction/subtraction_cuda_kernel.cu -------------------------------------------------------------------------------- /libs/pointops/pointops.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /libs/pointops/pointops.egg-info/requires.txt: -------------------------------------------------------------------------------- 1 | torch 2 | numpy 3 | -------------------------------------------------------------------------------- /libs/pointops/pointops.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | pointops 2 | 
-------------------------------------------------------------------------------- /libs/pointops/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup 3 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 4 | from distutils.sysconfig import get_config_vars 5 | 6 | (opt,) = get_config_vars("OPT") 7 | os.environ["OPT"] = " ".join( 8 | flag for flag in opt.split() if flag != "-Wstrict-prototypes" 9 | ) 10 | 11 | src = "src" 12 | sources = [ 13 | os.path.join(root, file) 14 | for root, dirs, files in os.walk(src) 15 | for file in files 16 | if file.endswith(".cpp") or file.endswith(".cu") 17 | ] 18 | 19 | setup( 20 | name="pointops", 21 | version="1.0", 22 | install_requires=["torch", "numpy"], 23 | packages=["pointops"], 24 | package_dir={"pointops": "functions"}, 25 | ext_modules=[ 26 | CUDAExtension( 27 | name="pointops._C", 28 | sources=sources, 29 | extra_compile_args={"cxx": ["-g"], "nvcc": ["-O2"]}, 30 | ) 31 | ], 32 | cmdclass={"build_ext": BuildExtension}, 33 | ) 34 | -------------------------------------------------------------------------------- /libs/pointops/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/libs/pointops/src/__init__.py -------------------------------------------------------------------------------- /libs/pointops/src/aggregation/aggregation_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "aggregation_cuda_kernel.h" 5 | 6 | 7 | void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor) 8 | { 9 | const float *input = input_tensor.data_ptr(); 10 | const float *position = position_tensor.data_ptr(); 11 | const float *weight = weight_tensor.data_ptr(); 12 | const int *idx = idx_tensor.data_ptr(); 13 | float *output = output_tensor.data_ptr(); 14 | aggregation_forward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, output); 15 | } 16 | 17 | void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor) 18 | { 19 | const float *input = input_tensor.data_ptr(); 20 | const float *position = position_tensor.data_ptr(); 21 | const float *weight = weight_tensor.data_ptr(); 22 | const int *idx = idx_tensor.data_ptr(); 23 | const float *grad_output = grad_output_tensor.data_ptr(); 24 | float *grad_input = grad_input_tensor.data_ptr(); 25 | float *grad_position = grad_position_tensor.data_ptr(); 26 | float *grad_weight = grad_weight_tensor.data_ptr(); 27 | aggregation_backward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight); 28 | } 29 | -------------------------------------------------------------------------------- /libs/pointops/src/aggregation/aggregation_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "aggregation_cuda_kernel.h" 3 | 4 | 5 | __global__ void aggregation_forward_cuda_kernel(int n, int nsample, 
int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) { 6 | // input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c) 7 | int index = blockIdx.x * blockDim.x + threadIdx.x; 8 | if (index >= n * c) return; 9 | const int c_idx = index % c; 10 | const int n_idx = index / c; 11 | const int w_c_idx = c_idx % w_c; 12 | for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++) 13 | { 14 | int idx_idx = n_idx * nsample + nsample_idx; 15 | int input_idx = idx[idx_idx] * c + c_idx; 16 | int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx; 17 | int weight_idx = n_idx * nsample * w_c + nsample_idx * w_c + w_c_idx; 18 | output[index] += (input[input_idx] + position[position_idx]) * weight[weight_idx]; 19 | } 20 | } 21 | 22 | __global__ void aggregation_backward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) { 23 | // input: grad_output: (n, c), output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight: (n, nsample, w_c) 24 | int index = blockIdx.x * blockDim.x + threadIdx.x; 25 | if (index >= n * c) return; 26 | const int c_idx = index % c; 27 | const int n_idx = index / c; 28 | const int w_c_idx = c_idx % w_c; 29 | for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++) 30 | { 31 | int idx_idx = n_idx * nsample + nsample_idx; 32 | int input_idx = idx[idx_idx] * c + c_idx; 33 | int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx; 34 | int weight_idx = n_idx * nsample * w_c + nsample_idx * w_c + w_c_idx; 35 | atomicAdd(grad_input + input_idx, grad_output[index] * weight[weight_idx]); 36 | grad_position[position_idx] = grad_output[index] * weight[weight_idx]; 37 | atomicAdd(grad_weight + weight_idx, grad_output[index] * (input[input_idx] + position[position_idx])); 38 | } 39 | } 40 | 41 | void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) { 42 | // input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c) 43 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)); 44 | dim3 threads(THREADS_PER_BLOCK); 45 | aggregation_forward_cuda_kernel<<>>(n, nsample, c, w_c, input, position, weight, idx, output); 46 | } 47 | 48 | void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) { 49 | // input: grad_output: (n, c), output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight: (n, nsample, w_c) 50 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)); 51 | dim3 threads(THREADS_PER_BLOCK); 52 | aggregation_backward_cuda_kernel<<>>(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight); 53 | } 54 | -------------------------------------------------------------------------------- /libs/pointops/src/aggregation/aggregation_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _AGGREGATION_CUDA_KERNEL 2 | #define _AGGREGATION_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void aggregation_forward_cuda(int n, int nsample, int c, int 
w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor); 8 | void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output); 15 | void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /libs/pointops/src/attention/attention_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ATTENTION_CUDA_KERNEL 2 | #define _ATTENTION_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void attention_relation_step_forward_cuda(int m, int g, int c, 8 | at::Tensor query_tensor, at::Tensor key_tensor, at::Tensor weight_tensor, 9 | at::Tensor index_target_tensor, at::Tensor index_refer_tensor, 10 | at::Tensor output_tensor); 11 | void attention_relation_step_backward_cuda(int m, int g, int c, 12 | at::Tensor query_tensor, at::Tensor grad_query_tensor, 13 | at::Tensor key_tensor, at::Tensor grad_key_tensor, 14 | at::Tensor weight_tensor, at::Tensor grad_weight_tensor, 15 | at::Tensor index_target_tensor, at::Tensor index_refer_tensor, 16 | at::Tensor grad_output_tensor); 17 | void attention_fusion_step_forward_cuda(int m, int g, int c, 18 | at::Tensor weight_tensor, at::Tensor value_tensor, 19 | at::Tensor index_target_tensor, at::Tensor index_refer_tensor, 20 | at::Tensor output_tensor); 21 | void attention_fusion_step_backward_cuda(int m, int g, int c, 22 | at::Tensor weight_tensor, at::Tensor grad_weight_tensor, 23 | at::Tensor value_tensor, at::Tensor grad_value_tensor, 24 | at::Tensor index_target_tensor, at::Tensor index_refer_tensor, 25 | at::Tensor grad_output_tensor); 26 | 27 | #ifdef __cplusplus 28 | extern "C" { 29 | #endif 30 | 31 | void attention_relation_step_forward_cuda_launcher(int m, int g, int c, 32 | const float *query, const float *key, const float *weight, 33 | const int *index_target, const int *index_refer, 34 | float *output); 35 | void attention_relation_step_backward_cuda_launcher(int m, int g, int c, 36 | const float *query, float *grad_query, 37 | const float *key, float *grad_key, 38 | const float *weight, float *grad_weight, 39 | const int *index_target, const int *index_refer, 40 | const float *grad_output); 41 | void attention_fusion_step_forward_cuda_launcher(int m, int g, int c, 42 | const float *weight, const float *value, 43 | const int *index_target, const int *index_refer, 44 | float *output); 45 | void attention_fusion_step_backward_cuda_launcher(int m, int g, int c, 46 | const float *weight, float *grad_weight, 47 | const float *value, float *grad_value, 48 | const int *index_target, const int *index_refer, 49 | const float *grad_output); 50 | 51 | #ifdef __cplusplus 52 | } 53 | #endif 54 | #endif 55 | 
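For reference, the aggregation forward kernel above can be re-expressed in a few lines of PyTorch, which is convenient for sanity-checking the compiled extension. This is an illustrative re-derivation from the kernel's index arithmetic, not code shipped with the repository; the function name is invented, and it assumes c is a multiple of w_c, as the c % w_c indexing in the kernel implies.

import torch

def aggregation_forward_reference(input, position, weight, idx):
    # input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample) -> (n, c)
    c = input.shape[1]
    w_c = weight.shape[-1]
    gathered = input[idx.long()]                   # (n, nsample, c): mirrors input[idx[idx_idx] * c + c_idx]
    w = weight.repeat(1, 1, c // w_c)              # channel c_idx uses weight channel c_idx % w_c
    return ((gathered + position) * w).sum(dim=1)  # accumulate over the nsample neighbours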
-------------------------------------------------------------------------------- /libs/pointops/src/ball_query/ball_query_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "ball_query_cuda_kernel.h" 5 | 6 | 7 | void ball_query_cuda(int m, int nsample, 8 | float min_radius, float max_radius, 9 | at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, 10 | at::Tensor offset_tensor, at::Tensor new_offset_tensor, 11 | at::Tensor idx_tensor, at::Tensor dist2_tensor) 12 | { 13 | const float *xyz = xyz_tensor.data_ptr(); 14 | const float *new_xyz = new_xyz_tensor.data_ptr(); 15 | const int *offset = offset_tensor.data_ptr(); 16 | const int *new_offset = new_offset_tensor.data_ptr(); 17 | int *idx = idx_tensor.data_ptr(); 18 | float *dist2 = dist2_tensor.data_ptr(); 19 | ball_query_cuda_launcher(m, nsample, min_radius, max_radius, xyz, new_xyz, offset, new_offset, idx, dist2); 20 | } 21 | -------------------------------------------------------------------------------- /libs/pointops/src/ball_query/ball_query_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _BALL_QUERY_CUDA_KERNEL 2 | #define _BALL_QUERY_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void ball_query_cuda(int m, int nsample, 8 | float min_radius, float max_radius, 9 | at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, 10 | at::Tensor offset_tensor, at::Tensor new_offset_tensor, 11 | at::Tensor idx_tensor, at::Tensor dist2_tensor); 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | void ball_query_cuda_launcher(int m, int nsample, 18 | float min_radius, float max_radius, 19 | const float *xyz, const float *new_xyz, 20 | const int *offset, const int *new_offset, 21 | int *idx, float *dist2); 22 | 23 | #ifdef __cplusplus 24 | } 25 | #endif 26 | #endif 27 | -------------------------------------------------------------------------------- /libs/pointops/src/cuda_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef _CUDA_UTILS_H 2 | #define _CUDA_UTILS_H 3 | 4 | #include 5 | #include 6 | 7 | #define TOTAL_THREADS 1024 8 | #define THREADS_PER_BLOCK 512 9 | #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) 10 | 11 | inline int opt_n_threads(int work_size) { 12 | const int pow_2 = std::log(static_cast(work_size)) / std::log(2.0); 13 | return std::max(std::min(1 << pow_2, TOTAL_THREADS), 1); 14 | } 15 | 16 | inline dim3 opt_block_config(int x, int y) { 17 | const int x_threads = opt_n_threads(x); 18 | const int y_threads = std::max(std::min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1); 19 | dim3 block_config(x_threads, y_threads, 1); 20 | return block_config; 21 | } 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /libs/pointops/src/grouping/grouping_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "grouping_cuda_kernel.h" 5 | 6 | 7 | void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor) 8 | { 9 | const float *input = input_tensor.data_ptr(); 10 | const int *idx = idx_tensor.data_ptr(); 11 | float *output = output_tensor.data_ptr(); 12 | grouping_forward_cuda_launcher(m, nsample, c, input, idx, output); 13 | } 14 | 15 | void grouping_backward_cuda(int m, int nsample, int c, at::Tensor 
grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor) 16 | { 17 | const float *grad_output = grad_output_tensor.data_ptr(); 18 | const int *idx = idx_tensor.data_ptr(); 19 | float *grad_input = grad_input_tensor.data_ptr(); 20 | grouping_backward_cuda_launcher(m, nsample, c, grad_output, idx, grad_input); 21 | } 22 | -------------------------------------------------------------------------------- /libs/pointops/src/grouping/grouping_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "grouping_cuda_kernel.h" 3 | 4 | 5 | __global__ void grouping_forward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ input, const int *__restrict__ idx, float *__restrict__ output) { 6 | // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c) 7 | int index = blockIdx.x * blockDim.x + threadIdx.x; 8 | if (index >= m * nsample * c) return; 9 | const int c_idx = index % c; 10 | const int nsample_idx = (index / c) % nsample; 11 | const int m_idx = index / nsample / c; 12 | const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx; 13 | output[index] = input[input_idx]; 14 | } 15 | 16 | __global__ void grouping_backward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ grad_output, const int *__restrict__ idx, float *__restrict__ grad_input) { 17 | // input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c) 18 | int index = blockIdx.x * blockDim.x + threadIdx.x; 19 | if (index >= m * nsample * c) return; 20 | const int c_idx = index % c; 21 | const int nsample_idx = (index / c) % nsample; 22 | const int m_idx = index / nsample / c; 23 | const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx; 24 | atomicAdd(grad_input + input_idx, grad_output[index]); 25 | } 26 | 27 | void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output) { 28 | // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c) 29 | dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK)); 30 | dim3 threads(THREADS_PER_BLOCK); 31 | grouping_forward_cuda_kernel<<>>(m, nsample, c, input, idx, output); 32 | } 33 | 34 | void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input) 35 | { 36 | // input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c) 37 | dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK)); 38 | dim3 threads(THREADS_PER_BLOCK); 39 | grouping_backward_cuda_kernel<<>>(m, nsample, c, grad_output, idx, grad_input); 40 | } 41 | -------------------------------------------------------------------------------- /libs/pointops/src/grouping/grouping_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _GROUPING_CUDA_KERNEL 2 | #define _GROUPING_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor); 8 | void grouping_backward_cuda(int m, int nsample, int c, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output); 15 | void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, 
const int *idx, float *grad_input); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /libs/pointops/src/interpolation/interpolation_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "interpolation_cuda_kernel.h" 5 | 6 | 7 | void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor) 8 | { 9 | const float *input = input_tensor.data_ptr(); 10 | const int *idx = idx_tensor.data_ptr(); 11 | const float *weight = weight_tensor.data_ptr(); 12 | float *output = output_tensor.data_ptr(); 13 | interpolation_forward_cuda_launcher(n, c, k, input, idx, weight, output); 14 | } 15 | 16 | void interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_input_tensor) 17 | { 18 | const float *grad_output = grad_output_tensor.data_ptr(); 19 | const int *idx = idx_tensor.data_ptr(); 20 | const float *weight = weight_tensor.data_ptr(); 21 | float *grad_input = grad_input_tensor.data_ptr(); 22 | interpolation_backward_cuda_launcher(n, c, k, grad_output, idx, weight, grad_input); 23 | } 24 | -------------------------------------------------------------------------------- /libs/pointops/src/interpolation/interpolation_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "interpolation_cuda_kernel.h" 3 | 4 | 5 | __global__ void interpolation_forward_cuda_kernel(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output) 6 | { 7 | // input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c) 8 | int index = blockIdx.x * blockDim.x + threadIdx.x; 9 | if (index >= n * c) return; 10 | int c_idx = index % c; 11 | int n_idx = index / c; 12 | for (int i = 0; i < k; i++) 13 | { 14 | int idx_idx = n_idx * k + i; 15 | int input_idx = idx[idx_idx] * c + c_idx; 16 | output[index] += input[input_idx] * weight[idx_idx]; 17 | } 18 | } 19 | 20 | __global__ void interpolation_backward_cuda_kernel(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input) 21 | { 22 | // input: grad_output: (n, c), idx: (n, k), weight: (n, k), output: grad_input (m, c) 23 | int index = blockIdx.x * blockDim.x + threadIdx.x; 24 | if (index >= n * c) return; 25 | int c_idx = index % c; 26 | int n_idx = index / c; 27 | for (int i = 0; i < k; i++) 28 | { 29 | int idx_idx = n_idx * k + i; 30 | int input_idx = idx[idx_idx] * c + c_idx; 31 | atomicAdd(grad_input + input_idx, grad_output[index] * weight[idx_idx]); 32 | } 33 | } 34 | 35 | void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output) { 36 | // input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c) 37 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)); 38 | dim3 threads(THREADS_PER_BLOCK); 39 | interpolation_forward_cuda_kernel<<>>(n, c, k, input, idx, weight, output); 40 | } 41 | 42 | void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input) { 43 | // input: grad_output: (n, c), idx: (n, k), weight: (n, k), output: grad_input (m, c) 44 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)); 45 
| dim3 threads(THREADS_PER_BLOCK); 46 | interpolation_backward_cuda_kernel<<>>(n, c, k, grad_output, idx, weight, grad_input); 47 | } 48 | -------------------------------------------------------------------------------- /libs/pointops/src/interpolation/interpolation_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _INTERPOLATION_CUDA_KERNEL 2 | #define _INTERPOLATION_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor); 8 | void interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_input_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output); 15 | void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /libs/pointops/src/knn_query/knn_query_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "knn_query_cuda_kernel.h" 5 | 6 | 7 | void knn_query_cuda(int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor) 8 | { 9 | const float *xyz = xyz_tensor.data_ptr(); 10 | const float *new_xyz = new_xyz_tensor.data_ptr(); 11 | const int *offset = offset_tensor.data_ptr(); 12 | const int *new_offset = new_offset_tensor.data_ptr(); 13 | int *idx = idx_tensor.data_ptr(); 14 | float *dist2 = dist2_tensor.data_ptr(); 15 | knn_query_cuda_launcher(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2); 16 | } 17 | -------------------------------------------------------------------------------- /libs/pointops/src/knn_query/knn_query_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "knn_query_cuda_kernel.h" 3 | 4 | 5 | namespace knn_query_utils{ 6 | 7 | template 8 | __device__ void swap(DType *x, DType *y) 9 | { 10 | DType tmp = *x; 11 | *x = *y; 12 | *y = tmp; 13 | } 14 | 15 | __device__ void reheap(float *dist, int *idx, int k) 16 | { 17 | int root = 0; 18 | int child = root * 2 + 1; 19 | while (child < k) 20 | { 21 | if(child + 1 < k && dist[child+1] > dist[child]) 22 | child++; 23 | if(dist[root] > dist[child]) 24 | return; 25 | swap(&dist[root], &dist[child]); 26 | swap(&idx[root], &idx[child]); 27 | root = child; 28 | child = root * 2 + 1; 29 | } 30 | } 31 | 32 | 33 | __device__ void heap_sort(float *dist, int *idx, int k) 34 | { 35 | int i; 36 | for (i = k - 1; i > 0; i--) 37 | { 38 | swap(&dist[0], &dist[i]); 39 | swap(&idx[0], &idx[i]); 40 | reheap(dist, idx, i); 41 | } 42 | } 43 | 44 | 45 | __device__ int get_bt_idx(int idx, const int *offset) 46 | { 47 | int i = 0; 48 | while (1) 49 | { 50 | if (idx < offset[i]) 51 | break; 52 | else 53 | i++; 54 | } 55 | return i; 56 | } 57 | } // namespace knn_query_utils 58 | 59 | 60 | __global__ void knn_query_cuda_kernel(int m, int nsample, const float *__restrict__ xyz, const 
float *__restrict__ new_xyz, const int *__restrict__ offset, const int *__restrict__ new_offset, int *__restrict__ idx, float *__restrict__ dist2) { 61 | // input: xyz (n, 3) new_xyz (m, 3) 62 | // output: idx (m, nsample) dist2 (m, nsample) 63 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 64 | if (pt_idx >= m) return; 65 | 66 | new_xyz += pt_idx * 3; 67 | idx += pt_idx * nsample; 68 | dist2 += pt_idx * nsample; 69 | 70 | int bt_idx = knn_query_utils::get_bt_idx(pt_idx, new_offset); 71 | int start; 72 | if (bt_idx == 0) 73 | start = 0; 74 | else 75 | start = offset[bt_idx - 1]; 76 | int end = offset[bt_idx]; 77 | 78 | float new_x = new_xyz[0]; 79 | float new_y = new_xyz[1]; 80 | float new_z = new_xyz[2]; 81 | 82 | float best_dist[128]; 83 | int best_idx[128]; 84 | for(int i = 0; i < nsample; i++){ 85 | best_dist[i] = 1e10; 86 | best_idx[i] = -1; 87 | } 88 | for(int i = start; i < end; i++){ 89 | float x = xyz[i * 3 + 0]; 90 | float y = xyz[i * 3 + 1]; 91 | float z = xyz[i * 3 + 2]; 92 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z); 93 | if (d2 < best_dist[0]){ 94 | best_dist[0] = d2; 95 | best_idx[0] = i; 96 | knn_query_utils::reheap(best_dist, best_idx, nsample); 97 | } 98 | } 99 | knn_query_utils::heap_sort(best_dist, best_idx, nsample); 100 | for(int i = 0; i < nsample; i++){ 101 | idx[i] = best_idx[i]; 102 | dist2[i] = best_dist[i]; 103 | } 104 | } 105 | 106 | 107 | void knn_query_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2) { 108 | // input: new_xyz: (m, 3), xyz: (n, 3), idx: (m, nsample) 109 | dim3 blocks(DIVUP(m, THREADS_PER_BLOCK)); 110 | dim3 threads(THREADS_PER_BLOCK); 111 | knn_query_cuda_kernel<<>>(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2); 112 | } 113 | -------------------------------------------------------------------------------- /libs/pointops/src/knn_query/knn_query_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _KNN_QUERY_CUDA_KERNEL 2 | #define _KNN_QUERY_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void knn_query_cuda(int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor); 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | void knn_query_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2); 14 | 15 | #ifdef __cplusplus 16 | } 17 | #endif 18 | #endif 19 | -------------------------------------------------------------------------------- /libs/pointops/src/pointops_api.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "knn_query/knn_query_cuda_kernel.h" 5 | #include "ball_query/ball_query_cuda_kernel.h" 6 | #include "random_ball_query/random_ball_query_cuda_kernel.h" 7 | #include "sampling/sampling_cuda_kernel.h" 8 | #include "grouping/grouping_cuda_kernel.h" 9 | #include "interpolation/interpolation_cuda_kernel.h" 10 | #include "aggregation/aggregation_cuda_kernel.h" 11 | #include "subtraction/subtraction_cuda_kernel.h" 12 | #include "attention/attention_cuda_kernel.h" 13 | 14 | 15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 16 | m.def("knn_query_cuda", &knn_query_cuda, "knn_query_cuda"); 17 | m.def("ball_query_cuda", &ball_query_cuda, 
"ball_query_cuda"); 18 | m.def("random_ball_query_cuda", &random_ball_query_cuda, "random_ball_query_cuda"); 19 | m.def("farthest_point_sampling_cuda", &farthest_point_sampling_cuda, "farthest_point_sampling_cuda"); 20 | m.def("grouping_forward_cuda", &grouping_forward_cuda, "grouping_forward_cuda"); 21 | m.def("grouping_backward_cuda", &grouping_backward_cuda, "grouping_backward_cuda"); 22 | m.def("interpolation_forward_cuda", &interpolation_forward_cuda, "interpolation_forward_cuda"); 23 | m.def("interpolation_backward_cuda", &interpolation_backward_cuda, "interpolation_backward_cuda"); 24 | m.def("subtraction_forward_cuda", &subtraction_forward_cuda, "subtraction_forward_cuda"); 25 | m.def("subtraction_backward_cuda", &subtraction_backward_cuda, "subtraction_backward_cuda"); 26 | m.def("aggregation_forward_cuda", &aggregation_forward_cuda, "aggregation_forward_cuda"); 27 | m.def("aggregation_backward_cuda", &aggregation_backward_cuda, "aggregation_backward_cuda"); 28 | m.def("attention_relation_step_forward_cuda", &attention_relation_step_forward_cuda, "attention_relation_step_forward_cuda"); 29 | m.def("attention_relation_step_backward_cuda", &attention_relation_step_backward_cuda, "attention_relation_step_backward_cuda"); 30 | m.def("attention_fusion_step_forward_cuda", &attention_fusion_step_forward_cuda, "attention_fusion_step_forward_cuda"); 31 | m.def("attention_fusion_step_backward_cuda", &attention_fusion_step_backward_cuda, "attention_fusion_step_backward_cuda"); 32 | } 33 | -------------------------------------------------------------------------------- /libs/pointops/src/random_ball_query/random_ball_query_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "random_ball_query_cuda_kernel.h" 5 | 6 | 7 | void random_ball_query_cuda(int m, int nsample, 8 | float min_radius, float max_radius, at::Tensor order_tensor, 9 | at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, 10 | at::Tensor offset_tensor, at::Tensor new_offset_tensor, 11 | at::Tensor idx_tensor, at::Tensor dist2_tensor) 12 | { 13 | const int *order = order_tensor.data_ptr(); 14 | const float *xyz = xyz_tensor.data_ptr(); 15 | const float *new_xyz = new_xyz_tensor.data_ptr(); 16 | const int *offset = offset_tensor.data_ptr(); 17 | const int *new_offset = new_offset_tensor.data_ptr(); 18 | int *idx = idx_tensor.data_ptr(); 19 | float *dist2 = dist2_tensor.data_ptr(); 20 | random_ball_query_cuda_launcher(m, nsample, min_radius, max_radius, order, xyz, new_xyz, offset, new_offset, idx, dist2); 21 | } 22 | -------------------------------------------------------------------------------- /libs/pointops/src/random_ball_query/random_ball_query_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _RANDOM_BALL_QUERY_CUDA_KERNEL 2 | #define _RANDOM_BALL_QUERY_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void random_ball_query_cuda(int m, int nsample, 8 | float min_radius, float max_radius, at::Tensor order_tensor, 9 | at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, 10 | at::Tensor offset_tensor, at::Tensor new_offset_tensor, 11 | at::Tensor idx_tensor, at::Tensor dist2_tensor); 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | void random_ball_query_cuda_launcher(int m, int nsample, 18 | float min_radius, float max_radius, const int *order, 19 | const float *xyz, const float *new_xyz, 20 | const int *offset, const int *new_offset, 21 | int *idx, float 
*dist2); 22 | 23 | #ifdef __cplusplus 24 | } 25 | #endif 26 | #endif 27 | -------------------------------------------------------------------------------- /libs/pointops/src/sampling/sampling_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "sampling_cuda_kernel.h" 5 | 6 | 7 | void farthest_point_sampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor) 8 | { 9 | const float *xyz = xyz_tensor.data_ptr(); 10 | const int *offset = offset_tensor.data_ptr(); 11 | const int *new_offset = new_offset_tensor.data_ptr(); 12 | float *tmp = tmp_tensor.data_ptr(); 13 | int *idx = idx_tensor.data_ptr(); 14 | farthest_point_sampling_cuda_launcher(b, n, xyz, offset, new_offset, tmp, idx); 15 | } 16 | -------------------------------------------------------------------------------- /libs/pointops/src/sampling/sampling_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _SAMPLING_CUDA_KERNEL 2 | #define _SAMPLING_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void farthest_point_sampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor); 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | void farthest_point_sampling_cuda_launcher(int b, int n, const float *xyz, const int *offset, const int *new_offset, float *tmp, int *idx); 14 | 15 | #ifdef __cplusplus 16 | } 17 | #endif 18 | #endif 19 | -------------------------------------------------------------------------------- /libs/pointops/src/subtraction/subtraction_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "subtraction_cuda_kernel.h" 5 | 6 | 7 | void subtraction_forward_cuda(int n, int nsample, int c, at::Tensor input1_tensor, at::Tensor input2_tensor, at::Tensor idx_tensor, at::Tensor output_tensor) 8 | { 9 | const float *input1 = input1_tensor.data_ptr(); 10 | const float *input2 = input2_tensor.data_ptr(); 11 | const int *idx = idx_tensor.data_ptr(); 12 | float *output = output_tensor.data_ptr(); 13 | subtraction_forward_cuda_launcher(n, nsample, c, input1, input2, idx, output); 14 | } 15 | 16 | void subtraction_backward_cuda(int n, int nsample, int c, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input1_tensor, at::Tensor grad_input2_tensor) 17 | { 18 | const int *idx = idx_tensor.data_ptr(); 19 | const float *grad_output = grad_output_tensor.data_ptr(); 20 | float *grad_input1 = grad_input1_tensor.data_ptr(); 21 | float *grad_input2 = grad_input2_tensor.data_ptr(); 22 | subtraction_backward_cuda_launcher(n, nsample, c, idx, grad_output, grad_input1, grad_input2); 23 | } 24 | -------------------------------------------------------------------------------- /libs/pointops/src/subtraction/subtraction_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "subtraction_cuda_kernel.h" 3 | 4 | 5 | __global__ void subtraction_forward_cuda_kernel(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) { 6 | // input: input1: (n, c), input2: (n, c), idx: (n, nsample), output: (n, nsample, c) 7 | int index = blockIdx.x * blockDim.x + threadIdx.x; 8 | if 
(index >= n * nsample * c) return; 9 | const int c_idx = index % c; 10 | const int nsample_idx = (index / c) % nsample; 11 | const int n_idx = index / nsample / c; 12 | const int idx_idx = n_idx * nsample + nsample_idx; 13 | const int input1_idx = n_idx * c + c_idx; 14 | const int input2_idx = idx[idx_idx] * c + c_idx; 15 | output[index] = input1[input1_idx] - input2[input2_idx]; 16 | } 17 | 18 | __global__ void subtraction_backward_cuda_kernel(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) { 19 | // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c) 20 | int index = blockIdx.x * blockDim.x + threadIdx.x; 21 | if (index >= n * nsample * c) return; 22 | const int c_idx = index % c; 23 | const int nsample_idx = (index / c) % nsample; 24 | const int n_idx = index / nsample / c; 25 | const int idx_idx = n_idx * nsample + nsample_idx; 26 | const int input1_idx = n_idx * c + c_idx; 27 | const int input2_idx = idx[idx_idx] * c + c_idx; 28 | atomicAdd(grad_input1 + input1_idx, grad_output[index]); 29 | atomicAdd(grad_input2 + input2_idx, -grad_output[index]); 30 | } 31 | 32 | void subtraction_forward_cuda_launcher(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) { 33 | // input: input1: (n, c), input2: (n, c), idx: (n, nsample), output: (n, nsample, c) 34 | dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK)); 35 | dim3 threads(THREADS_PER_BLOCK); 36 | subtraction_forward_cuda_kernel<<>>(n, nsample, c, input1, input2, idx, output); 37 | } 38 | 39 | void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) { 40 | // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c) 41 | dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK)); 42 | dim3 threads(THREADS_PER_BLOCK); 43 | subtraction_backward_cuda_kernel<<>>(n, nsample, c, idx, grad_output, grad_input1, grad_input2); 44 | } 45 | -------------------------------------------------------------------------------- /libs/pointops/src/subtraction/subtraction_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _SUBTRACTION_CUDA_KERNEL 2 | #define _SUBTRACTION_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void subtraction_forward_cuda(int n, int nsample, int c, at::Tensor input1_tensor, at::Tensor input2_tensor, at::Tensor idx_tensor, at::Tensor output_tensor); 8 | void subtraction_backward_cuda(int n, int nsample, int c, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input1_tensor, at::Tensor grad_input2_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void subtraction_forward_cuda_launcher(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output); 15 | void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /libs/pointops2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/libs/pointops2/__init__.py 
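The subtraction forward kernel above has a compact PyTorch equivalent that is useful for verifying the built extension. This is an illustrative sketch derived from the kernel's indexing, not code from the repository, and the function name is invented.

import torch

def subtraction_forward_reference(input1, input2, idx):
    # input1, input2: (n, c); idx: (n, nsample)  ->  output: (n, nsample, c)
    # mirrors output[n_idx, nsample_idx, c_idx] = input1[n_idx, c_idx] - input2[idx[n_idx, nsample_idx], c_idx]
    return input1.unsqueeze(1) - input2[idx.long()]

Comparing this against the compiled `subtraction` autograd function on small random CUDA tensors is a quick consistency check.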
-------------------------------------------------------------------------------- /libs/pointops2/functions/__init__.py: -------------------------------------------------------------------------------- 1 | from pointops2 import * 2 | -------------------------------------------------------------------------------- /libs/pointops2/functions/test_attention_op_step1.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pointops 3 | from torch_scatter import ( 4 | scatter_max, 5 | scatter_mean, 6 | scatter_add, 7 | scatter_min, 8 | scatter_sum, 9 | ) 10 | 11 | torch.manual_seed(1) 12 | 13 | M = 800000 14 | N = 35000 15 | C = 96 16 | h = 6 17 | query = torch.rand(N, h, C // h).cuda() 18 | key = torch.rand(N, h, C // h).cuda() 19 | 20 | index_0 = torch.rand(M) 21 | index_0[index_0 < 0] = 0 22 | index_0 = (index_0 * N).long().cuda() 23 | 24 | index_1 = torch.rand(M) 25 | index_1[index_1 < 0] = 0 26 | index_1 = (index_1 * N).long().cuda() 27 | 28 | query.requires_grad = True 29 | key.requires_grad = True 30 | 31 | # rearrange index for acceleration 32 | index_0, indices = torch.sort(index_0) # [M,] 33 | index_1 = index_1[indices] # [M,] 34 | index_0_counts = index_0.bincount() 35 | 36 | print("index_0_counts.shape: ", index_0_counts.shape) 37 | 38 | n_max = index_0_counts.max() 39 | index_0_offsets = index_0_counts.cumsum(dim=-1) # [N] 40 | 41 | print("v1 index_0_offsets.shape: ", index_0_offsets.shape) 42 | 43 | index_0_offsets = torch.cat( 44 | [torch.zeros(1, dtype=torch.long).cuda(), index_0_offsets], 0 45 | ) # [N+1] 46 | 47 | # print("index_0[:100]: ", index_0[:100]) 48 | print("n_max: ", n_max) 49 | print("index_0_offsets.shape: ", index_0_offsets.shape) 50 | # input() 51 | 52 | print("index_0_offsets[:100]: ", index_0_offsets[:100]) 53 | print("index_1[300:320]: ", index_1[300:320]) 54 | 55 | 56 | attn_flat = pointops.attention_step1( 57 | query.float(), key.float(), index_0.int(), index_1.int() 58 | ) 59 | # loss = attn_flat.sum() 60 | # loss.backward() 61 | print( 62 | "attn_flat.shape: {}, attn_flat[300:320,:10]: {}".format( 63 | attn_flat.shape, attn_flat[300:320, :10] 64 | ) 65 | ) 66 | # print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 67 | # print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 68 | # input() 69 | 70 | print("query.is_contiguous(): ", query.is_contiguous()) 71 | print("key.is_contiguous(): ", key.is_contiguous()) 72 | print("index_0.is_contiguous(): ", index_0.is_contiguous()) 73 | print("index_1.is_contiguous(): ", index_1.is_contiguous()) 74 | 75 | attn_flat_v2 = pointops.attention_step1_v2( 76 | query.float(), key.float(), index_1.int(), index_0_offsets.int(), n_max 77 | ) 78 | # loss = attn_flat_v2.sum() 79 | # loss.backward() 80 | print( 81 | "attn_flat_v2.shape: {}, attn_flat_v2[300:320,:10]: {}".format( 82 | attn_flat_v2.shape, attn_flat_v2[300:320, :10] 83 | ) 84 | ) 85 | # print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 86 | # print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 87 | # input() 88 | 89 | mask = attn_flat_v2.sum(-1) != 0 90 | print("mask.sum(): ", mask.sum()) 91 | print( 92 | "attn_flat_v2[mask] - attn_flat[mask]: ", 93 | ((attn_flat_v2[mask] - attn_flat[mask]) ** 2).max(), 94 | ) 95 | 96 | 97 | print( 98 | "((attn_flat-attn_flat_v2)**2 < 1e-8).all(): ", 99 | ((attn_flat - attn_flat_v2) ** 2 < 1e-8).all(), 100 | ) 101 | 102 | selected = 10000 103 | print( 104 | "torch.max((attn_flat[:selected]-attn_flat_v2[:selected])**2, 0): ", 105 | torch.max((attn_flat[:selected] 
- attn_flat_v2[:selected]) ** 2, 0), 106 | ) 107 | -------------------------------------------------------------------------------- /libs/pointops2/functions/test_attention_op_step1_v2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pointops 3 | from torch_scatter import ( 4 | scatter_max, 5 | scatter_mean, 6 | scatter_add, 7 | scatter_min, 8 | scatter_sum, 9 | ) 10 | 11 | torch.manual_seed(1) 12 | 13 | M = 800000 14 | N = 35000 15 | C = 96 16 | h = 6 17 | query = torch.rand(N, h, C // h).cuda() 18 | key = torch.rand(N, h, C // h).cuda() 19 | 20 | index_0 = torch.rand(M) 21 | index_0[index_0 < 0] = 0 22 | index_0 = (index_0 * N).long().cuda() 23 | 24 | index_1 = torch.rand(M) 25 | index_1[index_1 < 0] = 0 26 | index_1 = (index_1 * N).long().cuda() 27 | 28 | query.requires_grad = True 29 | key.requires_grad = True 30 | 31 | 32 | attn_flat = pointops.attention_step1( 33 | query.float(), key.float(), index_0.int(), index_1.int() 34 | ) 35 | loss = attn_flat.sum() 36 | loss.backward() 37 | print( 38 | "attn_flat.shape: {}, attn_flat[:20,:10]: {}".format( 39 | attn_flat.shape, attn_flat[:20, :10] 40 | ) 41 | ) 42 | print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 43 | print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 44 | input() 45 | 46 | 47 | # rearrange index for acceleration 48 | index_0, indices = torch.sort(index_0) # [M,] 49 | index_1 = index_1[indices] # [M,] 50 | index_0_counts = index_0.bincount() 51 | 52 | print("index_0_counts.shape: ", index_0_counts.shape) 53 | 54 | n_max = index_0_counts.max() 55 | index_0_offsets = index_0_counts.cumsum(dim=-1) # [N] 56 | 57 | print("v1 index_0_offsets.shape: ", index_0_offsets.shape) 58 | 59 | index_0_offsets = torch.cat( 60 | [torch.zeros(1, dtype=torch.long).cuda(), index_0_offsets], 0 61 | ) # [N+1] 62 | 63 | # print("index_0[:100]: ", index_0[:100]) 64 | print("n_max: ", n_max) 65 | print("index_0_offsets.shape: ", index_0_offsets.shape) 66 | # input() 67 | 68 | print("index_0_offsets[:100]: ", index_0_offsets[:100]) 69 | print("index_1[:20]: ", index_1[:20]) 70 | 71 | 72 | attn_flat = pointops.attention_step1( 73 | query.float(), key.float(), index_0.int(), index_1.int() 74 | ) 75 | # loss = attn_flat.sum() 76 | # loss.backward() 77 | # # attn_flat = pointops.attention_step1(query.float(), key.float(), index_0.int(), index_1.int()) 78 | # # loss = attn_flat.sum() 79 | # # loss.backward() 80 | # print("attn_flat.shape: {}, attn_flat[:20,:10]: {}".format(attn_flat.shape, attn_flat[:20,:10])) 81 | # print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 82 | # print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 83 | # input() 84 | 85 | print("query.is_contiguous(): ", query.is_contiguous()) 86 | print("key.is_contiguous(): ", key.is_contiguous()) 87 | print("index_0.is_contiguous(): ", index_0.is_contiguous()) 88 | print("index_1.is_contiguous(): ", index_1.is_contiguous()) 89 | 90 | attn_flat_v2 = pointops.attention_step1_v2( 91 | query.float(), key.float(), index_1.int(), index_0_offsets.int(), n_max 92 | ) 93 | loss = attn_flat_v2.sum() 94 | loss.backward() 95 | 96 | # attn_flat_v2 = pointops.attention_step1_v2(query.float(), key.float(), index_1.int(), index_0_offsets.int(), n_max) 97 | # loss = attn_flat_v2.sum() 98 | # loss.backward() 99 | 100 | print( 101 | "attn_flat_v2.shape: {}, attn_flat_v2[:20,:10]: {}".format( 102 | attn_flat_v2.shape, attn_flat_v2[:20, :10] 103 | ) 104 | ) 105 | print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 106 | 
print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 107 | # input() 108 | 109 | # mask = attn_flat_v2.sum(-1) != 0 110 | # print("mask.sum(): ", mask.sum()) 111 | # print("attn_flat_v2[mask] - attn_flat[mask]: ", ((attn_flat_v2[mask] - attn_flat[mask])**2).max()) 112 | 113 | 114 | print( 115 | "((attn_flat-attn_flat_v2)**2 < 1e-8).all(): ", 116 | ((attn_flat - attn_flat_v2) ** 2 < 1e-8).all(), 117 | ) 118 | 119 | selected = 10000 120 | print( 121 | "torch.max((attn_flat[:selected]-attn_flat_v2[:selected])**2, 0): ", 122 | torch.max((attn_flat[:selected] - attn_flat_v2[:selected]) ** 2, 0), 123 | ) 124 | -------------------------------------------------------------------------------- /libs/pointops2/functions/test_attention_op_step2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pointops 3 | from torch_scatter import ( 4 | scatter_max, 5 | scatter_mean, 6 | scatter_add, 7 | scatter_min, 8 | scatter_sum, 9 | ) 10 | 11 | torch.manual_seed(1) 12 | 13 | M = 800000 14 | N = 35000 15 | C = 96 16 | h = 6 17 | softmax_attn_flat = torch.rand(M, h).cuda() 18 | value = torch.rand(N, h, C // h).cuda() 19 | 20 | index_0 = torch.rand(M) 21 | index_0[index_0 < 0] = 0 22 | index_0 = (index_0 * N).long().cuda() 23 | 24 | index_1 = torch.rand(M) 25 | index_1[index_1 < 0] = 0 26 | index_1 = (index_1 * N).long().cuda() 27 | 28 | softmax_attn_flat.requires_grad = True 29 | value.requires_grad = True 30 | 31 | # value_flat = value[index_1] #[M, num_heads, C // num_heads] 32 | # x = (softmax_attn_flat.unsqueeze(-1) * value_flat).reshape(M, C) 33 | # x = scatter_sum(src=x, index=index_0, dim=0, dim_size=N) #[N, C] 34 | # loss = x.sum() 35 | # loss.backward() 36 | 37 | # print("x.shape: {}, x[:5,:10]: {}".format(x.shape, x[:5,:10])) 38 | # print("softmax_attn_flat.grad[:5, :10]: ", softmax_attn_flat.grad[:5, :10]) 39 | # print("value.grad[:5, :3, :5]: ", value.grad[:5, :3, :5]) 40 | # input() 41 | 42 | print("softmax_attn_flat.is_contiguous(): ", softmax_attn_flat.is_contiguous()) 43 | print("value.is_contiguous(): ", value.is_contiguous()) 44 | print("index_0.is_contiguous(): ", index_0.is_contiguous()) 45 | print("index_1.is_contiguous(): ", index_1.is_contiguous()) 46 | 47 | x_v2 = pointops.attention_step2( 48 | softmax_attn_flat.float(), value.float(), index_0.int(), index_1.int() 49 | ) 50 | x_v2 = x_v2.view(N, C) 51 | loss = x_v2.sum() 52 | loss.backward() 53 | 54 | print("x_v2.shape: {}, x_v2[:5,:10]: {}".format(x_v2.shape, x_v2[:5, :10])) 55 | 56 | print("softmax_attn_flat.grad[:5, :10]: ", softmax_attn_flat.grad[:5, :10]) 57 | print("value.grad[:5, :3, :5]: ", value.grad[:5, :3, :5]) 58 | input() 59 | 60 | print("((x-x_v2)**2 < 1e-8).all(): ", ((x - x_v2) ** 2 < 1e-8).all()) 61 | 62 | print("torch.max((x-x_v2)**2): ", torch.max((x - x_v2) ** 2)) 63 | -------------------------------------------------------------------------------- /libs/pointops2/functions/test_relative_pos_encoding_op_step1.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pointops 3 | from torch_scatter import ( 4 | scatter_max, 5 | scatter_mean, 6 | scatter_add, 7 | scatter_min, 8 | scatter_sum, 9 | ) 10 | 11 | torch.manual_seed(1) 12 | 13 | M = 80000 14 | N = 3500 15 | hdim = 16 16 | h = 6 17 | L = 31 18 | query = torch.rand(N, h, hdim).cuda() 19 | table = torch.rand(L, h, hdim, 3).cuda() 20 | 21 | index = torch.rand(M) 22 | index[index < 0] = 0 23 | index = (index * N).long().cuda() 24 | 25 | rel_index = 
torch.rand(M, 3) 26 | rel_index[rel_index < 0] = 0 27 | rel_index = (rel_index * L).long().cuda() 28 | 29 | query.requires_grad = True 30 | table.requires_grad = True 31 | 32 | # query_flat = query[index] #[M, h, hdim] 33 | # table_x, table_y, table_z = table[:,:,:,0], table[:,:,:,1], table[:,:,:,2] #[L, h, hdim] 34 | # rel_index_x, rel_index_y, rel_index_z = rel_index[:,0], rel_index[:,1], rel_index[:,2] #[M] 35 | # rel_pos_encoding = table_x[rel_index_x] + table_y[rel_index_y] + table_z[rel_index_z] #[M, h, hdim] 36 | # output = (query_flat * rel_pos_encoding).sum(-1) #[M, h] 37 | # loss = output.mean() 38 | # loss.backward() 39 | 40 | # print("output.shape: {}, output[:5,:10]: {}".format(output.shape, output[:5,:10])) 41 | # print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 42 | # print("table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2]) 43 | # input() 44 | 45 | # print("query.is_contiguous(): ", query.is_contiguous()) 46 | # print("key.is_contiguous(): ", key.is_contiguous()) 47 | # print("index_0.is_contiguous(): ", index_0.is_contiguous()) 48 | # print("index_1.is_contiguous(): ", index_1.is_contiguous()) 49 | 50 | output_v2 = pointops.dot_prod_with_idx(query, index.int(), table, rel_index.int()) 51 | loss = output_v2.mean() 52 | loss.backward() 53 | 54 | print( 55 | "output_v2.shape: {}, output_v2[:5,:10]: {}".format( 56 | output_v2.shape, output_v2[:5, :10] 57 | ) 58 | ) 59 | print("v2: query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 60 | print("v2: table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2]) 61 | input() 62 | 63 | # print("((output-output_v2)**2).max(): ", ((output-output_v2)**2).max()) 64 | 65 | # print("torch.max((attn_flat-attn_flat_v2)**2): ", torch.max((attn_flat-attn_flat_v2)**2)) 66 | -------------------------------------------------------------------------------- /libs/pointops2/functions/test_relative_pos_encoding_op_step1_v2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pointops 3 | from torch_scatter import ( 4 | scatter_max, 5 | scatter_mean, 6 | scatter_add, 7 | scatter_min, 8 | scatter_sum, 9 | ) 10 | 11 | torch.manual_seed(1) 12 | 13 | M = 80000 14 | N = 3500 15 | hdim = 16 16 | h = 6 17 | L = 31 18 | query = torch.rand(N, h, hdim).cuda() 19 | table_q = torch.rand(L, h, hdim, 3).cuda() 20 | key = torch.rand(N, h, hdim).cuda() 21 | table_k = torch.rand(L, h, hdim, 3).cuda() 22 | 23 | index_q = torch.rand(M) 24 | index_q[index_q < 0] = 0 25 | index_q = (index_q * N).long().cuda() 26 | 27 | index_k = torch.rand(M) 28 | index_k[index_k < 0] = 0 29 | index_k = (index_k * N).long().cuda() 30 | 31 | rel_index = torch.rand(M, 3) 32 | rel_index[rel_index < 0] = 0 33 | rel_index = (rel_index * L).long().cuda() 34 | 35 | query.requires_grad = True 36 | table_q.requires_grad = True 37 | key.requires_grad = True 38 | table_k.requires_grad = True 39 | 40 | output1 = pointops.dot_prod_with_idx(query, index_q.int(), table_q, rel_index.int()) 41 | output2 = pointops.dot_prod_with_idx(key, index_k.int(), table_k, rel_index.int()) 42 | output = output1 + output2 43 | # loss = output.mean() 44 | # loss.backward() 45 | 46 | # print("output.shape: {}, output[:5,:10]: {}".format(output.shape, output[:5,:10])) 47 | # print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 48 | # print("table_q.grad[:5, :3, :5, :2]: ", table_q.grad[:5, :3, :5, :2]) 49 | # print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 50 | # print("table_k.grad[:5, :3, :5, :2]: ", table_k.grad[:5, :3, :5, :2]) 51 | 
# input() 52 | 53 | # print("query.is_contiguous(): ", query.is_contiguous()) 54 | # print("key.is_contiguous(): ", key.is_contiguous()) 55 | # print("index_0.is_contiguous(): ", index_0.is_contiguous()) 56 | # print("index_1.is_contiguous(): ", index_1.is_contiguous()) 57 | 58 | output_v2 = pointops.dot_prod_with_idx_v2( 59 | query, index_q.int(), key, index_k.int(), table_q, table_k, rel_index.int() 60 | ) 61 | loss = output_v2.mean() 62 | loss.backward() 63 | 64 | print( 65 | "output_v2.shape: {}, output_v2[:5,:10]: {}".format( 66 | output_v2.shape, output_v2[:5, :10] 67 | ) 68 | ) 69 | print("v2 query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 70 | print("v2 table_q.grad[:5, :3, :5, :2]: ", table_q.grad[:5, :3, :5, :2]) 71 | print("v2 key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 72 | print("v2 table_k.grad[:5, :3, :5, :2]: ", table_k.grad[:5, :3, :5, :2]) 73 | # input() 74 | 75 | print("((output-output_v2)**2).max(): ", ((output - output_v2) ** 2).max()) 76 | -------------------------------------------------------------------------------- /libs/pointops2/functions/test_relative_pos_encoding_op_step1_v3.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pointops 3 | from torch_scatter import ( 4 | scatter_max, 5 | scatter_mean, 6 | scatter_add, 7 | scatter_min, 8 | scatter_sum, 9 | ) 10 | 11 | torch.manual_seed(1) 12 | 13 | M = 80000 14 | N = 3500 15 | # M = 80 16 | # N = 5 17 | hdim = 16 18 | h = 6 19 | L = 31 20 | query = torch.rand(N, h, hdim).cuda() 21 | table_q = torch.rand(L, h, hdim, 3).cuda() 22 | key = torch.rand(N, h, hdim).cuda() 23 | table_k = torch.rand(L, h, hdim, 3).cuda() 24 | 25 | index_q = torch.rand(M) 26 | index_q[index_q < 0] = 0 27 | index_q = (index_q * N).long().cuda() 28 | 29 | index_k = torch.rand(M) 30 | index_k[index_k < 0] = 0 31 | index_k = (index_k * N).long().cuda() 32 | 33 | rel_index = torch.rand(M, 3) 34 | rel_index[rel_index < 0] = 0 35 | rel_index = (rel_index * L).long().cuda() 36 | 37 | 38 | # rearrange index for acceleration 39 | index_q, indices = torch.sort(index_q) # [M,] 40 | index_k = index_k[indices] # [M,] 41 | rel_index = rel_index[indices] 42 | index_q_counts = index_q.bincount() 43 | 44 | print("index_q_counts.shape: ", index_q_counts.shape) 45 | 46 | n_max = index_q_counts.max() 47 | index_q_offsets = index_q_counts.cumsum(dim=-1) # [N] 48 | 49 | print("v1 index_q_offsets.shape: ", index_q_offsets.shape) 50 | 51 | index_q_offsets = torch.cat( 52 | [torch.zeros(1, dtype=torch.long).cuda(), index_q_offsets], 0 53 | ) # [N+1] 54 | 55 | # print("index_q[:100]: ", index_q[:100]) 56 | print("n_max: ", n_max) 57 | print("index_q_offsets.shape: ", index_q_offsets.shape) 58 | # input() 59 | 60 | print("index_q_offsets[:100]: ", index_q_offsets[:100]) 61 | print("index_k[:20]: ", index_k[:20]) 62 | 63 | query.requires_grad = True 64 | table_q.requires_grad = True 65 | key.requires_grad = True 66 | table_k.requires_grad = True 67 | 68 | output1 = pointops.dot_prod_with_idx(query, index_q.int(), table_q, rel_index.int()) 69 | output2 = pointops.dot_prod_with_idx(key, index_k.int(), table_k, rel_index.int()) 70 | output = output1 + output2 71 | loss = output.mean() 72 | loss.backward() 73 | 74 | # print("output.shape: {}, output[:5,:10]: {}".format(output.shape, output[:5,:10])) 75 | # print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 76 | # print("table_q.grad[:5, :3, :5, :2]: ", table_q.grad[:5, :3, :5, :2]) 77 | # print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 78 | # 
print("table_k.grad[:5, :3, :5, :2]: ", table_k.grad[:5, :3, :5, :2]) 79 | # input() 80 | 81 | # print("query.is_contiguous(): ", query.is_contiguous()) 82 | # print("key.is_contiguous(): ", key.is_contiguous()) 83 | # print("index_q.is_contiguous(): ", index_q.is_contiguous()) 84 | # print("index_k.is_contiguous(): ", index_k.is_contiguous()) 85 | 86 | output_v2 = pointops.dot_prod_with_idx_v3( 87 | query, 88 | index_q_offsets.int(), 89 | n_max, 90 | key, 91 | index_k.int(), 92 | table_q, 93 | table_k, 94 | rel_index.int(), 95 | ) 96 | # loss = output_v2.mean() 97 | # loss.backward() 98 | 99 | # print("output_v2.shape: {}, output_v2[:5,:10]: {}".format(output_v2.shape, output_v2[:5,:10])) 100 | # print("v2 query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 101 | # print("v2 table_q.grad[:5, :3, :5, :2]: ", table_q.grad[:5, :3, :5, :2]) 102 | # print("v2 key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 103 | # print("v2 table_k.grad[:5, :3, :5, :2]: ", table_k.grad[:5, :3, :5, :2]) 104 | # input() 105 | 106 | print("((output-output_v2)**2).max(): ", ((output - output_v2) ** 2).max()) 107 | -------------------------------------------------------------------------------- /libs/pointops2/functions/test_relative_pos_encoding_op_step2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pointops 3 | from torch_scatter import ( 4 | scatter_max, 5 | scatter_mean, 6 | scatter_add, 7 | scatter_min, 8 | scatter_sum, 9 | ) 10 | 11 | torch.manual_seed(1) 12 | 13 | M = 80000 14 | N = 3500 15 | hdim = 16 16 | h = 6 17 | L = 31 18 | attn = torch.rand(M, h).cuda() 19 | v = torch.rand(N, h, hdim).cuda() 20 | table = torch.rand(L, h, hdim, 3).cuda() 21 | 22 | index_0 = torch.rand(M) 23 | index_0[index_0 < 0] = 0 24 | index_0 = (index_0 * N).long().cuda() 25 | 26 | index_1 = torch.rand(M) 27 | index_1[index_1 < 0] = 0 28 | index_1 = (index_1 * N).long().cuda() 29 | 30 | rel_index = torch.rand(M, 3) 31 | rel_index[rel_index < 0] = 0 32 | rel_index = (rel_index * L).long().cuda() 33 | 34 | attn.requires_grad = True 35 | v.requires_grad = True 36 | table.requires_grad = True 37 | 38 | v_flat = v[index_1] # [M, h, hdim] 39 | table_x, table_y, table_z = ( 40 | table[:, :, :, 0], 41 | table[:, :, :, 1], 42 | table[:, :, :, 2], 43 | ) # [L, h, hdim] 44 | rel_index_x, rel_index_y, rel_index_z = ( 45 | rel_index[:, 0], 46 | rel_index[:, 1], 47 | rel_index[:, 2], 48 | ) # [M] 49 | rel_pos_encoding = ( 50 | table_x[rel_index_x] + table_y[rel_index_y] + table_z[rel_index_z] 51 | ) # [M, h, hdim] 52 | v_flat_new = v_flat + rel_pos_encoding # [M, h, hdim] 53 | output = attn.unsqueeze(-1) * v_flat_new # [M, h, hdim] 54 | output = scatter_sum(src=output, index=index_0, dim=0, dim_size=N) # [N, h, hdim] 55 | loss = output.mean() 56 | loss.backward() 57 | 58 | print( 59 | "output.shape: {}, output[:5,:10,:5]: {}".format(output.shape, output[:5, :10, :5]) 60 | ) 61 | print("attn.grad[:5, :3]: ", attn.grad[:5, :3]) 62 | print("v.grad[:5, :3, :5]: ", v.grad[:5, :3, :5]) 63 | print("table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2]) 64 | input() 65 | 66 | # print("query.is_contiguous(): ", query.is_contiguous()) 67 | # print("key.is_contiguous(): ", key.is_contiguous()) 68 | # print("index_0.is_contiguous(): ", index_0.is_contiguous()) 69 | # print("index_1.is_contiguous(): ", index_1.is_contiguous()) 70 | 71 | # output_v2 = pointops.attention_step2_with_rel_pos_value(attn, v, index_0.int(), index_1.int(), table, rel_index.int()) 72 | # loss = output_v2.mean() 73 | 
# loss.backward() 74 | 75 | # print("output_v2.shape: {}, output_v2[:5,:10,:5]: {}".format(output_v2.shape, output_v2[:5,:10,:5])) 76 | # print("v2 attn.grad[:5, :3]: ", attn.grad[:5, :3]) 77 | # print("v2 v.grad[:5, :3, :5]: ", v.grad[:5, :3, :5]) 78 | # print("v2 table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2]) 79 | # input() 80 | 81 | # print("((output-output_v2)**2).max(): ", ((output-output_v2)**2).max()) 82 | 83 | # print("torch.max((attn_flat-attn_flat_v2)**2): ", torch.max((attn_flat-attn_flat_v2)**2)) 84 | -------------------------------------------------------------------------------- /libs/pointops2/functions/test_relative_pos_encoding_op_step2_v2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pointops 3 | from torch_scatter import ( 4 | scatter_max, 5 | scatter_mean, 6 | scatter_add, 7 | scatter_min, 8 | scatter_sum, 9 | ) 10 | 11 | torch.manual_seed(1) 12 | 13 | M = 80000 14 | N = 3500 15 | hdim = 16 16 | h = 6 17 | L = 31 18 | attn = torch.rand(M, h).cuda() 19 | v = torch.rand(N, h, hdim).cuda() 20 | table = torch.rand(L, h, hdim, 3).cuda() 21 | 22 | index_0 = torch.rand(M) 23 | index_0[index_0 < 0] = 0 24 | index_0 = (index_0 * N).long().cuda() 25 | 26 | index_1 = torch.rand(M) 27 | index_1[index_1 < 0] = 0 28 | index_1 = (index_1 * N).long().cuda() 29 | 30 | rel_index = torch.rand(M, 3) 31 | rel_index[rel_index < 0] = 0 32 | rel_index = (rel_index * L).long().cuda() 33 | 34 | 35 | # rearrange index for acceleration 36 | index_0, indices = torch.sort(index_0) # [M,] 37 | index_1 = index_1[indices] # [M,] 38 | rel_index = rel_index[indices] 39 | index_0_counts = index_0.bincount() 40 | 41 | print("index_0_counts.shape: ", index_0_counts.shape) 42 | 43 | n_max = index_0_counts.max() 44 | index_0_offsets = index_0_counts.cumsum(dim=-1) # [N] 45 | 46 | print("v1 index_0_offsets.shape: ", index_0_offsets.shape) 47 | 48 | index_0_offsets = torch.cat( 49 | [torch.zeros(1, dtype=torch.long).cuda(), index_0_offsets], 0 50 | ) # [N+1] 51 | 52 | 53 | attn.requires_grad = True 54 | v.requires_grad = True 55 | table.requires_grad = True 56 | 57 | 58 | output = pointops.attention_step2_with_rel_pos_value( 59 | attn, v, index_0.int(), index_1.int(), table, rel_index.int() 60 | ) 61 | loss = output.mean() 62 | loss.backward() 63 | 64 | print( 65 | "output.shape: {}, output[:5,:10,:5]: {}".format(output.shape, output[:5, :10, :5]) 66 | ) 67 | print("attn.grad[:5, :3]: ", attn.grad[:5, :3]) 68 | print("v.grad[:5, :3, :5]: ", v.grad[:5, :3, :5]) 69 | print("table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2]) 70 | # input() 71 | 72 | attn_grad = attn.grad.clone() 73 | v_grad = v.grad.clone() 74 | table_grad = table.grad.clone() 75 | 76 | attn.grad.zero_() 77 | v.grad.zero_() 78 | table.grad.zero_() 79 | 80 | # print("query.is_contiguous(): ", query.is_contiguous()) 81 | # print("key.is_contiguous(): ", key.is_contiguous()) 82 | # print("index_0.is_contiguous(): ", index_0.is_contiguous()) 83 | # print("index_1.is_contiguous(): ", index_1.is_contiguous()) 84 | 85 | output_v2 = pointops.attention_step2_with_rel_pos_value_v2( 86 | attn, v, index_0_offsets.int(), n_max, index_1.int(), table, rel_index.int() 87 | ) 88 | loss = output_v2.mean() 89 | loss.backward() 90 | 91 | print( 92 | "output_v2.shape: {}, output_v2[:5,:10,:5]: {}".format( 93 | output_v2.shape, output_v2[:5, :10, :5] 94 | ) 95 | ) 96 | print("v2 attn.grad[:5, :3]: ", attn.grad[:5, :3]) 97 | print("v2 v.grad[:5, :3, :5]: ", v.grad[:5, :3, :5]) 98 | 
print("v2 table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2]) 99 | # input() 100 | 101 | print("((output-output_v2)**2).max(): ", ((output - output_v2) ** 2).max()) 102 | 103 | print("((attn_grad-attn.grad)**2).max(): ", ((attn_grad - attn.grad) ** 2).max()) 104 | 105 | print("((v_grad-v.grad)**2).max(): ", ((v_grad - v.grad) ** 2).max()) 106 | 107 | print("((table_grad-table.grad)**2).max(): ", ((table_grad - table.grad) ** 2).max()) 108 | 109 | # print("torch.max((attn_flat-attn_flat_v2)**2): ", torch.max((attn_flat-attn_flat_v2)**2)) 110 | -------------------------------------------------------------------------------- /libs/pointops2/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup 3 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 4 | from distutils.sysconfig import get_config_vars 5 | 6 | (opt,) = get_config_vars("OPT") 7 | os.environ["OPT"] = " ".join( 8 | flag for flag in opt.split() if flag != "-Wstrict-prototypes" 9 | ) 10 | 11 | src = "src" 12 | sources = [ 13 | os.path.join(root, file) 14 | for root, dirs, files in os.walk(src) 15 | for file in files 16 | if file.endswith(".cpp") or file.endswith(".cu") 17 | ] 18 | 19 | setup( 20 | name="pointops2", 21 | version="1.0", 22 | install_requires=["torch", "numpy"], 23 | packages=["pointops2"], 24 | package_dir={"pointops2": "functions"}, 25 | ext_modules=[ 26 | CUDAExtension( 27 | name="pointops2_cuda", 28 | sources=sources, 29 | extra_compile_args={"cxx": ["-g"], "nvcc": ["-O2"]}, 30 | ) 31 | ], 32 | cmdclass={"build_ext": BuildExtension}, 33 | ) 34 | -------------------------------------------------------------------------------- /libs/pointops2/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/libs/pointops2/src/__init__.py -------------------------------------------------------------------------------- /libs/pointops2/src/aggregation/aggregation_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "aggregation_cuda_kernel.h" 5 | 6 | 7 | void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor) 8 | { 9 | const float *input = input_tensor.data_ptr(); 10 | const float *position = position_tensor.data_ptr(); 11 | const float *weight = weight_tensor.data_ptr(); 12 | const int *idx = idx_tensor.data_ptr(); 13 | float *output = output_tensor.data_ptr(); 14 | aggregation_forward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, output); 15 | } 16 | 17 | void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor) 18 | { 19 | const float *input = input_tensor.data_ptr(); 20 | const float *position = position_tensor.data_ptr(); 21 | const float *weight = weight_tensor.data_ptr(); 22 | const int *idx = idx_tensor.data_ptr(); 23 | const float *grad_output = grad_output_tensor.data_ptr(); 24 | float *grad_input = grad_input_tensor.data_ptr(); 25 | float *grad_position = grad_position_tensor.data_ptr(); 26 | float 
*grad_weight = grad_weight_tensor.data_ptr(); 27 | aggregation_backward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight); 28 | } 29 | -------------------------------------------------------------------------------- /libs/pointops2/src/aggregation/aggregation_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "aggregation_cuda_kernel.h" 3 | 4 | 5 | __global__ void aggregation_forward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) { 6 | // input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c) 7 | int index = blockIdx.x * blockDim.x + threadIdx.x; 8 | if (index >= n * c) return; 9 | const int c_idx = index % c; 10 | const int n_idx = index / c; 11 | const int w_c_idx = c_idx % w_c; 12 | for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++) 13 | { 14 | int idx_idx = n_idx * nsample + nsample_idx; 15 | int input_idx = idx[idx_idx] * c + c_idx; 16 | int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx; 17 | int weight_idx = n_idx * nsample * w_c + nsample_idx * w_c + w_c_idx; 18 | output[index] += (input[input_idx] + position[position_idx]) * weight[weight_idx]; 19 | } 20 | } 21 | 22 | __global__ void aggregation_backward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) { 23 | // input: grad_output: (n, c), output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight: (n, nsample, w_c) 24 | int index = blockIdx.x * blockDim.x + threadIdx.x; 25 | if (index >= n * c) return; 26 | const int c_idx = index % c; 27 | const int n_idx = index / c; 28 | const int w_c_idx = c_idx % w_c; 29 | for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++) 30 | { 31 | int idx_idx = n_idx * nsample + nsample_idx; 32 | int input_idx = idx[idx_idx] * c + c_idx; 33 | int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx; 34 | int weight_idx = n_idx * nsample * w_c + nsample_idx * w_c + w_c_idx; 35 | atomicAdd(grad_input + input_idx, grad_output[index] * weight[weight_idx]); 36 | grad_position[position_idx] = grad_output[index] * weight[weight_idx]; 37 | atomicAdd(grad_weight + weight_idx, grad_output[index] * (input[input_idx] + position[position_idx])); 38 | } 39 | } 40 | 41 | void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) { 42 | // input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c) 43 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)); 44 | dim3 threads(THREADS_PER_BLOCK); 45 | aggregation_forward_cuda_kernel<<>>(n, nsample, c, w_c, input, position, weight, idx, output); 46 | } 47 | 48 | void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) { 49 | // input: grad_output: (n, c), output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight: (n, nsample, w_c) 50 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)); 51 | dim3 
threads(THREADS_PER_BLOCK); 52 | aggregation_backward_cuda_kernel<<>>(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight); 53 | } 54 | -------------------------------------------------------------------------------- /libs/pointops2/src/aggregation/aggregation_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _AGGREGATION_CUDA_KERNEL 2 | #define _AGGREGATION_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor); 8 | void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output); 15 | void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /libs/pointops2/src/attention/attention_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "attention_cuda_kernel.h" 5 | 6 | void attention_step1_forward_cuda(int N, int M, int h, int C, at::Tensor q_tensor, at::Tensor k_tensor, 7 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor) 8 | { 9 | const float *q = q_tensor.data_ptr(); 10 | const float *k = k_tensor.data_ptr(); 11 | const int *index0 = index0_tensor.data_ptr(); 12 | const int *index1 = index1_tensor.data_ptr(); 13 | float *attn = attn_tensor.data_ptr(); 14 | attention_step1_forward_cuda_launcher(N, M, h, C, q, k, index0, index1, attn); 15 | } 16 | 17 | void attention_step1_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor, 18 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, 19 | at::Tensor grad_q_tensor, at::Tensor grad_k_tensor) 20 | { 21 | const float *grad_out = grad_out_tensor.data_ptr(); 22 | const int *index0 = index0_tensor.data_ptr(); 23 | const int *index1 = index1_tensor.data_ptr(); 24 | const float *q = q_tensor.data_ptr(); 25 | const float *k = k_tensor.data_ptr(); 26 | float *grad_q = grad_q_tensor.data_ptr(); 27 | float *grad_k = grad_k_tensor.data_ptr(); 28 | attention_step1_backward_cuda_launcher(N, M, h, C, grad_out, index0, index1, q, k, grad_q, grad_k); 29 | } 30 | 31 | void attention_step2_forward_cuda(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, 32 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor) 33 | { 34 | const float *attn = attn_tensor.data_ptr(); 35 | const float *v = v_tensor.data_ptr(); 36 | const int *index0 = index0_tensor.data_ptr(); 37 | const int *index1 = index1_tensor.data_ptr(); 38 | float *output = output_tensor.data_ptr(); 39 | 
attention_step2_forward_cuda_launcher(N, M, h, C, attn, v, index0, index1, output); 40 | } 41 | 42 | 43 | void attention_step2_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor, 44 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, 45 | at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor) 46 | { 47 | const float *grad_out = grad_out_tensor.data_ptr(); 48 | const int *index0 = index0_tensor.data_ptr(); 49 | const int *index1 = index1_tensor.data_ptr(); 50 | const float *attn = attn_tensor.data_ptr(); 51 | const float *v = v_tensor.data_ptr(); 52 | float *grad_attn = grad_attn_tensor.data_ptr(); 53 | float *grad_v = grad_v_tensor.data_ptr(); 54 | attention_step2_backward_cuda_launcher(N, M, h, C, grad_out, index0, index1, attn, v, grad_attn, grad_v); 55 | } 56 | -------------------------------------------------------------------------------- /libs/pointops2/src/attention/attention_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ATTENTION_CUDA_KERNEL 2 | #define _ATTENTION_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void attention_step1_forward_cuda(int N, int M, int h, int C, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor); 8 | void attention_step1_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor); 9 | 10 | void attention_step2_forward_cuda(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor); 11 | void attention_step2_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor); 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | void attention_step1_forward_cuda_launcher(int N, int M, int h, int C, const float *q, const float *k, const int *index0, const int *index1, float *attn); 18 | void attention_step1_backward_cuda_launcher(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *q, const float *k, float *grad_q, float *grad_k); 19 | 20 | void attention_step2_forward_cuda_launcher(int N, int M, int h, int C, const float *attn, const float *v, const int *index0, const int *index1, float *output); 21 | void attention_step2_backward_cuda_launcher(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *attn, const float *v, float *grad_attn, float *grad_v); 22 | 23 | #ifdef __cplusplus 24 | } 25 | #endif 26 | #endif 27 | -------------------------------------------------------------------------------- /libs/pointops2/src/attention_v2/attention_cuda_kernel_v2.h: -------------------------------------------------------------------------------- 1 | #ifndef _ATTENTION_V2_CUDA_KERNEL 2 | #define _ATTENTION_V2_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void attention_step1_forward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor attn_tensor); 8 | void attention_step1_backward_cuda_v2(int N, int M, int h, int C, const 
unsigned int n_max, at::Tensor grad_out_tensor, at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor); 9 | 10 | void attention_step2_forward_cuda_v2(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor); 11 | void attention_step2_backward_cuda_v2(int N, int M, int h, int C, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor); 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | void attention_step1_forward_cuda_launcher_v2(int N, int M, int h, int C, const unsigned int n_max, const float *q, const float *k, const int *index0_offsets, const int *index1, float *attn); 18 | void attention_step1_backward_cuda_launcher_v2(int N, int M, int h, int C, const unsigned int n_max, const float *grad_out, const int *index0_offsets, const int *index1, const float *q, const float *k, float *grad_q, float *grad_k); 19 | 20 | void attention_step2_forward_cuda_launcher_v2(int N, int M, int h, int C, const float *attn, const float *v, const int *index0, const int *index1, float *output); 21 | void attention_step2_backward_cuda_launcher_v2(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *attn, const float *v, float *grad_attn, float *grad_v); 22 | 23 | #ifdef __cplusplus 24 | } 25 | #endif 26 | #endif 27 | -------------------------------------------------------------------------------- /libs/pointops2/src/attention_v2/attention_cuda_v2.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "attention_cuda_kernel_v2.h" 5 | 6 | void attention_step1_forward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor q_tensor, at::Tensor k_tensor, 7 | at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor attn_tensor) 8 | { 9 | const float *q = q_tensor.data_ptr(); 10 | const float *k = k_tensor.data_ptr(); 11 | const int *index0_offsets = index0_tensor_offsets.data_ptr(); 12 | const int *index1 = index1_tensor.data_ptr(); 13 | float *attn = attn_tensor.data_ptr(); 14 | attention_step1_forward_cuda_launcher_v2(N, M, h, C, n_max, q, k, index0_offsets, index1, attn); 15 | } 16 | 17 | void attention_step1_backward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor grad_out_tensor, 18 | at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, 19 | at::Tensor grad_q_tensor, at::Tensor grad_k_tensor) 20 | { 21 | const float *grad_out = grad_out_tensor.data_ptr(); 22 | const int *index0_offsets = index0_tensor_offsets.data_ptr(); 23 | const int *index1 = index1_tensor.data_ptr(); 24 | const float *q = q_tensor.data_ptr(); 25 | const float *k = k_tensor.data_ptr(); 26 | float *grad_q = grad_q_tensor.data_ptr(); 27 | float *grad_k = grad_k_tensor.data_ptr(); 28 | attention_step1_backward_cuda_launcher_v2(N, M, h, C, n_max, grad_out, index0_offsets, index1, q, k, grad_q, grad_k); 29 | } 30 | 31 | void attention_step2_forward_cuda_v2(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, 32 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor) 33 | { 34 | const float *attn = attn_tensor.data_ptr(); 35 | const float *v 
= v_tensor.data_ptr<float>(); 36 | const int *index0 = index0_tensor.data_ptr<int>(); 37 | const int *index1 = index1_tensor.data_ptr<int>(); 38 | float *output = output_tensor.data_ptr<float>(); 39 | attention_step2_forward_cuda_launcher_v2(N, M, h, C, attn, v, index0, index1, output); 40 | } 41 | 42 | 43 | void attention_step2_backward_cuda_v2(int N, int M, int h, int C, at::Tensor grad_out_tensor, 44 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, 45 | at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor) 46 | { 47 | const float *grad_out = grad_out_tensor.data_ptr<float>(); 48 | const int *index0 = index0_tensor.data_ptr<int>(); 49 | const int *index1 = index1_tensor.data_ptr<int>(); 50 | const float *attn = attn_tensor.data_ptr<float>(); 51 | const float *v = v_tensor.data_ptr<float>(); 52 | float *grad_attn = grad_attn_tensor.data_ptr<float>(); 53 | float *grad_v = grad_v_tensor.data_ptr<float>(); 54 | attention_step2_backward_cuda_launcher_v2(N, M, h, C, grad_out, index0, index1, attn, v, grad_attn, grad_v); 55 | } 56 |
-------------------------------------------------------------------------------- /libs/pointops2/src/cuda_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef _CUDA_UTILS_H 2 | #define _CUDA_UTILS_H 3 | 4 | #include <cmath> 5 | #include <algorithm> 6 | 7 | #define TOTAL_THREADS 1024 8 | #define THREADS_PER_BLOCK 256 9 | #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) 10 | 11 | inline int opt_n_threads(int work_size) { 12 | const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0); 13 | return std::max(std::min(1 << pow_2, TOTAL_THREADS), 1); 14 | } 15 | 16 | inline dim3 opt_block_config(int x, int y) { 17 | const int x_threads = opt_n_threads(x); 18 | const int y_threads = std::max(std::min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1); 19 | dim3 block_config(x_threads, y_threads, 1); 20 | return block_config; 21 | } 22 | 23 | #endif 24 |
-------------------------------------------------------------------------------- /libs/pointops2/src/grouping/grouping_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include <vector> 2 | #include <torch/serialize/tensor.h> 3 | #include <torch/extension.h> 4 | #include "grouping_cuda_kernel.h" 5 | 6 | 7 | void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor) 8 | { 9 | const float *input = input_tensor.data_ptr<float>(); 10 | const int *idx = idx_tensor.data_ptr<int>(); 11 | float *output = output_tensor.data_ptr<float>(); 12 | grouping_forward_cuda_launcher(m, nsample, c, input, idx, output); 13 | } 14 | 15 | void grouping_backward_cuda(int m, int nsample, int c, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor) 16 | { 17 | const float *grad_output = grad_output_tensor.data_ptr<float>(); 18 | const int *idx = idx_tensor.data_ptr<int>(); 19 | float *grad_input = grad_input_tensor.data_ptr<float>(); 20 | grouping_backward_cuda_launcher(m, nsample, c, grad_output, idx, grad_input); 21 | } 22 |
-------------------------------------------------------------------------------- /libs/pointops2/src/grouping/grouping_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "grouping_cuda_kernel.h" 3 | 4 | 5 | __global__ void grouping_forward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ input, const int *__restrict__ idx, float *__restrict__ output) { 6 | // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c) 7 | int index = blockIdx.x *
blockDim.x + threadIdx.x; 8 | if (index >= m * nsample * c) return; 9 | const int c_idx = index % c; 10 | const int nsample_idx = (index / c) % nsample; 11 | const int m_idx = index / nsample / c; 12 | const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx; 13 | output[index] = input[input_idx]; 14 | } 15 | 16 | __global__ void grouping_backward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ grad_output, const int *__restrict__ idx, float *__restrict__ grad_input) { 17 | // input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c) 18 | int index = blockIdx.x * blockDim.x + threadIdx.x; 19 | if (index >= m * nsample * c) return; 20 | const int c_idx = index % c; 21 | const int nsample_idx = (index / c) % nsample; 22 | const int m_idx = index / nsample / c; 23 | const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx; 24 | atomicAdd(grad_input + input_idx, grad_output[index]); 25 | } 26 | 27 | void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output) { 28 | // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c) 29 | dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK)); 30 | dim3 threads(THREADS_PER_BLOCK); 31 | grouping_forward_cuda_kernel<<>>(m, nsample, c, input, idx, output); 32 | } 33 | 34 | void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input) 35 | { 36 | // input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c) 37 | dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK)); 38 | dim3 threads(THREADS_PER_BLOCK); 39 | grouping_backward_cuda_kernel<<>>(m, nsample, c, grad_output, idx, grad_input); 40 | } 41 | -------------------------------------------------------------------------------- /libs/pointops2/src/grouping/grouping_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _GROUPING_CUDA_KERNEL 2 | #define _GROUPING_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor); 8 | void grouping_backward_cuda(int m, int nsample, int c, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output); 15 | void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /libs/pointops2/src/interpolation/interpolation_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "interpolation_cuda_kernel.h" 5 | 6 | 7 | void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor) 8 | { 9 | const float *input = input_tensor.data_ptr(); 10 | const int *idx = idx_tensor.data_ptr(); 11 | const float *weight = weight_tensor.data_ptr(); 12 | float *output = output_tensor.data_ptr(); 13 | interpolation_forward_cuda_launcher(n, c, k, input, idx, weight, output); 14 | } 15 | 16 | void 
interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_input_tensor) 17 | { 18 | const float *grad_output = grad_output_tensor.data_ptr(); 19 | const int *idx = idx_tensor.data_ptr(); 20 | const float *weight = weight_tensor.data_ptr(); 21 | float *grad_input = grad_input_tensor.data_ptr(); 22 | interpolation_backward_cuda_launcher(n, c, k, grad_output, idx, weight, grad_input); 23 | } 24 | -------------------------------------------------------------------------------- /libs/pointops2/src/interpolation/interpolation_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "interpolation_cuda_kernel.h" 3 | 4 | 5 | __global__ void interpolation_forward_cuda_kernel(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output) 6 | { 7 | // input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c) 8 | int index = blockIdx.x * blockDim.x + threadIdx.x; 9 | if (index >= n * c) return; 10 | int c_idx = index % c; 11 | int n_idx = index / c; 12 | for (int i = 0; i < k; i++) 13 | { 14 | int idx_idx = n_idx * k + i; 15 | int input_idx = idx[idx_idx] * c + c_idx; 16 | output[index] += input[input_idx] * weight[idx_idx]; 17 | } 18 | } 19 | 20 | __global__ void interpolation_backward_cuda_kernel(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input) 21 | { 22 | // input: grad_output: (n, c), idx: (n, k), weight: (n, k), output: grad_input (m, c) 23 | int index = blockIdx.x * blockDim.x + threadIdx.x; 24 | if (index >= n * c) return; 25 | int c_idx = index % c; 26 | int n_idx = index / c; 27 | for (int i = 0; i < k; i++) 28 | { 29 | int idx_idx = n_idx * k + i; 30 | int input_idx = idx[idx_idx] * c + c_idx; 31 | atomicAdd(grad_input + input_idx, grad_output[index] * weight[idx_idx]); 32 | } 33 | } 34 | 35 | void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output) { 36 | // input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c) 37 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)); 38 | dim3 threads(THREADS_PER_BLOCK); 39 | interpolation_forward_cuda_kernel<<>>(n, c, k, input, idx, weight, output); 40 | } 41 | 42 | void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input) { 43 | // input: grad_output: (n, c), idx: (n, k), weight: (n, k), output: grad_input (m, c) 44 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)); 45 | dim3 threads(THREADS_PER_BLOCK); 46 | interpolation_backward_cuda_kernel<<>>(n, c, k, grad_output, idx, weight, grad_input); 47 | } 48 | -------------------------------------------------------------------------------- /libs/pointops2/src/interpolation/interpolation_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _INTERPOLATION_CUDA_KERNEL 2 | #define _INTERPOLATION_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor); 8 | void interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_input_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | 
#endif 13 | 14 | void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output); 15 | void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /libs/pointops2/src/knnquery/knnquery_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "knnquery_cuda_kernel.h" 5 | 6 | 7 | void knnquery_cuda(int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor) 8 | { 9 | const float *xyz = xyz_tensor.data_ptr(); 10 | const float *new_xyz = new_xyz_tensor.data_ptr(); 11 | const int *offset = offset_tensor.data_ptr(); 12 | const int *new_offset = new_offset_tensor.data_ptr(); 13 | int *idx = idx_tensor.data_ptr(); 14 | float *dist2 = dist2_tensor.data_ptr(); 15 | knnquery_cuda_launcher(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2); 16 | } 17 | -------------------------------------------------------------------------------- /libs/pointops2/src/knnquery/knnquery_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "knnquery_cuda_kernel.h" 3 | 4 | 5 | __device__ void swap_float(float *x, float *y) 6 | { 7 | float tmp = *x; 8 | *x = *y; 9 | *y = tmp; 10 | } 11 | 12 | 13 | __device__ void swap_int(int *x, int *y) 14 | { 15 | int tmp = *x; 16 | *x = *y; 17 | *y = tmp; 18 | } 19 | 20 | 21 | __device__ void reheap(float *dist, int *idx, int k) 22 | { 23 | int root = 0; 24 | int child = root * 2 + 1; 25 | while (child < k) 26 | { 27 | if(child + 1 < k && dist[child+1] > dist[child]) 28 | child++; 29 | if(dist[root] > dist[child]) 30 | return; 31 | swap_float(&dist[root], &dist[child]); 32 | swap_int(&idx[root], &idx[child]); 33 | root = child; 34 | child = root * 2 + 1; 35 | } 36 | } 37 | 38 | 39 | __device__ void heap_sort(float *dist, int *idx, int k) 40 | { 41 | int i; 42 | for (i = k - 1; i > 0; i--) 43 | { 44 | swap_float(&dist[0], &dist[i]); 45 | swap_int(&idx[0], &idx[i]); 46 | reheap(dist, idx, i); 47 | } 48 | } 49 | 50 | 51 | __device__ int get_bt_idx(int idx, const int *offset) 52 | { 53 | int i = 0; 54 | while (1) 55 | { 56 | if (idx < offset[i]) 57 | break; 58 | else 59 | i++; 60 | } 61 | return i; 62 | } 63 | 64 | 65 | __global__ void knnquery_cuda_kernel(int m, int nsample, const float *__restrict__ xyz, const float *__restrict__ new_xyz, const int *__restrict__ offset, const int *__restrict__ new_offset, int *__restrict__ idx, float *__restrict__ dist2) { 66 | // input: xyz (n, 3) new_xyz (m, 3) 67 | // output: idx (m, nsample) dist2 (m, nsample) 68 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 69 | if (pt_idx >= m) return; 70 | 71 | new_xyz += pt_idx * 3; 72 | idx += pt_idx * nsample; 73 | dist2 += pt_idx * nsample; 74 | int bt_idx = get_bt_idx(pt_idx, new_offset); 75 | int start; 76 | if (bt_idx == 0) 77 | start = 0; 78 | else 79 | start = offset[bt_idx - 1]; 80 | int end = offset[bt_idx]; 81 | 82 | float new_x = new_xyz[0]; 83 | float new_y = new_xyz[1]; 84 | float new_z = new_xyz[2]; 85 | 86 | float best_dist[100]; 87 | int best_idx[100]; 88 | for(int i = 0; i < nsample; i++){ 89 | 
best_dist[i] = 1e10; 90 | best_idx[i] = start; 91 | } 92 | for(int i = start; i < end; i++){ 93 | float x = xyz[i * 3 + 0]; 94 | float y = xyz[i * 3 + 1]; 95 | float z = xyz[i * 3 + 2]; 96 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z); 97 | if (d2 < best_dist[0]){ 98 | best_dist[0] = d2; 99 | best_idx[0] = i; 100 | reheap(best_dist, best_idx, nsample); 101 | } 102 | } 103 | heap_sort(best_dist, best_idx, nsample); 104 | for(int i = 0; i < nsample; i++){ 105 | idx[i] = best_idx[i]; 106 | dist2[i] = best_dist[i]; 107 | } 108 | } 109 | 110 | 111 | void knnquery_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2) { 112 | // input: new_xyz: (m, 3), xyz: (n, 3), idx: (m, nsample) 113 | dim3 blocks(DIVUP(m, THREADS_PER_BLOCK)); 114 | dim3 threads(THREADS_PER_BLOCK); 115 | knnquery_cuda_kernel<<<blocks, threads, 0, 0>>>(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2); 116 | } 117 |
-------------------------------------------------------------------------------- /libs/pointops2/src/knnquery/knnquery_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _KNNQUERY_CUDA_KERNEL 2 | #define _KNNQUERY_CUDA_KERNEL 3 | #include <vector> 4 | #include <torch/serialize/tensor.h> 5 | #include <ATen/cuda/CUDAContext.h> 6 | 7 | void knnquery_cuda(int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor); 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | void knnquery_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2); 14 | 15 | #ifdef __cplusplus 16 | } 17 | #endif 18 | #endif 19 |
-------------------------------------------------------------------------------- /libs/pointops2/src/pointops_api.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/serialize/tensor.h> 2 | #include <torch/extension.h> 3 | 4 | #include "knnquery/knnquery_cuda_kernel.h" 5 | #include "sampling/sampling_cuda_kernel.h" 6 | #include "grouping/grouping_cuda_kernel.h" 7 | #include "interpolation/interpolation_cuda_kernel.h" 8 | #include "aggregation/aggregation_cuda_kernel.h" 9 | #include "subtraction/subtraction_cuda_kernel.h" 10 | #include "attention/attention_cuda_kernel.h" 11 | #include "rpe/relative_pos_encoding_cuda_kernel.h" 12 | #include "attention_v2/attention_cuda_kernel_v2.h" 13 | #include "rpe_v2/relative_pos_encoding_cuda_kernel_v2.h" 14 | 15 | 16 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 17 | m.def("knnquery_cuda", &knnquery_cuda, "knnquery_cuda"); 18 | m.def("furthestsampling_cuda", &furthestsampling_cuda, "furthestsampling_cuda"); 19 | m.def("grouping_forward_cuda", &grouping_forward_cuda, "grouping_forward_cuda"); 20 | m.def("grouping_backward_cuda", &grouping_backward_cuda, "grouping_backward_cuda"); 21 | m.def("interpolation_forward_cuda", &interpolation_forward_cuda, "interpolation_forward_cuda"); 22 | m.def("interpolation_backward_cuda", &interpolation_backward_cuda, "interpolation_backward_cuda"); 23 | m.def("subtraction_forward_cuda", &subtraction_forward_cuda, "subtraction_forward_cuda"); 24 | m.def("subtraction_backward_cuda", &subtraction_backward_cuda, "subtraction_backward_cuda"); 25 | m.def("aggregation_forward_cuda", &aggregation_forward_cuda, "aggregation_forward_cuda"); 26 | m.def("aggregation_backward_cuda", &aggregation_backward_cuda, "aggregation_backward_cuda"); 27
| m.def("attention_step1_forward_cuda", &attention_step1_forward_cuda, "attention_step1_forward_cuda"); 28 | m.def("attention_step1_backward_cuda", &attention_step1_backward_cuda, "attention_step1_backward_cuda"); 29 | m.def("attention_step2_forward_cuda", &attention_step2_forward_cuda, "attention_step2_forward_cuda"); 30 | m.def("attention_step2_backward_cuda", &attention_step2_backward_cuda, "attention_step2_backward_cuda"); 31 | m.def("dot_prod_with_idx_forward_cuda", &dot_prod_with_idx_forward_cuda, "dot_prod_with_idx_forward_cuda"); 32 | m.def("dot_prod_with_idx_backward_cuda", &dot_prod_with_idx_backward_cuda, "dot_prod_with_idx_backward_cuda"); 33 | m.def("attention_step2_with_rel_pos_value_forward_cuda", &attention_step2_with_rel_pos_value_forward_cuda, "attention_step2_with_rel_pos_value_forward_cuda"); 34 | m.def("attention_step2_with_rel_pos_value_backward_cuda", &attention_step2_with_rel_pos_value_backward_cuda, "attention_step2_with_rel_pos_value_backward_cuda"); 35 | m.def("attention_step1_forward_cuda_v2", &attention_step1_forward_cuda_v2, "attention_step1_forward_cuda_v2"); 36 | m.def("attention_step1_backward_cuda_v2", &attention_step1_backward_cuda_v2, "attention_step1_backward_cuda_v2"); 37 | m.def("attention_step2_forward_cuda_v2", &attention_step2_forward_cuda_v2, "attention_step2_forward_cuda_v2"); 38 | m.def("attention_step2_backward_cuda_v2", &attention_step2_backward_cuda_v2, "attention_step2_backward_cuda_v2"); 39 | m.def("dot_prod_with_idx_forward_cuda_v2", &dot_prod_with_idx_forward_cuda_v2, "dot_prod_with_idx_forward_cuda_v2"); 40 | m.def("dot_prod_with_idx_backward_cuda_v2", &dot_prod_with_idx_backward_cuda_v2, "dot_prod_with_idx_backward_cuda_v2"); 41 | m.def("attention_step2_with_rel_pos_value_forward_cuda_v2", &attention_step2_with_rel_pos_value_forward_cuda_v2, "attention_step2_with_rel_pos_value_forward_cuda_v2"); 42 | m.def("attention_step2_with_rel_pos_value_backward_cuda_v2", &attention_step2_with_rel_pos_value_backward_cuda_v2, "attention_step2_with_rel_pos_value_backward_cuda_v2"); 43 | m.def("dot_prod_with_idx_forward_cuda_v3", &dot_prod_with_idx_forward_cuda_v3, "dot_prod_with_idx_forward_cuda_v3"); 44 | m.def("dot_prod_with_idx_backward_cuda_v3", &dot_prod_with_idx_backward_cuda_v3, "dot_prod_with_idx_backward_cuda_v3"); 45 | } 46 | -------------------------------------------------------------------------------- /libs/pointops2/src/rpe/relative_pos_encoding_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "relative_pos_encoding_cuda_kernel.h" 5 | 6 | void dot_prod_with_idx_forward_cuda(int N, int M, int h, int hdim, at::Tensor q_tensor, at::Tensor index_tensor, 7 | at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor) 8 | { 9 | const float *q = q_tensor.data_ptr(); 10 | const float *table = table_tensor.data_ptr(); 11 | const int *index = index_tensor.data_ptr(); 12 | const int *rel_idx = rel_idx_tensor.data_ptr(); 13 | float *output = output_tensor.data_ptr(); 14 | dot_prod_with_idx_forward_cuda_launcher(N, M, h, hdim, q, index, table, rel_idx, output); 15 | } 16 | 17 | void dot_prod_with_idx_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, 18 | at::Tensor q_tensor, at::Tensor index_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, 19 | at::Tensor grad_q_tensor, at::Tensor grad_table_tensor) 20 | { 21 | const float *grad_out = grad_out_tensor.data_ptr(); 22 | const float *q = q_tensor.data_ptr(); 
23 | const int *index = index_tensor.data_ptr(); 24 | const float *table = table_tensor.data_ptr(); 25 | const int *rel_idx = rel_idx_tensor.data_ptr(); 26 | float *grad_q = grad_q_tensor.data_ptr(); 27 | float *grad_table = grad_table_tensor.data_ptr(); 28 | dot_prod_with_idx_backward_cuda_launcher(N, M, h, hdim, grad_out, q, index, table, rel_idx, grad_q, grad_table); 29 | } 30 | 31 | void attention_step2_with_rel_pos_value_forward_cuda(int N, int M, int h, int hdim, at::Tensor attn_tensor, at::Tensor v_tensor, 32 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor) 33 | { 34 | const float *attn = attn_tensor.data_ptr(); 35 | const float *v = v_tensor.data_ptr(); 36 | const int *index0 = index0_tensor.data_ptr(); 37 | const int *index1 = index1_tensor.data_ptr(); 38 | const float *table = table_tensor.data_ptr(); 39 | const int *rel_idx = rel_idx_tensor.data_ptr(); 40 | float *output = output_tensor.data_ptr(); 41 | attention_step2_with_rel_pos_value_forward_cuda_launcher(N, M, h, hdim, attn, v, index0, index1, table, rel_idx, output); 42 | } 43 | 44 | void attention_step2_with_rel_pos_value_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, 45 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor table_tensor, 46 | at::Tensor rel_idx_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor, at::Tensor grad_table_tensor) 47 | { 48 | const float *grad_out = grad_out_tensor.data_ptr(); 49 | const int *index0 = index0_tensor.data_ptr(); 50 | const int *index1 = index1_tensor.data_ptr(); 51 | const float *attn = attn_tensor.data_ptr(); 52 | const float *v = v_tensor.data_ptr(); 53 | const float *table = table_tensor.data_ptr(); 54 | const int *rel_idx = rel_idx_tensor.data_ptr(); 55 | float *grad_attn = grad_attn_tensor.data_ptr(); 56 | float *grad_v = grad_v_tensor.data_ptr(); 57 | float *grad_table = grad_table_tensor.data_ptr(); 58 | attention_step2_with_rel_pos_value_backward_cuda_launcher(N, M, h, hdim, grad_out, index0, index1, attn, v, table, rel_idx, grad_attn, grad_v, grad_table); 59 | } 60 | -------------------------------------------------------------------------------- /libs/pointops2/src/rpe/relative_pos_encoding_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _RPE_CUDA_KERNEL 2 | #define _RPE_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void dot_prod_with_idx_forward_cuda(int N, int M, int h, int hdim, at::Tensor q_tensor, at::Tensor index_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor); 8 | void dot_prod_with_idx_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, at::Tensor q_tensor, at::Tensor index_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor grad_q_tensor, at::Tensor grad_table_tensor); 9 | 10 | void attention_step2_with_rel_pos_value_forward_cuda(int N, int M, int h, int hdim, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor); 11 | void attention_step2_with_rel_pos_value_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor grad_attn_tensor, at::Tensor 
grad_v_tensor, at::Tensor grad_table_tensor); 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | void dot_prod_with_idx_forward_cuda_launcher(int N, int M, int h, int hdim, const float *q, const int *index, const float *table, const int *rel_idx, float *output); 18 | void dot_prod_with_idx_backward_cuda_launcher(int N, int M, int h, int hdim, const float *grad_out, const float *q, const int *index, const float *table, const int *rel_idx, float *grad_q, float *grad_table); 19 | 20 | void attention_step2_with_rel_pos_value_forward_cuda_launcher(int N, int M, int h, int hdim, const float *attn, const float *v, const int *index0, const int *index1, const float *table, const int *rel_idx, float *output); 21 | void attention_step2_with_rel_pos_value_backward_cuda_launcher(int N, int M, int h, int hdim, const float *grad_out, const int *index0, const int *index1, const float *attn, const float *v, const float *table, const int *rel_idx, float *grad_attn, float *grad_v, float *grad_table); 22 | 23 | #ifdef __cplusplus 24 | } 25 | #endif 26 | #endif 27 | -------------------------------------------------------------------------------- /libs/pointops2/src/sampling/sampling_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "sampling_cuda_kernel.h" 5 | 6 | 7 | void furthestsampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor) 8 | { 9 | const float *xyz = xyz_tensor.data_ptr(); 10 | const int *offset = offset_tensor.data_ptr(); 11 | const int *new_offset = new_offset_tensor.data_ptr(); 12 | float *tmp = tmp_tensor.data_ptr(); 13 | int *idx = idx_tensor.data_ptr(); 14 | furthestsampling_cuda_launcher(b, n, xyz, offset, new_offset, tmp, idx); 15 | } 16 | -------------------------------------------------------------------------------- /libs/pointops2/src/sampling/sampling_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _SAMPLING_CUDA_KERNEL 2 | #define _SAMPLING_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void furthestsampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor); 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | void furthestsampling_cuda_launcher(int b, int n, const float *xyz, const int *offset, const int *new_offset, float *tmp, int *idx); 14 | 15 | #ifdef __cplusplus 16 | } 17 | #endif 18 | #endif 19 | -------------------------------------------------------------------------------- /libs/pointops2/src/subtraction/subtraction_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "subtraction_cuda_kernel.h" 5 | 6 | 7 | void subtraction_forward_cuda(int n, int nsample, int c, at::Tensor input1_tensor, at::Tensor input2_tensor, at::Tensor idx_tensor, at::Tensor output_tensor) 8 | { 9 | const float *input1 = input1_tensor.data_ptr(); 10 | const float *input2 = input2_tensor.data_ptr(); 11 | const int *idx = idx_tensor.data_ptr(); 12 | float *output = output_tensor.data_ptr(); 13 | subtraction_forward_cuda_launcher(n, nsample, c, input1, input2, idx, output); 14 | } 15 | 16 | void subtraction_backward_cuda(int n, int nsample, int c, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input1_tensor, 
at::Tensor grad_input2_tensor) 17 | { 18 | const int *idx = idx_tensor.data_ptr(); 19 | const float *grad_output = grad_output_tensor.data_ptr(); 20 | float *grad_input1 = grad_input1_tensor.data_ptr(); 21 | float *grad_input2 = grad_input2_tensor.data_ptr(); 22 | subtraction_backward_cuda_launcher(n, nsample, c, idx, grad_output, grad_input1, grad_input2); 23 | } 24 | -------------------------------------------------------------------------------- /libs/pointops2/src/subtraction/subtraction_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "subtraction_cuda_kernel.h" 3 | 4 | 5 | __global__ void subtraction_forward_cuda_kernel(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) { 6 | // input: input1: (n, c), input2: (n, c), idx: (n, nsample), output: (n, nsample, c) 7 | int index = blockIdx.x * blockDim.x + threadIdx.x; 8 | if (index >= n * nsample * c) return; 9 | const int c_idx = index % c; 10 | const int nsample_idx = (index / c) % nsample; 11 | const int n_idx = index / nsample / c; 12 | const int idx_idx = n_idx * nsample + nsample_idx; 13 | const int input1_idx = n_idx * c + c_idx; 14 | const int input2_idx = idx[idx_idx] * c + c_idx; 15 | output[index] = input1[input1_idx] - input2[input2_idx]; 16 | } 17 | 18 | __global__ void subtraction_backward_cuda_kernel(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) { 19 | // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c) 20 | int index = blockIdx.x * blockDim.x + threadIdx.x; 21 | if (index >= n * nsample * c) return; 22 | const int c_idx = index % c; 23 | const int nsample_idx = (index / c) % nsample; 24 | const int n_idx = index / nsample / c; 25 | const int idx_idx = n_idx * nsample + nsample_idx; 26 | const int input1_idx = n_idx * c + c_idx; 27 | const int input2_idx = idx[idx_idx] * c + c_idx; 28 | atomicAdd(grad_input1 + input1_idx, grad_output[index]); 29 | atomicAdd(grad_input2 + input2_idx, -grad_output[index]); 30 | } 31 | 32 | void subtraction_forward_cuda_launcher(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) { 33 | // input: input1: (n, c), input2: (n, c), idx: (n, nsample), output: (n, nsample, c) 34 | dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK)); 35 | dim3 threads(THREADS_PER_BLOCK); 36 | subtraction_forward_cuda_kernel<<>>(n, nsample, c, input1, input2, idx, output); 37 | } 38 | 39 | void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) { 40 | // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c) 41 | dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK)); 42 | dim3 threads(THREADS_PER_BLOCK); 43 | subtraction_backward_cuda_kernel<<>>(n, nsample, c, idx, grad_output, grad_input1, grad_input2); 44 | } 45 | -------------------------------------------------------------------------------- /libs/pointops2/src/subtraction/subtraction_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _SUBTRACTION_CUDA_KERNEL 2 | #define _SUBTRACTION_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void subtraction_forward_cuda(int n, int nsample, int c, at::Tensor input1_tensor, at::Tensor input2_tensor, at::Tensor idx_tensor, at::Tensor output_tensor); 8 
| void subtraction_backward_cuda(int n, int nsample, int c, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input1_tensor, at::Tensor grad_input2_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void subtraction_forward_cuda_launcher(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output); 15 | void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /pointcept/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/__init__.py -------------------------------------------------------------------------------- /pointcept/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /pointcept/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .defaults import DefaultDataset, ConcatDataset 2 | from .builder import build_dataset 3 | from .utils import point_collate_fn, collate_fn 4 | 5 | # outdoor scene 6 | from .semantic_kitti_multi_scans import SemanticKITTIMultiScansDataset 7 | # dataloader 8 | from .dataloader import MultiDatasetDataloader 9 | -------------------------------------------------------------------------------- /pointcept/datasets/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/datasets/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/datasets/__pycache__/builder.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/datasets/__pycache__/builder.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/datasets/__pycache__/dataloader.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/datasets/__pycache__/dataloader.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/datasets/__pycache__/defaults.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/datasets/__pycache__/defaults.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/datasets/__pycache__/semantic_kitti_multi_scans.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/datasets/__pycache__/semantic_kitti_multi_scans.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/datasets/__pycache__/transform.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/datasets/__pycache__/transform.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/datasets/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/datasets/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/datasets/builder.py: -------------------------------------------------------------------------------- 1 | """ 2 | Dataset Builder 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 6 | """ 7 | 8 | 9 | from pointcept.utils.registry import Registry 10 | 11 | DATASETS = Registry("datasets") 12 | 13 | 14 | def build_dataset(cfg): 15 | """Build datasets.""" 16 | return DATASETS.build(cfg) 17 | -------------------------------------------------------------------------------- /pointcept/datasets/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utils for Datasets 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 
6 | """ 7 | 8 | import random 9 | from collections.abc import Mapping, Sequence 10 | import numpy as np 11 | import torch 12 | from torch.utils.data.dataloader import default_collate 13 | 14 | 15 | def collate_fn(batch): 16 | """ 17 | collate function for point cloud which support dict and list, 18 | 'coord' is necessary to determine 'offset' 19 | """ 20 | if not isinstance(batch, Sequence): 21 | raise TypeError(f"{batch.dtype} is not supported.") 22 | 23 | if isinstance(batch[0], torch.Tensor): 24 | return torch.cat(list(batch)) 25 | elif isinstance(batch[0], str): 26 | # str is also a kind of Sequence, judgement should before Sequence 27 | return list(batch) 28 | elif isinstance(batch[0], Sequence): 29 | for data in batch: 30 | data.append(torch.tensor([data[0].shape[0]])) 31 | batch = [collate_fn(samples) for samples in zip(*batch)] 32 | batch[-1] = torch.cumsum(batch[-1], dim=0).int() 33 | return batch 34 | elif isinstance(batch[0], Mapping): 35 | batch = {key: collate_fn([d[key] for d in batch]) for key in batch[0]} 36 | for key in batch.keys(): 37 | if "offset" in key: 38 | batch[key] = torch.cumsum(batch[key], dim=0) 39 | return batch 40 | else: 41 | return default_collate(batch) 42 | 43 | 44 | def point_collate_fn(batch, mix_prob=0): 45 | assert isinstance( 46 | batch[0], Mapping 47 | ) # currently, only support input_dict, rather than input_list 48 | batch = collate_fn(batch) 49 | if "offset" in batch.keys(): 50 | # Mix3d (https://arxiv.org/pdf/2110.02210.pdf) 51 | if random.random() < mix_prob: 52 | batch["offset"] = torch.cat( 53 | [batch["offset"][1:-1:2], batch["offset"][-1].unsqueeze(0)], dim=0 54 | ) 55 | return batch 56 | 57 | 58 | def gaussian_kernel(dist2: np.array, a: float = 1, c: float = 5): 59 | return a * np.exp(-dist2 / (2 * c**2)) 60 | -------------------------------------------------------------------------------- /pointcept/engines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/engines/__init__.py -------------------------------------------------------------------------------- /pointcept/engines/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/engines/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/engines/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/engines/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /pointcept/engines/__pycache__/defaults.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/engines/__pycache__/defaults.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/engines/__pycache__/defaults.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/engines/__pycache__/defaults.cpython-39.pyc -------------------------------------------------------------------------------- /pointcept/engines/__pycache__/launch.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/engines/__pycache__/launch.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/engines/__pycache__/test.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/engines/__pycache__/test.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/engines/__pycache__/train.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/engines/__pycache__/train.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/engines/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | from .default import HookBase 2 | from .misc import * 3 | from .evaluator import * 4 | 5 | from .builder import build_hooks 6 | -------------------------------------------------------------------------------- /pointcept/engines/hooks/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/engines/hooks/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/engines/hooks/__pycache__/builder.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/engines/hooks/__pycache__/builder.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/engines/hooks/__pycache__/default.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/engines/hooks/__pycache__/default.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/engines/hooks/__pycache__/evaluator.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/engines/hooks/__pycache__/evaluator.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/engines/hooks/__pycache__/misc.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/engines/hooks/__pycache__/misc.cpython-38.pyc -------------------------------------------------------------------------------- 
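A usage sketch for the collate helpers in pointcept/datasets/utils.py above. The key name "strength" and the point counts are illustrative assumptions; only the concatenate-then-cumsum behaviour for "offset" is taken from collate_fn / point_collate_fn themselves.

import torch
from pointcept.datasets.utils import point_collate_fn

# two toy scans with 4 and 6 points; in the real pipeline a transform adds the
# per-sample "offset" entry (assumed here), and "strength" is a hypothetical feature key
a = dict(coord=torch.rand(4, 3), strength=torch.rand(4, 1), offset=torch.tensor([4]))
b = dict(coord=torch.rand(6, 3), strength=torch.rand(6, 1), offset=torch.tensor([6]))

batch = point_collate_fn([a, b], mix_prob=0.0)
print(batch["coord"].shape)  # torch.Size([10, 3]) -- samples are concatenated, not stacked
print(batch["offset"])       # tensor([ 4, 10])    -- cumulative point counts per sample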
/pointcept/engines/hooks/builder.py: -------------------------------------------------------------------------------- 1 | """ 2 | Hook Builder 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 6 | """ 7 | 8 | from pointcept.utils.registry import Registry 9 | 10 | 11 | HOOKS = Registry("hooks") 12 | 13 | 14 | def build_hooks(cfg): 15 | hooks = [] 16 | for hook_cfg in cfg: 17 | hooks.append(HOOKS.build(hook_cfg)) 18 | return hooks 19 | -------------------------------------------------------------------------------- /pointcept/engines/hooks/default.py: -------------------------------------------------------------------------------- 1 | """ 2 | Default Hook 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 6 | """ 7 | 8 | 9 | class HookBase: 10 | """ 11 | Base class for hooks that can be registered with :class:`TrainerBase`. 12 | """ 13 | 14 | trainer = None # A weak reference to the trainer object. 15 | 16 | def before_train(self): 17 | pass 18 | 19 | def before_epoch(self): 20 | pass 21 | 22 | def before_step(self): 23 | pass 24 | 25 | def after_step(self): 26 | pass 27 | 28 | def after_epoch(self): 29 | pass 30 | 31 | def after_train(self): 32 | pass 33 | -------------------------------------------------------------------------------- /pointcept/models/MambaMOS/__init__.py: -------------------------------------------------------------------------------- 1 | from .MambaMOS import * -------------------------------------------------------------------------------- /pointcept/models/MambaMOS/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/models/MambaMOS/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_model 2 | from .default import DefaultSegmentor, DefaultClassifier 3 | 4 | # Backbones 5 | from .MambaMOS import * -------------------------------------------------------------------------------- /pointcept/models/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/models/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/models/__pycache__/builder.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/models/__pycache__/builder.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/models/__pycache__/default.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/models/__pycache__/default.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/models/__pycache__/modules.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/models/__pycache__/modules.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/models/builder.py: -------------------------------------------------------------------------------- 1 | """ 2 | Model Builder 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 6 | """ 7 | 8 | from pointcept.utils.registry import Registry 9 | 10 | MODELS = Registry("models") 11 | MODULES = Registry("modules") 12 | 13 | 14 | def build_model(cfg): 15 | """Build models.""" 16 | return MODELS.build(cfg) 17 | -------------------------------------------------------------------------------- /pointcept/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_criteria 2 | 3 | from .misc import CrossEntropyLoss, SmoothCELoss, DiceLoss, FocalLoss, BinaryFocalLoss 4 | from .lovasz import LovaszLoss 5 | -------------------------------------------------------------------------------- /pointcept/models/losses/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/models/losses/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/models/losses/__pycache__/builder.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/models/losses/__pycache__/builder.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/models/losses/__pycache__/lovasz.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/models/losses/__pycache__/lovasz.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/models/losses/__pycache__/misc.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/models/losses/__pycache__/misc.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/models/losses/builder.py: -------------------------------------------------------------------------------- 1 | """ 2 | Criteria Builder 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 
6 | """ 7 | 8 | from pointcept.utils.registry import Registry 9 | 10 | LOSSES = Registry("losses") 11 | 12 | 13 | class Criteria(object): 14 | def __init__(self, cfg=None): 15 | self.cfg = cfg if cfg is not None else [] 16 | self.criteria = [] 17 | for loss_cfg in self.cfg: 18 | self.criteria.append(LOSSES.build(cfg=loss_cfg)) 19 | 20 | def __call__(self, pred, target): 21 | if len(self.criteria) == 0: 22 | # loss computation occur in model 23 | return pred 24 | loss = 0 25 | for c in self.criteria: 26 | loss += c(pred, target) 27 | return loss 28 | 29 | 30 | def build_criteria(cfg): 31 | return Criteria(cfg) 32 | -------------------------------------------------------------------------------- /pointcept/models/modules.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import torch.nn as nn 3 | import spconv.pytorch as spconv 4 | from collections import OrderedDict 5 | from pointcept.models.utils.structure import Point 6 | 7 | 8 | class PointModule(nn.Module): 9 | r"""PointModule 10 | placeholder, all module subclass from this will take Point in PointSequential. 11 | """ 12 | 13 | def __init__(self, *args, **kwargs): 14 | super().__init__(*args, **kwargs) 15 | 16 | 17 | class PointSequential(PointModule): 18 | r"""A sequential container. 19 | Modules will be added to it in the order they are passed in the constructor. 20 | Alternatively, an ordered dict of modules can also be passed in. 21 | """ 22 | 23 | def __init__(self, *args, **kwargs): 24 | super().__init__() 25 | if len(args) == 1 and isinstance(args[0], OrderedDict): 26 | for key, module in args[0].items(): 27 | self.add_module(key, module) 28 | else: 29 | for idx, module in enumerate(args): 30 | self.add_module(str(idx), module) 31 | for name, module in kwargs.items(): 32 | if sys.version_info < (3, 6): 33 | raise ValueError("kwargs only supported in py36+") 34 | if name in self._modules: 35 | raise ValueError("name exists.") 36 | self.add_module(name, module) 37 | 38 | def __getitem__(self, idx): 39 | if not (-len(self) <= idx < len(self)): 40 | raise IndexError("index {} is out of range".format(idx)) 41 | if idx < 0: 42 | idx += len(self) 43 | it = iter(self._modules.values()) 44 | for i in range(idx): 45 | next(it) 46 | return next(it) 47 | 48 | def __len__(self): 49 | return len(self._modules) 50 | 51 | def add(self, module, name=None): 52 | if name is None: 53 | name = str(len(self._modules)) 54 | if name in self._modules: 55 | raise KeyError("name exists") 56 | self.add_module(name, module) 57 | 58 | def forward(self, input): 59 | for k, module in self._modules.items(): 60 | # Point module 61 | if isinstance(module, PointModule): 62 | input = module(input) 63 | # Spconv module 64 | elif spconv.modules.is_spconv_module(module): 65 | if isinstance(input, Point): 66 | input.sparse_conv_feat = module(input.sparse_conv_feat) 67 | input.feat = input.sparse_conv_feat.features 68 | else: 69 | input = module(input) 70 | # PyTorch module 71 | else: 72 | if isinstance(input, Point): 73 | input.feat = module(input.feat) 74 | if "sparse_conv_feat" in input.keys(): 75 | input.sparse_conv_feat = input.sparse_conv_feat.replace_feature( 76 | input.feat 77 | ) 78 | elif isinstance(input, spconv.SparseConvTensor): 79 | if input.indices.shape[0] != 0: 80 | input = input.replace_feature(module(input.features)) 81 | else: 82 | input = module(input) 83 | return input 84 | -------------------------------------------------------------------------------- /pointcept/models/utils/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .misc import offset2batch, offset2bincount, batch2offset, off_diagonal 2 | from .checkpoint import checkpoint 3 | from .serialization import encode, decode 4 | from .structure import Point 5 | -------------------------------------------------------------------------------- /pointcept/models/utils/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/models/utils/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/models/utils/__pycache__/checkpoint.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/models/utils/__pycache__/checkpoint.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/models/utils/__pycache__/misc.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/models/utils/__pycache__/misc.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/models/utils/__pycache__/structure.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/models/utils/__pycache__/structure.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/models/utils/checkpoint.py: -------------------------------------------------------------------------------- 1 | """ 2 | Checkpoint Utils for Models 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 6 | """ 7 | 8 | 9 | import torch 10 | 11 | 12 | class CheckpointFunction(torch.autograd.Function): 13 | @staticmethod 14 | def forward(ctx, run_function, length, *args): 15 | ctx.run_function = run_function 16 | ctx.input_tensors = list(args[:length]) 17 | ctx.input_params = list(args[length:]) 18 | 19 | with torch.no_grad(): 20 | output_tensors = ctx.run_function(*ctx.input_tensors) 21 | return output_tensors 22 | 23 | @staticmethod 24 | def backward(ctx, *output_grads): 25 | ctx.input_tensors = [x.detach().requires_grad_(True) for x in ctx.input_tensors] 26 | with torch.enable_grad(): 27 | # Fixes a bug where the first op in run_function modifies the 28 | # Tensor storage in place, which is not allowed for detach()'d 29 | # Tensors. 30 | shallow_copies = [x.view_as(x) for x in ctx.input_tensors] 31 | output_tensors = ctx.run_function(*shallow_copies) 32 | input_grads = torch.autograd.grad( 33 | output_tensors, 34 | ctx.input_tensors + ctx.input_params, 35 | output_grads, 36 | allow_unused=True, 37 | ) 38 | del ctx.input_tensors 39 | del ctx.input_params 40 | del output_tensors 41 | return (None, None) + input_grads 42 | 43 | 44 | def checkpoint(func, inputs, params, flag): 45 | """ 46 | Evaluate a function without caching intermediate activations, allowing for 47 | reduced memory at the expense of extra compute in the backward pass. 48 | :param func: the function to evaluate. 
49 | :param inputs: the argument sequence to pass to `func`. 50 | :param params: a sequence of parameters `func` depends on but does not 51 | explicitly take as arguments. 52 | :param flag: if False, disable gradient checkpointing. 53 | """ 54 | if flag: 55 | args = tuple(inputs) + tuple(params) 56 | return CheckpointFunction.apply(func, len(inputs), *args) 57 | else: 58 | return func(*inputs) 59 | -------------------------------------------------------------------------------- /pointcept/models/utils/misc.py: -------------------------------------------------------------------------------- 1 | """ 2 | General Utils for Models 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 6 | """ 7 | 8 | import torch 9 | 10 | 11 | @torch.inference_mode() 12 | def offset2bincount(offset): 13 | return torch.diff( 14 | offset, prepend=torch.tensor([0], device=offset.device, dtype=torch.long) 15 | ) 16 | 17 | 18 | @torch.inference_mode() 19 | def offset2batch(offset): 20 | bincount = offset2bincount(offset) 21 | return torch.arange( 22 | len(bincount), device=offset.device, dtype=torch.long 23 | ).repeat_interleave(bincount) 24 | 25 | 26 | @torch.inference_mode() 27 | def batch2offset(batch): 28 | return torch.cumsum(batch.bincount(), dim=0).long() 29 | 30 | 31 | def off_diagonal(x): 32 | # return a flattened view of the off-diagonal elements of a square matrix 33 | n, m = x.shape 34 | assert n == m 35 | return x.flatten()[:-1].view(n - 1, n + 1)[:, 1:].flatten() 36 | -------------------------------------------------------------------------------- /pointcept/models/utils/serialization/__init__.py: -------------------------------------------------------------------------------- 1 | from .default import ( 2 | encode, 3 | decode, 4 | z_order_encode, 5 | z_order_decode, 6 | hilbert_encode, 7 | hilbert_decode, 8 | ) 9 | -------------------------------------------------------------------------------- /pointcept/models/utils/serialization/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/models/utils/serialization/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/models/utils/serialization/__pycache__/default.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/models/utils/serialization/__pycache__/default.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/models/utils/serialization/__pycache__/hilbert.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/models/utils/serialization/__pycache__/hilbert.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/models/utils/serialization/__pycache__/z_order.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/models/utils/serialization/__pycache__/z_order.cpython-38.pyc 
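A quick round-trip check for the offset helpers in pointcept/models/utils/misc.py above, using the same toy cumulative sizes as the collate example earlier (values assumed):

import torch
from pointcept.models.utils import offset2bincount, offset2batch, batch2offset

offset = torch.tensor([4, 10])    # cumulative sizes, as produced by collate_fn
print(offset2bincount(offset))    # tensor([4, 6]) -- points per sample
batch = offset2batch(offset)      # tensor([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
print(batch2offset(batch))        # tensor([ 4, 10]) -- inverse mapping back to offsets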
-------------------------------------------------------------------------------- /pointcept/models/utils/serialization/default.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from .z_order import xyz2key as z_order_encode_ 3 | from .z_order import key2xyz as z_order_decode_ 4 | from .hilbert import encode as hilbert_encode_ 5 | from .hilbert import decode as hilbert_decode_ 6 | 7 | @torch.inference_mode() 8 | def encode(grid_coord, batch=None, depth=16, order="z"): 9 | assert order in {"z", "z-trans", "hilbert", "hilbert-trans"} 10 | if order == "z": 11 | code = z_order_encode(grid_coord, depth=depth) 12 | elif order == "z-trans": 13 | code = z_order_encode(grid_coord[:, [1, 0, 2]], depth=depth) 14 | elif order == "hilbert": 15 | code = hilbert_encode(grid_coord, depth=depth) 16 | elif order == "hilbert-trans": 17 | code = hilbert_encode(grid_coord[:, [1, 0, 2]], depth=depth) 18 | else: 19 | raise NotImplementedError 20 | if batch is not None: 21 | batch = batch.long() 22 | code = batch << depth * 3 | code 23 | return code 24 | 25 | 26 | @torch.inference_mode() 27 | def decode(code, depth=16, order="z"): 28 | assert order in {"z", "hilbert"} 29 | batch = code >> depth * 3 30 | code = code & ((1 << depth * 3) - 1) 31 | if order == "z": 32 | grid_coord = z_order_decode(code, depth=depth) 33 | elif order == "hilbert": 34 | grid_coord = hilbert_decode(code, depth=depth) 35 | else: 36 | raise NotImplementedError 37 | return grid_coord, batch 38 | 39 | 40 | def z_order_encode(grid_coord: torch.Tensor, depth: int = 16): 41 | x, y, z = grid_coord[:, 0].long(), grid_coord[:, 1].long(), grid_coord[:, 2].long() 42 | # we block the support to batch, maintain batched code in Point class 43 | code = z_order_encode_(x, y, z, b=None, depth=depth) 44 | return code 45 | 46 | 47 | def z_order_decode(code: torch.Tensor, depth): 48 | x, y, z = z_order_decode_(code, depth=depth) 49 | grid_coord = torch.stack([x, y, z], dim=-1) # (N, 3) 50 | return grid_coord 51 | 52 | 53 | def hilbert_encode(grid_coord: torch.Tensor, depth: int = 16): 54 | return hilbert_encode_(grid_coord, num_dims=3, num_bits=depth) 55 | 56 | 57 | def hilbert_decode(code: torch.Tensor, depth: int = 16): 58 | return hilbert_decode_(code, num_dims=3, num_bits=depth) 59 | -------------------------------------------------------------------------------- /pointcept/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/utils/__init__.py -------------------------------------------------------------------------------- /pointcept/utils/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/utils/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/utils/__pycache__/cache.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/utils/__pycache__/cache.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/utils/__pycache__/comm.cpython-38.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/utils/__pycache__/comm.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/utils/__pycache__/config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/utils/__pycache__/config.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/utils/__pycache__/env.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/utils/__pycache__/env.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/utils/__pycache__/events.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/utils/__pycache__/events.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/utils/__pycache__/logger.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/utils/__pycache__/logger.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/utils/__pycache__/misc.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/utils/__pycache__/misc.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/utils/__pycache__/optimizer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/utils/__pycache__/optimizer.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/utils/__pycache__/path.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/utils/__pycache__/path.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/utils/__pycache__/registry.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/utils/__pycache__/registry.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/utils/__pycache__/scheduler.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/utils/__pycache__/scheduler.cpython-38.pyc -------------------------------------------------------------------------------- 
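A minimal sketch of the serialization entry points defined in pointcept/models/utils/serialization/default.py above (re-exported through pointcept.models.utils). The coordinates are toy values, and the round-trip comment is an expectation rather than an asserted result:

import torch
from pointcept.models.utils import encode, decode

grid_coord = torch.tensor([[0, 0, 0], [1, 2, 3], [5, 4, 7]])  # non-negative voxel indices < 2**depth
batch = torch.tensor([0, 0, 1])

code = encode(grid_coord, batch=batch, depth=16, order="z")  # batch id packed into bits >= depth * 3
grid_back, batch_back = decode(code, depth=16, order="z")    # should recover grid_coord and batch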
/pointcept/utils/__pycache__/timer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/utils/__pycache__/timer.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/utils/cache.py: -------------------------------------------------------------------------------- 1 | """ 2 | Data Cache Utils 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 6 | """ 7 | 8 | import os 9 | import SharedArray 10 | 11 | try: 12 | from multiprocessing.shared_memory import ShareableList 13 | except ImportError: 14 | import warnings 15 | 16 | warnings.warn("Please update python version >= 3.8 to enable shared_memory") 17 | import numpy as np 18 | 19 | 20 | def shared_array(name, var=None): 21 | if var is not None: 22 | # check exist 23 | if os.path.exists(f"/dev/shm/{name}"): 24 | return SharedArray.attach(f"shm://{name}") 25 | # create shared_array 26 | data = SharedArray.create(f"shm://{name}", var.shape, dtype=var.dtype) 27 | data[...] = var[...] 28 | data.flags.writeable = False 29 | else: 30 | data = SharedArray.attach(f"shm://{name}").copy() 31 | return data 32 | 33 | 34 | def shared_dict(name, var=None): 35 | name = str(name) 36 | assert "." not in name # '.' is used as sep flag 37 | data = {} 38 | if var is not None: 39 | assert isinstance(var, dict) 40 | keys = var.keys() 41 | # current version only cache np.array 42 | keys_valid = [] 43 | for key in keys: 44 | if isinstance(var[key], np.ndarray): 45 | keys_valid.append(key) 46 | keys = keys_valid 47 | 48 | ShareableList(sequence=keys, name=name + ".keys") 49 | for key in keys: 50 | if isinstance(var[key], np.ndarray): 51 | data[key] = shared_array(name=f"{name}.{key}", var=var[key]) 52 | else: 53 | keys = list(ShareableList(name=name + ".keys")) 54 | for key in keys: 55 | data[key] = shared_array(name=f"{name}.{key}") 56 | return data 57 | -------------------------------------------------------------------------------- /pointcept/utils/env.py: -------------------------------------------------------------------------------- 1 | """ 2 | Environment Utils 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 6 | """ 7 | 8 | import os 9 | import random 10 | import numpy as np 11 | import torch 12 | import torch.backends.cudnn as cudnn 13 | 14 | from datetime import datetime 15 | 16 | 17 | def get_random_seed(): 18 | seed = ( 19 | os.getpid() 20 | + int(datetime.now().strftime("%S%f")) 21 | + int.from_bytes(os.urandom(2), "big") 22 | ) 23 | return seed 24 | 25 | 26 | def set_seed(seed=None): 27 | if seed is None: 28 | seed = get_random_seed() 29 | random.seed(seed) 30 | np.random.seed(seed) 31 | torch.manual_seed(seed) 32 | torch.cuda.manual_seed(seed) 33 | torch.cuda.manual_seed_all(seed) 34 | # cudnn.benchmark = False 35 | # cudnn.deterministic = True 36 | os.environ["PYTHONHASHSEED"] = str(seed) 37 | -------------------------------------------------------------------------------- /pointcept/utils/optimizer.py: -------------------------------------------------------------------------------- 1 | """ 2 | Optimizer 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 
6 | """ 7 | 8 | import torch 9 | from pointcept.utils.logger import get_root_logger 10 | from pointcept.utils.registry import Registry 11 | 12 | OPTIMIZERS = Registry("optimizers") 13 | 14 | 15 | OPTIMIZERS.register_module(module=torch.optim.SGD, name="SGD") 16 | OPTIMIZERS.register_module(module=torch.optim.Adam, name="Adam") 17 | OPTIMIZERS.register_module(module=torch.optim.AdamW, name="AdamW") 18 | 19 | 20 | def build_optimizer(cfg, model, param_dicts=None): 21 | if param_dicts is None: 22 | cfg.params = model.parameters() 23 | else: 24 | cfg.params = [dict(names=[], params=[], lr=cfg.lr)] 25 | for i in range(len(param_dicts)): 26 | param_group = dict(names=[], params=[]) 27 | if "lr" in param_dicts[i].keys(): 28 | param_group["lr"] = param_dicts[i].lr 29 | if "momentum" in param_dicts[i].keys(): 30 | param_group["momentum"] = param_dicts[i].momentum 31 | if "weight_decay" in param_dicts[i].keys(): 32 | param_group["weight_decay"] = param_dicts[i].weight_decay 33 | cfg.params.append(param_group) 34 | 35 | for n, p in model.named_parameters(): 36 | flag = False 37 | for i in range(len(param_dicts)): 38 | if param_dicts[i].keyword in n: 39 | cfg.params[i + 1]["names"].append(n) 40 | cfg.params[i + 1]["params"].append(p) 41 | flag = True 42 | break 43 | if not flag: 44 | cfg.params[0]["names"].append(n) 45 | cfg.params[0]["params"].append(p) 46 | 47 | logger = get_root_logger() 48 | 49 | for i in range(len(cfg.params)): 50 | param_names = cfg.params[i].pop("names") 51 | message = "" 52 | for key in cfg.params[i].keys(): 53 | if key != "params": 54 | message += f" {key}: {cfg.params[i][key]};" 55 | logger.info(f"Params Group {i+1} -{message} Params: {param_names}.") 56 | return OPTIMIZERS.build(cfg=cfg) 57 | -------------------------------------------------------------------------------- /pointcept/utils/path.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import os 3 | import os.path as osp 4 | from pathlib import Path 5 | 6 | from .misc import is_str 7 | 8 | 9 | def is_filepath(x): 10 | return is_str(x) or isinstance(x, Path) 11 | 12 | 13 | def fopen(filepath, *args, **kwargs): 14 | if is_str(filepath): 15 | return open(filepath, *args, **kwargs) 16 | elif isinstance(filepath, Path): 17 | return filepath.open(*args, **kwargs) 18 | raise ValueError("`filepath` should be a string or a Path") 19 | 20 | 21 | def check_file_exist(filename, msg_tmpl='file "{}" does not exist'): 22 | if not osp.isfile(filename): 23 | raise FileNotFoundError(msg_tmpl.format(filename)) 24 | 25 | 26 | def mkdir_or_exist(dir_name, mode=0o777): 27 | if dir_name == "": 28 | return 29 | dir_name = osp.expanduser(dir_name) 30 | os.makedirs(dir_name, mode=mode, exist_ok=True) 31 | 32 | 33 | def symlink(src, dst, overwrite=True, **kwargs): 34 | if os.path.lexists(dst) and overwrite: 35 | os.remove(dst) 36 | os.symlink(src, dst, **kwargs) 37 | 38 | 39 | def scandir(dir_path, suffix=None, recursive=False, case_sensitive=True): 40 | """Scan a directory to find the interested files. 41 | 42 | Args: 43 | dir_path (str | obj:`Path`): Path of the directory. 44 | suffix (str | tuple(str), optional): File suffix that we are 45 | interested in. Default: None. 46 | recursive (bool, optional): If set to True, recursively scan the 47 | directory. Default: False. 48 | case_sensitive (bool, optional) : If set to False, ignore the case of 49 | suffix. Default: True. 
50 | 51 | Returns: 52 | A generator for all the interested files with relative paths. 53 | """ 54 | if isinstance(dir_path, (str, Path)): 55 | dir_path = str(dir_path) 56 | else: 57 | raise TypeError('"dir_path" must be a string or Path object') 58 | 59 | if (suffix is not None) and not isinstance(suffix, (str, tuple)): 60 | raise TypeError('"suffix" must be a string or tuple of strings') 61 | 62 | if suffix is not None and not case_sensitive: 63 | suffix = ( 64 | suffix.lower() 65 | if isinstance(suffix, str) 66 | else tuple(item.lower() for item in suffix) 67 | ) 68 | 69 | root = dir_path 70 | 71 | def _scandir(dir_path, suffix, recursive, case_sensitive): 72 | for entry in os.scandir(dir_path): 73 | if not entry.name.startswith(".") and entry.is_file(): 74 | rel_path = osp.relpath(entry.path, root) 75 | _rel_path = rel_path if case_sensitive else rel_path.lower() 76 | if suffix is None or _rel_path.endswith(suffix): 77 | yield rel_path 78 | elif recursive and os.path.isdir(entry.path): 79 | # scan recursively if entry.path is a directory 80 | yield from _scandir(entry.path, suffix, recursive, case_sensitive) 81 | 82 | return _scandir(dir_path, suffix, recursive, case_sensitive) 83 | 84 | 85 | def find_vcs_root(path, markers=(".git",)): 86 | """Finds the root directory (including itself) of specified markers. 87 | 88 | Args: 89 | path (str): Path of directory or file. 90 | markers (list[str], optional): List of file or directory names. 91 | 92 | Returns: 93 | The directory contained one of the markers or None if not found. 94 | """ 95 | if osp.isfile(path): 96 | path = osp.dirname(path) 97 | 98 | prev, cur = None, osp.abspath(osp.expanduser(path)) 99 | while cur != prev: 100 | if any(osp.exists(osp.join(cur, marker)) for marker in markers): 101 | return cur 102 | prev, cur = cur, osp.split(cur)[0] 103 | return None 104 | -------------------------------------------------------------------------------- /pointcept/utils/timer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # -*- coding: utf-8 -*- 3 | 4 | from time import perf_counter 5 | from typing import Optional 6 | 7 | 8 | class Timer: 9 | """ 10 | A timer which computes the time elapsed since the start/reset of the timer. 11 | """ 12 | 13 | def __init__(self) -> None: 14 | self.reset() 15 | 16 | def reset(self) -> None: 17 | """ 18 | Reset the timer. 19 | """ 20 | self._start = perf_counter() 21 | self._paused: Optional[float] = None 22 | self._total_paused = 0 23 | self._count_start = 1 24 | 25 | def pause(self) -> None: 26 | """ 27 | Pause the timer. 28 | """ 29 | if self._paused is not None: 30 | raise ValueError("Trying to pause a Timer that is already paused!") 31 | self._paused = perf_counter() 32 | 33 | def is_paused(self) -> bool: 34 | """ 35 | Returns: 36 | bool: whether the timer is currently paused 37 | """ 38 | return self._paused is not None 39 | 40 | def resume(self) -> None: 41 | """ 42 | Resume the timer. 43 | """ 44 | if self._paused is None: 45 | raise ValueError("Trying to resume a Timer that is not paused!") 46 | # pyre-fixme[58]: `-` is not supported for operand types `float` and 47 | # `Optional[float]`. 48 | self._total_paused += perf_counter() - self._paused 49 | self._paused = None 50 | self._count_start += 1 51 | 52 | def seconds(self) -> float: 53 | """ 54 | Returns: 55 | (float): the total number of seconds since the start/reset of the 56 | timer, excluding the time when the timer is paused. 
57 | """ 58 | if self._paused is not None: 59 | end_time: float = self._paused # type: ignore 60 | else: 61 | end_time = perf_counter() 62 | return end_time - self._start - self._total_paused 63 | 64 | def avg_seconds(self) -> float: 65 | """ 66 | Returns: 67 | (float): the average number of seconds between every start/reset and 68 | pause. 69 | """ 70 | return self.seconds() / self._count_start 71 | -------------------------------------------------------------------------------- /pointcept/utils/visualization.py: -------------------------------------------------------------------------------- 1 | """ 2 | Visualization Utils 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 6 | """ 7 | 8 | import os 9 | import open3d as o3d 10 | import numpy as np 11 | import torch 12 | 13 | 14 | def to_numpy(x): 15 | if isinstance(x, torch.Tensor): 16 | x = x.clone().detach().cpu().numpy() 17 | assert isinstance(x, np.ndarray) 18 | return x 19 | 20 | 21 | def save_point_cloud(coord, color=None, file_path="pc.ply", logger=None): 22 | os.makedirs(os.path.dirname(file_path), exist_ok=True) 23 | coord = to_numpy(coord) 24 | if color is not None: 25 | color = to_numpy(color) 26 | pcd = o3d.geometry.PointCloud() 27 | pcd.points = o3d.utility.Vector3dVector(coord) 28 | pcd.colors = o3d.utility.Vector3dVector( 29 | np.ones_like(coord) if color is None else color 30 | ) 31 | o3d.io.write_point_cloud(file_path, pcd) 32 | if logger is not None: 33 | logger.info(f"Save Point Cloud to: {file_path}") 34 | 35 | 36 | def save_bounding_boxes( 37 | bboxes_corners, color=(1.0, 0.0, 0.0), file_path="bbox.ply", logger=None 38 | ): 39 | bboxes_corners = to_numpy(bboxes_corners) 40 | # point list 41 | points = bboxes_corners.reshape(-1, 3) 42 | # line list 43 | box_lines = np.array( 44 | [ 45 | [0, 1], 46 | [1, 2], 47 | [2, 3], 48 | [3, 0], 49 | [4, 5], 50 | [5, 6], 51 | [6, 7], 52 | [7, 0], 53 | [0, 4], 54 | [1, 5], 55 | [2, 6], 56 | [3, 7], 57 | ] 58 | ) 59 | lines = [] 60 | for i, _ in enumerate(bboxes_corners): 61 | lines.append(box_lines + i * 8) 62 | lines = np.concatenate(lines) 63 | # color list 64 | color = np.array([color for _ in range(len(lines))]) 65 | # generate line set 66 | line_set = o3d.geometry.LineSet() 67 | line_set.points = o3d.utility.Vector3dVector(points) 68 | line_set.lines = o3d.utility.Vector2iVector(lines) 69 | line_set.colors = o3d.utility.Vector3dVector(color) 70 | o3d.io.write_line_set(file_path, line_set) 71 | 72 | if logger is not None: 73 | logger.info(f"Save Boxes to: {file_path}") 74 | 75 | 76 | def save_lines( 77 | points, lines, color=(1.0, 0.0, 0.0), file_path="lines.ply", logger=None 78 | ): 79 | points = to_numpy(points) 80 | lines = to_numpy(lines) 81 | colors = np.array([color for _ in range(len(lines))]) 82 | line_set = o3d.geometry.LineSet() 83 | line_set.points = o3d.utility.Vector3dVector(points) 84 | line_set.lines = o3d.utility.Vector2iVector(lines) 85 | line_set.colors = o3d.utility.Vector3dVector(colors) 86 | o3d.io.write_line_set(file_path, line_set) 87 | 88 | if logger is not None: 89 | logger.info(f"Save Lines to: {file_path}") 90 | -------------------------------------------------------------------------------- /scripts/build_image.sh: -------------------------------------------------------------------------------- 1 | TORCH_VERSION=2.0.1 2 | CUDA_VERSION=11.7 3 | CUDNN_VERSION=8 4 | 5 | ARGS=`getopt -o t:c: -l torch:,cuda:,cudnn: -n "$0" -- "$@"` 6 | [ $? 
!= 0 ] && exit 1 7 | eval set -- "${ARGS}" 8 | while true ; do 9 | case "$1" in 10 | -t | --torch) 11 | TORCH_VERSION=$2 12 | shift 2 13 | ;; 14 | -c | --cuda) 15 | CUDA_VERSION=$2 16 | shift 2 17 | ;; 18 | --cudnn) 19 | CUDNN_VERSION=$2 20 | shift 2 21 | ;; 22 | --) 23 | break 24 | ;; 25 | *) 26 | echo "Invalid option: $1" 27 | exit 1 28 | ;; 29 | esac 30 | done 31 | 32 | CUDA_VERSION_NO_DOT=`echo ${CUDA_VERSION} | tr -d "."` 33 | BASE_TORCH_TAG=${TORCH_VERSION}-cuda${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel 34 | IMG_TAG=pointcept/pointcept:pytorch${BASE_TORCH_TAG} 35 | 36 | echo "TORCH VERSION: ${TORCH_VERSION}" 37 | echo "CUDA VERSION: ${CUDA_VERSION}" 38 | echo "CUDNN VERSION: ${CUDNN_VERSION}" 39 | 40 | 41 | cat > ./Dockerfile <<- EOM 42 | FROM pytorch/pytorch:${BASE_TORCH_TAG} 43 | 44 | # Fix nvidia-key error issue (NO_PUBKEY A4B469963BF863CC) 45 | RUN rm /etc/apt/sources.list.d/*.list 46 | 47 | # Installing apt packages 48 | RUN export DEBIAN_FRONTEND=noninteractive \ 49 | && apt -y update --no-install-recommends \ 50 | && apt -y install --no-install-recommends \ 51 | git wget tmux vim zsh build-essential cmake ninja-build libopenblas-dev libsparsehash-dev \ 52 | && apt autoremove -y \ 53 | && apt clean -y \ 54 | && export DEBIAN_FRONTEND=dialog 55 | 56 | # Install Pointcept environment 57 | RUN conda install h5py pyyaml -c anaconda -y 58 | RUN conda install sharedarray tensorboard tensorboardx yapf addict einops scipy plyfile termcolor timm -c conda-forge -y 59 | RUN conda install pytorch-cluster pytorch-scatter pytorch-sparse -c pyg -y 60 | 61 | RUN pip install --upgrade pip 62 | RUN pip install torch-geometric 63 | RUN pip install spconv-cu${CUDA_VERSION_NO_DOT} 64 | RUN pip install open3d 65 | 66 | # Build MinkowskiEngine 67 | RUN git clone https://github.com/NVIDIA/MinkowskiEngine.git 68 | WORKDIR /workspace/MinkowskiEngine 69 | RUN TORCH_CUDA_ARCH_LIST="5.2 6.0 6.1 7.0+PTX 8.0" python setup.py install --blas=openblas --force_cuda 70 | WORKDIR /workspace 71 | 72 | # Build pointops 73 | RUN git clone https://github.com/Pointcept/Pointcept.git 74 | RUN TORCH_CUDA_ARCH_LIST="5.2 6.0 6.1 7.0+PTX 8.0" pip install Pointcept/libs/pointops -v 75 | 76 | # Build pointgroup_ops 77 | RUN TORCH_CUDA_ARCH_LIST="5.2 6.0 6.1 7.0+PTX 8.0" pip install Pointcept/libs/pointgroup_ops -v 78 | 79 | # Build swin3d 80 | RUN TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX 8.0" pip install -U git+https://github.com/microsoft/Swin3D.git -v 81 | EOM 82 | 83 | docker build . -f ./Dockerfile -t $IMG_TAG -------------------------------------------------------------------------------- /scripts/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | cd $(dirname $(dirname "$0")) || exit 4 | PYTHON=python 5 | 6 | TEST_CODE=test.py 7 | 8 | DATASET=scannet 9 | CONFIG="None" 10 | EXP_NAME=debug 11 | WEIGHT=model_best 12 | GPU=None 13 | 14 | while getopts "p:d:c:n:w:g:" opt; do 15 | case $opt in 16 | p) 17 | PYTHON=$OPTARG 18 | ;; 19 | d) 20 | DATASET=$OPTARG 21 | ;; 22 | c) 23 | CONFIG=$OPTARG 24 | ;; 25 | n) 26 | EXP_NAME=$OPTARG 27 | ;; 28 | w) 29 | WEIGHT=$OPTARG 30 | ;; 31 | g) 32 | GPU=$OPTARG 33 | ;; 34 | \?) 
35 | echo "Invalid option: -$OPTARG" 36 | ;; 37 | esac 38 | done 39 | 40 | if [ "${NUM_GPU}" = 'None' ] 41 | then 42 | NUM_GPU=`$PYTHON -c 'import torch; print(torch.cuda.device_count())'` 43 | fi 44 | 45 | echo "Experiment name: $EXP_NAME" 46 | echo "Python interpreter dir: $PYTHON" 47 | echo "Dataset: $DATASET" 48 | echo "GPU Num: $GPU" 49 | 50 | EXP_DIR=exp/${DATASET}/${EXP_NAME} 51 | MODEL_DIR=${EXP_DIR}/model 52 | CODE_DIR=${EXP_DIR}/code 53 | CONFIG_DIR=${EXP_DIR}/config.py 54 | 55 | if [ "${CONFIG}" = "None" ] 56 | then 57 | CONFIG_DIR=${EXP_DIR}/config.py 58 | else 59 | CONFIG_DIR=configs/${DATASET}/${CONFIG}.py 60 | fi 61 | 62 | echo "Loading config in:" $CONFIG_DIR 63 | #export PYTHONPATH=./$CODE_DIR 64 | export PYTHONPATH=./ 65 | echo "Running code in: $CODE_DIR" 66 | 67 | 68 | echo " =========> RUN TASK <=========" 69 | 70 | #$PYTHON -u "$CODE_DIR"/tools/$TEST_CODE \ 71 | $PYTHON -u tools/$TEST_CODE \ 72 | --config-file "$CONFIG_DIR" \ 73 | --num-gpus "$GPU" \ 74 | --options save_path="$EXP_DIR" weight="${MODEL_DIR}"/"${WEIGHT}".pth 75 | -------------------------------------------------------------------------------- /scripts/train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | cd $(dirname $(dirname "$0")) || exit 4 | ROOT_DIR=$(pwd) 5 | PYTHON=python 6 | 7 | TRAIN_CODE=train.py 8 | 9 | DATASET=scannet 10 | CONFIG="None" 11 | EXP_NAME=debug 12 | WEIGHT="None" 13 | RESUME=false 14 | GPU=None 15 | 16 | 17 | while getopts "p:d:c:n:w:g:r:" opt; do 18 | case $opt in 19 | p) 20 | PYTHON=$OPTARG 21 | ;; 22 | d) 23 | DATASET=$OPTARG 24 | ;; 25 | c) 26 | CONFIG=$OPTARG 27 | ;; 28 | n) 29 | EXP_NAME=$OPTARG 30 | ;; 31 | w) 32 | WEIGHT=$OPTARG 33 | ;; 34 | r) 35 | RESUME=$OPTARG 36 | ;; 37 | g) 38 | GPU=$OPTARG 39 | ;; 40 | \?) 41 | echo "Invalid option: -$OPTARG" 42 | ;; 43 | esac 44 | done 45 | 46 | if [ "${NUM_GPU}" = 'None' ] 47 | then 48 | NUM_GPU=`$PYTHON -c 'import torch; print(torch.cuda.device_count())'` 49 | fi 50 | 51 | echo "Experiment name: $EXP_NAME" 52 | echo "Python interpreter dir: $PYTHON" 53 | echo "Dataset: $DATASET" 54 | echo "Config: $CONFIG" 55 | echo "GPU Num: $GPU" 56 | 57 | EXP_DIR=exp/${DATASET}/${EXP_NAME} 58 | MODEL_DIR=${EXP_DIR}/model 59 | CODE_DIR=${EXP_DIR}/code 60 | CONFIG_DIR=configs/${DATASET}/${CONFIG}.py 61 | 62 | 63 | echo " =========> CREATE EXP DIR <=========" 64 | echo "Experiment dir: $ROOT_DIR/$EXP_DIR" 65 | if ${RESUME} 66 | then 67 | CONFIG_DIR=${EXP_DIR}/config.py 68 | WEIGHT=$MODEL_DIR/model_last.pth 69 | else 70 | mkdir -p "$MODEL_DIR" "$CODE_DIR" 71 | cp -r scripts tools pointcept "$CODE_DIR" 72 | fi 73 | 74 | echo "Loading config in:" $CONFIG_DIR 75 | export PYTHONPATH=./$CODE_DIR 76 | echo "Running code in: $CODE_DIR" 77 | 78 | 79 | echo " =========> RUN TASK <=========" 80 | 81 | if [ "${WEIGHT}" = "None" ] 82 | then 83 | $PYTHON "$CODE_DIR"/tools/$TRAIN_CODE \ 84 | --config-file "$CONFIG_DIR" \ 85 | --num-gpus "$GPU" \ 86 | --options save_path="$EXP_DIR" 87 | else 88 | $PYTHON "$CODE_DIR"/tools/$TRAIN_CODE \ 89 | --config-file "$CONFIG_DIR" \ 90 | --num-gpus "$GPU" \ 91 | --options save_path="$EXP_DIR" resume="$RESUME" weight="$WEIGHT" 92 | fi -------------------------------------------------------------------------------- /tools/test.py: -------------------------------------------------------------------------------- 1 | """ 2 | Main Testing Script 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 
6 | """ 7 | 8 | import os 9 | # os.environ['CUDA_VISIBLE_DEVICES'] = "3" 10 | 11 | from pointcept.engines.defaults import ( 12 | default_argument_parser, 13 | default_config_parser, 14 | default_setup, 15 | ) 16 | from pointcept.engines.test import TESTERS 17 | from pointcept.engines.launch import launch 18 | 19 | 20 | def main_worker(cfg): 21 | cfg = default_setup(cfg) 22 | tester = TESTERS.build(dict(type=cfg.test.type, cfg=cfg)) 23 | tester.test() 24 | 25 | 26 | def main(): 27 | args = default_argument_parser().parse_args() 28 | cfg = default_config_parser(args.config_file, args.options) 29 | 30 | launch( 31 | main_worker, 32 | num_gpus_per_machine=args.num_gpus, 33 | num_machines=args.num_machines, 34 | machine_rank=args.machine_rank, 35 | dist_url=args.dist_url, 36 | cfg=(cfg,), 37 | ) 38 | 39 | 40 | if __name__ == "__main__": 41 | main() 42 | -------------------------------------------------------------------------------- /tools/test_s3dis_6fold.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test script for S3DIS 6-fold cross validation 3 | 4 | Gathering Area_X.pth from result folder of experiment record of each area as follows: 5 | |- RECORDS_PATH 6 | |- Area_1.pth 7 | |- Area_2.pth 8 | |- Area_3.pth 9 | |- Area_4.pth 10 | |- Area_5.pth 11 | |- Area_6.pth 12 | 13 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 14 | Please cite our work if the code is helpful to you. 15 | """ 16 | 17 | import argparse 18 | import os 19 | 20 | import torch 21 | import numpy as np 22 | import glob 23 | from pointcept.utils.logger import get_root_logger 24 | 25 | CLASS_NAMES = [ 26 | "ceiling", 27 | "floor", 28 | "wall", 29 | "beam", 30 | "column", 31 | "window", 32 | "door", 33 | "table", 34 | "chair", 35 | "sofa", 36 | "bookcase", 37 | "board", 38 | "clutter", 39 | ] 40 | 41 | 42 | def evaluation(intersection, union, target, logger=None): 43 | iou_class = intersection / (union + 1e-10) 44 | accuracy_class = intersection / (target + 1e-10) 45 | mIoU = np.mean(iou_class) 46 | mAcc = np.mean(accuracy_class) 47 | allAcc = sum(intersection) / (sum(target) + 1e-10) 48 | 49 | if logger is not None: 50 | logger.info( 51 | "Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}".format( 52 | mIoU, mAcc, allAcc 53 | ) 54 | ) 55 | for i in range(len(CLASS_NAMES)): 56 | logger.info( 57 | "Class_{idx} - {name} Result: iou/accuracy {iou:.4f}/{accuracy:.4f}".format( 58 | idx=i, 59 | name=CLASS_NAMES[i], 60 | iou=iou_class[i], 61 | accuracy=accuracy_class[i], 62 | ) 63 | ) 64 | 65 | 66 | def main(): 67 | parser = argparse.ArgumentParser() 68 | parser.add_argument( 69 | "--record_root", 70 | required=True, 71 | help="Path to the S3DIS record of each split", 72 | ) 73 | config = parser.parse_args() 74 | logger = get_root_logger( 75 | log_file=os.path.join(config.record_root, "6-fold.log"), 76 | file_mode="w", 77 | ) 78 | 79 | records = sorted(glob.glob(os.path.join(config.record_root, "Area_*.pth"))) 80 | assert len(records) == 6 81 | intersection_ = np.zeros(len(CLASS_NAMES), dtype=int) 82 | union_ = np.zeros(len(CLASS_NAMES), dtype=int) 83 | target_ = np.zeros(len(CLASS_NAMES), dtype=int) 84 | 85 | for record in records: 86 | area = os.path.basename(record).split(".")[0] 87 | info = torch.load(record) 88 | logger.info(f"<<<<<<<<<<<<<<<<< Parsing {area} <<<<<<<<<<<<<<<<<") 89 | intersection = info["intersection"] 90 | union = info["union"] 91 | target = info["target"] 92 | evaluation(intersection, union, target, logger=logger) 93 | intersection_ += intersection 94 | union_ 
+= union 95 | target_ += target 96 | 97 | logger.info(f"<<<<<<<<<<<<<<<<< Parsing 6-fold <<<<<<<<<<<<<<<<<") 98 | evaluation(intersection_, union_, target_, logger=logger) 99 | 100 | 101 | if __name__ == "__main__": 102 | main() 103 | -------------------------------------------------------------------------------- /tools/train.py: -------------------------------------------------------------------------------- 1 | """ 2 | Main Training Script 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 6 | """ 7 | 8 | # import os 9 | # os.environ['CUDA_VISIBLE_DEVICES'] = "1" 10 | 11 | from pointcept.engines.defaults import ( 12 | default_argument_parser, 13 | default_config_parser, 14 | default_setup, 15 | ) 16 | from pointcept.engines.train import TRAINERS 17 | from pointcept.engines.launch import launch 18 | 19 | 20 | def main_worker(cfg): 21 | cfg = default_setup(cfg) 22 | trainer = TRAINERS.build(dict(type=cfg.train.type, cfg=cfg)) 23 | trainer.train() 24 | 25 | 26 | def main(): 27 | args = default_argument_parser().parse_args() 28 | cfg = default_config_parser(args.config_file, args.options) 29 | 30 | launch( 31 | main_worker, 32 | num_gpus_per_machine=args.num_gpus, 33 | num_machines=args.num_machines, 34 | machine_rank=args.machine_rank, 35 | dist_url=args.dist_url, 36 | cfg=(cfg,), 37 | ) 38 | 39 | 40 | if __name__ == "__main__": 41 | main() 42 | --------------------------------------------------------------------------------
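For reference, the metric arithmetic performed by evaluation() in tools/test_s3dis_6fold.py above, applied to hand-picked toy counts for a 3-class problem (the numbers are assumptions; only the formulas come from the script):

import numpy as np

intersection = np.array([80, 45, 10])
union = np.array([100, 90, 40])
target = np.array([90, 60, 20])

iou_class = intersection / (union + 1e-10)          # [0.8, 0.5, 0.25]
accuracy_class = intersection / (target + 1e-10)    # [0.889, 0.75, 0.5]
print(iou_class.mean())                             # mIoU   ~= 0.517
print(accuracy_class.mean())                        # mAcc   ~= 0.713
print(intersection.sum() / (target.sum() + 1e-10))  # allAcc ~= 0.794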