├── LICENSE ├── README.md ├── assets ├── combinations.png ├── compare.png ├── compare_table.png ├── init.py └── overall.png ├── configs ├── _base_ │ └── default_runtime.py ├── nuscenes │ ├── Baseline.py │ ├── CDSegNet.py │ ├── CDSegNet_time.py │ ├── PTv3.py │ ├── PTv3_CNF.py │ ├── PTv3_CNF_testing_82.8.py │ ├── PTv3_CNF_time.py │ └── PTv3_time.py ├── scannet │ ├── Baseline.py │ ├── CDSegNet.py │ ├── CDSegNet_time.py │ ├── PTv3.py │ ├── PTv3_CNF.py │ └── PTv3_CNF_time.py └── scannet200 │ ├── Baseline.py │ ├── CDSegNet.py │ ├── PTv3.py │ └── PTv3_CNF.py ├── libs ├── pointgroup_ops │ ├── functions │ │ ├── __init__.py │ │ └── functions.py │ ├── setup.py │ └── src │ │ ├── bfs_cluster.cpp │ │ └── bfs_cluster_kernel.cu ├── pointops │ ├── __init__.py │ ├── functions │ │ ├── __init__.py │ │ ├── aggregation.py │ │ ├── attention.py │ │ ├── grouping.py │ │ ├── interpolation.py │ │ ├── query.py │ │ ├── sampling.py │ │ ├── subtraction.py │ │ └── utils.py │ ├── setup.py │ └── src │ │ ├── __init__.py │ │ ├── aggregation │ │ ├── aggregation_cuda.cpp │ │ ├── aggregation_cuda_kernel.cu │ │ └── aggregation_cuda_kernel.h │ │ ├── attention │ │ ├── attention_cuda.cpp │ │ ├── attention_cuda_kernel.cu │ │ └── attention_cuda_kernel.h │ │ ├── ball_query │ │ ├── ball_query_cuda.cpp │ │ ├── ball_query_cuda_kernel.cu │ │ └── ball_query_cuda_kernel.h │ │ ├── cuda_utils.h │ │ ├── grouping │ │ ├── grouping_cuda.cpp │ │ ├── grouping_cuda_kernel.cu │ │ └── grouping_cuda_kernel.h │ │ ├── interpolation │ │ ├── interpolation_cuda.cpp │ │ ├── interpolation_cuda_kernel.cu │ │ └── interpolation_cuda_kernel.h │ │ ├── knn_query │ │ ├── knn_query_cuda.cpp │ │ ├── knn_query_cuda_kernel.cu │ │ └── knn_query_cuda_kernel.h │ │ ├── pointops_api.cpp │ │ ├── random_ball_query │ │ ├── random_ball_query_cuda.cpp │ │ ├── random_ball_query_cuda_kernel.cu │ │ └── random_ball_query_cuda_kernel.h │ │ ├── sampling │ │ ├── sampling_cuda.cpp │ │ ├── sampling_cuda_kernel.cu │ │ └── sampling_cuda_kernel.h │ │ └── subtraction │ 
│ ├── subtraction_cuda.cpp │ │ ├── subtraction_cuda_kernel.cu │ │ └── subtraction_cuda_kernel.h └── pointops2 │ ├── __init__.py │ ├── functions │ ├── __init__.py │ ├── pointops.py │ ├── pointops2.py │ ├── pointops_ablation.py │ ├── test_attention_op_step1.py │ ├── test_attention_op_step1_v2.py │ ├── test_attention_op_step2.py │ ├── test_relative_pos_encoding_op_step1.py │ ├── test_relative_pos_encoding_op_step1_v2.py │ ├── test_relative_pos_encoding_op_step1_v3.py │ ├── test_relative_pos_encoding_op_step2.py │ └── test_relative_pos_encoding_op_step2_v2.py │ ├── setup.py │ └── src │ ├── __init__.py │ ├── aggregation │ ├── aggregation_cuda.cpp │ ├── aggregation_cuda_kernel.cu │ └── aggregation_cuda_kernel.h │ ├── attention │ ├── attention_cuda.cpp │ ├── attention_cuda_kernel.cu │ └── attention_cuda_kernel.h │ ├── attention_v2 │ ├── attention_cuda_kernel_v2.cu │ ├── attention_cuda_kernel_v2.h │ └── attention_cuda_v2.cpp │ ├── cuda_utils.h │ ├── grouping │ ├── grouping_cuda.cpp │ ├── grouping_cuda_kernel.cu │ └── grouping_cuda_kernel.h │ ├── interpolation │ ├── interpolation_cuda.cpp │ ├── interpolation_cuda_kernel.cu │ └── interpolation_cuda_kernel.h │ ├── knnquery │ ├── knnquery_cuda.cpp │ ├── knnquery_cuda_kernel.cu │ └── knnquery_cuda_kernel.h │ ├── pointops_api.cpp │ ├── rpe │ ├── relative_pos_encoding_cuda.cpp │ ├── relative_pos_encoding_cuda_kernel.cu │ └── relative_pos_encoding_cuda_kernel.h │ ├── rpe_v2 │ ├── relative_pos_encoding_cuda_kernel_v2.cu │ ├── relative_pos_encoding_cuda_kernel_v2.h │ └── relative_pos_encoding_cuda_v2.cpp │ ├── sampling │ ├── sampling_cuda.cpp │ ├── sampling_cuda_kernel.cu │ └── sampling_cuda_kernel.h │ └── subtraction │ ├── subtraction_cuda.cpp │ ├── subtraction_cuda_kernel.cu │ └── subtraction_cuda_kernel.h ├── pointcept ├── __init__.py ├── datasets │ ├── __init__.py │ ├── arkitscenes.py │ ├── builder.py │ ├── dataloader.py │ ├── defaults.py │ ├── modelnet.py │ ├── nuscenes.py │ ├── preprocessing │ │ ├── arkitscenes │ │ │ └── 
preprocess_arkitscenes_mesh.py │ │ ├── nuscenes │ │ │ └── preprocess_nuscenes_info.py │ │ ├── s3dis │ │ │ ├── preprocess_s3dis.py │ │ │ └── preprocess_s3dis_voxelized.py │ │ ├── scannet │ │ │ ├── meta_data │ │ │ │ ├── classes_ObjClassification-ShapeNetCore55.txt │ │ │ │ ├── classes_SemVoxLabel-nyu40id.txt │ │ │ │ ├── scannet200_constants.py │ │ │ │ ├── scannet200_splits.py │ │ │ │ ├── scannet_means.npz │ │ │ │ ├── scannetv1_test.txt │ │ │ │ ├── scannetv1_train.txt │ │ │ │ ├── scannetv1_val.txt │ │ │ │ ├── scannetv2-labels-old.combined.tsv │ │ │ │ ├── scannetv2-labels.combined.tsv │ │ │ │ ├── scannetv2_test.txt │ │ │ │ ├── scannetv2_train.txt │ │ │ │ └── scannetv2_val.txt │ │ │ ├── preprocess_scannet.py │ │ │ └── scannet_pair │ │ │ │ ├── SensorData.py │ │ │ │ ├── compute_full_overlapping.py │ │ │ │ ├── generage_list.py │ │ │ │ ├── plyfile.py │ │ │ │ ├── point_cloud_extractor.py │ │ │ │ ├── preprocess.py │ │ │ │ └── reader.py │ │ ├── structured3d │ │ │ └── preprocess_structured3d.py │ │ └── waymo │ │ │ └── preprocess_waymo.py │ ├── s3dis.py │ ├── scannet.py │ ├── scannet_pair.py │ ├── semantic_kitti.py │ ├── shapenet_part.py │ ├── structure3d.py │ ├── transform.py │ ├── utils.py │ └── waymo.py ├── engines │ ├── __init__.py │ ├── defaults.py │ ├── hooks │ │ ├── __init__.py │ │ ├── builder.py │ │ ├── default.py │ │ ├── evaluator.py │ │ └── misc.py │ ├── launch.py │ ├── test.py │ └── train.py ├── models │ ├── __init__.py │ ├── builder.py │ ├── context_aware_classifier │ │ ├── __init__.py │ │ └── context_aware_classifier_v1m1_base.py │ ├── default.py │ ├── losses │ │ ├── __init__.py │ │ ├── builder.py │ │ ├── lovasz.py │ │ └── misc.py │ ├── masked_scene_contrast │ │ ├── __init__.py │ │ ├── masked_scene_contrast_v1m1_base.py │ │ └── masked_scene_contrast_v1m2_csc.py │ ├── modules.py │ ├── oacnns │ │ ├── __init__.py │ │ └── oacnns_v1m1_base.py │ ├── octformer │ │ ├── __init__.py │ │ └── octformer_v1m1_base.py │ ├── point_group │ │ ├── __init__.py │ │ ├── 
point_group_v1m1_base.py │ │ └── utils.py │ ├── point_prompt_training │ │ ├── __init__.py │ │ ├── point_prompt_training_v1m1_language_guided.py │ │ ├── point_prompt_training_v1m2_decoupled.py │ │ └── prompt_driven_normalization.py │ ├── point_transformer │ │ ├── __init__.py │ │ ├── point_transformer_cls.py │ │ ├── point_transformer_partseg.py │ │ ├── point_transformer_seg.py │ │ └── utils.py │ ├── point_transformer_v2 │ │ ├── __init__.py │ │ ├── point_transformer_v2m1_origin.py │ │ ├── point_transformer_v2m2_base.py │ │ └── point_transformer_v2m3_pdnorm.py │ ├── point_transformer_v3 │ │ ├── __init__.py │ │ └── point_transformer_v3m1_base.py │ ├── sparse_unet │ │ ├── __init__.py │ │ ├── mink_unet.py │ │ ├── spconv_unet_v1m1_base.py │ │ ├── spconv_unet_v1m2_bn_momentum.py │ │ └── spconv_unet_v1m3_pdnorm.py │ ├── spvcnn │ │ ├── __init__.py │ │ └── ts_spvcnn.py │ ├── stratified_transformer │ │ ├── __init__.py │ │ ├── stratified_transformer_v1m1_origin.py │ │ └── stratified_transformer_v1m2_refine.py │ ├── swin3d │ │ ├── __init__.py │ │ ├── mink_layers.py │ │ ├── swin3d_layers.py │ │ └── swin3d_v1m1_base.py │ └── utils │ │ ├── __init__.py │ │ ├── checkpoint.py │ │ ├── misc.py │ │ ├── serialization │ │ ├── __init__.py │ │ ├── default.py │ │ ├── hilbert.py │ │ └── z_order.py │ │ └── structure.py └── utils │ ├── __init__.py │ ├── cache.py │ ├── comm.py │ ├── config.py │ ├── env.py │ ├── events.py │ ├── logger.py │ ├── misc.py │ ├── optimizer.py │ ├── path.py │ ├── registry.py │ ├── scheduler.py │ ├── timer.py │ └── visualization.py ├── scripts ├── build_image.sh ├── compile.sh ├── test.sh └── train.sh └── tools ├── test.py ├── test_CDSegNet_ScanNet.py ├── test_CDSegNet_ScanNet200.py ├── test_CDSegNet_nuScenes.py ├── test_time.py ├── train.py ├── train_CDSegNet_ScanNet.py ├── train_CDSegNet_ScanNet200.py └── train_CDSegNet_nuScenes.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 
Pointcept 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /assets/combinations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QWTforGithub/CDSegNet/cc23a7e98ee5e96787df032bf0a0547d6a55e32e/assets/combinations.png -------------------------------------------------------------------------------- /assets/compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QWTforGithub/CDSegNet/cc23a7e98ee5e96787df032bf0a0547d6a55e32e/assets/compare.png -------------------------------------------------------------------------------- /assets/compare_table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QWTforGithub/CDSegNet/cc23a7e98ee5e96787df032bf0a0547d6a55e32e/assets/compare_table.png -------------------------------------------------------------------------------- /assets/init.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /assets/overall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QWTforGithub/CDSegNet/cc23a7e98ee5e96787df032bf0a0547d6a55e32e/assets/overall.png -------------------------------------------------------------------------------- /configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | weight = None # path to model weight 2 | resume = False # whether to resume training process 3 | evaluate = True # evaluate after each epoch training process 4 | test_only = False # test process 5 | 6 | seed = None # train process will init a random seed and record 7 | save_path = "exp/default" 8 | num_worker = 16 # total worker in all gpu 9 | batch_size = 16 # total batch size in all 
gpu 10 | batch_size_val = None # auto adapt to bs 1 for each gpu 11 | batch_size_test = None # auto adapt to bs 1 for each gpu 12 | epoch = 100 # total epoch, data loop = epoch // eval_epoch 13 | eval_epoch = 100 # sche total eval & checkpoint epoch 14 | 15 | sync_bn = False 16 | enable_amp = False 17 | empty_cache = False 18 | find_unused_parameters = False 19 | 20 | mix_prob = 0 21 | param_dicts = None # example: param_dicts = [dict(keyword="block", lr_scale=0.1)] 22 | 23 | # hook 24 | hooks = [ 25 | dict(type="CheckpointLoader"), 26 | dict(type="IterationTimer", warmup_iter=2), 27 | dict(type="InformationWriter"), 28 | dict(type="SemSegEvaluator"), 29 | dict(type="CheckpointSaver", save_freq=None), 30 | dict(type="PreciseEvaluator", test_last=False), 31 | ] 32 | 33 | # Trainer 34 | train = dict(type="DefaultTrainer") 35 | 36 | # Tester 37 | test = dict(type="SemSegTester", verbose=True) 38 | -------------------------------------------------------------------------------- /libs/pointgroup_ops/functions/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions import bfs_cluster, ballquery_batch_p, Clustering 2 | -------------------------------------------------------------------------------- /libs/pointgroup_ops/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from sys import argv 3 | from setuptools import setup 4 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 5 | from distutils.sysconfig import get_config_vars 6 | 7 | (opt,) = get_config_vars("OPT") 8 | os.environ["OPT"] = " ".join( 9 | flag for flag in opt.split() if flag != "-Wstrict-prototypes" 10 | ) 11 | 12 | 13 | def _argparse(pattern, argv, is_flag=True, is_list=False): 14 | if is_flag: 15 | found = pattern in argv 16 | if found: 17 | argv.remove(pattern) 18 | return found, argv 19 | else: 20 | arr = [arg for arg in argv if pattern == arg.split("=")[0]] 21 | if 
is_list: 22 | if len(arr) == 0: # not found 23 | return False, argv 24 | else: 25 | assert "=" in arr[0], f"{arr[0]} requires a value." 26 | argv.remove(arr[0]) 27 | val = arr[0].split("=")[1] 28 | if "," in val: 29 | return val.split(","), argv 30 | else: 31 | return [val], argv 32 | else: 33 | if len(arr) == 0: # not found 34 | return False, argv 35 | else: 36 | assert "=" in arr[0], f"{arr[0]} requires a value." 37 | argv.remove(arr[0]) 38 | return arr[0].split("=")[1], argv 39 | 40 | 41 | INCLUDE_DIRS, argv = _argparse("--include_dirs", argv, False, is_list=True) 42 | include_dirs = [] 43 | if not (INCLUDE_DIRS is False): 44 | include_dirs += INCLUDE_DIRS 45 | 46 | setup( 47 | name="pointgroup_ops", 48 | packages=["pointgroup_ops"], 49 | package_dir={"pointgroup_ops": "functions"}, 50 | ext_modules=[ 51 | CUDAExtension( 52 | name="pointgroup_ops_cuda", 53 | sources=["src/bfs_cluster.cpp", "src/bfs_cluster_kernel.cu"], 54 | extra_compile_args={"cxx": ["-g"], "nvcc": ["-O2"]}, 55 | ) 56 | ], 57 | include_dirs=[*include_dirs], 58 | cmdclass={"build_ext": BuildExtension}, 59 | ) 60 | -------------------------------------------------------------------------------- /libs/pointgroup_ops/src/bfs_cluster_kernel.cu: -------------------------------------------------------------------------------- 1 | /* 2 | Ball Query with BatchIdx 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 
5 | */ 6 | #include 7 | #include 8 | #include 9 | 10 | #define TOTAL_THREADS 1024 11 | #define THREADS_PER_BLOCK 512 12 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 13 | 14 | 15 | /* ================================== ballquery_batch_p ================================== */ 16 | __global__ void ballquery_batch_p_cuda_(int n, int meanActive, float radius, const float *xyz, const int *batch_idxs, const int *batch_offsets, int *idx, int *start_len, int *cumsum) { 17 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 18 | if (pt_idx >= n) return; 19 | 20 | start_len += (pt_idx * 2); 21 | int idx_temp[1000]; 22 | 23 | float radius2 = radius * radius; 24 | float o_x = xyz[pt_idx * 3 + 0]; 25 | float o_y = xyz[pt_idx * 3 + 1]; 26 | float o_z = xyz[pt_idx * 3 + 2]; 27 | 28 | int batch_idx = batch_idxs[pt_idx]; 29 | int start = batch_offsets[batch_idx]; 30 | int end = batch_offsets[batch_idx + 1]; 31 | 32 | int cnt = 0; 33 | for(int k = start; k < end; k++){ 34 | float x = xyz[k * 3 + 0]; 35 | float y = xyz[k * 3 + 1]; 36 | float z = xyz[k * 3 + 2]; 37 | float d2 = (o_x - x) * (o_x - x) + (o_y - y) * (o_y - y) + (o_z - z) * (o_z - z); 38 | if(d2 < radius2){ 39 | if(cnt < 1000){ 40 | idx_temp[cnt] = k; 41 | } 42 | else{ 43 | break; 44 | } 45 | ++cnt; 46 | } 47 | } 48 | 49 | start_len[0] = atomicAdd(cumsum, cnt); 50 | start_len[1] = cnt; 51 | 52 | int thre = n * meanActive; 53 | if(start_len[0] >= thre) return; 54 | 55 | idx += start_len[0]; 56 | if(start_len[0] + cnt >= thre) cnt = thre - start_len[0]; 57 | 58 | for(int k = 0; k < cnt; k++){ 59 | idx[k] = idx_temp[k]; 60 | } 61 | } 62 | 63 | 64 | int ballquery_batch_p_cuda(int n, int meanActive, float radius, const float *xyz, const int *batch_idxs, const int *batch_offsets, int *idx, int *start_len, cudaStream_t stream) { 65 | // param xyz: (n, 3) 66 | // param batch_idxs: (n) 67 | // param batch_offsets: (B + 1) 68 | // output idx: (n * meanActive) dim 0 for number of points in the ball, idx in n 69 | // output 
start_len: (n, 2), int 70 | 71 | cudaError_t err; 72 | 73 | dim3 blocks(DIVUP(n, THREADS_PER_BLOCK)); 74 | dim3 threads(THREADS_PER_BLOCK); 75 | 76 | int cumsum = 0; 77 | int* p_cumsum; 78 | cudaMalloc((void**)&p_cumsum, sizeof(int)); 79 | cudaMemcpy(p_cumsum, &cumsum, sizeof(int), cudaMemcpyHostToDevice); 80 | 81 | ballquery_batch_p_cuda_<<>>(n, meanActive, radius, xyz, batch_idxs, batch_offsets, idx, start_len, p_cumsum); 82 | 83 | err = cudaGetLastError(); 84 | if (cudaSuccess != err) { 85 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 86 | exit(-1); 87 | } 88 | 89 | cudaMemcpy(&cumsum, p_cumsum, sizeof(int), cudaMemcpyDeviceToHost); 90 | return cumsum; 91 | } 92 | -------------------------------------------------------------------------------- /libs/pointops/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions import * 2 | -------------------------------------------------------------------------------- /libs/pointops/functions/__init__.py: -------------------------------------------------------------------------------- 1 | from .query import knn_query, ball_query, random_ball_query 2 | from .sampling import farthest_point_sampling 3 | from .grouping import grouping, grouping2 4 | from .interpolation import interpolation, interpolation2 5 | from .subtraction import subtraction 6 | from .aggregation import aggregation 7 | from .attention import attention_relation_step, attention_fusion_step 8 | from .utils import ( 9 | query_and_group, 10 | knn_query_and_group, 11 | ball_query_and_group, 12 | batch2offset, 13 | offset2batch, 14 | ) 15 | -------------------------------------------------------------------------------- /libs/pointops/functions/aggregation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from pointops._C import aggregation_forward_cuda, aggregation_backward_cuda 5 | 
6 | 7 | class Aggregation(Function): 8 | @staticmethod 9 | def forward(ctx, input, position, weight, idx): 10 | """ 11 | input: input: (n, c), position: (n, nsample, c), weight : (n, nsample, c'), idx: (n, nsample) 12 | output: (n, c) 13 | """ 14 | assert ( 15 | input.is_contiguous() 16 | and position.is_contiguous() 17 | and weight.is_contiguous() 18 | ) 19 | n, nsample, c = position.shape 20 | w_c = weight.shape[-1] 21 | output = torch.cuda.FloatTensor(n, c).zero_() 22 | aggregation_forward_cuda( 23 | n, nsample, c, w_c, input, position, weight, idx, output 24 | ) 25 | ctx.save_for_backward(input, position, weight, idx) 26 | return output 27 | 28 | @staticmethod 29 | def backward(ctx, grad_output): 30 | """ 31 | input: grad_out: (n, c) 32 | output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight : (n, nsample, c') 33 | """ 34 | input, position, weight, idx = ctx.saved_tensors 35 | n, nsample, c = position.shape 36 | w_c = weight.shape[-1] 37 | grad_input = torch.cuda.FloatTensor(n, c).zero_() 38 | grad_position = torch.cuda.FloatTensor(n, nsample, c).zero_() 39 | grad_weight = torch.cuda.FloatTensor(n, nsample, w_c).zero_() 40 | aggregation_backward_cuda( 41 | n, 42 | nsample, 43 | c, 44 | w_c, 45 | input, 46 | position, 47 | weight, 48 | idx, 49 | grad_output, 50 | grad_input, 51 | grad_position, 52 | grad_weight, 53 | ) 54 | return grad_input, grad_position, grad_weight, None 55 | 56 | 57 | aggregation = Aggregation.apply 58 | -------------------------------------------------------------------------------- /libs/pointops/functions/grouping.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from pointops._C import grouping_forward_cuda, grouping_backward_cuda 5 | 6 | 7 | class Grouping(Function): 8 | @staticmethod 9 | def forward(ctx, input, idx): 10 | """ 11 | input: input: (n, c), idx : (m, nsample) 12 | output: (m, nsample, c) 13 | """ 14 | 
assert input.is_contiguous() and idx.is_contiguous() 15 | m, nsample, n, c = idx.shape[0], idx.shape[1], input.shape[0], input.shape[1] 16 | output = torch.cuda.FloatTensor(m, nsample, c) 17 | grouping_forward_cuda(m, nsample, c, input, idx, output) 18 | ctx.n = n 19 | ctx.save_for_backward(idx) 20 | return output 21 | 22 | @staticmethod 23 | def backward(ctx, grad_output): 24 | """ 25 | input: grad_out: (m, c, nsample) 26 | output: (n, c), None 27 | """ 28 | n = ctx.n 29 | (idx,) = ctx.saved_tensors 30 | m, nsample, c = grad_output.shape 31 | grad_input = torch.cuda.FloatTensor(n, c).zero_() 32 | grouping_backward_cuda(m, nsample, c, grad_output, idx, grad_input) 33 | return grad_input, None 34 | 35 | 36 | def grouping(idx, feat, xyz, new_xyz=None, with_xyz=False): 37 | if new_xyz is None: 38 | new_xyz = xyz 39 | assert xyz.is_contiguous() and feat.is_contiguous() 40 | m, nsample, c = idx.shape[0], idx.shape[1], feat.shape[1] 41 | xyz = torch.cat([xyz, torch.zeros([1, 3]).to(xyz.device)], dim=0) 42 | feat = torch.cat([feat, torch.zeros([1, c]).to(feat.device)], dim=0) 43 | grouped_feat = feat[idx.view(-1).long(), :].view( 44 | m, nsample, c 45 | ) # (m, num_sample, c) 46 | 47 | if with_xyz: 48 | assert new_xyz.is_contiguous() 49 | mask = torch.sign(idx + 1) 50 | grouped_xyz = xyz[idx.view(-1).long(), :].view( 51 | m, nsample, 3 52 | ) - new_xyz.unsqueeze( 53 | 1 54 | ) # (m, num_sample, 3) 55 | grouped_xyz = torch.einsum( 56 | "n s c, n s -> n s c", grouped_xyz, mask 57 | ) # (m, num_sample, 3) 58 | return torch.cat((grouped_xyz, grouped_feat), -1) 59 | else: 60 | return grouped_feat 61 | 62 | 63 | grouping2 = Grouping.apply 64 | -------------------------------------------------------------------------------- /libs/pointops/functions/interpolation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from pointops._C import interpolation_forward_cuda, 
interpolation_backward_cuda 5 | from .query import knn_query 6 | 7 | 8 | def interpolation(xyz, new_xyz, feat, offset, new_offset, k=3): 9 | """ 10 | input: coords: (m, 3), new_xyz: (n, 3), color: (m, c), offset: (b), new_offset: (b) 11 | output: (n, c) 12 | """ 13 | assert xyz.is_contiguous() and new_xyz.is_contiguous() and feat.is_contiguous() 14 | idx, dist = knn_query(k, xyz, offset, new_xyz, new_offset) # (n, 3), (n, 3) 15 | dist_recip = 1.0 / (dist + 1e-8) # (n, 3) 16 | norm = torch.sum(dist_recip, dim=1, keepdim=True) 17 | weight = dist_recip / norm # (n, 3) 18 | 19 | new_feat = torch.cuda.FloatTensor(new_xyz.shape[0], feat.shape[1]).zero_() 20 | for i in range(k): 21 | new_feat += feat[idx[:, i].long(), :] * weight[:, i].unsqueeze(-1) 22 | return new_feat 23 | 24 | 25 | class Interpolation(Function): 26 | @staticmethod 27 | def forward(ctx, xyz, new_xyz, input, offset, new_offset, k=3): 28 | """ 29 | input: coords: (m, 3), new_xyz: (n, 3), input: (m, c), offset: (b), new_offset: (b) 30 | output: (n, c) 31 | """ 32 | assert xyz.is_contiguous() and new_xyz.is_contiguous() and input.is_contiguous() 33 | idx, dist = knn_query(k, xyz, offset, new_xyz, new_offset) # (n, k), (n, k) 34 | dist_recip = 1.0 / (dist + 1e-8) # (n, k) 35 | norm = torch.sum(dist_recip, dim=1, keepdim=True) 36 | weight = dist_recip / norm # (n, k) 37 | 38 | n, c, m = new_xyz.shape[0], input.shape[1], input.shape[0] 39 | output = torch.cuda.FloatTensor(n, c).zero_() 40 | interpolation_forward_cuda(n, c, k, input, idx, weight, output) 41 | ctx.m, ctx.k = m, k 42 | ctx.save_for_backward(idx, weight) 43 | return output 44 | 45 | @staticmethod 46 | def backward(ctx, grad_output): 47 | """ 48 | input: coords: (m, 3), new_xyz: (n, 3), input: (m, c), offset: (b), new_offset: (b) 49 | output: (n, c) 50 | """ 51 | m, k = ctx.m, ctx.k 52 | idx, weight = ctx.saved_tensors 53 | n, c = grad_output.shape 54 | grad_input = torch.cuda.FloatTensor(m, c).zero_() 55 | interpolation_backward_cuda(n, c, k, 
grad_output, idx, weight, grad_input) 56 | return None, None, grad_input, None, None, None 57 | 58 | 59 | interpolation2 = Interpolation.apply 60 | -------------------------------------------------------------------------------- /libs/pointops/functions/sampling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from pointops._C import farthest_point_sampling_cuda 5 | 6 | 7 | class FarthestPointSampling(Function): 8 | @staticmethod 9 | def forward(ctx, xyz, offset, new_offset): 10 | """ 11 | input: coords: (n, 3), offset: (b), new_offset: (b) 12 | output: idx: (m) 13 | """ 14 | assert xyz.is_contiguous() 15 | n, b, n_max = xyz.shape[0], offset.shape[0], offset[0] 16 | for i in range(1, b): 17 | n_max = max(offset[i] - offset[i - 1], n_max) 18 | idx = torch.cuda.IntTensor(new_offset[b - 1].item()).zero_() 19 | tmp = torch.cuda.FloatTensor(n).fill_(1e10) 20 | farthest_point_sampling_cuda( 21 | b, n_max, xyz, offset.int(), new_offset.int(), tmp, idx 22 | ) 23 | del tmp 24 | return idx 25 | 26 | 27 | farthest_point_sampling = FarthestPointSampling.apply 28 | -------------------------------------------------------------------------------- /libs/pointops/functions/subtraction.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from pointops._C import subtraction_forward_cuda, subtraction_backward_cuda 5 | 6 | 7 | class Subtraction(Function): 8 | @staticmethod 9 | def forward(ctx, input1, input2, idx): 10 | """ 11 | input: input1: (n, c), input2: (n, c), idx: (n, nsample) 12 | output: (n, nsample, c) 13 | """ 14 | assert input1.is_contiguous() and input2.is_contiguous() 15 | n, c = input1.shape 16 | nsample = idx.shape[-1] 17 | output = torch.cuda.FloatTensor(n, nsample, c).zero_() 18 | subtraction_forward_cuda(n, nsample, c, input1, input2, idx, output) 19 | 
ctx.save_for_backward(idx) 20 | return output 21 | 22 | @staticmethod 23 | def backward(ctx, grad_output): 24 | """ 25 | input: grad_out: (n, nsample, c) 26 | output: grad_input1: (n, c), grad_input2: (n, c) 27 | """ 28 | (idx,) = ctx.saved_tensors 29 | n, nsample, c = grad_output.shape 30 | grad_input1 = torch.cuda.FloatTensor(n, c).zero_() 31 | grad_input2 = torch.cuda.FloatTensor(n, c).zero_() 32 | subtraction_backward_cuda( 33 | n, nsample, c, idx, grad_output, grad_input1, grad_input2 34 | ) 35 | return grad_input1, grad_input2, None 36 | 37 | 38 | subtraction = Subtraction.apply 39 | -------------------------------------------------------------------------------- /libs/pointops/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup 3 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 4 | from distutils.sysconfig import get_config_vars 5 | 6 | (opt,) = get_config_vars("OPT") 7 | os.environ["OPT"] = " ".join( 8 | flag for flag in opt.split() if flag != "-Wstrict-prototypes" 9 | ) 10 | 11 | src = "src" 12 | sources = [ 13 | os.path.join(root, file) 14 | for root, dirs, files in os.walk(src) 15 | for file in files 16 | if file.endswith(".cpp") or file.endswith(".cu") 17 | ] 18 | 19 | setup( 20 | name="pointops", 21 | version="1.0", 22 | install_requires=["torch", "numpy"], 23 | packages=["pointops"], 24 | package_dir={"pointops": "functions"}, 25 | ext_modules=[ 26 | CUDAExtension( 27 | name="pointops._C", 28 | sources=sources, 29 | extra_compile_args={"cxx": ["-g"], "nvcc": ["-O2"]}, 30 | ) 31 | ], 32 | cmdclass={"build_ext": BuildExtension}, 33 | ) 34 | -------------------------------------------------------------------------------- /libs/pointops/src/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/QWTforGithub/CDSegNet/cc23a7e98ee5e96787df032bf0a0547d6a55e32e/libs/pointops/src/__init__.py -------------------------------------------------------------------------------- /libs/pointops/src/aggregation/aggregation_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "aggregation_cuda_kernel.h" 5 | 6 | 7 | void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor) 8 | { 9 | const float *input = input_tensor.data_ptr(); 10 | const float *position = position_tensor.data_ptr(); 11 | const float *weight = weight_tensor.data_ptr(); 12 | const int *idx = idx_tensor.data_ptr(); 13 | float *output = output_tensor.data_ptr(); 14 | aggregation_forward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, output); 15 | } 16 | 17 | void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor) 18 | { 19 | const float *input = input_tensor.data_ptr(); 20 | const float *position = position_tensor.data_ptr(); 21 | const float *weight = weight_tensor.data_ptr(); 22 | const int *idx = idx_tensor.data_ptr(); 23 | const float *grad_output = grad_output_tensor.data_ptr(); 24 | float *grad_input = grad_input_tensor.data_ptr(); 25 | float *grad_position = grad_position_tensor.data_ptr(); 26 | float *grad_weight = grad_weight_tensor.data_ptr(); 27 | aggregation_backward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight); 28 | } 29 | -------------------------------------------------------------------------------- 
/libs/pointops/src/aggregation/aggregation_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "aggregation_cuda_kernel.h" 3 | 4 | 5 | __global__ void aggregation_forward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) { 6 | // input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c) 7 | int index = blockIdx.x * blockDim.x + threadIdx.x; 8 | if (index >= n * c) return; 9 | const int c_idx = index % c; 10 | const int n_idx = index / c; 11 | const int w_c_idx = c_idx % w_c; 12 | for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++) 13 | { 14 | int idx_idx = n_idx * nsample + nsample_idx; 15 | int input_idx = idx[idx_idx] * c + c_idx; 16 | int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx; 17 | int weight_idx = n_idx * nsample * w_c + nsample_idx * w_c + w_c_idx; 18 | output[index] += (input[input_idx] + position[position_idx]) * weight[weight_idx]; 19 | } 20 | } 21 | 22 | __global__ void aggregation_backward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) { 23 | // input: grad_output: (n, c), output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight: (n, nsample, w_c) 24 | int index = blockIdx.x * blockDim.x + threadIdx.x; 25 | if (index >= n * c) return; 26 | const int c_idx = index % c; 27 | const int n_idx = index / c; 28 | const int w_c_idx = c_idx % w_c; 29 | for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++) 30 | { 31 | int idx_idx = n_idx * nsample + nsample_idx; 32 | int input_idx = idx[idx_idx] * c + c_idx; 33 | int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx; 34 | int weight_idx = n_idx * 
nsample * w_c + nsample_idx * w_c + w_c_idx; 35 | atomicAdd(grad_input + input_idx, grad_output[index] * weight[weight_idx]); 36 | grad_position[position_idx] = grad_output[index] * weight[weight_idx]; 37 | atomicAdd(grad_weight + weight_idx, grad_output[index] * (input[input_idx] + position[position_idx])); 38 | } 39 | } 40 | 41 | void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) { 42 | // input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c) 43 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)); 44 | dim3 threads(THREADS_PER_BLOCK); 45 | aggregation_forward_cuda_kernel<<>>(n, nsample, c, w_c, input, position, weight, idx, output); 46 | } 47 | 48 | void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) { 49 | // input: grad_output: (n, c), output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight: (n, nsample, w_c) 50 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)); 51 | dim3 threads(THREADS_PER_BLOCK); 52 | aggregation_backward_cuda_kernel<<>>(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight); 53 | } 54 | -------------------------------------------------------------------------------- /libs/pointops/src/aggregation/aggregation_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _AGGREGATION_CUDA_KERNEL 2 | #define _AGGREGATION_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor); 8 | void 
aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output); 15 | void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /libs/pointops/src/attention/attention_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ATTENTION_CUDA_KERNEL 2 | #define _ATTENTION_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void attention_relation_step_forward_cuda(int m, int g, int c, 8 | at::Tensor query_tensor, at::Tensor key_tensor, at::Tensor weight_tensor, 9 | at::Tensor index_target_tensor, at::Tensor index_refer_tensor, 10 | at::Tensor output_tensor); 11 | void attention_relation_step_backward_cuda(int m, int g, int c, 12 | at::Tensor query_tensor, at::Tensor grad_query_tensor, 13 | at::Tensor key_tensor, at::Tensor grad_key_tensor, 14 | at::Tensor weight_tensor, at::Tensor grad_weight_tensor, 15 | at::Tensor index_target_tensor, at::Tensor index_refer_tensor, 16 | at::Tensor grad_output_tensor); 17 | void attention_fusion_step_forward_cuda(int m, int g, int c, 18 | at::Tensor weight_tensor, at::Tensor value_tensor, 19 | at::Tensor index_target_tensor, at::Tensor index_refer_tensor, 20 | at::Tensor output_tensor); 21 | void 
attention_fusion_step_backward_cuda(int m, int g, int c, 22 | at::Tensor weight_tensor, at::Tensor grad_weight_tensor, 23 | at::Tensor value_tensor, at::Tensor grad_value_tensor, 24 | at::Tensor index_target_tensor, at::Tensor index_refer_tensor, 25 | at::Tensor grad_output_tensor); 26 | 27 | #ifdef __cplusplus 28 | extern "C" { 29 | #endif 30 | 31 | void attention_relation_step_forward_cuda_launcher(int m, int g, int c, 32 | const float *query, const float *key, const float *weight, 33 | const int *index_target, const int *index_refer, 34 | float *output); 35 | void attention_relation_step_backward_cuda_launcher(int m, int g, int c, 36 | const float *query, float *grad_query, 37 | const float *key, float *grad_key, 38 | const float *weight, float *grad_weight, 39 | const int *index_target, const int *index_refer, 40 | const float *grad_output); 41 | void attention_fusion_step_forward_cuda_launcher(int m, int g, int c, 42 | const float *weight, const float *value, 43 | const int *index_target, const int *index_refer, 44 | float *output); 45 | void attention_fusion_step_backward_cuda_launcher(int m, int g, int c, 46 | const float *weight, float *grad_weight, 47 | const float *value, float *grad_value, 48 | const int *index_target, const int *index_refer, 49 | const float *grad_output); 50 | 51 | #ifdef __cplusplus 52 | } 53 | #endif 54 | #endif 55 | -------------------------------------------------------------------------------- /libs/pointops/src/ball_query/ball_query_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "ball_query_cuda_kernel.h" 5 | 6 | 7 | void ball_query_cuda(int m, int nsample, 8 | float min_radius, float max_radius, 9 | at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, 10 | at::Tensor offset_tensor, at::Tensor new_offset_tensor, 11 | at::Tensor idx_tensor, at::Tensor dist2_tensor) 12 | { 13 | const float *xyz = xyz_tensor.data_ptr(); 14 | const float 
*new_xyz = new_xyz_tensor.data_ptr(); 15 | const int *offset = offset_tensor.data_ptr(); 16 | const int *new_offset = new_offset_tensor.data_ptr(); 17 | int *idx = idx_tensor.data_ptr(); 18 | float *dist2 = dist2_tensor.data_ptr(); 19 | ball_query_cuda_launcher(m, nsample, min_radius, max_radius, xyz, new_xyz, offset, new_offset, idx, dist2); 20 | } 21 | -------------------------------------------------------------------------------- /libs/pointops/src/ball_query/ball_query_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _BALL_QUERY_CUDA_KERNEL 2 | #define _BALL_QUERY_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void ball_query_cuda(int m, int nsample, 8 | float min_radius, float max_radius, 9 | at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, 10 | at::Tensor offset_tensor, at::Tensor new_offset_tensor, 11 | at::Tensor idx_tensor, at::Tensor dist2_tensor); 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | void ball_query_cuda_launcher(int m, int nsample, 18 | float min_radius, float max_radius, 19 | const float *xyz, const float *new_xyz, 20 | const int *offset, const int *new_offset, 21 | int *idx, float *dist2); 22 | 23 | #ifdef __cplusplus 24 | } 25 | #endif 26 | #endif 27 | -------------------------------------------------------------------------------- /libs/pointops/src/cuda_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef _CUDA_UTILS_H 2 | #define _CUDA_UTILS_H 3 | 4 | #include 5 | #include 6 | 7 | #define TOTAL_THREADS 1024 8 | #define THREADS_PER_BLOCK 512 9 | #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) 10 | 11 | inline int opt_n_threads(int work_size) { 12 | const int pow_2 = std::log(static_cast(work_size)) / std::log(2.0); 13 | return std::max(std::min(1 << pow_2, TOTAL_THREADS), 1); 14 | } 15 | 16 | inline dim3 opt_block_config(int x, int y) { 17 | const int x_threads = opt_n_threads(x); 18 | const int 
y_threads = std::max(std::min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1); 19 | dim3 block_config(x_threads, y_threads, 1); 20 | return block_config; 21 | } 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /libs/pointops/src/grouping/grouping_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "grouping_cuda_kernel.h" 5 | 6 | 7 | void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor) 8 | { 9 | const float *input = input_tensor.data_ptr(); 10 | const int *idx = idx_tensor.data_ptr(); 11 | float *output = output_tensor.data_ptr(); 12 | grouping_forward_cuda_launcher(m, nsample, c, input, idx, output); 13 | } 14 | 15 | void grouping_backward_cuda(int m, int nsample, int c, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor) 16 | { 17 | const float *grad_output = grad_output_tensor.data_ptr(); 18 | const int *idx = idx_tensor.data_ptr(); 19 | float *grad_input = grad_input_tensor.data_ptr(); 20 | grouping_backward_cuda_launcher(m, nsample, c, grad_output, idx, grad_input); 21 | } 22 | -------------------------------------------------------------------------------- /libs/pointops/src/grouping/grouping_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "grouping_cuda_kernel.h" 3 | 4 | 5 | __global__ void grouping_forward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ input, const int *__restrict__ idx, float *__restrict__ output) { 6 | // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c) 7 | int index = blockIdx.x * blockDim.x + threadIdx.x; 8 | if (index >= m * nsample * c) return; 9 | const int c_idx = index % c; 10 | const int nsample_idx = (index / c) % nsample; 11 | const int m_idx = index / 
nsample / c; 12 | const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx; 13 | output[index] = input[input_idx]; 14 | } 15 | 16 | __global__ void grouping_backward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ grad_output, const int *__restrict__ idx, float *__restrict__ grad_input) { 17 | // input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c) 18 | int index = blockIdx.x * blockDim.x + threadIdx.x; 19 | if (index >= m * nsample * c) return; 20 | const int c_idx = index % c; 21 | const int nsample_idx = (index / c) % nsample; 22 | const int m_idx = index / nsample / c; 23 | const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx; 24 | atomicAdd(grad_input + input_idx, grad_output[index]); 25 | } 26 | 27 | void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output) { 28 | // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c) 29 | dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK)); 30 | dim3 threads(THREADS_PER_BLOCK); 31 | grouping_forward_cuda_kernel<<>>(m, nsample, c, input, idx, output); 32 | } 33 | 34 | void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input) 35 | { 36 | // input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c) 37 | dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK)); 38 | dim3 threads(THREADS_PER_BLOCK); 39 | grouping_backward_cuda_kernel<<>>(m, nsample, c, grad_output, idx, grad_input); 40 | } 41 | -------------------------------------------------------------------------------- /libs/pointops/src/grouping/grouping_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _GROUPING_CUDA_KERNEL 2 | #define _GROUPING_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, 
at::Tensor idx_tensor, at::Tensor output_tensor); 8 | void grouping_backward_cuda(int m, int nsample, int c, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output); 15 | void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /libs/pointops/src/interpolation/interpolation_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "interpolation_cuda_kernel.h" 5 | 6 | 7 | void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor) 8 | { 9 | const float *input = input_tensor.data_ptr(); 10 | const int *idx = idx_tensor.data_ptr(); 11 | const float *weight = weight_tensor.data_ptr(); 12 | float *output = output_tensor.data_ptr(); 13 | interpolation_forward_cuda_launcher(n, c, k, input, idx, weight, output); 14 | } 15 | 16 | void interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_input_tensor) 17 | { 18 | const float *grad_output = grad_output_tensor.data_ptr(); 19 | const int *idx = idx_tensor.data_ptr(); 20 | const float *weight = weight_tensor.data_ptr(); 21 | float *grad_input = grad_input_tensor.data_ptr(); 22 | interpolation_backward_cuda_launcher(n, c, k, grad_output, idx, weight, grad_input); 23 | } 24 | -------------------------------------------------------------------------------- /libs/pointops/src/interpolation/interpolation_cuda_kernel.cu: 
-------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "interpolation_cuda_kernel.h" 3 | 4 | 5 | __global__ void interpolation_forward_cuda_kernel(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output) 6 | { 7 | // input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c) 8 | int index = blockIdx.x * blockDim.x + threadIdx.x; 9 | if (index >= n * c) return; 10 | int c_idx = index % c; 11 | int n_idx = index / c; 12 | for (int i = 0; i < k; i++) 13 | { 14 | int idx_idx = n_idx * k + i; 15 | int input_idx = idx[idx_idx] * c + c_idx; 16 | output[index] += input[input_idx] * weight[idx_idx]; 17 | } 18 | } 19 | 20 | __global__ void interpolation_backward_cuda_kernel(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input) 21 | { 22 | // input: grad_output: (n, c), idx: (n, k), weight: (n, k), output: grad_input (m, c) 23 | int index = blockIdx.x * blockDim.x + threadIdx.x; 24 | if (index >= n * c) return; 25 | int c_idx = index % c; 26 | int n_idx = index / c; 27 | for (int i = 0; i < k; i++) 28 | { 29 | int idx_idx = n_idx * k + i; 30 | int input_idx = idx[idx_idx] * c + c_idx; 31 | atomicAdd(grad_input + input_idx, grad_output[index] * weight[idx_idx]); 32 | } 33 | } 34 | 35 | void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output) { 36 | // input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c) 37 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)); 38 | dim3 threads(THREADS_PER_BLOCK); 39 | interpolation_forward_cuda_kernel<<>>(n, c, k, input, idx, weight, output); 40 | } 41 | 42 | void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input) { 43 | // input: grad_output: (n, c), idx: (n, k), weight: (n, k), output: grad_input 
(m, c) 44 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)); 45 | dim3 threads(THREADS_PER_BLOCK); 46 | interpolation_backward_cuda_kernel<<>>(n, c, k, grad_output, idx, weight, grad_input); 47 | } 48 | -------------------------------------------------------------------------------- /libs/pointops/src/interpolation/interpolation_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _INTERPOLATION_CUDA_KERNEL 2 | #define _INTERPOLATION_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor); 8 | void interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_input_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output); 15 | void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /libs/pointops/src/knn_query/knn_query_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "knn_query_cuda_kernel.h" 5 | 6 | 7 | void knn_query_cuda(int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor) 8 | { 9 | const float *xyz = xyz_tensor.data_ptr(); 10 | const float *new_xyz = new_xyz_tensor.data_ptr(); 11 | const int *offset = offset_tensor.data_ptr(); 12 | const int *new_offset = 
new_offset_tensor.data_ptr(); 13 | int *idx = idx_tensor.data_ptr(); 14 | float *dist2 = dist2_tensor.data_ptr(); 15 | knn_query_cuda_launcher(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2); 16 | } 17 | -------------------------------------------------------------------------------- /libs/pointops/src/knn_query/knn_query_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "knn_query_cuda_kernel.h" 3 | 4 | 5 | namespace knn_query_utils{ 6 | 7 | template 8 | __device__ void swap(DType *x, DType *y) 9 | { 10 | DType tmp = *x; 11 | *x = *y; 12 | *y = tmp; 13 | } 14 | 15 | __device__ void reheap(float *dist, int *idx, int k) 16 | { 17 | int root = 0; 18 | int child = root * 2 + 1; 19 | while (child < k) 20 | { 21 | if(child + 1 < k && dist[child+1] > dist[child]) 22 | child++; 23 | if(dist[root] > dist[child]) 24 | return; 25 | swap(&dist[root], &dist[child]); 26 | swap(&idx[root], &idx[child]); 27 | root = child; 28 | child = root * 2 + 1; 29 | } 30 | } 31 | 32 | 33 | __device__ void heap_sort(float *dist, int *idx, int k) 34 | { 35 | int i; 36 | for (i = k - 1; i > 0; i--) 37 | { 38 | swap(&dist[0], &dist[i]); 39 | swap(&idx[0], &idx[i]); 40 | reheap(dist, idx, i); 41 | } 42 | } 43 | 44 | 45 | __device__ int get_bt_idx(int idx, const int *offset) 46 | { 47 | int i = 0; 48 | while (1) 49 | { 50 | if (idx < offset[i]) 51 | break; 52 | else 53 | i++; 54 | } 55 | return i; 56 | } 57 | } // namespace knn_query_utils 58 | 59 | 60 | __global__ void knn_query_cuda_kernel(int m, int nsample, const float *__restrict__ xyz, const float *__restrict__ new_xyz, const int *__restrict__ offset, const int *__restrict__ new_offset, int *__restrict__ idx, float *__restrict__ dist2) { 61 | // input: xyz (n, 3) new_xyz (m, 3) 62 | // output: idx (m, nsample) dist2 (m, nsample) 63 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 64 | if (pt_idx >= m) return; 65 | 66 | new_xyz += pt_idx 
* 3; 67 | idx += pt_idx * nsample; 68 | dist2 += pt_idx * nsample; 69 | 70 | int bt_idx = knn_query_utils::get_bt_idx(pt_idx, new_offset); 71 | int start; 72 | if (bt_idx == 0) 73 | start = 0; 74 | else 75 | start = offset[bt_idx - 1]; 76 | int end = offset[bt_idx]; 77 | 78 | float new_x = new_xyz[0]; 79 | float new_y = new_xyz[1]; 80 | float new_z = new_xyz[2]; 81 | 82 | float best_dist[128]; 83 | int best_idx[128]; 84 | for(int i = 0; i < nsample; i++){ 85 | best_dist[i] = 1e10; 86 | best_idx[i] = -1; 87 | } 88 | for(int i = start; i < end; i++){ 89 | float x = xyz[i * 3 + 0]; 90 | float y = xyz[i * 3 + 1]; 91 | float z = xyz[i * 3 + 2]; 92 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z); 93 | if (d2 < best_dist[0]){ 94 | best_dist[0] = d2; 95 | best_idx[0] = i; 96 | knn_query_utils::reheap(best_dist, best_idx, nsample); 97 | } 98 | } 99 | knn_query_utils::heap_sort(best_dist, best_idx, nsample); 100 | for(int i = 0; i < nsample; i++){ 101 | idx[i] = best_idx[i]; 102 | dist2[i] = best_dist[i]; 103 | } 104 | } 105 | 106 | 107 | void knn_query_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2) { 108 | // input: new_xyz: (m, 3), xyz: (n, 3), idx: (m, nsample) 109 | dim3 blocks(DIVUP(m, THREADS_PER_BLOCK)); 110 | dim3 threads(THREADS_PER_BLOCK); 111 | knn_query_cuda_kernel<<>>(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2); 112 | } 113 | -------------------------------------------------------------------------------- /libs/pointops/src/knn_query/knn_query_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _KNN_QUERY_CUDA_KERNEL 2 | #define _KNN_QUERY_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void knn_query_cuda(int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor 
idx_tensor, at::Tensor dist2_tensor); 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | void knn_query_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2); 14 | 15 | #ifdef __cplusplus 16 | } 17 | #endif 18 | #endif 19 | -------------------------------------------------------------------------------- /libs/pointops/src/pointops_api.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "knn_query/knn_query_cuda_kernel.h" 5 | #include "ball_query/ball_query_cuda_kernel.h" 6 | #include "random_ball_query/random_ball_query_cuda_kernel.h" 7 | #include "sampling/sampling_cuda_kernel.h" 8 | #include "grouping/grouping_cuda_kernel.h" 9 | #include "interpolation/interpolation_cuda_kernel.h" 10 | #include "aggregation/aggregation_cuda_kernel.h" 11 | #include "subtraction/subtraction_cuda_kernel.h" 12 | #include "attention/attention_cuda_kernel.h" 13 | 14 | 15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 16 | m.def("knn_query_cuda", &knn_query_cuda, "knn_query_cuda"); 17 | m.def("ball_query_cuda", &ball_query_cuda, "ball_query_cuda"); 18 | m.def("random_ball_query_cuda", &random_ball_query_cuda, "random_ball_query_cuda"); 19 | m.def("farthest_point_sampling_cuda", &farthest_point_sampling_cuda, "farthest_point_sampling_cuda"); 20 | m.def("grouping_forward_cuda", &grouping_forward_cuda, "grouping_forward_cuda"); 21 | m.def("grouping_backward_cuda", &grouping_backward_cuda, "grouping_backward_cuda"); 22 | m.def("interpolation_forward_cuda", &interpolation_forward_cuda, "interpolation_forward_cuda"); 23 | m.def("interpolation_backward_cuda", &interpolation_backward_cuda, "interpolation_backward_cuda"); 24 | m.def("subtraction_forward_cuda", &subtraction_forward_cuda, "subtraction_forward_cuda"); 25 | m.def("subtraction_backward_cuda", &subtraction_backward_cuda, "subtraction_backward_cuda"); 26 | 
m.def("aggregation_forward_cuda", &aggregation_forward_cuda, "aggregation_forward_cuda"); 27 | m.def("aggregation_backward_cuda", &aggregation_backward_cuda, "aggregation_backward_cuda"); 28 | m.def("attention_relation_step_forward_cuda", &attention_relation_step_forward_cuda, "attention_relation_step_forward_cuda"); 29 | m.def("attention_relation_step_backward_cuda", &attention_relation_step_backward_cuda, "attention_relation_step_backward_cuda"); 30 | m.def("attention_fusion_step_forward_cuda", &attention_fusion_step_forward_cuda, "attention_fusion_step_forward_cuda"); 31 | m.def("attention_fusion_step_backward_cuda", &attention_fusion_step_backward_cuda, "attention_fusion_step_backward_cuda"); 32 | } 33 | -------------------------------------------------------------------------------- /libs/pointops/src/random_ball_query/random_ball_query_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "random_ball_query_cuda_kernel.h" 5 | 6 | 7 | void random_ball_query_cuda(int m, int nsample, 8 | float min_radius, float max_radius, at::Tensor order_tensor, 9 | at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, 10 | at::Tensor offset_tensor, at::Tensor new_offset_tensor, 11 | at::Tensor idx_tensor, at::Tensor dist2_tensor) 12 | { 13 | const int *order = order_tensor.data_ptr(); 14 | const float *xyz = xyz_tensor.data_ptr(); 15 | const float *new_xyz = new_xyz_tensor.data_ptr(); 16 | const int *offset = offset_tensor.data_ptr(); 17 | const int *new_offset = new_offset_tensor.data_ptr(); 18 | int *idx = idx_tensor.data_ptr(); 19 | float *dist2 = dist2_tensor.data_ptr(); 20 | random_ball_query_cuda_launcher(m, nsample, min_radius, max_radius, order, xyz, new_xyz, offset, new_offset, idx, dist2); 21 | } 22 | -------------------------------------------------------------------------------- /libs/pointops/src/random_ball_query/random_ball_query_cuda_kernel.h: 
-------------------------------------------------------------------------------- 1 | #ifndef _RANDOM_BALL_QUERY_CUDA_KERNEL 2 | #define _RANDOM_BALL_QUERY_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void random_ball_query_cuda(int m, int nsample, 8 | float min_radius, float max_radius, at::Tensor order_tensor, 9 | at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, 10 | at::Tensor offset_tensor, at::Tensor new_offset_tensor, 11 | at::Tensor idx_tensor, at::Tensor dist2_tensor); 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | void random_ball_query_cuda_launcher(int m, int nsample, 18 | float min_radius, float max_radius, const int *order, 19 | const float *xyz, const float *new_xyz, 20 | const int *offset, const int *new_offset, 21 | int *idx, float *dist2); 22 | 23 | #ifdef __cplusplus 24 | } 25 | #endif 26 | #endif 27 | -------------------------------------------------------------------------------- /libs/pointops/src/sampling/sampling_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "sampling_cuda_kernel.h" 5 | 6 | 7 | void farthest_point_sampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor) 8 | { 9 | const float *xyz = xyz_tensor.data_ptr(); 10 | const int *offset = offset_tensor.data_ptr(); 11 | const int *new_offset = new_offset_tensor.data_ptr(); 12 | float *tmp = tmp_tensor.data_ptr(); 13 | int *idx = idx_tensor.data_ptr(); 14 | farthest_point_sampling_cuda_launcher(b, n, xyz, offset, new_offset, tmp, idx); 15 | } 16 | -------------------------------------------------------------------------------- /libs/pointops/src/sampling/sampling_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _SAMPLING_CUDA_KERNEL 2 | #define _SAMPLING_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 
6 | 7 | void farthest_point_sampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor); 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | void farthest_point_sampling_cuda_launcher(int b, int n, const float *xyz, const int *offset, const int *new_offset, float *tmp, int *idx); 14 | 15 | #ifdef __cplusplus 16 | } 17 | #endif 18 | #endif 19 | -------------------------------------------------------------------------------- /libs/pointops/src/subtraction/subtraction_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "subtraction_cuda_kernel.h" 5 | 6 | 7 | void subtraction_forward_cuda(int n, int nsample, int c, at::Tensor input1_tensor, at::Tensor input2_tensor, at::Tensor idx_tensor, at::Tensor output_tensor) 8 | { 9 | const float *input1 = input1_tensor.data_ptr(); 10 | const float *input2 = input2_tensor.data_ptr(); 11 | const int *idx = idx_tensor.data_ptr(); 12 | float *output = output_tensor.data_ptr(); 13 | subtraction_forward_cuda_launcher(n, nsample, c, input1, input2, idx, output); 14 | } 15 | 16 | void subtraction_backward_cuda(int n, int nsample, int c, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input1_tensor, at::Tensor grad_input2_tensor) 17 | { 18 | const int *idx = idx_tensor.data_ptr(); 19 | const float *grad_output = grad_output_tensor.data_ptr(); 20 | float *grad_input1 = grad_input1_tensor.data_ptr(); 21 | float *grad_input2 = grad_input2_tensor.data_ptr(); 22 | subtraction_backward_cuda_launcher(n, nsample, c, idx, grad_output, grad_input1, grad_input2); 23 | } 24 | -------------------------------------------------------------------------------- /libs/pointops/src/subtraction/subtraction_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | 
// Forward kernel: output[i, j, k] = input1[i, k] - input2[idx[i, j], k].
// Layouts: input1 (n, c), input2 (n, c), idx (n, nsample), output (n, nsample, c).
// Each thread produces exactly one element of output.
__global__ void subtraction_forward_cuda_kernel(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) {
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    // The grid is rounded up to whole blocks, so trailing threads have no work.
    if (index >= n * nsample * c) return;
    // Decompose the flat output index into (n_idx, nsample_idx, c_idx).
    const int c_idx = index % c;
    const int nsample_idx = (index / c) % nsample;
    const int n_idx = index / nsample / c;
    const int idx_idx = n_idx * nsample + nsample_idx;
    const int input1_idx = n_idx * c + c_idx;
    const int input2_idx = idx[idx_idx] * c + c_idx;
    output[index] = input1[input1_idx] - input2[input2_idx];
}

// Backward kernel for the subtraction above.
// Layouts: grad_output (n, nsample, c), grad_input1 (n, c), grad_input2 (n, c).
// Both gradient buffers are accumulated into, so they must hold the desired
// starting values (normally zero) before the launch.
__global__ void subtraction_backward_cuda_kernel(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) {
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index >= n * nsample * c) return;
    const int c_idx = index % c;
    const int nsample_idx = (index / c) % nsample;
    const int n_idx = index / nsample / c;
    const int idx_idx = n_idx * nsample + nsample_idx;
    const int input1_idx = n_idx * c + c_idx;
    const int input2_idx = idx[idx_idx] * c + c_idx;
    // Several threads (one per nsample_idx, and any that share an idx entry)
    // target the same gradient slot, hence the atomic accumulation.
    atomicAdd(grad_input1 + input1_idx, grad_output[index]);
    atomicAdd(grad_input2 + input2_idx, -grad_output[index]);
}

// Launches the forward kernel with one thread per output element.
// input: input1: (n, c), input2: (n, c), idx: (n, nsample), output: (n, nsample, c)
void subtraction_forward_cuda_launcher(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) {
    dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    // A kernel launch needs an explicit execution configuration.
    subtraction_forward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, input1, input2, idx, output);
}

// Launches the backward kernel with one thread per grad_output element.
// input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c)
void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) {
    dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    subtraction_backward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, idx, grad_output, grad_input1, grad_input2);
}
-------------------------------------------------------------------------------- 1 | from pointops2 import * 2 | -------------------------------------------------------------------------------- /libs/pointops2/functions/test_attention_op_step1.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pointops 3 | from torch_scatter import ( 4 | scatter_max, 5 | scatter_mean, 6 | scatter_add, 7 | scatter_min, 8 | scatter_sum, 9 | ) 10 | 11 | torch.manual_seed(1) 12 | 13 | M = 800000 14 | N = 35000 15 | C = 96 16 | h = 6 17 | query = torch.rand(N, h, C // h).cuda() 18 | key = torch.rand(N, h, C // h).cuda() 19 | 20 | index_0 = torch.rand(M) 21 | index_0[index_0 < 0] = 0 22 | index_0 = (index_0 * N).long().cuda() 23 | 24 | index_1 = torch.rand(M) 25 | index_1[index_1 < 0] = 0 26 | index_1 = (index_1 * N).long().cuda() 27 | 28 | query.requires_grad = True 29 | key.requires_grad = True 30 | 31 | # rearrange index for acceleration 32 | index_0, indices = torch.sort(index_0) # [M,] 33 | index_1 = index_1[indices] # [M,] 34 | index_0_counts = index_0.bincount() 35 | 36 | print("index_0_counts.shape: ", index_0_counts.shape) 37 | 38 | n_max = index_0_counts.max() 39 | index_0_offsets = index_0_counts.cumsum(dim=-1) # [N] 40 | 41 | print("v1 index_0_offsets.shape: ", index_0_offsets.shape) 42 | 43 | index_0_offsets = torch.cat( 44 | [torch.zeros(1, dtype=torch.long).cuda(), index_0_offsets], 0 45 | ) # [N+1] 46 | 47 | # print("index_0[:100]: ", index_0[:100]) 48 | print("n_max: ", n_max) 49 | print("index_0_offsets.shape: ", index_0_offsets.shape) 50 | # input() 51 | 52 | print("index_0_offsets[:100]: ", index_0_offsets[:100]) 53 | print("index_1[300:320]: ", index_1[300:320]) 54 | 55 | 56 | attn_flat = pointops.attention_step1( 57 | query.float(), key.float(), index_0.int(), index_1.int() 58 | ) 59 | # loss = attn_flat.sum() 60 | # loss.backward() 61 | print( 62 | "attn_flat.shape: {}, attn_flat[300:320,:10]: 
import torch
import pointops
from torch_scatter import (
    scatter_max,
    scatter_mean,
    scatter_add,
    scatter_min,
    scatter_sum,
)

# Manual check of pointops.attention_step2 against a plain PyTorch /
# torch_scatter reference. Requires a CUDA device and the compiled extension.

torch.manual_seed(1)

M = 800000  # number of (index_0, index_1) pairs
N = 35000  # number of rows in `value` / in the output
C = 96  # total channels
h = 6  # heads; each head has C // h channels
softmax_attn_flat = torch.rand(M, h).cuda()
value = torch.rand(N, h, C // h).cuda()

index_0 = torch.rand(M)
index_0[index_0 < 0] = 0
index_0 = (index_0 * N).long().cuda()

index_1 = torch.rand(M)
index_1[index_1 < 0] = 0
index_1 = (index_1 * N).long().cuda()

softmax_attn_flat.requires_grad = True
value.requires_grad = True

# Reference result. It is computed without autograd so that the gradients
# printed below come from the CUDA op alone.
with torch.no_grad():
    value_flat = value[index_1]  # [M, h, C // h]
    x = (softmax_attn_flat.unsqueeze(-1) * value_flat).reshape(M, C)
    x = scatter_sum(src=x, index=index_0, dim=0, dim_size=N)  # [N, C]

print("x.shape: {}, x[:5,:10]: {}".format(x.shape, x[:5, :10]))

print("softmax_attn_flat.is_contiguous(): ", softmax_attn_flat.is_contiguous())
print("value.is_contiguous(): ", value.is_contiguous())
print("index_0.is_contiguous(): ", index_0.is_contiguous())
print("index_1.is_contiguous(): ", index_1.is_contiguous())

x_v2 = pointops.attention_step2(
    softmax_attn_flat.float(), value.float(), index_0.int(), index_1.int()
)
x_v2 = x_v2.view(N, C)
loss = x_v2.sum()
loss.backward()

print("x_v2.shape: {}, x_v2[:5,:10]: {}".format(x_v2.shape, x_v2[:5, :10]))

print("softmax_attn_flat.grad[:5, :10]: ", softmax_attn_flat.grad[:5, :10])
print("value.grad[:5, :3, :5]: ", value.grad[:5, :3, :5])
# The interactive input() pause that used to sit here is dropped so the
# comparison below also runs in non-interactive sessions.

print("((x-x_v2)**2 < 1e-8).all(): ", ((x - x_v2) ** 2 < 1e-8).all())

print("torch.max((x-x_v2)**2): ", torch.max((x - x_v2) ** 2))
import pointops 3 | from torch_scatter import ( 4 | scatter_max, 5 | scatter_mean, 6 | scatter_add, 7 | scatter_min, 8 | scatter_sum, 9 | ) 10 | 11 | torch.manual_seed(1) 12 | 13 | M = 80000 14 | N = 3500 15 | hdim = 16 16 | h = 6 17 | L = 31 18 | query = torch.rand(N, h, hdim).cuda() 19 | table = torch.rand(L, h, hdim, 3).cuda() 20 | 21 | index = torch.rand(M) 22 | index[index < 0] = 0 23 | index = (index * N).long().cuda() 24 | 25 | rel_index = torch.rand(M, 3) 26 | rel_index[rel_index < 0] = 0 27 | rel_index = (rel_index * L).long().cuda() 28 | 29 | query.requires_grad = True 30 | table.requires_grad = True 31 | 32 | # query_flat = query[index] #[M, h, hdim] 33 | # table_x, table_y, table_z = table[:,:,:,0], table[:,:,:,1], table[:,:,:,2] #[L, h, hdim] 34 | # rel_index_x, rel_index_y, rel_index_z = rel_index[:,0], rel_index[:,1], rel_index[:,2] #[M] 35 | # rel_pos_encoding = table_x[rel_index_x] + table_y[rel_index_y] + table_z[rel_index_z] #[M, h, hdim] 36 | # output = (query_flat * rel_pos_encoding).sum(-1) #[M, h] 37 | # loss = output.mean() 38 | # loss.backward() 39 | 40 | # print("output.shape: {}, output[:5,:10]: {}".format(output.shape, output[:5,:10])) 41 | # print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 42 | # print("table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2]) 43 | # input() 44 | 45 | # print("query.is_contiguous(): ", query.is_contiguous()) 46 | # print("key.is_contiguous(): ", key.is_contiguous()) 47 | # print("index_0.is_contiguous(): ", index_0.is_contiguous()) 48 | # print("index_1.is_contiguous(): ", index_1.is_contiguous()) 49 | 50 | output_v2 = pointops.dot_prod_with_idx(query, index.int(), table, rel_index.int()) 51 | loss = output_v2.mean() 52 | loss.backward() 53 | 54 | print( 55 | "output_v2.shape: {}, output_v2[:5,:10]: {}".format( 56 | output_v2.shape, output_v2[:5, :10] 57 | ) 58 | ) 59 | print("v2: query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 60 | print("v2: table.grad[:5, :3, :5, :2]: ", table.grad[:5, 
import torch
import pointops
from torch_scatter import (
    scatter_max,
    scatter_mean,
    scatter_add,
    scatter_min,
    scatter_sum,
)

# Compares pointops.dot_prod_with_idx_v2 with the sum of two separate
# pointops.dot_prod_with_idx calls (query side + key side).
# Requires a CUDA device and the compiled extension.

torch.manual_seed(1)

M, N = 80000, 3500
hdim, h, L = 16, 6, 31


def _random_index(upper, *shape):
    """Draw uniform samples of `shape` on the CPU and scale them to int64 indices in [0, upper)."""
    raw = torch.rand(*shape)
    raw[raw < 0] = 0
    return (raw * upper).long().cuda()


# Creation order matters: it fixes which random numbers each tensor receives.
query = torch.rand(N, h, hdim).cuda()
table_q = torch.rand(L, h, hdim, 3).cuda()
key = torch.rand(N, h, hdim).cuda()
table_k = torch.rand(L, h, hdim, 3).cuda()

index_q = _random_index(N, M)
index_k = _random_index(N, M)
rel_index = _random_index(L, M, 3)

for leaf in (query, table_q, key, table_k):
    leaf.requires_grad = True

# Baseline: two independent v1 calls added together (no backward on this path).
rel_idx_int = rel_index.int()
output = pointops.dot_prod_with_idx(
    query, index_q.int(), table_q, rel_idx_int
) + pointops.dot_prod_with_idx(key, index_k.int(), table_k, rel_index.int())

# Fused v2 op; gradients printed below come from this path only.
output_v2 = pointops.dot_prod_with_idx_v2(
    query, index_q.int(), key, index_k.int(), table_q, table_k, rel_index.int()
)
output_v2.mean().backward()

print(
    "output_v2.shape: {}, output_v2[:5,:10]: {}".format(
        output_v2.shape, output_v2[:5, :10]
    )
)
print("v2 query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5])
print("v2 table_q.grad[:5, :3, :5, :2]: ", table_q.grad[:5, :3, :5, :2])
print("v2 key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5])
print("v2 table_k.grad[:5, :3, :5, :2]: ", table_k.grad[:5, :3, :5, :2])

squared_diff = (output - output_v2) ** 2
print("((output-output_v2)**2).max(): ", squared_diff.max())
(rel_index * L).long().cuda() 36 | 37 | 38 | # rearrange index for acceleration 39 | index_q, indices = torch.sort(index_q) # [M,] 40 | index_k = index_k[indices] # [M,] 41 | rel_index = rel_index[indices] 42 | index_q_counts = index_q.bincount() 43 | 44 | print("index_q_counts.shape: ", index_q_counts.shape) 45 | 46 | n_max = index_q_counts.max() 47 | index_q_offsets = index_q_counts.cumsum(dim=-1) # [N] 48 | 49 | print("v1 index_q_offsets.shape: ", index_q_offsets.shape) 50 | 51 | index_q_offsets = torch.cat( 52 | [torch.zeros(1, dtype=torch.long).cuda(), index_q_offsets], 0 53 | ) # [N+1] 54 | 55 | # print("index_q[:100]: ", index_q[:100]) 56 | print("n_max: ", n_max) 57 | print("index_q_offsets.shape: ", index_q_offsets.shape) 58 | # input() 59 | 60 | print("index_q_offsets[:100]: ", index_q_offsets[:100]) 61 | print("index_k[:20]: ", index_k[:20]) 62 | 63 | query.requires_grad = True 64 | table_q.requires_grad = True 65 | key.requires_grad = True 66 | table_k.requires_grad = True 67 | 68 | output1 = pointops.dot_prod_with_idx(query, index_q.int(), table_q, rel_index.int()) 69 | output2 = pointops.dot_prod_with_idx(key, index_k.int(), table_k, rel_index.int()) 70 | output = output1 + output2 71 | loss = output.mean() 72 | loss.backward() 73 | 74 | # print("output.shape: {}, output[:5,:10]: {}".format(output.shape, output[:5,:10])) 75 | # print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 76 | # print("table_q.grad[:5, :3, :5, :2]: ", table_q.grad[:5, :3, :5, :2]) 77 | # print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 78 | # print("table_k.grad[:5, :3, :5, :2]: ", table_k.grad[:5, :3, :5, :2]) 79 | # input() 80 | 81 | # print("query.is_contiguous(): ", query.is_contiguous()) 82 | # print("key.is_contiguous(): ", key.is_contiguous()) 83 | # print("index_q.is_contiguous(): ", index_q.is_contiguous()) 84 | # print("index_k.is_contiguous(): ", index_k.is_contiguous()) 85 | 86 | output_v2 = pointops.dot_prod_with_idx_v3( 87 | query, 88 | 
import torch
import pointops
from torch_scatter import (
    scatter_max,
    scatter_mean,
    scatter_add,
    scatter_min,
    scatter_sum,
)

# Pure PyTorch / torch_scatter reference for "attention step 2 with relative
# position encoding on the values": prints the output and the gradients so they
# can be compared by eye with the CUDA op. Requires a CUDA device.

torch.manual_seed(1)

M, N = 80000, 3500
hdim, h, L = 16, 6, 31

# Creation order matters: it fixes which random numbers each tensor receives.
attn = torch.rand(M, h).cuda()
v = torch.rand(N, h, hdim).cuda()
table = torch.rand(L, h, hdim, 3).cuda()

index_0 = torch.rand(M)
index_0[index_0 < 0] = 0
index_0 = (index_0 * N).long().cuda()

index_1 = torch.rand(M)
index_1[index_1 < 0] = 0
index_1 = (index_1 * N).long().cuda()

rel_index = torch.rand(M, 3)
rel_index[rel_index < 0] = 0
rel_index = (rel_index * L).long().cuda()

for leaf in (attn, v, table):
    leaf.requires_grad = True

# Per-axis lookup tables, each [L, h, hdim], and per-axis indices, each [M].
table_x = table[..., 0]
table_y = table[..., 1]
table_z = table[..., 2]
rel_index_x = rel_index[:, 0]
rel_index_y = rel_index[:, 1]
rel_index_z = rel_index[:, 2]

rel_pos_encoding = (
    table_x[rel_index_x] + table_y[rel_index_y] + table_z[rel_index_z]
)  # [M, h, hdim]
v_flat_new = v[index_1] + rel_pos_encoding  # [M, h, hdim]
weighted = attn.unsqueeze(-1) * v_flat_new  # [M, h, hdim]
output = scatter_sum(src=weighted, index=index_0, dim=0, dim_size=N)  # [N, h, hdim]
loss = output.mean()
loss.backward()

print(
    "output.shape: {}, output[:5,:10,:5]: {}".format(output.shape, output[:5, :10, :5])
)
print("attn.grad[:5, :3]: ", attn.grad[:5, :3])
print("v.grad[:5, :3, :5]: ", v.grad[:5, :3, :5])
print("table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2])
input()
scatter_mean, 6 | scatter_add, 7 | scatter_min, 8 | scatter_sum, 9 | ) 10 | 11 | torch.manual_seed(1) 12 | 13 | M = 80000 14 | N = 3500 15 | hdim = 16 16 | h = 6 17 | L = 31 18 | attn = torch.rand(M, h).cuda() 19 | v = torch.rand(N, h, hdim).cuda() 20 | table = torch.rand(L, h, hdim, 3).cuda() 21 | 22 | index_0 = torch.rand(M) 23 | index_0[index_0 < 0] = 0 24 | index_0 = (index_0 * N).long().cuda() 25 | 26 | index_1 = torch.rand(M) 27 | index_1[index_1 < 0] = 0 28 | index_1 = (index_1 * N).long().cuda() 29 | 30 | rel_index = torch.rand(M, 3) 31 | rel_index[rel_index < 0] = 0 32 | rel_index = (rel_index * L).long().cuda() 33 | 34 | 35 | # rearrange index for acceleration 36 | index_0, indices = torch.sort(index_0) # [M,] 37 | index_1 = index_1[indices] # [M,] 38 | rel_index = rel_index[indices] 39 | index_0_counts = index_0.bincount() 40 | 41 | print("index_0_counts.shape: ", index_0_counts.shape) 42 | 43 | n_max = index_0_counts.max() 44 | index_0_offsets = index_0_counts.cumsum(dim=-1) # [N] 45 | 46 | print("v1 index_0_offsets.shape: ", index_0_offsets.shape) 47 | 48 | index_0_offsets = torch.cat( 49 | [torch.zeros(1, dtype=torch.long).cuda(), index_0_offsets], 0 50 | ) # [N+1] 51 | 52 | 53 | attn.requires_grad = True 54 | v.requires_grad = True 55 | table.requires_grad = True 56 | 57 | 58 | output = pointops.attention_step2_with_rel_pos_value( 59 | attn, v, index_0.int(), index_1.int(), table, rel_index.int() 60 | ) 61 | loss = output.mean() 62 | loss.backward() 63 | 64 | print( 65 | "output.shape: {}, output[:5,:10,:5]: {}".format(output.shape, output[:5, :10, :5]) 66 | ) 67 | print("attn.grad[:5, :3]: ", attn.grad[:5, :3]) 68 | print("v.grad[:5, :3, :5]: ", v.grad[:5, :3, :5]) 69 | print("table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2]) 70 | # input() 71 | 72 | attn_grad = attn.grad.clone() 73 | v_grad = v.grad.clone() 74 | table_grad = table.grad.clone() 75 | 76 | attn.grad.zero_() 77 | v.grad.zero_() 78 | table.grad.zero_() 79 | 80 | # 
"""Build script for the ``pointops2`` CUDA extension."""
import os
import sysconfig

from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension

# Drop "-Wstrict-prototypes" from the interpreter's OPT flags before building
# (presumably because the flag is C-only and makes C++ compilers warn).
# sysconfig replaces distutils.sysconfig, which is gone from the standard
# library since Python 3.12; OPT can be unset on some platforms, so fall back
# to an empty string instead of failing on None.
opt = sysconfig.get_config_var("OPT") or ""
os.environ["OPT"] = " ".join(
    flag for flag in opt.split() if flag != "-Wstrict-prototypes"
)

# Every .cpp / .cu file below ./src (relative to the working directory).
src = "src"
sources = [
    os.path.join(root, file)
    for root, _dirs, files in os.walk(src)
    for file in files
    if file.endswith((".cpp", ".cu"))
]

setup(
    name="pointops2",
    version="1.0",
    install_requires=["torch", "numpy"],
    packages=["pointops2"],
    # The Python package lives in ./functions.
    package_dir={"pointops2": "functions"},
    ext_modules=[
        CUDAExtension(
            name="pointops2_cuda",
            sources=sources,
            extra_compile_args={"cxx": ["-g"], "nvcc": ["-O2"]},
        )
    ],
    cmdclass={"build_ext": BuildExtension},
)
// Forward kernel of the weighted neighbour aggregation:
//   output[i, k] += sum_j (input[idx[i, j], k] + position[i, j, k]) * weight[i, j, k % w_c]
// Layouts: input (n, c), position (n, nsample, c), weight (n, nsample, w_c),
// idx (n, nsample), output (n, c). One thread per output element.
// output is accumulated into with +=, so it must be initialised (normally to
// zero) before the launch.
__global__ void aggregation_forward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) {
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    // The grid is rounded up to whole blocks, so trailing threads have no work.
    if (index >= n * c) return;
    const int c_idx = index % c;
    const int n_idx = index / c;
    // Channels share weights modulo w_c.
    const int w_c_idx = c_idx % w_c;
    for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++)
    {
        int idx_idx = n_idx * nsample + nsample_idx;
        int input_idx = idx[idx_idx] * c + c_idx;
        int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx;
        int weight_idx = n_idx * nsample * w_c + nsample_idx * w_c + w_c_idx;
        output[index] += (input[input_idx] + position[position_idx]) * weight[weight_idx];
    }
}

// Backward kernel of the aggregation above.
// Layouts: grad_output (n, c), grad_input (n, c), grad_position (n, nsample, c),
// grad_weight (n, nsample, w_c). One thread per grad_output element.
// grad_input and grad_weight are accumulated into and must be initialised
// (normally to zero) before the launch; grad_position is overwritten.
__global__ void aggregation_backward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) {
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index >= n * c) return;
    const int c_idx = index % c;
    const int n_idx = index / c;
    const int w_c_idx = c_idx % w_c;
    for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++)
    {
        int idx_idx = n_idx * nsample + nsample_idx;
        int input_idx = idx[idx_idx] * c + c_idx;
        int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx;
        int weight_idx = n_idx * nsample * w_c + nsample_idx * w_c + w_c_idx;
        // Different threads can gather the same input row through idx.
        atomicAdd(grad_input + input_idx, grad_output[index] * weight[weight_idx]);
        // position_idx is unique per (thread, nsample_idx): a plain store suffices.
        grad_position[position_idx] = grad_output[index] * weight[weight_idx];
        // Channels that are equal modulo w_c write to the same weight slot.
        atomicAdd(grad_weight + weight_idx, grad_output[index] * (input[input_idx] + position[position_idx]));
    }
}

// Launches the forward kernel with one thread per output element.
// input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c)
void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) {
    dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    // A kernel launch needs an explicit execution configuration.
    aggregation_forward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, w_c, input, position, weight, idx, output);
}

// Launches the backward kernel with one thread per grad_output element.
// input: grad_output: (n, c), output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight: (n, nsample, w_c)
void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) {
    dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    aggregation_backward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight);
}
// NOTE(review): this section was recovered from a flattened dump in which
// `#include <...>` targets, `data_ptr<T>()` / `static_cast<T>` template arguments
// and `<<<grid, block>>>` launch configurations had been stripped. They are
// restored below from the surrounding declarations (pointer types, the
// `blocks`/`threads` locals); confirm the include targets against the repository.

// -------- /libs/pointops2/src/aggregation/aggregation_cuda_kernel.h --------
// Declarations for the aggregation op: ATen-facing wrappers plus the C-linkage
// launchers implemented in the matching .cu file.
#ifndef _AGGREGATION_CUDA_KERNEL
#define _AGGREGATION_CUDA_KERNEL
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>

void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor);
void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor);

#ifdef __cplusplus
extern "C" {
#endif

void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output);
void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight);

#ifdef __cplusplus
}
#endif
#endif

// -------- /libs/pointops2/src/attention/attention_cuda.cpp --------
// Thin ATen wrappers: each one unwraps raw device pointers from its tensors
// and forwards them, unchanged, to the corresponding launcher.
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>
#include "attention_cuda_kernel.h"

// Forward of attention step 1: reads q, k and the two index arrays, writes attn.
void attention_step1_forward_cuda(int N, int M, int h, int C, at::Tensor q_tensor, at::Tensor k_tensor,
    at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor)
{
    const float *q = q_tensor.data_ptr<float>();
    const float *k = k_tensor.data_ptr<float>();
    const int *index0 = index0_tensor.data_ptr<int>();
    const int *index1 = index1_tensor.data_ptr<int>();
    float *attn = attn_tensor.data_ptr<float>();
    attention_step1_forward_cuda_launcher(N, M, h, C, q, k, index0, index1, attn);
}

// Backward of attention step 1: reads grad_out, q, k; writes grad_q and grad_k.
void attention_step1_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor,
    at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor,
    at::Tensor grad_q_tensor, at::Tensor grad_k_tensor)
{
    const float *grad_out = grad_out_tensor.data_ptr<float>();
    const int *index0 = index0_tensor.data_ptr<int>();
    const int *index1 = index1_tensor.data_ptr<int>();
    const float *q = q_tensor.data_ptr<float>();
    const float *k = k_tensor.data_ptr<float>();
    float *grad_q = grad_q_tensor.data_ptr<float>();
    float *grad_k = grad_k_tensor.data_ptr<float>();
    attention_step1_backward_cuda_launcher(N, M, h, C, grad_out, index0, index1, q, k, grad_q, grad_k);
}

// Forward of attention step 2: reads attn, v and the index arrays, writes output.
void attention_step2_forward_cuda(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor,
    at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor)
{
    const float *attn = attn_tensor.data_ptr<float>();
    const float *v = v_tensor.data_ptr<float>();
    const int *index0 = index0_tensor.data_ptr<int>();
    const int *index1 = index1_tensor.data_ptr<int>();
    float *output = output_tensor.data_ptr<float>();
    attention_step2_forward_cuda_launcher(N, M, h, C, attn, v, index0, index1, output);
}

// Backward of attention step 2: reads grad_out, attn, v; writes grad_attn and grad_v.
void attention_step2_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor,
    at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor,
    at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor)
{
    const float *grad_out = grad_out_tensor.data_ptr<float>();
    const int *index0 = index0_tensor.data_ptr<int>();
    const int *index1 = index1_tensor.data_ptr<int>();
    const float *attn = attn_tensor.data_ptr<float>();
    const float *v = v_tensor.data_ptr<float>();
    float *grad_attn = grad_attn_tensor.data_ptr<float>();
    float *grad_v = grad_v_tensor.data_ptr<float>();
    attention_step2_backward_cuda_launcher(N, M, h, C, grad_out, index0, index1, attn, v, grad_attn, grad_v);
}

// -------- /libs/pointops2/src/attention/attention_cuda_kernel.h --------
// Declarations for the attention op (wrappers + C-linkage launchers).
#ifndef _ATTENTION_CUDA_KERNEL
#define _ATTENTION_CUDA_KERNEL
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>

void attention_step1_forward_cuda(int N, int M, int h, int C, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor);
void attention_step1_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor);

void attention_step2_forward_cuda(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor);
void attention_step2_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor);

#ifdef __cplusplus
extern "C" {
#endif

void attention_step1_forward_cuda_launcher(int N, int M, int h, int C, const float *q, const float *k, const int *index0, const int *index1, float *attn);
void attention_step1_backward_cuda_launcher(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *q, const float *k, float *grad_q, float *grad_k);

void attention_step2_forward_cuda_launcher(int N, int M, int h, int C, const float *attn, const float *v, const int *index0, const int *index1, float *output);
void attention_step2_backward_cuda_launcher(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *attn, const float *v, float *grad_attn, float *grad_v);

#ifdef __cplusplus
}
#endif
#endif

// -------- /libs/pointops2/src/attention_v2/attention_cuda_kernel_v2.h --------
// Declarations for the v2 attention op. Step 1 differs from v1 by taking an
// extra `n_max` and an `index0` offsets array instead of `index0` itself.
#ifndef _ATTENTION_V2_CUDA_KERNEL
#define _ATTENTION_V2_CUDA_KERNEL
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>

void attention_step1_forward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor attn_tensor);
void attention_step1_backward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor grad_out_tensor, at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor);

void attention_step2_forward_cuda_v2(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor);
void attention_step2_backward_cuda_v2(int N, int M, int h, int C, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor);

#ifdef __cplusplus
extern "C" {
#endif

void attention_step1_forward_cuda_launcher_v2(int N, int M, int h, int C, const unsigned int n_max, const float *q, const float *k, const int *index0_offsets, const int *index1, float *attn);
void attention_step1_backward_cuda_launcher_v2(int N, int M, int h, int C, const unsigned int n_max, const float *grad_out, const int *index0_offsets, const int *index1, const float *q, const float *k, float *grad_q, float *grad_k);

void attention_step2_forward_cuda_launcher_v2(int N, int M, int h, int C, const float *attn, const float *v, const int *index0, const int *index1, float *output);
void attention_step2_backward_cuda_launcher_v2(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *attn, const float *v, float *grad_attn, float *grad_v);

#ifdef __cplusplus
}
#endif
#endif

// -------- /libs/pointops2/src/attention_v2/attention_cuda_v2.cpp --------
// ATen wrappers for the v2 attention launchers (pointer unwrapping only).
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>
#include "attention_cuda_kernel_v2.h"

// Forward of v2 attention step 1: reads q, k, index0 offsets, index1; writes attn.
void attention_step1_forward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor q_tensor, at::Tensor k_tensor,
    at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor attn_tensor)
{
    const float *q = q_tensor.data_ptr<float>();
    const float *k = k_tensor.data_ptr<float>();
    const int *index0_offsets = index0_tensor_offsets.data_ptr<int>();
    const int *index1 = index1_tensor.data_ptr<int>();
    float *attn = attn_tensor.data_ptr<float>();
    attention_step1_forward_cuda_launcher_v2(N, M, h, C, n_max, q, k, index0_offsets, index1, attn);
}

// Backward of v2 attention step 1: reads grad_out, q, k; writes grad_q and grad_k.
void attention_step1_backward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor grad_out_tensor,
    at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor,
    at::Tensor grad_q_tensor, at::Tensor grad_k_tensor)
{
    const float *grad_out = grad_out_tensor.data_ptr<float>();
    const int *index0_offsets = index0_tensor_offsets.data_ptr<int>();
    const int *index1 = index1_tensor.data_ptr<int>();
    const float *q = q_tensor.data_ptr<float>();
    const float *k = k_tensor.data_ptr<float>();
    float *grad_q = grad_q_tensor.data_ptr<float>();
    float *grad_k = grad_k_tensor.data_ptr<float>();
    attention_step1_backward_cuda_launcher_v2(N, M, h, C, n_max, grad_out, index0_offsets, index1, q, k, grad_q, grad_k);
}

// Forward of v2 attention step 2: reads attn, v and the index arrays, writes output.
void attention_step2_forward_cuda_v2(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor,
    at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor)
{
    const float *attn = attn_tensor.data_ptr<float>();
    const float *v = v_tensor.data_ptr<float>();
    const int *index0 = index0_tensor.data_ptr<int>();
    const int *index1 = index1_tensor.data_ptr<int>();
    float *output = output_tensor.data_ptr<float>();
    attention_step2_forward_cuda_launcher_v2(N, M, h, C, attn, v, index0, index1, output);
}

// Backward of v2 attention step 2: reads grad_out, attn, v; writes grad_attn and grad_v.
void attention_step2_backward_cuda_v2(int N, int M, int h, int C, at::Tensor grad_out_tensor,
    at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor,
    at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor)
{
    const float *grad_out = grad_out_tensor.data_ptr<float>();
    const int *index0 = index0_tensor.data_ptr<int>();
    const int *index1 = index1_tensor.data_ptr<int>();
    const float *attn = attn_tensor.data_ptr<float>();
    const float *v = v_tensor.data_ptr<float>();
    float *grad_attn = grad_attn_tensor.data_ptr<float>();
    float *grad_v = grad_v_tensor.data_ptr<float>();
    attention_step2_backward_cuda_launcher_v2(N, M, h, C, grad_out, index0, index1, attn, v, grad_attn, grad_v);
}

// -------- /libs/pointops2/src/cuda_utils.h --------
// Launch-geometry helpers shared by the .cu files.
#ifndef _CUDA_UTILS_H
#define _CUDA_UTILS_H

#include <cmath>
#include <algorithm>

#define TOTAL_THREADS 1024
#define THREADS_PER_BLOCK 256
// Ceiling division for non-negative integers.
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))

// Returns a power-of-two thread count derived from log2(work_size), clamped
// to [1, TOTAL_THREADS].
inline int opt_n_threads(int work_size) {
    // log(0) is -inf and log of a negative is NaN; converting either to int and
    // shifting by it is undefined, so short-circuit to the documented floor of 1.
    if (work_size <= 0) return 1;
    const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0);
    return std::max(std::min(1 << pow_2, TOTAL_THREADS), 1);
}

// Picks a 2-D block shape whose x*y thread count does not exceed TOTAL_THREADS.
inline dim3 opt_block_config(int x, int y) {
    const int x_threads = opt_n_threads(x);
    const int y_threads = std::max(std::min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1);
    dim3 block_config(x_threads, y_threads, 1);
    return block_config;
}

#endif

// -------- /libs/pointops2/src/grouping/grouping_cuda.cpp --------
// ATen wrappers for the grouping launchers (pointer unwrapping only).
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>
#include "grouping_cuda_kernel.h"


// Gathers rows of `input` selected by `idx` into `output`.
void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor)
{
    const float *input = input_tensor.data_ptr<float>();
    const int *idx = idx_tensor.data_ptr<int>();
    float *output = output_tensor.data_ptr<float>();
    grouping_forward_cuda_launcher(m, nsample, c, input, idx, output);
}

// Scatters `grad_output` back onto the rows of `grad_input` selected by `idx`.
void grouping_backward_cuda(int m, int nsample, int c, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor)
{
    const float *grad_output = grad_output_tensor.data_ptr<float>();
    const int *idx = idx_tensor.data_ptr<int>();
    float *grad_input = grad_input_tensor.data_ptr<float>();
    grouping_backward_cuda_launcher(m, nsample, c, grad_output, idx, grad_input);
}

// -------- /libs/pointops2/src/grouping/grouping_cuda_kernel.cu --------
#include "../cuda_utils.h"
#include "grouping_cuda_kernel.h"


// One thread per output element: output[m, s, c] = input[idx[m, s], c].
__global__ void grouping_forward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ input, const int *__restrict__ idx, float *__restrict__ output) {
    // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c)
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index >= m * nsample * c) return;
    const int c_idx = index % c;
    const int nsample_idx = (index / c) % nsample;
    const int m_idx = index / nsample / c;
    const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx;
    output[index] = input[input_idx];
}

// One thread per grad_output element; several (m, s) pairs may reference the
// same input row, hence the atomic accumulation.
__global__ void grouping_backward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ grad_output, const int *__restrict__ idx, float *__restrict__ grad_input) {
    // input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c)
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index >= m * nsample * c) return;
    const int c_idx = index % c;
    const int nsample_idx = (index / c) % nsample;
    const int m_idx = index / nsample / c;
    const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx;
    atomicAdd(grad_input + input_idx, grad_output[index]);
}

// Launches the forward kernel with one thread per output element.
void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output) {
    // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c)
    dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    grouping_forward_cuda_kernel<<<blocks, threads, 0>>>(m, nsample, c, input, idx, output);
}

// Launches the backward kernel with one thread per grad_output element.
void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input)
{
    // input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c)
    dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    grouping_backward_cuda_kernel<<<blocks, threads, 0>>>(m, nsample, c, grad_output, idx, grad_input);
}

// -------- /libs/pointops2/src/grouping/grouping_cuda_kernel.h: (contents follow) --------
// NOTE(review): this section was recovered from a flattened dump in which
// `#include <...>` targets, `data_ptr<T>()` template arguments and
// `<<<grid, block>>>` launch configurations had been stripped. They are
// restored below from the surrounding declarations; confirm the include
// targets against the repository.

// -------- grouping_cuda_kernel.h (declarations for the grouping op) --------
#ifndef _GROUPING_CUDA_KERNEL
#define _GROUPING_CUDA_KERNEL
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>

void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor);
void grouping_backward_cuda(int m, int nsample, int c, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor);

#ifdef __cplusplus
extern "C" {
#endif

void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output);
void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input);

#ifdef __cplusplus
}
#endif
#endif

// -------- /libs/pointops2/src/interpolation/interpolation_cuda.cpp --------
// ATen wrappers for the interpolation launchers (pointer unwrapping only).
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>
#include "interpolation_cuda_kernel.h"


// Weighted sum of the k rows of `input` selected per output row by `idx`.
void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor)
{
    const float *input = input_tensor.data_ptr<float>();
    const int *idx = idx_tensor.data_ptr<int>();
    const float *weight = weight_tensor.data_ptr<float>();
    float *output = output_tensor.data_ptr<float>();
    interpolation_forward_cuda_launcher(n, c, k, input, idx, weight, output);
}

// Scatters weighted `grad_output` back onto the rows of `grad_input`.
void interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_input_tensor)
{
    const float *grad_output = grad_output_tensor.data_ptr<float>();
    const int *idx = idx_tensor.data_ptr<int>();
    const float *weight = weight_tensor.data_ptr<float>();
    float *grad_input = grad_input_tensor.data_ptr<float>();
    interpolation_backward_cuda_launcher(n, c, k, grad_output, idx, weight, grad_input);
}

// -------- /libs/pointops2/src/interpolation/interpolation_cuda_kernel.cu --------
#include "../cuda_utils.h"
#include "interpolation_cuda_kernel.h"


// One thread per (n, c) output element.
// NOTE(review): the result is accumulated with `+=`, so `output` must already
// hold zeros (or an intended initial value) on entry — confirm the caller does so.
__global__ void interpolation_forward_cuda_kernel(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output)
{
    // input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c)
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index >= n * c) return;
    int c_idx = index % c;
    int n_idx = index / c;
    for (int i = 0; i < k; i++)
    {
        int idx_idx = n_idx * k + i;
        int input_idx = idx[idx_idx] * c + c_idx;
        output[index] += input[input_idx] * weight[idx_idx];
    }
}

// One thread per (n, c) grad_output element; different output rows may share
// an input row, hence the atomic accumulation into grad_input.
__global__ void interpolation_backward_cuda_kernel(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input)
{
    // input: grad_output: (n, c), idx: (n, k), weight: (n, k), output: grad_input (m, c)
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index >= n * c) return;
    int c_idx = index % c;
    int n_idx = index / c;
    for (int i = 0; i < k; i++)
    {
        int idx_idx = n_idx * k + i;
        int input_idx = idx[idx_idx] * c + c_idx;
        atomicAdd(grad_input + input_idx, grad_output[index] * weight[idx_idx]);
    }
}

// Launches the forward kernel with one thread per (n, c) element.
void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output) {
    // input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c)
    dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    interpolation_forward_cuda_kernel<<<blocks, threads, 0>>>(n, c, k, input, idx, weight, output);
}

// Launches the backward kernel with one thread per (n, c) element.
void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input) {
    // input: grad_output: (n, c), idx: (n, k), weight: (n, k), output: grad_input (m, c)
    dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    interpolation_backward_cuda_kernel<<<blocks, threads, 0>>>(n, c, k, grad_output, idx, weight, grad_input);
}

// -------- /libs/pointops2/src/interpolation/interpolation_cuda_kernel.h --------
#ifndef _INTERPOLATION_CUDA_KERNEL
#define _INTERPOLATION_CUDA_KERNEL
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>

void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor);
void interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_input_tensor);

#ifdef __cplusplus
extern "C" {
#endif

void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output);
void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input);

#ifdef __cplusplus
}
#endif
#endif

// -------- /libs/pointops2/src/knnquery/knnquery_cuda.cpp --------
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>
#include "knnquery_cuda_kernel.h"


// ATen wrapper for the k-nearest-neighbour query.
// Rejects `nsample` values larger than the kernel's per-thread scratch buffers
// (KNNQUERY_MAX_NSAMPLE); previously such a call silently overran them.
void knnquery_cuda(int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor)
{
    TORCH_CHECK(nsample <= KNNQUERY_MAX_NSAMPLE,
                "knnquery_cuda: nsample (", nsample, ") exceeds the supported maximum of ", KNNQUERY_MAX_NSAMPLE);
    const float *xyz = xyz_tensor.data_ptr<float>();
    const float *new_xyz = new_xyz_tensor.data_ptr<float>();
    const int *offset = offset_tensor.data_ptr<int>();
    const int *new_offset = new_offset_tensor.data_ptr<int>();
    int *idx = idx_tensor.data_ptr<int>();
    float *dist2 = dist2_tensor.data_ptr<float>();
    knnquery_cuda_launcher(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2);
}

// -------- /libs/pointops2/src/knnquery/knnquery_cuda_kernel.cu --------
#include "../cuda_utils.h"
#include "knnquery_cuda_kernel.h"


__device__ void swap_float(float *x, float *y)
{
    float tmp = *x;
    *x = *y;
    *y = tmp;
}


__device__ void swap_int(int *x, int *y)
{
    int tmp = *x;
    *x = *y;
    *y = tmp;
}


// Sift-down on a max-heap keyed by `dist` (with `idx` kept in lockstep):
// pushes the root down until it is larger than both children.
__device__ void reheap(float *dist, int *idx, int k)
{
    int root = 0;
    int child = root * 2 + 1;
    while (child < k)
    {
        // pick the larger of the two children
        if(child + 1 < k && dist[child+1] > dist[child])
            child++;
        if(dist[root] > dist[child])
            return;
        swap_float(&dist[root], &dist[child]);
        swap_int(&idx[root], &idx[child]);
        root = child;
        child = root * 2 + 1;
    }
}


// In-place heap sort of an existing max-heap; leaves `dist` in ascending order.
__device__ void heap_sort(float *dist, int *idx, int k)
{
    int i;
    for (i = k - 1; i > 0; i--)
    {
        swap_float(&dist[0], &dist[i]);
        swap_int(&idx[0], &idx[i]);
        reheap(dist, idx, i);
    }
}


// Returns the first i with idx < offset[i].
// NOTE(review): the scan has no upper bound, so it relies on some offset entry
// exceeding `idx` — presumably the last offset equals the point count; confirm.
__device__ int get_bt_idx(int idx, const int *offset)
{
    int i = 0;
    while (1)
    {
        if (idx < offset[i])
            break;
        else
            i++;
    }
    return i;
}


// One thread per query point: brute-force scan of the candidate range
// [start, end) taken from `offset`, keeping the `nsample` smallest squared
// distances in a per-thread max-heap, then emitting them in ascending order.
__global__ void knnquery_cuda_kernel(int m, int nsample, const float *__restrict__ xyz, const float *__restrict__ new_xyz, const int *__restrict__ offset, const int *__restrict__ new_offset, int *__restrict__ idx, float *__restrict__ dist2) {
    // input: xyz (n, 3) new_xyz (m, 3)
    // output: idx (m, nsample) dist2 (m, nsample)
    int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (pt_idx >= m) return;
    // Memory-safety backstop for direct launcher callers: the scratch arrays
    // below hold at most KNNQUERY_MAX_NSAMPLE entries.
    if (nsample > KNNQUERY_MAX_NSAMPLE) return;

    new_xyz += pt_idx * 3;
    idx += pt_idx * nsample;
    dist2 += pt_idx * nsample;
    int bt_idx = get_bt_idx(pt_idx, new_offset);
    int start;
    if (bt_idx == 0)
        start = 0;
    else
        start = offset[bt_idx - 1];
    int end = offset[bt_idx];

    float new_x = new_xyz[0];
    float new_y = new_xyz[1];
    float new_z = new_xyz[2];

    float best_dist[KNNQUERY_MAX_NSAMPLE];
    int best_idx[KNNQUERY_MAX_NSAMPLE];
    // Sentinel fill: slots never replaced keep distance 1e10 and index `start`.
    for(int i = 0; i < nsample; i++){
        best_dist[i] = 1e10;
        best_idx[i] = start;
    }
    for(int i = start; i < end; i++){
        float x = xyz[i * 3 + 0];
        float y = xyz[i * 3 + 1];
        float z = xyz[i * 3 + 2];
        float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z);
        // heap root is the current worst kept distance; replace it when beaten
        if (d2 < best_dist[0]){
            best_dist[0] = d2;
            best_idx[0] = i;
            reheap(best_dist, best_idx, nsample);
        }
    }
    heap_sort(best_dist, best_idx, nsample);
    for(int i = 0; i < nsample; i++){
        idx[i] = best_idx[i];
        dist2[i] = best_dist[i];
    }
}


// Launches the kNN kernel with one thread per query point.
void knnquery_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2) {
    // input: new_xyz: (m, 3), xyz: (n, 3), idx: (m, nsample)
    dim3 blocks(DIVUP(m, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    knnquery_cuda_kernel<<<blocks, threads, 0>>>(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2);
}

// -------- /libs/pointops2/src/knnquery/knnquery_cuda_kernel.h --------
#ifndef _KNNQUERY_CUDA_KERNEL
#define _KNNQUERY_CUDA_KERNEL
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>

// Capacity of the per-thread neighbour buffers in knnquery_cuda_kernel.
#define KNNQUERY_MAX_NSAMPLE 100

void knnquery_cuda(int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor);

#ifdef __cplusplus
extern "C" {
#endif

void knnquery_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2);

#ifdef __cplusplus
}
#endif
#endif

// -------- /libs/pointops2/src/rpe/relative_pos_encoding_cuda.cpp --------
// ATen wrappers for the relative-position-encoding launchers (pointer
// unwrapping only).
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>
#include "relative_pos_encoding_cuda_kernel.h"

// Forward: reads q, index, table, rel_idx; writes output.
void dot_prod_with_idx_forward_cuda(int N, int M, int h, int hdim, at::Tensor q_tensor, at::Tensor index_tensor,
    at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor)
{
    const float *q = q_tensor.data_ptr<float>();
    const float *table = table_tensor.data_ptr<float>();
    const int *index = index_tensor.data_ptr<int>();
    const int *rel_idx = rel_idx_tensor.data_ptr<int>();
    float *output = output_tensor.data_ptr<float>();
    dot_prod_with_idx_forward_cuda_launcher(N, M, h, hdim, q, index, table, rel_idx, output);
}

// Backward: reads grad_out, q, index, table, rel_idx; writes grad_q and grad_table.
void dot_prod_with_idx_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor,
    at::Tensor q_tensor, at::Tensor index_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor,
    at::Tensor grad_q_tensor, at::Tensor grad_table_tensor)
{
    const float *grad_out = grad_out_tensor.data_ptr<float>();
    const float *q = q_tensor.data_ptr<float>();
    const int *index = index_tensor.data_ptr<int>();
    const float *table = table_tensor.data_ptr<float>();
    const int *rel_idx = rel_idx_tensor.data_ptr<int>();
    float *grad_q = grad_q_tensor.data_ptr<float>();
    float *grad_table = grad_table_tensor.data_ptr<float>();
    dot_prod_with_idx_backward_cuda_launcher(N, M, h, hdim, grad_out, q, index, table, rel_idx, grad_q, grad_table);
}

// Forward: reads attn, v, index0, index1, table, rel_idx; writes output.
void attention_step2_with_rel_pos_value_forward_cuda(int N, int M, int h, int hdim, at::Tensor attn_tensor, at::Tensor v_tensor,
    at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor)
{
    const float *attn = attn_tensor.data_ptr<float>();
    const float *v = v_tensor.data_ptr<float>();
    const int *index0 = index0_tensor.data_ptr<int>();
    const int *index1 = index1_tensor.data_ptr<int>();
    const float *table = table_tensor.data_ptr<float>();
    const int *rel_idx = rel_idx_tensor.data_ptr<int>();
    float *output = output_tensor.data_ptr<float>();
    attention_step2_with_rel_pos_value_forward_cuda_launcher(N, M, h, hdim, attn, v, index0, index1, table, rel_idx, output);
}

// Backward: reads grad_out, attn, v, table and the index arrays; writes
// grad_attn, grad_v and grad_table.
void attention_step2_with_rel_pos_value_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor,
    at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor table_tensor,
    at::Tensor rel_idx_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor, at::Tensor grad_table_tensor)
{
    const float *grad_out = grad_out_tensor.data_ptr<float>();
    const int *index0 = index0_tensor.data_ptr<int>();
    const int *index1 = index1_tensor.data_ptr<int>();
    const float *attn = attn_tensor.data_ptr<float>();
    const float *v = v_tensor.data_ptr<float>();
    const float *table = table_tensor.data_ptr<float>();
    const int *rel_idx = rel_idx_tensor.data_ptr<int>();
    float *grad_attn = grad_attn_tensor.data_ptr<float>();
    float *grad_v = grad_v_tensor.data_ptr<float>();
    float *grad_table = grad_table_tensor.data_ptr<float>();
    attention_step2_with_rel_pos_value_backward_cuda_launcher(N, M, h, hdim, grad_out, index0, index1, attn, v, table, rel_idx, grad_attn, grad_v, grad_table);
}
-------------------------------------------------------------------------------- /libs/pointops2/src/rpe/relative_pos_encoding_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _RPE_CUDA_KERNEL 2 | #define _RPE_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void dot_prod_with_idx_forward_cuda(int N, int M, int h, int hdim, at::Tensor q_tensor, at::Tensor index_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor); 8 | void dot_prod_with_idx_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, at::Tensor q_tensor, at::Tensor index_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor grad_q_tensor, at::Tensor grad_table_tensor); 9 | 10 | void attention_step2_with_rel_pos_value_forward_cuda(int N, int M, int h, int hdim, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor); 11 | void attention_step2_with_rel_pos_value_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor, at::Tensor grad_table_tensor); 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | void dot_prod_with_idx_forward_cuda_launcher(int N, int M, int h, int hdim, const float *q, const int *index, const float *table, const int *rel_idx, float *output); 18 | void dot_prod_with_idx_backward_cuda_launcher(int N, int M, int h, int hdim, const float *grad_out, const float *q, const int *index, const float *table, const int *rel_idx, float *grad_q, float *grad_table); 19 | 20 | void attention_step2_with_rel_pos_value_forward_cuda_launcher(int N, int M, int h, int hdim, const float *attn, const float *v, const int 
*index0, const int *index1, const float *table, const int *rel_idx, float *output); 21 | void attention_step2_with_rel_pos_value_backward_cuda_launcher(int N, int M, int h, int hdim, const float *grad_out, const int *index0, const int *index1, const float *attn, const float *v, const float *table, const int *rel_idx, float *grad_attn, float *grad_v, float *grad_table); 22 | 23 | #ifdef __cplusplus 24 | } 25 | #endif 26 | #endif 27 | -------------------------------------------------------------------------------- /libs/pointops2/src/sampling/sampling_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "sampling_cuda_kernel.h" 5 | 6 | 7 | void furthestsampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor) 8 | { 9 | const float *xyz = xyz_tensor.data_ptr(); 10 | const int *offset = offset_tensor.data_ptr(); 11 | const int *new_offset = new_offset_tensor.data_ptr(); 12 | float *tmp = tmp_tensor.data_ptr(); 13 | int *idx = idx_tensor.data_ptr(); 14 | furthestsampling_cuda_launcher(b, n, xyz, offset, new_offset, tmp, idx); 15 | } 16 | -------------------------------------------------------------------------------- /libs/pointops2/src/sampling/sampling_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _SAMPLING_CUDA_KERNEL 2 | #define _SAMPLING_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void furthestsampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor); 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | void furthestsampling_cuda_launcher(int b, int n, const float *xyz, const int *offset, const int *new_offset, float *tmp, int *idx); 14 | 15 | #ifdef __cplusplus 16 | } 17 | #endif 18 | #endif 19 | 
-------------------------------------------------------------------------------- /libs/pointops2/src/subtraction/subtraction_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "subtraction_cuda_kernel.h" 5 | 6 | 7 | void subtraction_forward_cuda(int n, int nsample, int c, at::Tensor input1_tensor, at::Tensor input2_tensor, at::Tensor idx_tensor, at::Tensor output_tensor) 8 | { 9 | const float *input1 = input1_tensor.data_ptr(); 10 | const float *input2 = input2_tensor.data_ptr(); 11 | const int *idx = idx_tensor.data_ptr(); 12 | float *output = output_tensor.data_ptr(); 13 | subtraction_forward_cuda_launcher(n, nsample, c, input1, input2, idx, output); 14 | } 15 | 16 | void subtraction_backward_cuda(int n, int nsample, int c, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input1_tensor, at::Tensor grad_input2_tensor) 17 | { 18 | const int *idx = idx_tensor.data_ptr(); 19 | const float *grad_output = grad_output_tensor.data_ptr(); 20 | float *grad_input1 = grad_input1_tensor.data_ptr(); 21 | float *grad_input2 = grad_input2_tensor.data_ptr(); 22 | subtraction_backward_cuda_launcher(n, nsample, c, idx, grad_output, grad_input1, grad_input2); 23 | } 24 | -------------------------------------------------------------------------------- /libs/pointops2/src/subtraction/subtraction_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "subtraction_cuda_kernel.h" 3 | 4 | 5 | __global__ void subtraction_forward_cuda_kernel(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) { 6 | // input: input1: (n, c), input2: (n, c), idx: (n, nsample), output: (n, nsample, c) 7 | int index = blockIdx.x * blockDim.x + threadIdx.x; 8 | if (index >= n * nsample * c) return; 9 | const int c_idx = index % c; 10 | const int nsample_idx = (index / 
// Backward kernel: one thread per element of grad_output (n, nsample, c).
// Each thread adds its gradient to grad_input1[n_idx, c_idx] and subtracts it
// from grad_input2[idx[n_idx, nsample_idx], c_idx].
__global__ void subtraction_backward_cuda_kernel(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) {
    // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c)
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index >= n * nsample * c) return;  // surplus threads in the last block
    const int c_idx = index % c;
    const int nsample_idx = (index / c) % nsample;
    const int n_idx = index / nsample / c;
    const int idx_idx = n_idx * nsample + nsample_idx;
    const int input1_idx = n_idx * c + c_idx;
    const int input2_idx = idx[idx_idx] * c + c_idx;
    // Atomic adds: the nsample threads of one (n_idx, c_idx) share input1_idx,
    // and different neighbours may resolve to the same input2 row.
    atomicAdd(grad_input1 + input1_idx, grad_output[index]);
    atomicAdd(grad_input2 + input2_idx, -grad_output[index]);
}

// Launches the forward kernel with one thread per output element.
void subtraction_forward_cuda_launcher(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) {
    // input: input1: (n, c), input2: (n, c), idx: (n, nsample), output: (n, nsample, c)
    dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    // Explicit execution configuration: grid, block, no dynamic shared memory.
    subtraction_forward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, input1, input2, idx, output);
}

// Launches the backward kernel with one thread per grad_output element.
void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) {
    // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c)
    dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    // Explicit execution configuration: grid, block, no dynamic shared memory.
    subtraction_backward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, idx, grad_output, grad_input1, grad_input2);
}
-------------------------------------------------------------------------------- /libs/pointops2/src/subtraction/subtraction_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _SUBTRACTION_CUDA_KERNEL 2 | #define _SUBTRACTION_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void subtraction_forward_cuda(int n, int nsample, int c, at::Tensor input1_tensor, at::Tensor input2_tensor, at::Tensor idx_tensor, at::Tensor output_tensor); 8 | void subtraction_backward_cuda(int n, int nsample, int c, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input1_tensor, at::Tensor grad_input2_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void subtraction_forward_cuda_launcher(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output); 15 | void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /pointcept/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QWTforGithub/CDSegNet/cc23a7e98ee5e96787df032bf0a0547d6a55e32e/pointcept/__init__.py -------------------------------------------------------------------------------- /pointcept/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .defaults import DefaultDataset, ConcatDataset 2 | from .builder import build_dataset 3 | from .utils import point_collate_fn, collate_fn 4 | 5 | # indoor scene 6 | from .s3dis import S3DISDataset 7 | from .scannet import ScanNetDataset, ScanNet200Dataset 8 | from .scannet_pair import ScanNetPairDataset 9 | from .arkitscenes import ArkitScenesDataset 10 | from .structure3d import 
def build_dataset(cfg):
    """Build a dataset from ``cfg`` through the ``DATASETS`` registry.

    Args:
        cfg: Configuration forwarded unchanged to ``DATASETS.build``.
            NOTE(review): presumably a dict whose ``type`` key names a
            registered dataset class -- confirm against
            ``pointcept.utils.registry.Registry``.

    Returns:
        Whatever ``DATASETS.build(cfg)`` returns.
    """
    return DATASETS.build(cfg)
@DATASETS.register_module()
class ModelNetDataset(Dataset):
    """ModelNet40 classification dataset read from per-shape text files.

    The split file ``modelnet40_{split}.txt`` under ``data_root`` lists one
    shape name per line; each shape is stored as
    ``{data_root}/{category}/{shape}.txt`` with comma-separated columns whose
    first six values are XYZ and normal.

    Args:
        split: Name of the split file suffix (must be a ``str``).
        data_root: Dataset root directory.
        class_names: Ordered category names; the position of a name is its
            integer label.
        transform: Transform config handed to ``Compose``.
        test_mode: When true, ``loop`` is forced to 1 and ``test_cfg`` kept.
        test_cfg: Test configuration, stored only in test mode.
        cache_data: Keep loaded shapes in memory after their first read.
        loop: Number of times the data list is repeated per epoch.
    """

    def __init__(
        self,
        split="train",
        data_root="data/modelnet40_normal_resampled",
        class_names=None,
        transform=None,
        test_mode=False,
        test_cfg=None,
        cache_data=False,
        loop=1,
    ):
        super(ModelNetDataset, self).__init__()
        self.data_root = data_root
        self.class_names = dict(zip(class_names, range(len(class_names))))
        self.split = split
        self.transform = Compose(transform)
        self.loop = (
            loop if not test_mode else 1
        )  # force make loop = 1 while in test mode
        self.cache_data = cache_data
        self.test_mode = test_mode
        self.test_cfg = test_cfg if test_mode else None
        self.cache = {}
        # TODO: Optimize test mode (currently identical to train mode).

        self.data_list = self.get_data_list()
        logger = get_root_logger()
        logger.info(
            "Totally {} x {} samples in {} set.".format(
                len(self.data_list), self.loop, split
            )
        )

    def get_data_list(self):
        """Return the shape names listed in the split file as a string array."""
        assert isinstance(self.split, str)
        split_path = os.path.join(
            self.data_root, "modelnet40_{}.txt".format(self.split)
        )
        data_list = np.loadtxt(split_path, dtype="str")
        return data_list

    def get_data(self, idx):
        """Load one shape and return ``dict(coord, normal, category)``.

        ``idx`` wraps around the data list so looped indices are valid.
        """
        data_idx = idx % len(self.data_list)
        # Only read from the cache once the entry exists; the previous
        # unconditional lookup raised KeyError on first access and the cache
        # was never populated.
        if self.cache_data and data_idx in self.cache:
            coord, normal, category = self.cache[data_idx]
        else:
            # Shape names look like "<category>_<number>"; the category itself
            # may contain underscores, so drop only the last component.
            data_shape = "_".join(self.data_list[data_idx].split("_")[0:-1])
            data_path = os.path.join(
                self.data_root, data_shape, self.data_list[data_idx] + ".txt"
            )
            data = np.loadtxt(data_path, delimiter=",").astype(np.float32)
            coord, normal = data[:, 0:3], data[:, 3:6]
            category = np.array([self.class_names[data_shape]])
            if self.cache_data:
                self.cache[data_idx] = (coord, normal, category)
        if self.cache_data:
            # Hand out copies so in-place transforms cannot corrupt the cache.
            coord, normal, category = coord.copy(), normal.copy(), category.copy()
        data_dict = dict(coord=coord, normal=normal, category=category)
        return data_dict

    def prepare_train_data(self, idx):
        """Load sample ``idx`` and apply the transform pipeline."""
        data_dict = self.get_data(idx)
        data_dict = self.transform(data_dict)
        return data_dict

    def prepare_test_data(self, idx):
        """Load sample ``idx`` (no looping allowed) and apply the transform."""
        assert idx < len(self.data_list)
        data_dict = self.get_data(idx)
        data_dict = self.transform(data_dict)
        return data_dict

    def get_data_name(self, idx):
        """Return the shape name for (possibly looped) index ``idx``."""
        data_idx = idx % len(self.data_list)
        return self.data_list[data_idx]

    def __getitem__(self, idx):
        if self.test_mode:
            return self.prepare_test_data(idx)
        else:
            return self.prepare_train_data(idx)

    def __len__(self):
        return len(self.data_list) * self.loop
def face_normal(vertex, face):
    """Return unit normals and areas of triangular faces.

    Args:
        vertex: ``(V, 3)`` array of vertex positions.
        face: ``(F, 3)`` integer array of vertex indices per triangle.

    Returns:
        ``(nf, area)`` where ``nf`` is ``(F, 3)`` unit face normals and
        ``area`` is ``(F, 1)`` triangle areas (half the cross-product length).
    """
    v01 = vertex[face[:, 1]] - vertex[face[:, 0]]
    v02 = vertex[face[:, 2]] - vertex[face[:, 0]]
    vec = np.cross(v01, v02)
    # The epsilon keeps degenerate (zero-area) faces from dividing by zero.
    length = np.sqrt(np.sum(vec**2, axis=1, keepdims=True)) + 1.0e-8
    nf = vec / length
    area = length * 0.5
    return nf, area


def vertex_normal(vertex, face):
    """Return area-weighted unit vertex normals of a triangle mesh.

    Each face contributes its normal, scaled by its area, to its three
    vertices; the sums are then normalised.  Vertices that belong to no face
    get a zero vector.

    Args:
        vertex: ``(V, 3)`` array of vertex positions.
        face: ``(F, 3)`` integer array of vertex indices per triangle.

    Returns:
        ``(V, 3)`` array of vertex normals.
    """
    nf, area = face_normal(vertex, face)
    nf = nf * area

    nv = np.zeros_like(vertex)
    # Unbuffered scatter-add replaces the per-face Python loop: every face
    # normal is added to each of its three vertices, and contributions from
    # different faces to the same vertex accumulate.
    np.add.at(nv, face, nf[:, np.newaxis, :])

    length = np.sqrt(np.sum(nv**2, axis=1, keepdims=True)) + 1.0e-8
    nv = nv / length
    return nv


def parse_scene(scene_path, output_dir):
    """Convert one mesh file into a ``.pth`` dict of coord/color/normal.

    The split name and scene id are taken from the two directory levels above
    the mesh file, and the result is written to
    ``{output_dir}/{split}/{scene_id}.pth``.
    """
    print(f"Parsing scene {scene_path}")
    split = os.path.basename(os.path.dirname(os.path.dirname(scene_path)))
    scene_id = os.path.basename(os.path.dirname(scene_path))
    vertices, faces = read_plymesh(scene_path)
    coords = vertices[:, :3]
    colors = vertices[:, 3:6]
    data_dict = dict(coord=coords, color=colors, scene_id=scene_id)
    data_dict["normal"] = vertex_normal(coords, faces)
    # Make sure the split folder exists even for splits the caller did not
    # pre-create; exist_ok keeps this safe across worker processes.
    os.makedirs(os.path.join(output_dir, split), exist_ok=True)
    torch.save(data_dict, os.path.join(output_dir, split, f"{scene_id}.pth"))
os.makedirs(train_output_dir, exist_ok=True) 79 | val_output_dir = os.path.join(opt.output_root, "Validation") 80 | os.makedirs(val_output_dir, exist_ok=True) 81 | # Load scene paths 82 | scene_paths = sorted(glob.glob(opt.dataset_root + "/3dod/*/*/*_mesh.ply")) 83 | # Preprocess data. 84 | pool = ProcessPoolExecutor(max_workers=mp.cpu_count()) 85 | # pool = ProcessPoolExecutor(max_workers=1) 86 | print("Processing scenes...") 87 | _ = list(pool.map(parse_scene, scene_paths, repeat(opt.output_root))) 88 | -------------------------------------------------------------------------------- /pointcept/datasets/preprocessing/s3dis/preprocess_s3dis_voxelized.py: -------------------------------------------------------------------------------- 1 | """ 2 | Preprocessing Script for S3DIS 3 | Parsing normal vectors has a large consumption of memory. Please reduce max_workers if memory is limited. 4 | 5 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 6 | Please cite our work if the code is helpful to you. 
def voxelize_parser(data_path, dataset_root, output_root, voxel_size):
    """Grid-sample one processed ``.pth`` file and save it under ``output_root``.

    The output keeps the file's path relative to ``dataset_root``.

    Args:
        data_path: Path of the input ``.pth`` file (inside ``dataset_root``).
        dataset_root: Root of the processed dataset.
        output_root: Root under which the voxelized copy is written.
        voxel_size: Grid size passed to ``GridSample``.
    """
    print(f"Parsing data: {data_path}")
    # Map by relative path rather than str.replace: replace() glues directory
    # names together when dataset_root carries a trailing slash and would also
    # rewrite any later occurrence of the root string inside the path.
    out_path = os.path.join(output_root, os.path.relpath(data_path, dataset_root))
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    data = torch.load(data_path)
    data = GridSample(
        grid_size=voxel_size, hash_type="fnv", mode="train", keys=data.keys()
    )(data)
    torch.save(data, out_path)


def main_process():
    """Parse CLI arguments and voxelize every ``*/*.pth`` file in parallel."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dataset_root", required=True, help="Path to processed S3DIS dataset"
    )
    parser.add_argument(
        "--output_root",
        required=True,
        help="Output path where area folders will be located",
    )
    parser.add_argument(
        "--voxel_size", default=0.01, type=float, help="Voxel size for voxelization"
    )
    args = parser.parse_args()

    data_list = glob.glob(os.path.join(args.dataset_root, "*/*.pth"))
    # Preprocess data.
    print("Processing scenes...")
    # The context manager shuts the worker pool down even if a task raises.
    # Use max_workers=1 when debugging or when memory is limited.
    with ProcessPoolExecutor(max_workers=mp.cpu_count()) as pool:
        # list() drains the iterator so worker exceptions surface here.
        _ = list(
            pool.map(
                voxelize_parser,
                data_list,
                repeat(args.dataset_root),
                repeat(args.output_root),
                repeat(args.voxel_size),
            )
        )
-------------------------------------------------------------------------------- 1 | scene0534_00 2 | scene0534_01 3 | scene0319_00 4 | scene0273_00 5 | scene0273_01 6 | scene0225_00 7 | scene0198_00 8 | scene0003_00 9 | scene0003_01 10 | scene0003_02 11 | scene0409_00 12 | scene0409_01 13 | scene0331_00 14 | scene0331_01 15 | scene0505_00 16 | scene0505_01 17 | scene0505_02 18 | scene0505_03 19 | scene0505_04 20 | scene0506_00 21 | scene0057_00 22 | scene0057_01 23 | scene0074_00 24 | scene0074_01 25 | scene0074_02 26 | scene0091_00 27 | scene0112_00 28 | scene0112_01 29 | scene0112_02 30 | scene0240_00 31 | scene0102_00 32 | scene0102_01 33 | scene0513_00 34 | scene0514_00 35 | scene0514_01 36 | scene0537_00 37 | scene0516_00 38 | scene0516_01 39 | scene0495_00 40 | scene0617_00 41 | scene0133_00 42 | scene0520_00 43 | scene0520_01 44 | scene0635_00 45 | scene0635_01 46 | scene0054_00 47 | scene0473_00 48 | scene0473_01 49 | scene0524_00 50 | scene0524_01 51 | scene0379_00 52 | scene0471_00 53 | scene0471_01 54 | scene0471_02 55 | scene0566_00 56 | scene0248_00 57 | scene0248_01 58 | scene0248_02 59 | scene0529_00 60 | scene0529_01 61 | scene0529_02 62 | scene0391_00 63 | scene0264_00 64 | scene0264_01 65 | scene0264_02 66 | scene0675_00 67 | scene0675_01 68 | scene0350_00 69 | scene0350_01 70 | scene0350_02 71 | scene0450_00 72 | scene0068_00 73 | scene0068_01 74 | scene0237_00 75 | scene0237_01 76 | scene0365_00 77 | scene0365_01 78 | scene0365_02 79 | scene0605_00 80 | scene0605_01 81 | scene0539_00 82 | scene0539_01 83 | scene0539_02 84 | scene0540_00 85 | scene0540_01 86 | scene0540_02 87 | scene0170_00 88 | scene0170_01 89 | scene0170_02 90 | scene0433_00 91 | scene0340_00 92 | scene0340_01 93 | scene0340_02 94 | scene0160_00 95 | scene0160_01 96 | scene0160_02 97 | scene0160_03 98 | scene0160_04 99 | scene0059_00 100 | scene0059_01 101 | scene0059_02 102 | scene0056_00 103 | scene0056_01 104 | scene0478_00 105 | scene0478_01 106 | scene0548_00 107 | 
scene0548_01 108 | scene0548_02 109 | scene0204_00 110 | scene0204_01 111 | scene0204_02 112 | scene0033_00 113 | scene0145_00 114 | scene0483_00 115 | scene0508_00 116 | scene0508_01 117 | scene0508_02 118 | scene0180_00 119 | scene0148_00 120 | scene0556_00 121 | scene0556_01 122 | scene0416_00 123 | scene0416_01 124 | scene0416_02 125 | scene0416_03 126 | scene0416_04 127 | scene0073_00 128 | scene0073_01 129 | scene0073_02 130 | scene0073_03 131 | scene0034_00 132 | scene0034_01 133 | scene0034_02 134 | scene0639_00 135 | scene0561_00 136 | scene0561_01 137 | scene0298_00 138 | scene0692_00 139 | scene0692_01 140 | scene0692_02 141 | scene0692_03 142 | scene0692_04 143 | scene0642_00 144 | scene0642_01 145 | scene0642_02 146 | scene0642_03 147 | scene0630_00 148 | scene0630_01 149 | scene0630_02 150 | scene0630_03 151 | scene0630_04 152 | scene0630_05 153 | scene0630_06 154 | scene0706_00 155 | scene0567_00 156 | scene0567_01 157 | -------------------------------------------------------------------------------- /pointcept/datasets/preprocessing/scannet/meta_data/scannetv2_test.txt: -------------------------------------------------------------------------------- 1 | scene0707_00 2 | scene0708_00 3 | scene0709_00 4 | scene0710_00 5 | scene0711_00 6 | scene0712_00 7 | scene0713_00 8 | scene0714_00 9 | scene0715_00 10 | scene0716_00 11 | scene0717_00 12 | scene0718_00 13 | scene0719_00 14 | scene0720_00 15 | scene0721_00 16 | scene0722_00 17 | scene0723_00 18 | scene0724_00 19 | scene0725_00 20 | scene0726_00 21 | scene0727_00 22 | scene0728_00 23 | scene0729_00 24 | scene0730_00 25 | scene0731_00 26 | scene0732_00 27 | scene0733_00 28 | scene0734_00 29 | scene0735_00 30 | scene0736_00 31 | scene0737_00 32 | scene0738_00 33 | scene0739_00 34 | scene0740_00 35 | scene0741_00 36 | scene0742_00 37 | scene0743_00 38 | scene0744_00 39 | scene0745_00 40 | scene0746_00 41 | scene0747_00 42 | scene0748_00 43 | scene0749_00 44 | scene0750_00 45 | scene0751_00 46 | 
scene0752_00 47 | scene0753_00 48 | scene0754_00 49 | scene0755_00 50 | scene0756_00 51 | scene0757_00 52 | scene0758_00 53 | scene0759_00 54 | scene0760_00 55 | scene0761_00 56 | scene0762_00 57 | scene0763_00 58 | scene0764_00 59 | scene0765_00 60 | scene0766_00 61 | scene0767_00 62 | scene0768_00 63 | scene0769_00 64 | scene0770_00 65 | scene0771_00 66 | scene0772_00 67 | scene0773_00 68 | scene0774_00 69 | scene0775_00 70 | scene0776_00 71 | scene0777_00 72 | scene0778_00 73 | scene0779_00 74 | scene0780_00 75 | scene0781_00 76 | scene0782_00 77 | scene0783_00 78 | scene0784_00 79 | scene0785_00 80 | scene0786_00 81 | scene0787_00 82 | scene0788_00 83 | scene0789_00 84 | scene0790_00 85 | scene0791_00 86 | scene0792_00 87 | scene0793_00 88 | scene0794_00 89 | scene0795_00 90 | scene0796_00 91 | scene0797_00 92 | scene0798_00 93 | scene0799_00 94 | scene0800_00 95 | scene0801_00 96 | scene0802_00 97 | scene0803_00 98 | scene0804_00 99 | scene0805_00 100 | scene0806_00 101 | -------------------------------------------------------------------------------- /pointcept/datasets/preprocessing/scannet/scannet_pair/compute_full_overlapping.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
def make_open3d_point_cloud(xyz, color=None, voxel_size=None):
    """Build an Open3D point cloud from an array, or ``None`` if it has NaNs.

    Args:
        xyz: Array whose first three columns are point coordinates.
        color: Optional per-point colors.
        voxel_size: If given, the cloud is voxel-downsampled with this size.
    """
    if np.isnan(xyz).any():
        return None

    xyz = xyz[:, :3]
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(xyz)
    if color is not None:
        pcd.colors = o3d.utility.Vector3dVector(color)
    if voxel_size is not None:
        pcd = pcd.voxel_down_sample(voxel_size)

    return pcd


def compute_overlap_ratio(pcd0, pcd1, voxel_size):
    """Return the larger of the two directed overlap ratios of two clouds.

    Both clouds are downsampled with ``voxel_size``; a point counts as
    overlapping when the other cloud has a point within ``1.5 * voxel_size``.
    An empty downsampled cloud yields a ratio of 0 for its direction.
    """
    pcd0_down = pcd0.voxel_down_sample(voxel_size)
    pcd1_down = pcd1.voxel_down_sample(voxel_size)
    # get_matching_indices queries a KD-tree, so the target clouds must be
    # wrapped; passing the raw point cloud has no search_radius_vector_3d.
    tree0 = o3d.geometry.KDTreeFlann(pcd0_down)
    tree1 = o3d.geometry.KDTreeFlann(pcd1_down)
    matching01 = get_matching_indices(pcd0_down, tree1, voxel_size * 1.5, 1)
    matching10 = get_matching_indices(pcd1_down, tree0, voxel_size * 1.5, 1)
    num0 = len(pcd0_down.points)
    num1 = len(pcd1_down.points)
    overlap0 = float(len(matching01)) / float(num0) if num0 else 0.0
    overlap1 = float(len(matching10)) / float(num1) if num1 else 0.0
    return max(overlap0, overlap1)


def get_matching_indices(source, pcd_tree, search_voxel_size, K=None):
    """Return ``(source_index, neighbour_index)`` pairs found by radius search.

    Args:
        source: Object exposing ``points`` to iterate over.
        pcd_tree: Object exposing ``search_radius_vector_3d(point, radius)``.
        search_voxel_size: Search radius.
        K: If not ``None``, keep at most the first ``K`` neighbours per point.
    """
    match_inds = []
    for i, point in enumerate(source.points):
        [_, idx, _] = pcd_tree.search_radius_vector_3d(point, search_voxel_size)
        if K is not None:
            idx = idx[:K]
        for j in idx:
            match_inds.append((i, j))
    return match_inds


def compute_full_overlapping(data_root, scene_id, voxel_size=0.05):
    """Write pairwise overlap ratios of a scene's frames to ``overlap.txt``.

    Loads every ``{data_root}/{scene_id}/pcd/*.pth`` frame, computes for each
    ordered pair the fraction of points of one frame that have a neighbour in
    the other within ``1.5 * voxel_size``, and writes one line
    ``"<frame0> <frame1> <overlap>"`` per unordered pair, where the overlap is
    the larger of the two directed ratios and the frame paths have
    ``data_root`` stripped.
    """
    _points = [
        (
            pcd_name,
            make_open3d_point_cloud(
                torch.load(pcd_name)["coord"], voxel_size=voxel_size
            ),
        )
        for pcd_name in glob.glob(os.path.join(data_root, scene_id, "pcd", "*.pth"))
    ]
    points = [(pcd_name, pcd) for (pcd_name, pcd) in _points if pcd is not None]
    print(
        "load {} point clouds ({} invalid has been filtered), computing matching/overlapping".format(
            len(points), len(_points) - len(points)
        )
    )

    matching_matrix = np.zeros((len(points), len(points)))
    for i, (pcd0_name, pcd0) in enumerate(points):
        print("matching to...{}".format(pcd0_name))
        pcd0_tree = o3d.geometry.KDTreeFlann(copy.deepcopy(pcd0))
        for j, (pcd1_name, pcd1) in enumerate(points):
            if i == j:
                continue
            num_points = len(pcd1.points)
            if num_points == 0:
                # An empty frame overlaps nothing; avoid dividing by zero.
                continue
            matching_matrix[i, j] = float(
                len(get_matching_indices(pcd1, pcd0_tree, 1.5 * voxel_size, 1))
            ) / float(num_points)

    # write to file
    with open(os.path.join(data_root, scene_id, "pcd", "overlap.txt"), "w") as f:
        for i, (pcd0_name, pcd0) in enumerate(points):
            for j, (pcd1_name, pcd1) in enumerate(points):
                if i < j:
                    overlap = max(matching_matrix[i, j], matching_matrix[j, i])
                    f.write(
                        "{} {} {}\n".format(
                            pcd0_name.replace(data_root, ""),
                            pcd1_name.replace(data_root, ""),
                            overlap,
                        )
                    )
def main():
    """Merge per-scene ``overlap.txt`` files into ``overlap30.txt``.

    Scans ``{opt.target_dir}/*/pcd/overlap.txt`` and copies every line whose
    third field (the overlap ratio) is at least 0.3 into
    ``{opt.target_dir}/overlap30.txt``.
    """
    overlaps = glob.glob(os.path.join(opt.target_dir, "*/pcd/overlap.txt"))
    with open(os.path.join(opt.target_dir, "overlap30.txt"), "w") as f:
        for fo in overlaps:
            # Open each input in a context manager so the handle is closed
            # promptly instead of leaking one descriptor per scene.
            with open(fo) as overlap_file:
                for line in overlap_file:
                    fields = line.split()
                    if not fields:
                        # Tolerate blank lines (e.g. a trailing newline).
                        continue
                    pcd0, pcd1, op = fields
                    if float(op) >= 0.3:
                        print("{} {} {}".format(pcd0, pcd1, op), file=f)
    print("done")
help="Path to the ScanNet dataset containing scene folders", 39 | ) 40 | parser.add_argument( 41 | "--output_root", 42 | required=True, 43 | help="Output path where train/val folders will be located", 44 | ) 45 | opt = parser.parse_args() 46 | sens_list = sorted(glob.glob(os.path.join(opt.dataset_root, "scans/scene*/*.sens"))) 47 | # Preprocess data. 48 | pool = ProcessPoolExecutor(max_workers=mp.cpu_count()) 49 | # pool = ProcessPoolExecutor(max_workers=1) 50 | print("Processing scenes...") 51 | _ = list(pool.map(parse_sens, sens_list, repeat(opt.output_root))) 52 | -------------------------------------------------------------------------------- /pointcept/datasets/preprocessing/scannet/scannet_pair/reader.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os, sys 3 | 4 | from SensorData import SensorData 5 | 6 | 7 | def reader( 8 | filename, 9 | output_path, 10 | frame_skip, 11 | export_color_images=False, 12 | export_depth_images=False, 13 | export_poses=False, 14 | export_intrinsics=False, 15 | ): 16 | if not os.path.exists(output_path): 17 | os.makedirs(output_path) 18 | 19 | # load the data 20 | print("loading %s..." 
@DATASETS.register_module()
class ScanNetPairDataset(Dataset):
    """Frame-pair dataset for contrastive pre-training on ScanNet.

    Pairs are read from every ``{data_root}/*/pcd/overlap.txt``; each line is
    ``"<view1 path> <view2 path> <overlap>"`` and a pair is kept when its
    overlap exceeds ``overlap_threshold``.  The stored paths are appended to
    ``data_root`` verbatim when loading.

    Args:
        data_root: Root directory of the pre-processed pair data.
        overlap_threshold: Minimum (exclusive) overlap ratio of a kept pair.
        view1_transform: Transform config for the first view.
        view2_transform: Transform config for the second view.
        loop: Number of times the pair list is repeated per epoch.
        **kwargs: Accepted and ignored.
    """

    def __init__(
        self,
        data_root="data/scannet_pair",
        overlap_threshold=0.3,
        view1_transform=None,
        view2_transform=None,
        loop=1,
        **kwargs
    ):
        super(ScanNetPairDataset, self).__init__()
        self.data_root = data_root
        self.overlap_threshold = overlap_threshold
        self.view1_transform = Compose(view1_transform)
        self.view2_transform = Compose(view2_transform)
        self.loop = loop
        self.data_list = self.get_data_list()
        logger = get_root_logger()
        logger.info("Totally {} x {} samples.".format(len(self.data_list), self.loop))

    def get_data_list(self):
        """Collect ``[view1_path, view2_path]`` for all sufficiently overlapping pairs."""
        data_list = []
        overlap_list = glob.glob(
            os.path.join(self.data_root, "*", "pcd", "overlap.txt")
        )
        for overlap_file in overlap_list:
            with open(overlap_file) as f:
                overlap = [line.split() for line in f]
            data_list.extend(
                [
                    pair[:2]
                    for pair in overlap
                    # Lines with fewer than three fields (e.g. blank lines)
                    # are skipped instead of raising IndexError.
                    if len(pair) >= 3 and float(pair[2]) > self.overlap_threshold
                ]
            )
        return data_list

    def get_data(self, idx):
        """Load both views of pair ``idx`` (wrapped around the pair list)."""
        pair = self.data_list[idx % len(self.data_list)]
        # Paths are concatenated, not joined: they are stored with data_root
        # stripped from the front, so they keep their leading separator.
        view1_dict = torch.load(self.data_root + pair[0])
        view2_dict = torch.load(self.data_root + pair[1])
        return view1_dict, view2_dict

    def get_data_name(self, idx):
        """Return a name for pair ``idx``: ``<scene>_<frame1>_<frame2>``.

        Each entry of ``data_list`` is a two-element list of paths, so a name
        has to be derived from both paths; calling ``os.path.basename`` on the
        list itself raises ``TypeError``.  The scene part is the directory two
        levels above the first view and is omitted when the path has none.
        """
        view1_path, view2_path = self.data_list[idx % len(self.data_list)]
        scene = os.path.basename(os.path.dirname(os.path.dirname(view1_path)))
        frames = [
            os.path.basename(path).split(".")[0] for path in (view1_path, view2_path)
        ]
        return "_".join(part for part in [scene] + frames if part)

    def prepare_train_data(self, idx):
        """Load a pair, transform each view and merge them into one dict.

        Keys of the first view are prefixed with ``view1_`` and keys of the
        second with ``view2_``.
        """
        # load data
        view1_dict, view2_dict = self.get_data(idx)
        view1_dict = self.view1_transform(view1_dict)
        view2_dict = self.view2_transform(view2_dict)
        data_dict = dict()
        for key, value in view1_dict.items():
            data_dict["view1_" + key] = value
        for key, value in view2_dict.items():
            data_dict["view2_" + key] = value
        return data_dict

    def prepare_test_data(self, idx):
        """Not supported: this dataset is used for training only."""
        raise NotImplementedError

    def __getitem__(self, idx):
        return self.prepare_train_data(idx)

    def __len__(self):
        return len(self.data_list) * self.loop
@DATASETS.register_module()
class Structured3DDataset(DefaultDataset):
    """Structured3D dataset laid out as ``{data_root}/{split}/{scene}/{room}.pth``."""

    def get_data_list(self):
        """Return all ``*/*.pth`` files of the configured split(s).

        ``self.split`` may be one split name or a sequence of names; any
        other type raises ``NotImplementedError``.
        """
        # str must be tested first because it is itself a Sequence.
        if isinstance(self.split, str):
            split_names = [self.split]
        elif isinstance(self.split, Sequence):
            split_names = self.split
        else:
            raise NotImplementedError
        return [
            path
            for name in split_names
            for path in glob.glob(os.path.join(self.data_root, name, "*/*.pth"))
        ]

    def get_data_name(self, idx):
        """Return ``"<scene>_<room>"`` for the (wrapped) sample index ``idx``."""
        path = self.data_list[idx % len(self.data_list)]
        scene_name = os.path.basename(os.path.dirname(path))
        room_name, _ = os.path.splitext(os.path.basename(path))
        return f"{scene_name}_{room_name}"
def collate_fn(batch):
    """Collate a batch of point-cloud samples given as tensors, lists or dicts.

    * tensors are concatenated along dim 0;
    * strings are returned as a plain list;
    * list/tuple samples get the point count of their first element appended
      as an extra field, and that last field is turned into cumulative
      offsets;
    * dict samples are collated key by key, and every key containing
      ``"offset"`` is turned into cumulative offsets;
    * anything else is delegated to ``default_collate``.

    Raises:
        TypeError: if ``batch`` is not a sequence.
    """
    if not isinstance(batch, Sequence):
        # Arbitrary objects have no ``dtype`` attribute; report the type.
        raise TypeError(f"{type(batch).__name__} is not supported.")

    first = batch[0]
    if isinstance(first, torch.Tensor):
        return torch.cat(list(batch))
    if isinstance(first, str):
        # str is also a kind of Sequence, so it must be handled before it.
        return list(batch)
    if isinstance(first, Sequence):
        # Build extended copies instead of appending to the caller's samples:
        # this leaves the inputs untouched and also accepts tuples.
        extended = [[*data, torch.tensor([data[0].shape[0]])] for data in batch]
        collated = [collate_fn(samples) for samples in zip(*extended)]
        collated[-1] = torch.cumsum(collated[-1], dim=0).int()
        return collated
    if isinstance(first, Mapping):
        collated = {key: collate_fn([d[key] for d in batch]) for key in first}
        for key in collated:
            if "offset" in key:
                collated[key] = torch.cumsum(collated[key], dim=0)
        return collated
    return default_collate(batch)


def point_collate_fn(batch, mix_prob=0):
    """Collate dict samples and, with probability ``mix_prob``, merge pairs.

    When mixing triggers, the cumulative ``"offset"`` entries are thinned so
    that consecutive samples share one segment (Mix3D,
    https://arxiv.org/pdf/2110.02210.pdf); the final offset is always kept.
    """
    assert isinstance(
        batch[0], Mapping
    )  # currently, only support input_dict, rather than input_list
    batch = collate_fn(batch)
    if "offset" in batch.keys():
        if random.random() < mix_prob:
            batch["offset"] = torch.cat(
                [batch["offset"][1:-1:2], batch["offset"][-1].unsqueeze(0)], dim=0
            )
    return batch


def gaussian_kernel(dist2: np.ndarray, a: float = 1, c: float = 5):
    """Return ``a * exp(-dist2 / (2 * c**2))`` element-wise."""
    return a * np.exp(-dist2 / (2 * c**2))
@DATASETS.register_module()
class WaymoDataset(DefaultDataset):
    """Dataset reading ``<split>/<sequence>/velodyne/*.bin`` scans."""

    def __init__(
        self,
        split="training",
        data_root="data/waymo",
        transform=None,
        test_mode=False,
        test_cfg=None,
        loop=1,
        ignore_index=-1,
    ):
        self.ignore_index = ignore_index
        super().__init__(
            split=split,
            data_root=data_root,
            transform=transform,
            test_mode=test_mode,
            test_cfg=test_cfg,
            loop=loop,
        )

    def get_data_list(self):
        """Return all ``.bin`` scan paths of the configured split(s).

        A single split name is normalised to a one-element list, and that
        list is stored back on ``self.split``.
        """
        if isinstance(self.split, str):
            self.split = [self.split]

        return [
            scan_path
            for split in self.split
            for scan_path in glob.glob(
                os.path.join(self.data_root, split, "*", "velodyne", "*.bin")
            )
        ]

    def get_data(self, idx):
        """Load one scan as ``dict(coord, strength, segment)``.

        The scan file is read as float32 rows of four values: the first three
        become ``coord`` and ``tanh`` of the last becomes ``strength``.
        Labels come from the sibling ``labels/*.label`` file (second int32
        column minus one); if that file is missing, ``segment`` is all zeros.
        """
        data_path = self.data_list[idx % len(self.data_list)]
        with open(data_path, "rb") as scan_file:
            scan = np.fromfile(scan_file, dtype=np.float32).reshape(-1, 4)

        label_file = data_path.replace("velodyne", "labels").replace(".bin", ".label")
        if not os.path.exists(label_file):
            segment = np.zeros(scan.shape[0]).astype(np.int32)
        else:
            with open(label_file, "rb") as label_handle:
                labels = np.fromfile(label_handle, dtype=np.int32).reshape(-1, 2)
            segment = labels[:, 1] - 1  # ignore_index 0 -> -1

        return dict(
            coord=scan[:, :3],
            strength=np.tanh(scan[:, -1].reshape([-1, 1])),
            segment=segment,
        )

    def get_data_name(self, idx):
        """Return ``"<sequence folder>_<frame stem>"`` for the sample at ``idx``."""
        velodyne_dir, file_name = os.path.split(
            self.data_list[idx % len(self.data_list)]
        )
        sequence_name = os.path.basename(os.path.dirname(velodyne_dir))
        frame_name = os.path.splitext(file_name)[0]
        return f"{sequence_name}_{frame_name}"
15 | 16 | def before_train(self): 17 | pass 18 | 19 | def before_epoch(self): 20 | pass 21 | 22 | def before_step(self): 23 | pass 24 | 25 | def after_step(self): 26 | pass 27 | 28 | def after_epoch(self): 29 | pass 30 | 31 | def after_train(self): 32 | pass 33 | -------------------------------------------------------------------------------- /pointcept/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_model 2 | from .default import DefaultSegmentor, DefaultClassifier 3 | 4 | # Backbones 5 | from .sparse_unet import * 6 | from .point_transformer import * 7 | from .point_transformer_v2 import * 8 | from .point_transformer_v3 import * 9 | from .stratified_transformer import * 10 | from .spvcnn import * 11 | from .octformer import * 12 | from .oacnns import * 13 | 14 | # from .swin3d import * 15 | 16 | # Semantic Segmentation 17 | from .context_aware_classifier import * 18 | 19 | # Instance Segmentation 20 | from .point_group import * 21 | 22 | # Pretraining 23 | from .masked_scene_contrast import * 24 | from .point_prompt_training import * 25 | -------------------------------------------------------------------------------- /pointcept/models/builder.py: -------------------------------------------------------------------------------- 1 | """ 2 | Model Builder 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 
def build_model(cfg):
    """Instantiate the model described by ``cfg`` via the ``MODELS`` registry."""
    model = MODELS.build(cfg)
    return model
class Criteria(object):
    """Combine several loss terms built from ``cfg`` into one training loss.

    Args:
        cfg: list of loss configs, each built through ``LOSSES``; ``None`` or
            an empty list means the model computes its own loss.
        loss_type: ``"EW"`` sums all terms (equal weighting); ``"GLS"``
            combines them geometrically while training.
        task_num: number of task groups used by ``"GLS"``.
    """

    def __init__(self, cfg=None, loss_type="EW", task_num=2):
        self.cfg = cfg if cfg is not None else []
        self.criteria = [LOSSES.build(cfg=loss_cfg) for loss_cfg in self.cfg]
        self.loss_type = loss_type
        self.task_num = task_num

    def __call__(self, points):
        """Evaluate the configured losses on ``points``.

        ``points`` must provide ``points["loss_mode"]`` (``"train"`` or
        ``"eval"``) whenever at least one criterion is configured.

        Returns:
            * ``points`` itself when no criterion is configured (the loss is
              computed inside the model);
            * the plain sum of all terms in ``"eval"`` mode or for ``"EW"``;
            * the geometric combination for ``"train"`` + ``"GLS"``;
            * ``0.0`` for any other mode/type combination (kept as is).
        """
        if len(self.criteria) == 0:
            # loss computation occurs in the model
            return points

        loss_mode = points["loss_mode"]

        if loss_mode == "eval" or self.loss_type == "EW":
            loss = 0.0
            for criterion in self.criteria:
                loss += criterion(points)
            return loss

        if loss_mode == "train" and self.loss_type == "GLS":
            return self._geometric_loss(
                [criterion(points) for criterion in self.criteria]
            )

        return 0.0

    def _geometric_loss(self, losses):
        """Return ``(prod_i task_i) ** (1 / task_num)``.

        The first ``task_num - 1`` terms are tasks of their own and all
        remaining terms are summed into the last task.  With three terms and
        ``task_num == 2`` that is ``sqrt(l0 * (l1 + l2))`` (MSE, Cross
        Entropy + Lovasz); with two terms and ``task_num == 1`` it is
        ``l0 + l1``.  Unlike the fixed-index version, no configured term is
        dropped, and ``task_num == len(losses)`` no longer reaches
        ``torch.pow`` with a Python list.

        Raises:
            ValueError: if ``task_num`` is below 1 or exceeds the term count.
        """
        if self.task_num < 1 or len(losses) < self.task_num:
            raise ValueError(
                f"GLS needs 1 <= task_num <= number of losses, "
                f"got task_num={self.task_num} and {len(losses)} losses."
            )
        head_count = self.task_num - 1
        task_losses = losses[:head_count] + [sum(losses[head_count:])]

        product = task_losses[0]
        for term in task_losses[1:]:
            product = product * term
        return torch.pow(product, 1.0 / self.task_num)


def build_criteria(cfg, loss_type="EW", task_num=2):
    """Build a ``Criteria`` object from a list of loss configs."""
    return Criteria(cfg, loss_type=loss_type, task_num=task_num)
def forward(self, input):
    """Run ``input`` through every registered sub-module in order.

    Dispatch depends on the sub-module kind:
    * ``PointModule``: called on ``input`` directly;
    * spconv module: applied to ``input.sparse_conv_feat`` when ``input`` is
      a ``Point`` (and ``feat`` is refreshed from the result), otherwise
      called on ``input``;
    * any other module: applied to the features of a ``Point`` or of a
      ``SparseConvTensor``, otherwise called on ``input``.
    """
    for module in self._modules.values():
        # Point module
        if isinstance(module, PointModule):
            input = module(input)
            continue

        # Spconv module
        if spconv.modules.is_spconv_module(module):
            if not isinstance(input, Point):
                input = module(input)
                continue
            input.sparse_conv_feat = module(input.sparse_conv_feat)
            input.feat = input.sparse_conv_feat.features
            continue

        # PyTorch module
        if isinstance(input, Point):
            input.feat = module(input.feat)
            if "sparse_conv_feat" in input.keys():
                # keep the sparse tensor in sync with the updated features
                input.sparse_conv_feat = input.sparse_conv_feat.replace_feature(
                    input.feat
                )
        elif isinstance(input, spconv.SparseConvTensor):
            # tensors without any active index are passed through untouched
            if input.indices.shape[0] != 0:
                input = input.replace_feature(module(input.features))
        else:
            input = module(input)
    return input
-------------------------------------------------------------------------------- /pointcept/models/oacnns/__init__.py: -------------------------------------------------------------------------------- 1 | from .oacnns_v1m1_base import OACNNs 2 | -------------------------------------------------------------------------------- /pointcept/models/octformer/__init__.py: -------------------------------------------------------------------------------- 1 | from .octformer_v1m1_base import OctFormer 2 | -------------------------------------------------------------------------------- /pointcept/models/point_group/__init__.py: -------------------------------------------------------------------------------- 1 | from .point_group_v1m1_base import PointGroup 2 | -------------------------------------------------------------------------------- /pointcept/models/point_prompt_training/__init__.py: -------------------------------------------------------------------------------- 1 | from .point_prompt_training_v1m1_language_guided import * 2 | from .point_prompt_training_v1m2_decoupled import * 3 | 4 | from .prompt_driven_normalization import PDNorm 5 | -------------------------------------------------------------------------------- /pointcept/models/point_prompt_training/point_prompt_training_v1m2_decoupled.py: -------------------------------------------------------------------------------- 1 | """ 2 | Point Prompt Training with decoupled segmentation head 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 
6 | """ 7 | 8 | from functools import partial 9 | from collections import OrderedDict 10 | 11 | import torch 12 | import torch.nn as nn 13 | from pointcept.models.utils.structure import Point 14 | from pointcept.models.builder import MODELS 15 | from pointcept.models.losses import build_criteria 16 | 17 | 18 | @MODELS.register_module("PPT-v1m2") 19 | class PointPromptTraining(nn.Module): 20 | """ 21 | PointPromptTraining v1m2 provides Data-driven Context and enables multi-dataset training with 22 | Decoupled Segmentation Head. PDNorm is supported by SpUNet-v1m3 to adapt the 23 | backbone to a specific dataset with a given dataset condition and context. 24 | """ 25 | 26 | def __init__( 27 | self, 28 | backbone=None, 29 | criteria=None, 30 | backbone_out_channels=96, 31 | context_channels=256, 32 | conditions=("Structured3D", "ScanNet", "S3DIS"), 33 | num_classes=(25, 20, 13), 34 | backbone_mode=False, 35 | ): 36 | super().__init__() 37 | assert len(conditions) == len(num_classes) 38 | assert backbone.type in ["SpUNet-v1m3", "PT-v2m3", "PT-v3m1"] 39 | self.backbone = MODELS.build(backbone) 40 | self.criteria = build_criteria(criteria) 41 | self.conditions = conditions 42 | self.embedding_table = nn.Embedding(len(conditions), context_channels) 43 | self.backbone_mode = backbone_mode 44 | self.seg_heads = nn.ModuleList( 45 | [nn.Linear(backbone_out_channels, num_cls) for num_cls in num_classes] 46 | ) 47 | 48 | def forward(self, data_dict): 49 | condition = data_dict["condition"][0] 50 | assert condition in self.conditions 51 | context = self.embedding_table( 52 | torch.tensor( 53 | [self.conditions.index(condition)], device=data_dict["coord"].device 54 | ) 55 | ) 56 | data_dict["context"] = context 57 | point = self.backbone(data_dict) 58 | # Backbone added after v1.5.0 return Point instead of feat and use DefaultSegmentorV2 59 | # TODO: remove this part after make all backbone return Point only. 
@MODULES.register_module()
class PDNorm(PointModule):
    """Normalization layer selected (and optionally modulated) per dataset.

    Args:
        num_features: channel count passed to ``norm_layer``.
        norm_layer: factory called as ``norm_layer(num_features)``; an
            already constructed ``nn.Module`` is also accepted when
            ``decouple`` is False.
        context_channels: width of ``point.context`` used by the adaptive
            modulation.
        conditions: dataset names; ``decouple=True`` keeps one norm each.
        decouple: keep a separate norm per condition instead of a shared one.
        adaptive: additionally scale/shift the output from ``point.context``.
    """

    def __init__(
        self,
        num_features,
        norm_layer,
        context_channels=256,
        conditions=("ScanNet", "S3DIS", "Structured3D"),
        decouple=True,
        adaptive=False,
    ):
        super().__init__()
        self.conditions = conditions
        self.decouple = decouple
        self.adaptive = adaptive
        if self.decouple:
            self.norm = nn.ModuleList([norm_layer(num_features) for _ in conditions])
        elif isinstance(norm_layer, nn.Module):
            # a ready-made layer was passed in; use it as is
            self.norm = norm_layer
        else:
            # Instantiate the shared layer.  Storing the factory itself would
            # make ``forward`` call the constructor on the feature tensor.
            self.norm = norm_layer(num_features)
        if self.adaptive:
            self.modulation = nn.Sequential(
                nn.SiLU(), nn.Linear(context_channels, 2 * num_features, bias=True)
            )

    def forward(self, point):
        """Normalize ``point.feat`` in place and return ``point``.

        ``point.condition`` may be a string or a sequence whose first item is
        used.  With ``adaptive=True`` the result is further transformed as
        ``feat * (1 + scale) + shift`` with both halves predicted from
        ``point.context``.
        """
        assert {"feat", "condition"}.issubset(point.keys())
        if isinstance(point.condition, str):
            condition = point.condition
        else:
            condition = point.condition[0]
        if self.decouple:
            assert condition in self.conditions
            norm = self.norm[self.conditions.index(condition)]
        else:
            norm = self.norm
        point.feat = norm(point.feat)
        if self.adaptive:
            assert "context" in point.keys()
            shift, scale = self.modulation(point.context).chunk(2, dim=1)
            point.feat = point.feat * (1.0 + scale) + shift
        return point
6 | """ 7 | 8 | from .point_transformer_v2m1_origin import * 9 | from .point_transformer_v2m2_base import * 10 | from .point_transformer_v2m3_pdnorm import * 11 | -------------------------------------------------------------------------------- /pointcept/models/point_transformer_v3/__init__.py: -------------------------------------------------------------------------------- 1 | from .point_transformer_v3m1_base import * 2 | -------------------------------------------------------------------------------- /pointcept/models/sparse_unet/__init__.py: -------------------------------------------------------------------------------- 1 | from .mink_unet import * 2 | from .spconv_unet_v1m1_base import * 3 | from .spconv_unet_v1m2_bn_momentum import * 4 | from .spconv_unet_v1m3_pdnorm import * 5 | -------------------------------------------------------------------------------- /pointcept/models/spvcnn/__init__.py: -------------------------------------------------------------------------------- 1 | from .ts_spvcnn import * 2 | -------------------------------------------------------------------------------- /pointcept/models/stratified_transformer/__init__.py: -------------------------------------------------------------------------------- 1 | from .stratified_transformer_v1m1_origin import StratifiedTransformer 2 | from .stratified_transformer_v1m2_refine import StratifiedTransformer 3 | -------------------------------------------------------------------------------- /pointcept/models/swin3d/__init__.py: -------------------------------------------------------------------------------- 1 | from .swin3d_v1m1_base import Swin3DUNet 2 | -------------------------------------------------------------------------------- /pointcept/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .misc import offset2batch, offset2bincount, batch2offset, off_diagonal 2 | from .checkpoint import checkpoint 3 | from .serialization import 
class CheckpointFunction(torch.autograd.Function):
    """Autograd function that re-runs ``run_function`` in the backward pass.

    ``apply(run_function, length, *args)`` treats the first ``length``
    entries of ``args`` as call arguments and the rest as extra tensors that
    should also receive gradients.
    """

    @staticmethod
    def forward(ctx, run_function, length, *args):
        call_args, extra_params = args[:length], args[length:]
        ctx.run_function = run_function
        ctx.input_tensors = list(call_args)
        ctx.input_params = list(extra_params)

        # No graph is recorded here; it is rebuilt in ``backward``.
        with torch.no_grad():
            return ctx.run_function(*ctx.input_tensors)

    @staticmethod
    def backward(ctx, *output_grads):
        detached = [t.detach().requires_grad_(True) for t in ctx.input_tensors]
        ctx.input_tensors = detached
        with torch.enable_grad():
            # Fixes a bug where the first op in run_function modifies the
            # Tensor storage in place, which is not allowed for detach()'d
            # Tensors.
            views = [t.view_as(t) for t in detached]
            recomputed = ctx.run_function(*views)
        grads = torch.autograd.grad(
            recomputed,
            ctx.input_tensors + ctx.input_params,
            output_grads,
            allow_unused=True,
        )
        del ctx.input_tensors
        del ctx.input_params
        del recomputed
        # the two leading Nones belong to ``run_function`` and ``length``
        return (None, None) + grads
@torch.inference_mode()
def offset2bincount(offset):
    """Turn cumulative offsets into per-sample counts (first differences)."""
    leading_zero = torch.tensor([0], device=offset.device, dtype=torch.long)
    return torch.diff(offset, prepend=leading_zero)


@torch.inference_mode()
def offset2batch(offset):
    """Expand cumulative offsets into one batch index per element."""
    counts = offset2bincount(offset)
    sample_ids = torch.arange(len(counts), device=offset.device, dtype=torch.long)
    return sample_ids.repeat_interleave(counts)


@torch.inference_mode()
def batch2offset(batch):
    """Turn per-element batch indices into cumulative offsets."""
    counts = batch.bincount()
    return torch.cumsum(counts, dim=0).long()


def off_diagonal(x):
    """Return the off-diagonal elements of a square matrix, flattened."""
    rows, cols = x.shape
    assert rows == cols
    # Dropping the last element and viewing as (n - 1, n + 1) puts every
    # diagonal entry into column 0, which is then sliced away.
    shifted = x.flatten()[:-1].view(rows - 1, rows + 1)
    return shifted[:, 1:].flatten()
@torch.inference_mode()
def encode(grid_coord, batch=None, depth=16, order="z"):
    """Serialize grid coordinates into integer codes.

    Args:
        grid_coord: coordinates indexed as ``[:, 0..2]``.
        batch: optional batch index per row, stored above the ``depth * 3``
            code bits.
        depth: bits per axis.
        order: ``"z"``, ``"z-trans"``, ``"hilbert"`` or ``"hilbert-trans"``;
            the ``-trans`` variants swap the first two axes before encoding.
    """
    assert order in {"z", "z-trans", "hilbert", "hilbert-trans"}
    if order.endswith("-trans"):
        grid_coord = grid_coord[:, [1, 0, 2]]
    if order.startswith("z"):
        code = z_order_encode(grid_coord, depth=depth)
    else:
        code = hilbert_encode(grid_coord, depth=depth)
    if batch is not None:
        batch = batch.long()
        code = batch << depth * 3 | code
    return code


@torch.inference_mode()
def decode(code, depth=16, order="z"):
    """Invert :func:`encode`, returning ``(grid_coord, batch)``.

    All four orders produced by ``encode`` are accepted; for the ``-trans``
    variants the axis swap (its own inverse) is applied again after decoding
    so the original axis order is restored.
    """
    assert order in {"z", "z-trans", "hilbert", "hilbert-trans"}
    batch = code >> depth * 3
    code = code & ((1 << depth * 3) - 1)
    if order.startswith("z"):
        grid_coord = z_order_decode(code, depth=depth)
    else:
        grid_coord = hilbert_decode(code, depth=depth)
    if order.endswith("-trans"):
        grid_coord = grid_coord[:, [1, 0, 2]]
    return grid_coord, batch


def z_order_encode(grid_coord: torch.Tensor, depth: int = 16):
    """Z-order encode the three coordinate columns of ``grid_coord``."""
    x, y, z = grid_coord[:, 0].long(), grid_coord[:, 1].long(), grid_coord[:, 2].long()
    # we block the support to batch, maintain batched code in Point class
    code = z_order_encode_(x, y, z, b=None, depth=depth)
    return code


def z_order_decode(code: torch.Tensor, depth):
    """Z-order decode ``code`` and stack the axes on the last dimension."""
    x, y, z = z_order_decode_(code, depth=depth)
    grid_coord = torch.stack([x, y, z], dim=-1)  # (N, 3)
    return grid_coord


def hilbert_encode(grid_coord: torch.Tensor, depth: int = 16):
    """Hilbert-curve encode 3-D ``grid_coord`` with ``depth`` bits per axis."""
    return hilbert_encode_(grid_coord, num_dims=3, num_bits=depth)


def hilbert_decode(code: torch.Tensor, depth: int = 16):
    """Hilbert-curve decode ``code`` back to 3-D grid coordinates."""
    return hilbert_decode_(code, num_dims=3, num_bits=depth)
def get_random_seed():
    """Return an integer seed mixed from the pid, the clock and OS entropy."""
    clock_part = int(datetime.now().strftime("%S%f"))
    entropy_part = int.from_bytes(os.urandom(2), "big")
    return os.getpid() + clock_part + entropy_part


def set_seed(seed=None):
    """Seed Python, NumPy and PyTorch RNGs and make cuDNN deterministic.

    When ``seed`` is ``None`` a fresh one is drawn from ``get_random_seed``.
    ``PYTHONHASHSEED`` is also exported with the chosen value.
    """
    if seed is None:
        seed = get_random_seed()
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)
    cudnn.benchmark = False
    cudnn.deterministic = True
    os.environ["PYTHONHASHSEED"] = str(seed)
def build_optimizer(cfg, model, param_dicts=None):
    """Build the optimizer described by ``cfg`` for ``model``.

    Without ``param_dicts`` all parameters form a single group.  Otherwise
    group 0 holds the parameters matched by no keyword (using ``cfg.lr``),
    and group ``i + 1`` holds the parameters whose name contains
    ``param_dicts[i].keyword``; the first matching keyword wins.  Each
    ``param_dicts`` entry may override ``lr``, ``momentum`` and
    ``weight_decay``.  The resulting groups are logged.
    """
    if param_dicts is None:
        cfg.params = model.parameters()
        return OPTIMIZERS.build(cfg=cfg)

    cfg.params = [dict(names=[], params=[], lr=cfg.lr)]
    for param_dict in param_dicts:
        param_group = dict(names=[], params=[])
        for option in ("lr", "momentum", "weight_decay"):
            if option in param_dict.keys():
                param_group[option] = getattr(param_dict, option)
        cfg.params.append(param_group)

    for param_name, param in model.named_parameters():
        # index 0 is the fallback group for unmatched parameters
        group_index = next(
            (
                position + 1
                for position, param_dict in enumerate(param_dicts)
                if param_dict.keyword in param_name
            ),
            0,
        )
        cfg.params[group_index]["names"].append(param_name)
        cfg.params[group_index]["params"].append(param)

    logger = get_root_logger()
    for position, group in enumerate(cfg.params):
        # names are only kept for logging and are removed before building
        param_names = group.pop("names")
        message = "".join(
            f" {key}: {group[key]};" for key in group.keys() if key != "params"
        )
        logger.info(f"Params Group {position+1} -{message} Params: {param_names}.")
    return OPTIMIZERS.build(cfg=cfg)
All Rights Reserved. 2 | # -*- coding: utf-8 -*- 3 | 4 | from time import perf_counter 5 | from typing import Optional 6 | 7 | 8 | class Timer: 9 | """ 10 | A timer which computes the time elapsed since the start/reset of the timer. 11 | """ 12 | 13 | def __init__(self) -> None: 14 | self.reset() 15 | 16 | def reset(self) -> None: 17 | """ 18 | Reset the timer. 19 | """ 20 | self._start = perf_counter() 21 | self._paused: Optional[float] = None 22 | self._total_paused = 0 23 | self._count_start = 1 24 | 25 | def pause(self) -> None: 26 | """ 27 | Pause the timer. 28 | """ 29 | if self._paused is not None: 30 | raise ValueError("Trying to pause a Timer that is already paused!") 31 | self._paused = perf_counter() 32 | 33 | def is_paused(self) -> bool: 34 | """ 35 | Returns: 36 | bool: whether the timer is currently paused 37 | """ 38 | return self._paused is not None 39 | 40 | def resume(self) -> None: 41 | """ 42 | Resume the timer. 43 | """ 44 | if self._paused is None: 45 | raise ValueError("Trying to resume a Timer that is not paused!") 46 | # pyre-fixme[58]: `-` is not supported for operand types `float` and 47 | # `Optional[float]`. 48 | self._total_paused += perf_counter() - self._paused 49 | self._paused = None 50 | self._count_start += 1 51 | 52 | def seconds(self) -> float: 53 | """ 54 | Returns: 55 | (float): the total number of seconds since the start/reset of the 56 | timer, excluding the time when the timer is paused. 57 | """ 58 | if self._paused is not None: 59 | end_time: float = self._paused # type: ignore 60 | else: 61 | end_time = perf_counter() 62 | return end_time - self._start - self._total_paused 63 | 64 | def avg_seconds(self) -> float: 65 | """ 66 | Returns: 67 | (float): the average number of seconds between every start/reset and 68 | pause. 
def _ensure_parent_dir(file_path):
    """Create the directory that will contain ``file_path`` if the path names one."""
    parent = os.path.dirname(file_path)
    # A bare file name such as "pc.ply" has an empty dirname and
    # os.makedirs("") raises, so only real parent directories are created.
    if parent:
        os.makedirs(parent, exist_ok=True)


def _box_edge_indices(num_boxes):
    """Return the (12 * num_boxes, 2) corner-index pairs for the box wireframes.

    Corners 0-3 form one face ring, corners 4-7 the opposite ring, and corner
    ``k`` is joined to corner ``k + 4``. Box ``i`` uses point indices
    ``8 * i`` to ``8 * i + 7``.
    """
    box_lines = np.array(
        [
            [0, 1],
            [1, 2],
            [2, 3],
            [3, 0],
            [4, 5],
            [5, 6],
            [6, 7],
            [7, 4],  # closes the second ring (was [7, 0], a diagonal)
            [0, 4],
            [1, 5],
            [2, 6],
            [3, 7],
        ]
    )
    offsets = 8 * np.arange(num_boxes).reshape(-1, 1, 1)
    # Broadcasting also yields a well-formed (0, 2) array when there are no boxes.
    return (box_lines[np.newaxis] + offsets).reshape(-1, 2)


def to_numpy(x):
    """Return ``x`` as a NumPy array.

    A ``torch.Tensor`` is cloned, detached, moved to the CPU and converted;
    an ``np.ndarray`` is returned unchanged.

    Raises:
        AssertionError: if the value is neither of those types.
    """
    if isinstance(x, torch.Tensor):
        x = x.clone().detach().cpu().numpy()
    assert isinstance(x, np.ndarray)
    return x


def save_point_cloud(coord, color=None, file_path="pc.ply", logger=None):
    """Write a point cloud to ``file_path`` with Open3D.

    Args:
        coord: point coordinates (tensor or array) given to ``Vector3dVector``.
        color: optional per-point colours; all ones when omitted.
        file_path: output path; its parent directory is created if needed.
        logger: optional logger that receives a confirmation message.
    """
    _ensure_parent_dir(file_path)
    coord = to_numpy(coord)
    color = np.ones_like(coord) if color is None else to_numpy(color)
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(coord)
    pcd.colors = o3d.utility.Vector3dVector(color)
    o3d.io.write_point_cloud(file_path, pcd)
    if logger is not None:
        logger.info(f"Save Point Cloud to: {file_path}")


def save_bounding_boxes(
    bboxes_corners, color=(1.0, 0.0, 0.0), file_path="bbox.ply", logger=None
):
    """Write box wireframes to ``file_path`` as an Open3D line set.

    Args:
        bboxes_corners: corners of the boxes (tensor or array). The first
            axis enumerates boxes and each box contributes 8 points after
            ``reshape(-1, 3)``.
        color: one RGB triple applied to every edge.
        file_path: output path; its parent directory is created if needed.
        logger: optional logger that receives a confirmation message.
    """
    bboxes_corners = to_numpy(bboxes_corners)
    points = bboxes_corners.reshape(-1, 3)
    lines = _box_edge_indices(len(bboxes_corners))
    colors = np.tile(np.asarray(color, dtype=np.float64), (len(lines), 1))

    line_set = o3d.geometry.LineSet()
    line_set.points = o3d.utility.Vector3dVector(points)
    line_set.lines = o3d.utility.Vector2iVector(lines)
    line_set.colors = o3d.utility.Vector3dVector(colors)
    _ensure_parent_dir(file_path)
    o3d.io.write_line_set(file_path, line_set)

    if logger is not None:
        logger.info(f"Save Boxes to: {file_path}")


def save_lines(
    points, lines, color=(1.0, 0.0, 0.0), file_path="lines.ply", logger=None
):
    """Write an arbitrary set of line segments to ``file_path`` with Open3D.

    Args:
        points: segment end points (tensor or array) given to ``Vector3dVector``.
        lines: index pairs into ``points`` given to ``Vector2iVector``.
        color: one RGB triple applied to every segment.
        file_path: output path; its parent directory is created if needed.
        logger: optional logger that receives a confirmation message.
    """
    points = to_numpy(points)
    lines = to_numpy(lines)
    colors = np.tile(np.asarray(color, dtype=np.float64), (len(lines), 1))

    line_set = o3d.geometry.LineSet()
    line_set.points = o3d.utility.Vector3dVector(points)
    line_set.lines = o3d.utility.Vector2iVector(lines)
    line_set.colors = o3d.utility.Vector3dVector(colors)
    _ensure_parent_dir(file_path)
    o3d.io.write_line_set(file_path, line_set)

    if logger is not None:
        logger.info(f"Save Lines to: {file_path}")
!= 0 ] && exit 1 7 | eval set -- "${ARGS}" 8 | while true ; do 9 | case "$1" in 10 | -t | --torch) 11 | TORCH_VERSION=$2 12 | shift 2 13 | ;; 14 | -c | --cuda) 15 | CUDA_VERSION=$2 16 | shift 2 17 | ;; 18 | --cudnn) 19 | CUDNN_VERSION=$2 20 | shift 2 21 | ;; 22 | --) 23 | break 24 | ;; 25 | *) 26 | echo "Invalid option: $1" 27 | exit 1 28 | ;; 29 | esac 30 | done 31 | 32 | CUDA_VERSION_NO_DOT=`echo ${CUDA_VERSION} | tr -d "."` 33 | BASE_TORCH_TAG=${TORCH_VERSION}-cuda${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel 34 | IMG_TAG=pointcept/pointcept:pytorch${BASE_TORCH_TAG} 35 | 36 | echo "TORCH VERSION: ${TORCH_VERSION}" 37 | echo "CUDA VERSION: ${CUDA_VERSION}" 38 | echo "CUDNN VERSION: ${CUDNN_VERSION}" 39 | 40 | 41 | cat > ./Dockerfile <<- EOM 42 | FROM pytorch/pytorch:${BASE_TORCH_TAG} 43 | 44 | # Fix nvidia-key error issue (NO_PUBKEY A4B469963BF863CC) 45 | RUN rm /etc/apt/sources.list.d/*.list 46 | 47 | # Installing apt packages 48 | RUN export DEBIAN_FRONTEND=noninteractive \ 49 | && apt -y update --no-install-recommends \ 50 | && apt -y install --no-install-recommends \ 51 | git wget tmux vim zsh build-essential cmake ninja-build libopenblas-dev libsparsehash-dev \ 52 | && apt autoremove -y \ 53 | && apt clean -y \ 54 | && export DEBIAN_FRONTEND=dialog 55 | 56 | # Install Pointcept environment 57 | RUN conda install h5py pyyaml -c anaconda -y 58 | RUN conda install sharedarray tensorboard tensorboardx yapf addict einops scipy plyfile termcolor timm -c conda-forge -y 59 | RUN conda install pytorch-cluster pytorch-scatter pytorch-sparse -c pyg -y 60 | 61 | RUN pip install --upgrade pip 62 | RUN pip install torch-geometric 63 | RUN pip install spconv-cu${CUDA_VERSION_NO_DOT} 64 | RUN pip install open3d 65 | 66 | # Build MinkowskiEngine 67 | RUN git clone https://github.com/NVIDIA/MinkowskiEngine.git 68 | WORKDIR /workspace/MinkowskiEngine 69 | RUN TORCH_CUDA_ARCH_LIST="5.2 6.0 6.1 7.0+PTX 8.0" python setup.py install --blas=openblas --force_cuda 70 | WORKDIR 
#!/bin/bash
# Build and install the extension packages that live under libs/.
#
# The libs directory is located relative to this script instead of the
# caller's working directory, so the script can be started from anywhere.
cd "$(dirname "$0")/../libs" || exit 1

for lib in pointgroup_ops pointops pointops2
do
  if [ ! -d "$lib" ]
  then
    # Previously a missing package made setup.py run in the wrong directory.
    echo "$lib --> Skipped (directory not found)" >&2
    continue
  fi

  # The subshell keeps the working directory at libs/ for the next package;
  # a failed build stops the script instead of being reported as finished.
  ( cd "$lib" && python setup.py install ) || { echo "$lib --> Failed!" >&2; exit 1; }
  echo "$lib --> Finishing!"
done
#!/bin/sh
# Launch a training run: snapshot the code into the experiment directory and
# start tools/train.py from that snapshot.
#
# Options:
#   -p  python interpreter                  (default: python)
#   -d  dataset name, selects configs/<d>/  (default: scannet)
#   -c  config name without ".py"           (default: None)
#   -n  experiment name                     (default: debug)
#   -w  weight file to load                 (default: None)
#   -r  resume flag, "true" or "false"      (default: false)
#   -g  number of GPUs                      (default: count reported by torch)

cd "$(dirname "$(dirname "$0")")" || exit
ROOT_DIR=$(pwd)
PYTHON=python

TRAIN_CODE=train.py

DATASET=scannet
CONFIG="None"
EXP_NAME=debug
WEIGHT="None"
RESUME=false
GPU=None


while getopts "p:d:c:n:w:g:r:" opt; do
  case $opt in
    p)
      PYTHON=$OPTARG
      ;;
    d)
      DATASET=$OPTARG
      ;;
    c)
      CONFIG=$OPTARG
      ;;
    n)
      EXP_NAME=$OPTARG
      ;;
    w)
      WEIGHT=$OPTARG
      ;;
    r)
      RESUME=$OPTARG
      ;;
    g)
      GPU=$OPTARG
      ;;
    \?)
      echo "Invalid option: -$OPTARG"
      ;;
  esac
done

# Fill in the GPU count when -g was not given. This test used to read and
# assign NUM_GPU, a variable nothing else uses, so the literal "None" was
# passed to --num-gpus below.
if [ "${GPU}" = 'None' ]
then
  GPU=`$PYTHON -c 'import torch; print(torch.cuda.device_count())'`
fi

echo "Experiment name: $EXP_NAME"
echo "Python interpreter dir: $PYTHON"
echo "Dataset: $DATASET"
echo "Config: $CONFIG"
echo "GPU Num: $GPU"

EXP_DIR=exp/${DATASET}/${EXP_NAME}
MODEL_DIR=${EXP_DIR}/model
CODE_DIR=${EXP_DIR}/code
CONFIG_DIR=configs/${DATASET}/${CONFIG}.py


echo " =========> CREATE EXP DIR <========="
echo "Experiment dir: $ROOT_DIR/$EXP_DIR"
# RESUME is executed as a command, so it must be "true" or "false".
if ${RESUME}
then
  # Resuming reuses the stored config and the last checkpoint.
  CONFIG_DIR=${EXP_DIR}/config.py
  WEIGHT=$MODEL_DIR/model_last.pth
else
  # A fresh run snapshots the current code into the experiment directory.
  mkdir -p "$MODEL_DIR" "$CODE_DIR"
  cp -r scripts tools pointcept "$CODE_DIR"
fi

echo "Loading config in:" $CONFIG_DIR
export PYTHONPATH=./$CODE_DIR
echo "Running code in: $CODE_DIR"


echo " =========> RUN TASK <========="

if [ "${WEIGHT}" = "None" ]
then
    $PYTHON "$CODE_DIR"/tools/$TRAIN_CODE \
    --config-file "$CONFIG_DIR" \
    --num-gpus "$GPU" \
    --options save_path="$EXP_DIR"
else
    $PYTHON "$CODE_DIR"/tools/$TRAIN_CODE \
    --config-file "$CONFIG_DIR" \
    --num-gpus "$GPU" \
    --options save_path="$EXP_DIR" resume="$RESUME" weight="$WEIGHT"
fi
def main_worker(cfg):
    """Worker entry point handed to ``launch``: set up, build the tester, run it."""
    cfg = default_setup(cfg)
    tester = TESTERS.build(dict(type=cfg.test.type, cfg=cfg))
    tester.test()


def main():
    """Evaluate a checkpoint using the settings hard-coded below.

    The config file, GPU count and options parsed from the command line are
    overwritten by these settings; only the machine count, machine rank and
    dist URL passed to ``launch`` come from the parsed arguments.
    """
    dataset = "nuscenes"  # {scannet, scannet200, nuscenes}
    config = "CDSegNet"  # {CDSegNet, PTv3_CNF}
    num_gpus = 2

    weight = f"/root/models/models/{dataset}/{config}/best_model.pth"

    # Anchor repository paths at the directory above tools/ rather than at
    # "../", which only resolved when the script was started inside tools/.
    repo_root = os.path.dirname(SCRIPT_DIR)
    config_file = os.path.join(repo_root, "configs", dataset, f"{config}.py")
    options = {"save_path": os.path.join(repo_root, "exp", f"{dataset}_test", config)}

    args = default_argument_parser().parse_args()
    args.config_file = config_file
    args.num_gpus = num_gpus
    args.options = options

    cfg = default_config_parser(args.config_file, args.options)
    cfg.weight = weight
    cfg.num_gpus = num_gpus

    # nG ~ N(nG; 0, tau * I), the input c' = c + nG
    cfg.noise_level = None

    # The mode of inference:
    #   SSI  : Single-Step Inference; semantic labels are generated by CN
    #          through a single-step iteration in NN.
    #   MSAI : Multi-Step Average Inference; conducts T step iterations in NN
    #          and averages the T outputs produced by CN.
    #   MSFI : Multi-Step Final Inference; determined by the output from the
    #          final iteration of CN.
    cfg.inference_mode = "SSI"
    cfg.step = 1

    launch(
        main_worker,
        num_gpus_per_machine=args.num_gpus,
        num_machines=args.num_machines,
        machine_rank=args.machine_rank,
        dist_url=args.dist_url,
        cfg=(cfg,),
    )


if __name__ == "__main__":
    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
    print(f"GPU : {os.environ['CUDA_VISIBLE_DEVICES']}")
    main()
num_gpus 49 | 50 | # nG ~ N(nG;0,\tau*I), the input c' = c + nG 51 | noise_level = None 52 | cfg.noise_level = noise_level 53 | 54 | # the mode of inference 55 | ''' 56 | SSI : Single-Step Inference, semantic labels aregenerated by CN through a single-step iteration in NN. 57 | MSAI : Multi-Step Average Inference (MSAI), 58 | MSAI conducts T step iterations in NN and averages T outputs produced by CN. 59 | MSFI : Multi-Step Final Inference, MSFI is determined by the output from the final iteration of CN. 60 | ''' 61 | inference_mode = "SSI" 62 | step = 1 63 | cfg.inference_mode = inference_mode 64 | cfg.step = step 65 | 66 | launch( 67 | main_worker, 68 | num_gpus_per_machine=args.num_gpus, 69 | num_machines=args.num_machines, 70 | machine_rank=args.machine_rank, 71 | dist_url=args.dist_url, 72 | cfg=(cfg,), 73 | ) 74 | 75 | 76 | if __name__ == "__main__": 77 | 78 | 79 | os.environ["CUDA_VISIBLE_DEVICES"]="0,1" 80 | print(f"GPU : {os.environ['CUDA_VISIBLE_DEVICES']}") 81 | main() 82 | -------------------------------------------------------------------------------- /tools/test_CDSegNet_ScanNet200.py: -------------------------------------------------------------------------------- 1 | """ 2 | Main Testing Script 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 
def main_worker(cfg):
    """Worker entry point handed to ``launch``: set up, build the tester, run it."""
    cfg = default_setup(cfg)
    tester = TESTERS.build(dict(type=cfg.test.type, cfg=cfg))
    tester.test()


def main():
    """Evaluate the ScanNet200 CDSegNet checkpoint with hard-coded settings.

    The config file, GPU count and options parsed from the command line are
    overwritten by the values below; only the machine count, machine rank and
    dist URL passed to ``launch`` come from the parsed arguments.
    """
    dataset = "scannet200"  # {scannet, scannet200, nuscenes}
    config = "CDSegNet"  # {CDSegNet, PTv3_CNF}
    num_gpus = 2

    weight = f"/root/models/models/{dataset}/{config}/best_model.pth"

    # Resolve repository paths from this file's location; the former "../"
    # prefixes were relative to the working directory and only worked when
    # the script was launched from inside tools/.
    repo_root = os.path.dirname(SCRIPT_DIR)
    config_file = os.path.join(repo_root, "configs", dataset, f"{config}.py")
    options = {"save_path": os.path.join(repo_root, "exp", f"{dataset}_test", config)}

    args = default_argument_parser().parse_args()
    args.config_file = config_file
    args.num_gpus = num_gpus
    args.options = options

    cfg = default_config_parser(args.config_file, args.options)
    cfg.weight = weight
    cfg.num_gpus = num_gpus

    # nG ~ N(nG; 0, tau * I), the input c' = c + nG
    cfg.noise_level = None

    # The mode of inference:
    #   SSI  : Single-Step Inference; semantic labels are generated by CN
    #          through a single-step iteration in NN.
    #   MSAI : Multi-Step Average Inference; conducts T step iterations in NN
    #          and averages the T outputs produced by CN.
    #   MSFI : Multi-Step Final Inference; determined by the output from the
    #          final iteration of CN.
    cfg.inference_mode = "SSI"
    cfg.step = 1

    launch(
        main_worker,
        num_gpus_per_machine=args.num_gpus,
        num_machines=args.num_machines,
        machine_rank=args.machine_rank,
        dist_url=args.dist_url,
        cfg=(cfg,),
    )


if __name__ == "__main__":
    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
    print(f"GPU : {os.environ['CUDA_VISIBLE_DEVICES']}")
    main()
num_gpus 49 | 50 | # nG ~ N(nG;0,\tau*I), the input c' = c + nG 51 | noise_level = None 52 | cfg.noise_level = noise_level 53 | 54 | # the mode of inference 55 | ''' 56 | SSI : Single-Step Inference, semantic labels aregenerated by CN through a single-step iteration in NN. 57 | MSAI : Multi-Step Average Inference (MSAI), 58 | MSAI conducts T step iterations in NN and averages T outputs produced by CN. 59 | MSFI : Multi-Step Final Inference, MSFI is determined by the output from the final iteration of CN. 60 | ''' 61 | inference_mode = "SSI" 62 | step = 1 63 | cfg.inference_mode = inference_mode 64 | cfg.step = step 65 | 66 | launch( 67 | main_worker, 68 | num_gpus_per_machine=args.num_gpus, 69 | num_machines=args.num_machines, 70 | machine_rank=args.machine_rank, 71 | dist_url=args.dist_url, 72 | cfg=(cfg,), 73 | ) 74 | 75 | 76 | if __name__ == "__main__": 77 | 78 | 79 | os.environ["CUDA_VISIBLE_DEVICES"]="0,1" 80 | print(f"GPU : {os.environ['CUDA_VISIBLE_DEVICES']}") 81 | main() 82 | -------------------------------------------------------------------------------- /tools/test_time.py: -------------------------------------------------------------------------------- 1 | """ 2 | Main Testing Script 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 
def main_worker(cfg):
    """Worker entry point handed to ``launch``: set up, build the tester, run it."""
    cfg = default_setup(cfg)
    tester = TESTERS.build(dict(type=cfg.test.type, cfg=cfg))
    tester.test()


def main():
    """Run the single-GPU test that uses the ``<config>_time.py`` config.

    The config file, GPU count and options parsed from the command line are
    overwritten by the values below; only the machine count, machine rank and
    dist URL passed to ``launch`` come from the parsed arguments.
    """
    dataset = "scannet"  # {scannet, scannet200, nuscenes}
    config = "CDSegNet"  # {CDSegNet, PTv3_CNF}
    num_gpus = 1

    weight = f"/root/models/models/{dataset}/{config}/best_model.pth"

    # Resolve repository paths from this file's location; the former "../"
    # prefixes were relative to the working directory and only worked when
    # the script was launched from inside tools/.
    repo_root = os.path.dirname(SCRIPT_DIR)
    config_file = os.path.join(repo_root, "configs", dataset, f"{config}_time.py")
    options = {
        "save_path": os.path.join(repo_root, "exp", f"{dataset}_test", f"{config}_time")
    }

    args = default_argument_parser().parse_args()
    args.config_file = config_file
    args.num_gpus = num_gpus
    args.options = options

    cfg = default_config_parser(args.config_file, args.options)
    cfg.weight = weight
    cfg.num_gpus = num_gpus

    # nG ~ N(nG; 0, tau * I), the input c' = c + nG
    cfg.noise_level = None

    # The mode of inference:
    #   SSI  : Single-Step Inference; semantic labels are generated by CN
    #          through a single-step iteration in NN.
    #   MSAI : Multi-Step Average Inference; conducts T step iterations in NN
    #          and averages the T outputs produced by CN.
    #   MSFI : Multi-Step Final Inference; determined by the output from the
    #          final iteration of CN.
    cfg.inference_mode = "SSI"
    cfg.step = 1

    launch(
        main_worker,
        num_gpus_per_machine=args.num_gpus,
        num_machines=args.num_machines,
        machine_rank=args.machine_rank,
        dist_url=args.dist_url,
        cfg=(cfg,),
    )


if __name__ == "__main__":
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    print(f"GPU : {os.environ['CUDA_VISIBLE_DEVICES']}")
    main()
52 | 53 | weight = f"../exp/{dataset}/{config}/model/model_last.pth" 54 | cfg.weight = weight 55 | cfg.resume = True 56 | 57 | # After {save_freq_threshold} epochs, the checkpoint is saved every {save_freq} epochs. 58 | save_freq = 1 59 | save_freq_threshold = 70 60 | cfg.save_freq = save_freq 61 | cfg.hooks[4].save_freq = save_freq 62 | cfg.save_freq_threshold = save_freq_threshold 63 | 64 | if(cfg.data_root.__contains__("scannet_debug")): 65 | cfg.eval_epoch = cfg.epoch = 1 66 | cfg.data.train.loop = 1 67 | 68 | launch( 69 | main_worker, 70 | num_gpus_per_machine=args.num_gpus, 71 | num_machines=args.num_machines, 72 | machine_rank=args.machine_rank, 73 | dist_url=args.dist_url, 74 | cfg=(cfg,), 75 | ) 76 | 77 | 78 | if __name__ == "__main__": 79 | 80 | os.environ["CUDA_VISIBLE_DEVICES"]="0,1" 81 | main() 82 | -------------------------------------------------------------------------------- /tools/train_CDSegNet_ScanNet.py: -------------------------------------------------------------------------------- 1 | """ 2 | Main Training Script 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 
def main_worker(cfg):
    """Worker entry point handed to ``launch``: set up, build the trainer, train."""
    cfg = default_setup(cfg)
    trainer = TRAINERS.build(dict(type=cfg.train.type, cfg=cfg))
    trainer.train()


def main():
    """Train CDSegNet on ScanNet using the settings hard-coded below.

    The config file, GPU count and options parsed from the command line are
    overwritten by these settings; only the machine count, machine rank and
    dist URL passed to ``launch`` come from the parsed arguments.
    """
    dataset = "scannet"  # {scannet, scannet200, nuscenes}
    config = "CDSegNet"  # {CDSegNet, PTv3_CNF}
    num_gpus = 2

    # Anchor repository paths at the directory above tools/ rather than at
    # "../", which only resolved when the script was started inside tools/.
    repo_root = os.path.dirname(SCRIPT_DIR)
    config_file = os.path.join(repo_root, "configs", dataset, f"{config}.py")

    # the path of saving results
    save_path = os.path.join(repo_root, "exp", dataset, config)
    options = {"save_path": save_path}

    args = default_argument_parser().parse_args()
    args.config_file = config_file
    args.num_gpus = num_gpus
    args.options = options

    cfg = default_config_parser(args.config_file, args.options)

    # the number of GPUs
    cfg.num_gpus = num_gpus

    # checkpoint path: the last checkpoint inside this run's own save path
    cfg.weight = os.path.join(save_path, "model", "model_last.pth")
    cfg.resume = True

    # After {save_freq_threshold} epochs, the checkpoint is saved every
    # {save_freq} epochs.
    save_freq = 1
    save_freq_threshold = 70
    cfg.save_freq = save_freq
    # NOTE(review): index 4 presumably selects the checkpoint-saving hook in
    # the config's hook list; confirm, since reordering the hooks would
    # silently retarget this assignment.
    cfg.hooks[4].save_freq = save_freq
    cfg.save_freq_threshold = save_freq_threshold

    # Debug data roots get a single short epoch.
    if "scannet_debug" in cfg.data_root:
        cfg.eval_epoch = cfg.epoch = 1
        cfg.data.train.loop = 1

    launch(
        main_worker,
        num_gpus_per_machine=args.num_gpus,
        num_machines=args.num_machines,
        machine_rank=args.machine_rank,
        dist_url=args.dist_url,
        cfg=(cfg,),
    )


if __name__ == "__main__":
    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
    main()
default_config_parser(args.config_file, args.options) 47 | 48 | # the number of GPUs 49 | cfg.num_gpus = num_gpus 50 | 51 | # checkpoint path 52 | 53 | weight = f"../exp/{dataset}/{config}/model/model_last.pth" 54 | cfg.weight = weight 55 | cfg.resume = True 56 | 57 | # After {save_freq_threshold} epochs, the checkpoint is saved every {save_freq} epochs. 58 | save_freq = 1 59 | save_freq_threshold = 70 60 | cfg.save_freq = save_freq 61 | cfg.hooks[4].save_freq = save_freq 62 | cfg.save_freq_threshold = save_freq_threshold 63 | 64 | if(cfg.data_root.__contains__("scannet_debug")): 65 | cfg.eval_epoch = cfg.epoch = 1 66 | cfg.data.train.loop = 1 67 | 68 | launch( 69 | main_worker, 70 | num_gpus_per_machine=args.num_gpus, 71 | num_machines=args.num_machines, 72 | machine_rank=args.machine_rank, 73 | dist_url=args.dist_url, 74 | cfg=(cfg,), 75 | ) 76 | 77 | 78 | if __name__ == "__main__": 79 | 80 | os.environ["CUDA_VISIBLE_DEVICES"]="0,1" 81 | main() 82 | -------------------------------------------------------------------------------- /tools/train_CDSegNet_nuScenes.py: -------------------------------------------------------------------------------- 1 | """ 2 | Main Training Script 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 
def main_worker(cfg):
    """Worker entry point handed to ``launch``: set up, build the trainer, train."""
    cfg = default_setup(cfg)
    trainer = TRAINERS.build(dict(type=cfg.train.type, cfg=cfg))
    trainer.train()


def main():
    """Train CDSegNet on nuScenes using the settings hard-coded below.

    The config file, GPU count and options parsed from the command line are
    overwritten by these settings; only the machine count, machine rank and
    dist URL passed to ``launch`` come from the parsed arguments.
    """
    dataset = "nuscenes"  # {scannet, scannet200, nuscenes}
    config = "CDSegNet"  # {CDSegNet, PTv3_CNF}
    num_gpus = 2

    # Resolve repository paths from this file's location; the former "../"
    # prefixes were relative to the working directory and only worked when
    # the script was launched from inside tools/.
    repo_root = os.path.dirname(SCRIPT_DIR)
    config_file = os.path.join(repo_root, "configs", dataset, f"{config}.py")

    # the path of saving results
    save_path = os.path.join(repo_root, "exp", dataset, config)
    options = {"save_path": save_path}

    args = default_argument_parser().parse_args()
    args.config_file = config_file
    args.num_gpus = num_gpus
    args.options = options

    cfg = default_config_parser(args.config_file, args.options)

    # the number of GPUs
    cfg.num_gpus = num_gpus

    # checkpoint path: the last checkpoint inside this run's own save path
    cfg.weight = os.path.join(save_path, "model", "model_last.pth")
    cfg.resume = True

    # After {save_freq_threshold} epochs, the checkpoint is saved every
    # {save_freq} epochs.
    save_freq = 1
    save_freq_threshold = 70
    cfg.save_freq = save_freq
    # NOTE(review): index 4 presumably selects the checkpoint-saving hook in
    # the config's hook list; confirm, since reordering the hooks would
    # silently retarget this assignment.
    cfg.hooks[4].save_freq = save_freq
    cfg.save_freq_threshold = save_freq_threshold

    # Debug data roots get a single short epoch.
    if "scannet_debug" in cfg.data_root:
        cfg.eval_epoch = cfg.epoch = 1
        cfg.data.train.loop = 1

    launch(
        main_worker,
        num_gpus_per_machine=args.num_gpus,
        num_machines=args.num_machines,
        machine_rank=args.machine_rank,
        dist_url=args.dist_url,
        cfg=(cfg,),
    )


if __name__ == "__main__":
    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
    main()