├── .gitignore ├── README.md ├── configs ├── _base_ │ ├── datasets │ │ ├── modelnet40.py │ │ ├── s3dis.py │ │ ├── scannet.py │ │ ├── scannet200.py │ │ ├── scannet_submit.py │ │ ├── semantic_kitti_19cls.py │ │ └── shapenet_part.py │ ├── default_runtime.py │ └── tests │ │ ├── classification.py │ │ ├── part_segmentation.py │ │ └── segmentation.py ├── s3dis │ ├── semseg-minkunet34c-0-base.py │ ├── semseg-ptv1-0-base.py │ ├── semseg-ptv2m1-0-base.py │ ├── semseg-ptv2m2-0-base.py │ └── semseg-spunet34c-0-base.py ├── scannet │ ├── semseg-minkunet34c-0-base.py │ ├── semseg-ptv1-0-base.py │ ├── semseg-ptv2m1-0-origin.py │ ├── semseg-ptv2m2-0-base.py │ ├── semseg-ptv2m2-1-benchmark-submit.py │ ├── semseg-ptv2m2-2-precise-evaluate.py │ ├── semseg-spunet34c-0-base.py │ ├── semseg-spunet34c-1-cn-base.py │ ├── semseg-stv1m1-0-origin.py │ └── semseg-stv1m2-0-refined.py ├── scannet200 │ ├── semseg-minkunet34c-0-base.py │ ├── semseg-ptv1-0-base.py │ ├── semseg-ptv2m1-0-base.py │ ├── semseg-ptv2m2-0-base.py │ ├── semseg-ptv2m2-1-benchmark-submit.py │ ├── semseg-spunet34c-0-base.py │ └── semseg-stv1m2-0-refined.py └── semantic_kitti │ ├── semseg-minkunet34c-0-base.py │ ├── semseg-spunet34c-0-base.py │ └── semseg-spvcnn34c-0-base.py ├── figures ├── design.png └── offset.png ├── libs ├── pointops │ ├── __init__.py │ ├── functions │ │ ├── __init__.py │ │ ├── aggregation.py │ │ ├── attention.py │ │ ├── grouping.py │ │ ├── interpolation.py │ │ ├── query.py │ │ ├── sampling.py │ │ ├── subtraction.py │ │ └── utils.py │ ├── setup.py │ └── src │ │ ├── __init__.py │ │ ├── aggregation │ │ ├── aggregation_cuda.cpp │ │ ├── aggregation_cuda_kernel.cu │ │ └── aggregation_cuda_kernel.h │ │ ├── attention │ │ ├── attention_cuda.cpp │ │ ├── attention_cuda_kernel.cu │ │ └── attention_cuda_kernel.h │ │ ├── ball_query │ │ ├── ball_query_cuda.cpp │ │ ├── ball_query_cuda_kernel.cu │ │ └── ball_query_cuda_kernel.h │ │ ├── cuda_utils.h │ │ ├── grouping │ │ ├── grouping_cuda.cpp │ │ ├── grouping_cuda_kernel.cu 
│ │ └── grouping_cuda_kernel.h │ │ ├── interpolation │ │ ├── interpolation_cuda.cpp │ │ ├── interpolation_cuda_kernel.cu │ │ └── interpolation_cuda_kernel.h │ │ ├── knn_query │ │ ├── knn_query_cuda.cpp │ │ ├── knn_query_cuda_kernel.cu │ │ └── knn_query_cuda_kernel.h │ │ ├── pointops_api.cpp │ │ ├── random_ball_query │ │ ├── random_ball_query_cuda.cpp │ │ ├── random_ball_query_cuda_kernel.cu │ │ └── random_ball_query_cuda_kernel.h │ │ ├── sampling │ │ ├── sampling_cuda.cpp │ │ ├── sampling_cuda_kernel.cu │ │ └── sampling_cuda_kernel.h │ │ └── subtraction │ │ ├── subtraction_cuda.cpp │ │ ├── subtraction_cuda_kernel.cu │ │ └── subtraction_cuda_kernel.h └── pointops2 │ ├── __init__.py │ ├── functions │ ├── __init__.py │ ├── pointops.py │ ├── pointops2.py │ ├── pointops_ablation.py │ ├── test_attention_op_step1.py │ ├── test_attention_op_step1_v2.py │ ├── test_attention_op_step2.py │ ├── test_relative_pos_encoding_op_step1.py │ ├── test_relative_pos_encoding_op_step1_v2.py │ ├── test_relative_pos_encoding_op_step1_v3.py │ ├── test_relative_pos_encoding_op_step2.py │ └── test_relative_pos_encoding_op_step2_v2.py │ ├── setup.py │ └── src │ ├── __init__.py │ ├── aggregation │ ├── aggregation_cuda.cpp │ ├── aggregation_cuda_kernel.cu │ └── aggregation_cuda_kernel.h │ ├── attention │ ├── attention_cuda.cpp │ ├── attention_cuda_kernel.cu │ └── attention_cuda_kernel.h │ ├── attention_v2 │ ├── attention_cuda_kernel_v2.cu │ ├── attention_cuda_kernel_v2.h │ └── attention_cuda_v2.cpp │ ├── cuda_utils.h │ ├── grouping │ ├── grouping_cuda.cpp │ ├── grouping_cuda_kernel.cu │ └── grouping_cuda_kernel.h │ ├── interpolation │ ├── interpolation_cuda.cpp │ ├── interpolation_cuda_kernel.cu │ └── interpolation_cuda_kernel.h │ ├── knnquery │ ├── knnquery_cuda.cpp │ ├── knnquery_cuda_kernel.cu │ └── knnquery_cuda_kernel.h │ ├── pointops_api.cpp │ ├── rpe │ ├── relative_pos_encoding_cuda.cpp │ ├── relative_pos_encoding_cuda_kernel.cu │ └── relative_pos_encoding_cuda_kernel.h │ ├── rpe_v2 │ ├── 
relative_pos_encoding_cuda_kernel_v2.cu │ ├── relative_pos_encoding_cuda_kernel_v2.h │ └── relative_pos_encoding_cuda_v2.cpp │ ├── sampling │ ├── sampling_cuda.cpp │ ├── sampling_cuda_kernel.cu │ └── sampling_cuda_kernel.h │ └── subtraction │ ├── subtraction_cuda.cpp │ ├── subtraction_cuda_kernel.cu │ └── subtraction_cuda_kernel.h ├── pcr ├── __init__.py ├── datasets │ ├── __init__.py │ ├── arkitscenes.py │ ├── builder.py │ ├── defaults.py │ ├── modelnet.py │ ├── preprocessing │ │ ├── arkitscenes │ │ │ └── preprocess_arkitscenes_mesh.py │ │ ├── s3dis │ │ │ ├── preprocess_s3dis.py │ │ │ └── preprocess_s3dis_align_raw.py │ │ └── scannet │ │ │ ├── meta_data │ │ │ ├── classes_ObjClassification-ShapeNetCore55.txt │ │ │ ├── classes_SemVoxLabel-nyu40id.txt │ │ │ ├── scannet200_constants.py │ │ │ ├── scannet200_splits.py │ │ │ ├── scannet_means.npz │ │ │ ├── scannetv1_test.txt │ │ │ ├── scannetv1_train.txt │ │ │ ├── scannetv1_val.txt │ │ │ ├── scannetv2-labels-old.combined.tsv │ │ │ ├── scannetv2-labels.combined.tsv │ │ │ ├── scannetv2_test.txt │ │ │ ├── scannetv2_train.txt │ │ │ └── scannetv2_val.txt │ │ │ ├── preprocess_scannet.py │ │ │ └── scannet_pair │ │ │ ├── SensorData.py │ │ │ ├── compute_full_overlapping.py │ │ │ ├── generage_list.py │ │ │ ├── plyfile.py │ │ │ ├── point_cloud_extractor.py │ │ │ ├── preprocess.py │ │ │ └── reader.py │ ├── s3dis.py │ ├── scannet.py │ ├── scannet_pair.py │ ├── semantic_kitti.py │ ├── shapenet_part.py │ ├── transform.py │ └── utils.py ├── engines │ ├── __init__.py │ ├── defaults.py │ ├── launch.py │ ├── test.py │ └── train.py ├── models │ ├── __init__.py │ ├── builder.py │ ├── point_transformer │ │ ├── __init__.py │ │ ├── point_transformer_cls.py │ │ ├── point_transformer_partseg.py │ │ ├── point_transformer_seg.py │ │ └── utils.py │ ├── point_transformer2 │ │ ├── __init__.py │ │ ├── point_transformer_v2m1_origin.py │ │ └── point_transformer_v2m2_base.py │ ├── sparse_unet │ │ ├── __init__.py │ │ ├── mink_unet.py │ │ └── spconv_unet.py 
│ ├── spvcnn │ │ ├── __init__.py │ │ └── ts_spvcnn.py │ ├── stratified_transformer │ │ ├── __init__.py │ │ ├── stratified_transformer_v1m1_origin.py │ │ └── stratified_transformer_v1m2_refine.py │ └── utils.py └── utils │ ├── __init__.py │ ├── comm.py │ ├── config.py │ ├── env.py │ ├── events.py │ ├── logger.py │ ├── losses.py │ ├── misc.py │ ├── optimizer.py │ ├── path.py │ ├── registry.py │ ├── scheduler.py │ └── visualization.py ├── scripts ├── pretrain.sh ├── test.sh └── train.sh └── tools ├── pretrain.py ├── test.py └── train.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | .idea/ 161 | -------------------------------------------------------------------------------- /configs/_base_/datasets/modelnet40.py: -------------------------------------------------------------------------------- 1 | """ 2 | Unmaintained 3 | it is kept for reference 4 | """ 5 | 6 | # dataset settings 7 | dataset_type = "ModelNetDataset" 8 | data_root = "data/modelnet40_normal_resampled" 9 | cache_data = False 10 | names = ["airplane", "bathtub", "bed", "bench", "bookshelf", 11 | "bottle", "bowl", "car", "chair", "cone", 12 | "cup", "curtain", "desk", "door", "dresser", 13 | "flower_pot", "glass_box", "guitar", "keyboard", "lamp", 14 | "laptop", "mantel", "monitor", "night_stand", "person", 15 | "piano", "plant", "radio", "range_hood", "sink", 16 | "sofa", "stairs", "stool", "table", "tent", 17 | "toilet", "tv_stand", "vase", "wardrobe", "xbox"] 18 | 19 | data = dict( 20 | num_classes=40, 21 | ignore_label=-1, # dummy ignore 22 | names=names, 23 | train=dict( 24 | type=dataset_type, 25 | split="train", 26 | data_root=data_root, 27 | class_names=names, 28 | transform=[ 29 | dict(type="NormalizeCoord"), 30 | # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), 31 | # dict(type="CenterShift", apply_z=True), 32 | # dict(type="RandomRotate", angle=[-1, 1], axis='z', center=[0, 0, 0], p=0.5), 33 | # dict(type="RandomRotate", angle=[-1/24, 1/24], axis='x', p=0.5), 34 | # dict(type="RandomRotate", angle=[-1/24, 1/24], axis='y', p=0.5), 35 | dict(type="RandomScale", scale=[0.9, 1.1]), 36 | # dict(type="RandomFlip", p=0.5), 37 | # dict(type="RandomJitter", sigma=0.005, clip=0.02), 38 | dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), 39 | dict(type="Voxelize", voxel_size=0.02, hash_type='fnv', mode='train'), 40 | # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), 41 | 42 | # dict(type="Voxelize", voxel_size=0.01, hash_type='fnv', mode='train'), 43 | # dict(type="SphereCrop", point_max=10000, mode='random'), 44 | # 
dict(type="CenterShift", apply_z=True), 45 | dict(type="ShufflePoint"), 46 | dict(type="ToTensor"), 47 | ], 48 | loop=2, 49 | test_mode=False, 50 | ), 51 | 52 | val=dict( 53 | type=dataset_type, 54 | split="test", 55 | data_root=data_root, 56 | class_names=names, 57 | transform=[ 58 | dict(type="NormalizeCoord"), 59 | dict(type="ToTensor"), 60 | ], 61 | loop=1, 62 | test_mode=False, 63 | ), 64 | 65 | test=dict( 66 | type=dataset_type, 67 | split="test", 68 | data_root=data_root, 69 | class_names=names, 70 | transform=[ 71 | dict(type="NormalizeCoord"), 72 | dict(type="ToTensor"), 73 | ], 74 | loop=1, 75 | test_mode=True, 76 | test_cfg=dict( 77 | ) 78 | ), 79 | ) 80 | 81 | criteria = [ 82 | dict(type="CrossEntropyLoss", 83 | loss_weight=1.0, 84 | ignore_index=data["ignore_label"]) 85 | ] 86 | 87 | -------------------------------------------------------------------------------- /configs/_base_/datasets/scannet_submit.py: -------------------------------------------------------------------------------- 1 | _base_ = ['scannet.py'] 2 | 3 | data = dict( 4 | train=dict( 5 | split=["train", "val"], 6 | ), 7 | 8 | val=dict( 9 | split="val", 10 | ), 11 | 12 | test=dict( 13 | split="test", 14 | ), 15 | ) -------------------------------------------------------------------------------- /configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | weight = None # path to model weight 2 | resume = False # whether to resume training process 3 | evaluate = True # evaluate after each epoch training process 4 | test_only = False # test process 5 | 6 | seed = None # train process will init a random seed and record 7 | save_path = "exp/default" 8 | num_worker = 32 # total worker in all gpu 9 | batch_size = 16 # total batch size in all gpu 10 | batch_size_val = None # auto adapt to bs 1 for each gpu 11 | batch_size_test = 1 12 | epoch = 100 # total epoch, data loop = epoch // eval_epoch 13 | eval_epoch = 100 # sche total eval & 
checkpoint epoch 14 | save_freq = None # None or int, None indicate only save model last 15 | 16 | eval_metric = "mIoU" 17 | 18 | sync_bn = False 19 | enable_amp = False 20 | empty_cache = False 21 | find_unused_parameters = False 22 | 23 | max_batch_points = 1e8 24 | mix_prob = 0 25 | param_dicts = None # example: param_dicts = [dict(keyword="block", lr_scale=0.1)] 26 | -------------------------------------------------------------------------------- /configs/_base_/tests/classification.py: -------------------------------------------------------------------------------- 1 | test = dict( 2 | type="ClassificationTest", 3 | # scales=[0.9, 0.95, 1, 1.05, 1.1], 4 | scales=[1], 5 | shuffle=True 6 | ) 7 | -------------------------------------------------------------------------------- /configs/_base_/tests/part_segmentation.py: -------------------------------------------------------------------------------- 1 | test = dict( 2 | type="PartSegmentationTest" 3 | ) 4 | -------------------------------------------------------------------------------- /configs/_base_/tests/segmentation.py: -------------------------------------------------------------------------------- 1 | test = dict( 2 | type="SegmentationTest" 3 | ) 4 | -------------------------------------------------------------------------------- /figures/design.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pointcept/PointTransformerV2/5386c4d71f3d6c42c24a8105fce8750e9355dc54/figures/design.png -------------------------------------------------------------------------------- /figures/offset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pointcept/PointTransformerV2/5386c4d71f3d6c42c24a8105fce8750e9355dc54/figures/offset.png -------------------------------------------------------------------------------- /libs/pointops/__init__.py: 
class Aggregation(Function):
    """Autograd wrapper around the pointops CUDA aggregation kernels.

    Both passes allocate zero-filled ``torch.cuda.FloatTensor`` buffers and
    pass them to the compiled extension, which is expected to fill them in
    place.
    """

    @staticmethod
    def forward(ctx, input, position, weight, idx):
        """Run ``aggregation_forward_cuda``.

        Shapes, per the original author's notes:
            input: (n, c)
            position: (n, nsample, c)
            weight: (n, nsample, c')
            idx: (n, nsample)

        Returns:
            The (n, c) output buffer handed to the kernel.
        """
        # The tensors are passed straight to the extension, so each one is
        # required to be contiguous.
        for tensor in (input, position, weight):
            assert tensor.is_contiguous()

        num_points, num_neighbors, channels = position.shape
        weight_channels = weight.shape[-1]

        result = torch.cuda.FloatTensor(num_points, channels).zero_()
        aggregation_forward_cuda(
            num_points, num_neighbors, channels, weight_channels,
            input, position, weight, idx, result,
        )
        ctx.save_for_backward(input, position, weight, idx)
        return result

    @staticmethod
    def backward(ctx, grad_output):
        """Run ``aggregation_backward_cuda``.

        Args:
            grad_output: gradient of the forward output, (n, c) per the
                original notes.

        Returns:
            ``(grad_input, grad_position, grad_weight, None)`` with shapes
            (n, c), (n, nsample, c) and (n, nsample, c'); ``idx`` receives
            no gradient.
        """
        input, position, weight, idx = ctx.saved_tensors
        num_points, num_neighbors, channels = position.shape
        weight_channels = weight.shape[-1]

        # Zero-initialised gradient buffers that the kernel receives.
        grad_input = torch.cuda.FloatTensor(num_points, channels).zero_()
        grad_position = torch.cuda.FloatTensor(num_points, num_neighbors, channels).zero_()
        grad_weight = torch.cuda.FloatTensor(num_points, num_neighbors, weight_channels).zero_()

        aggregation_backward_cuda(
            num_points, num_neighbors, channels, weight_channels,
            input, position, weight, idx,
            grad_output, grad_input, grad_position, grad_weight,
        )
        return grad_input, grad_position, grad_weight, None
class AttentionFusionStep(Function):
    """Autograd wrapper around the CUDA attention "fusion" kernels."""

    @staticmethod
    def forward(ctx, weight, value, index_target, index_refer):
        """Run ``attention_fusion_step_forward_cuda``.

        Shapes, per the original author's notes:
            weight: (m, g)
            value: (n, g, c)
            index_target: (m)
            index_refer: (m)

        Returns:
            The (n, g, c) output buffer handed to the kernel.
        """
        # Every tensor handed to the extension must be contiguous.
        assert all(
            tensor.is_contiguous()
            for tensor in (weight, value, index_target, index_refer)
        )
        # The two index lists must have the same length.
        assert index_target.shape[0] == index_refer.shape[0]

        num_points, groups, channels = value.shape
        num_pairs = index_refer.shape[0]

        fused = torch.cuda.FloatTensor(num_points, groups, channels).zero_()
        attention_fusion_step_forward_cuda(
            num_pairs, groups, channels,
            weight, value,
            index_target.int(), index_refer.int(),
            fused,
        )
        ctx.save_for_backward(weight, value, index_target, index_refer)
        return fused

    @staticmethod
    def backward(ctx, grad_output):
        """Run ``attention_fusion_step_backward_cuda``.

        Args:
            grad_output: gradient of the forward output, (n, g, c) per the
                original notes.

        Returns:
            ``(grad_weight, grad_value, None, None)`` with shapes (m, g) and
            (n, g, c); the index tensors receive no gradient.
        """
        weight, value, index_target, index_refer = ctx.saved_tensors
        num_points, groups, channels = value.shape
        num_pairs = index_target.shape[0]

        grad_weight = torch.cuda.FloatTensor(num_pairs, groups).zero_()
        grad_value = torch.cuda.FloatTensor(num_points, groups, channels).zero_()
        attention_fusion_step_backward_cuda(
            num_pairs, groups, channels,
            weight, grad_weight,
            value, grad_value,
            index_target.int(), index_refer.int(),
            grad_output,
        )
        return grad_weight, grad_value, None, None
class Grouping(Function):
    """Autograd wrapper around the CUDA grouping (gather) kernels."""

    @staticmethod
    def forward(ctx, input, idx):
        """Run ``grouping_forward_cuda``.

        Shapes, per the original author's notes:
            input: (n, c)
            idx: (m, nsample)

        Returns:
            The (m, nsample, c) output buffer handed to the kernel.
        """
        for tensor in (input, idx):
            assert tensor.is_contiguous()

        num_queries = idx.shape[0]
        num_neighbors = idx.shape[1]
        num_points = input.shape[0]
        channels = input.shape[1]

        # NOTE(review): unlike the other wrappers this buffer is not zeroed;
        # presumably the kernel writes every element - confirm.
        gathered = torch.cuda.FloatTensor(num_queries, num_neighbors, channels)
        grouping_forward_cuda(num_queries, num_neighbors, channels, input, idx, gathered)

        # backward needs the source row count to size grad_input.
        ctx.n = num_points
        ctx.save_for_backward(idx)
        return gathered

    @staticmethod
    def backward(ctx, grad_output):
        """Run ``grouping_backward_cuda``.

        Args:
            grad_output: gradient of the forward output; it is unpacked as
                (m, nsample, c).

        Returns:
            ``(grad_input, None)`` where ``grad_input`` is (n, c); ``idx``
            receives no gradient.
        """
        (idx,) = ctx.saved_tensors
        num_points = ctx.n
        num_queries, num_neighbors, channels = grad_output.shape

        grad_input = torch.cuda.FloatTensor(num_points, channels).zero_()
        grouping_backward_cuda(num_queries, num_neighbors, channels, grad_output, idx, grad_input)
        return grad_input, None
def interpolation(xyz, new_xyz, feat, offset, new_offset, k=3):
    """Interpolate features onto ``new_xyz`` by inverse-distance weighting.

    For each point of ``new_xyz`` the ``k`` nearest points of ``xyz`` are
    looked up with ``knn_query`` and their features are blended with weights
    proportional to ``1 / (distance + 1e-8)``, normalised to sum to one.

    Shapes, per the original author's notes:
        xyz: (m, 3)
        new_xyz: (n, 3)
        feat: (m, c)
        offset: (b)
        new_offset: (b)

    Returns:
        A ``torch.cuda.FloatTensor`` of shape (n, c).
    """
    for tensor in (xyz, new_xyz, feat):
        assert tensor.is_contiguous()

    neighbor_idx, neighbor_dist = knn_query(k, xyz, offset, new_xyz, new_offset)  # (n, k), (n, k)

    # The epsilon keeps the reciprocal finite for a zero distance.
    inv_dist = 1.0 / (neighbor_dist + 1e-8)  # (n, k)
    weights = inv_dist / inv_dist.sum(dim=1, keepdim=True)  # (n, k), rows sum to 1

    blended = torch.cuda.FloatTensor(new_xyz.shape[0], feat.shape[1]).zero_()
    for rank in range(k):
        rows = neighbor_idx[:, rank].long()
        blended += feat[rows, :] * weights[:, rank].unsqueeze(-1)
    return blended
class KNNQuery(Function):
    """k-nearest-neighbour lookup backed by ``knn_query_cuda``."""

    @staticmethod
    def forward(ctx, nsample, xyz, offset, new_xyz=None, new_offset=None):
        """Find ``nsample`` neighbours in ``xyz`` for every query point.

        Shapes, per the original author's notes:
            xyz: (n, 3)
            new_xyz: (m, 3)
            offset: (b)
            new_offset: (b)

        If either ``new_xyz`` or ``new_offset`` is missing, ``xyz`` is
        queried against itself.

        Returns:
            ``(idx, dist)``: ``idx`` is an int tensor of shape (m, nsample)
            (the original notes say -1 marks a placeholder entry) and
            ``dist`` is the element-wise square root of the kernel's
            ``dist2`` output.
        """
        if new_xyz is None or new_offset is None:
            new_xyz, new_offset = xyz, offset
        for points in (xyz, new_xyz):
            assert points.is_contiguous()

        num_queries = new_xyz.shape[0]
        neighbor_idx = torch.cuda.IntTensor(num_queries, nsample).zero_()
        dist_sq = torch.cuda.FloatTensor(num_queries, nsample).zero_()
        knn_query_cuda(
            num_queries, nsample,
            xyz, new_xyz,
            offset.int(), new_offset.int(),
            neighbor_idx, dist_sq,
        )
        return neighbor_idx, dist_sq.sqrt()
29 | """ 30 | 31 | @staticmethod 32 | def forward(ctx, nsample, max_radius, min_radius, xyz, offset, new_xyz=None, new_offset=None): 33 | """ 34 | input: coords: (n, 3), new_xyz: (m, 3), offset: (b), new_offset: (b) 35 | output: idx: (m, nsample), dist2: (m, nsample) 36 | """ 37 | if new_xyz is None or new_offset is None: 38 | new_xyz = xyz 39 | new_offset = offset 40 | assert xyz.is_contiguous() and new_xyz.is_contiguous() 41 | assert min_radius < max_radius 42 | 43 | m = new_xyz.shape[0] 44 | order = [] 45 | for k in range(offset.shape[0]): 46 | s_k, e_k = (0, offset[0]) if k == 0 else (offset[k - 1], offset[k]) 47 | order.append(torch.randperm(e_k - s_k, dtype=torch.int32, device=offset.device) + s_k) 48 | order = torch.cat(order, dim=0) 49 | idx = torch.cuda.IntTensor(m, nsample).zero_() 50 | dist2 = torch.cuda.FloatTensor(m, nsample).zero_() 51 | random_ball_query_cuda(m, nsample, min_radius, max_radius, order, 52 | xyz, new_xyz, offset.int(), new_offset.int(), idx, dist2) 53 | return idx, torch.sqrt(dist2) 54 | 55 | 56 | class BallQuery(Function): 57 | """Ball Query. 58 | 59 | Find nearby points in spherical space. 
class FarthestPointSampling(Function):
    """Farthest point sampling backed by ``farthest_point_sampling_cuda``."""

    @staticmethod
    def forward(ctx, xyz, offset, new_offset):
        """Select sample indices for every batch segment.

        Shapes, per the original author's notes:
            xyz: (n, 3)
            offset: (b)
            new_offset: (b)

        Returns:
            An int tensor whose length is the last entry of ``new_offset``.
        """
        assert xyz.is_contiguous()

        total_points = xyz.shape[0]
        num_batches = offset.shape[0]

        # Largest segment length across the batch (first segment starts at 0).
        largest_segment = offset[0]
        for batch in range(1, num_batches):
            segment_len = offset[batch] - offset[batch - 1]
            largest_segment = max(segment_len, largest_segment)

        num_samples = new_offset[num_batches - 1].item()
        sampled_idx = torch.cuda.IntTensor(num_samples).zero_()

        # Scratch buffer with one float per point, initialised to a large value.
        scratch = torch.cuda.FloatTensor(total_points).fill_(1e10)
        farthest_point_sampling_cuda(
            num_batches, largest_segment, xyz,
            offset.int(), new_offset.int(),
            scratch, sampled_idx,
        )
        del scratch
        return sampled_idx
class Subtraction(Function):
    """Autograd wrapper around the CUDA neighbour-subtraction kernels."""

    @staticmethod
    def forward(ctx, input1, input2, idx):
        """Run ``subtraction_forward_cuda``.

        Shapes, per the original author's notes:
            input1: (n, c)
            input2: (n, c)
            idx: (n, nsample)

        Returns:
            The (n, nsample, c) output buffer handed to the kernel.
        """
        for tensor in (input1, input2):
            assert tensor.is_contiguous()

        num_points, channels = input1.shape
        num_neighbors = idx.shape[-1]

        diff = torch.cuda.FloatTensor(num_points, num_neighbors, channels).zero_()
        subtraction_forward_cuda(num_points, num_neighbors, channels, input1, input2, idx, diff)
        ctx.save_for_backward(idx)
        return diff

    @staticmethod
    def backward(ctx, grad_output):
        """Run ``subtraction_backward_cuda``.

        Args:
            grad_output: gradient of the forward output; it is unpacked as
                (n, nsample, c).

        Returns:
            ``(grad_input1, grad_input2, None)``, both gradients of shape
            (n, c); ``idx`` receives no gradient.
        """
        (idx,) = ctx.saved_tensors
        num_points, num_neighbors, channels = grad_output.shape

        grad_input1 = torch.cuda.FloatTensor(num_points, channels).zero_()
        grad_input2 = torch.cuda.FloatTensor(num_points, channels).zero_()
        subtraction_backward_cuda(
            num_points, num_neighbors, channels,
            idx, grad_output, grad_input1, grad_input2,
        )
        return grad_input1, grad_input2, None
def query_and_group(nsample,
                    xyz,
                    new_xyz,
                    feat,
                    idx,
                    offset,
                    new_offset,
                    dilation=0,
                    with_feat=True,
                    with_xyz=True,
                    ):
    """
    Gather, for every query point, the features (and optionally the relative
    coordinates) of its ``nsample`` neighbours.

    input: coords: (n, 3), new_xyz: (m, 3), color: (n, c), idx: (m, nsample), offset: (b), new_offset: (b)
    output: new_feat: (m, nsample, c+3), grouped_idx: (m, nsample)

    * ``new_xyz=None`` means "query the points themselves"; ``new_offset`` then
      falls back to ``offset`` when it is not given either.
    * ``idx=None`` triggers a (dilated) kNN search; otherwise the supplied
      neighbour indices are used as-is.
    * ``with_feat=False`` returns only ``idx``.
    * ``with_xyz=False`` returns ``(grouped_feat, idx)`` without the relative
      coordinates prepended.
    """
    # Resolve the defaults *before* touching new_xyz; the contiguity check used
    # to run first and crashed with AttributeError on new_xyz=None.
    if new_xyz is None:
        new_xyz = xyz
        if new_offset is None:
            new_offset = offset
    assert xyz.is_contiguous() and new_xyz.is_contiguous()
    assert feat is None or feat.is_contiguous()

    if idx is None:
        num_samples_total = 1 + (nsample - 1) * (dilation + 1)
        # num points in a batch might < num_samples_total => [n1, n2, ..., nk, ns, ns, ns, ...]
        idx_no_dilation, _ = knn_query(num_samples_total, xyz, offset, new_xyz,
                                       new_offset)  # (m, nsample * (d + 1))
        batch_end = offset.tolist()
        batch_start = [0] + batch_end[:-1]
        new_batch_end = new_offset.tolist()
        new_batch_start = [0] + new_batch_end[:-1]
        per_batch_idx = []
        for b in range(offset.shape[0]):
            num_points = batch_end[b] - batch_start[b]
            # Shrink the dilation when the batch holds fewer points than the
            # dilated neighbourhood needs. nsample == 1 only ever picks column
            # 0, so skip the shrink there (it would divide by zero).
            if num_points < num_samples_total and nsample > 1:
                soft_dilation = (num_points - 1) / (nsample - 1) - 1
            else:
                soft_dilation = dilation
            columns = [int((soft_dilation + 1) * k) for k in range(nsample)]
            per_batch_idx.append(idx_no_dilation[new_batch_start[b]: new_batch_end[b], columns])
        idx = torch.cat(per_batch_idx, dim=0)

    if not with_feat:
        return idx

    m, c = new_xyz.shape[0], feat.shape[1]
    flat_idx = idx.view(-1).long()
    grouped_feat = feat[flat_idx, :].view(m, nsample, c)  # (m, nsample, c)

    if with_xyz:
        # Neighbour coordinates expressed relative to their query point.
        grouped_xyz = xyz[flat_idx, :].view(m, nsample, 3) - new_xyz.unsqueeze(1)  # (m, nsample, 3)
        return torch.cat((grouped_xyz, grouped_feat), -1), idx  # (m, nsample, 3+c)
    else:
        return grouped_feat, idx
| os.environ['OPT'] = " ".join( 8 | flag for flag in opt.split() if flag != '-Wstrict-prototypes' 9 | ) 10 | 11 | src = 'src' 12 | sources = [os.path.join(root, file) for root, dirs, files in os.walk(src) 13 | for file in files 14 | if file.endswith('.cpp') or file.endswith('.cu')] 15 | 16 | setup( 17 | name='pointops', 18 | version='1.0', 19 | install_requires=["torch", "numpy"], 20 | packages=["pointops"], 21 | package_dir={"pointops": "functions"}, 22 | ext_modules=[ 23 | CUDAExtension( 24 | name='pointops._C', 25 | sources=sources, 26 | extra_compile_args={'cxx': ['-g'], 'nvcc': ['-O2']} 27 | ) 28 | ], 29 | cmdclass={'build_ext': BuildExtension} 30 | ) 31 | -------------------------------------------------------------------------------- /libs/pointops/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pointcept/PointTransformerV2/5386c4d71f3d6c42c24a8105fce8750e9355dc54/libs/pointops/src/__init__.py -------------------------------------------------------------------------------- /libs/pointops/src/aggregation/aggregation_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "aggregation_cuda_kernel.h" 5 | 6 | 7 | void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor) 8 | { 9 | const float *input = input_tensor.data_ptr(); 10 | const float *position = position_tensor.data_ptr(); 11 | const float *weight = weight_tensor.data_ptr(); 12 | const int *idx = idx_tensor.data_ptr(); 13 | float *output = output_tensor.data_ptr(); 14 | aggregation_forward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, output); 15 | } 16 | 17 | void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, 
// Weighted neighbourhood aggregation, one thread per (point, channel) output element.
// input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c)
// The result is accumulated into `output`, so the caller must hand in a zeroed buffer
// to obtain the plain weighted sum.
__global__ void aggregation_forward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid >= n * c) return;

    const int channel = tid % c;
    const int point = tid / c;
    // Weight channels are shared: feature channel `channel` uses weight channel `channel % w_c`.
    const int weight_channel = channel % w_c;

    for (int s = 0; s < nsample; ++s)
    {
        const int slot = point * nsample + s;           // flat position in the (n, nsample) grid
        const int src = idx[slot] * c + channel;        // neighbour's feature element
        const int pos = slot * c + channel;             // matching positional term
        const int w = slot * w_c + weight_channel;      // matching weight
        output[tid] += (input[src] + position[pos]) * weight[w];
    }
}
// Launches aggregation_forward_cuda_kernel with one thread per output element.
// input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c)
void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) {
    // A grid with zero blocks is an invalid launch configuration; nothing to do for empty input.
    if (n * c <= 0) return;
    dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    // Execution configuration: grid size, block size, no dynamic shared memory.
    aggregation_forward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, w_c, input, position, weight, idx, output);
}
grad_position: (n, nsample, c), grad_weight: (n, nsample, w_c) 50 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)); 51 | dim3 threads(THREADS_PER_BLOCK); 52 | aggregation_backward_cuda_kernel<<>>(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight); 53 | } 54 | -------------------------------------------------------------------------------- /libs/pointops/src/aggregation/aggregation_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _AGGREGATION_CUDA_KERNEL 2 | #define _AGGREGATION_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor); 8 | void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output); 15 | void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /libs/pointops/src/attention/attention_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ATTENTION_CUDA_KERNEL 2 | #define _ATTENTION_CUDA_KERNEL 3 | #include 4 
| #include 5 | #include 6 | 7 | void attention_relation_step_forward_cuda(int m, int g, int c, 8 | at::Tensor query_tensor, at::Tensor key_tensor, at::Tensor weight_tensor, 9 | at::Tensor index_target_tensor, at::Tensor index_refer_tensor, 10 | at::Tensor output_tensor); 11 | void attention_relation_step_backward_cuda(int m, int g, int c, 12 | at::Tensor query_tensor, at::Tensor grad_query_tensor, 13 | at::Tensor key_tensor, at::Tensor grad_key_tensor, 14 | at::Tensor weight_tensor, at::Tensor grad_weight_tensor, 15 | at::Tensor index_target_tensor, at::Tensor index_refer_tensor, 16 | at::Tensor grad_output_tensor); 17 | void attention_fusion_step_forward_cuda(int m, int g, int c, 18 | at::Tensor weight_tensor, at::Tensor value_tensor, 19 | at::Tensor index_target_tensor, at::Tensor index_refer_tensor, 20 | at::Tensor output_tensor); 21 | void attention_fusion_step_backward_cuda(int m, int g, int c, 22 | at::Tensor weight_tensor, at::Tensor grad_weight_tensor, 23 | at::Tensor value_tensor, at::Tensor grad_value_tensor, 24 | at::Tensor index_target_tensor, at::Tensor index_refer_tensor, 25 | at::Tensor grad_output_tensor); 26 | 27 | #ifdef __cplusplus 28 | extern "C" { 29 | #endif 30 | 31 | void attention_relation_step_forward_cuda_launcher(int m, int g, int c, 32 | const float *query, const float *key, const float *weight, 33 | const int *index_target, const int *index_refer, 34 | float *output); 35 | void attention_relation_step_backward_cuda_launcher(int m, int g, int c, 36 | const float *query, float *grad_query, 37 | const float *key, float *grad_key, 38 | const float *weight, float *grad_weight, 39 | const int *index_target, const int *index_refer, 40 | const float *grad_output); 41 | void attention_fusion_step_forward_cuda_launcher(int m, int g, int c, 42 | const float *weight, const float *value, 43 | const int *index_target, const int *index_refer, 44 | float *output); 45 | void attention_fusion_step_backward_cuda_launcher(int m, int g, int c, 46 | const 
float *weight, float *grad_weight, 47 | const float *value, float *grad_value, 48 | const int *index_target, const int *index_refer, 49 | const float *grad_output); 50 | 51 | #ifdef __cplusplus 52 | } 53 | #endif 54 | #endif 55 | -------------------------------------------------------------------------------- /libs/pointops/src/ball_query/ball_query_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "ball_query_cuda_kernel.h" 5 | 6 | 7 | void ball_query_cuda(int m, int nsample, 8 | float min_radius, float max_radius, 9 | at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, 10 | at::Tensor offset_tensor, at::Tensor new_offset_tensor, 11 | at::Tensor idx_tensor, at::Tensor dist2_tensor) 12 | { 13 | const float *xyz = xyz_tensor.data_ptr(); 14 | const float *new_xyz = new_xyz_tensor.data_ptr(); 15 | const int *offset = offset_tensor.data_ptr(); 16 | const int *new_offset = new_offset_tensor.data_ptr(); 17 | int *idx = idx_tensor.data_ptr(); 18 | float *dist2 = dist2_tensor.data_ptr(); 19 | ball_query_cuda_launcher(m, nsample, min_radius, max_radius, xyz, new_xyz, offset, new_offset, idx, dist2); 20 | } 21 | -------------------------------------------------------------------------------- /libs/pointops/src/ball_query/ball_query_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _BALL_QUERY_CUDA_KERNEL 2 | #define _BALL_QUERY_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void ball_query_cuda(int m, int nsample, 8 | float min_radius, float max_radius, 9 | at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, 10 | at::Tensor offset_tensor, at::Tensor new_offset_tensor, 11 | at::Tensor idx_tensor, at::Tensor dist2_tensor); 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | void ball_query_cuda_launcher(int m, int nsample, 18 | float min_radius, float max_radius, 19 | const float *xyz, const float 
*new_xyz, 20 | const int *offset, const int *new_offset, 21 | int *idx, float *dist2); 22 | 23 | #ifdef __cplusplus 24 | } 25 | #endif 26 | #endif 27 | -------------------------------------------------------------------------------- /libs/pointops/src/cuda_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef _CUDA_UTILS_H 2 | #define _CUDA_UTILS_H 3 | 4 | #include 5 | #include 6 | 7 | #define TOTAL_THREADS 1024 8 | #define THREADS_PER_BLOCK 512 9 | #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) 10 | 11 | inline int opt_n_threads(int work_size) { 12 | const int pow_2 = std::log(static_cast(work_size)) / std::log(2.0); 13 | return std::max(std::min(1 << pow_2, TOTAL_THREADS), 1); 14 | } 15 | 16 | inline dim3 opt_block_config(int x, int y) { 17 | const int x_threads = opt_n_threads(x); 18 | const int y_threads = std::max(std::min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1); 19 | dim3 block_config(x_threads, y_threads, 1); 20 | return block_config; 21 | } 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /libs/pointops/src/grouping/grouping_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "grouping_cuda_kernel.h" 5 | 6 | 7 | void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor) 8 | { 9 | const float *input = input_tensor.data_ptr(); 10 | const int *idx = idx_tensor.data_ptr(); 11 | float *output = output_tensor.data_ptr(); 12 | grouping_forward_cuda_launcher(m, nsample, c, input, idx, output); 13 | } 14 | 15 | void grouping_backward_cuda(int m, int nsample, int c, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor) 16 | { 17 | const float *grad_output = grad_output_tensor.data_ptr(); 18 | const int *idx = idx_tensor.data_ptr(); 19 | float *grad_input = 
// Gathers neighbour features, one thread per output element.
// input: input: (n, c), idx: (m, nsample), output: (m, nsample, c)
__global__ void grouping_forward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ input, const int *__restrict__ idx, float *__restrict__ output) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid >= m * nsample * c) return;

    const int channel = tid % c;
    // tid / c is the flat (query point, neighbour slot) position in idx.
    const int slot = tid / c;
    const int query = slot / nsample;
    const int neighbour = slot % nsample;

    const int src = idx[query * nsample + neighbour] * c + channel;
    output[tid] = input[src];
}
// Launches grouping_backward_cuda_kernel with one thread per grad_output element.
// input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c)
void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input)
{
    // A grid with zero blocks is an invalid launch configuration; nothing to do for empty input.
    if (m * nsample * c <= 0) return;
    dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    // Execution configuration: grid size, block size, no dynamic shared memory.
    grouping_backward_cuda_kernel<<<blocks, threads, 0>>>(m, nsample, c, grad_output, idx, grad_input);
}
// Weighted interpolation from k source points, one thread per (target point, channel).
// input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c)
// The result is accumulated into `output`, so the caller must hand in a zeroed buffer
// to obtain the plain weighted sum.
__global__ void interpolation_forward_cuda_kernel(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output)
{
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid >= n * c) return;

    const int channel = tid % c;
    const int row = (tid / c) * k;  // start of this target point's row in idx / weight

    for (int j = 0; j < k; ++j)
    {
        const int slot = row + j;
        const int src = idx[slot] * c + channel;
        output[tid] += input[src] * weight[slot];
    }
}
// Launches interpolation_forward_cuda_kernel with one thread per output element.
// input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c)
void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output) {
    // A grid with zero blocks is an invalid launch configuration; nothing to do for empty input.
    if (n * c <= 0) return;
    dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    // Execution configuration: grid size, block size, no dynamic shared memory.
    interpolation_forward_cuda_kernel<<<blocks, threads, 0>>>(n, c, k, input, idx, weight, output);
}
// Device-side helpers for the kNN kernel: a fixed-size max-heap over
// (distance, index) pairs plus the batch lookup.
namespace knn_query_utils{

// Exchanges the two pointed-to values.
template <typename DType>
__device__ void swap(DType *x, DType *y)
{
    DType tmp = *x;
    *x = *y;
    *y = tmp;
}

// Sifts the root of the max-heap stored in dist[0..k) down to its place,
// moving the parallel idx array along with it.
__device__ void reheap(float *dist, int *idx, int k)
{
    int root = 0;
    int child = root * 2 + 1;
    while (child < k)
    {
        // Descend towards the larger of the two children.
        if(child + 1 < k && dist[child+1] > dist[child])
            child++;
        // Heap property restored: the parent is strictly larger than its largest child.
        if(dist[root] > dist[child])
            return;
        swap(&dist[root], &dist[child]);
        swap(&idx[root], &idx[child]);
        root = child;
        child = root * 2 + 1;
    }
}


// Turns the max-heap in dist[0..k) into ascending order in place by repeatedly
// moving the current maximum behind the shrinking heap prefix.
__device__ void heap_sort(float *dist, int *idx, int k)
{
    int i;
    for (i = k - 1; i > 0; i--)
    {
        swap(&dist[0], &dist[i]);
        swap(&idx[0], &idx[i]);
        reheap(dist, idx, i);
    }
}


// Returns the first position i with idx < offset[i], i.e. the batch element
// that point `idx` belongs to when `offset` holds cumulative end indices.
// There is no bound on the scan: the caller must guarantee that idx is smaller
// than some entry of `offset`, otherwise this reads past the array.
__device__ int get_bt_idx(int idx, const int *offset)
{
    int i = 0;
    while (1)
    {
        if (idx < offset[i])
            break;
        else
            i++;
    }
    return i;
}
}  // namespace knn_query_utils
// Python bindings of the pointops extension module.
// Every entry registers one host-side C++ wrapper under its own name (the third
// argument is the docstring shown in Python, here simply the name again).
// The module name is supplied by the build through TORCH_EXTENSION_NAME.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
    // neighbour search
    m.def("knn_query_cuda", &knn_query_cuda, "knn_query_cuda");
    m.def("ball_query_cuda", &ball_query_cuda, "ball_query_cuda");
    m.def("random_ball_query_cuda", &random_ball_query_cuda, "random_ball_query_cuda");
    // sampling
    m.def("farthest_point_sampling_cuda", &farthest_point_sampling_cuda, "farthest_point_sampling_cuda");
    // grouping / interpolation / subtraction / aggregation (forward + backward pairs)
    m.def("grouping_forward_cuda", &grouping_forward_cuda, "grouping_forward_cuda");
    m.def("grouping_backward_cuda", &grouping_backward_cuda, "grouping_backward_cuda");
    m.def("interpolation_forward_cuda", &interpolation_forward_cuda, "interpolation_forward_cuda");
    m.def("interpolation_backward_cuda", &interpolation_backward_cuda, "interpolation_backward_cuda");
    m.def("subtraction_forward_cuda", &subtraction_forward_cuda, "subtraction_forward_cuda");
    m.def("subtraction_backward_cuda", &subtraction_backward_cuda, "subtraction_backward_cuda");
    m.def("aggregation_forward_cuda", &aggregation_forward_cuda, "aggregation_forward_cuda");
    m.def("aggregation_backward_cuda", &aggregation_backward_cuda, "aggregation_backward_cuda");
    // attention steps (forward + backward pairs)
    m.def("attention_relation_step_forward_cuda", &attention_relation_step_forward_cuda, "attention_relation_step_forward_cuda");
    m.def("attention_relation_step_backward_cuda", &attention_relation_step_backward_cuda, "attention_relation_step_backward_cuda");
    m.def("attention_fusion_step_forward_cuda", &attention_fusion_step_forward_cuda, "attention_fusion_step_forward_cuda");
    m.def("attention_fusion_step_backward_cuda", &attention_fusion_step_backward_cuda, "attention_fusion_step_backward_cuda");
}
at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, 10 | at::Tensor offset_tensor, at::Tensor new_offset_tensor, 11 | at::Tensor idx_tensor, at::Tensor dist2_tensor) 12 | { 13 | const int *order = order_tensor.data_ptr(); 14 | const float *xyz = xyz_tensor.data_ptr(); 15 | const float *new_xyz = new_xyz_tensor.data_ptr(); 16 | const int *offset = offset_tensor.data_ptr(); 17 | const int *new_offset = new_offset_tensor.data_ptr(); 18 | int *idx = idx_tensor.data_ptr(); 19 | float *dist2 = dist2_tensor.data_ptr(); 20 | random_ball_query_cuda_launcher(m, nsample, min_radius, max_radius, order, xyz, new_xyz, offset, new_offset, idx, dist2); 21 | } 22 | -------------------------------------------------------------------------------- /libs/pointops/src/random_ball_query/random_ball_query_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "random_ball_query_cuda_kernel.h" 3 | 4 | 5 | namespace random_ball_query_utils{ 6 | 7 | template 8 | __device__ void swap(DType *x, DType *y) 9 | { 10 | DType tmp = *x; 11 | *x = *y; 12 | *y = tmp; 13 | } 14 | 15 | __device__ void reheap(float *dist, int *idx, int k) 16 | { 17 | int root = 0; 18 | int child = root * 2 + 1; 19 | while (child < k) 20 | { 21 | if(child + 1 < k && dist[child+1] > dist[child]) 22 | child++; 23 | if(dist[root] > dist[child]) 24 | return; 25 | swap(&dist[root], &dist[child]); 26 | swap(&idx[root], &idx[child]); 27 | root = child; 28 | child = root * 2 + 1; 29 | } 30 | } 31 | 32 | 33 | __device__ void heap_sort(float *dist, int *idx, int k) 34 | { 35 | int i; 36 | for (i = k - 1; i > 0; i--) 37 | { 38 | swap(&dist[0], &dist[i]); 39 | swap(&idx[0], &idx[i]); 40 | reheap(dist, idx, i); 41 | } 42 | } 43 | 44 | __device__ int get_bt_idx(int idx, const int *offset) 45 | { 46 | int i = 0; 47 | while (1) 48 | { 49 | if (idx < offset[i]) 50 | break; 51 | else 52 | i++; 53 | } 54 | return i; 55 | } 56 | } // namespace 
ball_query_utils 57 | 58 | __global__ void random_ball_query_cuda_kernel(int m, int nsample, 59 | float min_radius, float max_radius, const int *__restrict__ order, 60 | const float *__restrict__ xyz, const float *__restrict__ new_xyz, 61 | const int *__restrict__ offset, const int *__restrict__ new_offset, 62 | int *__restrict__ idx, float *__restrict__ dist2) { 63 | // input: xyz (n, 3) new_xyz (m, 3) 64 | // output: idx (m, nsample) dist (m, nsample) 65 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 66 | if (pt_idx >= m) return; 67 | 68 | new_xyz += pt_idx * 3; 69 | idx += pt_idx * nsample; 70 | dist2 += pt_idx * nsample; 71 | 72 | int bt_idx = random_ball_query_utils::get_bt_idx(pt_idx, new_offset); 73 | int start; 74 | if (bt_idx == 0) 75 | start = 0; 76 | else 77 | start = offset[bt_idx - 1]; 78 | int end = offset[bt_idx]; 79 | 80 | float max_radius2 = max_radius * max_radius; 81 | float min_radius2 = min_radius * min_radius; 82 | float new_x = new_xyz[0]; 83 | float new_y = new_xyz[1]; 84 | float new_z = new_xyz[2]; 85 | 86 | int cnt = 0; 87 | 88 | for(int i = start; i < end; i++){ 89 | float x = xyz[order[i] * 3 + 0]; 90 | float y = xyz[order[i] * 3 + 1]; 91 | float z = xyz[order[i] * 3 + 2]; 92 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z); 93 | 94 | if (d2 <= 1e-5 || (d2 >= min_radius2 && d2 < max_radius2)){ 95 | dist2[cnt] = d2; 96 | idx[cnt] = order[i]; 97 | cnt += 1; 98 | if (cnt >= nsample) break; 99 | } 100 | } 101 | 102 | if (cnt < nsample) { 103 | for (int i = cnt; i < nsample; i++){ 104 | idx[i] = -1; 105 | dist2[i] = 1e10; 106 | } 107 | } 108 | } 109 | 110 | void random_ball_query_cuda_launcher(int m, int nsample, 111 | float min_radius, float max_radius, const int *order, 112 | const float *xyz, const float *new_xyz, 113 | const int *offset, const int *new_offset, 114 | int *idx, float *dist2) { 115 | // input: new_xyz: (m, 3), xyz: (n, 3), idx: (m, nsample) 116 | dim3 blocks(DIVUP(m, 
THREADS_PER_BLOCK)); 117 | dim3 threads(THREADS_PER_BLOCK); 118 | random_ball_query_cuda_kernel<<>>(m, nsample, 119 | min_radius, max_radius, order, 120 | xyz, new_xyz, 121 | offset, new_offset, 122 | idx, dist2); 123 | } 124 | -------------------------------------------------------------------------------- /libs/pointops/src/random_ball_query/random_ball_query_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _RANDOM_BALL_QUERY_CUDA_KERNEL 2 | #define _RANDOM_BALL_QUERY_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void random_ball_query_cuda(int m, int nsample, 8 | float min_radius, float max_radius, at::Tensor order_tensor, 9 | at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, 10 | at::Tensor offset_tensor, at::Tensor new_offset_tensor, 11 | at::Tensor idx_tensor, at::Tensor dist2_tensor); 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | void random_ball_query_cuda_launcher(int m, int nsample, 18 | float min_radius, float max_radius, const int *order, 19 | const float *xyz, const float *new_xyz, 20 | const int *offset, const int *new_offset, 21 | int *idx, float *dist2); 22 | 23 | #ifdef __cplusplus 24 | } 25 | #endif 26 | #endif 27 | -------------------------------------------------------------------------------- /libs/pointops/src/sampling/sampling_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "sampling_cuda_kernel.h" 5 | 6 | 7 | void farthest_point_sampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor) 8 | { 9 | const float *xyz = xyz_tensor.data_ptr(); 10 | const int *offset = offset_tensor.data_ptr(); 11 | const int *new_offset = new_offset_tensor.data_ptr(); 12 | float *tmp = tmp_tensor.data_ptr(); 13 | int *idx = idx_tensor.data_ptr(); 14 | 
farthest_point_sampling_cuda_launcher(b, n, xyz, offset, new_offset, tmp, idx); 15 | } 16 | -------------------------------------------------------------------------------- /libs/pointops/src/sampling/sampling_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _SAMPLING_CUDA_KERNEL 2 | #define _SAMPLING_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void farthest_point_sampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor); 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | void farthest_point_sampling_cuda_launcher(int b, int n, const float *xyz, const int *offset, const int *new_offset, float *tmp, int *idx); 14 | 15 | #ifdef __cplusplus 16 | } 17 | #endif 18 | #endif 19 | -------------------------------------------------------------------------------- /libs/pointops/src/subtraction/subtraction_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "subtraction_cuda_kernel.h" 5 | 6 | 7 | void subtraction_forward_cuda(int n, int nsample, int c, at::Tensor input1_tensor, at::Tensor input2_tensor, at::Tensor idx_tensor, at::Tensor output_tensor) 8 | { 9 | const float *input1 = input1_tensor.data_ptr(); 10 | const float *input2 = input2_tensor.data_ptr(); 11 | const int *idx = idx_tensor.data_ptr(); 12 | float *output = output_tensor.data_ptr(); 13 | subtraction_forward_cuda_launcher(n, nsample, c, input1, input2, idx, output); 14 | } 15 | 16 | void subtraction_backward_cuda(int n, int nsample, int c, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input1_tensor, at::Tensor grad_input2_tensor) 17 | { 18 | const int *idx = idx_tensor.data_ptr(); 19 | const float *grad_output = grad_output_tensor.data_ptr(); 20 | float *grad_input1 = grad_input1_tensor.data_ptr(); 21 | float 
*grad_input2 = grad_input2_tensor.data_ptr(); 22 | subtraction_backward_cuda_launcher(n, nsample, c, idx, grad_output, grad_input1, grad_input2); 23 | } 24 | -------------------------------------------------------------------------------- /libs/pointops/src/subtraction/subtraction_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "subtraction_cuda_kernel.h" 3 | 4 | 5 | __global__ void subtraction_forward_cuda_kernel(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) { 6 | // input: input1: (n, c), input2: (n, c), idx: (n, nsample), output: (n, nsample, c) 7 | int index = blockIdx.x * blockDim.x + threadIdx.x; 8 | if (index >= n * nsample * c) return; 9 | const int c_idx = index % c; 10 | const int nsample_idx = (index / c) % nsample; 11 | const int n_idx = index / nsample / c; 12 | const int idx_idx = n_idx * nsample + nsample_idx; 13 | const int input1_idx = n_idx * c + c_idx; 14 | const int input2_idx = idx[idx_idx] * c + c_idx; 15 | output[index] = input1[input1_idx] - input2[input2_idx]; 16 | } 17 | 18 | __global__ void subtraction_backward_cuda_kernel(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) { 19 | // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c) 20 | int index = blockIdx.x * blockDim.x + threadIdx.x; 21 | if (index >= n * nsample * c) return; 22 | const int c_idx = index % c; 23 | const int nsample_idx = (index / c) % nsample; 24 | const int n_idx = index / nsample / c; 25 | const int idx_idx = n_idx * nsample + nsample_idx; 26 | const int input1_idx = n_idx * c + c_idx; 27 | const int input2_idx = idx[idx_idx] * c + c_idx; 28 | atomicAdd(grad_input1 + input1_idx, grad_output[index]); 29 | atomicAdd(grad_input2 + input2_idx, -grad_output[index]); 30 | } 31 | 32 | void subtraction_forward_cuda_launcher(int n, int 
nsample, int c, const float *input1, const float *input2, const int *idx, float *output) { 33 | // input: input1: (n, c), input2: (n, c), idx: (n, nsample), output: (n, nsample, c) 34 | dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK)); 35 | dim3 threads(THREADS_PER_BLOCK); 36 | subtraction_forward_cuda_kernel<<>>(n, nsample, c, input1, input2, idx, output); 37 | } 38 | 39 | void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) { 40 | // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c) 41 | dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK)); 42 | dim3 threads(THREADS_PER_BLOCK); 43 | subtraction_backward_cuda_kernel<<>>(n, nsample, c, idx, grad_output, grad_input1, grad_input2); 44 | } 45 | -------------------------------------------------------------------------------- /libs/pointops/src/subtraction/subtraction_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _SUBTRACTION_CUDA_KERNEL 2 | #define _SUBTRACTION_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void subtraction_forward_cuda(int n, int nsample, int c, at::Tensor input1_tensor, at::Tensor input2_tensor, at::Tensor idx_tensor, at::Tensor output_tensor); 8 | void subtraction_backward_cuda(int n, int nsample, int c, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input1_tensor, at::Tensor grad_input2_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void subtraction_forward_cuda_launcher(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output); 15 | void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | 
-------------------------------------------------------------------------------- /libs/pointops2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pointcept/PointTransformerV2/5386c4d71f3d6c42c24a8105fce8750e9355dc54/libs/pointops2/__init__.py -------------------------------------------------------------------------------- /libs/pointops2/functions/__init__.py: -------------------------------------------------------------------------------- 1 | from pointops2 import * -------------------------------------------------------------------------------- /libs/pointops2/functions/test_attention_op_step1.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pointops 3 | from torch_scatter import scatter_max, scatter_mean, scatter_add, scatter_min, scatter_sum 4 | 5 | torch.manual_seed(1) 6 | 7 | M = 800000 8 | N = 35000 9 | C = 96 10 | h = 6 11 | query = torch.rand(N, h, C//h).cuda() 12 | key = torch.rand(N, h, C//h).cuda() 13 | 14 | index_0 = torch.rand(M) 15 | index_0[index_0 < 0] = 0 16 | index_0 = (index_0*N).long().cuda() 17 | 18 | index_1 = torch.rand(M) 19 | index_1[index_1 < 0] = 0 20 | index_1 = (index_1*N).long().cuda() 21 | 22 | query.requires_grad = True 23 | key.requires_grad = True 24 | 25 | # rearrange index for acceleration 26 | index_0, indices = torch.sort(index_0) #[M,] 27 | index_1 = index_1[indices] #[M,] 28 | index_0_counts = index_0.bincount() 29 | 30 | print("index_0_counts.shape: ", index_0_counts.shape) 31 | 32 | n_max = index_0_counts.max() 33 | index_0_offsets = index_0_counts.cumsum(dim=-1) #[N] 34 | 35 | print("v1 index_0_offsets.shape: ", index_0_offsets.shape) 36 | 37 | index_0_offsets = torch.cat([torch.zeros(1, dtype=torch.long).cuda(), index_0_offsets], 0) #[N+1] 38 | 39 | # print("index_0[:100]: ", index_0[:100]) 40 | print("n_max: ", n_max) 41 | print("index_0_offsets.shape: ", index_0_offsets.shape) 
42 | # input() 43 | 44 | print("index_0_offsets[:100]: ", index_0_offsets[:100]) 45 | print("index_1[300:320]: ", index_1[300:320]) 46 | 47 | 48 | attn_flat = pointops.attention_step1(query.float(), key.float(), index_0.int(), index_1.int()) 49 | # loss = attn_flat.sum() 50 | # loss.backward() 51 | print("attn_flat.shape: {}, attn_flat[300:320,:10]: {}".format(attn_flat.shape, attn_flat[300:320,:10])) 52 | # print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 53 | # print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 54 | # input() 55 | 56 | print("query.is_contiguous(): ", query.is_contiguous()) 57 | print("key.is_contiguous(): ", key.is_contiguous()) 58 | print("index_0.is_contiguous(): ", index_0.is_contiguous()) 59 | print("index_1.is_contiguous(): ", index_1.is_contiguous()) 60 | 61 | attn_flat_v2 = pointops.attention_step1_v2(query.float(), key.float(), index_1.int(), index_0_offsets.int(), n_max) 62 | # loss = attn_flat_v2.sum() 63 | # loss.backward() 64 | print("attn_flat_v2.shape: {}, attn_flat_v2[300:320,:10]: {}".format(attn_flat_v2.shape, attn_flat_v2[300:320,:10])) 65 | # print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 66 | # print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 67 | # input() 68 | 69 | mask = attn_flat_v2.sum(-1) != 0 70 | print("mask.sum(): ", mask.sum()) 71 | print("attn_flat_v2[mask] - attn_flat[mask]: ", ((attn_flat_v2[mask] - attn_flat[mask])**2).max()) 72 | 73 | 74 | print("((attn_flat-attn_flat_v2)**2 < 1e-8).all(): ", ((attn_flat-attn_flat_v2)**2 < 1e-8).all()) 75 | 76 | selected = 10000 77 | print("torch.max((attn_flat[:selected]-attn_flat_v2[:selected])**2, 0): ", torch.max((attn_flat[:selected]-attn_flat_v2[:selected])**2, 0)) 78 | 79 | -------------------------------------------------------------------------------- /libs/pointops2/functions/test_attention_op_step1_v2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pointops 3 | from 
torch_scatter import scatter_max, scatter_mean, scatter_add, scatter_min, scatter_sum 4 | 5 | torch.manual_seed(1) 6 | 7 | M = 800000 8 | N = 35000 9 | C = 96 10 | h = 6 11 | query = torch.rand(N, h, C//h).cuda() 12 | key = torch.rand(N, h, C//h).cuda() 13 | 14 | index_0 = torch.rand(M) 15 | index_0[index_0 < 0] = 0 16 | index_0 = (index_0*N).long().cuda() 17 | 18 | index_1 = torch.rand(M) 19 | index_1[index_1 < 0] = 0 20 | index_1 = (index_1*N).long().cuda() 21 | 22 | query.requires_grad = True 23 | key.requires_grad = True 24 | 25 | 26 | attn_flat = pointops.attention_step1(query.float(), key.float(), index_0.int(), index_1.int()) 27 | loss = attn_flat.sum() 28 | loss.backward() 29 | print("attn_flat.shape: {}, attn_flat[:20,:10]: {}".format(attn_flat.shape, attn_flat[:20,:10])) 30 | print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 31 | print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 32 | input() 33 | 34 | 35 | 36 | # rearrange index for acceleration 37 | index_0, indices = torch.sort(index_0) #[M,] 38 | index_1 = index_1[indices] #[M,] 39 | index_0_counts = index_0.bincount() 40 | 41 | print("index_0_counts.shape: ", index_0_counts.shape) 42 | 43 | n_max = index_0_counts.max() 44 | index_0_offsets = index_0_counts.cumsum(dim=-1) #[N] 45 | 46 | print("v1 index_0_offsets.shape: ", index_0_offsets.shape) 47 | 48 | index_0_offsets = torch.cat([torch.zeros(1, dtype=torch.long).cuda(), index_0_offsets], 0) #[N+1] 49 | 50 | # print("index_0[:100]: ", index_0[:100]) 51 | print("n_max: ", n_max) 52 | print("index_0_offsets.shape: ", index_0_offsets.shape) 53 | # input() 54 | 55 | print("index_0_offsets[:100]: ", index_0_offsets[:100]) 56 | print("index_1[:20]: ", index_1[:20]) 57 | 58 | 59 | attn_flat = pointops.attention_step1(query.float(), key.float(), index_0.int(), index_1.int()) 60 | # loss = attn_flat.sum() 61 | # loss.backward() 62 | # # attn_flat = pointops.attention_step1(query.float(), key.float(), index_0.int(), index_1.int()) 63 | # # loss = 
attn_flat.sum() 64 | # # loss.backward() 65 | # print("attn_flat.shape: {}, attn_flat[:20,:10]: {}".format(attn_flat.shape, attn_flat[:20,:10])) 66 | # print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 67 | # print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 68 | # input() 69 | 70 | print("query.is_contiguous(): ", query.is_contiguous()) 71 | print("key.is_contiguous(): ", key.is_contiguous()) 72 | print("index_0.is_contiguous(): ", index_0.is_contiguous()) 73 | print("index_1.is_contiguous(): ", index_1.is_contiguous()) 74 | 75 | attn_flat_v2 = pointops.attention_step1_v2(query.float(), key.float(), index_1.int(), index_0_offsets.int(), n_max) 76 | loss = attn_flat_v2.sum() 77 | loss.backward() 78 | 79 | # attn_flat_v2 = pointops.attention_step1_v2(query.float(), key.float(), index_1.int(), index_0_offsets.int(), n_max) 80 | # loss = attn_flat_v2.sum() 81 | # loss.backward() 82 | 83 | print("attn_flat_v2.shape: {}, attn_flat_v2[:20,:10]: {}".format(attn_flat_v2.shape, attn_flat_v2[:20,:10])) 84 | print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 85 | print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 86 | # input() 87 | 88 | # mask = attn_flat_v2.sum(-1) != 0 89 | # print("mask.sum(): ", mask.sum()) 90 | # print("attn_flat_v2[mask] - attn_flat[mask]: ", ((attn_flat_v2[mask] - attn_flat[mask])**2).max()) 91 | 92 | 93 | print("((attn_flat-attn_flat_v2)**2 < 1e-8).all(): ", ((attn_flat-attn_flat_v2)**2 < 1e-8).all()) 94 | 95 | selected = 10000 96 | print("torch.max((attn_flat[:selected]-attn_flat_v2[:selected])**2, 0): ", torch.max((attn_flat[:selected]-attn_flat_v2[:selected])**2, 0)) 97 | 98 | -------------------------------------------------------------------------------- /libs/pointops2/functions/test_attention_op_step2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pointops 3 | from torch_scatter import scatter_max, scatter_mean, scatter_add, scatter_min, scatter_sum 4 | 5 | 
torch.manual_seed(1) 6 | 7 | M = 800000 8 | N = 35000 9 | C = 96 10 | h = 6 11 | softmax_attn_flat = torch.rand(M, h).cuda() 12 | value = torch.rand(N, h, C//h).cuda() 13 | 14 | index_0 = torch.rand(M) 15 | index_0[index_0 < 0] = 0 16 | index_0 = (index_0*N).long().cuda() 17 | 18 | index_1 = torch.rand(M) 19 | index_1[index_1 < 0] = 0 20 | index_1 = (index_1*N).long().cuda() 21 | 22 | softmax_attn_flat.requires_grad = True 23 | value.requires_grad = True 24 | 25 | # value_flat = value[index_1] #[M, num_heads, C // num_heads] 26 | # x = (softmax_attn_flat.unsqueeze(-1) * value_flat).reshape(M, C) 27 | # x = scatter_sum(src=x, index=index_0, dim=0, dim_size=N) #[N, C] 28 | # loss = x.sum() 29 | # loss.backward() 30 | 31 | # print("x.shape: {}, x[:5,:10]: {}".format(x.shape, x[:5,:10])) 32 | # print("softmax_attn_flat.grad[:5, :10]: ", softmax_attn_flat.grad[:5, :10]) 33 | # print("value.grad[:5, :3, :5]: ", value.grad[:5, :3, :5]) 34 | # input() 35 | 36 | print("softmax_attn_flat.is_contiguous(): ", softmax_attn_flat.is_contiguous()) 37 | print("value.is_contiguous(): ", value.is_contiguous()) 38 | print("index_0.is_contiguous(): ", index_0.is_contiguous()) 39 | print("index_1.is_contiguous(): ", index_1.is_contiguous()) 40 | 41 | x_v2 = pointops.attention_step2(softmax_attn_flat.float(), value.float(), index_0.int(), index_1.int()) 42 | x_v2 = x_v2.view(N, C) 43 | loss = x_v2.sum() 44 | loss.backward() 45 | 46 | print("x_v2.shape: {}, x_v2[:5,:10]: {}".format(x_v2.shape, x_v2[:5,:10])) 47 | 48 | print("softmax_attn_flat.grad[:5, :10]: ", softmax_attn_flat.grad[:5, :10]) 49 | print("value.grad[:5, :3, :5]: ", value.grad[:5, :3, :5]) 50 | input() 51 | 52 | print("((x-x_v2)**2 < 1e-8).all(): ", ((x-x_v2)**2 < 1e-8).all()) 53 | 54 | print("torch.max((x-x_v2)**2): ", torch.max((x-x_v2)**2)) 55 | 56 | -------------------------------------------------------------------------------- /libs/pointops2/functions/test_relative_pos_encoding_op_step1.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | import pointops 3 | from torch_scatter import scatter_max, scatter_mean, scatter_add, scatter_min, scatter_sum 4 | 5 | torch.manual_seed(1) 6 | 7 | M = 80000 8 | N = 3500 9 | hdim = 16 10 | h = 6 11 | L = 31 12 | query = torch.rand(N, h, hdim).cuda() 13 | table = torch.rand(L, h, hdim, 3).cuda() 14 | 15 | index = torch.rand(M) 16 | index[index < 0] = 0 17 | index = (index*N).long().cuda() 18 | 19 | rel_index = torch.rand(M, 3) 20 | rel_index[rel_index < 0] = 0 21 | rel_index = (rel_index*L).long().cuda() 22 | 23 | query.requires_grad = True 24 | table.requires_grad = True 25 | 26 | # query_flat = query[index] #[M, h, hdim] 27 | # table_x, table_y, table_z = table[:,:,:,0], table[:,:,:,1], table[:,:,:,2] #[L, h, hdim] 28 | # rel_index_x, rel_index_y, rel_index_z = rel_index[:,0], rel_index[:,1], rel_index[:,2] #[M] 29 | # rel_pos_encoding = table_x[rel_index_x] + table_y[rel_index_y] + table_z[rel_index_z] #[M, h, hdim] 30 | # output = (query_flat * rel_pos_encoding).sum(-1) #[M, h] 31 | # loss = output.mean() 32 | # loss.backward() 33 | 34 | # print("output.shape: {}, output[:5,:10]: {}".format(output.shape, output[:5,:10])) 35 | # print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 36 | # print("table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2]) 37 | # input() 38 | 39 | # print("query.is_contiguous(): ", query.is_contiguous()) 40 | # print("key.is_contiguous(): ", key.is_contiguous()) 41 | # print("index_0.is_contiguous(): ", index_0.is_contiguous()) 42 | # print("index_1.is_contiguous(): ", index_1.is_contiguous()) 43 | 44 | output_v2 = pointops.dot_prod_with_idx(query, index.int(), table, rel_index.int()) 45 | loss = output_v2.mean() 46 | loss.backward() 47 | 48 | print("output_v2.shape: {}, output_v2[:5,:10]: {}".format(output_v2.shape, output_v2[:5,:10])) 49 | print("v2: query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 50 | print("v2: 
table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2]) 51 | input() 52 | 53 | # print("((output-output_v2)**2).max(): ", ((output-output_v2)**2).max()) 54 | 55 | # print("torch.max((attn_flat-attn_flat_v2)**2): ", torch.max((attn_flat-attn_flat_v2)**2)) 56 | 57 | -------------------------------------------------------------------------------- /libs/pointops2/functions/test_relative_pos_encoding_op_step1_v2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pointops 3 | from torch_scatter import scatter_max, scatter_mean, scatter_add, scatter_min, scatter_sum 4 | 5 | torch.manual_seed(1) 6 | 7 | M = 80000 8 | N = 3500 9 | hdim = 16 10 | h = 6 11 | L = 31 12 | query = torch.rand(N, h, hdim).cuda() 13 | table_q = torch.rand(L, h, hdim, 3).cuda() 14 | key = torch.rand(N, h, hdim).cuda() 15 | table_k = torch.rand(L, h, hdim, 3).cuda() 16 | 17 | index_q = torch.rand(M) 18 | index_q[index_q < 0] = 0 19 | index_q = (index_q*N).long().cuda() 20 | 21 | index_k = torch.rand(M) 22 | index_k[index_k < 0] = 0 23 | index_k = (index_k*N).long().cuda() 24 | 25 | rel_index = torch.rand(M, 3) 26 | rel_index[rel_index < 0] = 0 27 | rel_index = (rel_index*L).long().cuda() 28 | 29 | query.requires_grad = True 30 | table_q.requires_grad = True 31 | key.requires_grad = True 32 | table_k.requires_grad = True 33 | 34 | output1 = pointops.dot_prod_with_idx(query, index_q.int(), table_q, rel_index.int()) 35 | output2 = pointops.dot_prod_with_idx(key, index_k.int(), table_k, rel_index.int()) 36 | output = output1 + output2 37 | # loss = output.mean() 38 | # loss.backward() 39 | 40 | # print("output.shape: {}, output[:5,:10]: {}".format(output.shape, output[:5,:10])) 41 | # print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 42 | # print("table_q.grad[:5, :3, :5, :2]: ", table_q.grad[:5, :3, :5, :2]) 43 | # print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 44 | # print("table_k.grad[:5, :3, :5, :2]: ", table_k.grad[:5, 
:3, :5, :2]) 45 | # input() 46 | 47 | # print("query.is_contiguous(): ", query.is_contiguous()) 48 | # print("key.is_contiguous(): ", key.is_contiguous()) 49 | # print("index_0.is_contiguous(): ", index_0.is_contiguous()) 50 | # print("index_1.is_contiguous(): ", index_1.is_contiguous()) 51 | 52 | output_v2 = pointops.dot_prod_with_idx_v2(query, index_q.int(), key, index_k.int(), table_q, table_k, rel_index.int()) 53 | loss = output_v2.mean() 54 | loss.backward() 55 | 56 | print("output_v2.shape: {}, output_v2[:5,:10]: {}".format(output_v2.shape, output_v2[:5,:10])) 57 | print("v2 query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 58 | print("v2 table_q.grad[:5, :3, :5, :2]: ", table_q.grad[:5, :3, :5, :2]) 59 | print("v2 key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 60 | print("v2 table_k.grad[:5, :3, :5, :2]: ", table_k.grad[:5, :3, :5, :2]) 61 | # input() 62 | 63 | print("((output-output_v2)**2).max(): ", ((output-output_v2)**2).max()) 64 | 65 | -------------------------------------------------------------------------------- /libs/pointops2/functions/test_relative_pos_encoding_op_step1_v3.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pointops 3 | from torch_scatter import scatter_max, scatter_mean, scatter_add, scatter_min, scatter_sum 4 | 5 | torch.manual_seed(1) 6 | 7 | M = 80000 8 | N = 3500 9 | # M = 80 10 | # N = 5 11 | hdim = 16 12 | h = 6 13 | L = 31 14 | query = torch.rand(N, h, hdim).cuda() 15 | table_q = torch.rand(L, h, hdim, 3).cuda() 16 | key = torch.rand(N, h, hdim).cuda() 17 | table_k = torch.rand(L, h, hdim, 3).cuda() 18 | 19 | index_q = torch.rand(M) 20 | index_q[index_q < 0] = 0 21 | index_q = (index_q*N).long().cuda() 22 | 23 | index_k = torch.rand(M) 24 | index_k[index_k < 0] = 0 25 | index_k = (index_k*N).long().cuda() 26 | 27 | rel_index = torch.rand(M, 3) 28 | rel_index[rel_index < 0] = 0 29 | rel_index = (rel_index*L).long().cuda() 30 | 31 | 32 | # rearrange index for 
acceleration 33 | index_q, indices = torch.sort(index_q) #[M,] 34 | index_k = index_k[indices] #[M,] 35 | rel_index = rel_index[indices] 36 | index_q_counts = index_q.bincount() 37 | 38 | print("index_q_counts.shape: ", index_q_counts.shape) 39 | 40 | n_max = index_q_counts.max() 41 | index_q_offsets = index_q_counts.cumsum(dim=-1) #[N] 42 | 43 | print("v1 index_q_offsets.shape: ", index_q_offsets.shape) 44 | 45 | index_q_offsets = torch.cat([torch.zeros(1, dtype=torch.long).cuda(), index_q_offsets], 0) #[N+1] 46 | 47 | # print("index_q[:100]: ", index_q[:100]) 48 | print("n_max: ", n_max) 49 | print("index_q_offsets.shape: ", index_q_offsets.shape) 50 | # input() 51 | 52 | print("index_q_offsets[:100]: ", index_q_offsets[:100]) 53 | print("index_k[:20]: ", index_k[:20]) 54 | 55 | query.requires_grad = True 56 | table_q.requires_grad = True 57 | key.requires_grad = True 58 | table_k.requires_grad = True 59 | 60 | output1 = pointops.dot_prod_with_idx(query, index_q.int(), table_q, rel_index.int()) 61 | output2 = pointops.dot_prod_with_idx(key, index_k.int(), table_k, rel_index.int()) 62 | output = output1 + output2 63 | loss = output.mean() 64 | loss.backward() 65 | 66 | # print("output.shape: {}, output[:5,:10]: {}".format(output.shape, output[:5,:10])) 67 | # print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 68 | # print("table_q.grad[:5, :3, :5, :2]: ", table_q.grad[:5, :3, :5, :2]) 69 | # print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 70 | # print("table_k.grad[:5, :3, :5, :2]: ", table_k.grad[:5, :3, :5, :2]) 71 | # input() 72 | 73 | # print("query.is_contiguous(): ", query.is_contiguous()) 74 | # print("key.is_contiguous(): ", key.is_contiguous()) 75 | # print("index_q.is_contiguous(): ", index_q.is_contiguous()) 76 | # print("index_k.is_contiguous(): ", index_k.is_contiguous()) 77 | 78 | output_v2 = pointops.dot_prod_with_idx_v3(query, index_q_offsets.int(), n_max, key, index_k.int(), table_q, table_k, rel_index.int()) 79 | # loss = 
output_v2.mean() 80 | # loss.backward() 81 | 82 | # print("output_v2.shape: {}, output_v2[:5,:10]: {}".format(output_v2.shape, output_v2[:5,:10])) 83 | # print("v2 query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 84 | # print("v2 table_q.grad[:5, :3, :5, :2]: ", table_q.grad[:5, :3, :5, :2]) 85 | # print("v2 key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 86 | # print("v2 table_k.grad[:5, :3, :5, :2]: ", table_k.grad[:5, :3, :5, :2]) 87 | # input() 88 | 89 | print("((output-output_v2)**2).max(): ", ((output-output_v2)**2).max()) 90 | 91 | -------------------------------------------------------------------------------- /libs/pointops2/functions/test_relative_pos_encoding_op_step2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pointops 3 | from torch_scatter import scatter_max, scatter_mean, scatter_add, scatter_min, scatter_sum 4 | 5 | torch.manual_seed(1) 6 | 7 | M = 80000 8 | N = 3500 9 | hdim = 16 10 | h = 6 11 | L = 31 12 | attn = torch.rand(M, h).cuda() 13 | v = torch.rand(N, h, hdim).cuda() 14 | table = torch.rand(L, h, hdim, 3).cuda() 15 | 16 | index_0 = torch.rand(M) 17 | index_0[index_0 < 0] = 0 18 | index_0 = (index_0*N).long().cuda() 19 | 20 | index_1 = torch.rand(M) 21 | index_1[index_1 < 0] = 0 22 | index_1 = (index_1*N).long().cuda() 23 | 24 | rel_index = torch.rand(M, 3) 25 | rel_index[rel_index < 0] = 0 26 | rel_index = (rel_index*L).long().cuda() 27 | 28 | attn.requires_grad = True 29 | v.requires_grad = True 30 | table.requires_grad = True 31 | 32 | v_flat = v[index_1] #[M, h, hdim] 33 | table_x, table_y, table_z = table[:,:,:,0], table[:,:,:,1], table[:,:,:,2] #[L, h, hdim] 34 | rel_index_x, rel_index_y, rel_index_z = rel_index[:,0], rel_index[:,1], rel_index[:,2] #[M] 35 | rel_pos_encoding = table_x[rel_index_x] + table_y[rel_index_y] + table_z[rel_index_z] #[M, h, hdim] 36 | v_flat_new = v_flat + rel_pos_encoding #[M, h, hdim] 37 | output = attn.unsqueeze(-1) * v_flat_new #[M, h, 
hdim] 38 | output = scatter_sum(src=output, index=index_0, dim=0, dim_size=N) #[N, h, hdim] 39 | loss = output.mean() 40 | loss.backward() 41 | 42 | print("output.shape: {}, output[:5,:10,:5]: {}".format(output.shape, output[:5,:10, :5])) 43 | print("attn.grad[:5, :3]: ", attn.grad[:5, :3]) 44 | print("v.grad[:5, :3, :5]: ", v.grad[:5, :3, :5]) 45 | print("table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2]) 46 | input() 47 | 48 | # print("query.is_contiguous(): ", query.is_contiguous()) 49 | # print("key.is_contiguous(): ", key.is_contiguous()) 50 | # print("index_0.is_contiguous(): ", index_0.is_contiguous()) 51 | # print("index_1.is_contiguous(): ", index_1.is_contiguous()) 52 | 53 | # output_v2 = pointops.attention_step2_with_rel_pos_value(attn, v, index_0.int(), index_1.int(), table, rel_index.int()) 54 | # loss = output_v2.mean() 55 | # loss.backward() 56 | 57 | # print("output_v2.shape: {}, output_v2[:5,:10,:5]: {}".format(output_v2.shape, output_v2[:5,:10,:5])) 58 | # print("v2 attn.grad[:5, :3]: ", attn.grad[:5, :3]) 59 | # print("v2 v.grad[:5, :3, :5]: ", v.grad[:5, :3, :5]) 60 | # print("v2 table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2]) 61 | # input() 62 | 63 | # print("((output-output_v2)**2).max(): ", ((output-output_v2)**2).max()) 64 | 65 | # print("torch.max((attn_flat-attn_flat_v2)**2): ", torch.max((attn_flat-attn_flat_v2)**2)) 66 | 67 | -------------------------------------------------------------------------------- /libs/pointops2/functions/test_relative_pos_encoding_op_step2_v2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pointops 3 | from torch_scatter import scatter_max, scatter_mean, scatter_add, scatter_min, scatter_sum 4 | 5 | torch.manual_seed(1) 6 | 7 | M = 80000 8 | N = 3500 9 | hdim = 16 10 | h = 6 11 | L = 31 12 | attn = torch.rand(M, h).cuda() 13 | v = torch.rand(N, h, hdim).cuda() 14 | table = torch.rand(L, h, hdim, 3).cuda() 15 | 16 | index_0 = 
torch.rand(M) 17 | index_0[index_0 < 0] = 0 18 | index_0 = (index_0*N).long().cuda() 19 | 20 | index_1 = torch.rand(M) 21 | index_1[index_1 < 0] = 0 22 | index_1 = (index_1*N).long().cuda() 23 | 24 | rel_index = torch.rand(M, 3) 25 | rel_index[rel_index < 0] = 0 26 | rel_index = (rel_index*L).long().cuda() 27 | 28 | 29 | # rearrange index for acceleration 30 | index_0, indices = torch.sort(index_0) #[M,] 31 | index_1 = index_1[indices] #[M,] 32 | rel_index = rel_index[indices] 33 | index_0_counts = index_0.bincount() 34 | 35 | print("index_0_counts.shape: ", index_0_counts.shape) 36 | 37 | n_max = index_0_counts.max() 38 | index_0_offsets = index_0_counts.cumsum(dim=-1) #[N] 39 | 40 | print("v1 index_0_offsets.shape: ", index_0_offsets.shape) 41 | 42 | index_0_offsets = torch.cat([torch.zeros(1, dtype=torch.long).cuda(), index_0_offsets], 0) #[N+1] 43 | 44 | 45 | attn.requires_grad = True 46 | v.requires_grad = True 47 | table.requires_grad = True 48 | 49 | 50 | output = pointops.attention_step2_with_rel_pos_value(attn, v, index_0.int(), index_1.int(), table, rel_index.int()) 51 | loss = output.mean() 52 | loss.backward() 53 | 54 | print("output.shape: {}, output[:5,:10,:5]: {}".format(output.shape, output[:5,:10, :5])) 55 | print("attn.grad[:5, :3]: ", attn.grad[:5, :3]) 56 | print("v.grad[:5, :3, :5]: ", v.grad[:5, :3, :5]) 57 | print("table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2]) 58 | # input() 59 | 60 | attn_grad = attn.grad.clone() 61 | v_grad = v.grad.clone() 62 | table_grad = table.grad.clone() 63 | 64 | attn.grad.zero_() 65 | v.grad.zero_() 66 | table.grad.zero_() 67 | 68 | # print("query.is_contiguous(): ", query.is_contiguous()) 69 | # print("key.is_contiguous(): ", key.is_contiguous()) 70 | # print("index_0.is_contiguous(): ", index_0.is_contiguous()) 71 | # print("index_1.is_contiguous(): ", index_1.is_contiguous()) 72 | 73 | output_v2 = pointops.attention_step2_with_rel_pos_value_v2(attn, v, index_0_offsets.int(), n_max, index_1.int(), 
"""Build script for the ``pointops2`` CUDA extension.

Collects every ``.cpp`` / ``.cu`` file below ``src/`` and compiles them into
the ``pointops2_cuda`` extension module; the Python wrappers in ``functions/``
are installed as the ``pointops2`` package.
"""
import os
from sysconfig import get_config_vars

from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension

# ``distutils`` was removed from the standard library in Python 3.12; the
# stdlib ``sysconfig`` module exposes the same ``get_config_vars`` lookup.
(opt,) = get_config_vars('OPT')
# ``OPT`` is not defined on every platform, in which case the lookup yields
# None; only rewrite the variable when there is something to filter.
if opt is not None:
    # '-Wstrict-prototypes' is dropped from the exported flags before the
    # C++/CUDA sources are compiled.
    os.environ['OPT'] = " ".join(
        flag for flag in opt.split() if flag != '-Wstrict-prototypes'
    )

src = 'src'
# Sorted so the list of translation units does not depend on the
# directory-listing order returned by os.walk.
sources = sorted(
    os.path.join(root, file)
    for root, dirs, files in os.walk(src)
    for file in files
    if file.endswith(('.cpp', '.cu'))
)

setup(
    name='pointops2',
    version='1.0',
    install_requires=["torch", "numpy"],
    packages=["pointops2"],
    package_dir={"pointops2": "functions"},
    ext_modules=[
        CUDAExtension(
            name='pointops2_cuda',
            sources=sources,
            extra_compile_args={'cxx': ['-g'], 'nvcc': ['-O2']}
        )
    ],
    cmdclass={'build_ext': BuildExtension}
)
/libs/pointops2/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pointcept/PointTransformerV2/5386c4d71f3d6c42c24a8105fce8750e9355dc54/libs/pointops2/src/__init__.py -------------------------------------------------------------------------------- /libs/pointops2/src/aggregation/aggregation_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "aggregation_cuda_kernel.h" 5 | 6 | 7 | void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor) 8 | { 9 | const float *input = input_tensor.data_ptr(); 10 | const float *position = position_tensor.data_ptr(); 11 | const float *weight = weight_tensor.data_ptr(); 12 | const int *idx = idx_tensor.data_ptr(); 13 | float *output = output_tensor.data_ptr(); 14 | aggregation_forward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, output); 15 | } 16 | 17 | void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor) 18 | { 19 | const float *input = input_tensor.data_ptr(); 20 | const float *position = position_tensor.data_ptr(); 21 | const float *weight = weight_tensor.data_ptr(); 22 | const int *idx = idx_tensor.data_ptr(); 23 | const float *grad_output = grad_output_tensor.data_ptr(); 24 | float *grad_input = grad_input_tensor.data_ptr(); 25 | float *grad_position = grad_position_tensor.data_ptr(); 26 | float *grad_weight = grad_weight_tensor.data_ptr(); 27 | aggregation_backward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, 
#include "../cuda_utils.h"
#include "aggregation_cuda_kernel.h"


// Forward aggregation, one thread per output element (n_idx, c_idx).
//
// input:  input (n, c), position (n, nsample, c), weight (n, nsample, w_c),
//         idx (n, nsample)
// output: output (n, c)
//
// For every neighbour s the thread adds
//     (input[idx[n, s], c] + position[n, s, c]) * weight[n, s, c % w_c]
// on top of the value already stored in output[n, c]; the buffer is never
// cleared here, so the caller decides what the sum starts from.
__global__ void aggregation_forward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) {
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index >= n * c) return;
    const int c_idx = index % c;
    const int n_idx = index / c;
    const int w_c_idx = c_idx % w_c;  // weight channels are shared by channels with equal c % w_c
    // Accumulate in a register and write back once instead of doing a
    // global-memory read-modify-write per neighbour.
    float acc = output[index];
    for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++)
    {
        int idx_idx = n_idx * nsample + nsample_idx;
        int input_idx = idx[idx_idx] * c + c_idx;
        int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx;
        int weight_idx = n_idx * nsample * w_c + nsample_idx * w_c + w_c_idx;
        acc += (input[input_idx] + position[position_idx]) * weight[weight_idx];
    }
    output[index] = acc;
}

// Backward aggregation, one thread per grad_output element (n_idx, c_idx).
//
// input:  grad_output (n, c)
// output: grad_input (n, c), grad_position (n, nsample, c),
//         grad_weight (n, nsample, w_c)
__global__ void aggregation_backward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) {
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index >= n * c) return;
    const int c_idx = index % c;
    const int n_idx = index / c;
    const int w_c_idx = c_idx % w_c;
    const float grad = grad_output[index];  // invariant across the neighbour loop
    for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++)
    {
        int idx_idx = n_idx * nsample + nsample_idx;
        int input_idx = idx[idx_idx] * c + c_idx;
        int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx;
        int weight_idx = n_idx * nsample * w_c + nsample_idx * w_c + w_c_idx;
        // Several (n, s) pairs can point at the same input row -> atomic.
        atomicAdd(grad_input + input_idx, grad * weight[weight_idx]);
        // (n, s, c) is visited by exactly one thread -> plain store.
        grad_position[position_idx] = grad * weight[weight_idx];
        // Channels with the same c % w_c share one weight entry -> atomic.
        atomicAdd(grad_weight + weight_idx, grad * (input[input_idx] + position[position_idx]));
    }
}

// Launches the forward kernel with one thread per element of output (n, c).
void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) {
    // input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c)
    if (n * c <= 0) return;  // nothing to do; a zero-block grid is not a valid launch
    dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    aggregation_forward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, w_c, input, position, weight, idx, output);
}

// Launches the backward kernel with one thread per element of grad_output (n, c).
void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) {
    // input: grad_output: (n, c), output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight: (n, nsample, w_c)
    if (n * c <= 0) return;  // nothing to do; a zero-block grid is not a valid launch
    dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    aggregation_backward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight);
}
at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor); 8 | void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output); 15 | void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /libs/pointops2/src/attention/attention_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "attention_cuda_kernel.h" 5 | 6 | void attention_step1_forward_cuda(int N, int M, int h, int C, at::Tensor q_tensor, at::Tensor k_tensor, 7 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor) 8 | { 9 | const float *q = q_tensor.data_ptr(); 10 | const float *k = k_tensor.data_ptr(); 11 | const int *index0 = index0_tensor.data_ptr(); 12 | const int *index1 = index1_tensor.data_ptr(); 13 | float *attn = attn_tensor.data_ptr(); 14 | attention_step1_forward_cuda_launcher(N, M, h, C, q, k, index0, index1, attn); 15 | } 16 | 17 | void attention_step1_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor, 18 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, 19 | at::Tensor grad_q_tensor, 
at::Tensor grad_k_tensor) 20 | { 21 | const float *grad_out = grad_out_tensor.data_ptr(); 22 | const int *index0 = index0_tensor.data_ptr(); 23 | const int *index1 = index1_tensor.data_ptr(); 24 | const float *q = q_tensor.data_ptr(); 25 | const float *k = k_tensor.data_ptr(); 26 | float *grad_q = grad_q_tensor.data_ptr(); 27 | float *grad_k = grad_k_tensor.data_ptr(); 28 | attention_step1_backward_cuda_launcher(N, M, h, C, grad_out, index0, index1, q, k, grad_q, grad_k); 29 | } 30 | 31 | void attention_step2_forward_cuda(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, 32 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor) 33 | { 34 | const float *attn = attn_tensor.data_ptr(); 35 | const float *v = v_tensor.data_ptr(); 36 | const int *index0 = index0_tensor.data_ptr(); 37 | const int *index1 = index1_tensor.data_ptr(); 38 | float *output = output_tensor.data_ptr(); 39 | attention_step2_forward_cuda_launcher(N, M, h, C, attn, v, index0, index1, output); 40 | } 41 | 42 | 43 | void attention_step2_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor, 44 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, 45 | at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor) 46 | { 47 | const float *grad_out = grad_out_tensor.data_ptr(); 48 | const int *index0 = index0_tensor.data_ptr(); 49 | const int *index1 = index1_tensor.data_ptr(); 50 | const float *attn = attn_tensor.data_ptr(); 51 | const float *v = v_tensor.data_ptr(); 52 | float *grad_attn = grad_attn_tensor.data_ptr(); 53 | float *grad_v = grad_v_tensor.data_ptr(); 54 | attention_step2_backward_cuda_launcher(N, M, h, C, grad_out, index0, index1, attn, v, grad_attn, grad_v); 55 | } 56 | -------------------------------------------------------------------------------- /libs/pointops2/src/attention/attention_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | 
#ifndef _ATTENTION_CUDA_KERNEL 2 | #define _ATTENTION_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void attention_step1_forward_cuda(int N, int M, int h, int C, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor); 8 | void attention_step1_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor); 9 | 10 | void attention_step2_forward_cuda(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor); 11 | void attention_step2_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor); 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | void attention_step1_forward_cuda_launcher(int N, int M, int h, int C, const float *q, const float *k, const int *index0, const int *index1, float *attn); 18 | void attention_step1_backward_cuda_launcher(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *q, const float *k, float *grad_q, float *grad_k); 19 | 20 | void attention_step2_forward_cuda_launcher(int N, int M, int h, int C, const float *attn, const float *v, const int *index0, const int *index1, float *output); 21 | void attention_step2_backward_cuda_launcher(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *attn, const float *v, float *grad_attn, float *grad_v); 22 | 23 | #ifdef __cplusplus 24 | } 25 | #endif 26 | #endif 27 | -------------------------------------------------------------------------------- /libs/pointops2/src/attention_v2/attention_cuda_kernel_v2.h: 
-------------------------------------------------------------------------------- 1 | #ifndef _ATTENTION_V2_CUDA_KERNEL 2 | #define _ATTENTION_V2_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void attention_step1_forward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor attn_tensor); 8 | void attention_step1_backward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor grad_out_tensor, at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor); 9 | 10 | void attention_step2_forward_cuda_v2(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor); 11 | void attention_step2_backward_cuda_v2(int N, int M, int h, int C, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor); 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | void attention_step1_forward_cuda_launcher_v2(int N, int M, int h, int C, const unsigned int n_max, const float *q, const float *k, const int *index0_offsets, const int *index1, float *attn); 18 | void attention_step1_backward_cuda_launcher_v2(int N, int M, int h, int C, const unsigned int n_max, const float *grad_out, const int *index0_offsets, const int *index1, const float *q, const float *k, float *grad_q, float *grad_k); 19 | 20 | void attention_step2_forward_cuda_launcher_v2(int N, int M, int h, int C, const float *attn, const float *v, const int *index0, const int *index1, float *output); 21 | void attention_step2_backward_cuda_launcher_v2(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *attn, const float *v, 
float *grad_attn, float *grad_v); 22 | 23 | #ifdef __cplusplus 24 | } 25 | #endif 26 | #endif 27 | -------------------------------------------------------------------------------- /libs/pointops2/src/attention_v2/attention_cuda_v2.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "attention_cuda_kernel_v2.h" 5 | 6 | void attention_step1_forward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor q_tensor, at::Tensor k_tensor, 7 | at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor attn_tensor) 8 | { 9 | const float *q = q_tensor.data_ptr(); 10 | const float *k = k_tensor.data_ptr(); 11 | const int *index0_offsets = index0_tensor_offsets.data_ptr(); 12 | const int *index1 = index1_tensor.data_ptr(); 13 | float *attn = attn_tensor.data_ptr(); 14 | attention_step1_forward_cuda_launcher_v2(N, M, h, C, n_max, q, k, index0_offsets, index1, attn); 15 | } 16 | 17 | void attention_step1_backward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor grad_out_tensor, 18 | at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, 19 | at::Tensor grad_q_tensor, at::Tensor grad_k_tensor) 20 | { 21 | const float *grad_out = grad_out_tensor.data_ptr(); 22 | const int *index0_offsets = index0_tensor_offsets.data_ptr(); 23 | const int *index1 = index1_tensor.data_ptr(); 24 | const float *q = q_tensor.data_ptr(); 25 | const float *k = k_tensor.data_ptr(); 26 | float *grad_q = grad_q_tensor.data_ptr(); 27 | float *grad_k = grad_k_tensor.data_ptr(); 28 | attention_step1_backward_cuda_launcher_v2(N, M, h, C, n_max, grad_out, index0_offsets, index1, q, k, grad_q, grad_k); 29 | } 30 | 31 | void attention_step2_forward_cuda_v2(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, 32 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor) 33 | { 34 | const 
float *attn = attn_tensor.data_ptr(); 35 | const float *v = v_tensor.data_ptr(); 36 | const int *index0 = index0_tensor.data_ptr(); 37 | const int *index1 = index1_tensor.data_ptr(); 38 | float *output = output_tensor.data_ptr(); 39 | attention_step2_forward_cuda_launcher_v2(N, M, h, C, attn, v, index0, index1, output); 40 | } 41 | 42 | 43 | void attention_step2_backward_cuda_v2(int N, int M, int h, int C, at::Tensor grad_out_tensor, 44 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, 45 | at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor) 46 | { 47 | const float *grad_out = grad_out_tensor.data_ptr(); 48 | const int *index0 = index0_tensor.data_ptr(); 49 | const int *index1 = index1_tensor.data_ptr(); 50 | const float *attn = attn_tensor.data_ptr(); 51 | const float *v = v_tensor.data_ptr(); 52 | float *grad_attn = grad_attn_tensor.data_ptr(); 53 | float *grad_v = grad_v_tensor.data_ptr(); 54 | attention_step2_backward_cuda_launcher_v2(N, M, h, C, grad_out, index0, index1, attn, v, grad_attn, grad_v); 55 | } 56 | -------------------------------------------------------------------------------- /libs/pointops2/src/cuda_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef _CUDA_UTILS_H 2 | #define _CUDA_UTILS_H 3 | 4 | #include 5 | #include 6 | 7 | #define TOTAL_THREADS 1024 8 | #define THREADS_PER_BLOCK 256 9 | #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) 10 | 11 | inline int opt_n_threads(int work_size) { 12 | const int pow_2 = std::log(static_cast(work_size)) / std::log(2.0); 13 | return std::max(std::min(1 << pow_2, TOTAL_THREADS), 1); 14 | } 15 | 16 | inline dim3 opt_block_config(int x, int y) { 17 | const int x_threads = opt_n_threads(x); 18 | const int y_threads = std::max(std::min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1); 19 | dim3 block_config(x_threads, y_threads, 1); 20 | return block_config; 21 | } 22 | 23 | #endif 24 | 
#include "../cuda_utils.h"
#include "grouping_cuda_kernel.h"


// Gathers neighbour features, one thread per output element.
//
// input:  input (n, c), idx (m, nsample)
// output: output (m, nsample, c), with output[m, s, :] = input[idx[m, s], :]
__global__ void grouping_forward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ input, const int *__restrict__ idx, float *__restrict__ output) {
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index >= m * nsample * c) return;
    // Decompose the flat index into (m_idx, nsample_idx, c_idx).
    const int c_idx = index % c;
    const int nsample_idx = (index / c) % nsample;
    const int m_idx = index / nsample / c;
    const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx;
    output[index] = input[input_idx];
}

// Scatters gradients back to the gathered rows, one thread per grad_output element.
//
// input:  grad_output (m, nsample, c), idx (m, nsample)
// output: grad_input (n, c)
__global__ void grouping_backward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ grad_output, const int *__restrict__ idx, float *__restrict__ grad_input) {
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index >= m * nsample * c) return;
    const int c_idx = index % c;
    const int nsample_idx = (index / c) % nsample;
    const int m_idx = index / nsample / c;
    const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx;
    // The same input row can be referenced by many (m, s) pairs -> atomic.
    atomicAdd(grad_input + input_idx, grad_output[index]);
}

// Launches the forward kernel with one thread per element of output (m, nsample, c).
void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output) {
    // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c)
    if (m * nsample * c <= 0) return;  // nothing to do; a zero-block grid is not a valid launch
    dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    grouping_forward_cuda_kernel<<<blocks, threads, 0>>>(m, nsample, c, input, idx, output);
}

// Launches the backward kernel with one thread per element of grad_output (m, nsample, c).
void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input)
{
    // input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c)
    if (m * nsample * c <= 0) return;  // nothing to do; a zero-block grid is not a valid launch
    dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    grouping_backward_cuda_kernel<<<blocks, threads, 0>>>(m, nsample, c, grad_output, idx, grad_input);
}
#include "../cuda_utils.h"
#include "interpolation_cuda_kernel.h"


// Weighted interpolation, one thread per output element (n_idx, c_idx).
//
// input:  input (m, c), idx (n, k), weight (n, k)
// output: output (n, c)
//
// Adds sum_i input[idx[n, i], c] * weight[n, i] on top of the value already
// stored in output[n, c]; the buffer is never cleared here, so the caller
// decides what the sum starts from.
__global__ void interpolation_forward_cuda_kernel(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output)
{
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index >= n * c) return;
    int c_idx = index % c;
    int n_idx = index / c;
    // Accumulate in a register and write back once instead of doing a
    // global-memory read-modify-write per neighbour.
    float acc = output[index];
    for (int i = 0; i < k; i++)
    {
        int idx_idx = n_idx * k + i;
        int input_idx = idx[idx_idx] * c + c_idx;
        acc += input[input_idx] * weight[idx_idx];
    }
    output[index] = acc;
}

// Backward interpolation, one thread per grad_output element (n_idx, c_idx).
//
// input:  grad_output (n, c), idx (n, k), weight (n, k)
// output: grad_input (m, c)
__global__ void interpolation_backward_cuda_kernel(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input)
{
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index >= n * c) return;
    int c_idx = index % c;
    int n_idx = index / c;
    const float grad = grad_output[index];  // invariant across the neighbour loop
    for (int i = 0; i < k; i++)
    {
        int idx_idx = n_idx * k + i;
        int input_idx = idx[idx_idx] * c + c_idx;
        // Several (n, i) pairs can reference the same input row -> atomic.
        atomicAdd(grad_input + input_idx, grad * weight[idx_idx]);
    }
}

// Launches the forward kernel with one thread per element of output (n, c).
void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output) {
    // input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c)
    if (n * c <= 0) return;  // nothing to do; a zero-block grid is not a valid launch
    dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    interpolation_forward_cuda_kernel<<<blocks, threads, 0>>>(n, c, k, input, idx, weight, output);
}

// Launches the backward kernel with one thread per element of grad_output (n, c).
void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input) {
    // input: grad_output: (n, c), idx: (n, k), weight: (n, k), output: grad_input (m, c)
    if (n * c <= 0) return;  // nothing to do; a zero-block grid is not a valid launch
    dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    interpolation_backward_cuda_kernel<<<blocks, threads, 0>>>(n, c, k, grad_output, idx, weight, grad_input);
}
#include <stdexcept>

#include "../cuda_utils.h"
#include "knnquery_cuda_kernel.h"

// Capacity of the per-thread candidate heap in knnquery_cuda_kernel; nsample
// must not exceed it.
#define KNN_MAX_NSAMPLE 100


// Exchanges two floats in place.
__device__ void swap_float(float *x, float *y)
{
    float tmp = *x;
    *x = *y;
    *y = tmp;
}


// Exchanges two ints in place.
__device__ void swap_int(int *x, int *y)
{
    int tmp = *x;
    *x = *y;
    *y = tmp;
}


// Sifts the root of a max-heap of k (dist, idx) pairs down until the larger
// child is no longer greater than it. idx is permuted together with dist.
__device__ void reheap(float *dist, int *idx, int k)
{
    int root = 0;
    int child = root * 2 + 1;
    while (child < k)
    {
        // Pick the larger of the two children.
        if(child + 1 < k && dist[child+1] > dist[child])
            child++;
        if(dist[root] > dist[child])
            return;
        swap_float(&dist[root], &dist[child]);
        swap_int(&idx[root], &idx[child]);
        root = child;
        child = root * 2 + 1;
    }
}


// Turns a max-heap of k (dist, idx) pairs into ascending order by repeatedly
// moving the current maximum to the end of a shrinking prefix.
__device__ void heap_sort(float *dist, int *idx, int k)
{
    int i;
    for (i = k - 1; i > 0; i--)
    {
        swap_float(&dist[0], &dist[i]);
        swap_int(&idx[0], &idx[i]);
        reheap(dist, idx, i);
    }
}


// Returns the first position i with idx < offset[i].
// NOTE(review): the scan has no upper bound; it relies on offset containing
// an entry greater than idx -- confirm the caller guarantees this.
__device__ int get_bt_idx(int idx, const int *offset)
{
    int i = 0;
    while (1)
    {
        if (idx < offset[i])
            break;
        else
            i++;
    }
    return i;
}


// k-nearest-neighbour query, one thread per query point.
//
// input:  xyz (n, 3), new_xyz (m, 3), offset / new_offset (scanned by
//         get_bt_idx; offset[b - 1]..offset[b] delimits the candidates of
//         segment b)
// output: idx (m, nsample), dist2 (m, nsample), sorted by ascending squared
//         distance
//
// Candidates are restricted to the segment the query belongs to. If a segment
// holds fewer than nsample points, the unused slots keep dist2 = 1e10 and
// idx = start of the segment.
// Requires nsample <= KNN_MAX_NSAMPLE (enforced by knnquery_cuda_launcher).
__global__ void knnquery_cuda_kernel(int m, int nsample, const float *__restrict__ xyz, const float *__restrict__ new_xyz, const int *__restrict__ offset, const int *__restrict__ new_offset, int *__restrict__ idx, float *__restrict__ dist2) {
    int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (pt_idx >= m) return;

    // Advance the pointers to this query's row.
    new_xyz += pt_idx * 3;
    idx += pt_idx * nsample;
    dist2 += pt_idx * nsample;
    int bt_idx = get_bt_idx(pt_idx, new_offset);
    int start;
    if (bt_idx == 0)
        start = 0;
    else
        start = offset[bt_idx - 1];
    int end = offset[bt_idx];

    float new_x = new_xyz[0];
    float new_y = new_xyz[1];
    float new_z = new_xyz[2];

    // Max-heap of the nsample best candidates seen so far; the root holds the
    // worst of them, so a closer point simply replaces the root.
    float best_dist[KNN_MAX_NSAMPLE];
    int best_idx[KNN_MAX_NSAMPLE];
    for(int i = 0; i < nsample; i++){
        best_dist[i] = 1e10;
        best_idx[i] = start;
    }
    for(int i = start; i < end; i++){
        float x = xyz[i * 3 + 0];
        float y = xyz[i * 3 + 1];
        float z = xyz[i * 3 + 2];
        float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z);
        if (d2 < best_dist[0]){
            best_dist[0] = d2;
            best_idx[0] = i;
            reheap(best_dist, best_idx, nsample);
        }
    }
    heap_sort(best_dist, best_idx, nsample);
    for(int i = 0; i < nsample; i++){
        idx[i] = best_idx[i];
        dist2[i] = best_dist[i];
    }
}


// Launches knnquery_cuda_kernel with one thread per query point.
//
// Throws std::invalid_argument when nsample exceeds KNN_MAX_NSAMPLE, because
// the kernel's per-thread heap arrays would otherwise be written out of bounds.
void knnquery_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2) {
    // input: new_xyz: (m, 3), xyz: (n, 3), idx: (m, nsample)
    if (nsample > KNN_MAX_NSAMPLE)
        throw std::invalid_argument("knnquery_cuda: nsample exceeds the supported maximum of 100");
    if (m <= 0 || nsample <= 0) return;  // nothing to write; a zero-block grid is not a valid launch
    dim3 blocks(DIVUP(m, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    knnquery_cuda_kernel<<<blocks, threads, 0>>>(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2);
}
int *offset, const int *new_offset, int *idx, float *dist2); 14 | 15 | #ifdef __cplusplus 16 | } 17 | #endif 18 | #endif 19 | -------------------------------------------------------------------------------- /libs/pointops2/src/pointops_api.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "knnquery/knnquery_cuda_kernel.h" 5 | #include "sampling/sampling_cuda_kernel.h" 6 | #include "grouping/grouping_cuda_kernel.h" 7 | #include "interpolation/interpolation_cuda_kernel.h" 8 | #include "aggregation/aggregation_cuda_kernel.h" 9 | #include "subtraction/subtraction_cuda_kernel.h" 10 | #include "attention/attention_cuda_kernel.h" 11 | #include "rpe/relative_pos_encoding_cuda_kernel.h" 12 | #include "attention_v2/attention_cuda_kernel_v2.h" 13 | #include "rpe_v2/relative_pos_encoding_cuda_kernel_v2.h" 14 | 15 | 16 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 17 | m.def("knnquery_cuda", &knnquery_cuda, "knnquery_cuda"); 18 | m.def("furthestsampling_cuda", &furthestsampling_cuda, "furthestsampling_cuda"); 19 | m.def("grouping_forward_cuda", &grouping_forward_cuda, "grouping_forward_cuda"); 20 | m.def("grouping_backward_cuda", &grouping_backward_cuda, "grouping_backward_cuda"); 21 | m.def("interpolation_forward_cuda", &interpolation_forward_cuda, "interpolation_forward_cuda"); 22 | m.def("interpolation_backward_cuda", &interpolation_backward_cuda, "interpolation_backward_cuda"); 23 | m.def("subtraction_forward_cuda", &subtraction_forward_cuda, "subtraction_forward_cuda"); 24 | m.def("subtraction_backward_cuda", &subtraction_backward_cuda, "subtraction_backward_cuda"); 25 | m.def("aggregation_forward_cuda", &aggregation_forward_cuda, "aggregation_forward_cuda"); 26 | m.def("aggregation_backward_cuda", &aggregation_backward_cuda, "aggregation_backward_cuda"); 27 | m.def("attention_step1_forward_cuda", &attention_step1_forward_cuda, "attention_step1_forward_cuda"); 28 | 
m.def("attention_step1_backward_cuda", &attention_step1_backward_cuda, "attention_step1_backward_cuda"); 29 | m.def("attention_step2_forward_cuda", &attention_step2_forward_cuda, "attention_step2_forward_cuda"); 30 | m.def("attention_step2_backward_cuda", &attention_step2_backward_cuda, "attention_step2_backward_cuda"); 31 | m.def("dot_prod_with_idx_forward_cuda", &dot_prod_with_idx_forward_cuda, "dot_prod_with_idx_forward_cuda"); 32 | m.def("dot_prod_with_idx_backward_cuda", &dot_prod_with_idx_backward_cuda, "dot_prod_with_idx_backward_cuda"); 33 | m.def("attention_step2_with_rel_pos_value_forward_cuda", &attention_step2_with_rel_pos_value_forward_cuda, "attention_step2_with_rel_pos_value_forward_cuda"); 34 | m.def("attention_step2_with_rel_pos_value_backward_cuda", &attention_step2_with_rel_pos_value_backward_cuda, "attention_step2_with_rel_pos_value_backward_cuda"); 35 | m.def("attention_step1_forward_cuda_v2", &attention_step1_forward_cuda_v2, "attention_step1_forward_cuda_v2"); 36 | m.def("attention_step1_backward_cuda_v2", &attention_step1_backward_cuda_v2, "attention_step1_backward_cuda_v2"); 37 | m.def("attention_step2_forward_cuda_v2", &attention_step2_forward_cuda_v2, "attention_step2_forward_cuda_v2"); 38 | m.def("attention_step2_backward_cuda_v2", &attention_step2_backward_cuda_v2, "attention_step2_backward_cuda_v2"); 39 | m.def("dot_prod_with_idx_forward_cuda_v2", &dot_prod_with_idx_forward_cuda_v2, "dot_prod_with_idx_forward_cuda_v2"); 40 | m.def("dot_prod_with_idx_backward_cuda_v2", &dot_prod_with_idx_backward_cuda_v2, "dot_prod_with_idx_backward_cuda_v2"); 41 | m.def("attention_step2_with_rel_pos_value_forward_cuda_v2", &attention_step2_with_rel_pos_value_forward_cuda_v2, "attention_step2_with_rel_pos_value_forward_cuda_v2"); 42 | m.def("attention_step2_with_rel_pos_value_backward_cuda_v2", &attention_step2_with_rel_pos_value_backward_cuda_v2, "attention_step2_with_rel_pos_value_backward_cuda_v2"); 43 | m.def("dot_prod_with_idx_forward_cuda_v3", 
&dot_prod_with_idx_forward_cuda_v3, "dot_prod_with_idx_forward_cuda_v3"); 44 | m.def("dot_prod_with_idx_backward_cuda_v3", &dot_prod_with_idx_backward_cuda_v3, "dot_prod_with_idx_backward_cuda_v3"); 45 | } 46 | -------------------------------------------------------------------------------- /libs/pointops2/src/rpe/relative_pos_encoding_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "relative_pos_encoding_cuda_kernel.h" 5 | 6 | void dot_prod_with_idx_forward_cuda(int N, int M, int h, int hdim, at::Tensor q_tensor, at::Tensor index_tensor, 7 | at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor) 8 | { 9 | const float *q = q_tensor.data_ptr(); 10 | const float *table = table_tensor.data_ptr(); 11 | const int *index = index_tensor.data_ptr(); 12 | const int *rel_idx = rel_idx_tensor.data_ptr(); 13 | float *output = output_tensor.data_ptr(); 14 | dot_prod_with_idx_forward_cuda_launcher(N, M, h, hdim, q, index, table, rel_idx, output); 15 | } 16 | 17 | void dot_prod_with_idx_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, 18 | at::Tensor q_tensor, at::Tensor index_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, 19 | at::Tensor grad_q_tensor, at::Tensor grad_table_tensor) 20 | { 21 | const float *grad_out = grad_out_tensor.data_ptr(); 22 | const float *q = q_tensor.data_ptr(); 23 | const int *index = index_tensor.data_ptr(); 24 | const float *table = table_tensor.data_ptr(); 25 | const int *rel_idx = rel_idx_tensor.data_ptr(); 26 | float *grad_q = grad_q_tensor.data_ptr(); 27 | float *grad_table = grad_table_tensor.data_ptr(); 28 | dot_prod_with_idx_backward_cuda_launcher(N, M, h, hdim, grad_out, q, index, table, rel_idx, grad_q, grad_table); 29 | } 30 | 31 | void attention_step2_with_rel_pos_value_forward_cuda(int N, int M, int h, int hdim, at::Tensor attn_tensor, at::Tensor v_tensor, 32 | at::Tensor index0_tensor, 
at::Tensor index1_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor) 33 | { 34 | const float *attn = attn_tensor.data_ptr(); 35 | const float *v = v_tensor.data_ptr(); 36 | const int *index0 = index0_tensor.data_ptr(); 37 | const int *index1 = index1_tensor.data_ptr(); 38 | const float *table = table_tensor.data_ptr(); 39 | const int *rel_idx = rel_idx_tensor.data_ptr(); 40 | float *output = output_tensor.data_ptr(); 41 | attention_step2_with_rel_pos_value_forward_cuda_launcher(N, M, h, hdim, attn, v, index0, index1, table, rel_idx, output); 42 | } 43 | 44 | void attention_step2_with_rel_pos_value_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, 45 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor table_tensor, 46 | at::Tensor rel_idx_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor, at::Tensor grad_table_tensor) 47 | { 48 | const float *grad_out = grad_out_tensor.data_ptr(); 49 | const int *index0 = index0_tensor.data_ptr(); 50 | const int *index1 = index1_tensor.data_ptr(); 51 | const float *attn = attn_tensor.data_ptr(); 52 | const float *v = v_tensor.data_ptr(); 53 | const float *table = table_tensor.data_ptr(); 54 | const int *rel_idx = rel_idx_tensor.data_ptr(); 55 | float *grad_attn = grad_attn_tensor.data_ptr(); 56 | float *grad_v = grad_v_tensor.data_ptr(); 57 | float *grad_table = grad_table_tensor.data_ptr(); 58 | attention_step2_with_rel_pos_value_backward_cuda_launcher(N, M, h, hdim, grad_out, index0, index1, attn, v, table, rel_idx, grad_attn, grad_v, grad_table); 59 | } 60 | -------------------------------------------------------------------------------- /libs/pointops2/src/rpe/relative_pos_encoding_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _RPE_CUDA_KERNEL 2 | #define _RPE_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void 
dot_prod_with_idx_forward_cuda(int N, int M, int h, int hdim, at::Tensor q_tensor, at::Tensor index_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor); 8 | void dot_prod_with_idx_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, at::Tensor q_tensor, at::Tensor index_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor grad_q_tensor, at::Tensor grad_table_tensor); 9 | 10 | void attention_step2_with_rel_pos_value_forward_cuda(int N, int M, int h, int hdim, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor); 11 | void attention_step2_with_rel_pos_value_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor, at::Tensor grad_table_tensor); 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | void dot_prod_with_idx_forward_cuda_launcher(int N, int M, int h, int hdim, const float *q, const int *index, const float *table, const int *rel_idx, float *output); 18 | void dot_prod_with_idx_backward_cuda_launcher(int N, int M, int h, int hdim, const float *grad_out, const float *q, const int *index, const float *table, const int *rel_idx, float *grad_q, float *grad_table); 19 | 20 | void attention_step2_with_rel_pos_value_forward_cuda_launcher(int N, int M, int h, int hdim, const float *attn, const float *v, const int *index0, const int *index1, const float *table, const int *rel_idx, float *output); 21 | void attention_step2_with_rel_pos_value_backward_cuda_launcher(int N, int M, int h, int hdim, const float *grad_out, const int *index0, const int *index1, const float *attn, const float *v, const float *table, const int *rel_idx, float *grad_attn, 
float *grad_v, float *grad_table); 22 | 23 | #ifdef __cplusplus 24 | } 25 | #endif 26 | #endif 27 | -------------------------------------------------------------------------------- /libs/pointops2/src/rpe_v2/relative_pos_encoding_cuda_kernel_v2.h: -------------------------------------------------------------------------------- 1 | #ifndef _RPE_V2_CUDA_KERNEL 2 | #define _RPE_V2_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void dot_prod_with_idx_forward_cuda_v2(int N, int M, int h, int hdim, int n_max, int T, at::Tensor q_tensor, at::Tensor index_q_tensor, at::Tensor k_tensor, at::Tensor index_k_tensor, at::Tensor table_q_tensor, at::Tensor table_k_tensor, at::Tensor rel_idx_tensor, at::Tensor rel_idx_offsets_tensor, at::Tensor sort_indices_tensor, at::Tensor output_tensor); 8 | void dot_prod_with_idx_backward_cuda_v2(int N, int M, int h, int hdim, int n_max, int T, at::Tensor grad_out_tensor, at::Tensor q_tensor, at::Tensor index_q_tensor, at::Tensor k_tensor, at::Tensor index_k_tensor, at::Tensor table_q_tensor, at::Tensor table_k_tensor, at::Tensor rel_idx_tensor, at::Tensor rel_idx_offsets_tensor, at::Tensor sort_indices_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor, at::Tensor grad_table_q_tensor, at::Tensor grad_table_k_tensor); 9 | 10 | void dot_prod_with_idx_forward_cuda_v3(int N, int M, int h, int hdim, int n_max, at::Tensor q_tensor, at::Tensor index_q_offsets_tensor, at::Tensor k_tensor, at::Tensor index_k_tensor, at::Tensor table_q_tensor, at::Tensor table_k_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor); 11 | void dot_prod_with_idx_backward_cuda_v3(int N, int M, int h, int hdim, int n_max, at::Tensor grad_out_tensor, at::Tensor q_tensor, at::Tensor index_q_offsets_tensor, at::Tensor k_tensor, at::Tensor index_k_tensor, at::Tensor table_q_tensor, at::Tensor table_k_tensor, at::Tensor rel_idx_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor, at::Tensor grad_table_q_tensor, at::Tensor grad_table_k_tensor); 
12 | 13 | void attention_step2_with_rel_pos_value_forward_cuda_v2(int N, int M, int h, int hdim, int n_max, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_offsets_tensor, at::Tensor index1_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor); 14 | void attention_step2_with_rel_pos_value_backward_cuda_v2(int N, int M, int h, int hdim, int n_max, at::Tensor grad_out_tensor, at::Tensor index0_offsets_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor, at::Tensor grad_table_tensor); 15 | 16 | #ifdef __cplusplus 17 | extern "C" { 18 | #endif 19 | 20 | void dot_prod_with_idx_forward_cuda_launcher_v2(int N, int M, int h, int hdim, int n_max, int T, const float *q, const int *index_q, const float *k, const int *index_k, const float *table_q, const float *table_k, const int *rel_idx, const int *rel_idx_offsets, const int *sort_indices, float *output); 21 | void dot_prod_with_idx_backward_cuda_launcher_v2(int N, int M, int h, int hdim, int n_max, int T, const float *grad_out, const float *q, const int *index_q, const float *k, const int *index_k, const float *table_q, const float *table_k, const int *rel_idx, const int *rel_idx_offsets, const int *sort_indices, float *grad_q, float *grad_k, float *grad_table_q, float *grad_table_k); 22 | 23 | void dot_prod_with_idx_forward_cuda_launcher_v3(int N, int M, int h, int hdim, int n_max, const float *q, const int *index_q_offsets, const float *k, const int *index_k, const float *table_q, const float *table_k, const int *rel_idx, float *output); 24 | void dot_prod_with_idx_backward_cuda_launcher_v3(int N, int M, int h, int hdim, int n_max, const float *grad_out, const float *q, const int *index_q_offsets, const float *k, const int *index_k, const float *table_q, const float *table_k, const int *rel_idx, float *grad_q, float *grad_k, float *grad_table_q, 
float *grad_table_k); 25 | 26 | void attention_step2_with_rel_pos_value_forward_cuda_launcher_v2(int N, int M, int h, int hdim, int n_max, const float *attn, const float *v, const int *index0_offsets, const int *index1, const float *table, const int *rel_idx, float *output); 27 | void attention_step2_with_rel_pos_value_backward_cuda_launcher_v2(int N, int M, int h, int hdim, int n_max, const float *grad_out, const int *index0_offsets, const int *index1, const float *attn, const float *v, const float *table, const int *rel_idx, float *grad_attn, float *grad_v, float *grad_table); 28 | 29 | #ifdef __cplusplus 30 | } 31 | #endif 32 | #endif 33 | -------------------------------------------------------------------------------- /libs/pointops2/src/sampling/sampling_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "sampling_cuda_kernel.h" 5 | 6 | 7 | void furthestsampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor) 8 | { 9 | const float *xyz = xyz_tensor.data_ptr(); 10 | const int *offset = offset_tensor.data_ptr(); 11 | const int *new_offset = new_offset_tensor.data_ptr(); 12 | float *tmp = tmp_tensor.data_ptr(); 13 | int *idx = idx_tensor.data_ptr(); 14 | furthestsampling_cuda_launcher(b, n, xyz, offset, new_offset, tmp, idx); 15 | } 16 | -------------------------------------------------------------------------------- /libs/pointops2/src/sampling/sampling_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _SAMPLING_CUDA_KERNEL 2 | #define _SAMPLING_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void furthestsampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor); 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | 
#include "../cuda_utils.h"
#include "subtraction_cuda_kernel.h"


// output[n_idx, s, ch] = input1[n_idx, ch] - input2[idx[n_idx, s], ch]
// One thread per output element.
// NOTE(review): the flat index n * nsample * c is a 32-bit int - confirm the
// product cannot exceed INT_MAX for the largest expected inputs.
__global__ void subtraction_forward_cuda_kernel(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) {
    // input: input1: (n, c), input2: (n, c), idx: (n, nsample), output: (n, nsample, c)
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index >= n * nsample * c) return;
    // Decompose the flat index into (n_idx, nsample_idx, c_idx).
    const int c_idx = index % c;
    const int nsample_idx = (index / c) % nsample;
    const int n_idx = index / nsample / c;
    const int idx_idx = n_idx * nsample + nsample_idx;
    const int input1_idx = n_idx * c + c_idx;
    const int input2_idx = idx[idx_idx] * c + c_idx;
    output[index] = input1[input1_idx] - input2[input2_idx];
}

// Scatter grad_output back to both operands: +grad to grad_input1 and -grad
// to grad_input2. Several output elements map to the same input element, so
// the accumulation uses atomicAdd; the gradient buffers are added to, not
// overwritten.
__global__ void subtraction_backward_cuda_kernel(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) {
    // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c)
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index >= n * nsample * c) return;
    const int c_idx = index % c;
    const int nsample_idx = (index / c) % nsample;
    const int n_idx = index / nsample / c;
    const int idx_idx = n_idx * nsample + nsample_idx;
    const int input1_idx = n_idx * c + c_idx;
    const int input2_idx = idx[idx_idx] * c + c_idx;
    atomicAdd(grad_input1 + input1_idx, grad_output[index]);
    atomicAdd(grad_input2 + input2_idx, -grad_output[index]);
}

// Launch the forward kernel with one thread per output element.
void subtraction_forward_cuda_launcher(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) {
    // input: input1: (n, c), input2: (n, c), idx: (n, nsample), output: (n, nsample, c)
    if (n * nsample * c <= 0) return;  // empty problem: a zero-block launch is invalid
    dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    subtraction_forward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, input1, input2, idx, output);
}

// Launch the backward kernel with one thread per grad_output element.
void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) {
    // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c)
    if (n * nsample * c <= 0) return;  // empty problem: a zero-block launch is invalid
    dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    subtraction_backward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, idx, grad_output, grad_input1, grad_input2);
}
@DATASETS.register_module()
class ArkitScenesDataset(Dataset):
    """Dataset over preprocessed ARKitScenes meshes stored as ``*.pth`` files.

    Each file is expected to hold a dict with ``coord``, ``color`` and
    ``normal`` entries. No labels are read from disk; an all-zero ``label``
    array with one entry per point is produced instead.

    Args:
        split: Sub-folder name under ``data_root``, or a list of such names.
        data_root: Root folder containing one sub-folder per split.
        transform: Transform config passed to ``Compose``.
        test_mode: When true, ``__getitem__`` returns the test-time
            ``(input_dict_list, label)`` pair and ``loop`` is forced to 1.
        test_cfg: Test config providing ``voxelize``, ``crop``,
            ``post_transform`` and ``aug_transform``; only used in test mode.
        loop: Number of times the data list is repeated per epoch.
    """

    def __init__(self,
                 split="Training",
                 data_root="data/ARKitScenesMesh",
                 transform=None,
                 test_mode=False,
                 test_cfg=None,
                 loop=1):
        super(ArkitScenesDataset, self).__init__()
        self.data_root = data_root
        self.split = split
        self.transform = Compose(transform)
        self.loop = loop if not test_mode else 1  # force make loop = 1 while in test mode
        self.test_mode = test_mode
        self.test_cfg = test_cfg if test_mode else None
        self.class2id = np.array(VALID_CLASS_IDS_200)

        if test_mode:
            self.test_voxelize = TRANSFORMS.build(self.test_cfg.voxelize)
            self.test_crop = TRANSFORMS.build(self.test_cfg.crop)
            self.post_transform = Compose(self.test_cfg.post_transform)
            self.aug_transform = [Compose(aug) for aug in self.test_cfg.aug_transform]

        self.data_list = self.get_data_list()
        logger = get_root_logger()
        logger.info("Totally {} x {} samples in {} set.".format(len(self.data_list), self.loop, split))

    def get_data_list(self):
        """Return the paths of all ``*.pth`` files of the configured split(s).

        Raises:
            NotImplementedError: If ``split`` is neither a str nor a list.
        """
        if isinstance(self.split, str):
            data_list = glob.glob(os.path.join(self.data_root, self.split, "*.pth"))
        elif isinstance(self.split, list):
            data_list = []
            for split in self.split:
                data_list += glob.glob(os.path.join(self.data_root, split, "*.pth"))
        else:
            raise NotImplementedError
        return data_list

    def get_data(self, idx):
        """Load sample ``idx`` (wrapped around the data list) as a dict."""
        data = torch.load(self.data_list[idx % len(self.data_list)])
        coord = data["coord"]
        color = data["color"]
        normal = data["normal"]
        # Placeholder labels: one zero per point.
        label = np.zeros(coord.shape[0])
        data_dict = dict(coord=coord, normal=normal, color=color, label=label)
        return data_dict

    def get_data_name(self, idx):
        """Return the file name (without extension) of sample ``idx``."""
        # The class only defines ``data_list``; the previous ``self.data_idx``
        # lookup raised AttributeError.
        data_path = self.data_list[idx % len(self.data_list)]
        return os.path.basename(data_path).split(".")[0]

    def prepare_train_data(self, idx):
        """Load sample ``idx`` and apply the training transform."""
        data_dict = self.get_data(idx)
        data_dict = self.transform(data_dict)
        return data_dict

    def prepare_test_data(self, idx):
        """Build the test-time inputs for sample ``idx``.

        Returns:
            A tuple ``(input_dict_list, label)``: the post-transformed crops of
            every voxelized part of every augmented copy, and the label array
            popped from the sample before any transform is applied.
        """
        data_dict = self.get_data(idx)
        label = data_dict.pop("label")
        data_dict = self.transform(data_dict)

        # Every augmentation works on its own copy of the transformed sample.
        augmented = [aug(deepcopy(data_dict)) for aug in self.aug_transform]

        input_dict_list = []
        for data in augmented:
            for voxel_part in self.test_voxelize(data):
                input_dict_list += self.test_crop(voxel_part)

        input_dict_list = [self.post_transform(part) for part in input_dict_list]
        return input_dict_list, label

    def __getitem__(self, idx):
        if self.test_mode:
            return self.prepare_test_data(idx)
        else:
            return self.prepare_train_data(idx)

    def __len__(self):
        return len(self.data_list) * self.loop
@DATASETS.register_module()
class ModelNetDataset(Dataset):
    """ModelNet40 classification dataset read from comma-separated text files.

    The sample list comes from ``modelnet40_<split>.txt`` under ``data_root``.
    Each sample ``<shape>_<id>`` is loaded from
    ``<data_root>/<shape>/<shape>_<id>.txt``; columns 0-2 are used as
    coordinates and columns 3-5 as normals.

    Args:
        split: Split name used to pick the sample list file.
        data_root: Dataset root folder.
        class_names: Ordered category names; the position of a name is its
            integer label. Required.
        transform: Transform config passed to ``Compose``.
        cache_data: Keep loaded training samples in memory after first use.
        test_mode: When true, ``__getitem__`` uses ``prepare_test_data`` and
            ``loop`` is forced to 1.
        test_cfg: Stored only in test mode; not otherwise used here.
        loop: Number of times the data list is repeated per epoch.

    Raises:
        TypeError: If ``class_names`` is not provided.
    """

    def __init__(self,
                 split='train',
                 data_root='data/modelnet40_normal_resampled',
                 class_names=None,
                 transform=None,
                 cache_data=False,
                 test_mode=False,
                 test_cfg=None,
                 loop=1):
        super(ModelNetDataset, self).__init__()
        if class_names is None:
            raise TypeError("class_names must be a sequence of category names")
        self.data_root = data_root
        self.class_names = dict(zip(class_names, range(len(class_names))))
        self.split = split
        self.cache_data = cache_data
        self.transform = Compose(transform)
        self.loop = loop if not test_mode else 1  # force make loop = 1 while in test mode
        self.test_mode = test_mode
        self.test_cfg = test_cfg if test_mode else None
        self.cache = {}

        if test_mode:
            # TODO: Optimize
            pass

        list_path = os.path.join(self.data_root, 'modelnet40_{}.txt'.format(self.split))
        with open(list_path) as list_file:
            self.data_list = [line.rstrip() for line in list_file]
        logger = get_root_logger()
        logger.info("Totally {} x {} samples in {} set.".format(len(self.data_list), self.loop, split))

    def _load_sample(self, data_idx):
        """Read sample ``data_idx`` from disk as ``(coord, norm, label)``."""
        sample_name = self.data_list[data_idx]
        # Everything before the last underscore is the shape (category) name.
        data_shape = '_'.join(sample_name.split('_')[0: -1])
        data_path = os.path.join(self.data_root, data_shape, sample_name + '.txt')
        data = np.loadtxt(data_path, delimiter=',').astype(np.float32)
        coord, norm = data[:, 0:3], data[:, 3:6]
        label = np.array([self.class_names[data_shape]])
        return coord, norm, label

    def prepare_train_data(self, idx):
        """Load sample ``idx`` (wrapped around the list) and transform it."""
        data_idx = idx % len(self.data_list)
        # Only read the cache for entries that were stored before; the first
        # access of a sample always goes to disk and fills the cache.
        if self.cache_data and data_idx in self.cache:
            coord, norm, label = self.cache[data_idx]
        else:
            coord, norm, label = self._load_sample(data_idx)
            if self.cache_data:
                self.cache[data_idx] = (coord, norm, label)

        data_dict = dict(coord=coord, norm=norm, label=label)
        data_dict = self.transform(data_dict)
        return data_dict

    def prepare_test_data(self, idx):
        """Load sample ``idx`` (no wrap-around, no cache) and transform it."""
        assert idx < len(self.data_list)
        coord, norm, label = self._load_sample(idx)

        data_dict = dict(coord=coord, norm=norm, label=label)
        data_dict = self.transform(data_dict)
        return data_dict

    def get_data_name(self, idx):
        """Return the list entry (sample name) for ``idx``."""
        data_idx = idx % len(self.data_list)
        return self.data_list[data_idx]

    def __getitem__(self, idx):
        if self.test_mode:
            return self.prepare_test_data(idx)
        else:
            return self.prepare_train_data(idx)

    def __len__(self):
        return len(self.data_list) * self.loop
def read_plymesh(filepath):
    """Read a PLY mesh and return ``(vertices, faces)`` as numpy arrays.

    Returns ``None`` if the file contains no elements.
    """
    with open(filepath, 'rb') as f:
        plydata = plyfile.PlyData.read(f)
        if not plydata.elements:
            return None
        vertices = pd.DataFrame(plydata['vertex'].data).values
        faces = np.stack(plydata['face'].data['vertex_indices'], axis=0)
        return vertices, faces


def face_normal(vertex, face):
    """Return per-face unit normals and areas of a triangle mesh.

    Args:
        vertex: (V, 3) array of vertex positions.
        face: (F, 3) integer array of vertex indices per triangle.

    Returns:
        ``(nf, area)`` with ``nf`` of shape (F, 3) and ``area`` of shape (F, 1).
    """
    v01 = vertex[face[:, 1]] - vertex[face[:, 0]]
    v02 = vertex[face[:, 2]] - vertex[face[:, 0]]
    vec = np.cross(v01, v02)
    # The epsilon keeps degenerate (zero-area) faces from dividing by zero.
    length = np.sqrt(np.sum(vec ** 2, axis=1, keepdims=True)) + 1.0e-8
    nf = vec / length
    area = length * 0.5
    return nf, area


def vertex_normal(vertex, face):
    """Return per-vertex normals as the area-weighted mean of adjacent faces.

    Vertices that belong to no face get a zero normal.
    """
    nf, area = face_normal(vertex, face)
    weighted = nf * area

    nv = np.zeros_like(vertex)
    # Unbuffered scatter-add: each face adds its weighted normal to its three
    # vertices, accumulating correctly when a vertex is shared by many faces.
    np.add.at(nv, face, weighted[:, None, :])

    length = np.sqrt(np.sum(nv ** 2, axis=1, keepdims=True)) + 1.0e-8
    nv = nv / length
    return nv


def parse_scene(scene_path, output_dir):
    """Convert one ``*_mesh.ply`` scene into ``<output_dir>/<split>/<scene_id>.pth``.

    The split and scene id are the names of the grandparent and parent folders
    of ``scene_path``. Columns 0-2 of the vertex table are stored as ``coord``
    and columns 3-5 as ``color``. Scenes whose mesh is empty are skipped.
    """
    print(f"Parsing scene {scene_path}")
    split = os.path.basename(os.path.dirname(os.path.dirname(scene_path)))
    scene_id = os.path.basename(os.path.dirname(scene_path))
    mesh = read_plymesh(scene_path)
    if mesh is None:
        print(f"Skipping empty mesh {scene_path}")
        return
    vertices, faces = mesh
    coords = vertices[:, :3]
    colors = vertices[:, 3:6]
    data_dict = dict(coord=coords, color=colors, scene_id=scene_id)
    data_dict["normal"] = vertex_normal(coords, faces)
    split_dir = os.path.join(output_dir, split)
    # Splits other than the two created up front would otherwise fail to save.
    os.makedirs(split_dir, exist_ok=True)
    torch.save(data_dict, os.path.join(split_dir, f"{scene_id}.pth"))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset_root', required=True,
                        help='Path to the ARKitScenes dataset root (the folder containing 3dod)')
    parser.add_argument('--output_root', required=True,
                        help='Output path where Training/Validation folders will be located')
    opt = parser.parse_args()
    # Create output directories
    train_output_dir = os.path.join(opt.output_root, 'Training')
    os.makedirs(train_output_dir, exist_ok=True)
    val_output_dir = os.path.join(opt.output_root, 'Validation')
    os.makedirs(val_output_dir, exist_ok=True)
    # Load scene paths
    scene_paths = sorted(glob.glob(os.path.join(opt.dataset_root, '3dod', '*', '*', '*_mesh.ply')))
    # Preprocess data.
    print('Processing scenes...')
    # The context manager shuts the worker pool down once all scenes are done.
    with ProcessPoolExecutor(max_workers=mp.cpu_count()) as pool:
        _ = list(pool.map(parse_scene, scene_paths, repeat(opt.output_root)))
/pcr/datasets/preprocessing/scannet/meta_data/scannetv1_val.txt: -------------------------------------------------------------------------------- 1 | scene0534_00 2 | scene0534_01 3 | scene0319_00 4 | scene0273_00 5 | scene0273_01 6 | scene0225_00 7 | scene0198_00 8 | scene0003_00 9 | scene0003_01 10 | scene0003_02 11 | scene0409_00 12 | scene0409_01 13 | scene0331_00 14 | scene0331_01 15 | scene0505_00 16 | scene0505_01 17 | scene0505_02 18 | scene0505_03 19 | scene0505_04 20 | scene0506_00 21 | scene0057_00 22 | scene0057_01 23 | scene0074_00 24 | scene0074_01 25 | scene0074_02 26 | scene0091_00 27 | scene0112_00 28 | scene0112_01 29 | scene0112_02 30 | scene0240_00 31 | scene0102_00 32 | scene0102_01 33 | scene0513_00 34 | scene0514_00 35 | scene0514_01 36 | scene0537_00 37 | scene0516_00 38 | scene0516_01 39 | scene0495_00 40 | scene0617_00 41 | scene0133_00 42 | scene0520_00 43 | scene0520_01 44 | scene0635_00 45 | scene0635_01 46 | scene0054_00 47 | scene0473_00 48 | scene0473_01 49 | scene0524_00 50 | scene0524_01 51 | scene0379_00 52 | scene0471_00 53 | scene0471_01 54 | scene0471_02 55 | scene0566_00 56 | scene0248_00 57 | scene0248_01 58 | scene0248_02 59 | scene0529_00 60 | scene0529_01 61 | scene0529_02 62 | scene0391_00 63 | scene0264_00 64 | scene0264_01 65 | scene0264_02 66 | scene0675_00 67 | scene0675_01 68 | scene0350_00 69 | scene0350_01 70 | scene0350_02 71 | scene0450_00 72 | scene0068_00 73 | scene0068_01 74 | scene0237_00 75 | scene0237_01 76 | scene0365_00 77 | scene0365_01 78 | scene0365_02 79 | scene0605_00 80 | scene0605_01 81 | scene0539_00 82 | scene0539_01 83 | scene0539_02 84 | scene0540_00 85 | scene0540_01 86 | scene0540_02 87 | scene0170_00 88 | scene0170_01 89 | scene0170_02 90 | scene0433_00 91 | scene0340_00 92 | scene0340_01 93 | scene0340_02 94 | scene0160_00 95 | scene0160_01 96 | scene0160_02 97 | scene0160_03 98 | scene0160_04 99 | scene0059_00 100 | scene0059_01 101 | scene0059_02 102 | scene0056_00 103 | scene0056_01 104 
| scene0478_00 105 | scene0478_01 106 | scene0548_00 107 | scene0548_01 108 | scene0548_02 109 | scene0204_00 110 | scene0204_01 111 | scene0204_02 112 | scene0033_00 113 | scene0145_00 114 | scene0483_00 115 | scene0508_00 116 | scene0508_01 117 | scene0508_02 118 | scene0180_00 119 | scene0148_00 120 | scene0556_00 121 | scene0556_01 122 | scene0416_00 123 | scene0416_01 124 | scene0416_02 125 | scene0416_03 126 | scene0416_04 127 | scene0073_00 128 | scene0073_01 129 | scene0073_02 130 | scene0073_03 131 | scene0034_00 132 | scene0034_01 133 | scene0034_02 134 | scene0639_00 135 | scene0561_00 136 | scene0561_01 137 | scene0298_00 138 | scene0692_00 139 | scene0692_01 140 | scene0692_02 141 | scene0692_03 142 | scene0692_04 143 | scene0642_00 144 | scene0642_01 145 | scene0642_02 146 | scene0642_03 147 | scene0630_00 148 | scene0630_01 149 | scene0630_02 150 | scene0630_03 151 | scene0630_04 152 | scene0630_05 153 | scene0630_06 154 | scene0706_00 155 | scene0567_00 156 | scene0567_01 157 | -------------------------------------------------------------------------------- /pcr/datasets/preprocessing/scannet/meta_data/scannetv2_test.txt: -------------------------------------------------------------------------------- 1 | scene0707_00 2 | scene0708_00 3 | scene0709_00 4 | scene0710_00 5 | scene0711_00 6 | scene0712_00 7 | scene0713_00 8 | scene0714_00 9 | scene0715_00 10 | scene0716_00 11 | scene0717_00 12 | scene0718_00 13 | scene0719_00 14 | scene0720_00 15 | scene0721_00 16 | scene0722_00 17 | scene0723_00 18 | scene0724_00 19 | scene0725_00 20 | scene0726_00 21 | scene0727_00 22 | scene0728_00 23 | scene0729_00 24 | scene0730_00 25 | scene0731_00 26 | scene0732_00 27 | scene0733_00 28 | scene0734_00 29 | scene0735_00 30 | scene0736_00 31 | scene0737_00 32 | scene0738_00 33 | scene0739_00 34 | scene0740_00 35 | scene0741_00 36 | scene0742_00 37 | scene0743_00 38 | scene0744_00 39 | scene0745_00 40 | scene0746_00 41 | scene0747_00 42 | scene0748_00 43 | 
scene0749_00 44 | scene0750_00 45 | scene0751_00 46 | scene0752_00 47 | scene0753_00 48 | scene0754_00 49 | scene0755_00 50 | scene0756_00 51 | scene0757_00 52 | scene0758_00 53 | scene0759_00 54 | scene0760_00 55 | scene0761_00 56 | scene0762_00 57 | scene0763_00 58 | scene0764_00 59 | scene0765_00 60 | scene0766_00 61 | scene0767_00 62 | scene0768_00 63 | scene0769_00 64 | scene0770_00 65 | scene0771_00 66 | scene0772_00 67 | scene0773_00 68 | scene0774_00 69 | scene0775_00 70 | scene0776_00 71 | scene0777_00 72 | scene0778_00 73 | scene0779_00 74 | scene0780_00 75 | scene0781_00 76 | scene0782_00 77 | scene0783_00 78 | scene0784_00 79 | scene0785_00 80 | scene0786_00 81 | scene0787_00 82 | scene0788_00 83 | scene0789_00 84 | scene0790_00 85 | scene0791_00 86 | scene0792_00 87 | scene0793_00 88 | scene0794_00 89 | scene0795_00 90 | scene0796_00 91 | scene0797_00 92 | scene0798_00 93 | scene0799_00 94 | scene0800_00 95 | scene0801_00 96 | scene0802_00 97 | scene0803_00 98 | scene0804_00 99 | scene0805_00 100 | scene0806_00 101 | -------------------------------------------------------------------------------- /pcr/datasets/preprocessing/scannet/scannet_pair/compute_full_overlapping.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
def make_open3d_point_cloud(xyz, color=None, voxel_size=None):
    """Build an Open3D point cloud from an array of points.

    Args:
        xyz: Point array; only its first three columns are used.
        color: Optional per-point colours assigned to the cloud.
        voxel_size: If given, the cloud is voxel-downsampled with this size.

    Returns:
        The point cloud, or ``None`` when ``xyz`` contains any NaN.
    """
    if np.isnan(xyz).any():
        return None

    xyz = xyz[:, :3]
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(xyz)
    if color is not None:
        pcd.colors = o3d.utility.Vector3dVector(color)
    if voxel_size is not None:
        pcd = pcd.voxel_down_sample(voxel_size)

    return pcd


def get_matching_indices(source, pcd_tree, search_voxel_size, K=None):
    """Find, for every source point, its neighbours within a radius.

    Args:
        source: Object exposing ``points`` (iterable of query points).
        pcd_tree: Search structure exposing ``search_radius_vector_3d``.
        search_voxel_size: Search radius.
        K: If not ``None``, keep at most the first ``K`` hits per point.

    Returns:
        List of ``(source_index, target_index)`` pairs.
    """
    match_inds = []
    for i, point in enumerate(source.points):
        [_, idx, _] = pcd_tree.search_radius_vector_3d(point, search_voxel_size)
        if K is not None:
            idx = idx[:K]
        match_inds.extend((i, j) for j in idx)
    return match_inds


def _match_ratio(source, pcd_tree, search_voxel_size):
    """Return the fraction of ``source`` points with a neighbour in ``pcd_tree``."""
    num_points = len(source.points)
    if num_points == 0:
        # An empty cloud overlaps with nothing; avoids a ZeroDivisionError.
        return 0.0
    matches = get_matching_indices(source, pcd_tree, search_voxel_size, 1)
    return float(len(matches)) / float(num_points)


def compute_overlap_ratio(pcd0, pcd1, voxel_size):
    """Return the larger of the two directed overlap ratios of two clouds.

    Both clouds are voxel-downsampled first; a point counts as overlapping
    when the other cloud has a point within ``1.5 * voxel_size``.
    """
    pcd0_down = pcd0.voxel_down_sample(voxel_size)
    pcd1_down = pcd1.voxel_down_sample(voxel_size)
    if len(pcd0_down.points) == 0 or len(pcd1_down.points) == 0:
        return 0.0
    # get_matching_indices needs a KD-tree as its second argument; the raw
    # point cloud passed before has no search_radius_vector_3d method.
    tree0 = o3d.geometry.KDTreeFlann(pcd0_down)
    tree1 = o3d.geometry.KDTreeFlann(pcd1_down)
    overlap0 = _match_ratio(pcd0_down, tree1, voxel_size * 1.5)
    overlap1 = _match_ratio(pcd1_down, tree0, voxel_size * 1.5)
    return max(overlap0, overlap1)


def compute_full_overlapping(data_root, scene_id, voxel_size=0.05):
    """Compute pairwise frame overlaps for one scene and write them to disk.

    Loads every ``<data_root>/<scene_id>/pcd/*.pth`` frame, drops frames
    whose coordinates contain NaN, and writes one
    ``"<frame_a> <frame_b> <overlap>"`` line per unordered pair to
    ``<data_root>/<scene_id>/pcd/overlap.txt``. Frame names are written with
    the ``data_root`` prefix removed.

    Args:
        data_root: Root directory holding the per-scene folders.
        scene_id: Name of the scene folder.
        voxel_size: Voxel size used for downsampling and, times 1.5, as the
            matching radius.
    """
    # Sorted so the pair order in overlap.txt does not depend on the
    # filesystem's listing order.
    pcd_names = sorted(glob.glob(os.path.join(data_root, scene_id, "pcd", "*.pth")))
    _points = [
        (pcd_name, make_open3d_point_cloud(torch.load(pcd_name)['coord'], voxel_size=voxel_size))
        for pcd_name in pcd_names
    ]
    points = [(pcd_name, pcd) for (pcd_name, pcd) in _points if pcd is not None]
    print('load {} point clouds ({} invalid has been filtered), computing matching/overlapping'.format(
        len(points), len(_points) - len(points)))

    # matching_matrix[i, j]: fraction of frame j's points that have a
    # neighbour in frame i.
    matching_matrix = np.zeros((len(points), len(points)))
    for i, (pcd0_name, pcd0) in enumerate(points):
        print('matching to...{}'.format(pcd0_name))
        pcd0_tree = o3d.geometry.KDTreeFlann(copy.deepcopy(pcd0))
        for j, (pcd1_name, pcd1) in enumerate(points):
            if i == j:
                continue
            matching_matrix[i, j] = _match_ratio(pcd1, pcd0_tree, 1.5 * voxel_size)

    # write to file
    with open(os.path.join(data_root, scene_id, "pcd", "overlap.txt"), 'w') as f:
        for i, (pcd0_name, pcd0) in enumerate(points):
            for j, (pcd1_name, pcd1) in enumerate(points):
                if i < j:
                    overlap = max(matching_matrix[i, j], matching_matrix[j, i])
                    f.write("{} {} {}\n".format(
                        pcd0_name.replace(data_root, ""), pcd1_name.replace(data_root, ""), overlap
                    ))
def main(target_dir=None):
    """Merge per-scene overlap files into one list of well-overlapping pairs.

    Reads every ``<target_dir>/*/pcd/overlap.txt`` (lines of
    ``"<pcd0> <pcd1> <overlap>"``) and writes the pairs whose overlap is at
    least 0.3 to ``<target_dir>/overlap30.txt``.

    Args:
        target_dir: Directory holding the scene folders. Defaults to the
            ``--target_dir`` command-line option parsed at module level.
    """
    if target_dir is None:
        target_dir = opt.target_dir
    # Sorted so the output order does not depend on filesystem listing order.
    overlaps = sorted(glob.glob(os.path.join(target_dir, "*/pcd/overlap.txt")))
    with open(os.path.join(target_dir, 'overlap30.txt'), 'w') as f:
        for fo in overlaps:
            # Close each per-scene file promptly instead of leaking the handle.
            with open(fo) as scene_file:
                for line in scene_file:
                    fields = line.split()
                    if not fields:
                        # Blank lines used to crash the 3-way unpacking below.
                        continue
                    pcd0, pcd1, op = fields
                    if float(op) >= 0.3:
                        print('{} {} {}'.format(pcd0, pcd1, op), file=f)
    print('done')
def extractor(input_path, output_path):
    """Back-project every exported depth frame into a coloured point cloud.

    For each frame found under ``<input_path>/pose``, ``/depth`` and
    ``/color`` (matched by numeric file name), the valid depth pixels are
    lifted to 3D with the depth intrinsics, transformed by the frame's pose
    matrix and saved as ``<output_path>/<frame>.pth`` holding a dict with
    ``coord`` and ``color``. Frames whose output already exists are skipped;
    frames that fail are reported and skipped.

    Args:
        input_path: Scene folder containing ``intrinsic/intrinsic_depth.txt``
            and the ``pose``, ``depth`` and ``color`` sub-folders.
        output_path: Folder receiving the ``.pth`` point clouds (created if
            missing).
    """
    os.makedirs(output_path, exist_ok=True)

    # Load Depth Camera Intrinsic
    depth_intrinsic = np.loadtxt(input_path + '/intrinsic/intrinsic_depth.txt')
    print('Depth intrinsic: ')
    print(depth_intrinsic)
    fx = depth_intrinsic[0, 0]
    fy = depth_intrinsic[1, 1]
    cx = depth_intrinsic[0, 2]
    cy = depth_intrinsic[1, 2]
    bx = depth_intrinsic[0, 3]
    by = depth_intrinsic[1, 3]

    def frame_index(path):
        # Files are named "<frame number>.<ext>"; sort numerically, not lexically.
        return int(os.path.basename(path).split('.')[0])

    pose_files = sorted(glob.glob(input_path + '/pose/*.txt'), key=frame_index)
    depth_files = sorted(glob.glob(input_path + '/depth/*.png'), key=frame_index)
    color_files = sorted(glob.glob(input_path + '/color/*.png'), key=frame_index)

    # Raw depth values are divided by this factor before back-projection.
    depth_shift = 1000.0

    # Get Aligned Point Clouds.
    for pose_file, depth_file, color_file in zip(pose_files, depth_files, color_files):
        name = os.path.basename(pose_file).split('.')[0]
        save_path = output_path + '/{}.pth'.format(name)

        # Check for the file that is actually written (.pth); the previous
        # check looked for .npz and therefore never skipped anything.
        if os.path.exists(save_path):
            continue

        try:
            print('=' * 50, ': {}'.format(pose_file))
            depth_img = cv2.imread(depth_file, -1)  # read 16bit grayscale image
            mask = (depth_img != 0)
            color_image = cv2.imread(color_file)
            # Match the colour image to the depth resolution so the depth
            # mask can index it (identical to the former 640x480 for
            # 640x480 depth maps).
            color_image = cv2.resize(color_image, (depth_img.shape[1], depth_img.shape[0]))
            # OpenCV loads BGR; reverse the channel order to RGB.
            colors = np.ascontiguousarray(color_image[mask][:, ::-1])

            pose = np.loadtxt(pose_file)
            print('Camera pose: ')
            print(pose)

            # Pixel coordinates of the valid depth samples, in the same
            # row-major order as color_image[mask]. Working from the mask
            # also stays 2-D when only a single pixel is valid.
            rows, cols = np.nonzero(mask)
            depth = depth_img[mask] / depth_shift

            points = np.ones((depth.shape[0], 4))
            points[:, 0] = (cols - cx) * depth / fx + bx
            points[:, 1] = (rows - cy) * depth / fy + by
            points[:, 2] = depth
            points_world = np.dot(points, np.transpose(pose))
            print(points_world.shape)

            pcd = dict(coord=points_world[:, :3], color=colors)
            torch.save(pcd, save_path)
        except Exception as err:
            # Still best-effort per frame, but report the failure instead of
            # hiding it, and let KeyboardInterrupt/SystemExit propagate.
            print('Failed to extract frame {}: {}'.format(pose_file, err))
            continue
def reader(filename,
           output_path,
           frame_skip,
           export_color_images=False,
           export_depth_images=False,
           export_poses=False,
           export_intrinsics=False):
    """Load a sensor-data file and export the requested streams.

    Args:
        filename: Path handed to ``SensorData``.
        output_path: Folder receiving the ``depth``, ``color``, ``pose`` and
            ``intrinsic`` sub-folders (created if missing).
        frame_skip: Passed as ``frame_skip`` to the depth, colour and pose
            exporters.
        export_color_images: Export colour images to ``<output_path>/color``.
        export_depth_images: Export depth images to ``<output_path>/depth``.
        export_poses: Export poses to ``<output_path>/pose``.
        export_intrinsics: Export intrinsics to ``<output_path>/intrinsic``.
    """
    # exist_ok avoids the check-then-create race between worker processes.
    os.makedirs(output_path, exist_ok=True)

    # load the data
    print('loading %s...' % filename)
    sd = SensorData(filename)
    if export_depth_images:
        sd.export_depth_images(os.path.join(output_path, 'depth'), frame_skip=frame_skip)
    if export_color_images:
        sd.export_color_images(os.path.join(output_path, 'color'), frame_skip=frame_skip)
    if export_poses:
        sd.export_poses(os.path.join(output_path, 'pose'), frame_skip=frame_skip)
    if export_intrinsics:
        sd.export_intrinsics(os.path.join(output_path, 'intrinsic'))
def get_data_list(self):
    """Collect the ``.pth`` sample files of the configured split(s).

    ``self.split`` is either one folder name or a sequence of folder names
    below ``self.data_root``.

    Returns:
        List of file paths, grouped in split order and sorted within each
        split so the index-to-file mapping is reproducible.

    Raises:
        NotImplementedError: If ``self.split`` is neither a string nor a
            sequence.
    """
    if isinstance(self.split, str):
        splits = [self.split]
    elif isinstance(self.split, Sequence):
        splits = self.split
    else:
        raise NotImplementedError(
            "split must be a str or a sequence of str, got {}".format(type(self.split).__name__))
    data_list = []
    for split in splits:
        # glob order is filesystem dependent; sort for determinism.
        data_list += sorted(glob.glob(os.path.join(self.data_root, split, "*.pth")))
    return data_list
@DATASETS.register_module()
class ScanNetPairDataset(Dataset):
    """Dataset of overlapping frame pairs.

    Every sample is a pair of frames listed in a
    ``<data_root>/*/pcd/overlap.txt`` file whose overlap exceeds
    ``overlap_threshold``. Each frame goes through its own transform and the
    two results are merged into one dict with ``twin1_`` / ``twin2_`` key
    prefixes.

    Args:
        data_root: Root folder of the preprocessed pair data.
        overlap_threshold: Pairs are kept only if their overlap is strictly
            greater than this value.
        twin1_transform: Transform config for the first frame.
        twin2_transform: Transform config for the second frame.
        loop: Number of times the pair list is repeated per epoch.
        **kwargs: Accepted and ignored.
    """

    def __init__(self,
                 data_root='data/scannet_pair',
                 overlap_threshold=0.3,
                 twin1_transform=None,
                 twin2_transform=None,
                 loop=1,
                 **kwargs):
        super(ScanNetPairDataset, self).__init__()
        self.data_root = data_root
        self.overlap_threshold = overlap_threshold
        self.twin1_transform = Compose(twin1_transform)
        self.twin2_transform = Compose(twin2_transform)
        self.loop = loop
        self.data_list = self.get_data_list()
        logger = get_root_logger()
        logger.info("Totally {} x {} samples.".format(len(self.data_list), self.loop))

    def get_data_list(self):
        """Return the ``[path_a, path_b]`` pairs above the overlap threshold."""
        data_list = []
        # Sorted so the sample order does not depend on filesystem listing order.
        overlap_list = sorted(glob.glob(os.path.join(self.data_root, "*", "pcd", "overlap.txt")))
        for overlap_file in overlap_list:
            with open(overlap_file) as f:
                overlap = [line.strip().split() for line in f]
            data_list.extend(
                pair[:2] for pair in overlap
                # Blank or truncated lines are skipped instead of raising IndexError.
                if len(pair) >= 3 and float(pair[2]) > self.overlap_threshold
            )
        return data_list

    def _resolve(self, rel_path):
        """Join a path stored in overlap.txt onto ``data_root``.

        Works whether or not the stored path starts with a separator, which
        plain string concatenation did not.
        """
        return os.path.join(self.data_root, rel_path.lstrip("/\\"))

    def get_data(self, idx):
        """Load both frames of pair ``idx`` and keep a copy of their raw coordinates."""
        pair = self.data_list[idx % len(self.data_list)]
        twin1_dict = torch.load(self._resolve(pair[0]))
        twin2_dict = torch.load(self._resolve(pair[1]))
        twin1_dict["origin_coord"] = twin1_dict["coord"].copy()
        twin2_dict["origin_coord"] = twin2_dict["coord"].copy()
        return twin1_dict, twin2_dict

    def get_data_name(self, idx):
        """Return a ``<scene>_<frame1>_<frame2>`` name for pair ``idx``.

        The previous implementation passed the whole pair (a list) to
        ``os.path.basename`` and raised ``TypeError``.
        NOTE(review): assumes the ``<scene>/pcd/<frame>.pth`` layout implied
        by the overlap.txt glob pattern — confirm the desired name format.
        """
        pair = self.data_list[idx % len(self.data_list)]
        scene = os.path.basename(os.path.dirname(os.path.dirname(pair[0])))
        frames = [os.path.basename(path).split(".")[0] for path in pair]
        return "{}_{}_{}".format(scene, frames[0], frames[1])

    def prepare_train_data(self, idx):
        """Load, transform and merge the two frames of pair ``idx``."""
        # load data
        twin1_dict, twin2_dict = self.get_data(idx)
        twin1_dict = self.twin1_transform(twin1_dict)
        twin2_dict = self.twin2_transform(twin2_dict)
        data_dict = dict()
        for key, value in twin1_dict.items():
            data_dict["twin1_" + key] = value
        for key, value in twin2_dict.items():
            data_dict["twin2_" + key] = value
        return data_dict

    def prepare_test_data(self, idx):
        raise NotImplementedError

    def __getitem__(self, idx):
        return self.prepare_train_data(idx)

    def __len__(self):
        return len(self.data_list) * self.loop
@DATASETS.register_module()
class SemanticKITTIDataset(Dataset):
    """Semantic KITTI LiDAR scans for semantic segmentation.

    Scans are read from ``<data_root>/sequences/<seq>/velodyne`` and labels
    from the sibling ``labels`` folder.

    Args:
        split: ``'train'``, ``'val'`` or ``'test'``, or a list/tuple of them.
        data_root: Dataset root folder.
        learning_map: Mapping from raw label id to training id. If ``None``,
            the raw ids are used unchanged.
        transform: Transform config applied to every sample.
        test_mode: Build the test-time transforms; ``loop`` is forced to 1.
        test_cfg: Test configuration, only read when ``test_mode`` is true.
        loop: Number of times the scan list is repeated per epoch.
    """

    def __init__(self,
                 split='train',
                 data_root='data/semantic_kitti',
                 learning_map=None,
                 transform=None,
                 test_mode=False,
                 test_cfg=None,
                 loop=1):
        super(SemanticKITTIDataset, self).__init__()
        self.data_root = data_root
        self.split = split
        self.learning_map = learning_map
        self.split2seq = dict(
            train=[0, 1, 2, 3, 4, 5, 6, 7, 9, 10],
            val=[8],
            test=[11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
        )
        self.transform = Compose(transform)
        self.loop = loop if not test_mode else 1  # force make loop = 1 while in test mode
        self.test_mode = test_mode
        self.test_cfg = test_cfg if test_mode else None

        if test_mode:
            self.test_voxelize = TRANSFORMS.build(self.test_cfg.voxelize)
            self.test_crop = TRANSFORMS.build(self.test_cfg.crop)
            self.post_transform = Compose(self.test_cfg.post_transform)
            self.aug_transform = [Compose(aug) for aug in self.test_cfg.aug_transform]

        if isinstance(self.split, str):
            seq_list = self.split2seq[self.split]
        elif isinstance(self.split, (list, tuple)):
            # A dedicated loop variable keeps the `split` argument intact for
            # the log message below.
            seq_list = []
            for split_name in self.split:
                seq_list += self.split2seq[split_name]
        else:
            raise NotImplementedError

        self.data_list = []
        for seq in seq_list:
            seq = str(seq).zfill(2)
            seq_folder = os.path.join(self.data_root, "sequences", seq)
            seq_files = sorted(
                os.listdir(os.path.join(seq_folder, "velodyne")))
            self.data_list += [os.path.join(seq_folder, "velodyne", file) for file in seq_files]
        logger = get_root_logger()
        logger.info("Totally {} x {} samples in {} set.".format(len(self.data_list), self.loop, self.split))

    def prepare_train_data(self, idx):
        """Load scan ``idx`` with its labels and apply the transform.

        Returns:
            The transformed dict built from ``coord`` (first three columns of
            the scan), ``strength`` (last column, kept as a column vector)
            and ``label``.
        """
        # load data
        data_idx = idx % len(self.data_list)
        with open(self.data_list[data_idx], 'rb') as b:
            scan = np.fromfile(b, dtype=np.float32).reshape(-1, 4)
        coord = scan[:, :3]
        strength = scan[:, -1].reshape([-1, 1])

        label_file = self.data_list[data_idx].replace('velodyne', 'labels').replace('.bin', '.label')
        if os.path.exists(label_file):
            with open(label_file, 'rb') as a:
                label = np.fromfile(a, dtype=np.int32).reshape(-1)
        else:
            label = np.zeros(coord.shape[0]).astype(np.int32)
        # Only the lower 16 bits of each raw label are used.
        # NOTE(review): presumably the semantic id, with the instance id in
        # the upper bits — confirm against the SemanticKITTI label format.
        label = label & 0xFFFF
        if self.learning_map is not None:
            # otypes keeps np.vectorize working on an empty scan.
            label = np.vectorize(self.learning_map.__getitem__, otypes=[np.int64])(label)
        label = label.astype(np.int64)
        data_dict = dict(coord=coord, strength=strength, label=label)
        data_dict = self.transform(data_dict)
        return data_dict

    def prepare_test_data(self, idx):
        raise NotImplementedError

    def get_data_name(self, idx):
        """Return the scan file path of sample ``idx``.

        The previous version used that path as an index into ``data_list``
        again, which raised ``TypeError``.
        """
        return self.data_list[idx % len(self.data_list)]

    def __getitem__(self, idx):
        if self.test_mode:
            return self.prepare_test_data(idx)
        else:
            return self.prepare_train_data(idx)

    def __len__(self):
        return len(self.data_list) * self.loop
def collate_fn(batch):
    """Collate point-cloud samples given as tensors, lists or dicts.

    * Tensors are concatenated along the first dimension.
    * Strings are handed to ``default_collate``.
    * List samples get the point count of their first entry appended, are
      collated position by position, and the appended counts are turned into
      cumulative offsets. The caller's lists are left untouched.
    * Dict samples are collated key by key; every key containing
      ``"offset"`` is turned into a cumulative sum.
    * Anything else goes to ``default_collate``.

    Raises:
        TypeError: If ``batch`` is not a sequence.
    """
    if not isinstance(batch, Sequence):
        # `batch.dtype` does not exist on arbitrary objects; report the type.
        raise TypeError(f'{type(batch).__name__} is not supported.')

    if isinstance(batch[0], torch.Tensor):
        return torch.cat(list(batch))
    elif isinstance(batch[0], (str, bytes)):
        # Strings are Sequences too, but must not be treated as sample lists.
        return default_collate(batch)
    elif isinstance(batch[0], Sequence):
        # Build new lists instead of appending to the caller's samples.
        samples = [list(data) + [torch.tensor([data[0].shape[0]])] for data in batch]
        batch = [collate_fn(parts) for parts in zip(*samples)]
        batch[-1] = torch.cumsum(batch[-1], dim=0).int()
        return batch
    elif isinstance(batch[0], Mapping):
        batch = {key: collate_fn([d[key] for d in batch]) for key in batch[0]}
        for key in batch.keys():
            if "offset" in key:
                batch[key] = torch.cumsum(batch[key], dim=0)
        return batch
    else:
        return default_collate(batch)


def _truncate_entry(value, total_points, num_samples, kept_points, kept_samples):
    """Cut one collated entry down to the samples kept in the batch."""
    if torch.is_tensor(value) and value.dim() > 0:
        if value.shape[0] == total_points:
            return value[:kept_points]  # per-point data
        if value.shape[0] == num_samples:
            return value[:kept_samples]  # per-sample data
        return value[:kept_points]
    if isinstance(value, (list, tuple)):
        if len(value) == num_samples:
            return value[:kept_samples]  # per-sample metadata such as names
        return value[:kept_points]
    return value


def point_collate_fn(batch, max_batch_points=1e10, mix_prob=0):
    """Collate dict samples, cap the batch's point count and optionally mix scans.

    Args:
        batch: Sequence of sample dicts.
        max_batch_points: Trailing samples are dropped once the cumulative
            point count exceeds this limit; the first sample must fit.
        mix_prob: Probability of merging neighbouring samples by dropping
            every other offset boundary (Mix3D).

    Returns:
        The collated dict.
    """
    assert isinstance(batch[0], Mapping)  # currently, only support input_dict, rather than input_list
    num_samples = len(batch)
    batch = collate_fn(batch)
    if "offset" in batch.keys():
        offset = batch["offset"]
        assert offset[0] <= max_batch_points  # at least the first scan can be added to batch
        total_points = int(offset[-1])
        for i in range(len(offset) - 1):
            if offset[i + 1] > max_batch_points:
                kept_samples = i + 1
                kept_points = int(offset[i])
                batch["offset"] = offset[:kept_samples]
                for key in batch.keys():
                    if key != "offset":
                        # Per-point entries are cut by point count and
                        # per-sample entries by sample count.
                        batch[key] = _truncate_entry(
                            batch[key], total_points, num_samples, kept_points, kept_samples)
                break

    # Mix3d (https://arxiv.org/pdf/2110.02210.pdf)
    if random.random() < mix_prob:
        batch["offset"] = torch.cat([batch["offset"][1:-1:2], batch["offset"][-1].unsqueeze(0)], dim=0)
    return batch


def sa_create(name, var):
    """Copy ``var`` into a new read-only SharedArray called ``name`` and return it."""
    x = SA.create(name, var.shape, dtype=var.dtype)
    x[...] = var[...]
    x.flags.writeable = False
    return x


def gaussian_kernel(dist2: np.ndarray, a: float = 1, c: float = 5):
    """Evaluate ``a * exp(-dist2 / (2 * c ** 2))`` element-wise."""
    return a * np.exp(-dist2 / (2 * c ** 2))
def build_model(cfg):
    """Build a model by passing ``cfg`` to ``MODELS.build`` and return the result."""
    return MODELS.build(cfg)
class PointTransformerCls(nn.Module):
    """
    Five-stage encoder followed by per-sample average pooling and an MLP head.

    Args:
        block: block class; must expose ``expansion`` and accept
            ``(in_planes, planes, share_planes, nsample=...)``.
        blocks: per-stage layer counts (five entries are read); the first layer
            of every stage is a ``TransitionDown``.
        in_channels: input feature width; when it is 3 only ``coord`` is used
            as feature, otherwise ``coord`` and ``feat`` are concatenated.
        num_classes: width of the final linear layer.
    """

    def __init__(self, block, blocks, in_channels=6, num_classes=40):
        super().__init__()
        self.in_channels = in_channels
        self.in_planes = in_channels
        share_planes = 8
        widths = [32, 64, 128, 256, 512]
        strides = [1, 4, 4, 4, 4]
        neighbours = [8, 16, 16, 16, 16]
        # Registers enc1 .. enc5 in order (strides give N/1, N/4, N/16, N/64, N/256).
        for idx in range(len(widths)):
            stage = self._make_enc(block, widths[idx], blocks[idx], share_planes,
                                   stride=strides[idx], nsample=neighbours[idx])
            setattr(self, f"enc{idx + 1}", stage)
        head = [
            nn.Linear(widths[4], 256),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(128, num_classes),
        ]
        self.cls = nn.Sequential(*head)

    def _make_enc(self, block, planes, blocks, share_planes=8, stride=1, nsample=16):
        """Build one stage: a ``TransitionDown`` followed by ``blocks - 1`` blocks."""
        out_planes = planes * block.expansion
        stage = [TransitionDown(self.in_planes, out_planes, stride, nsample)]
        # Later stages start from the width this stage produces.
        self.in_planes = out_planes
        stage.extend(
            block(self.in_planes, self.in_planes, share_planes, nsample=nsample)
            for _ in range(1, blocks)
        )
        return nn.Sequential(*stage)

    def forward(self, input_dict):
        """
        Run the encoder on ``input_dict`` ("coord", "feat", "offset") and
        return the head output, one row per entry of the final offset tensor.
        """
        coord = input_dict["coord"]
        feat = input_dict["feat"]
        offset = input_dict["offset"].int()
        if self.in_channels == 3:
            feat = coord
        else:
            feat = torch.cat((coord, feat), 1)
        for encoder in (self.enc1, self.enc2, self.enc3, self.enc4, self.enc5):
            coord, feat, offset = encoder([coord, feat, offset])
        # Average the rows of each [previous offset, offset) segment.
        pooled = []
        begin = 0
        for end in offset:
            pooled.append(feat[begin:end, :].sum(0, True) / (end - begin))
            begin = end
        return self.cls(torch.cat(pooled, 0))
(xiaoyang.wu@connect.hku.hk). All Rights Reserved. 5 | Please cite our work if you use any part of the code. 6 | """ 7 | 8 | from .point_transformer_v2m1_origin import PointTransformerV2 9 | from .point_transformer_v2m2_base import PointTransformerV2 10 | -------------------------------------------------------------------------------- /pcr/models/sparse_unet/__init__.py: -------------------------------------------------------------------------------- 1 | # from .mink_unet import * 2 | from .spconv_unet import * 3 | -------------------------------------------------------------------------------- /pcr/models/spvcnn/__init__.py: -------------------------------------------------------------------------------- 1 | from .ts_spvcnn import * 2 | -------------------------------------------------------------------------------- /pcr/models/stratified_transformer/__init__.py: -------------------------------------------------------------------------------- 1 | from .stratified_transformer_v1m1_origin import StratifiedTransformer 2 | from .stratified_transformer_v1m2_refine import StratifiedTransformer 3 | -------------------------------------------------------------------------------- /pcr/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pointcept/PointTransformerV2/5386c4d71f3d6c42c24a8105fce8750e9355dc54/pcr/utils/__init__.py -------------------------------------------------------------------------------- /pcr/utils/env.py: -------------------------------------------------------------------------------- 1 | """ 2 | Environment Utils 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 
def set_seed(seed=None):
    """
    Seed Python's ``random``, NumPy and PyTorch (CPU and CUDA) with one value.

    When ``seed`` is None a value from ``get_random_seed()`` is used. Also
    disables cuDNN benchmarking, enables cuDNN deterministic mode and writes
    the seed to the ``PYTHONHASHSEED`` environment variable.
    """
    if seed is None:
        seed = get_random_seed()
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)
    cudnn.benchmark = False
    cudnn.deterministic = True
    os.environ["PYTHONHASHSEED"] = str(seed)
def build_optimizer(cfg, model, params_dicts=None):
    """
    Fill ``cfg.params`` from ``model`` and build the optimizer via ``OPTIMIZERS``.

    Without ``params_dicts`` all model parameters are used as-is. Otherwise
    ``cfg.params`` becomes a list of groups: group 0 collects parameters that
    match no entry, and group ``i + 1`` collects parameters whose name contains
    ``params_dicts[i].keyword`` (first match wins), with learning rate
    ``params_dicts[i].lr_scale * cfg.lr``.
    """
    if params_dicts is None:
        cfg.params = model.parameters()
        return OPTIMIZERS.build(cfg=cfg)

    default_group = dict(params=[])
    scaled_groups = [
        dict(params=[], lr=spec.lr_scale * cfg.lr) for spec in params_dicts
    ]
    for name, param in model.named_parameters():
        target = default_group
        for spec, group in zip(params_dicts, scaled_groups):
            if spec.keyword in name:
                target = group
                break
        target["params"].append(param)

    cfg.params = [default_group] + scaled_groups
    return OPTIMIZERS.build(cfg=cfg)
def scandir(dir_path, suffix=None, recursive=False, case_sensitive=True):
    """Lazily list the files below a directory.

    Args:
        dir_path (str | obj:`Path`): Directory to scan.
        suffix (str | tuple(str), optional): Only files whose relative path
            ends with (one of) these suffixes are reported. Default: None.
        recursive (bool, optional): Also descend into sub-directories.
            Default: False.
        case_sensitive (bool, optional): When False, suffix matching ignores
            case. Default: True.

    Returns:
        A generator yielding paths relative to ``dir_path``. Files whose name
        starts with a dot are never yielded.

    Raises:
        TypeError: if ``dir_path`` or ``suffix`` has an unsupported type
            (raised immediately, before iteration starts).
    """
    if not isinstance(dir_path, (str, Path)):
        raise TypeError('"dir_path" must be a string or Path object')
    top = str(dir_path)

    if suffix is not None:
        if not isinstance(suffix, (str, tuple)):
            raise TypeError('"suffix" must be a string or tuple of strings')
        if not case_sensitive:
            if isinstance(suffix, str):
                suffix = suffix.lower()
            else:
                suffix = tuple(item.lower() for item in suffix)

    def _walk(current):
        for entry in os.scandir(current):
            if not entry.name.startswith('.') and entry.is_file():
                relative = osp.relpath(entry.path, top)
                candidate = relative if case_sensitive else relative.lower()
                if suffix is None or candidate.endswith(suffix):
                    yield relative
                continue
            if recursive and os.path.isdir(entry.path):
                # descend into sub-directory
                yield from _walk(entry.path)

    return _walk(top)
def save_bounding_boxes(bboxes_corners, color=(1., 0., 0.), file_path="bbox.ply", logger=None):
    """
    Write the wireframes of a set of boxes to ``file_path`` as an Open3D line set.

    Args:
        bboxes_corners: tensor or ndarray of box corners; it is flattened to a
            list of 3D points and every box is indexed as 8 consecutive points.
            NOTE(review): the edge table assumes corners 0-3 and 4-7 each form
            a face loop with corner k joined to corner k + 4; confirm against
            the producer of the corners.
        color: colour assigned to every line segment.
        file_path: output file handed to ``o3d.io.write_line_set``.
        logger: optional logger; when given, the output path is logged.
    """
    bboxes_corners = to_numpy(bboxes_corners)
    # point list
    points = bboxes_corners.reshape(-1, 3)
    # line list: two face loops plus the four edges joining them.
    # The second loop must close with [7, 4]; [7, 0] would draw a diagonal and
    # leave edge 7-4 missing.
    box_lines = np.array([
        [0, 1], [1, 2], [2, 3], [3, 0],
        [4, 5], [5, 6], [6, 7], [7, 4],
        [0, 4], [1, 5], [2, 6], [3, 7]
    ])
    # shift the edge table by 8 point indices per box
    lines = np.concatenate([box_lines + i * 8 for i in range(len(bboxes_corners))])
    # color list: one entry per line segment
    colors = np.array([color for _ in range(len(lines))])
    # generate line set
    line_set = o3d.geometry.LineSet()
    line_set.points = o3d.utility.Vector3dVector(points)
    line_set.lines = o3d.utility.Vector2iVector(lines)
    line_set.colors = o3d.utility.Vector3dVector(colors)
    o3d.io.write_line_set(file_path, line_set)

    if logger is not None:
        logger.info(f"Save Boxes to: {file_path}")
#!/bin/sh
# Run tools/test.py from the code snapshot stored in an experiment directory.
#   -p  python interpreter            (default: python)
#   -d  dataset name                  (default: s3dis)
#   -c  config name under configs/<dataset>/; "None" uses <exp>/config.py
#   -n  experiment name               (default: debug)
#   -w  weight file stem in <exp>/model/ (default: model_best)

# Quote the path so a checkout location containing spaces still works.
cd "$(dirname "$(dirname "$0")")" || exit
export PYTHONPATH=./
PYTHON=python

TEST_CODE=test.py

DATASET=s3dis
CONFIG="None"
EXP_NAME=debug
WEIGHT=model_best

while getopts "p:d:c:n:w:" opt; do
  case $opt in
    p)
      PYTHON=$OPTARG
      ;;
    d)
      DATASET=$OPTARG
      ;;
    c)
      CONFIG=$OPTARG
      ;;
    n)
      EXP_NAME=$OPTARG
      ;;
    w)
      WEIGHT=$OPTARG
      ;;
    \?)
      echo "Invalid option: -$OPTARG"
      ;;
  esac
done

echo "Experiment name: $EXP_NAME"
echo "Python interpreter dir: $PYTHON"
echo "Dataset: $DATASET"

EXP_DIR=exp/${DATASET}/${EXP_NAME}
MODEL_DIR=${EXP_DIR}/model
CODE_DIR=${EXP_DIR}/code

# Use the config saved with the experiment unless -c names one explicitly.
if [ "${CONFIG}" = "None" ]
then
    CONFIG_DIR=${EXP_DIR}/config.py
else
    CONFIG_DIR=configs/${DATASET}/${CONFIG}.py
fi

echo " =========> RUN TASK <========="

# $PYTHON is intentionally unquoted so it may carry extra interpreter arguments.
$PYTHON -u "$CODE_DIR"/tools/$TEST_CODE \
  --config-file "$CONFIG_DIR" \
  --options save_path="$EXP_DIR" weight="${MODEL_DIR}"/"${WEIGHT}".pth
41 | echo "Invalid option: -$OPTARG" 42 | ;; 43 | esac 44 | done 45 | 46 | echo "Experiment name: $EXP_NAME" 47 | echo "Python interpreter dir: $PYTHON" 48 | echo "Dataset: $DATASET" 49 | echo "Config: $CONFIG" 50 | 51 | 52 | 53 | EXP_DIR=exp/${DATASET}/${EXP_NAME} 54 | MODEL_DIR=${EXP_DIR}/model 55 | CODE_DIR=${EXP_DIR}/code 56 | CONFIG_DIR=configs/${DATASET}/${CONFIG}.py 57 | 58 | 59 | echo " =========> CREATE EXP DIR <=========" 60 | echo "Experiment dir: $ROOT_DIR/$EXP_DIR" 61 | if ${RESUME} 62 | then 63 | CONFIG_DIR=${EXP_DIR}/config.py 64 | WEIGHT=$MODEL_DIR/model_last.pth 65 | else 66 | mkdir -p "$MODEL_DIR" "$CODE_DIR" 67 | cp -r scripts tools pcr "$CODE_DIR" 68 | fi 69 | 70 | echo "Loading config in:" $CONFIG_DIR 71 | export PYTHONPATH=./$CODE_DIR 72 | echo "Running code in: $CODE_DIR" 73 | 74 | 75 | echo " =========> RUN TASK <=========" 76 | 77 | if [ "${WEIGHT}" = "None" ] 78 | then 79 | $PYTHON "$CODE_DIR"/tools/$TRAIN_CODE \ 80 | --config-file "$CONFIG_DIR" \ 81 | --num-gpus "$GPU" \ 82 | --options save_path="$EXP_DIR" 83 | else 84 | $PYTHON "$CODE_DIR"/tools/$TRAIN_CODE \ 85 | --config-file "$CONFIG_DIR" \ 86 | --num-gpus "$GPU" \ 87 | --options save_path="$EXP_DIR" resume="$RESUME" weight="$WEIGHT" 88 | fi -------------------------------------------------------------------------------- /tools/test.py: -------------------------------------------------------------------------------- 1 | """ 2 | Main Testing Script 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 
def main():
    """
    Entry point of the test script.

    Loads the config (with command-line overrides), seeds the RNGs, builds the
    model and the test data loader, restores the checkpoint at ``cfg.weight``
    and runs the tester built from ``cfg.test``.

    Raises:
        RuntimeError: if ``cfg.weight`` is not an existing file.
    """
    args = get_parser()

    # config_parser
    cfg = Config.fromfile(args.config_file)
    if args.options is not None:
        cfg.merge_from_dict(args.options)

    if cfg.seed is None:
        cfg.seed = get_random_seed()

    os.makedirs(cfg.save_path, exist_ok=True)

    # default_setup
    set_seed(cfg.seed)
    cfg.batch_size_val_per_gpu = cfg.batch_size_test  # TODO: add support to multi gpu test
    cfg.num_worker_per_gpu = cfg.num_worker  # TODO: add support to multi gpu test

    # tester init
    weight_name = os.path.basename(cfg.weight).split(".")[0]
    logger = get_root_logger(log_file=os.path.join(cfg.save_path, "test-{}.log".format(weight_name)))
    logger.info("=> Loading config ...")
    logger.info(f"Save path: {cfg.save_path}")
    logger.info(f"Config:\n{cfg.pretty_text}")

    # build model
    logger.info("=> Building model ...")
    model = build_model(cfg.model).cuda()
    n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
    logger.info(f"Num params: {n_parameters}")

    # build dataset
    logger.info("=> Building test dataset & dataloader ...")
    test_dataset = build_dataset(cfg.data.test)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=cfg.batch_size_val_per_gpu,
                                              shuffle=False,
                                              num_workers=cfg.num_worker_per_gpu,
                                              pin_memory=True,
                                              collate_fn=collate_fn)

    # load checkpoint
    if not os.path.isfile(cfg.weight):
        raise RuntimeError("=> no checkpoint found at '{}'".format(cfg.weight))
    # Load onto the CPU first so a checkpoint saved from another GPU index can
    # still be read; load_state_dict copies the values into the model's tensors.
    checkpoint = torch.load(cfg.weight, map_location="cpu")
    prefix = "module."
    # Strip the wrapper prefix only where it is present (module.xxx.xxx -> xxx.xxx);
    # keys saved without it are kept intact instead of losing 7 characters.
    new_state_dict = collections.OrderedDict(
        (k[len(prefix):] if k.startswith(prefix) else k, v)
        for k, v in checkpoint['state_dict'].items()
    )
    model.load_state_dict(new_state_dict, strict=True)
    logger.info("=> loaded weight '{}' (epoch {})".format(cfg.weight, checkpoint['epoch']))
    cfg.epochs = checkpoint['epoch']  # TODO: move to self
    TEST.build(cfg.test)(cfg, test_loader, model)
def main():
    """Parse the command line, load the config and start ``main_worker`` through ``launch``."""
    parser = default_argument_parser()
    args = parser.parse_args()
    cfg = default_config_parser(args.config_file, args.options)

    launch_kwargs = dict(
        num_gpus_per_machine=args.num_gpus,
        num_machines=args.num_machines,
        machine_rank=args.machine_rank,
        dist_url=args.dist_url,
        cfg=(cfg,),  # passed to launch as a one-element tuple
    )
    launch(main_worker, **launch_kwargs)