├── .gitignore
├── README.md
├── configs
    ├── _base_
    │   ├── datasets
    │   │   ├── modelnet40.py
    │   │   ├── s3dis.py
    │   │   ├── scannet.py
    │   │   ├── scannet200.py
    │   │   ├── scannet_submit.py
    │   │   ├── semantic_kitti_19cls.py
    │   │   └── shapenet_part.py
    │   ├── default_runtime.py
    │   └── tests
    │   │   ├── classification.py
    │   │   ├── part_segmentation.py
    │   │   └── segmentation.py
    ├── s3dis
    │   ├── semseg-minkunet34c-0-base.py
    │   ├── semseg-ptv1-0-base.py
    │   ├── semseg-ptv2m1-0-base.py
    │   ├── semseg-ptv2m2-0-base.py
    │   └── semseg-spunet34c-0-base.py
    ├── scannet
    │   ├── semseg-minkunet34c-0-base.py
    │   ├── semseg-ptv1-0-base.py
    │   ├── semseg-ptv2m1-0-origin.py
    │   ├── semseg-ptv2m2-0-base.py
    │   ├── semseg-ptv2m2-1-benchmark-submit.py
    │   ├── semseg-ptv2m2-2-precise-evaluate.py
    │   ├── semseg-spunet34c-0-base.py
    │   ├── semseg-spunet34c-1-cn-base.py
    │   ├── semseg-stv1m1-0-origin.py
    │   └── semseg-stv1m2-0-refined.py
    ├── scannet200
    │   ├── semseg-minkunet34c-0-base.py
    │   ├── semseg-ptv1-0-base.py
    │   ├── semseg-ptv2m1-0-base.py
    │   ├── semseg-ptv2m2-0-base.py
    │   ├── semseg-ptv2m2-1-benchmark-submit.py
    │   ├── semseg-spunet34c-0-base.py
    │   └── semseg-stv1m2-0-refined.py
    └── semantic_kitti
    │   ├── semseg-minkunet34c-0-base.py
    │   ├── semseg-spunet34c-0-base.py
    │   └── semseg-spvcnn34c-0-base.py
├── figures
    ├── design.png
    └── offset.png
├── libs
    ├── pointops
    │   ├── __init__.py
    │   ├── functions
    │   │   ├── __init__.py
    │   │   ├── aggregation.py
    │   │   ├── attention.py
    │   │   ├── grouping.py
    │   │   ├── interpolation.py
    │   │   ├── query.py
    │   │   ├── sampling.py
    │   │   ├── subtraction.py
    │   │   └── utils.py
    │   ├── setup.py
    │   └── src
    │   │   ├── __init__.py
    │   │   ├── aggregation
    │   │       ├── aggregation_cuda.cpp
    │   │       ├── aggregation_cuda_kernel.cu
    │   │       └── aggregation_cuda_kernel.h
    │   │   ├── attention
    │   │       ├── attention_cuda.cpp
    │   │       ├── attention_cuda_kernel.cu
    │   │       └── attention_cuda_kernel.h
    │   │   ├── ball_query
    │   │       ├── ball_query_cuda.cpp
    │   │       ├── ball_query_cuda_kernel.cu
    │   │       └── ball_query_cuda_kernel.h
    │   │   ├── cuda_utils.h
    │   │   ├── grouping
    │   │       ├── grouping_cuda.cpp
    │   │       ├── grouping_cuda_kernel.cu
    │   │       └── grouping_cuda_kernel.h
    │   │   ├── interpolation
    │   │       ├── interpolation_cuda.cpp
    │   │       ├── interpolation_cuda_kernel.cu
    │   │       └── interpolation_cuda_kernel.h
    │   │   ├── knn_query
    │   │       ├── knn_query_cuda.cpp
    │   │       ├── knn_query_cuda_kernel.cu
    │   │       └── knn_query_cuda_kernel.h
    │   │   ├── pointops_api.cpp
    │   │   ├── random_ball_query
    │   │       ├── random_ball_query_cuda.cpp
    │   │       ├── random_ball_query_cuda_kernel.cu
    │   │       └── random_ball_query_cuda_kernel.h
    │   │   ├── sampling
    │   │       ├── sampling_cuda.cpp
    │   │       ├── sampling_cuda_kernel.cu
    │   │       └── sampling_cuda_kernel.h
    │   │   └── subtraction
    │   │       ├── subtraction_cuda.cpp
    │   │       ├── subtraction_cuda_kernel.cu
    │   │       └── subtraction_cuda_kernel.h
    └── pointops2
    │   ├── __init__.py
    │   ├── functions
    │       ├── __init__.py
    │       ├── pointops.py
    │       ├── pointops2.py
    │       ├── pointops_ablation.py
    │       ├── test_attention_op_step1.py
    │       ├── test_attention_op_step1_v2.py
    │       ├── test_attention_op_step2.py
    │       ├── test_relative_pos_encoding_op_step1.py
    │       ├── test_relative_pos_encoding_op_step1_v2.py
    │       ├── test_relative_pos_encoding_op_step1_v3.py
    │       ├── test_relative_pos_encoding_op_step2.py
    │       └── test_relative_pos_encoding_op_step2_v2.py
    │   ├── setup.py
    │   └── src
    │       ├── __init__.py
    │       ├── aggregation
    │           ├── aggregation_cuda.cpp
    │           ├── aggregation_cuda_kernel.cu
    │           └── aggregation_cuda_kernel.h
    │       ├── attention
    │           ├── attention_cuda.cpp
    │           ├── attention_cuda_kernel.cu
    │           └── attention_cuda_kernel.h
    │       ├── attention_v2
    │           ├── attention_cuda_kernel_v2.cu
    │           ├── attention_cuda_kernel_v2.h
    │           └── attention_cuda_v2.cpp
    │       ├── cuda_utils.h
    │       ├── grouping
    │           ├── grouping_cuda.cpp
    │           ├── grouping_cuda_kernel.cu
    │           └── grouping_cuda_kernel.h
    │       ├── interpolation
    │           ├── interpolation_cuda.cpp
    │           ├── interpolation_cuda_kernel.cu
    │           └── interpolation_cuda_kernel.h
    │       ├── knnquery
    │           ├── knnquery_cuda.cpp
    │           ├── knnquery_cuda_kernel.cu
    │           └── knnquery_cuda_kernel.h
    │       ├── pointops_api.cpp
    │       ├── rpe
    │           ├── relative_pos_encoding_cuda.cpp
    │           ├── relative_pos_encoding_cuda_kernel.cu
    │           └── relative_pos_encoding_cuda_kernel.h
    │       ├── rpe_v2
    │           ├── relative_pos_encoding_cuda_kernel_v2.cu
    │           ├── relative_pos_encoding_cuda_kernel_v2.h
    │           └── relative_pos_encoding_cuda_v2.cpp
    │       ├── sampling
    │           ├── sampling_cuda.cpp
    │           ├── sampling_cuda_kernel.cu
    │           └── sampling_cuda_kernel.h
    │       └── subtraction
    │           ├── subtraction_cuda.cpp
    │           ├── subtraction_cuda_kernel.cu
    │           └── subtraction_cuda_kernel.h
├── pcr
    ├── __init__.py
    ├── datasets
    │   ├── __init__.py
    │   ├── arkitscenes.py
    │   ├── builder.py
    │   ├── defaults.py
    │   ├── modelnet.py
    │   ├── preprocessing
    │   │   ├── arkitscenes
    │   │   │   └── preprocess_arkitscenes_mesh.py
    │   │   ├── s3dis
    │   │   │   ├── preprocess_s3dis.py
    │   │   │   └── preprocess_s3dis_align_raw.py
    │   │   └── scannet
    │   │   │   ├── meta_data
    │   │   │       ├── classes_ObjClassification-ShapeNetCore55.txt
    │   │   │       ├── classes_SemVoxLabel-nyu40id.txt
    │   │   │       ├── scannet200_constants.py
    │   │   │       ├── scannet200_splits.py
    │   │   │       ├── scannet_means.npz
    │   │   │       ├── scannetv1_test.txt
    │   │   │       ├── scannetv1_train.txt
    │   │   │       ├── scannetv1_val.txt
    │   │   │       ├── scannetv2-labels-old.combined.tsv
    │   │   │       ├── scannetv2-labels.combined.tsv
    │   │   │       ├── scannetv2_test.txt
    │   │   │       ├── scannetv2_train.txt
    │   │   │       └── scannetv2_val.txt
    │   │   │   ├── preprocess_scannet.py
    │   │   │   └── scannet_pair
    │   │   │       ├── SensorData.py
    │   │   │       ├── compute_full_overlapping.py
    │   │   │       ├── generage_list.py
    │   │   │       ├── plyfile.py
    │   │   │       ├── point_cloud_extractor.py
    │   │   │       ├── preprocess.py
    │   │   │       └── reader.py
    │   ├── s3dis.py
    │   ├── scannet.py
    │   ├── scannet_pair.py
    │   ├── semantic_kitti.py
    │   ├── shapenet_part.py
    │   ├── transform.py
    │   └── utils.py
    ├── engines
    │   ├── __init__.py
    │   ├── defaults.py
    │   ├── launch.py
    │   ├── test.py
    │   └── train.py
    ├── models
    │   ├── __init__.py
    │   ├── builder.py
    │   ├── point_transformer
    │   │   ├── __init__.py
    │   │   ├── point_transformer_cls.py
    │   │   ├── point_transformer_partseg.py
    │   │   ├── point_transformer_seg.py
    │   │   └── utils.py
    │   ├── point_transformer2
    │   │   ├── __init__.py
    │   │   ├── point_transformer_v2m1_origin.py
    │   │   └── point_transformer_v2m2_base.py
    │   ├── sparse_unet
    │   │   ├── __init__.py
    │   │   ├── mink_unet.py
    │   │   └── spconv_unet.py
    │   ├── spvcnn
    │   │   ├── __init__.py
    │   │   └── ts_spvcnn.py
    │   ├── stratified_transformer
    │   │   ├── __init__.py
    │   │   ├── stratified_transformer_v1m1_origin.py
    │   │   └── stratified_transformer_v1m2_refine.py
    │   └── utils.py
    └── utils
    │   ├── __init__.py
    │   ├── comm.py
    │   ├── config.py
    │   ├── env.py
    │   ├── events.py
    │   ├── logger.py
    │   ├── losses.py
    │   ├── misc.py
    │   ├── optimizer.py
    │   ├── path.py
    │   ├── registry.py
    │   ├── scheduler.py
    │   └── visualization.py
├── scripts
    ├── pretrain.sh
    ├── test.sh
    └── train.sh
└── tools
    ├── pretrain.py
    ├── test.py
    └── train.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # poetry
 98 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | #   in version control.
109 | #   https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 | 
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 | 
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 | 
119 | # SageMath parsed files
120 | *.sage.py
121 | 
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 | 
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 | 
135 | # Rope project settings
136 | .ropeproject
137 | 
138 | # mkdocs documentation
139 | /site
140 | 
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 | 
146 | # Pyre type checker
147 | .pyre/
148 | 
149 | # pytype static type analyzer
150 | .pytype/
151 | 
152 | # Cython debug symbols
153 | cython_debug/
154 | 
155 | # PyCharm
156 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
159 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | .idea/
161 | 


--------------------------------------------------------------------------------
/configs/_base_/datasets/modelnet40.py:
--------------------------------------------------------------------------------
"""
ModelNet40 dataset config.

Unmaintained: this file is kept for reference only.
"""

# dataset settings
dataset_type = "ModelNetDataset"
data_root = "data/modelnet40_normal_resampled"
cache_data = False
# The 40 category names (presumably list index == label id — TODO confirm against the dataset class).
names = ["airplane", "bathtub", "bed", "bench", "bookshelf",
         "bottle", "bowl", "car", "chair", "cone",
         "cup", "curtain", "desk", "door", "dresser",
         "flower_pot", "glass_box", "guitar", "keyboard", "lamp",
         "laptop", "mantel", "monitor", "night_stand", "person",
         "piano", "plant", "radio", "range_hood", "sink",
         "sofa", "stairs", "stool", "table", "tent",
         "toilet", "tv_stand", "vase", "wardrobe", "xbox"]

# One sub-dict per split; the commented-out transforms below are disabled alternatives.
data = dict(
    num_classes=40,
    ignore_label=-1,  # dummy ignore
    names=names,
    train=dict(
        type=dataset_type,
        split="train",
        data_root=data_root,
        class_names=names,
        transform=[
            dict(type="NormalizeCoord"),
            # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
            # dict(type="CenterShift", apply_z=True),
            # dict(type="RandomRotate", angle=[-1, 1], axis='z', center=[0, 0, 0], p=0.5),
            # dict(type="RandomRotate", angle=[-1/24, 1/24], axis='x', p=0.5),
            # dict(type="RandomRotate", angle=[-1/24, 1/24], axis='y', p=0.5),
            dict(type="RandomScale", scale=[0.9, 1.1]),
            # dict(type="RandomFlip", p=0.5),
            # dict(type="RandomJitter", sigma=0.005, clip=0.02),
            dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
            dict(type="Voxelize", voxel_size=0.02, hash_type='fnv', mode='train'),
            # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),

            # dict(type="Voxelize", voxel_size=0.01, hash_type='fnv', mode='train'),
            # dict(type="SphereCrop", point_max=10000, mode='random'),
            # dict(type="CenterShift", apply_z=True),
            dict(type="ShufflePoint"),
            dict(type="ToTensor"),
        ],
        loop=2,
        test_mode=False,
    ),

    # Validation reads the "test" split, with only normalization applied.
    val=dict(
        type=dataset_type,
        split="test",
        data_root=data_root,
        class_names=names,
        transform=[
            dict(type="NormalizeCoord"),
            dict(type="ToTensor"),
        ],
        loop=1,
        test_mode=False,
    ),

    # Same split and transforms as `val`, but with test_mode enabled and an (empty) test_cfg.
    test=dict(
        type=dataset_type,
        split="test",
        data_root=data_root,
        class_names=names,
        transform=[
            dict(type="NormalizeCoord"),
            dict(type="ToTensor"),
        ],
        loop=1,
        test_mode=True,
        test_cfg=dict(
        )
    ),
)

# Loss configuration; ignore_index is tied to the dataset's ignore_label above.
criteria = [
    dict(type="CrossEntropyLoss",
         loss_weight=1.0,
         ignore_index=data["ignore_label"])
]
86 | 
87 | 


--------------------------------------------------------------------------------
/configs/_base_/datasets/scannet_submit.py:
--------------------------------------------------------------------------------
# Builds on scannet.py (listed in `_base_`) and only overrides the split of each
# dataset: training uses both "train" and "val" (presumably for benchmark
# submission, as the file name suggests — confirm).
_base_ = ['scannet.py']

data = dict(
    train=dict(
        split=["train", "val"],
    ),

    val=dict(
        split="val",
    ),

    test=dict(
        split="test",
    ),
)


--------------------------------------------------------------------------------
/configs/_base_/default_runtime.py:
--------------------------------------------------------------------------------
# Default runtime settings shared by the experiment configs.
weight = None  # path to the model weight file
resume = False  # whether to resume a previous training process
evaluate = True  # whether to evaluate after each training epoch
test_only = False  # whether to run the test process only

seed = None  # when None, the training process initializes a random seed and records it
save_path = "exp/default"
num_worker = 32  # total number of workers across all GPUs
batch_size = 16  # total batch size across all GPUs
batch_size_val = None  # when None, automatically adapts to a batch size of 1 per GPU
batch_size_test = 1
epoch = 100  # total number of epochs; data loop = epoch // eval_epoch
eval_epoch = 100  # scheduled total number of evaluation & checkpoint epochs
save_freq = None  # None or int; None means only the last model is saved

eval_metric = "mIoU"

sync_bn = False
enable_amp = False
empty_cache = False
find_unused_parameters = False

max_batch_points = 1e8
mix_prob = 0
param_dicts = None  # example: param_dicts = [dict(keyword="block", lr_scale=0.1)]
26 | 


--------------------------------------------------------------------------------
/configs/_base_/tests/classification.py:
--------------------------------------------------------------------------------
# Test settings for classification: selects the "ClassificationTest" tester type.
test = dict(
    type="ClassificationTest",
    # Disabled alternative with several scales:
    # scales=[0.9, 0.95, 1, 1.05, 1.1],
    scales=[1],
    shuffle=True
)
7 | 


--------------------------------------------------------------------------------
/configs/_base_/tests/part_segmentation.py:
--------------------------------------------------------------------------------
# Test settings for part segmentation: selects the "PartSegmentationTest" tester type.
test = dict(
    type="PartSegmentationTest"
)
4 | 


--------------------------------------------------------------------------------
/configs/_base_/tests/segmentation.py:
--------------------------------------------------------------------------------
# Test settings for semantic segmentation: selects the "SegmentationTest" tester type.
test = dict(
    type="SegmentationTest"
)
4 | 


--------------------------------------------------------------------------------
/figures/design.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pointcept/PointTransformerV2/5386c4d71f3d6c42c24a8105fce8750e9355dc54/figures/design.png


--------------------------------------------------------------------------------
/figures/offset.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pointcept/PointTransformerV2/5386c4d71f3d6c42c24a8105fce8750e9355dc54/figures/offset.png


--------------------------------------------------------------------------------
/libs/pointops/__init__.py:
--------------------------------------------------------------------------------
1 | from .functions import *
2 | 


--------------------------------------------------------------------------------
/libs/pointops/functions/__init__.py:
--------------------------------------------------------------------------------
1 | from .query import knn_query, ball_query, random_ball_query
2 | from .sampling import farthest_point_sampling
3 | from .grouping import grouping, grouping2
4 | from .interpolation import interpolation, interpolation2
5 | from .subtraction import subtraction
6 | from .aggregation import aggregation
7 | from .attention import attention_relation_step, attention_fusion_step
8 | from .utils import query_and_group, knn_query_and_group, ball_query_and_group, batch2offset, offset2batch
9 | 


--------------------------------------------------------------------------------
/libs/pointops/functions/aggregation.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.autograd import Function
 3 | 
 4 | from pointops._C import aggregation_forward_cuda, aggregation_backward_cuda
 5 | 
 6 | 
 7 | class Aggregation(Function):
 8 |     @staticmethod
 9 |     def forward(ctx, input, position, weight, idx):
10 |         """
11 |         input: input: (n, c), position: (n, nsample, c), weight : (n, nsample, c'), idx: (n, nsample)
12 |         output: (n, c)
13 |         """
14 |         assert input.is_contiguous() and position.is_contiguous() and weight.is_contiguous()
15 |         n, nsample, c = position.shape; w_c = weight.shape[-1]
16 |         output = torch.cuda.FloatTensor(n, c).zero_()
17 |         aggregation_forward_cuda(n, nsample, c, w_c, input, position, weight, idx, output)
18 |         ctx.save_for_backward(input, position, weight, idx)
19 |         return output
20 | 
21 |     @staticmethod
22 |     def backward(ctx, grad_output):
23 |         """
24 |         input: grad_out: (n, c)
25 |         output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight : (n, nsample, c')
26 |         """
27 |         input, position, weight, idx = ctx.saved_tensors
28 |         n, nsample, c = position.shape; w_c = weight.shape[-1]
29 |         grad_input = torch.cuda.FloatTensor(n, c).zero_()
30 |         grad_position = torch.cuda.FloatTensor(n, nsample, c).zero_()
31 |         grad_weight = torch.cuda.FloatTensor(n, nsample, w_c).zero_()
32 |         aggregation_backward_cuda(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight)
33 |         return grad_input, grad_position, grad_weight, None
34 | 
35 | 
36 | aggregation = Aggregation.apply
37 | 


--------------------------------------------------------------------------------
/libs/pointops/functions/attention.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.autograd import Function
 3 | 
 4 | from pointops._C import attention_relation_step_forward_cuda, attention_relation_step_backward_cuda, \
 5 |     attention_fusion_step_forward_cuda, attention_fusion_step_backward_cuda
 6 | 
 7 | 
 8 | class AttentionRelationStep(Function):
 9 |     @staticmethod
10 |     def forward(ctx, query, key, weight, index_target, index_refer):
11 |         """
12 |         input - query: (n, g, c), key: (n, g, c), weight: (c)  1_c for scatter attention,
13 |                 index_target: (m), index_refer: (m)
14 |         output - relation: (M, g)
15 |         """
16 | 
17 |         assert query.is_contiguous() \
18 |                and key.is_contiguous() \
19 |                and index_target.is_contiguous() \
20 |                and index_refer.is_contiguous() \
21 |                and weight.is_contiguous()
22 | 
23 |         assert index_target.shape[0] == index_refer.shape[0]
24 | 
25 |         _, g, c = query.shape
26 |         m = index_target.shape[0]
27 |         output = torch.cuda.FloatTensor(m, g).zero_()
28 |         attention_relation_step_forward_cuda(m, g, c, query, key, weight,
29 |                                              index_target.int(), index_refer.int(), output)
30 |         ctx.save_for_backward(query, key, weight, index_target, index_refer)
31 |         return output
32 | 
33 |     @staticmethod
34 |     def backward(ctx, grad_output):
35 |         query, key, weight, index_target, index_refer = ctx.saved_tensors
36 |         n, g, c = query.shape
37 |         m = index_target.shape[0]
38 |         grad_query = torch.cuda.FloatTensor(n, g, c).zero_()
39 |         grad_key = torch.cuda.FloatTensor(n, g, c).zero_()
40 |         grad_weight = torch.cuda.FloatTensor(c).zero_()
41 |         attention_relation_step_backward_cuda(m, g, c,
42 |                                               query, grad_query,
43 |                                               key, grad_key,
44 |                                               weight, grad_weight,
45 |                                               index_target.int(), index_refer.int(),
46 |                                               grad_output)
47 |         return grad_query, grad_key, None, None, None
48 | 
49 | 
class AttentionFusionStep(Function):
    """Autograd wrapper around the CUDA attention "fusion" kernels from ``pointops._C``."""

    @staticmethod
    def forward(ctx, weight, value, index_target, index_refer):
        """
        input - weight: (m, g), value: (n, g, c)
                index_target: (m), index_refer: (m)
        output - output: (n, g, c)
        """

        assert weight.is_contiguous() \
               and value.is_contiguous() \
               and index_target.is_contiguous() \
               and index_refer.is_contiguous()

        assert index_target.shape[0] == index_refer.shape[0]

        n, g, c = value.shape
        m = index_refer.shape[0]
        # Allocate on the inputs' device rather than the current CUDA device.
        output = torch.zeros((n, g, c), dtype=torch.float32, device=value.device)
        attention_fusion_step_forward_cuda(m, g, c, weight, value, index_target.int(), index_refer.int(), output)
        ctx.save_for_backward(weight, value, index_target, index_refer)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        """
        input: grad_output: (n, g, c)
        output: grad_weight: (m, g), grad_value: (n, g, c), none, none
        """
        weight, value, index_target, index_refer = ctx.saved_tensors
        n, g, c = value.shape
        m = index_target.shape[0]
        # Autograd may hand over a non-contiguous gradient; pass the kernel a dense buffer.
        grad_output = grad_output.contiguous()
        device = grad_output.device
        grad_weight = torch.zeros((m, g), dtype=torch.float32, device=device)
        grad_value = torch.zeros((n, g, c), dtype=torch.float32, device=device)
        attention_fusion_step_backward_cuda(m, g, c,
                                            weight, grad_weight,
                                            value, grad_value,
                                            index_target.int(), index_refer.int(),
                                            grad_output)
        # The two index tensors receive no gradient.
        return grad_weight, grad_value, None, None


attention_relation_step = AttentionRelationStep.apply
attention_fusion_step = AttentionFusionStep.apply
95 | 


--------------------------------------------------------------------------------
/libs/pointops/functions/grouping.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.autograd import Function
 3 | 
 4 | from pointops._C import grouping_forward_cuda, grouping_backward_cuda
 5 | 
 6 | 
 7 | class Grouping(Function):
 8 |     @staticmethod
 9 |     def forward(ctx, input, idx):
10 |         """
11 |         input: input: (n, c), idx : (m, nsample)
12 |         output: (m, nsample, c)
13 |         """
14 |         assert input.is_contiguous() and idx.is_contiguous()
15 |         m, nsample, n, c = idx.shape[0], idx.shape[1], input.shape[0], input.shape[1]
16 |         output = torch.cuda.FloatTensor(m, nsample, c)
17 |         grouping_forward_cuda(m, nsample, c, input, idx, output)
18 |         ctx.n = n
19 |         ctx.save_for_backward(idx)
20 |         return output
21 | 
22 |     @staticmethod
23 |     def backward(ctx, grad_output):
24 |         """
25 |         input: grad_out: (m, c, nsample)
26 |         output: (n, c), None
27 |         """
28 |         n = ctx.n
29 |         idx, = ctx.saved_tensors
30 |         m, nsample, c = grad_output.shape
31 |         grad_input = torch.cuda.FloatTensor(n, c).zero_()
32 |         grouping_backward_cuda(m, nsample, c, grad_output, idx, grad_input)
33 |         return grad_input, None
34 | 
35 | 
def grouping(idx,
             feat,
             xyz,
             new_xyz=None,
             with_xyz=False):
    """
    Gather per-point features (and optionally relative coordinates) by neighbour index.

    input: idx: (m, nsample), feat: (n, c), xyz: (n, 3), new_xyz: (m, 3) or None (defaults to xyz)
    output: (m, nsample, c), or (m, nsample, 3 + c) when with_xyz is True

    An index of -1 selects an all-zero padding row appended to feat / xyz, so
    placeholder neighbours yield zero features and zero relative coordinates.
    """
    if new_xyz is None:
        new_xyz = xyz
    assert xyz.is_contiguous() and feat.is_contiguous()
    m, nsample, c = idx.shape[0], idx.shape[1], feat.shape[1]
    flat_idx = idx.reshape(-1).long()

    # new_zeros keeps the dtype and device of feat, so half/double features are
    # not silently re-typed by torch.cat and no host-to-device copy is needed.
    feat = torch.cat([feat, feat.new_zeros(1, c)], dim=0)
    grouped_feat = feat[flat_idx, :].view(m, nsample, c)  # (m, num_sample, c)
    if not with_xyz:
        return grouped_feat

    assert new_xyz.is_contiguous()
    xyz = torch.cat([xyz, xyz.new_zeros(1, 3)], dim=0)
    # sign(idx + 1) is 0 for the -1 placeholder and 1 for any valid index >= 0.
    mask = torch.sign(idx + 1)
    grouped_xyz = xyz[flat_idx, :].view(m, nsample, 3) - new_xyz.unsqueeze(1)  # (m, num_sample, 3)
    # Zero out the relative coordinates of placeholder neighbours.
    grouped_xyz = grouped_xyz * mask.unsqueeze(-1).to(grouped_xyz.dtype)  # (m, num_sample, 3)
    return torch.cat((grouped_xyz, grouped_feat), -1)
57 | 
58 | 
# Functional entry point for the CUDA-backed Grouping autograd function.
grouping2 = Grouping.apply
60 | 


--------------------------------------------------------------------------------
/libs/pointops/functions/interpolation.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.autograd import Function
 3 | 
 4 | from pointops._C import interpolation_forward_cuda, interpolation_backward_cuda
 5 | from .query import knn_query
 6 | 
 7 | 
 8 | def interpolation(xyz, new_xyz, feat, offset, new_offset, k=3):
 9 |     """
10 |     input: coords: (m, 3), new_xyz: (n, 3), color: (m, c), offset: (b), new_offset: (b)
11 |     output: (n, c)
12 |     """
13 |     assert xyz.is_contiguous() and new_xyz.is_contiguous() and feat.is_contiguous()
14 |     idx, dist = knn_query(k, xyz, offset, new_xyz, new_offset)  # (n, 3), (n, 3)
15 |     dist_recip = 1.0 / (dist + 1e-8)  # (n, 3)
16 |     norm = torch.sum(dist_recip, dim=1, keepdim=True)
17 |     weight = dist_recip / norm  # (n, 3)
18 | 
19 |     new_feat = torch.cuda.FloatTensor(new_xyz.shape[0], feat.shape[1]).zero_()
20 |     for i in range(k):
21 |         new_feat += feat[idx[:, i].long(), :] * weight[:, i].unsqueeze(-1)
22 |     return new_feat
23 | 
24 | 
25 | class Interpolation(Function):
26 |     @staticmethod
27 |     def forward(ctx, xyz, new_xyz, input, offset, new_offset, k=3):
28 |         """
29 |         input: coords: (m, 3), new_xyz: (n, 3), input: (m, c), offset: (b), new_offset: (b)
30 |         output: (n, c)
31 |         """
32 |         assert xyz.is_contiguous() and new_xyz.is_contiguous() and input.is_contiguous()
33 |         idx, dist = knn_query(k, xyz, offset, new_xyz, new_offset)  # (n, k), (n, k)
34 |         dist_recip = 1.0 / (dist + 1e-8)  # (n, k)
35 |         norm = torch.sum(dist_recip, dim=1, keepdim=True)
36 |         weight = dist_recip / norm  # (n, k)
37 | 
38 |         n, c, m = new_xyz.shape[0], input.shape[1], input.shape[0]
39 |         output = torch.cuda.FloatTensor(n, c).zero_()
40 |         interpolation_forward_cuda(n, c, k, input, idx, weight, output)
41 |         ctx.m, ctx.k = m, k
42 |         ctx.save_for_backward(idx, weight)
43 |         return output
44 | 
45 |     @staticmethod
46 |     def backward(ctx, grad_output):
47 |         """
48 |         input: coords: (m, 3), new_xyz: (n, 3), input: (m, c), offset: (b), new_offset: (b)
49 |         output: (n, c)
50 |         """
51 |         m, k = ctx.m, ctx.k
52 |         idx, weight = ctx.saved_tensors
53 |         n, c = grad_output.shape
54 |         grad_input = torch.cuda.FloatTensor(m, c).zero_()
55 |         interpolation_backward_cuda(n, c, k, grad_output, idx, weight, grad_input)
56 |         return None, None, grad_input, None, None, None
57 | 
58 | 
# Functional alias: interpolation2(xyz, new_xyz, input, offset, new_offset, k=3).
interpolation2 = Interpolation.apply
60 | 


--------------------------------------------------------------------------------
/libs/pointops/functions/query.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.autograd import Function
 3 | 
 4 | from pointops._C import knn_query_cuda, random_ball_query_cuda, ball_query_cuda
 5 | 
 6 | 
 7 | class KNNQuery(Function):
 8 |     @staticmethod
 9 |     def forward(ctx, nsample, xyz, offset, new_xyz=None, new_offset=None):
10 |         """
11 |         input: coords: (n, 3), new_xyz: (m, 3), offset: (b), new_offset: (b)
12 |         output: idx: (m, nsample) -1 is placeholder, dist2: (m, nsample)
13 |         """
14 |         if new_xyz is None or new_offset is None:
15 |             new_xyz = xyz
16 |             new_offset = offset
17 |         assert xyz.is_contiguous() and new_xyz.is_contiguous()
18 |         m = new_xyz.shape[0]
19 |         idx = torch.cuda.IntTensor(m, nsample).zero_()
20 |         dist2 = torch.cuda.FloatTensor(m, nsample).zero_()
21 |         knn_query_cuda(m, nsample, xyz, new_xyz, offset.int(), new_offset.int(), idx, dist2)
22 |         return idx, torch.sqrt(dist2)
23 | 
24 | 
class RandomBallQuery(Function):
    """Random Ball Query.

    Find nearby points in spherical space.
    """

    @staticmethod
    def forward(ctx, nsample, max_radius, min_radius, xyz, offset, new_xyz=None, new_offset=None):
        """
        input: coords: (n, 3), new_xyz: (m, 3), offset: (b), new_offset: (b)
        output: idx: (m, nsample), dist: (m, nsample)

        When new_xyz or new_offset is omitted the points query themselves.
        The second output is the square root of the squared distances filled
        in by the kernel.
        """
        if new_xyz is None or new_offset is None:
            new_xyz = xyz
            new_offset = offset
        assert xyz.is_contiguous() and new_xyz.is_contiguous()
        assert min_radius < max_radius

        m = new_xyz.shape[0]
        # Read the batch boundaries once on the host instead of indexing the
        # offset tensor inside the loop.
        batch_end = offset.tolist()
        batch_start = [0] + batch_end[:-1]
        # Independent random permutation of the point indices of every batch.
        order = torch.cat([
            torch.randperm(end - start, dtype=torch.int32, device=offset.device) + start
            for start, end in zip(batch_start, batch_end)
        ], dim=0)
        # Allocate on the device of the inputs rather than the current CUDA
        # device implied by the legacy torch.cuda.*Tensor constructors.
        idx = torch.zeros((m, nsample), dtype=torch.int32, device=xyz.device)
        dist2 = torch.zeros((m, nsample), dtype=torch.float32, device=xyz.device)
        random_ball_query_cuda(m, nsample, min_radius, max_radius, order,
                               xyz, new_xyz, offset.int(), new_offset.int(), idx, dist2)
        return idx, torch.sqrt(dist2)
54 | 
55 | 
class BallQuery(Function):
    """Ball Query.

    Find nearby points in spherical space.
    """

    @staticmethod
    def forward(ctx, nsample, max_radius, min_radius, xyz, offset, new_xyz=None, new_offset=None):
        """
        input: coords: (n, 3), new_xyz: (m, 3), offset: (b), new_offset: (b)
        output: idx: (m, nsample), dist: (m, nsample)

        When new_xyz or new_offset is omitted the points query themselves.
        The second output is the square root of the squared distances filled
        in by the kernel.
        """
        if new_xyz is None or new_offset is None:
            new_xyz = xyz
            new_offset = offset
        assert xyz.is_contiguous() and new_xyz.is_contiguous()
        assert min_radius < max_radius

        m = new_xyz.shape[0]
        # Allocate on the device of the inputs rather than the current CUDA
        # device implied by the legacy torch.cuda.*Tensor constructors.
        idx = torch.zeros((m, nsample), dtype=torch.int32, device=xyz.device)
        dist2 = torch.zeros((m, nsample), dtype=torch.float32, device=xyz.device)
        ball_query_cuda(m, nsample, min_radius, max_radius, xyz, new_xyz, offset.int(), new_offset.int(), idx, dist2)
        return idx, torch.sqrt(dist2)
79 | 
80 | 
# Functional aliases for the autograd Functions defined above.
# knn_query(nsample, xyz, offset, new_xyz=None, new_offset=None)
knn_query = KNNQuery.apply
# ball_query / random_ball_query(nsample, max_radius, min_radius, xyz, offset, new_xyz=None, new_offset=None)
ball_query = BallQuery.apply
random_ball_query = RandomBallQuery.apply
84 | 


--------------------------------------------------------------------------------
/libs/pointops/functions/sampling.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.autograd import Function
 3 | 
 4 | from pointops._C import farthest_point_sampling_cuda
 5 | 
 6 | 
 7 | class FarthestPointSampling(Function):
 8 |     @staticmethod
 9 |     def forward(ctx, xyz, offset, new_offset):
10 |         """
11 |         input: coords: (n, 3), offset: (b), new_offset: (b)
12 |         output: idx: (m)
13 |         """
14 |         assert xyz.is_contiguous()
15 |         n, b, n_max = xyz.shape[0], offset.shape[0], offset[0]
16 |         for i in range(1, b):
17 |             n_max = max(offset[i] - offset[i - 1], n_max)
18 |         idx = torch.cuda.IntTensor(new_offset[b - 1].item()).zero_()
19 |         tmp = torch.cuda.FloatTensor(n).fill_(1e10)
20 |         farthest_point_sampling_cuda(b, n_max, xyz, offset.int(), new_offset.int(), tmp, idx)
21 |         del tmp
22 |         return idx
23 | 
24 | 
# Functional alias: farthest_point_sampling(xyz, offset, new_offset).
farthest_point_sampling = FarthestPointSampling.apply
26 | 


--------------------------------------------------------------------------------
/libs/pointops/functions/subtraction.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.autograd import Function
 3 | 
 4 | from pointops._C import subtraction_forward_cuda, subtraction_backward_cuda
 5 | 
 6 | 
 7 | class Subtraction(Function):
 8 |     @staticmethod
 9 |     def forward(ctx, input1, input2, idx):
10 |         """
11 |         input: input1: (n, c), input2: (n, c), idx: (n, nsample)
12 |         output:  (n, nsample, c)
13 |         """
14 |         assert input1.is_contiguous() and input2.is_contiguous()
15 |         n, c = input1.shape; nsample = idx.shape[-1]
16 |         output = torch.cuda.FloatTensor(n, nsample, c).zero_()
17 |         subtraction_forward_cuda(n, nsample, c, input1, input2, idx, output)
18 |         ctx.save_for_backward(idx)
19 |         return output
20 | 
21 |     @staticmethod
22 |     def backward(ctx, grad_output):
23 |         """
24 |         input: grad_out: (n, nsample, c)
25 |         output: grad_input1: (n, c), grad_input2: (n, c)
26 |         """
27 |         idx, = ctx.saved_tensors
28 |         n, nsample, c = grad_output.shape
29 |         grad_input1 = torch.cuda.FloatTensor(n, c).zero_()
30 |         grad_input2 = torch.cuda.FloatTensor(n, c).zero_()
31 |         subtraction_backward_cuda(n, nsample, c, idx, grad_output, grad_input1, grad_input2)
32 |         return grad_input1, grad_input2, None
33 | 
34 | 
# Functional alias: subtraction(input1, input2, idx).
subtraction = Subtraction.apply
36 | 


--------------------------------------------------------------------------------
/libs/pointops/functions/utils.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | from pointops import knn_query, ball_query, grouping
  3 | 
  4 | 
  5 | def knn_query_and_group(feat,
  6 |                         xyz,
  7 |                         offset=None,
  8 |                         new_xyz=None,
  9 |                         new_offset=None,
 10 |                         idx=None,
 11 |                         nsample=None,
 12 |                         with_xyz=False
 13 |                         ):
 14 |     if idx is None:
 15 |         assert nsample is not None
 16 |         idx, _ = knn_query(nsample, xyz, offset, new_xyz, new_offset)
 17 |     return grouping(idx, feat, xyz, new_xyz, with_xyz), idx
 18 | 
 19 | 
 20 | def ball_query_and_group(feat,
 21 |                          xyz,
 22 |                          offset=None,
 23 |                          new_xyz=None,
 24 |                          new_offset=None,
 25 |                          idx=None,
 26 |                          max_radio=None,
 27 |                          min_radio=0,
 28 |                          nsample=None,
 29 |                          with_xyz=False
 30 |                          ):
 31 |     if idx is None:
 32 |         assert nsample is not None and offset is not None
 33 |         assert max_radio is not None and min_radio is not None
 34 |         idx, _ = ball_query(nsample, max_radio, min_radio, xyz, offset, new_xyz, new_offset)
 35 |     return grouping(idx, feat, xyz, new_xyz, with_xyz), idx
 36 | 
 37 | 
 38 | def query_and_group(nsample,
 39 |                     xyz,
 40 |                     new_xyz,
 41 |                     feat,
 42 |                     idx,
 43 |                     offset,
 44 |                     new_offset,
 45 |                     dilation=0,
 46 |                     with_feat=True,
 47 |                     with_xyz=True,
 48 |                     ):
 49 |     """
 50 |     input: coords: (n, 3), new_xyz: (m, 3), color: (n, c), idx: (m, nsample), offset: (b), new_offset: (b)
 51 |     output: new_feat: (m, nsample, c+3), grouped_idx: (m, nsample)
 52 |     """
 53 |     assert xyz.is_contiguous() and new_xyz.is_contiguous() and feat.is_contiguous()
 54 |     if new_xyz is None:
 55 |         new_xyz = xyz
 56 | 
 57 |     if idx is None:
 58 |         num_samples_total = 1 + (nsample - 1) * (dilation + 1)
 59 |         # num points in a batch might < num_samples_total => [n1, n2, ..., nk, ns, ns, ns, ...]
 60 |         idx_no_dilation, _ = knn_query(num_samples_total, xyz, offset, new_xyz,
 61 |                                        new_offset)  # (m, nsample * (d + 1))
 62 |         idx = []
 63 |         batch_end = offset.tolist()
 64 |         batch_start = [0] + batch_end[:-1]
 65 |         new_batch_end = new_offset.tolist()
 66 |         new_batch_start = [0] + new_batch_end[:-1]
 67 |         for i in range(offset.shape[0]):
 68 |             if batch_end[i] - batch_start[i] < num_samples_total:
 69 |                 soft_dilation = (batch_end[i] - batch_start[i] - 1) / (nsample - 1) - 1
 70 |             else:
 71 |                 soft_dilation = dilation
 72 |             idx.append(idx_no_dilation[new_batch_start[i]: new_batch_end[i],
 73 |                        [int((soft_dilation + 1) * i) for i in range(nsample)]])
 74 |         idx = torch.cat(idx, dim=0)
 75 | 
 76 |     if not with_feat:
 77 |         return idx
 78 | 
 79 |     n, m, c = xyz.shape[0], new_xyz.shape[0], feat.shape[1]
 80 |     grouped_xyz = xyz[idx.view(-1).long(), :].view(m, nsample, 3)  # (m, nsample, 3)
 81 |     # grouped_xyz = grouping(coords, idx) # (m, nsample, 3)
 82 |     grouped_xyz -= new_xyz.unsqueeze(1)  # (m, nsample, 3)
 83 |     grouped_feat = feat[idx.view(-1).long(), :].view(m, nsample, c)  # (m, nsample, c)
 84 |     # grouped_feat = grouping(color, idx) # (m, nsample, c)
 85 | 
 86 |     if with_xyz:
 87 |         return torch.cat((grouped_xyz, grouped_feat), -1), idx  # (m, nsample, 3+c)
 88 |     else:
 89 |         return grouped_feat, idx
 90 | 
 91 | 
 92 | def offset2batch(offset):
 93 |     return torch.cat([
 94 |         torch.tensor([i] * (o - offset[i - 1])) if i > 0 else torch.tensor([i] * o)
 95 |         for i, o in enumerate(offset)
 96 |     ], dim=0).long().to(offset.device)
 97 | 
 98 | 
 99 | def batch2offset(batch):
100 |     return torch.cumsum(batch.bincount(), dim=0).int()


--------------------------------------------------------------------------------
/libs/pointops/setup.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from setuptools import setup
 3 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
 4 | from distutils.sysconfig import get_config_vars
 5 | 
 6 | (opt,) = get_config_vars('OPT')
 7 | os.environ['OPT'] = " ".join(
 8 |     flag for flag in opt.split() if flag != '-Wstrict-prototypes'
 9 | )
10 | 
11 | src = 'src'
12 | sources = [os.path.join(root, file) for root, dirs, files in os.walk(src)
13 |            for file in files
14 |            if file.endswith('.cpp') or file.endswith('.cu')]
15 | 
16 | setup(
17 |     name='pointops',
18 |     version='1.0',
19 |     install_requires=["torch", "numpy"],
20 |     packages=["pointops"],
21 |     package_dir={"pointops": "functions"},
22 |     ext_modules=[
23 |         CUDAExtension(
24 |             name='pointops._C',
25 |             sources=sources,
26 |             extra_compile_args={'cxx': ['-g'], 'nvcc': ['-O2']}
27 |         )
28 |     ],
29 |     cmdclass={'build_ext': BuildExtension}
30 | )
31 | 


--------------------------------------------------------------------------------
/libs/pointops/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pointcept/PointTransformerV2/5386c4d71f3d6c42c24a8105fce8750e9355dc54/libs/pointops/src/__init__.py


--------------------------------------------------------------------------------
/libs/pointops/src/aggregation/aggregation_cuda.cpp:
--------------------------------------------------------------------------------
 1 | #include <vector>
 2 | #include <torch/serialize/tensor.h>
 3 | #include <ATen/cuda/CUDAContext.h>
 4 | #include "aggregation_cuda_kernel.h"
 5 | 
 6 | 
 7 | void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor)
 8 | {
 9 |     const float *input = input_tensor.data_ptr<float>();
10 |     const float *position = position_tensor.data_ptr<float>();
11 |     const float *weight = weight_tensor.data_ptr<float>();
12 |     const int *idx = idx_tensor.data_ptr<int>();
13 |     float *output = output_tensor.data_ptr<float>();
14 |     aggregation_forward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, output);
15 | }
16 | 
17 | void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor)
18 | {
19 | 	const float *input = input_tensor.data_ptr<float>();
20 |     const float *position = position_tensor.data_ptr<float>();
21 |     const float *weight = weight_tensor.data_ptr<float>();
22 |     const int *idx = idx_tensor.data_ptr<int>();
23 |     const float *grad_output = grad_output_tensor.data_ptr<float>();
24 |     float *grad_input = grad_input_tensor.data_ptr<float>();
25 |     float *grad_position = grad_position_tensor.data_ptr<float>();
26 |     float *grad_weight = grad_weight_tensor.data_ptr<float>();
27 |     aggregation_backward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight);
28 | }
29 | 


--------------------------------------------------------------------------------
/libs/pointops/src/aggregation/aggregation_cuda_kernel.cu:
--------------------------------------------------------------------------------
 1 | #include "../cuda_utils.h"
 2 | #include "aggregation_cuda_kernel.h"
 3 | 
 4 | 
 5 | __global__ void aggregation_forward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) {
 6 |     // input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c)
 7 |     int index = blockIdx.x * blockDim.x + threadIdx.x;
 8 |     if (index >= n * c) return;
 9 |     const int c_idx = index % c;
10 |     const int n_idx = index / c;
11 |     const int w_c_idx = c_idx % w_c;
12 |     for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++)
13 |     {   
14 |         int idx_idx = n_idx * nsample + nsample_idx;
15 |         int input_idx = idx[idx_idx] * c + c_idx;
16 |         int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx;
17 |         int weight_idx = n_idx * nsample * w_c + nsample_idx * w_c + w_c_idx;
18 |         output[index] += (input[input_idx] + position[position_idx]) * weight[weight_idx];
19 |     }
20 | }
21 | 
__global__ void aggregation_backward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) {
    // input: grad_output: (n, c), output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight: (n, nsample, w_c)
    // One thread per grad_output element; each thread scatters to its nsample neighbours.
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid >= n * c) return;
    const int channel = tid % c;
    const int point = tid / c;
    const int w_channel = channel % w_c;
    const float upstream = grad_output[tid];
    for (int s = 0; s < nsample; ++s)
    {
        const int pair = point * nsample + s;          // flat (point, sample) index
        const int src = idx[pair] * c + channel;       // gathered input element
        const int pos = pair * c + channel;            // position element
        const int wgt = pair * w_c + w_channel;        // weight element
        const float weighted = upstream * weight[wgt];
        // grad_input is addressed through idx, so accumulate atomically.
        atomicAdd(grad_input + src, weighted);
        // Each position element is addressed by exactly one thread.
        grad_position[pos] = weighted;
        // Channels that share a weight channel (channel % w_c) hit the same slot.
        atomicAdd(grad_weight + wgt, upstream * (input[src] + position[pos]));
    }
}
40 | 
void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) {
    // input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c)
    // Nothing to compute; a zero-block grid would be an invalid launch configuration.
    if (n * c <= 0) return;
    dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    // Launch on PyTorch's current stream so the kernel stays ordered with the
    // tensor operations issued around it, also under a non-default stream.
    cudaStream_t stream = at::cuda::getCurrentCUDAStream();
    aggregation_forward_cuda_kernel<<<blocks, threads, 0, stream>>>(n, nsample, c, w_c, input, position, weight, idx, output);
}
47 | 
void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) {
    // input: grad_output: (n, c), output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight: (n, nsample, w_c)
    // Nothing to compute; a zero-block grid would be an invalid launch configuration.
    if (n * c <= 0) return;
    dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    // Launch on PyTorch's current stream so the kernel stays ordered with the
    // tensor operations issued around it, also under a non-default stream.
    cudaStream_t stream = at::cuda::getCurrentCUDAStream();
    aggregation_backward_cuda_kernel<<<blocks, threads, 0, stream>>>(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight);
}
54 | 


--------------------------------------------------------------------------------
/libs/pointops/src/aggregation/aggregation_cuda_kernel.h:
--------------------------------------------------------------------------------
// Declarations for the aggregation operator: tensor-level entry points and
// the raw-pointer kernel launchers they call.
#ifndef _AGGREGATION_CUDA_KERNEL
#define _AGGREGATION_CUDA_KERNEL
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>

// Tensor-level entry points. Result tensors (output / grad_*) are supplied by
// the caller and filled in place.
void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor);
void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor);

// The launchers below get C linkage when this header is compiled as C++.
#ifdef __cplusplus
extern "C" {
#endif

// Raw-pointer launchers operating on float features and int neighbour indices.
void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output);
void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight);

#ifdef __cplusplus
}
#endif
#endif
21 | 


--------------------------------------------------------------------------------
/libs/pointops/src/attention/attention_cuda_kernel.h:
--------------------------------------------------------------------------------
// Declarations for the attention operators (relation step and fusion step):
// tensor-level entry points and raw-pointer kernel launchers.
#ifndef _ATTENTION_CUDA_KERNEL
#define _ATTENTION_CUDA_KERNEL
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>

// Tensor-level entry points. Result tensors (output / grad_*) are passed in by
// the caller. NOTE(review): presumably each one unwraps its tensors and calls
// the launcher of the same name below -- the definitions are not in this header.
void attention_relation_step_forward_cuda(int m, int g, int c,
                                          at::Tensor query_tensor, at::Tensor key_tensor, at::Tensor weight_tensor,
                                          at::Tensor index_target_tensor, at::Tensor index_refer_tensor,
                                          at::Tensor output_tensor);
void attention_relation_step_backward_cuda(int m, int g, int c,
                                           at::Tensor query_tensor, at::Tensor grad_query_tensor,
                                           at::Tensor key_tensor, at::Tensor grad_key_tensor,
                                           at::Tensor weight_tensor, at::Tensor grad_weight_tensor,
                                           at::Tensor index_target_tensor, at::Tensor index_refer_tensor,
                                           at::Tensor grad_output_tensor);
void attention_fusion_step_forward_cuda(int m, int g, int c,
                                        at::Tensor weight_tensor, at::Tensor value_tensor,
                                        at::Tensor index_target_tensor, at::Tensor index_refer_tensor,
                                        at::Tensor output_tensor);
void attention_fusion_step_backward_cuda(int m, int g, int c,
                                         at::Tensor weight_tensor, at::Tensor grad_weight_tensor,
                                         at::Tensor value_tensor, at::Tensor grad_value_tensor,
                                         at::Tensor index_target_tensor, at::Tensor index_refer_tensor,
                                         at::Tensor grad_output_tensor);

// The launchers below get C linkage when this header is compiled as C++.
#ifdef __cplusplus
extern "C" {
#endif

// Raw-pointer launchers: float data, int indices; non-const pointers are outputs.
void attention_relation_step_forward_cuda_launcher(int m, int g, int c,
                                                   const float *query, const float *key, const float *weight,
                                                   const int *index_target, const int *index_refer,
                                                   float *output);
void attention_relation_step_backward_cuda_launcher(int m, int g, int c,
                                                    const float *query, float *grad_query,
                                                    const float *key, float *grad_key,
                                                    const float *weight, float *grad_weight,
                                                    const int *index_target, const int *index_refer,
                                                    const float *grad_output);
void attention_fusion_step_forward_cuda_launcher(int m, int g, int c,
                                                 const float *weight, const float *value,
                                                 const int *index_target, const int *index_refer,
                                                 float *output);
void attention_fusion_step_backward_cuda_launcher(int m, int g, int c,
                                                  const float *weight, float *grad_weight,
                                                  const float *value, float *grad_value,
                                                  const int *index_target, const int *index_refer,
                                                  const float *grad_output);

#ifdef __cplusplus
}
#endif
#endif
55 | 


--------------------------------------------------------------------------------
/libs/pointops/src/ball_query/ball_query_cuda.cpp:
--------------------------------------------------------------------------------
 1 | #include <vector>
 2 | #include <torch/serialize/tensor.h>
 3 | #include <ATen/cuda/CUDAContext.h>
 4 | #include "ball_query_cuda_kernel.h"
 5 | 
 6 | 
 7 | void ball_query_cuda(int m, int nsample,
 8 |                      float min_radius, float max_radius,
 9 |                      at::Tensor xyz_tensor, at::Tensor new_xyz_tensor,
10 |                      at::Tensor offset_tensor, at::Tensor new_offset_tensor,
11 |                      at::Tensor idx_tensor, at::Tensor dist2_tensor)
12 | {
13 |     const float *xyz = xyz_tensor.data_ptr<float>();
14 |     const float *new_xyz = new_xyz_tensor.data_ptr<float>();
15 |     const int *offset = offset_tensor.data_ptr<int>();
16 |     const int *new_offset = new_offset_tensor.data_ptr<int>();
17 |     int *idx = idx_tensor.data_ptr<int>();
18 |     float *dist2 = dist2_tensor.data_ptr<float>();
19 |     ball_query_cuda_launcher(m, nsample, min_radius, max_radius, xyz, new_xyz, offset, new_offset, idx, dist2);
20 | }
21 | 


--------------------------------------------------------------------------------
/libs/pointops/src/ball_query/ball_query_cuda_kernel.h:
--------------------------------------------------------------------------------
// Declarations for the ball query operator: tensor-level entry point and the
// raw-pointer kernel launcher it calls.
#ifndef _BALL_QUERY_CUDA_KERNEL
#define _BALL_QUERY_CUDA_KERNEL
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>

// Tensor-level entry point; idx_tensor and dist2_tensor are supplied by the
// caller and filled in place.
void ball_query_cuda(int m, int nsample,
                     float min_radius, float max_radius,
                     at::Tensor xyz_tensor, at::Tensor new_xyz_tensor,
                     at::Tensor offset_tensor, at::Tensor new_offset_tensor,
                     at::Tensor idx_tensor, at::Tensor dist2_tensor);

// The launcher below gets C linkage when this header is compiled as C++.
#ifdef __cplusplus
extern "C" {
#endif

// Raw-pointer launcher: float coordinates, int offsets; idx and dist2 are outputs.
void ball_query_cuda_launcher(int m, int nsample,
                              float min_radius, float max_radius,
                              const float *xyz, const float *new_xyz,
                              const int *offset, const int *new_offset,
                              int *idx, float *dist2);

#ifdef __cplusplus
}
#endif
#endif
27 | 


--------------------------------------------------------------------------------
/libs/pointops/src/cuda_utils.h:
--------------------------------------------------------------------------------
 1 | #ifndef _CUDA_UTILS_H
 2 | #define _CUDA_UTILS_H
 3 | 
 4 | #include <cmath>
 5 | #include <algorithm>
 6 | 
 7 | #define TOTAL_THREADS 1024
 8 | #define THREADS_PER_BLOCK 512
 9 | #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
10 | 
11 | inline int opt_n_threads(int work_size) {
12 |     const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0);
13 |     return std::max(std::min(1 << pow_2, TOTAL_THREADS), 1);
14 | }
15 | 
16 | inline dim3 opt_block_config(int x, int y) {
17 |     const int x_threads = opt_n_threads(x);
18 |     const int y_threads = std::max(std::min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1);
19 |     dim3 block_config(x_threads, y_threads, 1);
20 |     return block_config;
21 | }
22 | 
23 | #endif
24 | 


--------------------------------------------------------------------------------
/libs/pointops/src/grouping/grouping_cuda.cpp:
--------------------------------------------------------------------------------
 1 | #include <vector>
 2 | #include <torch/serialize/tensor.h>
 3 | #include <ATen/cuda/CUDAContext.h>
 4 | #include "grouping_cuda_kernel.h"
 5 | 
 6 | 
 7 | void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor)
 8 | {
 9 |     const float *input = input_tensor.data_ptr<float>();
10 |     const int *idx = idx_tensor.data_ptr<int>();
11 |     float *output = output_tensor.data_ptr<float>();
12 |     grouping_forward_cuda_launcher(m, nsample, c, input, idx, output);
13 | }
14 | 
15 | void grouping_backward_cuda(int m, int nsample, int c, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor)
16 | {
17 |     const float *grad_output = grad_output_tensor.data_ptr<float>();
18 |     const int *idx = idx_tensor.data_ptr<int>();
19 |     float *grad_input = grad_input_tensor.data_ptr<float>();
20 |     grouping_backward_cuda_launcher(m, nsample, c, grad_output, idx, grad_input);
21 | }
22 | 


--------------------------------------------------------------------------------
/libs/pointops/src/grouping/grouping_cuda_kernel.cu:
--------------------------------------------------------------------------------
 1 | #include "../cuda_utils.h"
 2 | #include "grouping_cuda_kernel.h"
 3 | 
 4 | 
 5 | __global__ void grouping_forward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ input, const int *__restrict__ idx, float *__restrict__ output) {
 6 |     // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c)
 7 |     int index = blockIdx.x * blockDim.x + threadIdx.x;
 8 |     if (index >= m * nsample * c) return;
 9 |     const int c_idx = index % c;
10 |     const int nsample_idx = (index / c) % nsample;
11 |     const int m_idx = index / nsample / c;
12 |     const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx;
13 |     output[index] = input[input_idx];
14 | }
15 | 
__global__ void grouping_backward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ grad_output, const int *__restrict__ idx, float *__restrict__ grad_input) {
    // input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c)
    // One thread per grad_output element: scatter-add it onto the indexed input row.
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid >= m * nsample * c) return;
    const int channel = tid % c;
    // Flat (query, sample) position; equals m_idx * nsample + nsample_idx.
    const int pair = tid / c;
    const int dst = idx[pair] * c + channel;
    // Destinations are addressed through idx, so accumulate atomically.
    atomicAdd(grad_input + dst, grad_output[tid]);
}
26 | 
void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output) {
    // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c)
    // Nothing to compute; a zero-block grid would be an invalid launch configuration.
    if (m * nsample * c <= 0) return;
    dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    // Launch on PyTorch's current stream so the kernel stays ordered with the
    // tensor operations issued around it, also under a non-default stream.
    cudaStream_t stream = at::cuda::getCurrentCUDAStream();
    grouping_forward_cuda_kernel<<<blocks, threads, 0, stream>>>(m, nsample, c, input, idx, output);
}
33 | 
void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input)
{
    // input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c)
    // Nothing to compute; a zero-block grid would be an invalid launch configuration.
    if (m * nsample * c <= 0) return;
    dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    // Launch on PyTorch's current stream so the kernel stays ordered with the
    // tensor operations issued around it, also under a non-default stream.
    cudaStream_t stream = at::cuda::getCurrentCUDAStream();
    grouping_backward_cuda_kernel<<<blocks, threads, 0, stream>>>(m, nsample, c, grad_output, idx, grad_input);
}
41 | 


--------------------------------------------------------------------------------
/libs/pointops/src/grouping/grouping_cuda_kernel.h:
--------------------------------------------------------------------------------
// Declarations for the grouping (gather by index) operator: tensor-level entry
// points and the raw-pointer kernel launchers they call.
#ifndef _GROUPING_CUDA_KERNEL
#define _GROUPING_CUDA_KERNEL
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>

// Tensor-level entry points. The last tensor argument of each is supplied by
// the caller and filled in place.
void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor);
void grouping_backward_cuda(int m, int nsample, int c, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor);

// The launchers below get C linkage when this header is compiled as C++.
#ifdef __cplusplus
extern "C" {
#endif

// Raw-pointer launchers: float features, int indices; the non-const pointer is the output.
void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output);
void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input);

#ifdef __cplusplus
}
#endif
#endif
21 | 


--------------------------------------------------------------------------------
/libs/pointops/src/interpolation/interpolation_cuda.cpp:
--------------------------------------------------------------------------------
 1 | #include <vector>
 2 | #include <torch/serialize/tensor.h>
 3 | #include <ATen/cuda/CUDAContext.h>
 4 | #include "interpolation_cuda_kernel.h"
 5 | 
 6 | 
 7 | void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor) {
 8 |     // Unwrap the tensors into typed raw pointers (float / int) and hand them to the CUDA launcher.
 9 |     const float *src = input_tensor.data_ptr<float>();
10 |     const int *nbr_idx = idx_tensor.data_ptr<int>();
11 |     const float *nbr_weight = weight_tensor.data_ptr<float>();
12 |     float *dst = output_tensor.data_ptr<float>();
13 |     interpolation_forward_cuda_launcher(n, c, k, src, nbr_idx, nbr_weight, dst);
14 | }
15 | 
16 | void interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_input_tensor) {
17 |     // Unwrap the tensors into typed raw pointers (float / int) and hand them to the CUDA launcher.
18 |     const float *grad_out = grad_output_tensor.data_ptr<float>();
19 |     const int *nbr_idx = idx_tensor.data_ptr<int>();
20 |     const float *nbr_weight = weight_tensor.data_ptr<float>();
21 |     float *grad_in = grad_input_tensor.data_ptr<float>();
22 |     interpolation_backward_cuda_launcher(n, c, k, grad_out, nbr_idx, nbr_weight, grad_in);
23 | }
24 | 


--------------------------------------------------------------------------------
/libs/pointops/src/interpolation/interpolation_cuda_kernel.cu:
--------------------------------------------------------------------------------
 1 | #include "../cuda_utils.h"
 2 | #include "interpolation_cuda_kernel.h"
 3 | 
 4 | 
 5 | __global__ void interpolation_forward_cuda_kernel(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output) {
 6 |     // Weighted feature interpolation: output[p, ch] += sum_j weight[p, j] * input[idx[p, j], ch].
 7 |     // Shapes: input (m, c), idx (n, k), weight (n, k), output (n, c). One thread per (p, ch) entry.
 8 |     // The terms are accumulated onto whatever output already holds; nothing in this kernel clears it first.
 9 |     const int tid = blockIdx.x * blockDim.x + threadIdx.x;
10 |     if (tid >= n * c) return;
11 |     const int channel = tid % c;
12 |     const int point = tid / c;
13 |     const int *nbr = idx + point * k;       // the k source rows for this point
14 |     const float *w = weight + point * k;    // the matching interpolation weights
15 |     for (int j = 0; j < k; ++j) {
16 |         output[tid] += input[nbr[j] * c + channel] * w[j];
17 |     }
18 | }
19 | 
20 | __global__ void interpolation_backward_cuda_kernel(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input) {
21 |     // Backward of the weighted interpolation: grad_input[idx[p, j], ch] += weight[p, j] * grad_output[p, ch].
22 |     // Shapes: grad_output (n, c), idx (n, k), weight (n, k), grad_input (m, c). One thread per (p, ch) entry.
23 |     // atomicAdd is used for the scatter, since several (p, j) pairs may name the same grad_input row.
24 |     const int tid = blockIdx.x * blockDim.x + threadIdx.x;
25 |     if (tid >= n * c) return;
26 |     const int channel = tid % c;
27 |     const int point = tid / c;
28 |     const int *nbr = idx + point * k;       // the k destination rows for this point
29 |     const float *w = weight + point * k;    // the matching interpolation weights
30 |     for (int j = 0; j < k; ++j) {
31 |         atomicAdd(grad_input + nbr[j] * c + channel, grad_output[tid] * w[j]);
32 |     }
33 | }
34 | 
35 | void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output) {
36 |     // Shapes: input (m, c), idx (n, k), weight (n, k), output (n, c). The grid is sized from n * c.
37 |     // Launched on PyTorch's current CUDA stream (not the legacy default stream) so it stays ordered with surrounding tensor ops.
38 |     dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)), threads(THREADS_PER_BLOCK);
39 |     interpolation_forward_cuda_kernel<<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(n, c, k, input, idx, weight, output);
40 | }
41 | 
42 | void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input) {
43 |     // Shapes: grad_output (n, c), idx (n, k), weight (n, k), grad_input (m, c). The grid is sized from n * c.
44 |     // Launched on PyTorch's current CUDA stream (not the legacy default stream) so it stays ordered with surrounding tensor ops.
45 |     dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)), threads(THREADS_PER_BLOCK);
46 |     interpolation_backward_cuda_kernel<<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(n, c, k, grad_output, idx, weight, grad_input);
47 | }
48 | 


--------------------------------------------------------------------------------
/libs/pointops/src/interpolation/interpolation_cuda_kernel.h:
--------------------------------------------------------------------------------
 1 | #ifndef _INTERPOLATION_CUDA_KERNEL
 2 | #define _INTERPOLATION_CUDA_KERNEL
 3 | #include <vector>
 4 | #include <torch/serialize/tensor.h>
 5 | #include <ATen/cuda/CUDAContext.h>
 6 | // Tensor-level entry points (defined in interpolation_cuda.cpp, registered with pybind11 in pointops_api.cpp).
 7 | void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor);
 8 | void interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_input_tensor);
 9 | 
10 | #ifdef __cplusplus
11 | extern "C" {
12 | #endif
13 | // Raw-pointer kernel launchers (defined in interpolation_cuda_kernel.cu); C linkage when compiled as C++.
14 | void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output);
15 | void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input);
16 | 
17 | #ifdef __cplusplus
18 | }
19 | #endif
20 | #endif  // _INTERPOLATION_CUDA_KERNEL
21 | 


--------------------------------------------------------------------------------
/libs/pointops/src/knn_query/knn_query_cuda.cpp:
--------------------------------------------------------------------------------
 1 | #include <vector>
 2 | #include <torch/serialize/tensor.h>
 3 | #include <ATen/cuda/CUDAContext.h>
 4 | #include "knn_query_cuda_kernel.h"
 5 | 
 6 | 
 7 | void knn_query_cuda(int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor) {
 8 |     TORCH_CHECK(nsample <= 128, "knn_query_cuda: nsample must be <= 128, got ", nsample);  // the kernel's per-thread heap buffers hold 128 entries
 9 |     const float *xyz = xyz_tensor.data_ptr<float>();              // candidate points
10 |     const float *new_xyz = new_xyz_tensor.data_ptr<float>();      // query points
11 |     const int *offset = offset_tensor.data_ptr<int>();            // batch offsets for xyz
12 |     const int *new_offset = new_offset_tensor.data_ptr<int>();    // batch offsets for new_xyz
13 |     int *idx = idx_tensor.data_ptr<int>();                        // output: neighbor indices
14 |     float *dist2 = dist2_tensor.data_ptr<float>();                // output: squared distances
15 |     knn_query_cuda_launcher(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2);
16 | }
17 | 


--------------------------------------------------------------------------------
/libs/pointops/src/knn_query/knn_query_cuda_kernel.cu:
--------------------------------------------------------------------------------
  1 | #include "../cuda_utils.h"
  2 | #include "knn_query_cuda_kernel.h"
  3 | 
  4 | 
  5 | namespace knn_query_utils{
  6 | // Exchange the two pointed-to values.
  7 | template <typename DType>
  8 | __device__ void swap(DType *x, DType *y)
  9 | {
 10 |     DType tmp = *x;
 11 |     *x = *y;
 12 |     *y = tmp;
 13 | }
 14 | // Sift the root of the max-heap dist[0..k) down until heap order holds again; idx is permuted in lockstep with dist.
 15 | __device__ void reheap(float *dist, int *idx, int k)
 16 | {
 17 |     int root = 0;
 18 |     int child = root * 2 + 1;
 19 |     while (child < k)
 20 |     {
 21 |         if(child + 1 < k && dist[child+1] > dist[child])  // pick the larger of the two children
 22 |             child++;
 23 |         if(dist[root] > dist[child])  // parent is strictly larger than that child: heap order restored
 24 |             return;
 25 |         swap<float>(&dist[root], &dist[child]);
 26 |         swap<int>(&idx[root], &idx[child]);
 27 |         root = child;
 28 |         child = root * 2 + 1;
 29 |     }
 30 | }
 31 | // In-place heap sort of dist[0..k) (idx follows): each pass moves the current root to slot i and re-heapifies
 32 | // the first i entries. If dist[0..k) is a max-heap on entry, it ends up in ascending order.
 33 | __device__ void heap_sort(float *dist, int *idx, int k)
 34 | {
 35 |     int i;
 36 |     for (i = k - 1; i > 0; i--)
 37 |     {
 38 |         swap<float>(&dist[0], &dist[i]);
 39 |         swap<int>(&idx[0], &idx[i]);
 40 |         reheap(dist, idx, i);
 41 |     }
 42 | }
 43 | // Return the smallest i such that idx < offset[i] (linear scan from i = 0).
 44 | // The scan has no upper bound on i, so the caller must guarantee that some offset entry exceeds idx.
 45 | __device__ int get_bt_idx(int idx, const int *offset)
 46 | {
 47 |     int i = 0;
 48 |     while (1)
 49 |     {
 50 |         if (idx < offset[i])
 51 |             break;
 52 |         else
 53 |             i++;
 54 |     }
 55 |     return i;
 56 | }
 57 | }  // namespace knn_query_utils
 58 | 
 59 | 
 60 | __global__ void knn_query_cuda_kernel(int m, int nsample, const float *__restrict__ xyz, const float *__restrict__ new_xyz, const int *__restrict__ offset, const int *__restrict__ new_offset, int *__restrict__ idx, float *__restrict__ dist2) {
 61 |     // Brute-force k-nearest-neighbour search, one thread per query point.
 62 |     // xyz: (n, 3) candidates, new_xyz: (m, 3) queries; writes idx (m, nsample) and dist2 (m, nsample),
 63 |     // each row ordered by ascending squared distance. Slots that are never filled keep idx -1 and dist2 1e10.
 64 |     // NOTE: the candidate heap below has room for 128 entries, so nsample must not exceed 128 (not checked here).
 65 |     const int query = blockIdx.x * blockDim.x + threadIdx.x;
 66 |     if (query >= m) return;
 67 | 
 68 |     // Only rows [first, last) of xyz are searched: the range that the offset arrays assign to this query's batch.
 69 |     const int batch = knn_query_utils::get_bt_idx(query, new_offset);
 70 |     const int first = (batch == 0) ? 0 : offset[batch - 1];
 71 |     const int last = offset[batch];
 72 | 
 73 |     const float qx = new_xyz[query * 3 + 0];
 74 |     const float qy = new_xyz[query * 3 + 1];
 75 |     const float qz = new_xyz[query * 3 + 2];
 76 |     int *out_idx = idx + query * nsample;
 77 |     float *out_dist2 = dist2 + query * nsample;
 78 | 
 79 |     // Max-heap of the best candidates seen so far; the root (slot 0) is the worst one currently kept.
 80 |     float heap_dist[128];
 81 |     int heap_idx[128];
 82 |     for (int s = 0; s < nsample; ++s) {
 83 |         heap_dist[s] = 1e10;
 84 |         heap_idx[s] = -1;
 85 |     }
 86 | 
 87 |     for (int p = first; p < last; ++p) {
 88 |         const float dx = qx - xyz[p * 3 + 0];
 89 |         const float dy = qy - xyz[p * 3 + 1];
 90 |         const float dz = qz - xyz[p * 3 + 2];
 91 |         const float d2 = dx * dx + dy * dy + dz * dz;
 92 |         if (!(d2 < heap_dist[0])) continue;  // not closer than the worst kept candidate
 93 |         heap_dist[0] = d2;
 94 |         heap_idx[0] = p;
 95 |         knn_query_utils::reheap(heap_dist, heap_idx, nsample);
 96 |     }
 97 | 
 98 |     // Turn the heap into an ascending list, then copy it to this query's output rows.
 99 |     knn_query_utils::heap_sort(heap_dist, heap_idx, nsample);
100 |     for (int s = 0; s < nsample; ++s) {
101 |         out_idx[s] = heap_idx[s];
102 |         out_dist2[s] = heap_dist[s];
103 |     }
104 | }
105 | 
106 | 
107 | void knn_query_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2) {
108 |     // Shapes: new_xyz (m, 3), xyz (n, 3), idx (m, nsample). The grid is sized from m, the number of query points.
109 |     // Launched on PyTorch's current CUDA stream (not the legacy default stream) so it stays ordered with surrounding tensor ops.
110 |     dim3 blocks(DIVUP(m, THREADS_PER_BLOCK)), threads(THREADS_PER_BLOCK);
111 |     knn_query_cuda_kernel<<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2);
112 | }
113 | 


--------------------------------------------------------------------------------
/libs/pointops/src/knn_query/knn_query_cuda_kernel.h:
--------------------------------------------------------------------------------
 1 | #ifndef _KNN_QUERY_CUDA_KERNEL
 2 | #define _KNN_QUERY_CUDA_KERNEL
 3 | #include <vector>
 4 | #include <torch/serialize/tensor.h>
 5 | #include <ATen/cuda/CUDAContext.h>
 6 | // Tensor-level entry point (defined in knn_query_cuda.cpp, registered with pybind11 in pointops_api.cpp).
 7 | void knn_query_cuda(int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor);
 8 | 
 9 | #ifdef __cplusplus
10 | extern "C" {
11 | #endif
12 | // Raw-pointer kernel launcher (defined in knn_query_cuda_kernel.cu); C linkage when compiled as C++.
13 | void knn_query_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2);
14 | 
15 | #ifdef __cplusplus
16 | }
17 | #endif
18 | #endif  // _KNN_QUERY_CUDA_KERNEL
19 | 


--------------------------------------------------------------------------------
/libs/pointops/src/pointops_api.cpp:
--------------------------------------------------------------------------------
 1 | #include <torch/serialize/tensor.h>
 2 | #include <torch/extension.h>
 3 | 
 4 | #include "knn_query/knn_query_cuda_kernel.h"
 5 | #include "ball_query/ball_query_cuda_kernel.h"
 6 | #include "random_ball_query/random_ball_query_cuda_kernel.h"
 7 | #include "sampling/sampling_cuda_kernel.h"
 8 | #include "grouping/grouping_cuda_kernel.h"
 9 | #include "interpolation/interpolation_cuda_kernel.h"
10 | #include "aggregation/aggregation_cuda_kernel.h"
11 | #include "subtraction/subtraction_cuda_kernel.h"
12 | #include "attention/attention_cuda_kernel.h"
13 | 
14 | 
15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {  // every op is exported under the same name as its C++ function
16 |     m.def("knn_query_cuda", &knn_query_cuda, "knn_query_cuda");  // neighbor queries
17 |     m.def("ball_query_cuda", &ball_query_cuda, "ball_query_cuda");
18 |     m.def("random_ball_query_cuda", &random_ball_query_cuda, "random_ball_query_cuda");
19 |     m.def("farthest_point_sampling_cuda", &farthest_point_sampling_cuda, "farthest_point_sampling_cuda");  // sampling
20 |     m.def("grouping_forward_cuda", &grouping_forward_cuda, "grouping_forward_cuda");  // grouping (forward / backward)
21 |     m.def("grouping_backward_cuda", &grouping_backward_cuda, "grouping_backward_cuda");
22 |     m.def("interpolation_forward_cuda", &interpolation_forward_cuda, "interpolation_forward_cuda");  // interpolation (forward / backward)
23 |     m.def("interpolation_backward_cuda", &interpolation_backward_cuda, "interpolation_backward_cuda");
24 |     m.def("subtraction_forward_cuda", &subtraction_forward_cuda, "subtraction_forward_cuda");  // subtraction (forward / backward)
25 |     m.def("subtraction_backward_cuda", &subtraction_backward_cuda, "subtraction_backward_cuda");
26 |     m.def("aggregation_forward_cuda", &aggregation_forward_cuda, "aggregation_forward_cuda");  // aggregation (forward / backward)
27 |     m.def("aggregation_backward_cuda", &aggregation_backward_cuda, "aggregation_backward_cuda");
28 |     m.def("attention_relation_step_forward_cuda", &attention_relation_step_forward_cuda, "attention_relation_step_forward_cuda");  // attention: relation step
29 |     m.def("attention_relation_step_backward_cuda", &attention_relation_step_backward_cuda, "attention_relation_step_backward_cuda");
30 |     m.def("attention_fusion_step_forward_cuda", &attention_fusion_step_forward_cuda, "attention_fusion_step_forward_cuda");  // attention: fusion step
31 |     m.def("attention_fusion_step_backward_cuda", &attention_fusion_step_backward_cuda, "attention_fusion_step_backward_cuda");
32 | }
33 | 


--------------------------------------------------------------------------------
/libs/pointops/src/random_ball_query/random_ball_query_cuda.cpp:
--------------------------------------------------------------------------------
 1 | #include <vector>
 2 | #include <torch/serialize/tensor.h>
 3 | #include <ATen/cuda/CUDAContext.h>
 4 | #include "random_ball_query_cuda_kernel.h"
 5 | 
 6 | 
 7 | void random_ball_query_cuda(int m, int nsample,
 8 |                             float min_radius, float max_radius, at::Tensor order_tensor,
 9 |                             at::Tensor xyz_tensor, at::Tensor new_xyz_tensor,
10 |                             at::Tensor offset_tensor, at::Tensor new_offset_tensor,
11 |                             at::Tensor idx_tensor, at::Tensor dist2_tensor) {
12 |     // Unwrap the tensors into typed raw pointers (float / int) and hand them, with the scalars, to the CUDA launcher.
13 |     const int *scan_order = order_tensor.data_ptr<int>();
14 |     const float *points = xyz_tensor.data_ptr<float>();
15 |     const float *queries = new_xyz_tensor.data_ptr<float>();
16 |     const int *point_offset = offset_tensor.data_ptr<int>();
17 |     const int *query_offset = new_offset_tensor.data_ptr<int>();
18 |     int *out_idx = idx_tensor.data_ptr<int>();
19 |     float *out_dist2 = dist2_tensor.data_ptr<float>();
20 |     random_ball_query_cuda_launcher(m, nsample, min_radius, max_radius, scan_order, points, queries, point_offset, query_offset, out_idx, out_dist2);
21 | }
22 | 


--------------------------------------------------------------------------------
/libs/pointops/src/random_ball_query/random_ball_query_cuda_kernel.cu:
--------------------------------------------------------------------------------
  1 | #include "../cuda_utils.h"
  2 | #include "random_ball_query_cuda_kernel.h"
  3 | 
  4 | 
  5 | namespace random_ball_query_utils{
  6 | // Exchange the two pointed-to values.
  7 | template <typename DType>
  8 | __device__ void swap(DType *x, DType *y)
  9 | {
 10 |     DType tmp = *x;
 11 |     *x = *y;
 12 |     *y = tmp;
 13 | }
 14 | // Sift the root of the max-heap dist[0..k) down until heap order holds again; idx is permuted in lockstep with dist.
 15 | __device__ void reheap(float *dist, int *idx, int k)
 16 | {
 17 |     int root = 0;
 18 |     int child = root * 2 + 1;
 19 |     while (child < k)
 20 |     {
 21 |         if(child + 1 < k && dist[child+1] > dist[child])  // pick the larger of the two children
 22 |             child++;
 23 |         if(dist[root] > dist[child])  // parent is strictly larger than that child: heap order restored
 24 |             return;
 25 |         swap<float>(&dist[root], &dist[child]);
 26 |         swap<int>(&idx[root], &idx[child]);
 27 |         root = child;
 28 |         child = root * 2 + 1;
 29 |     }
 30 | }
 31 | // In-place heap sort of dist[0..k) (idx follows): each pass moves the current root to slot i and re-heapifies
 32 | // the first i entries. If dist[0..k) is a max-heap on entry, it ends up in ascending order.
 33 | __device__ void heap_sort(float *dist, int *idx, int k)
 34 | {
 35 |     int i;
 36 |     for (i = k - 1; i > 0; i--)
 37 |     {
 38 |         swap<float>(&dist[0], &dist[i]);
 39 |         swap<int>(&idx[0], &idx[i]);
 40 |         reheap(dist, idx, i);
 41 |     }
 42 | }
 43 | // Return the smallest i such that idx < offset[i]; unbounded scan, so some offset entry must exceed idx.
 44 | __device__ int get_bt_idx(int idx, const int *offset)
 45 | {
 46 |     int i = 0;
 47 |     while (1)
 48 |     {
 49 |         if (idx < offset[i])
 50 |             break;
 51 |         else
 52 |             i++;
 53 |     }
 54 |     return i;
 55 | }
 56 | }  // namespace random_ball_query_utils
 57 | 
 58 | __global__ void random_ball_query_cuda_kernel(int m, int nsample,
 59 |                                               float min_radius, float max_radius, const int *__restrict__ order,
 60 |                                               const float *__restrict__ xyz, const float *__restrict__ new_xyz,
 61 |                                               const int *__restrict__ offset, const int *__restrict__ new_offset,
 62 |                                               int *__restrict__ idx, float *__restrict__ dist2) {
 63 |     // Ball query that visits candidates in the caller-supplied `order` and keeps the first nsample hits.
 64 |     // xyz: (n, 3) candidates, new_xyz: (m, 3) queries; writes idx (m, nsample) and dist2 (m, nsample).
 65 |     // One thread per query point. A candidate is a hit when its squared distance d2 satisfies
 66 |     // d2 <= 1e-5, or min_radius^2 <= d2 < max_radius^2.
 67 |     // Output slots left over after the scan are filled with idx -1 and dist2 1e10.
 68 |     const int query = blockIdx.x * blockDim.x + threadIdx.x;
 69 |     if (query >= m) return;
 70 | 
 71 |     // Scan positions [first, last) of `order` are the range the offset arrays assign to this query's batch.
 72 |     const int batch = random_ball_query_utils::get_bt_idx(query, new_offset);
 73 |     const int first = (batch == 0) ? 0 : offset[batch - 1];
 74 |     const int last = offset[batch];
 75 | 
 76 |     const float r2_max = max_radius * max_radius;
 77 |     const float r2_min = min_radius * min_radius;
 78 |     const float qx = new_xyz[query * 3 + 0];
 79 |     const float qy = new_xyz[query * 3 + 1];
 80 |     const float qz = new_xyz[query * 3 + 2];
 81 |     int *out_idx = idx + query * nsample;
 82 |     float *out_dist2 = dist2 + query * nsample;
 83 | 
 84 |     // Walk the candidates in scan order, recording hits until nsample of them were found
 85 |     // or the batch is exhausted.
 86 |     int found = 0;
 87 |     for (int pos = first; pos < last; ++pos) {
 88 |         const int cand = order[pos];
 89 |         const float dx = qx - xyz[cand * 3 + 0];
 90 |         const float dy = qy - xyz[cand * 3 + 1];
 91 |         const float dz = qz - xyz[cand * 3 + 2];
 92 |         const float d2 = dx * dx + dy * dy + dz * dz;
 93 |         const bool coincident = d2 <= 1e-5;
 94 |         const bool in_shell = d2 >= r2_min && d2 < r2_max;
 95 |         if (!(coincident || in_shell)) continue;
 96 | 
 97 |         out_dist2[found] = d2;
 98 |         out_idx[found] = cand;
 99 |         ++found;
100 |         if (found >= nsample) break;
101 |     }
102 | 
103 |     // Mark the unused tail slots as empty.
104 |     for (int s = found; s < nsample; ++s) {
105 |         out_idx[s] = -1;
106 |         out_dist2[s] = 1e10;
107 |     }
108 | }
109 | 
110 | void random_ball_query_cuda_launcher(int m, int nsample,
111 |                                      float min_radius, float max_radius, const int *order,
112 |                                      const float *xyz, const float *new_xyz,
113 |                                      const int *offset, const int *new_offset,
114 |                                      int *idx, float *dist2) {
115 |     // Shapes: new_xyz (m, 3), xyz (n, 3), idx (m, nsample). The grid is sized from m, the number of query points.
116 |     // Launched on PyTorch's current CUDA stream (not the legacy default stream) so it stays ordered with surrounding tensor ops.
117 |     dim3 blocks(DIVUP(m, THREADS_PER_BLOCK)), threads(THREADS_PER_BLOCK);
118 |     random_ball_query_cuda_kernel<<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(m, nsample,
119 |                                                           min_radius, max_radius, order,
120 |                                                           xyz, new_xyz,
121 |                                                           offset, new_offset,
122 |                                                           idx, dist2);
123 | }
124 | 


--------------------------------------------------------------------------------
/libs/pointops/src/random_ball_query/random_ball_query_cuda_kernel.h:
--------------------------------------------------------------------------------
 1 | #ifndef _RANDOM_BALL_QUERY_CUDA_KERNEL
 2 | #define _RANDOM_BALL_QUERY_CUDA_KERNEL
 3 | #include <vector>
 4 | #include <torch/serialize/tensor.h>
 5 | #include <ATen/cuda/CUDAContext.h>
 6 | // Tensor-level entry point (defined in random_ball_query_cuda.cpp, registered with pybind11 in pointops_api.cpp).
 7 | void random_ball_query_cuda(int m, int nsample,
 8 |                             float min_radius, float max_radius, at::Tensor order_tensor,
 9 |                             at::Tensor xyz_tensor, at::Tensor new_xyz_tensor,
10 |                             at::Tensor offset_tensor, at::Tensor new_offset_tensor,
11 |                             at::Tensor idx_tensor, at::Tensor dist2_tensor);
12 | 
13 | #ifdef __cplusplus
14 | extern "C" {
15 | #endif
16 | // Raw-pointer kernel launcher (defined in random_ball_query_cuda_kernel.cu); C linkage when compiled as C++.
17 | void random_ball_query_cuda_launcher(int m, int nsample,
18 |                                     float min_radius, float max_radius,  const int *order,
19 |                                     const float *xyz, const float *new_xyz,
20 |                                     const int *offset, const int *new_offset,
21 |                                     int *idx, float *dist2);
22 | 
23 | #ifdef __cplusplus
24 | }
25 | #endif
26 | #endif  // _RANDOM_BALL_QUERY_CUDA_KERNEL
27 | 


--------------------------------------------------------------------------------
/libs/pointops/src/sampling/sampling_cuda.cpp:
--------------------------------------------------------------------------------
 1 | #include <vector>
 2 | #include <torch/serialize/tensor.h>
 3 | #include <ATen/cuda/CUDAContext.h>
 4 | #include "sampling_cuda_kernel.h"
 5 | 
 6 | 
 7 | void farthest_point_sampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor) {
 8 |     // Unwrap the tensors into typed raw pointers (float / int) and hand them to the CUDA launcher.
 9 |     const float *points = xyz_tensor.data_ptr<float>();
10 |     const int *point_offset = offset_tensor.data_ptr<int>();
11 |     const int *sample_offset = new_offset_tensor.data_ptr<int>();
12 |     float *scratch = tmp_tensor.data_ptr<float>();
13 |     int *out_idx = idx_tensor.data_ptr<int>();
14 |     farthest_point_sampling_cuda_launcher(b, n, points, point_offset, sample_offset, scratch, out_idx);
15 | }
16 | 


--------------------------------------------------------------------------------
/libs/pointops/src/sampling/sampling_cuda_kernel.h:
--------------------------------------------------------------------------------
 1 | #ifndef _SAMPLING_CUDA_KERNEL
 2 | #define _SAMPLING_CUDA_KERNEL
 3 | #include <vector>
 4 | #include <torch/serialize/tensor.h>
 5 | #include <ATen/cuda/CUDAContext.h>
 6 | // Tensor-level entry point (defined in sampling_cuda.cpp, registered with pybind11 in pointops_api.cpp).
 7 | void farthest_point_sampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor);
 8 | 
 9 | #ifdef __cplusplus
10 | extern "C" {
11 | #endif
12 | // Raw-pointer kernel launcher; it gets C linkage when this header is compiled as C++.
13 | void farthest_point_sampling_cuda_launcher(int b, int n, const float *xyz, const int *offset, const int *new_offset, float *tmp, int *idx);
14 | 
15 | #ifdef __cplusplus
16 | }
17 | #endif
18 | #endif  // _SAMPLING_CUDA_KERNEL
19 | 


--------------------------------------------------------------------------------
/libs/pointops/src/subtraction/subtraction_cuda.cpp:
--------------------------------------------------------------------------------
 1 | #include <vector>
 2 | #include <torch/serialize/tensor.h>
 3 | #include <ATen/cuda/CUDAContext.h>
 4 | #include "subtraction_cuda_kernel.h"
 5 | 
 6 | 
 7 | void subtraction_forward_cuda(int n, int nsample, int c, at::Tensor input1_tensor, at::Tensor input2_tensor, at::Tensor idx_tensor, at::Tensor output_tensor) {
 8 |     // Unwrap the tensors into typed raw pointers (float / int) and hand them to the CUDA launcher.
 9 |     const float *lhs = input1_tensor.data_ptr<float>();
10 |     const float *rhs = input2_tensor.data_ptr<float>();
11 |     const int *nbr_idx = idx_tensor.data_ptr<int>();
12 |     float *dst = output_tensor.data_ptr<float>();
13 |     subtraction_forward_cuda_launcher(n, nsample, c, lhs, rhs, nbr_idx, dst);
14 | }
15 | 
16 | void subtraction_backward_cuda(int n, int nsample, int c, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input1_tensor, at::Tensor grad_input2_tensor) {
17 |     // Unwrap the tensors into typed raw pointers (float / int) and hand them to the CUDA launcher.
18 |     const int *nbr_idx = idx_tensor.data_ptr<int>();
19 |     const float *grad_out = grad_output_tensor.data_ptr<float>();
20 |     float *grad_lhs = grad_input1_tensor.data_ptr<float>();
21 |     float *grad_rhs = grad_input2_tensor.data_ptr<float>();
22 |     subtraction_backward_cuda_launcher(n, nsample, c, nbr_idx, grad_out, grad_lhs, grad_rhs);
23 | }
24 | 


--------------------------------------------------------------------------------
/libs/pointops/src/subtraction/subtraction_cuda_kernel.cu:
--------------------------------------------------------------------------------
 1 | #include "../cuda_utils.h"
 2 | #include "subtraction_cuda_kernel.h"
 3 | 
 4 | 
 5 | __global__ void subtraction_forward_cuda_kernel(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) {
 6 |     // output[p, s, ch] = input1[p, ch] - input2[idx[p, s], ch]; one thread per output element.
 7 |     // Shapes: input1 (n, c), input2 (n, c), idx (n, nsample), output (n, nsample, c).
 8 |     const int tid = blockIdx.x * blockDim.x + threadIdx.x;
 9 |     if (tid >= n * nsample * c) return;
10 |     const int channel = tid % c;
11 |     const int pair = tid / c;            // flat (p, s) slot, i.e. p * nsample + s
12 |     const int point = pair / nsample;
13 |     const float lhs = input1[point * c + channel];
14 |     const float rhs = input2[idx[pair] * c + channel];
15 |     output[tid] = lhs - rhs;
16 | }
17 | 
18 | __global__ void subtraction_backward_cuda_kernel(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) {
19 |     // Scatters grad_output[p, s, ch] into grad_input1[p, ch] (added) and grad_input2[idx[p, s], ch] (subtracted) via atomicAdd.
20 |     // Shapes: grad_output (n, nsample, c), idx (n, nsample), grad_input1 (n, c), grad_input2 (n, c).
21 |     const int tid = blockIdx.x * blockDim.x + threadIdx.x;
22 |     if (tid >= n * nsample * c) return;
23 |     const int channel = tid % c;
24 |     const int pair = tid / c;            // flat (p, s) slot, i.e. p * nsample + s
25 |     const int point = pair / nsample;
26 |     const int neighbor = idx[pair];
27 |     const float g = grad_output[tid];
28 |     atomicAdd(grad_input1 + point * c + channel, g);
29 |     atomicAdd(grad_input2 + neighbor * c + channel, -g);
30 | }
31 | 
32 | void subtraction_forward_cuda_launcher(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) {
33 |     // Shapes: input1 (n, c), input2 (n, c), idx (n, nsample), output (n, nsample, c). The grid is sized from n * nsample * c.
34 |     // Launched on PyTorch's current CUDA stream (not the legacy default stream) so it stays ordered with surrounding tensor ops.
35 |     dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK)), threads(THREADS_PER_BLOCK);
36 |     subtraction_forward_cuda_kernel<<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(n, nsample, c, input1, input2, idx, output);
37 | }
38 | 
39 | void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) {
40 |     // Shapes: grad_output (n, nsample, c), grad_input1 (n, c), grad_input2 (n, c). The grid is sized from n * nsample * c.
41 |     // Launched on PyTorch's current CUDA stream (not the legacy default stream) so it stays ordered with surrounding tensor ops.
42 |     dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK)), threads(THREADS_PER_BLOCK);
43 |     subtraction_backward_cuda_kernel<<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(n, nsample, c, idx, grad_output, grad_input1, grad_input2);
44 | }
45 | 


--------------------------------------------------------------------------------
/libs/pointops/src/subtraction/subtraction_cuda_kernel.h:
--------------------------------------------------------------------------------
 1 | #ifndef _SUBTRACTION_CUDA_KERNEL
 2 | #define _SUBTRACTION_CUDA_KERNEL
 3 | #include <vector>
 4 | #include <torch/serialize/tensor.h>
 5 | #include <ATen/cuda/CUDAContext.h>
 6 | // Tensor-level entry points (defined in subtraction_cuda.cpp, registered with pybind11 in pointops_api.cpp).
 7 | void subtraction_forward_cuda(int n, int nsample, int c, at::Tensor input1_tensor, at::Tensor input2_tensor, at::Tensor idx_tensor, at::Tensor output_tensor);
 8 | void subtraction_backward_cuda(int n, int nsample, int c, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input1_tensor, at::Tensor grad_input2_tensor);
 9 | 
10 | #ifdef __cplusplus
11 | extern "C" {
12 | #endif
13 | // Raw-pointer kernel launchers (defined in subtraction_cuda_kernel.cu); C linkage when compiled as C++.
14 | void subtraction_forward_cuda_launcher(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output);
15 | void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2);
16 | 
17 | #ifdef __cplusplus
18 | }
19 | #endif
20 | #endif  // _SUBTRACTION_CUDA_KERNEL
21 | 


--------------------------------------------------------------------------------
/libs/pointops2/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pointcept/PointTransformerV2/5386c4d71f3d6c42c24a8105fce8750e9355dc54/libs/pointops2/__init__.py


--------------------------------------------------------------------------------
/libs/pointops2/functions/__init__.py:
--------------------------------------------------------------------------------
1 | from pointops2 import *


--------------------------------------------------------------------------------
/libs/pointops2/functions/test_attention_op_step1.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import pointops
 3 | from torch_scatter import scatter_max, scatter_mean, scatter_add, scatter_min, scatter_sum  # NOTE(review): none of these names are used below
 4 | # Manual check: runs pointops.attention_step1 and pointops.attention_step1_v2 on the same random CUDA inputs and prints how far apart the results are.
 5 | torch.manual_seed(1)
 6 | # Problem size: M (index_0, index_1) pairs over N rows; C channels split across h heads.
 7 | M = 800000
 8 | N = 35000
 9 | C = 96
10 | h = 6
11 | query = torch.rand(N, h, C//h).cuda()
12 | key = torch.rand(N, h, C//h).cuda()
13 | # Random row indices, one per pair. NOTE(review): torch.rand samples from [0, 1), so the "< 0" clamps below look like no-ops.
14 | index_0 = torch.rand(M)
15 | index_0[index_0 < 0] = 0
16 | index_0 = (index_0*N).long().cuda()
17 | 
18 | index_1 = torch.rand(M)
19 | index_1[index_1 < 0] = 0
20 | index_1 = (index_1*N).long().cuda()
21 | 
22 | query.requires_grad = True
23 | key.requires_grad = True
24 | 
25 | # Sort the pairs by index_0 (index_1 is permuted the same way) so pairs sharing an index_0 become contiguous.
26 | index_0, indices = torch.sort(index_0) #[M,]
27 | index_1 = index_1[indices] #[M,]
28 | index_0_counts = index_0.bincount()
29 | # index_0_counts[i] is the number of pairs whose index_0 equals i.
30 | print("index_0_counts.shape: ", index_0_counts.shape)
31 | # n_max: size of the largest group; the cumulative sum gives the end position of each group in the sorted pair list.
32 | n_max = index_0_counts.max()
33 | index_0_offsets = index_0_counts.cumsum(dim=-1) #[N]
34 | 
35 | print("v1 index_0_offsets.shape: ", index_0_offsets.shape)
36 | # Prepend 0 so that group i spans [index_0_offsets[i], index_0_offsets[i + 1]).
37 | index_0_offsets = torch.cat([torch.zeros(1, dtype=torch.long).cuda(), index_0_offsets], 0) #[N+1]
38 | 
39 | # print("index_0[:100]: ", index_0[:100])
40 | print("n_max: ", n_max)
41 | print("index_0_offsets.shape: ", index_0_offsets.shape)
42 | # input()
43 | 
44 | print("index_0_offsets[:100]: ", index_0_offsets[:100])
45 | print("index_1[300:320]: ", index_1[300:320])
46 | 
47 | # Reference path: attention_step1 receives the per-pair indices (index_0, index_1) directly.
48 | attn_flat = pointops.attention_step1(query.float(), key.float(), index_0.int(), index_1.int())
49 | # loss = attn_flat.sum()
50 | # loss.backward()
51 | print("attn_flat.shape: {}, attn_flat[300:320,:10]: {}".format(attn_flat.shape, attn_flat[300:320,:10]))
52 | # print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5])
53 | # print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5])
54 | # input()
55 | 
56 | print("query.is_contiguous(): ", query.is_contiguous())
57 | print("key.is_contiguous(): ", key.is_contiguous())
58 | print("index_0.is_contiguous(): ", index_0.is_contiguous())
59 | print("index_1.is_contiguous(): ", index_1.is_contiguous())
60 | # v2 path: attention_step1_v2 receives index_1 plus the group offsets and n_max instead of index_0.
61 | attn_flat_v2 = pointops.attention_step1_v2(query.float(), key.float(), index_1.int(), index_0_offsets.int(), n_max)
62 | # loss = attn_flat_v2.sum()
63 | # loss.backward()
64 | print("attn_flat_v2.shape: {}, attn_flat_v2[300:320,:10]: {}".format(attn_flat_v2.shape, attn_flat_v2[300:320,:10]))
65 | # print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5])
66 | # print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5])
67 | # input()
68 | # Largest squared difference over the rows where the v2 result sums to a non-zero value.
69 | mask = attn_flat_v2.sum(-1) != 0
70 | print("mask.sum(): ", mask.sum())
71 | print("attn_flat_v2[mask] - attn_flat[mask]: ", ((attn_flat_v2[mask] - attn_flat[mask])**2).max())
72 | 
73 | # True only if every element of the two results agrees to within a squared error of 1e-8.
74 | print("((attn_flat-attn_flat_v2)**2 < 1e-8).all(): ", ((attn_flat-attn_flat_v2)**2 < 1e-8).all())
75 | # Per-column maximum squared difference (values and argmax rows) over the first `selected` rows.
76 | selected = 10000
77 | print("torch.max((attn_flat[:selected]-attn_flat_v2[:selected])**2, 0): ", torch.max((attn_flat[:selected]-attn_flat_v2[:selected])**2, 0))
78 | 
79 | 


--------------------------------------------------------------------------------
/libs/pointops2/functions/test_attention_op_step1_v2.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import pointops
 3 | from torch_scatter import scatter_max, scatter_mean, scatter_add, scatter_min, scatter_sum
 4 | 
 5 | torch.manual_seed(1)
 6 | 
 7 | M = 800000
 8 | N = 35000
 9 | C = 96
10 | h = 6
11 | query = torch.rand(N, h, C//h).cuda()
12 | key = torch.rand(N, h, C//h).cuda()
13 | 
14 | index_0 = torch.rand(M)
15 | index_0[index_0 < 0] = 0
16 | index_0 = (index_0*N).long().cuda()
17 | 
18 | index_1 = torch.rand(M)
19 | index_1[index_1 < 0] = 0
20 | index_1 = (index_1*N).long().cuda()
21 | 
22 | query.requires_grad = True
23 | key.requires_grad = True
24 | 
25 | 
26 | attn_flat = pointops.attention_step1(query.float(), key.float(), index_0.int(), index_1.int())
27 | loss = attn_flat.sum()
28 | loss.backward()
29 | print("attn_flat.shape: {}, attn_flat[:20,:10]: {}".format(attn_flat.shape, attn_flat[:20,:10]))
30 | print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5])
31 | print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5])
32 | input()
33 | 
34 | 
35 | 
36 | # rearrange index for acceleration
37 | index_0, indices = torch.sort(index_0) #[M,]
38 | index_1 = index_1[indices] #[M,]
39 | index_0_counts = index_0.bincount()
40 | 
41 | print("index_0_counts.shape: ", index_0_counts.shape)
42 | 
43 | n_max = index_0_counts.max()
44 | index_0_offsets = index_0_counts.cumsum(dim=-1) #[N]
45 | 
46 | print("v1 index_0_offsets.shape: ", index_0_offsets.shape)
47 | 
48 | index_0_offsets = torch.cat([torch.zeros(1, dtype=torch.long).cuda(), index_0_offsets], 0) #[N+1]
49 |         
50 | # print("index_0[:100]: ", index_0[:100])
51 | print("n_max: ", n_max)
52 | print("index_0_offsets.shape: ", index_0_offsets.shape)
53 | # input()
54 | 
55 | print("index_0_offsets[:100]: ", index_0_offsets[:100])
56 | print("index_1[:20]: ", index_1[:20])
57 | 
58 |             
59 | attn_flat = pointops.attention_step1(query.float(), key.float(), index_0.int(), index_1.int())
60 | # loss = attn_flat.sum()
61 | # loss.backward()
62 | # # attn_flat = pointops.attention_step1(query.float(), key.float(), index_0.int(), index_1.int())
63 | # # loss = attn_flat.sum()
64 | # # loss.backward()
65 | # print("attn_flat.shape: {}, attn_flat[:20,:10]: {}".format(attn_flat.shape, attn_flat[:20,:10]))
66 | # print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5])
67 | # print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5])
68 | # input()
69 | 
70 | print("query.is_contiguous(): ", query.is_contiguous())
71 | print("key.is_contiguous(): ", key.is_contiguous())
72 | print("index_0.is_contiguous(): ", index_0.is_contiguous())
73 | print("index_1.is_contiguous(): ", index_1.is_contiguous())
74 | 
75 | attn_flat_v2 = pointops.attention_step1_v2(query.float(), key.float(), index_1.int(), index_0_offsets.int(), n_max)
76 | loss = attn_flat_v2.sum()
77 | loss.backward()
78 | 
79 | # attn_flat_v2 = pointops.attention_step1_v2(query.float(), key.float(), index_1.int(), index_0_offsets.int(), n_max)
80 | # loss = attn_flat_v2.sum()
81 | # loss.backward()
82 | 
83 | print("attn_flat_v2.shape: {}, attn_flat_v2[:20,:10]: {}".format(attn_flat_v2.shape, attn_flat_v2[:20,:10]))
84 | print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5])
85 | print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5])
86 | # input()
87 | 
88 | # mask = attn_flat_v2.sum(-1) != 0
89 | # print("mask.sum(): ", mask.sum())
90 | # print("attn_flat_v2[mask] - attn_flat[mask]: ", ((attn_flat_v2[mask] - attn_flat[mask])**2).max())
91 | 
92 | 
93 | print("((attn_flat-attn_flat_v2)**2 < 1e-8).all(): ", ((attn_flat-attn_flat_v2)**2 < 1e-8).all())
94 | 
95 | selected = 10000
96 | print("torch.max((attn_flat[:selected]-attn_flat_v2[:selected])**2, 0): ", torch.max((attn_flat[:selected]-attn_flat_v2[:selected])**2, 0))
97 | 
98 | 


--------------------------------------------------------------------------------
/libs/pointops2/functions/test_attention_op_step2.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import pointops
 3 | from torch_scatter import scatter_max, scatter_mean, scatter_add, scatter_min, scatter_sum
 4 | 
 5 | torch.manual_seed(1)
 6 | 
 7 | M = 800000
 8 | N = 35000
 9 | C = 96
10 | h = 6
11 | softmax_attn_flat = torch.rand(M, h).cuda()
12 | value = torch.rand(N, h, C//h).cuda()
13 | 
14 | index_0 = torch.rand(M)
15 | index_0[index_0 < 0] = 0
16 | index_0 = (index_0*N).long().cuda()
17 | 
18 | index_1 = torch.rand(M)
19 | index_1[index_1 < 0] = 0
20 | index_1 = (index_1*N).long().cuda()
21 | 
22 | softmax_attn_flat.requires_grad = True
23 | value.requires_grad = True
24 | 
25 | # value_flat = value[index_1] #[M, num_heads, C // num_heads]
26 | # x = (softmax_attn_flat.unsqueeze(-1) * value_flat).reshape(M, C)
27 | # x = scatter_sum(src=x, index=index_0, dim=0, dim_size=N) #[N, C]
28 | # loss = x.sum()
29 | # loss.backward()
30 | 
31 | # print("x.shape: {}, x[:5,:10]: {}".format(x.shape, x[:5,:10]))
32 | # print("softmax_attn_flat.grad[:5, :10]: ", softmax_attn_flat.grad[:5, :10])
33 | # print("value.grad[:5, :3, :5]: ", value.grad[:5, :3, :5])
34 | # input()
35 | 
36 | print("softmax_attn_flat.is_contiguous(): ", softmax_attn_flat.is_contiguous())
37 | print("value.is_contiguous(): ", value.is_contiguous())
38 | print("index_0.is_contiguous(): ", index_0.is_contiguous())
39 | print("index_1.is_contiguous(): ", index_1.is_contiguous())
40 | 
41 | x_v2 = pointops.attention_step2(softmax_attn_flat.float(), value.float(), index_0.int(), index_1.int())
42 | x_v2 = x_v2.view(N, C)
43 | loss = x_v2.sum()
44 | loss.backward()
45 | 
46 | print("x_v2.shape: {}, x_v2[:5,:10]: {}".format(x_v2.shape, x_v2[:5,:10]))
47 | 
48 | print("softmax_attn_flat.grad[:5, :10]: ", softmax_attn_flat.grad[:5, :10])
49 | print("value.grad[:5, :3, :5]: ", value.grad[:5, :3, :5])
50 | input()
51 | 
52 | print("((x-x_v2)**2 < 1e-8).all(): ", ((x-x_v2)**2 < 1e-8).all())
53 | 
54 | print("torch.max((x-x_v2)**2): ", torch.max((x-x_v2)**2))
55 | 
56 | 


--------------------------------------------------------------------------------
/libs/pointops2/functions/test_relative_pos_encoding_op_step1.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import pointops
 3 | from torch_scatter import scatter_max, scatter_mean, scatter_add, scatter_min, scatter_sum
 4 | 
# Smoke test for pointops.dot_prod_with_idx: builds random inputs on the GPU,
# runs the op forward and backward, and prints slices of the output and of
# the gradients. The pure-PyTorch reference further down is commented out, so
# nothing is compared here.
torch.manual_seed(1)

# Problem sizes: M index entries, N query rows, h x hdim features per row,
# L rows in the lookup table.
M = 80000
N = 3500
hdim = 16
h = 6
L = 31
query = torch.rand(N, h, hdim).cuda()
table = torch.rand(L, h, hdim, 3).cuda()

# torch.rand is non-negative, so the "< 0" clamp below never changes anything.
index = torch.rand(M)
index[index < 0] = 0
index = (index*N).long().cuda()

rel_index = torch.rand(M, 3)
rel_index[rel_index < 0] = 0
rel_index = (rel_index*L).long().cuda()

query.requires_grad = True
table.requires_grad = True

# Disabled pure-PyTorch reference (presumably the quantity the CUDA op is
# expected to reproduce -- confirm against the kernel before re-enabling).
# query_flat = query[index] #[M, h, hdim]
# table_x, table_y, table_z = table[:,:,:,0], table[:,:,:,1], table[:,:,:,2] #[L, h, hdim]
# rel_index_x, rel_index_y, rel_index_z = rel_index[:,0], rel_index[:,1], rel_index[:,2] #[M]
# rel_pos_encoding = table_x[rel_index_x] + table_y[rel_index_y] + table_z[rel_index_z] #[M, h, hdim]
# output = (query_flat * rel_pos_encoding).sum(-1) #[M, h]
# loss = output.mean()
# loss.backward()

# print("output.shape: {}, output[:5,:10]: {}".format(output.shape, output[:5,:10]))
# print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5])
# print("table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2])
# input()

# print("query.is_contiguous(): ", query.is_contiguous())
# print("key.is_contiguous(): ", key.is_contiguous())
# print("index_0.is_contiguous(): ", index_0.is_contiguous())
# print("index_1.is_contiguous(): ", index_1.is_contiguous())

# CUDA op under test; the indices are cast to int32 before the call.
output_v2 = pointops.dot_prod_with_idx(query, index.int(), table, rel_index.int())
loss = output_v2.mean()
loss.backward()

print("output_v2.shape: {}, output_v2[:5,:10]: {}".format(output_v2.shape, output_v2[:5,:10]))
print("v2: query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5])
print("v2: table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2])
# Blocks until Enter is pressed (manual inspection pause).
input()

# print("((output-output_v2)**2).max(): ", ((output-output_v2)**2).max())

# print("torch.max((attn_flat-attn_flat_v2)**2): ", torch.max((attn_flat-attn_flat_v2)**2))
56 | 
57 | 


--------------------------------------------------------------------------------
/libs/pointops2/functions/test_relative_pos_encoding_op_step1_v2.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import pointops
 3 | from torch_scatter import scatter_max, scatter_mean, scatter_add, scatter_min, scatter_sum
 4 | 
# Compares pointops.dot_prod_with_idx_v2 against the sum of two
# pointops.dot_prod_with_idx calls (one for query, one for key) on random
# inputs, and prints the maximum squared difference of the outputs.
torch.manual_seed(1)

M = 80000
N = 3500
hdim = 16
h = 6
L = 31
query = torch.rand(N, h, hdim).cuda()
table_q = torch.rand(L, h, hdim, 3).cuda()
key = torch.rand(N, h, hdim).cuda()
table_k = torch.rand(L, h, hdim, 3).cuda()

# torch.rand is non-negative, so the "< 0" clamps below never change anything.
index_q = torch.rand(M)
index_q[index_q < 0] = 0
index_q = (index_q*N).long().cuda()

index_k = torch.rand(M)
index_k[index_k < 0] = 0
index_k = (index_k*N).long().cuda()

rel_index = torch.rand(M, 3)
rel_index[rel_index < 0] = 0
rel_index = (rel_index*L).long().cuda()

query.requires_grad = True
table_q.requires_grad = True
key.requires_grad = True
table_k.requires_grad = True

# Baseline: two separate calls, summed. Its backward pass is disabled below,
# so only the v2 call contributes to the gradients printed later.
output1 = pointops.dot_prod_with_idx(query, index_q.int(), table_q, rel_index.int())
output2 = pointops.dot_prod_with_idx(key, index_k.int(), table_k, rel_index.int())
output = output1 + output2
# loss = output.mean()
# loss.backward()

# print("output.shape: {}, output[:5,:10]: {}".format(output.shape, output[:5,:10]))
# print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5])
# print("table_q.grad[:5, :3, :5, :2]: ", table_q.grad[:5, :3, :5, :2])
# print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5])
# print("table_k.grad[:5, :3, :5, :2]: ", table_k.grad[:5, :3, :5, :2])
# input()

# print("query.is_contiguous(): ", query.is_contiguous())
# print("key.is_contiguous(): ", key.is_contiguous())
# print("index_0.is_contiguous(): ", index_0.is_contiguous())
# print("index_1.is_contiguous(): ", index_1.is_contiguous())

# Combined op under test.
output_v2 = pointops.dot_prod_with_idx_v2(query, index_q.int(), key, index_k.int(), table_q, table_k, rel_index.int())
loss = output_v2.mean()
loss.backward()

print("output_v2.shape: {}, output_v2[:5,:10]: {}".format(output_v2.shape, output_v2[:5,:10]))
print("v2 query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5])
print("v2 table_q.grad[:5, :3, :5, :2]: ", table_q.grad[:5, :3, :5, :2])
print("v2 key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5])
print("v2 table_k.grad[:5, :3, :5, :2]: ", table_k.grad[:5, :3, :5, :2])
# input()

# Largest squared element-wise difference between baseline and v2 outputs.
print("((output-output_v2)**2).max(): ", ((output-output_v2)**2).max())
64 | 
65 | 


--------------------------------------------------------------------------------
/libs/pointops2/functions/test_relative_pos_encoding_op_step1_v3.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import pointops
 3 | from torch_scatter import scatter_max, scatter_mean, scatter_add, scatter_min, scatter_sum
 4 | 
 5 | torch.manual_seed(1)
 6 | 
 7 | M = 80000
 8 | N = 3500
 9 | # M = 80
10 | # N = 5
11 | hdim = 16
12 | h = 6
13 | L = 31
14 | query = torch.rand(N, h, hdim).cuda()
15 | table_q = torch.rand(L, h, hdim, 3).cuda()
16 | key = torch.rand(N, h, hdim).cuda()
17 | table_k = torch.rand(L, h, hdim, 3).cuda()
18 | 
19 | index_q = torch.rand(M)
20 | index_q[index_q < 0] = 0
21 | index_q = (index_q*N).long().cuda()
22 | 
23 | index_k = torch.rand(M)
24 | index_k[index_k < 0] = 0
25 | index_k = (index_k*N).long().cuda()
26 | 
27 | rel_index = torch.rand(M, 3)
28 | rel_index[rel_index < 0] = 0
29 | rel_index = (rel_index*L).long().cuda()
30 | 
31 | 
32 | # rearrange index for acceleration
33 | index_q, indices = torch.sort(index_q) #[M,]
34 | index_k = index_k[indices] #[M,]
35 | rel_index = rel_index[indices]
36 | index_q_counts = index_q.bincount()
37 | 
38 | print("index_q_counts.shape: ", index_q_counts.shape)
39 | 
40 | n_max = index_q_counts.max()
41 | index_q_offsets = index_q_counts.cumsum(dim=-1) #[N]
42 | 
43 | print("v1 index_q_offsets.shape: ", index_q_offsets.shape)
44 | 
45 | index_q_offsets = torch.cat([torch.zeros(1, dtype=torch.long).cuda(), index_q_offsets], 0) #[N+1]
46 |         
47 | # print("index_q[:100]: ", index_q[:100])
48 | print("n_max: ", n_max)
49 | print("index_q_offsets.shape: ", index_q_offsets.shape)
50 | # input()
51 | 
52 | print("index_q_offsets[:100]: ", index_q_offsets[:100])
53 | print("index_k[:20]: ", index_k[:20])
54 | 
55 | query.requires_grad = True
56 | table_q.requires_grad = True
57 | key.requires_grad = True
58 | table_k.requires_grad = True
59 | 
60 | output1 = pointops.dot_prod_with_idx(query, index_q.int(), table_q, rel_index.int())
61 | output2 = pointops.dot_prod_with_idx(key, index_k.int(), table_k, rel_index.int())
62 | output = output1 + output2
63 | loss = output.mean()
64 | loss.backward()
65 | 
66 | # print("output.shape: {}, output[:5,:10]: {}".format(output.shape, output[:5,:10]))
67 | # print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5])
68 | # print("table_q.grad[:5, :3, :5, :2]: ", table_q.grad[:5, :3, :5, :2])
69 | # print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5])
70 | # print("table_k.grad[:5, :3, :5, :2]: ", table_k.grad[:5, :3, :5, :2])
71 | # input()
72 | 
73 | # print("query.is_contiguous(): ", query.is_contiguous())
74 | # print("key.is_contiguous(): ", key.is_contiguous())
75 | # print("index_q.is_contiguous(): ", index_q.is_contiguous())
76 | # print("index_k.is_contiguous(): ", index_k.is_contiguous())
77 | 
78 | output_v2 = pointops.dot_prod_with_idx_v3(query, index_q_offsets.int(), n_max, key, index_k.int(), table_q, table_k, rel_index.int())
79 | # loss = output_v2.mean()
80 | # loss.backward()
81 | 
82 | # print("output_v2.shape: {}, output_v2[:5,:10]: {}".format(output_v2.shape, output_v2[:5,:10]))
83 | # print("v2 query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5])
84 | # print("v2 table_q.grad[:5, :3, :5, :2]: ", table_q.grad[:5, :3, :5, :2])
85 | # print("v2 key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5])
86 | # print("v2 table_k.grad[:5, :3, :5, :2]: ", table_k.grad[:5, :3, :5, :2])
87 | # input()
88 | 
89 | print("((output-output_v2)**2).max(): ", ((output-output_v2)**2).max())
90 | 
91 | 


--------------------------------------------------------------------------------
/libs/pointops2/functions/test_relative_pos_encoding_op_step2.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import pointops
 3 | from torch_scatter import scatter_max, scatter_mean, scatter_add, scatter_min, scatter_sum
 4 | 
# Pure-PyTorch reference run for attention "step 2" with a relative position
# encoding added to the values: gather, add table lookups, weight by attn,
# scatter_sum per index_0. The call to the CUDA op
# (pointops.attention_step2_with_rel_pos_value) is commented out below, so
# this script only prints the reference output and gradients.
torch.manual_seed(1)

M = 80000
N = 3500
hdim = 16
h = 6
L = 31
attn = torch.rand(M, h).cuda()
v = torch.rand(N, h, hdim).cuda()
table = torch.rand(L, h, hdim, 3).cuda()

# torch.rand is non-negative, so the "< 0" clamps below never change anything.
index_0 = torch.rand(M)
index_0[index_0 < 0] = 0
index_0 = (index_0*N).long().cuda()

index_1 = torch.rand(M)
index_1[index_1 < 0] = 0
index_1 = (index_1*N).long().cuda()

rel_index = torch.rand(M, 3)
rel_index[rel_index < 0] = 0
rel_index = (rel_index*L).long().cuda()

attn.requires_grad = True
v.requires_grad = True
table.requires_grad = True

# Reference computation.
v_flat = v[index_1] #[M, h, hdim]
# The last axis of `table` holds one sub-table per component of rel_index.
table_x, table_y, table_z = table[:,:,:,0], table[:,:,:,1], table[:,:,:,2] #[L, h, hdim]
rel_index_x, rel_index_y, rel_index_z = rel_index[:,0], rel_index[:,1], rel_index[:,2] #[M]
rel_pos_encoding = table_x[rel_index_x] + table_y[rel_index_y] + table_z[rel_index_z] #[M, h, hdim]
v_flat_new = v_flat + rel_pos_encoding #[M, h, hdim]
output = attn.unsqueeze(-1) * v_flat_new #[M, h, hdim]
# Sum the M weighted rows into the N rows selected by index_0.
output = scatter_sum(src=output, index=index_0, dim=0, dim_size=N) #[N, h, hdim]
loss = output.mean()
loss.backward()

print("output.shape: {}, output[:5,:10,:5]: {}".format(output.shape, output[:5,:10, :5]))
print("attn.grad[:5, :3]: ", attn.grad[:5, :3])
print("v.grad[:5, :3, :5]: ", v.grad[:5, :3, :5])
print("table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2])
# Blocks until Enter is pressed (manual inspection pause).
input()

# print("query.is_contiguous(): ", query.is_contiguous())
# print("key.is_contiguous(): ", key.is_contiguous())
# print("index_0.is_contiguous(): ", index_0.is_contiguous())
# print("index_1.is_contiguous(): ", index_1.is_contiguous())

# Disabled CUDA-op run and comparison.
# output_v2 = pointops.attention_step2_with_rel_pos_value(attn, v, index_0.int(), index_1.int(), table, rel_index.int())
# loss = output_v2.mean()
# loss.backward()

# print("output_v2.shape: {}, output_v2[:5,:10,:5]: {}".format(output_v2.shape, output_v2[:5,:10,:5]))
# print("v2 attn.grad[:5, :3]: ", attn.grad[:5, :3])
# print("v2 v.grad[:5, :3, :5]: ", v.grad[:5, :3, :5])
# print("v2 table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2])
# input()

# print("((output-output_v2)**2).max(): ", ((output-output_v2)**2).max())

# print("torch.max((attn_flat-attn_flat_v2)**2): ", torch.max((attn_flat-attn_flat_v2)**2))
66 | 
67 | 


--------------------------------------------------------------------------------
/libs/pointops2/functions/test_relative_pos_encoding_op_step2_v2.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import pointops
 3 | from torch_scatter import scatter_max, scatter_mean, scatter_add, scatter_min, scatter_sum
 4 | 
 5 | torch.manual_seed(1)
 6 | 
 7 | M = 80000
 8 | N = 3500
 9 | hdim = 16
10 | h = 6
11 | L = 31
12 | attn = torch.rand(M, h).cuda()
13 | v = torch.rand(N, h, hdim).cuda()
14 | table = torch.rand(L, h, hdim, 3).cuda()
15 | 
16 | index_0 = torch.rand(M)
17 | index_0[index_0 < 0] = 0
18 | index_0 = (index_0*N).long().cuda()
19 | 
20 | index_1 = torch.rand(M)
21 | index_1[index_1 < 0] = 0
22 | index_1 = (index_1*N).long().cuda()
23 | 
24 | rel_index = torch.rand(M, 3)
25 | rel_index[rel_index < 0] = 0
26 | rel_index = (rel_index*L).long().cuda()
27 | 
28 | 
29 | # rearrange index for acceleration
30 | index_0, indices = torch.sort(index_0) #[M,]
31 | index_1 = index_1[indices] #[M,]
32 | rel_index = rel_index[indices]
33 | index_0_counts = index_0.bincount()
34 | 
35 | print("index_0_counts.shape: ", index_0_counts.shape)
36 | 
37 | n_max = index_0_counts.max()
38 | index_0_offsets = index_0_counts.cumsum(dim=-1) #[N]
39 | 
40 | print("v1 index_0_offsets.shape: ", index_0_offsets.shape)
41 | 
42 | index_0_offsets = torch.cat([torch.zeros(1, dtype=torch.long).cuda(), index_0_offsets], 0) #[N+1]
43 | 
44 | 
45 | attn.requires_grad = True
46 | v.requires_grad = True
47 | table.requires_grad = True
48 | 
49 | 
50 | output = pointops.attention_step2_with_rel_pos_value(attn, v, index_0.int(), index_1.int(), table, rel_index.int())
51 | loss = output.mean()
52 | loss.backward()
53 | 
54 | print("output.shape: {}, output[:5,:10,:5]: {}".format(output.shape, output[:5,:10, :5]))
55 | print("attn.grad[:5, :3]: ", attn.grad[:5, :3])
56 | print("v.grad[:5, :3, :5]: ", v.grad[:5, :3, :5])
57 | print("table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2])
58 | # input()
59 | 
60 | attn_grad = attn.grad.clone()
61 | v_grad = v.grad.clone()
62 | table_grad = table.grad.clone()
63 | 
64 | attn.grad.zero_()
65 | v.grad.zero_()
66 | table.grad.zero_()
67 | 
68 | # print("query.is_contiguous(): ", query.is_contiguous())
69 | # print("key.is_contiguous(): ", key.is_contiguous())
70 | # print("index_0.is_contiguous(): ", index_0.is_contiguous())
71 | # print("index_1.is_contiguous(): ", index_1.is_contiguous())
72 | 
73 | output_v2 = pointops.attention_step2_with_rel_pos_value_v2(attn, v, index_0_offsets.int(), n_max, index_1.int(), table, rel_index.int())
74 | loss = output_v2.mean()
75 | loss.backward()
76 | 
77 | print("output_v2.shape: {}, output_v2[:5,:10,:5]: {}".format(output_v2.shape, output_v2[:5,:10,:5]))
78 | print("v2 attn.grad[:5, :3]: ", attn.grad[:5, :3])
79 | print("v2 v.grad[:5, :3, :5]: ", v.grad[:5, :3, :5])
80 | print("v2 table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2])
81 | # input()
82 | 
83 | print("((output-output_v2)**2).max(): ", ((output-output_v2)**2).max())
84 | 
85 | print("((attn_grad-attn.grad)**2).max(): ", ((attn_grad-attn.grad)**2).max())
86 | 
87 | print("((v_grad-v.grad)**2).max(): ", ((v_grad-v.grad)**2).max())
88 | 
89 | print("((table_grad-table.grad)**2).max(): ", ((table_grad-table.grad)**2).max())
90 | 
91 | # print("torch.max((attn_flat-attn_flat_v2)**2): ", torch.max((attn_flat-attn_flat_v2)**2))
92 | 
93 | 


--------------------------------------------------------------------------------
/libs/pointops2/setup.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from setuptools import setup
 3 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
 4 | from distutils.sysconfig import get_config_vars
 5 | 
 6 | (opt,) = get_config_vars('OPT')
 7 | os.environ['OPT'] = " ".join(
 8 |     flag for flag in opt.split() if flag != '-Wstrict-prototypes'
 9 | )
10 | 
11 | src = 'src'
12 | sources = [os.path.join(root, file) for root, dirs, files in os.walk(src)
13 |            for file in files
14 |            if file.endswith('.cpp') or file.endswith('.cu')]
15 | 
16 | setup(
17 |     name='pointops2',
18 |     version='1.0',
19 |     install_requires=["torch", "numpy"],
20 |     packages=["pointops2"],
21 |     package_dir={"pointops2": "functions"},
22 |     ext_modules=[
23 |         CUDAExtension(
24 |             name='pointops2_cuda',
25 |             sources=sources,
26 |             extra_compile_args={'cxx': ['-g'], 'nvcc': ['-O2']}
27 |         )
28 |     ],
29 |     cmdclass={'build_ext': BuildExtension}
30 | )
31 | 


--------------------------------------------------------------------------------
/libs/pointops2/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pointcept/PointTransformerV2/5386c4d71f3d6c42c24a8105fce8750e9355dc54/libs/pointops2/src/__init__.py


--------------------------------------------------------------------------------
/libs/pointops2/src/aggregation/aggregation_cuda.cpp:
--------------------------------------------------------------------------------
 1 | #include <vector>
 2 | #include <torch/serialize/tensor.h>
 3 | #include <ATen/cuda/CUDAContext.h>
 4 | #include "aggregation_cuda_kernel.h"
 5 | 
 6 | 
 7 | void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor)
 8 | {
 9 |     const float *input = input_tensor.data_ptr<float>();
10 |     const float *position = position_tensor.data_ptr<float>();
11 |     const float *weight = weight_tensor.data_ptr<float>();
12 |     const int *idx = idx_tensor.data_ptr<int>();
13 |     float *output = output_tensor.data_ptr<float>();
14 |     aggregation_forward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, output);
15 | }
16 | 
17 | void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor)
18 | {
19 | 	const float *input = input_tensor.data_ptr<float>();
20 |     const float *position = position_tensor.data_ptr<float>();
21 |     const float *weight = weight_tensor.data_ptr<float>();
22 |     const int *idx = idx_tensor.data_ptr<int>();
23 |     const float *grad_output = grad_output_tensor.data_ptr<float>();
24 |     float *grad_input = grad_input_tensor.data_ptr<float>();
25 |     float *grad_position = grad_position_tensor.data_ptr<float>();
26 |     float *grad_weight = grad_weight_tensor.data_ptr<float>();
27 |     aggregation_backward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight);
28 | }
29 | 


--------------------------------------------------------------------------------
/libs/pointops2/src/aggregation/aggregation_cuda_kernel.cu:
--------------------------------------------------------------------------------
 1 | #include "../cuda_utils.h"
 2 | #include "aggregation_cuda_kernel.h"
 3 | 
 4 | 
 5 | __global__ void aggregation_forward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) {
 6 |     // input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c)
 7 |     int index = blockIdx.x * blockDim.x + threadIdx.x;
 8 |     if (index >= n * c) return;
 9 |     const int c_idx = index % c;
10 |     const int n_idx = index / c;
11 |     const int w_c_idx = c_idx % w_c;
12 |     for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++)
13 |     {   
14 |         int idx_idx = n_idx * nsample + nsample_idx;
15 |         int input_idx = idx[idx_idx] * c + c_idx;
16 |         int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx;
17 |         int weight_idx = n_idx * nsample * w_c + nsample_idx * w_c + w_c_idx;
18 |         output[index] += (input[input_idx] + position[position_idx]) * weight[weight_idx];
19 |     }
20 | }
21 | 
22 | __global__ void aggregation_backward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) {
23 |     // input: grad_output: (n, c), output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight: (n, nsample, w_c)
24 |     int index = blockIdx.x * blockDim.x + threadIdx.x;
25 |     if (index >= n * c) return;
26 |     const int c_idx = index % c;
27 |     const int n_idx = index / c;
28 |     const int w_c_idx = c_idx % w_c;
29 |     for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++)
30 |     {   
31 |         int idx_idx = n_idx * nsample + nsample_idx;
32 |         int input_idx = idx[idx_idx] * c + c_idx;
33 |         int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx;
34 |         int weight_idx = n_idx * nsample * w_c + nsample_idx * w_c + w_c_idx;
35 |         atomicAdd(grad_input + input_idx, grad_output[index] * weight[weight_idx]);
36 |         grad_position[position_idx] = grad_output[index] * weight[weight_idx];
37 |         atomicAdd(grad_weight + weight_idx, grad_output[index] * (input[input_idx] + position[position_idx]));
38 |     }
39 | }
40 | 
41 | void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) {
42 |     // input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c)
43 |     dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
44 |     dim3 threads(THREADS_PER_BLOCK);
45 |     aggregation_forward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, w_c, input, position, weight, idx, output);
46 | }
47 | 
48 | void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) {  
49 |     // input: grad_output: (n, c), output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight: (n, nsample, w_c)
50 |     dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
51 |     dim3 threads(THREADS_PER_BLOCK);
52 |     aggregation_backward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight);
53 | }
54 | 


--------------------------------------------------------------------------------
/libs/pointops2/src/aggregation/aggregation_cuda_kernel.h:
--------------------------------------------------------------------------------
// Declarations for the aggregation op: tensor-level entry points and the raw
// pointer CUDA launchers they call.
#ifndef _AGGREGATION_CUDA_KERNEL
#define _AGGREGATION_CUDA_KERNEL
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>

// Tensor-level wrappers: take at::Tensor arguments and forward their data
// pointers to the launchers below.
void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor);
void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor);

// The launchers are declared with C linkage when compiled as C++.
#ifdef __cplusplus
extern "C" {
#endif

// Raw-pointer launchers; float buffers for data and gradients, int for idx.
void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output);
void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight);

#ifdef __cplusplus
}
#endif
#endif
21 | 


--------------------------------------------------------------------------------
/libs/pointops2/src/attention/attention_cuda.cpp:
--------------------------------------------------------------------------------
 1 | #include <vector>
 2 | #include <torch/serialize/tensor.h>
 3 | #include <ATen/cuda/CUDAContext.h>
 4 | #include "attention_cuda_kernel.h"
 5 | 
 6 | void attention_step1_forward_cuda(int N, int M, int h, int C, at::Tensor q_tensor, at::Tensor k_tensor, 
 7 |     at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor)
 8 | {
 9 |     const float *q = q_tensor.data_ptr<float>();
10 |     const float *k = k_tensor.data_ptr<float>();
11 |     const int *index0 = index0_tensor.data_ptr<int>();
12 |     const int *index1 = index1_tensor.data_ptr<int>();
13 |     float *attn = attn_tensor.data_ptr<float>();
14 |     attention_step1_forward_cuda_launcher(N, M, h, C, q, k, index0, index1, attn);
15 | }
16 | 
17 | void attention_step1_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor, 
18 |     at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, 
19 |     at::Tensor grad_q_tensor, at::Tensor grad_k_tensor)
20 | {
21 |     const float *grad_out = grad_out_tensor.data_ptr<float>();
22 |     const int *index0 = index0_tensor.data_ptr<int>();
23 |     const int *index1 = index1_tensor.data_ptr<int>();
24 |     const float *q = q_tensor.data_ptr<float>();
25 |     const float *k = k_tensor.data_ptr<float>();
26 |     float *grad_q = grad_q_tensor.data_ptr<float>();
27 |     float *grad_k = grad_k_tensor.data_ptr<float>();
28 |     attention_step1_backward_cuda_launcher(N, M, h, C, grad_out, index0, index1, q, k, grad_q, grad_k);
29 | }
30 | 
31 | void attention_step2_forward_cuda(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, 
32 |     at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor)
33 | {
34 |     const float *attn = attn_tensor.data_ptr<float>();
35 |     const float *v = v_tensor.data_ptr<float>();
36 |     const int *index0 = index0_tensor.data_ptr<int>();
37 |     const int *index1 = index1_tensor.data_ptr<int>();
38 |     float *output = output_tensor.data_ptr<float>();
39 |     attention_step2_forward_cuda_launcher(N, M, h, C, attn, v, index0, index1, output);
40 | }
41 | 
42 | 
43 | void attention_step2_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor, 
44 |     at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, 
45 |     at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor)
46 | {
47 |     const float *grad_out = grad_out_tensor.data_ptr<float>();
48 |     const int *index0 = index0_tensor.data_ptr<int>();
49 |     const int *index1 = index1_tensor.data_ptr<int>();
50 |     const float *attn = attn_tensor.data_ptr<float>();
51 |     const float *v = v_tensor.data_ptr<float>();
52 |     float *grad_attn = grad_attn_tensor.data_ptr<float>();
53 |     float *grad_v = grad_v_tensor.data_ptr<float>();
54 |     attention_step2_backward_cuda_launcher(N, M, h, C, grad_out, index0, index1, attn, v, grad_attn, grad_v);
55 | }
56 | 


--------------------------------------------------------------------------------
/libs/pointops2/src/attention/attention_cuda_kernel.h:
--------------------------------------------------------------------------------
// Declarations for the two-step attention operators: tensor-level entry
// points (taking at::Tensor) and the raw-pointer launchers they forward to.
#ifndef _ATTENTION_CUDA_KERNEL
#define _ATTENTION_CUDA_KERNEL
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>

// Step 1: forward takes q, k and two index tensors and fills attn_tensor;
// backward takes grad_out plus the same inputs and fills grad_q / grad_k.
void attention_step1_forward_cuda(int N, int M, int h, int C, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor);
void attention_step1_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor);

// Step 2: forward takes attn, v and two index tensors and fills output_tensor;
// backward takes grad_out plus the same inputs and fills grad_attn / grad_v.
void attention_step2_forward_cuda(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor);
void attention_step2_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor);

// The launchers below are declared with C linkage.
#ifdef __cplusplus
extern "C" {
#endif

// Raw-pointer counterparts of the step 1 entry points (same argument order).
void attention_step1_forward_cuda_launcher(int N, int M, int h, int C, const float *q, const float *k, const int *index0, const int *index1, float *attn);
void attention_step1_backward_cuda_launcher(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *q, const float *k, float *grad_q, float *grad_k);

// Raw-pointer counterparts of the step 2 entry points (same argument order).
void attention_step2_forward_cuda_launcher(int N, int M, int h, int C, const float *attn, const float *v, const int *index0, const int *index1, float *output);
void attention_step2_backward_cuda_launcher(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *attn, const float *v, float *grad_attn, float *grad_v);

#ifdef __cplusplus
}
#endif
#endif
27 | 


--------------------------------------------------------------------------------
/libs/pointops2/src/attention_v2/attention_cuda_kernel_v2.h:
--------------------------------------------------------------------------------
// Declarations for the "v2" attention operators: tensor-level entry points
// (taking at::Tensor) and the raw-pointer launchers they forward to.
#ifndef _ATTENTION_V2_CUDA_KERNEL
#define _ATTENTION_V2_CUDA_KERNEL
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>

// Step 1 (v2): compared with the non-v2 signatures, these take an extra
// `n_max` argument and an `index0_tensor_offsets` tensor in place of index0.
void attention_step1_forward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor attn_tensor);
void attention_step1_backward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor grad_out_tensor, at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor);

// Step 2 (v2): same parameter list as the non-v2 step 2 entry points.
void attention_step2_forward_cuda_v2(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor);
void attention_step2_backward_cuda_v2(int N, int M, int h, int C, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor);

// The launchers below are declared with C linkage.
#ifdef __cplusplus
extern "C" {
#endif

// Raw-pointer counterparts of the step 1 (v2) entry points.
void attention_step1_forward_cuda_launcher_v2(int N, int M, int h, int C, const unsigned int n_max, const float *q, const float *k, const int *index0_offsets, const int *index1, float *attn);
void attention_step1_backward_cuda_launcher_v2(int N, int M, int h, int C, const unsigned int n_max, const float *grad_out, const int *index0_offsets, const int *index1, const float *q, const float *k, float *grad_q, float *grad_k);

// Raw-pointer counterparts of the step 2 (v2) entry points.
void attention_step2_forward_cuda_launcher_v2(int N, int M, int h, int C, const float *attn, const float *v, const int *index0, const int *index1, float *output);
void attention_step2_backward_cuda_launcher_v2(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *attn, const float *v, float *grad_attn, float *grad_v);

#ifdef __cplusplus
}
#endif
#endif
27 | 


--------------------------------------------------------------------------------
/libs/pointops2/src/attention_v2/attention_cuda_v2.cpp:
--------------------------------------------------------------------------------
 1 | #include <vector>
 2 | #include <torch/serialize/tensor.h>
 3 | #include <ATen/cuda/CUDAContext.h>
 4 | #include "attention_cuda_kernel_v2.h"
 5 | 
 6 | void attention_step1_forward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor q_tensor, at::Tensor k_tensor, 
 7 |     at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor attn_tensor)
 8 | {
 9 |     const float *q = q_tensor.data_ptr<float>();
10 |     const float *k = k_tensor.data_ptr<float>();
11 |     const int *index0_offsets = index0_tensor_offsets.data_ptr<int>();
12 |     const int *index1 = index1_tensor.data_ptr<int>();
13 |     float *attn = attn_tensor.data_ptr<float>();
14 |     attention_step1_forward_cuda_launcher_v2(N, M, h, C, n_max, q, k, index0_offsets, index1, attn);
15 | }
16 | 
17 | void attention_step1_backward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor grad_out_tensor, 
18 |     at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, 
19 |     at::Tensor grad_q_tensor, at::Tensor grad_k_tensor)
20 | {
21 |     const float *grad_out = grad_out_tensor.data_ptr<float>();
22 |     const int *index0_offsets = index0_tensor_offsets.data_ptr<int>();
23 |     const int *index1 = index1_tensor.data_ptr<int>();
24 |     const float *q = q_tensor.data_ptr<float>();
25 |     const float *k = k_tensor.data_ptr<float>();
26 |     float *grad_q = grad_q_tensor.data_ptr<float>();
27 |     float *grad_k = grad_k_tensor.data_ptr<float>();
28 |     attention_step1_backward_cuda_launcher_v2(N, M, h, C, n_max, grad_out, index0_offsets, index1, q, k, grad_q, grad_k);
29 | }
30 | 
31 | void attention_step2_forward_cuda_v2(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, 
32 |     at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor)
33 | {
34 |     const float *attn = attn_tensor.data_ptr<float>();
35 |     const float *v = v_tensor.data_ptr<float>();
36 |     const int *index0 = index0_tensor.data_ptr<int>();
37 |     const int *index1 = index1_tensor.data_ptr<int>();
38 |     float *output = output_tensor.data_ptr<float>();
39 |     attention_step2_forward_cuda_launcher_v2(N, M, h, C, attn, v, index0, index1, output);
40 | }
41 | 
42 | 
43 | void attention_step2_backward_cuda_v2(int N, int M, int h, int C, at::Tensor grad_out_tensor, 
44 |     at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, 
45 |     at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor)
46 | {
47 |     const float *grad_out = grad_out_tensor.data_ptr<float>();
48 |     const int *index0 = index0_tensor.data_ptr<int>();
49 |     const int *index1 = index1_tensor.data_ptr<int>();
50 |     const float *attn = attn_tensor.data_ptr<float>();
51 |     const float *v = v_tensor.data_ptr<float>();
52 |     float *grad_attn = grad_attn_tensor.data_ptr<float>();
53 |     float *grad_v = grad_v_tensor.data_ptr<float>();
54 |     attention_step2_backward_cuda_launcher_v2(N, M, h, C, grad_out, index0, index1, attn, v, grad_attn, grad_v);
55 | }
56 | 


--------------------------------------------------------------------------------
/libs/pointops2/src/cuda_utils.h:
--------------------------------------------------------------------------------
 1 | #ifndef _CUDA_UTILS_H
 2 | #define _CUDA_UTILS_H
 3 | 
 4 | #include <cmath>
 5 | #include <algorithm>
 6 | 
 7 | #define TOTAL_THREADS 1024
 8 | #define THREADS_PER_BLOCK 256
 9 | #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
10 | 
11 | inline int opt_n_threads(int work_size) {
12 |     const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0);
13 |     return std::max(std::min(1 << pow_2, TOTAL_THREADS), 1);
14 | }
15 | 
16 | inline dim3 opt_block_config(int x, int y) {
17 |     const int x_threads = opt_n_threads(x);
18 |     const int y_threads = std::max(std::min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1);
19 |     dim3 block_config(x_threads, y_threads, 1);
20 |     return block_config;
21 | }
22 | 
23 | #endif
24 | 


--------------------------------------------------------------------------------
/libs/pointops2/src/grouping/grouping_cuda.cpp:
--------------------------------------------------------------------------------
 1 | #include <vector>
 2 | #include <torch/serialize/tensor.h>
 3 | #include <ATen/cuda/CUDAContext.h>
 4 | #include "grouping_cuda_kernel.h"
 5 | 
 6 | 
 7 | void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor)
 8 | {
 9 |     const float *input = input_tensor.data_ptr<float>();
10 |     const int *idx = idx_tensor.data_ptr<int>();
11 |     float *output = output_tensor.data_ptr<float>();
12 |     grouping_forward_cuda_launcher(m, nsample, c, input, idx, output);
13 | }
14 | 
15 | void grouping_backward_cuda(int m, int nsample, int c, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor)
16 | {
17 |     const float *grad_output = grad_output_tensor.data_ptr<float>();
18 |     const int *idx = idx_tensor.data_ptr<int>();
19 |     float *grad_input = grad_input_tensor.data_ptr<float>();
20 |     grouping_backward_cuda_launcher(m, nsample, c, grad_output, idx, grad_input);
21 | }
22 | 


--------------------------------------------------------------------------------
/libs/pointops2/src/grouping/grouping_cuda_kernel.cu:
--------------------------------------------------------------------------------
 1 | #include "../cuda_utils.h"
 2 | #include "grouping_cuda_kernel.h"
 3 | 
 4 | 
 5 | __global__ void grouping_forward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ input, const int *__restrict__ idx, float *__restrict__ output) {
 6 |     // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c)
 7 |     int index = blockIdx.x * blockDim.x + threadIdx.x;
 8 |     if (index >= m * nsample * c) return;
 9 |     const int c_idx = index % c;
10 |     const int nsample_idx = (index / c) % nsample;
11 |     const int m_idx = index / nsample / c;
12 |     const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx;
13 |     output[index] = input[input_idx];
14 | }
15 | 
16 | __global__ void grouping_backward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ grad_output, const int *__restrict__ idx, float *__restrict__ grad_input) {
17 |     // input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c)
18 |     int index = blockIdx.x * blockDim.x + threadIdx.x;
19 |     if (index >= m * nsample * c) return;
20 |     const int c_idx = index % c;
21 |     const int nsample_idx = (index / c) % nsample;
22 |     const int m_idx = index / nsample / c;
23 |     const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx;
24 |     atomicAdd(grad_input + input_idx, grad_output[index]);
25 | }
26 | 
27 | void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output) {
28 |     // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c)
29 |     dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK));
30 |     dim3 threads(THREADS_PER_BLOCK);
31 |     grouping_forward_cuda_kernel<<<blocks, threads, 0>>>(m, nsample, c, input, idx, output);
32 | }
33 | 
34 | void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input)
35 | {  
36 |     // input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c)
37 |     dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK));
38 |     dim3 threads(THREADS_PER_BLOCK);
39 |     grouping_backward_cuda_kernel<<<blocks, threads, 0>>>(m, nsample, c, grad_output, idx, grad_input);
40 | }
41 | 


--------------------------------------------------------------------------------
/libs/pointops2/src/grouping/grouping_cuda_kernel.h:
--------------------------------------------------------------------------------
// Declarations for the grouping operator: tensor-level entry points and the
// raw-pointer CUDA launchers they forward to.
#ifndef _GROUPING_CUDA_KERNEL
#define _GROUPING_CUDA_KERNEL
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>

// Tensor-level entry points (forward and backward).
void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor);
void grouping_backward_cuda(int m, int nsample, int c, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor);

// The launchers below are declared with C linkage.
#ifdef __cplusplus
extern "C" {
#endif

// Raw-pointer launchers; same argument order as the entry points above.
void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output);
void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input);

#ifdef __cplusplus
}
#endif
#endif
21 | 


--------------------------------------------------------------------------------
/libs/pointops2/src/interpolation/interpolation_cuda.cpp:
--------------------------------------------------------------------------------
 1 | #include <vector>
 2 | #include <torch/serialize/tensor.h>
 3 | #include <ATen/cuda/CUDAContext.h>
 4 | #include "interpolation_cuda_kernel.h"
 5 | 
 6 | 
 7 | void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor)
 8 | {
 9 |     const float *input = input_tensor.data_ptr<float>();
10 |     const int *idx = idx_tensor.data_ptr<int>();
11 |     const float *weight = weight_tensor.data_ptr<float>();
12 |     float *output = output_tensor.data_ptr<float>();
13 |     interpolation_forward_cuda_launcher(n, c, k, input, idx, weight, output);
14 | }
15 | 
16 | void interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_input_tensor)
17 | {
18 |     const float *grad_output = grad_output_tensor.data_ptr<float>();
19 |     const int *idx = idx_tensor.data_ptr<int>();
20 |     const float *weight = weight_tensor.data_ptr<float>();
21 |     float *grad_input = grad_input_tensor.data_ptr<float>();
22 |     interpolation_backward_cuda_launcher(n, c, k, grad_output, idx, weight, grad_input);
23 | }
24 | 


--------------------------------------------------------------------------------
/libs/pointops2/src/interpolation/interpolation_cuda_kernel.cu:
--------------------------------------------------------------------------------
 1 | #include "../cuda_utils.h"
 2 | #include "interpolation_cuda_kernel.h"
 3 | 
 4 | 
 5 | __global__ void interpolation_forward_cuda_kernel(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output)
 6 | {
 7 |     // input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c)
 8 |     int index = blockIdx.x * blockDim.x + threadIdx.x;
 9 |     if (index >= n * c) return;
10 |     int c_idx = index % c;
11 |     int n_idx = index / c;
12 |     for (int i = 0; i < k; i++)
13 |     {
14 |         int idx_idx = n_idx * k + i;
15 |         int input_idx = idx[idx_idx] * c + c_idx;
16 |         output[index] += input[input_idx] * weight[idx_idx];
17 |     }
18 | }
19 | 
20 | __global__ void interpolation_backward_cuda_kernel(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input)
21 | {
22 |     // input: grad_output: (n, c), idx: (n, k), weight: (n, k), output: grad_input (m, c)
23 |     int index = blockIdx.x * blockDim.x + threadIdx.x;
24 |     if (index >= n * c) return;
25 |     int c_idx = index % c;
26 |     int n_idx = index / c;
27 |     for (int i = 0; i < k; i++)
28 |     {
29 |         int idx_idx = n_idx * k + i;
30 |         int input_idx = idx[idx_idx] * c + c_idx;
31 |         atomicAdd(grad_input + input_idx, grad_output[index] * weight[idx_idx]);
32 |     }
33 | }
34 | 
35 | void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output) {
36 |     // input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c)
37 |     dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
38 |     dim3 threads(THREADS_PER_BLOCK);
39 |     interpolation_forward_cuda_kernel<<<blocks, threads, 0>>>(n, c, k, input, idx, weight, output);
40 | }
41 | 
42 | void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input) {
43 |     // input: grad_output: (n, c), idx: (n, k), weight: (n, k), output: grad_input (m, c)
44 |     dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
45 |     dim3 threads(THREADS_PER_BLOCK);
46 |     interpolation_backward_cuda_kernel<<<blocks, threads, 0>>>(n, c, k, grad_output, idx, weight, grad_input);
47 | }
48 | 


--------------------------------------------------------------------------------
/libs/pointops2/src/interpolation/interpolation_cuda_kernel.h:
--------------------------------------------------------------------------------
// Declarations for the interpolation operator: tensor-level entry points and
// the raw-pointer CUDA launchers they forward to.
#ifndef _INTERPOLATION_CUDA_KERNEL
#define _INTERPOLATION_CUDA_KERNEL
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>

// Tensor-level entry points (forward and backward).
void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor);
void interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_input_tensor);

// The launchers below are declared with C linkage.
#ifdef __cplusplus
extern "C" {
#endif

// Raw-pointer launchers; same argument order as the entry points above.
void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output);
void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input);

#ifdef __cplusplus
}
#endif
#endif
21 | 


--------------------------------------------------------------------------------
/libs/pointops2/src/knnquery/knnquery_cuda.cpp:
--------------------------------------------------------------------------------
 1 | #include <vector>
 2 | #include <torch/serialize/tensor.h>
 3 | #include <ATen/cuda/CUDAContext.h>
 4 | #include "knnquery_cuda_kernel.h"
 5 | 
 6 | 
 7 | void knnquery_cuda(int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor)
 8 | {
 9 |     const float *xyz = xyz_tensor.data_ptr<float>();
10 |     const float *new_xyz = new_xyz_tensor.data_ptr<float>();
11 |     const int *offset = offset_tensor.data_ptr<int>();
12 |     const int *new_offset = new_offset_tensor.data_ptr<int>();
13 |     int *idx = idx_tensor.data_ptr<int>();
14 |     float *dist2 = dist2_tensor.data_ptr<float>();
15 |     knnquery_cuda_launcher(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2);
16 | }
17 | 


--------------------------------------------------------------------------------
/libs/pointops2/src/knnquery/knnquery_cuda_kernel.cu:
--------------------------------------------------------------------------------
  1 | #include "../cuda_utils.h"
  2 | #include "knnquery_cuda_kernel.h"
  3 | 
  4 | 
  5 | __device__ void swap_float(float *x, float *y)
  6 | {
  7 |     float tmp = *x;
  8 |     *x = *y;
  9 |     *y = tmp;
 10 | }
 11 | 
 12 | 
 13 | __device__ void swap_int(int *x, int *y)
 14 | {
 15 |     int tmp = *x;
 16 |     *x = *y;
 17 |     *y = tmp;
 18 | }
 19 | 
 20 | 
 21 | __device__ void reheap(float *dist, int *idx, int k)
 22 | {
 23 |     int root = 0;
 24 |     int child = root * 2 + 1;
 25 |     while (child < k)
 26 |     {
 27 |         if(child + 1 < k && dist[child+1] > dist[child])
 28 |             child++;
 29 |         if(dist[root] > dist[child])
 30 |             return;
 31 |         swap_float(&dist[root], &dist[child]);
 32 |         swap_int(&idx[root], &idx[child]);
 33 |         root = child;
 34 |         child = root * 2 + 1;
 35 |     }
 36 | }
 37 | 
 38 | 
 39 | __device__ void heap_sort(float *dist, int *idx, int k)
 40 | {
 41 |     int i;
 42 |     for (i = k - 1; i > 0; i--)
 43 |     {
 44 |         swap_float(&dist[0], &dist[i]);
 45 |         swap_int(&idx[0], &idx[i]);
 46 |         reheap(dist, idx, i);
 47 |     }
 48 | }
 49 | 
 50 | 
 51 | __device__ int get_bt_idx(int idx, const int *offset)
 52 | {
 53 |     int i = 0;
 54 |     while (1)
 55 |     {
 56 |         if (idx < offset[i])
 57 |             break;
 58 |         else
 59 |             i++;
 60 |     }
 61 |     return i;
 62 | }
 63 | 
 64 | 
// k-nearest-neighbour query: one thread per query point in new_xyz.
// For its query point each thread scans the xyz rows in [start, end) and
// writes the nsample smallest squared distances (ascending) and the matching
// xyz row indices to dist2 / idx.
//
// NOTE: candidates live in fixed 100-entry per-thread arrays and nothing in
// this kernel checks nsample against that size; callers must keep
// nsample <= 100.
// NOTE(review): offset / new_offset look like cumulative per-batch end
// indices for xyz / new_xyz respectively - confirm against the caller.
__global__ void knnquery_cuda_kernel(int m, int nsample, const float *__restrict__ xyz, const float *__restrict__ new_xyz, const int *__restrict__ offset, const int *__restrict__ new_offset, int *__restrict__ idx, float *__restrict__ dist2) {
    // input: xyz (n, 3) new_xyz (m, 3)
    // output: idx (m, nsample) dist2 (m, nsample)
    int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (pt_idx >= m) return;

    // Advance the pointers to this thread's query point and output rows.
    new_xyz += pt_idx * 3;
    idx += pt_idx * nsample;
    dist2 += pt_idx * nsample;
    // First entry of new_offset that is greater than pt_idx; the lookup is
    // unbounded, so new_offset must contain such an entry.
    int bt_idx = get_bt_idx(pt_idx, new_offset);
    // Range [start, end) of xyz rows searched for this query point.
    int start;
    if (bt_idx == 0)
        start = 0;
    else
        start = offset[bt_idx - 1];
    int end = offset[bt_idx];

    float new_x = new_xyz[0];
    float new_y = new_xyz[1];
    float new_z = new_xyz[2];

    // Max-heap over the best candidates so far; element 0 is the current worst.
    float best_dist[100];
    int best_idx[100];
    // Sentinel fill: slots never replaced keep distance 1e10 and index `start`
    // (this happens when the range holds fewer than nsample points).
    for(int i = 0; i < nsample; i++){
        best_dist[i] = 1e10;
        best_idx[i] = start;
    }
    for(int i = start; i < end; i++){
        float x = xyz[i * 3 + 0];
        float y = xyz[i * 3 + 1];
        float z = xyz[i * 3 + 2];
        float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z);
        // Closer than the current worst: overwrite the root and re-heapify.
        if (d2 < best_dist[0]){
            best_dist[0] = d2;
            best_idx[0] = i;
            reheap(best_dist, best_idx, nsample);
        }
    }
    // Turn the heap into ascending order before writing the results out.
    heap_sort(best_dist, best_idx, nsample);
    for(int i = 0; i < nsample; i++){
        idx[i] = best_idx[i];
        dist2[i] = best_dist[i];
    }
}
109 | 
110 | 
111 | void knnquery_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2) {
112 |     // input: new_xyz: (m, 3), xyz: (n, 3), idx: (m, nsample)
113 |     dim3 blocks(DIVUP(m, THREADS_PER_BLOCK));
114 |     dim3 threads(THREADS_PER_BLOCK);
115 |     knnquery_cuda_kernel<<<blocks, threads, 0>>>(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2);
116 | }
117 | 


--------------------------------------------------------------------------------
/libs/pointops2/src/knnquery/knnquery_cuda_kernel.h:
--------------------------------------------------------------------------------
// Declarations for the k-nearest-neighbour query: the tensor-level entry
// point and the raw-pointer CUDA launcher it forwards to.
#ifndef _KNNQUERY_CUDA_KERNEL
#define _KNNQUERY_CUDA_KERNEL
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>

// Tensor-level entry point; idx_tensor and dist2_tensor receive the results.
void knnquery_cuda(int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor);

// The launcher below is declared with C linkage.
#ifdef __cplusplus
extern "C" {
#endif

// Raw-pointer launcher; same argument order as the entry point above.
void knnquery_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2);

#ifdef __cplusplus
}
#endif
#endif
19 | 


--------------------------------------------------------------------------------
/libs/pointops2/src/pointops_api.cpp:
--------------------------------------------------------------------------------
 1 | #include <torch/serialize/tensor.h>
 2 | #include <torch/extension.h>
 3 | 
 4 | #include "knnquery/knnquery_cuda_kernel.h"
 5 | #include "sampling/sampling_cuda_kernel.h"
 6 | #include "grouping/grouping_cuda_kernel.h"
 7 | #include "interpolation/interpolation_cuda_kernel.h"
 8 | #include "aggregation/aggregation_cuda_kernel.h"
 9 | #include "subtraction/subtraction_cuda_kernel.h"
10 | #include "attention/attention_cuda_kernel.h"
11 | #include "rpe/relative_pos_encoding_cuda_kernel.h"
12 | #include "attention_v2/attention_cuda_kernel_v2.h"
13 | #include "rpe_v2/relative_pos_encoding_cuda_kernel_v2.h"
14 | 
15 | 
16 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
17 |     m.def("knnquery_cuda", &knnquery_cuda, "knnquery_cuda");
18 |     m.def("furthestsampling_cuda", &furthestsampling_cuda, "furthestsampling_cuda");
19 |     m.def("grouping_forward_cuda", &grouping_forward_cuda, "grouping_forward_cuda");
20 |     m.def("grouping_backward_cuda", &grouping_backward_cuda, "grouping_backward_cuda");
21 |     m.def("interpolation_forward_cuda", &interpolation_forward_cuda, "interpolation_forward_cuda");
22 |     m.def("interpolation_backward_cuda", &interpolation_backward_cuda, "interpolation_backward_cuda");
23 |     m.def("subtraction_forward_cuda", &subtraction_forward_cuda, "subtraction_forward_cuda");
24 |     m.def("subtraction_backward_cuda", &subtraction_backward_cuda, "subtraction_backward_cuda");
25 |     m.def("aggregation_forward_cuda", &aggregation_forward_cuda, "aggregation_forward_cuda");
26 |     m.def("aggregation_backward_cuda", &aggregation_backward_cuda, "aggregation_backward_cuda");
27 |     m.def("attention_step1_forward_cuda", &attention_step1_forward_cuda, "attention_step1_forward_cuda");
28 |     m.def("attention_step1_backward_cuda", &attention_step1_backward_cuda, "attention_step1_backward_cuda");
29 |     m.def("attention_step2_forward_cuda", &attention_step2_forward_cuda, "attention_step2_forward_cuda");
30 |     m.def("attention_step2_backward_cuda", &attention_step2_backward_cuda, "attention_step2_backward_cuda");
31 |     m.def("dot_prod_with_idx_forward_cuda", &dot_prod_with_idx_forward_cuda, "dot_prod_with_idx_forward_cuda");
32 |     m.def("dot_prod_with_idx_backward_cuda", &dot_prod_with_idx_backward_cuda, "dot_prod_with_idx_backward_cuda");
33 |     m.def("attention_step2_with_rel_pos_value_forward_cuda", &attention_step2_with_rel_pos_value_forward_cuda, "attention_step2_with_rel_pos_value_forward_cuda");
34 |     m.def("attention_step2_with_rel_pos_value_backward_cuda", &attention_step2_with_rel_pos_value_backward_cuda, "attention_step2_with_rel_pos_value_backward_cuda");
35 |     m.def("attention_step1_forward_cuda_v2", &attention_step1_forward_cuda_v2, "attention_step1_forward_cuda_v2");
36 |     m.def("attention_step1_backward_cuda_v2", &attention_step1_backward_cuda_v2, "attention_step1_backward_cuda_v2");
37 |     m.def("attention_step2_forward_cuda_v2", &attention_step2_forward_cuda_v2, "attention_step2_forward_cuda_v2");
38 |     m.def("attention_step2_backward_cuda_v2", &attention_step2_backward_cuda_v2, "attention_step2_backward_cuda_v2");
39 |     m.def("dot_prod_with_idx_forward_cuda_v2", &dot_prod_with_idx_forward_cuda_v2, "dot_prod_with_idx_forward_cuda_v2");
40 |     m.def("dot_prod_with_idx_backward_cuda_v2", &dot_prod_with_idx_backward_cuda_v2, "dot_prod_with_idx_backward_cuda_v2");
41 |     m.def("attention_step2_with_rel_pos_value_forward_cuda_v2", &attention_step2_with_rel_pos_value_forward_cuda_v2, "attention_step2_with_rel_pos_value_forward_cuda_v2");
42 |     m.def("attention_step2_with_rel_pos_value_backward_cuda_v2", &attention_step2_with_rel_pos_value_backward_cuda_v2, "attention_step2_with_rel_pos_value_backward_cuda_v2");
43 |     m.def("dot_prod_with_idx_forward_cuda_v3", &dot_prod_with_idx_forward_cuda_v3, "dot_prod_with_idx_forward_cuda_v3");
44 |     m.def("dot_prod_with_idx_backward_cuda_v3", &dot_prod_with_idx_backward_cuda_v3, "dot_prod_with_idx_backward_cuda_v3");
45 |     }
46 | 


--------------------------------------------------------------------------------
/libs/pointops2/src/rpe/relative_pos_encoding_cuda.cpp:
--------------------------------------------------------------------------------
 1 | #include <vector>
 2 | #include <torch/serialize/tensor.h>
 3 | #include <ATen/cuda/CUDAContext.h>
 4 | #include "relative_pos_encoding_cuda_kernel.h"
 5 | 
 6 | void dot_prod_with_idx_forward_cuda(int N, int M, int h, int hdim, at::Tensor q_tensor, at::Tensor index_tensor, 
 7 |     at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor)
 8 | {
 9 |     const float *q = q_tensor.data_ptr<float>();
10 |     const float *table = table_tensor.data_ptr<float>();
11 |     const int *index = index_tensor.data_ptr<int>();
12 |     const int *rel_idx = rel_idx_tensor.data_ptr<int>();
13 |     float *output = output_tensor.data_ptr<float>();
14 |     dot_prod_with_idx_forward_cuda_launcher(N, M, h, hdim, q, index, table, rel_idx, output);
15 | }
16 | 
17 | void dot_prod_with_idx_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, 
18 |     at::Tensor q_tensor, at::Tensor index_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, 
19 |     at::Tensor grad_q_tensor, at::Tensor grad_table_tensor)
20 | {
21 |     const float *grad_out = grad_out_tensor.data_ptr<float>();
22 |     const float *q = q_tensor.data_ptr<float>();
23 |     const int *index = index_tensor.data_ptr<int>();
24 |     const float *table = table_tensor.data_ptr<float>();
25 |     const int *rel_idx = rel_idx_tensor.data_ptr<int>();
26 |     float *grad_q = grad_q_tensor.data_ptr<float>();
27 |     float *grad_table = grad_table_tensor.data_ptr<float>();
28 |     dot_prod_with_idx_backward_cuda_launcher(N, M, h, hdim, grad_out, q, index, table, rel_idx, grad_q, grad_table);
29 | }
30 | 
31 | void attention_step2_with_rel_pos_value_forward_cuda(int N, int M, int h, int hdim, at::Tensor attn_tensor, at::Tensor v_tensor, 
32 |     at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor)
33 | {
34 |     const float *attn = attn_tensor.data_ptr<float>();
35 |     const float *v = v_tensor.data_ptr<float>();
36 |     const int *index0 = index0_tensor.data_ptr<int>();
37 |     const int *index1 = index1_tensor.data_ptr<int>();
38 |     const float *table = table_tensor.data_ptr<float>();
39 |     const int *rel_idx = rel_idx_tensor.data_ptr<int>();
40 |     float *output = output_tensor.data_ptr<float>();
41 |     attention_step2_with_rel_pos_value_forward_cuda_launcher(N, M, h, hdim, attn, v, index0, index1, table, rel_idx, output);
42 | }
43 | 
44 | void attention_step2_with_rel_pos_value_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, 
45 |     at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor table_tensor,
46 |     at::Tensor rel_idx_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor, at::Tensor grad_table_tensor)
47 | {
48 |     const float *grad_out = grad_out_tensor.data_ptr<float>();
49 |     const int *index0 = index0_tensor.data_ptr<int>();
50 |     const int *index1 = index1_tensor.data_ptr<int>();
51 |     const float *attn = attn_tensor.data_ptr<float>();
52 |     const float *v = v_tensor.data_ptr<float>();
53 |     const float *table = table_tensor.data_ptr<float>();
54 |     const int *rel_idx = rel_idx_tensor.data_ptr<int>();
55 |     float *grad_attn = grad_attn_tensor.data_ptr<float>();
56 |     float *grad_v = grad_v_tensor.data_ptr<float>();
57 |     float *grad_table = grad_table_tensor.data_ptr<float>();
58 |     attention_step2_with_rel_pos_value_backward_cuda_launcher(N, M, h, hdim, grad_out, index0, index1, attn, v, table, rel_idx, grad_attn, grad_v, grad_table);
59 | }
60 | 


--------------------------------------------------------------------------------
/libs/pointops2/src/rpe/relative_pos_encoding_cuda_kernel.h:
--------------------------------------------------------------------------------
 1 | #ifndef _RPE_CUDA_KERNEL
 2 | #define _RPE_CUDA_KERNEL
 3 | #include <vector>
 4 | #include <torch/serialize/tensor.h>
 5 | #include <ATen/cuda/CUDAContext.h>
 6 | 
// Tensor-level wrappers for the indexed dot product. Their definitions in
// relative_pos_encoding_cuda.cpp pull raw float/int pointers out of the tensors
// and call the matching *_launcher declared further down.
void dot_prod_with_idx_forward_cuda(int N, int M, int h, int hdim, at::Tensor q_tensor, at::Tensor index_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor);
void dot_prod_with_idx_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, at::Tensor q_tensor, at::Tensor index_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor grad_q_tensor, at::Tensor grad_table_tensor);

// Tensor-level wrappers for attention "step 2" with a relative-position value table.
void attention_step2_with_rel_pos_value_forward_cuda(int N, int M, int h, int hdim, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor);
void attention_step2_with_rel_pos_value_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor, at::Tensor grad_table_tensor);

// The launchers below are given C linkage when this header is compiled as C++.
#ifdef __cplusplus
extern "C" {
#endif

// Raw-pointer launchers for the indexed dot product (const pointers are inputs,
// non-const pointers are outputs).
void dot_prod_with_idx_forward_cuda_launcher(int N, int M, int h, int hdim, const float *q, const int *index, const float *table, const int *rel_idx, float *output);
void dot_prod_with_idx_backward_cuda_launcher(int N, int M, int h, int hdim, const float *grad_out, const float *q, const int *index, const float *table, const int *rel_idx, float *grad_q, float *grad_table);

// Raw-pointer launchers for attention "step 2" with a relative-position value table.
void attention_step2_with_rel_pos_value_forward_cuda_launcher(int N, int M, int h, int hdim, const float *attn, const float *v, const int *index0, const int *index1, const float *table, const int *rel_idx, float *output);
void attention_step2_with_rel_pos_value_backward_cuda_launcher(int N, int M, int h, int hdim, const float *grad_out, const int *index0, const int *index1, const float *attn, const float *v, const float *table, const int *rel_idx, float *grad_attn, float *grad_v, float *grad_table);

#ifdef __cplusplus
}
#endif
26 | #endif
27 | 


--------------------------------------------------------------------------------
/libs/pointops2/src/rpe_v2/relative_pos_encoding_cuda_kernel_v2.h:
--------------------------------------------------------------------------------
 1 | #ifndef _RPE_V2_CUDA_KERNEL
 2 | #define _RPE_V2_CUDA_KERNEL
 3 | #include <vector>
 4 | #include <torch/serialize/tensor.h>
 5 | #include <ATen/cuda/CUDAContext.h>
 6 | 
// Tensor-level wrappers, "v2" variant of the indexed dot product. Compared with
// the v3 declarations below, these additionally take T, index_q,
// rel_idx_offsets and sort_indices.
// NOTE(review): definitions are not in this header; presumably they unwrap the
// tensors and call the *_launcher_v2 functions declared below - confirm.
void dot_prod_with_idx_forward_cuda_v2(int N, int M, int h, int hdim, int n_max, int T, at::Tensor q_tensor, at::Tensor index_q_tensor, at::Tensor k_tensor, at::Tensor index_k_tensor, at::Tensor table_q_tensor, at::Tensor table_k_tensor, at::Tensor rel_idx_tensor, at::Tensor rel_idx_offsets_tensor, at::Tensor sort_indices_tensor, at::Tensor output_tensor);
void dot_prod_with_idx_backward_cuda_v2(int N, int M, int h, int hdim, int n_max, int T, at::Tensor grad_out_tensor, at::Tensor q_tensor, at::Tensor index_q_tensor, at::Tensor k_tensor, at::Tensor index_k_tensor, at::Tensor table_q_tensor, at::Tensor table_k_tensor, at::Tensor rel_idx_tensor, at::Tensor rel_idx_offsets_tensor, at::Tensor sort_indices_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor, at::Tensor grad_table_q_tensor, at::Tensor grad_table_k_tensor);

// Tensor-level wrappers, "v3" variant: takes index_q_offsets instead of index_q
// and has no T / rel_idx_offsets / sort_indices arguments.
void dot_prod_with_idx_forward_cuda_v3(int N, int M, int h, int hdim, int n_max, at::Tensor q_tensor, at::Tensor index_q_offsets_tensor, at::Tensor k_tensor, at::Tensor index_k_tensor, at::Tensor table_q_tensor, at::Tensor table_k_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor);
void dot_prod_with_idx_backward_cuda_v3(int N, int M, int h, int hdim, int n_max, at::Tensor grad_out_tensor, at::Tensor q_tensor, at::Tensor index_q_offsets_tensor, at::Tensor k_tensor, at::Tensor index_k_tensor, at::Tensor table_q_tensor, at::Tensor table_k_tensor, at::Tensor rel_idx_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor, at::Tensor grad_table_q_tensor, at::Tensor grad_table_k_tensor);

// Tensor-level wrappers, "v2" variant of attention "step 2" with a
// relative-position value table (takes index0_offsets and n_max).
void attention_step2_with_rel_pos_value_forward_cuda_v2(int N, int M, int h, int hdim, int n_max, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_offsets_tensor, at::Tensor index1_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor);
void attention_step2_with_rel_pos_value_backward_cuda_v2(int N, int M, int h, int hdim, int n_max, at::Tensor grad_out_tensor, at::Tensor index0_offsets_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor, at::Tensor grad_table_tensor);

// The launchers below are given C linkage when this header is compiled as C++.
#ifdef __cplusplus
extern "C" {
#endif

// Raw-pointer launchers matching the v2 dot-product wrappers above
// (const pointers are inputs, non-const pointers are outputs).
void dot_prod_with_idx_forward_cuda_launcher_v2(int N, int M, int h, int hdim, int n_max, int T, const float *q, const int *index_q, const float *k, const int *index_k, const float *table_q, const float *table_k, const int *rel_idx, const int *rel_idx_offsets, const int *sort_indices, float *output);
void dot_prod_with_idx_backward_cuda_launcher_v2(int N, int M, int h, int hdim, int n_max, int T, const float *grad_out, const float *q, const int *index_q, const float *k, const int *index_k, const float *table_q, const float *table_k, const int *rel_idx, const int *rel_idx_offsets, const int *sort_indices, float *grad_q, float *grad_k, float *grad_table_q, float *grad_table_k);

// Raw-pointer launchers matching the v3 dot-product wrappers above.
void dot_prod_with_idx_forward_cuda_launcher_v3(int N, int M, int h, int hdim, int n_max, const float *q, const int *index_q_offsets, const float *k, const int *index_k, const float *table_q, const float *table_k, const int *rel_idx, float *output);
void dot_prod_with_idx_backward_cuda_launcher_v3(int N, int M, int h, int hdim, int n_max, const float *grad_out, const float *q, const int *index_q_offsets, const float *k, const int *index_k, const float *table_q, const float *table_k, const int *rel_idx, float *grad_q, float *grad_k, float *grad_table_q, float *grad_table_k);

// Raw-pointer launchers matching the v2 attention "step 2" wrappers above.
void attention_step2_with_rel_pos_value_forward_cuda_launcher_v2(int N, int M, int h, int hdim, int n_max, const float *attn, const float *v, const int *index0_offsets, const int *index1, const float *table, const int *rel_idx, float *output);
void attention_step2_with_rel_pos_value_backward_cuda_launcher_v2(int N, int M, int h, int hdim, int n_max, const float *grad_out, const int *index0_offsets, const int *index1, const float *attn, const float *v, const float *table, const int *rel_idx, float *grad_attn, float *grad_v, float *grad_table);

#ifdef __cplusplus
}
#endif
32 | #endif
33 | 


--------------------------------------------------------------------------------
/libs/pointops2/src/sampling/sampling_cuda.cpp:
--------------------------------------------------------------------------------
 1 | #include <vector>
 2 | #include <torch/serialize/tensor.h>
 3 | #include <ATen/cuda/CUDAContext.h>
 4 | #include "sampling_cuda_kernel.h"
 5 | 
 6 | 
 7 | void furthestsampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor)
 8 | {
 9 |     const float *xyz = xyz_tensor.data_ptr<float>();
10 |     const int *offset = offset_tensor.data_ptr<int>();
11 |     const int *new_offset = new_offset_tensor.data_ptr<int>();
12 |     float *tmp = tmp_tensor.data_ptr<float>();
13 |     int *idx = idx_tensor.data_ptr<int>();
14 |     furthestsampling_cuda_launcher(b, n, xyz, offset, new_offset, tmp, idx);
15 | }
16 | 


--------------------------------------------------------------------------------
/libs/pointops2/src/sampling/sampling_cuda_kernel.h:
--------------------------------------------------------------------------------
 1 | #ifndef _SAMPLING_CUDA_KERNEL
 2 | #define _SAMPLING_CUDA_KERNEL
 3 | #include <vector>
 4 | #include <torch/serialize/tensor.h>
 5 | #include <ATen/cuda/CUDAContext.h>
 6 | 
// Tensor-level wrapper; its definition in sampling_cuda.cpp extracts raw
// pointers from the tensors and calls furthestsampling_cuda_launcher.
void furthestsampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor);

// The launcher below is given C linkage when this header is compiled as C++.
#ifdef __cplusplus
extern "C" {
#endif

// Raw-pointer launcher: xyz/offset/new_offset are read-only, tmp and idx are writable.
void furthestsampling_cuda_launcher(int b, int n, const float *xyz, const int *offset, const int *new_offset, float *tmp, int *idx);

#ifdef __cplusplus
}
#endif
18 | #endif
19 | 


--------------------------------------------------------------------------------
/libs/pointops2/src/subtraction/subtraction_cuda.cpp:
--------------------------------------------------------------------------------
 1 | #include <vector>
 2 | #include <torch/serialize/tensor.h>
 3 | #include <ATen/cuda/CUDAContext.h>
 4 | #include "subtraction_cuda_kernel.h"
 5 | 
 6 | 
 7 | void subtraction_forward_cuda(int n, int nsample, int c, at::Tensor input1_tensor, at::Tensor input2_tensor, at::Tensor idx_tensor, at::Tensor output_tensor)
 8 | {
 9 |     const float *input1 = input1_tensor.data_ptr<float>();
10 |     const float *input2 = input2_tensor.data_ptr<float>();
11 |     const int *idx = idx_tensor.data_ptr<int>();
12 |     float *output = output_tensor.data_ptr<float>();
13 |     subtraction_forward_cuda_launcher(n, nsample, c, input1, input2, idx, output);
14 | }
15 | 
16 | void subtraction_backward_cuda(int n, int nsample, int c, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input1_tensor, at::Tensor grad_input2_tensor)
17 | {
18 |     const int *idx = idx_tensor.data_ptr<int>();
19 |     const float *grad_output = grad_output_tensor.data_ptr<float>();
20 |     float *grad_input1 = grad_input1_tensor.data_ptr<float>();
21 |     float *grad_input2 = grad_input2_tensor.data_ptr<float>();
22 |     subtraction_backward_cuda_launcher(n, nsample, c, idx, grad_output, grad_input1, grad_input2);
23 | }
24 | 


--------------------------------------------------------------------------------
/libs/pointops2/src/subtraction/subtraction_cuda_kernel.cu:
--------------------------------------------------------------------------------
 1 | #include "../cuda_utils.h"
 2 | #include "subtraction_cuda_kernel.h"
 3 | 
 4 | 
 5 | __global__ void subtraction_forward_cuda_kernel(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) {
 6 |     // input: input1: (n, c), input2: (n, c), idx: (n, nsample), output: (n, nsample, c)
 7 |     int index = blockIdx.x * blockDim.x + threadIdx.x;
 8 |     if (index >= n * nsample * c) return;
 9 |     const int c_idx = index % c;
10 |     const int nsample_idx = (index / c) % nsample;
11 |     const int n_idx = index / nsample / c;
12 |     const int idx_idx = n_idx * nsample + nsample_idx;
13 |     const int input1_idx = n_idx * c + c_idx;
14 |     const int input2_idx = idx[idx_idx] * c + c_idx;
15 |     output[index] = input1[input1_idx] - input2[input2_idx];
16 | }
17 | 
// One thread per element of grad_output (n, nsample, c): adds the gradient to
// grad_input1[p, ch] and subtracts it from grad_input2[idx[p, s], ch].
// Both destinations are (n, c); atomicAdd is used because several threads can
// target the same destination element.
__global__ void subtraction_backward_cuda_kernel(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int total = n * nsample * c;
    if (tid >= total) return;  // threads past the end of grad_output do nothing

    // Split the flat gradient offset into channel and (point, sample) pair.
    const int channel = tid % c;
    const int pair = tid / c;           // flat offset into idx: point * nsample + sample
    const int point = pair / nsample;   // row of grad_input1 receiving +grad
    const int neighbor = idx[pair];     // row of grad_input2 receiving -grad

    const float grad = grad_output[tid];
    atomicAdd(grad_input1 + (point * c + channel), grad);
    atomicAdd(grad_input2 + (neighbor * c + channel), -grad);
}
31 | 
// Launches subtraction_forward_cuda_kernel with one thread per output element.
// Layouts: input1 (n, c), input2 (n, c), idx (n, nsample), output (n, nsample, c).
void subtraction_forward_cuda_launcher(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) {
    const int total = n * nsample * c;
    dim3 threads(THREADS_PER_BLOCK);
    dim3 blocks(DIVUP(total, THREADS_PER_BLOCK));  // enough blocks to cover every element
    subtraction_forward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, input1, input2, idx, output);
}
38 | 
// Launches subtraction_backward_cuda_kernel with one thread per gradient element.
// Layouts: grad_output (n, nsample, c); grad_input1 and grad_input2 are (n, c).
void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) {
    const int total = n * nsample * c;
    dim3 threads(THREADS_PER_BLOCK);
    dim3 blocks(DIVUP(total, THREADS_PER_BLOCK));  // enough blocks to cover every element
    subtraction_backward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, idx, grad_output, grad_input1, grad_input2);
}
45 | 


--------------------------------------------------------------------------------
/libs/pointops2/src/subtraction/subtraction_cuda_kernel.h:
--------------------------------------------------------------------------------
 1 | #ifndef _SUBTRACTION_CUDA_KERNEL
 2 | #define _SUBTRACTION_CUDA_KERNEL
 3 | #include <vector>
 4 | #include <torch/serialize/tensor.h>
 5 | #include <ATen/cuda/CUDAContext.h>
 6 | 
// Tensor-level wrappers; their definitions in subtraction_cuda.cpp extract raw
// pointers from the tensors and call the launchers declared below.
void subtraction_forward_cuda(int n, int nsample, int c, at::Tensor input1_tensor, at::Tensor input2_tensor, at::Tensor idx_tensor, at::Tensor output_tensor);
void subtraction_backward_cuda(int n, int nsample, int c, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input1_tensor, at::Tensor grad_input2_tensor);

// The launchers below are given C linkage when this header is compiled as C++.
#ifdef __cplusplus
extern "C" {
#endif

// Raw-pointer launchers (defined in subtraction_cuda_kernel.cu); const pointers
// are inputs, non-const pointers are outputs.
void subtraction_forward_cuda_launcher(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output);
void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2);

#ifdef __cplusplus
}
#endif
20 | #endif
21 | 


--------------------------------------------------------------------------------
/pcr/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pointcept/PointTransformerV2/5386c4d71f3d6c42c24a8105fce8750e9355dc54/pcr/__init__.py


--------------------------------------------------------------------------------
/pcr/datasets/__init__.py:
--------------------------------------------------------------------------------
 1 | from .defaults import DefaultDataset, ConcatDataset
 2 | from .s3dis import S3DISDataset
 3 | from .scannet import ScanNetDataset, ScanNet200Dataset
 4 | from .scannet_pair import ScanNetPairDataset
 5 | from .modelnet import ModelNetDataset
 6 | from .shapenet_part import ShapeNetPartDataset
 7 | from .semantic_kitti import SemanticKITTIDataset
 8 | from .arkitscenes import ArkitScenesDataset
 9 | from .builder import build_dataset
10 | from .utils import point_collate_fn, collate_fn
11 | 


--------------------------------------------------------------------------------
/pcr/datasets/arkitscenes.py:
--------------------------------------------------------------------------------
  1 | """
  2 | ArkitScenes Dataset
  3 | 
  4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com)
  5 | Please cite our work if the code is helpful to you.
  6 | """
  7 | 
  8 | import os
  9 | import glob
 10 | import numpy as np
 11 | import torch
 12 | from copy import deepcopy
 13 | from torch.utils.data import Dataset
 14 | 
 15 | from pcr.utils.logger import get_root_logger
 16 | from .builder import DATASETS
 17 | from .transform import Compose, TRANSFORMS
 18 | from .preprocessing.scannet.meta_data.scannet200_constants import VALID_CLASS_IDS_200
 19 | 
 20 | 
 21 | @DATASETS.register_module()
 22 | class ArkitScenesDataset(Dataset):
 23 |     def __init__(self,
 24 |                  split="Training",
 25 |                  data_root="data/ARKitScenesMesh",
 26 |                  transform=None,
 27 |                  test_mode=False,
 28 |                  test_cfg=None,
 29 |                  loop=1):
 30 |         super(ArkitScenesDataset, self).__init__()
 31 |         self.data_root = data_root
 32 |         self.split = split
 33 |         self.transform = Compose(transform)
 34 |         self.loop = loop if not test_mode else 1    # force make loop = 1 while in test mode
 35 |         self.test_mode = test_mode
 36 |         self.test_cfg = test_cfg if test_mode else None
 37 |         self.class2id = np.array(VALID_CLASS_IDS_200)
 38 | 
 39 |         if test_mode:
 40 |             self.test_voxelize = TRANSFORMS.build(self.test_cfg.voxelize)
 41 |             self.test_crop = TRANSFORMS.build(self.test_cfg.crop)
 42 |             self.post_transform = Compose(self.test_cfg.post_transform)
 43 |             self.aug_transform = [Compose(aug) for aug in self.test_cfg.aug_transform]
 44 | 
 45 |         self.data_list = self.get_data_list()
 46 |         logger = get_root_logger()
 47 |         logger.info("Totally {} x {} samples in {} set.".format(len(self.data_list), self.loop, split))
 48 | 
 49 |     def get_data_list(self):
 50 |         if isinstance(self.split, str):
 51 |             data_list = glob.glob(os.path.join(self.data_root, self.split, "*.pth"))
 52 |         elif isinstance(self.split, list):
 53 |             data_list = []
 54 |             for split in self.split:
 55 |                 data_list += glob.glob(os.path.join(self.data_root, split, "*.pth"))
 56 |         else:
 57 |             raise NotImplementedError
 58 |         return data_list
 59 | 
 60 |     def get_data(self, idx):
 61 |         data = torch.load(self.data_list[idx % len(self.data_list)])
 62 |         coord = data["coord"]
 63 |         color = data["color"]
 64 |         normal = data["normal"]
 65 |         label = np.zeros(coord.shape[0])
 66 |         data_dict = dict(coord=coord, normal=normal, color=color, label=label)
 67 |         return data_dict
 68 | 
 69 |     def get_data_name(self, idx):
 70 |         data_idx = self.data_idx[idx % len(self.data_idx)]
 71 |         return os.path.basename(self.data_list[data_idx]).split(".")[0]
 72 | 
 73 |     def prepare_train_data(self, idx):
 74 |         # load data
 75 |         data_dict = self.get_data(idx)
 76 |         data_dict = self.transform(data_dict)
 77 |         return data_dict
 78 | 
 79 |     def prepare_test_data(self, idx):
 80 |         # load data
 81 |         data_dict = self.get_data(idx)
 82 |         label = data_dict.pop("label")
 83 |         data_dict = self.transform(data_dict)
 84 |         data_dict_list = []
 85 |         for aug in self.aug_transform:
 86 |             data_dict_list.append(
 87 |                 aug(deepcopy(data_dict))
 88 |             )
 89 | 
 90 |         input_dict_list = []
 91 |         for data in data_dict_list:
 92 |             data_part_list = self.test_voxelize(data)
 93 |             for data_part in data_part_list:
 94 |                 data_part_list = self.test_crop(data_part)
 95 |                 input_dict_list += data_part_list
 96 | 
 97 |         for i in range(len(input_dict_list)):
 98 |             input_dict_list[i] = self.post_transform(input_dict_list[i])
 99 |         return input_dict_list, label
100 | 
101 |     def __getitem__(self, idx):
102 |         if self.test_mode:
103 |             return self.prepare_test_data(idx)
104 |         else:
105 |             return self.prepare_train_data(idx)
106 | 
107 |     def __len__(self):
108 |         return len(self.data_list) * self.loop
109 | 
110 | 


--------------------------------------------------------------------------------
/pcr/datasets/builder.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Dataset Builder
 3 | 
 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com)
 5 | Please cite our work if the code is helpful to you.
 6 | """
 7 | 
 8 | 
 9 | from pcr.utils.registry import Registry
10 | 
# Registry of dataset classes; build_dataset() below instantiates entries from it.
DATASETS = Registry('datasets')
12 | 
13 | 
def build_dataset(cfg):
    """Build a dataset from ``cfg`` through the ``DATASETS`` registry.

    Args:
        cfg: Dataset config forwarded unchanged to ``DATASETS.build``.

    Returns:
        Whatever ``DATASETS.build(cfg)`` returns.
    """
    dataset = DATASETS.build(cfg)
    return dataset
17 | 


--------------------------------------------------------------------------------
/pcr/datasets/modelnet.py:
--------------------------------------------------------------------------------
 1 | """
 2 | ModelNet40 Dataset (Unmaintained)
 3 | 
 4 | get sampled point clouds of ModelNet40 (XYZ and normal from mesh, 10k points per shape)
 5 | at "https://shapenet.cs.stanford.edu/media/modelnet40_normal_resampled.zip"
 6 | 
 7 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com)
 8 | Please cite our work if the code is helpful to you.
 9 | """
10 | 
11 | import os
12 | import numpy as np
13 | import torch
14 | from torch.utils.data import Dataset
15 | 
16 | from pcr.utils.logger import get_root_logger
17 | from .builder import DATASETS
18 | from .transform import Compose, TRANSFORMS
19 | 
20 | 
@DATASETS.register_module()
class ModelNetDataset(Dataset):
    """ModelNet40 dataset read from the ``modelnet40_normal_resampled`` layout.

    The sample names come from ``<data_root>/modelnet40_<split>.txt`` (one name
    per line). Each sample is a comma-separated text file at
    ``<data_root>/<shape>/<name>.txt`` whose first six columns are read as
    coordinates (0:3) and normals (3:6); ``<shape>`` is the sample name without
    its last ``_``-separated token.

    Args:
        split: Suffix of the list file to read (``modelnet40_<split>.txt``).
        data_root: Root directory of the dataset.
        class_names: Sequence of shape names; the position of a name is its
            label. Required in practice: the default ``None`` cannot be zipped.
        transform: Transform config passed to ``Compose``.
        cache_data: Keep loaded samples in memory for later training accesses.
        test_mode: Use ``prepare_test_data`` in ``__getitem__`` and force
            ``loop`` to 1.
        test_cfg: Stored only when ``test_mode`` is true; not used otherwise.
        loop: Number of times the sample list is repeated per epoch.
    """

    def __init__(self,
                 split='train',
                 data_root='data/modelnet40_normal_resampled',
                 class_names=None,
                 transform=None,
                 cache_data=False,
                 test_mode=False,
                 test_cfg=None,
                 loop=1):
        super(ModelNetDataset, self).__init__()
        self.data_root = data_root
        self.class_names = dict(zip(class_names, range(len(class_names))))
        self.split = split
        self.cache_data = cache_data
        self.transform = Compose(transform)
        self.loop = loop if not test_mode else 1    # force make loop = 1 while in test mode
        self.test_mode = test_mode
        self.test_cfg = test_cfg if test_mode else None
        self.cache = {}

        # TODO: Optimize test mode (no test-specific setup is done yet).

        list_path = os.path.join(self.data_root, 'modelnet40_{}.txt'.format(self.split))
        # Context manager so the list file is closed right after reading.
        with open(list_path) as list_file:
            self.data_list = [line.rstrip() for line in list_file]
        logger = get_root_logger()
        # The original referenced an undefined ``self.data_idx`` here.
        logger.info("Totally {} x {} samples in {} set.".format(len(self.data_list), self.loop, split))

    def _load_sample(self, data_idx):
        """Read sample ``data_idx`` from disk and return ``(coord, norm, label)``."""
        data_name = self.data_list[data_idx]
        data_shape = '_'.join(data_name.split('_')[0: -1])
        data_path = os.path.join(self.data_root, data_shape, data_name + '.txt')
        data = np.loadtxt(data_path, delimiter=',').astype(np.float32)
        coord, norm = data[:, 0:3], data[:, 3:6]
        label = np.array([self.class_names[data_shape]])
        return coord, norm, label

    def prepare_train_data(self, idx):
        """Load sample ``idx`` (modulo the list length) and transform it."""
        data_idx = idx % len(self.data_list)
        if self.cache_data and data_idx in self.cache:
            coord, norm, label = self.cache[data_idx]
        else:
            # The original only filled the cache when caching was disabled, so
            # the first cached lookup always raised KeyError.
            coord, norm, label = self._load_sample(data_idx)
            if self.cache_data:
                self.cache[data_idx] = (coord, norm, label)

        if self.cache_data:
            # Hand copies to the transform so that a transform which edits
            # arrays in place cannot alter the cached originals.
            coord, norm, label = coord.copy(), norm.copy(), label.copy()

        data_dict = dict(coord=coord, norm=norm, label=label)
        data_dict = self.transform(data_dict)
        return data_dict

    def prepare_test_data(self, idx):
        """Load sample ``idx`` from disk (never cached) and transform it."""
        assert idx < len(self.data_list)
        coord, norm, label = self._load_sample(idx)

        data_dict = dict(coord=coord, norm=norm, label=label)
        data_dict = self.transform(data_dict)
        return data_dict

    def get_data_name(self, idx):
        """Return the list-file entry of sample ``idx`` (modulo the list length)."""
        data_idx = idx % len(self.data_list)
        return self.data_list[data_idx]

    def __getitem__(self, idx):
        if self.test_mode:
            return self.prepare_test_data(idx)
        else:
            return self.prepare_train_data(idx)

    def __len__(self):
        return len(self.data_list) * self.loop
95 | 


--------------------------------------------------------------------------------
/pcr/datasets/preprocessing/arkitscenes/preprocess_arkitscenes_mesh.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Preprocessing ArkitScenes
 3 | """
 4 | import os
 5 | import argparse
 6 | import glob
 7 | import plyfile
 8 | import numpy as np
 9 | import pandas as pd
10 | import multiprocessing as mp
11 | from concurrent.futures import ProcessPoolExecutor
12 | from itertools import repeat
13 | 
14 | import torch
15 | 
16 | 
def read_plymesh(filepath):
    """Load a PLY mesh and return ``(vertices, faces)`` as numpy arrays.

    ``vertices`` holds every property of the ``vertex`` element, one row per
    vertex; ``faces`` stacks the ``vertex_indices`` of the ``face`` element.
    Returns ``None`` when the file contains no elements.
    """
    with open(filepath, 'rb') as ply_file:
        mesh = plyfile.PlyData.read(ply_file)

    if not mesh.elements:
        return None

    vertex_table = pd.DataFrame(mesh['vertex'].data)
    face_indices = np.stack(mesh['face'].data['vertex_indices'], axis=0)
    return vertex_table.values, face_indices
25 | 
26 | 
def face_normal(vertex, face):
    """Return per-face unit normals and areas of a triangle mesh.

    Args:
        vertex: Array of vertex positions, indexed by the entries of ``face``.
        face: Integer array with one row of three vertex indices per triangle.

    Returns:
        ``(nf, area)``: the cross product of the two edges leaving the first
        vertex divided by its length, and half of that length (kept as a
        column). A ``1e-8`` epsilon is added to the length before both uses so
        degenerate triangles do not divide by zero.
    """
    origin = vertex[face[:, 0]]
    edge_a = vertex[face[:, 1]] - origin
    edge_b = vertex[face[:, 2]] - origin

    cross = np.cross(edge_a, edge_b)
    norm = np.sqrt(np.square(cross).sum(axis=1, keepdims=True)) + 1.0e-8
    return cross / norm, norm * 0.5
35 | 
36 | 
def vertex_normal(vertex, face):
    """Return per-vertex unit normals as area-weighted sums of face normals.

    Each face adds its unit normal scaled by its area to the three vertices it
    references; the accumulated vectors are then normalized, with a ``1e-8``
    epsilon added to the length so unreferenced vertices do not divide by zero.
    """
    unit_normals, areas = face_normal(vertex, face)
    weighted = unit_normals * areas

    accum = np.zeros_like(vertex)
    for tri, contribution in zip(face, weighted):
        accum[tri] += contribution

    magnitude = np.sqrt(np.square(accum).sum(axis=1, keepdims=True)) + 1.0e-8
    return accum / magnitude
48 | 
49 | 
def parse_scene(scene_path, output_dir):
    """Convert one mesh file into ``<output_dir>/<split>/<scene_id>.pth``.

    ``scene_id`` is the name of the directory containing ``scene_path`` and
    ``split`` is the name of that directory's parent. The saved dict holds
    ``coord`` (vertex columns 0:3), ``color`` (vertex columns 3:6),
    ``scene_id`` and the computed per-vertex ``normal``.
    """
    print(f"Parsing scene {scene_path}")
    scene_dir = os.path.dirname(scene_path)
    scene_id = os.path.basename(scene_dir)
    split = os.path.basename(os.path.dirname(scene_dir))

    vertices, faces = read_plymesh(scene_path)
    coords, colors = vertices[:, :3], vertices[:, 3:6]

    data_dict = {"coord": coords, "color": colors, "scene_id": scene_id}
    data_dict["normal"] = vertex_normal(coords, faces)

    save_path = os.path.join(output_dir, split, f"{scene_id}.pth")
    torch.save(data_dict, save_path)
60 | 
61 | 
if __name__ == '__main__':
    # Command-line entry point: converts every ``*_mesh.ply`` found under
    # ``<dataset_root>/3dod/<split>/<scene>/`` into a ``.pth`` file below
    # ``<output_root>/<split>/``.
    parser = argparse.ArgumentParser()
    # The help texts previously described ScanNet and train/val folders; this
    # script processes ARKitScenes and writes Training/Validation folders.
    parser.add_argument('--dataset_root', required=True,
                        help='Path to the ARKitScenes dataset root containing the 3dod folder')
    parser.add_argument('--output_root', required=True,
                        help='Output path where Training/Validation folders will be located')
    opt = parser.parse_args()
    # Create output directories
    train_output_dir = os.path.join(opt.output_root, 'Training')
    os.makedirs(train_output_dir, exist_ok=True)
    val_output_dir = os.path.join(opt.output_root, 'Validation')
    os.makedirs(val_output_dir, exist_ok=True)
    # Load scene paths
    scene_paths = sorted(glob.glob(os.path.join(opt.dataset_root, '3dod', '*', '*', '*_mesh.ply')))
    # Preprocess data. The context manager shuts the worker pool down even
    # when a scene fails.
    with ProcessPoolExecutor(max_workers=mp.cpu_count()) as pool:
        print('Processing scenes...')
        # Drain the iterator so that exceptions raised in workers surface here.
        _ = list(pool.map(parse_scene, scene_paths, repeat(opt.output_root)))
79 | 


--------------------------------------------------------------------------------
/pcr/datasets/preprocessing/scannet/meta_data/classes_ObjClassification-ShapeNetCore55.txt:
--------------------------------------------------------------------------------
 1 | 1	trash
 2 | 3 	basket
 3 | 4	bathtub
 4 | 5	bed
 5 | 9	shelf
 6 | 13	cabinet
 7 | 18	chair
 8 | 20	keyboard
 9 | 22	tv
10 | 30	lamp
11 | 31	laptop
12 | 35	microwave
13 | 39	pillow
14 | 42	printer
15 | 47	sofa
16 | 48	stove
17 | 49	table
18 | 


--------------------------------------------------------------------------------
/pcr/datasets/preprocessing/scannet/meta_data/classes_SemVoxLabel-nyu40id.txt:
--------------------------------------------------------------------------------
 1 | 1       wall
 2 | 2       floor
 3 | 3       cabinet
 4 | 4       bed
 5 | 5       chair
 6 | 6       sofa
 7 | 7       table
 8 | 8       door
 9 | 9       window
10 | 10      bookshelf
11 | 11      picture
12 | 12      counter
13 | 14      desk
14 | 16      curtain
15 | 24      refridgerator
16 | 28      shower curtain
17 | 33      toilet
18 | 34      sink
19 | 36      bathtub
20 | 39      otherfurniture


--------------------------------------------------------------------------------
/pcr/datasets/preprocessing/scannet/meta_data/scannet_means.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pointcept/PointTransformerV2/5386c4d71f3d6c42c24a8105fce8750e9355dc54/pcr/datasets/preprocessing/scannet/meta_data/scannet_means.npz


--------------------------------------------------------------------------------
/pcr/datasets/preprocessing/scannet/meta_data/scannetv1_val.txt:
--------------------------------------------------------------------------------
  1 | scene0534_00
  2 | scene0534_01
  3 | scene0319_00
  4 | scene0273_00
  5 | scene0273_01
  6 | scene0225_00
  7 | scene0198_00
  8 | scene0003_00
  9 | scene0003_01
 10 | scene0003_02
 11 | scene0409_00
 12 | scene0409_01
 13 | scene0331_00
 14 | scene0331_01
 15 | scene0505_00
 16 | scene0505_01
 17 | scene0505_02
 18 | scene0505_03
 19 | scene0505_04
 20 | scene0506_00
 21 | scene0057_00
 22 | scene0057_01
 23 | scene0074_00
 24 | scene0074_01
 25 | scene0074_02
 26 | scene0091_00
 27 | scene0112_00
 28 | scene0112_01
 29 | scene0112_02
 30 | scene0240_00
 31 | scene0102_00
 32 | scene0102_01
 33 | scene0513_00
 34 | scene0514_00
 35 | scene0514_01
 36 | scene0537_00
 37 | scene0516_00
 38 | scene0516_01
 39 | scene0495_00
 40 | scene0617_00
 41 | scene0133_00
 42 | scene0520_00
 43 | scene0520_01
 44 | scene0635_00
 45 | scene0635_01
 46 | scene0054_00
 47 | scene0473_00
 48 | scene0473_01
 49 | scene0524_00
 50 | scene0524_01
 51 | scene0379_00
 52 | scene0471_00
 53 | scene0471_01
 54 | scene0471_02
 55 | scene0566_00
 56 | scene0248_00
 57 | scene0248_01
 58 | scene0248_02
 59 | scene0529_00
 60 | scene0529_01
 61 | scene0529_02
 62 | scene0391_00
 63 | scene0264_00
 64 | scene0264_01
 65 | scene0264_02
 66 | scene0675_00
 67 | scene0675_01
 68 | scene0350_00
 69 | scene0350_01
 70 | scene0350_02
 71 | scene0450_00
 72 | scene0068_00
 73 | scene0068_01
 74 | scene0237_00
 75 | scene0237_01
 76 | scene0365_00
 77 | scene0365_01
 78 | scene0365_02
 79 | scene0605_00
 80 | scene0605_01
 81 | scene0539_00
 82 | scene0539_01
 83 | scene0539_02
 84 | scene0540_00
 85 | scene0540_01
 86 | scene0540_02
 87 | scene0170_00
 88 | scene0170_01
 89 | scene0170_02
 90 | scene0433_00
 91 | scene0340_00
 92 | scene0340_01
 93 | scene0340_02
 94 | scene0160_00
 95 | scene0160_01
 96 | scene0160_02
 97 | scene0160_03
 98 | scene0160_04
 99 | scene0059_00
100 | scene0059_01
101 | scene0059_02
102 | scene0056_00
103 | scene0056_01
104 | scene0478_00
105 | scene0478_01
106 | scene0548_00
107 | scene0548_01
108 | scene0548_02
109 | scene0204_00
110 | scene0204_01
111 | scene0204_02
112 | scene0033_00
113 | scene0145_00
114 | scene0483_00
115 | scene0508_00
116 | scene0508_01
117 | scene0508_02
118 | scene0180_00
119 | scene0148_00
120 | scene0556_00
121 | scene0556_01
122 | scene0416_00
123 | scene0416_01
124 | scene0416_02
125 | scene0416_03
126 | scene0416_04
127 | scene0073_00
128 | scene0073_01
129 | scene0073_02
130 | scene0073_03
131 | scene0034_00
132 | scene0034_01
133 | scene0034_02
134 | scene0639_00
135 | scene0561_00
136 | scene0561_01
137 | scene0298_00
138 | scene0692_00
139 | scene0692_01
140 | scene0692_02
141 | scene0692_03
142 | scene0692_04
143 | scene0642_00
144 | scene0642_01
145 | scene0642_02
146 | scene0642_03
147 | scene0630_00
148 | scene0630_01
149 | scene0630_02
150 | scene0630_03
151 | scene0630_04
152 | scene0630_05
153 | scene0630_06
154 | scene0706_00
155 | scene0567_00
156 | scene0567_01
157 | 


--------------------------------------------------------------------------------
/pcr/datasets/preprocessing/scannet/meta_data/scannetv2_test.txt:
--------------------------------------------------------------------------------
  1 | scene0707_00
  2 | scene0708_00
  3 | scene0709_00
  4 | scene0710_00
  5 | scene0711_00
  6 | scene0712_00
  7 | scene0713_00
  8 | scene0714_00
  9 | scene0715_00
 10 | scene0716_00
 11 | scene0717_00
 12 | scene0718_00
 13 | scene0719_00
 14 | scene0720_00
 15 | scene0721_00
 16 | scene0722_00
 17 | scene0723_00
 18 | scene0724_00
 19 | scene0725_00
 20 | scene0726_00
 21 | scene0727_00
 22 | scene0728_00
 23 | scene0729_00
 24 | scene0730_00
 25 | scene0731_00
 26 | scene0732_00
 27 | scene0733_00
 28 | scene0734_00
 29 | scene0735_00
 30 | scene0736_00
 31 | scene0737_00
 32 | scene0738_00
 33 | scene0739_00
 34 | scene0740_00
 35 | scene0741_00
 36 | scene0742_00
 37 | scene0743_00
 38 | scene0744_00
 39 | scene0745_00
 40 | scene0746_00
 41 | scene0747_00
 42 | scene0748_00
 43 | scene0749_00
 44 | scene0750_00
 45 | scene0751_00
 46 | scene0752_00
 47 | scene0753_00
 48 | scene0754_00
 49 | scene0755_00
 50 | scene0756_00
 51 | scene0757_00
 52 | scene0758_00
 53 | scene0759_00
 54 | scene0760_00
 55 | scene0761_00
 56 | scene0762_00
 57 | scene0763_00
 58 | scene0764_00
 59 | scene0765_00
 60 | scene0766_00
 61 | scene0767_00
 62 | scene0768_00
 63 | scene0769_00
 64 | scene0770_00
 65 | scene0771_00
 66 | scene0772_00
 67 | scene0773_00
 68 | scene0774_00
 69 | scene0775_00
 70 | scene0776_00
 71 | scene0777_00
 72 | scene0778_00
 73 | scene0779_00
 74 | scene0780_00
 75 | scene0781_00
 76 | scene0782_00
 77 | scene0783_00
 78 | scene0784_00
 79 | scene0785_00
 80 | scene0786_00
 81 | scene0787_00
 82 | scene0788_00
 83 | scene0789_00
 84 | scene0790_00
 85 | scene0791_00
 86 | scene0792_00
 87 | scene0793_00
 88 | scene0794_00
 89 | scene0795_00
 90 | scene0796_00
 91 | scene0797_00
 92 | scene0798_00
 93 | scene0799_00
 94 | scene0800_00
 95 | scene0801_00
 96 | scene0802_00
 97 | scene0803_00
 98 | scene0804_00
 99 | scene0805_00
100 | scene0806_00
101 | 


--------------------------------------------------------------------------------
/pcr/datasets/preprocessing/scannet/scannet_pair/compute_full_overlapping.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | # 
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import copy
 7 | import torch
 8 | import numpy as np
 9 | import math
10 | import glob, os
11 | import argparse
12 | import open3d as o3d
13 | 
14 | 
def make_open3d_point_cloud(xyz, color=None, voxel_size=None):
    """Wrap a coordinate array in an Open3D point cloud.

    Returns ``None`` when ``xyz`` contains any NaN. Otherwise the first three
    columns of ``xyz`` become the points, ``color`` (if given) becomes the
    colors, and the cloud is voxel-down-sampled when ``voxel_size`` is given.
    """
    contains_nan = np.isnan(xyz).any()
    if contains_nan:
        return None

    cloud = o3d.geometry.PointCloud()
    # Only the leading x, y, z columns are used as point positions.
    cloud.points = o3d.utility.Vector3dVector(xyz[:, :3])
    if color is not None:
        cloud.colors = o3d.utility.Vector3dVector(color)

    if voxel_size is None:
        return cloud
    return cloud.voxel_down_sample(voxel_size)
28 | 
29 | 
def compute_overlap_ratio(pcd0, pcd1, voxel_size):
    """Return the larger of the two directional overlap ratios of two clouds.

    Both clouds are voxel-down-sampled with ``voxel_size``. For each direction,
    the ratio is the fraction of source points that have at least one target
    point within ``1.5 * voxel_size``. Returns ``0.0`` if either down-sampled
    cloud is empty.
    """
    pcd0_down = pcd0.voxel_down_sample(voxel_size)
    pcd1_down = pcd1.voxel_down_sample(voxel_size)
    num0 = len(pcd0_down.points)
    num1 = len(pcd1_down.points)
    if num0 == 0 or num1 == 0:
        # Nothing can overlap with an empty cloud; also avoids dividing by zero.
        return 0.0

    # get_matching_indices() queries its second argument through
    # search_radius_vector_3d(), so it needs a KD-tree rather than a raw cloud.
    tree0 = o3d.geometry.KDTreeFlann(pcd0_down)
    tree1 = o3d.geometry.KDTreeFlann(pcd1_down)
    search_radius = voxel_size * 1.5
    matching01 = get_matching_indices(pcd0_down, tree1, search_radius, 1)
    matching10 = get_matching_indices(pcd1_down, tree0, search_radius, 1)
    overlap0 = float(len(matching01)) / float(num0)
    overlap1 = float(len(matching10)) / float(num1)
    return max(overlap0, overlap1)
38 | 
39 | 
def get_matching_indices(source, pcd_tree, search_voxel_size, K=None):
    """Collect ``(source_index, neighbour_index)`` pairs from a radius search.

    Every point of ``source.points`` is looked up in ``pcd_tree`` with
    ``search_radius_vector_3d`` using radius ``search_voxel_size``. When ``K``
    is given, only the first ``K`` returned neighbours of each point are kept.
    """
    pairs = []
    for src_idx, query in enumerate(source.points):
        _count, neighbours, _dist2 = pcd_tree.search_radius_vector_3d(query, search_voxel_size)
        if K is not None:
            neighbours = neighbours[:K]
        pairs.extend((src_idx, nbr_idx) for nbr_idx in neighbours)
    return pairs
49 | 
50 | 
def compute_full_overlapping(data_root, scene_id, voxel_size=0.05):
    """Compute pairwise frame overlaps for one scene and write ``overlap.txt``.

    Loads every ``*.pth`` under ``<data_root>/<scene_id>/pcd``, builds a
    down-sampled cloud from its ``'coord'`` entry, and drops frames for which
    ``make_open3d_point_cloud`` returns ``None``. For each ordered pair the
    matching ratio is the fraction of the query cloud's points that find a
    neighbour in the reference cloud within ``1.5 * voxel_size``. Each
    unordered pair is written once with the larger of its two ratios.
    """
    pcd_dir = os.path.join(data_root, scene_id, "pcd")

    loaded = []
    for path in glob.glob(os.path.join(pcd_dir, "*.pth")):
        cloud = make_open3d_point_cloud(torch.load(path)['coord'], voxel_size=voxel_size)
        loaded.append((path, cloud))
    valid = [entry for entry in loaded if entry[1] is not None]
    print('load {} point clouds ({} invalid has been filtered), computing matching/overlapping'.format(
        len(valid), len(loaded) - len(valid)))

    count = len(valid)
    ratios = np.zeros((count, count))
    for row, (ref_name, ref_cloud) in enumerate(valid):
        print('matching to...{}'.format(ref_name))
        ref_tree = o3d.geometry.KDTreeFlann(copy.deepcopy(ref_cloud))
        for col, (_query_name, query_cloud) in enumerate(valid):
            if row != col:
                hits = get_matching_indices(query_cloud, ref_tree, 1.5 * voxel_size, 1)
                ratios[row, col] = float(len(hits)) / float(len(query_cloud.points))

    # write to file
    with open(os.path.join(pcd_dir, "overlap.txt"), 'w') as f:
        for row in range(count):
            name0 = valid[row][0]
            for col in range(row + 1, count):
                name1 = valid[col][0]
                overlap = max(ratios[row, col], ratios[col, row])
                # Paths are stored with the data_root text removed.
                f.write("{} {} {}\n".format(
                    name0.replace(data_root, ""), name1.replace(data_root, ""), overlap
                ))
79 | 
80 | 
81 | 


--------------------------------------------------------------------------------
/pcr/datasets/preprocessing/scannet/scannet_pair/generage_list.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | # 
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | 
 7 | import argparse
 8 | import glob, os, sys
 9 | 
10 | from SensorData import SensorData
11 | 
# params
# NOTE: the arguments are parsed at module level, so this runs on import as
# well as when the file is executed as a script.
parser = argparse.ArgumentParser()
# data paths
# --target_dir is the directory main() searches for "*/pcd/overlap.txt" files
# and where it writes the merged "overlap30.txt".
parser.add_argument('--target_dir', required=True, help='path to the target dir')

opt = parser.parse_args()
print(opt)
19 | 
def main():
    """Merge per-scene overlap files into ``<target_dir>/overlap30.txt``.

    Reads every ``*/pcd/overlap.txt`` below ``opt.target_dir`` and copies the
    lines whose overlap value is at least 0.3. Each input line is expected to
    hold three whitespace-separated fields: two point cloud paths and the
    overlap value. Blank lines are skipped.
    """
    overlaps = glob.glob(os.path.join(opt.target_dir, "*/pcd/overlap.txt"))
    with open(os.path.join(opt.target_dir, 'overlap30.txt'), 'w') as f:
        for fo in overlaps:
            # Open each input with a context manager so its handle is closed
            # promptly instead of waiting for garbage collection.
            with open(fo) as fin:
                for line in fin:
                    fields = line.split()
                    if not fields:
                        continue
                    pcd0, pcd1, op = fields
                    if float(op) >= 0.3:
                        print('{} {} {}'.format(pcd0, pcd1, op), file=f)
    print('done')


if __name__ == '__main__':
    main()


--------------------------------------------------------------------------------
/pcr/datasets/preprocessing/scannet/scannet_pair/point_cloud_extractor.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | # 
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | 
 7 | import glob, os
 8 | import numpy as np
 9 | import cv2
10 | import torch
11 | 
12 | 
def extractor(input_path, output_path):
    """Back-project the exported depth frames of one scene into point clouds.

    Reads ``intrinsic/intrinsic_depth.txt``, ``pose/*.txt``, ``depth/*.png``
    and ``color/*.png`` below ``input_path``. For every frame it writes
    ``<output_path>/<frame>.pth`` with ``torch.save``: a dict holding ``coord``
    (points transformed by the frame pose) and ``color`` (RGB values of the
    pixels with non-zero depth).

    Frames whose ``.pth`` output already exists are skipped. A frame that fails
    to process is reported and skipped, so one bad frame does not abort the
    scene.
    """
    os.makedirs(output_path, exist_ok=True)

    # Load Depth Camera Intrinsic
    depth_intrinsic = np.loadtxt(input_path + '/intrinsic/intrinsic_depth.txt')
    print('Depth intrinsic: ')
    print(depth_intrinsic)

    def frame_index(path):
        # Files are named "<frame number>.<ext>"; sort numerically, not lexically.
        return int(os.path.basename(path).split('.')[0])

    pose_paths = sorted(glob.glob(input_path + '/pose/*.txt'), key=frame_index)
    depth_paths = sorted(glob.glob(input_path + '/depth/*.png'), key=frame_index)
    color_paths = sorted(glob.glob(input_path + '/color/*.png'), key=frame_index)

    # depth values are divided by this factor before back-projection
    depth_shift = 1000.0

    # Get Aligned Point Clouds.
    for pose_path, depth_path, color_path in zip(pose_paths, depth_paths, color_paths):
        name = os.path.basename(pose_path).split('.')[0]
        out_file = output_path + '/{}.pth'.format(name)

        # Check for the file that is actually written below (.pth), so that a
        # re-run really skips finished frames.
        if os.path.exists(out_file):
            continue

        try:
            print('=' * 50, ': {}'.format(pose_path))
            depth_img = cv2.imread(depth_path, -1)  # read 16bit grayscale image
            mask = (depth_img != 0)
            height, width = depth_img.shape[:2]

            # Resize the color frame to the depth resolution so the depth mask
            # can index it; cv2.resize takes (width, height).
            color_image = cv2.imread(color_path)
            color_image = cv2.resize(color_image, (width, height))
            # OpenCV loads BGR; reverse the channel axis to store RGB.
            colors = np.ascontiguousarray(color_image[mask][:, ::-1])

            pose = np.loadtxt(pose_path)
            print('Camera pose: ')
            print(pose)

            # Pixel coordinates and depth of the valid pixels only. Boolean
            # indexing keeps the result 1-D per channel even when a single
            # pixel is valid.
            x, y = np.meshgrid(np.linspace(0, width - 1, width),
                               np.linspace(0, height - 1, height))
            u = x[mask]
            v = y[mask]
            z = depth_img[mask] / depth_shift

            fx = depth_intrinsic[0, 0]
            fy = depth_intrinsic[1, 1]
            cx = depth_intrinsic[0, 2]
            cy = depth_intrinsic[1, 2]
            bx = depth_intrinsic[0, 3]
            by = depth_intrinsic[1, 3]

            # Homogeneous camera-space points, one row per valid pixel.
            points = np.ones((z.shape[0], 4))
            points[:, 0] = (u - cx) * z / fx + bx
            points[:, 1] = (v - cy) * z / fy + by
            points[:, 2] = z
            points_world = np.dot(points, np.transpose(pose))
            print(points_world.shape)

            pcd = dict(coord=points_world[:, :3], color=colors)
            torch.save(pcd, out_file)
        except Exception as err:
            # Best effort per frame: report the failure instead of hiding it,
            # and let KeyboardInterrupt/SystemExit propagate.
            print('Failed to extract frame {}: {!r}'.format(name, err))
            continue
88 | 
89 | 
90 | 


--------------------------------------------------------------------------------
/pcr/datasets/preprocessing/scannet/scannet_pair/preprocess.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import argparse
 3 | import glob
 4 | import multiprocessing as mp
 5 | from concurrent.futures import ProcessPoolExecutor
 6 | from itertools import repeat
 7 | from reader import reader
 8 | from point_cloud_extractor import extractor
 9 | from compute_full_overlapping import compute_full_overlapping
10 | 
11 | 
# Frame stride forwarded by parse_sens() to reader() when exporting frames from a .sens file.
frame_skip = 25
13 | 
14 | 
def parse_sens(sens_dir, output_dir):
    """Run the full pair-preprocessing pipeline for one ``.sens`` file.

    The scene id is the name of the folder containing ``sens_dir``. Frames are
    exported to ``<output_dir>/<scene_id>``, point clouds are extracted into
    its ``pcd`` sub-folder, and the frame overlaps are then computed.
    """
    scene_folder = os.path.dirname(sens_dir)
    scene_id = os.path.basename(scene_folder)
    print(f"Parsing sens data{sens_dir}")

    scene_out = os.path.join(output_dir, scene_id)
    export_flags = dict(
        export_color_images=True,
        export_depth_images=True,
        export_poses=True,
        export_intrinsics=True,
    )
    reader(sens_dir, scene_out, frame_skip, **export_flags)
    extractor(scene_out, os.path.join(scene_out, "pcd"))
    compute_full_overlapping(output_dir, scene_id)
22 | 
23 | 
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset_root', required=True, help='Path to the ScanNet dataset containing scene folders')
    parser.add_argument('--output_root', required=True, help='Output path where train/val folders will be located')
    opt = parser.parse_args()
    sens_list = sorted(glob.glob(os.path.join(opt.dataset_root, "scans/scene*/*.sens")))

    # Preprocess data. The context manager shuts the worker pool down even if
    # a scene raises. For debugging, use max_workers=1 or call
    # parse_sens(sens_path, output_dir) directly on a single .sens file.
    with ProcessPoolExecutor(max_workers=mp.cpu_count()) as pool:
        print('Processing scenes...')
        # list() drains the lazy map so every scene runs and worker
        # exceptions are re-raised here.
        _ = list(pool.map(parse_sens, sens_list, repeat(opt.output_root)))
39 | 


--------------------------------------------------------------------------------
/pcr/datasets/preprocessing/scannet/scannet_pair/reader.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import os, sys
 3 | 
 4 | from SensorData import SensorData
 5 | 
 6 | 
 7 | def reader(filename,
 8 |            output_path,
 9 |            frame_skip,
10 |            export_color_images=False,
11 |            export_depth_images=False,
12 |            export_poses=False,
13 |            export_intrinsics=False):
14 |     if not os.path.exists(output_path):
15 |         os.makedirs(output_path)
16 | 
17 |     # load the data
18 |     print('loading %s...' % filename)
19 |     sd = SensorData(filename)
20 |     if export_depth_images:
21 |         sd.export_depth_images(os.path.join(output_path, 'depth'), frame_skip=frame_skip)
22 |     if export_color_images:
23 |         sd.export_color_images(os.path.join(output_path, 'color'), frame_skip=frame_skip)
24 |     if export_poses:
25 |         sd.export_poses(os.path.join(output_path, 'pose'), frame_skip=frame_skip)
26 |     if export_intrinsics:
27 |         sd.export_intrinsics(os.path.join(output_path, 'intrinsic'))
28 | 


--------------------------------------------------------------------------------
/pcr/datasets/s3dis.py:
--------------------------------------------------------------------------------
  1 | """
  2 | S3DIS Dataset
  3 | 
  4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com)
  5 | Please cite our work if the code is helpful to you.
  6 | """
  7 | 
  8 | import os
  9 | import glob
 10 | import numpy as np
 11 | import torch
 12 | from copy import deepcopy
 13 | from torch.utils.data import Dataset
 14 | from collections.abc import Sequence
 15 | 
 16 | from pcr.utils.logger import get_root_logger
 17 | from .builder import DATASETS
 18 | from .transform import Compose, TRANSFORMS
 19 | 
 20 | 
 21 | @DATASETS.register_module()
 22 | class S3DISDataset(Dataset):
 23 |     def __init__(self,
 24 |                  split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
 25 |                  data_root='data/s3dis',
 26 |                  transform=None,
 27 |                  test_mode=False,
 28 |                  test_cfg=None,
 29 |                  loop=1):
 30 |         super(S3DISDataset, self).__init__()
 31 |         self.data_root = data_root
 32 |         self.split = split
 33 |         self.transform = Compose(transform)
 34 |         self.loop = loop if not test_mode else 1    # force make loop = 1 while in test mode
 35 |         self.test_mode = test_mode
 36 |         self.test_cfg = test_cfg if test_mode else None
 37 | 
 38 |         if test_mode:
 39 |             self.test_voxelize = TRANSFORMS.build(self.test_cfg.voxelize)
 40 |             self.test_crop = TRANSFORMS.build(self.test_cfg.crop) if self.test_cfg.crop else None
 41 |             self.post_transform = Compose(self.test_cfg.post_transform)
 42 |             self.aug_transform = [Compose(aug) for aug in self.test_cfg.aug_transform]
 43 | 
 44 |         self.data_list = self.get_data_list()
 45 |         logger = get_root_logger()
 46 |         logger.info("Totally {} x {} samples in {} set.".format(len(self.data_list), self.loop, split))
 47 | 
 48 |     def get_data_list(self):
 49 |         if isinstance(self.split, str):
 50 |             data_list = glob.glob(os.path.join(self.data_root, self.split, "*.pth"))
 51 |         elif isinstance(self.split, Sequence):
 52 |             data_list = []
 53 |             for split in self.split:
 54 |                 data_list += glob.glob(os.path.join(self.data_root, split, "*.pth"))
 55 |         else:
 56 |             raise NotImplementedError
 57 |         return data_list
 58 | 
 59 |     def get_data(self, idx):
 60 |         data = torch.load(self.data_list[idx % len(self.data_list)])
 61 |         coord = data["coord"]
 62 |         color = data["color"]
 63 |         if "semantic_gt" in data.keys():
 64 |             label = data["semantic_gt"].reshape([-1])
 65 |         else:
 66 |             label = np.zeros(coord.shape[0])
 67 |         data_dict = dict(coord=coord, color=color, label=label)
 68 |         return data_dict
 69 | 
 70 |     def get_data_name(self, idx):
 71 |         return os.path.basename(self.data_list[idx % len(self.data_list)]).split(".")[0]
 72 | 
 73 |     def prepare_train_data(self, idx):
 74 |         # load data
 75 |         data_dict = self.get_data(idx)
 76 |         data_dict = self.transform(data_dict)
 77 |         return data_dict
 78 | 
 79 |     def prepare_test_data(self, idx):
 80 |         # load data
 81 |         data_dict = self.get_data(idx)
 82 |         label = data_dict.pop("label")
 83 |         data_dict = self.transform(data_dict)
 84 |         data_dict_list = []
 85 |         for aug in self.aug_transform:
 86 |             data_dict_list.append(
 87 |                 aug(deepcopy(data_dict))
 88 |             )
 89 | 
 90 |         input_dict_list = []
 91 |         for data in data_dict_list:
 92 |             data_part_list = self.test_voxelize(data)
 93 |             for data_part in data_part_list:
 94 |                 if self.test_crop:
 95 |                     data_part = self.test_crop(data_part)
 96 |                 else:
 97 |                     data_part = [data_part]
 98 |                 input_dict_list += data_part
 99 | 
100 |         for i in range(len(input_dict_list)):
101 |             input_dict_list[i] = self.post_transform(input_dict_list[i])
102 |         return input_dict_list, label
103 | 
104 |     def __getitem__(self, idx):
105 |         if self.test_mode:
106 |             return self.prepare_test_data(idx)
107 |         else:
108 |             return self.prepare_train_data(idx)
109 | 
110 |     def __len__(self):
111 |         return len(self.data_list) * self.loop
112 | 


--------------------------------------------------------------------------------
/pcr/datasets/scannet_pair.py:
--------------------------------------------------------------------------------
 1 | """
  2 | ScanNet Pair Dataset (Point Contrastive Frame-level twin)
 3 | 
 4 | Refer Point Contrast
 5 | 
 6 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com)
 7 | Please cite our work if the code is helpful to you.
 8 | """
 9 | 
10 | import os
11 | import glob
12 | import numpy as np
13 | import torch
14 | from copy import deepcopy
15 | from torch.utils.data import Dataset
16 | 
17 | from pcr.utils.logger import get_root_logger
18 | from .builder import DATASETS
19 | from .transform import Compose, TRANSFORMS
20 | 
21 | 
@DATASETS.register_module()
class ScanNetPairDataset(Dataset):
    """Pairs of overlapping ScanNet frames read from per-scene ``overlap.txt`` files.

    Each ``<data_root>/*/pcd/overlap.txt`` line holds two frame paths (relative
    to ``data_root``) and their overlap value. Pairs whose overlap is strictly
    greater than ``overlap_threshold`` form the dataset. An item is a single
    dict with the two transformed frames stored under ``twin1_*`` and
    ``twin2_*`` keys. Extra keyword arguments are accepted and ignored.
    """

    def __init__(self,
                 data_root='data/scannet_pair',
                 overlap_threshold=0.3,
                 twin1_transform=None,
                 twin2_transform=None,
                 loop=1,
                 **kwargs):
        super(ScanNetPairDataset, self).__init__()
        self.data_root = data_root
        self.overlap_threshold = overlap_threshold
        self.twin1_transform = Compose(twin1_transform)
        self.twin2_transform = Compose(twin2_transform)
        self.loop = loop
        self.data_list = self.get_data_list()
        logger = get_root_logger()
        logger.info("Totally {} x {} samples.".format(len(self.data_list), self.loop))

    def get_data_list(self):
        """Return the ``[path0, path1]`` pairs above the overlap threshold."""
        data_list = []
        overlap_list = glob.glob(os.path.join(self.data_root, "*", "pcd", "overlap.txt"))
        for overlap_file in overlap_list:
            with open(overlap_file) as f:
                overlap = [line.split() for line in f]
            # Lines with fewer than three fields (e.g. blank lines) are skipped
            # instead of raising IndexError.
            data_list.extend(
                pair[:2] for pair in overlap
                if len(pair) >= 3 and float(pair[2]) > self.overlap_threshold
            )
        return data_list

    def get_data(self, idx):
        """Load both frames of pair ``idx`` and keep a copy of their raw coordinates."""
        pair = self.data_list[idx % len(self.data_list)]
        # NOTE: plain concatenation, so the stored paths must already start
        # with whatever separates them from data_root.
        twin1_dict = torch.load(self.data_root + pair[0])
        twin2_dict = torch.load(self.data_root + pair[1])
        twin1_dict["origin_coord"] = twin1_dict["coord"].copy()
        twin2_dict["origin_coord"] = twin2_dict["coord"].copy()
        return twin1_dict, twin2_dict

    @staticmethod
    def _frame_name(path):
        """Return ``<scene>_<frame>`` for a ``<scene>/pcd/<frame>.pth`` style path."""
        pcd_dir, file_name = os.path.split(path)
        scene = os.path.basename(os.path.dirname(pcd_dir))
        frame = file_name.split(".")[0]
        return "{}_{}".format(scene, frame) if scene else frame

    def get_data_name(self, idx):
        """Return a name for pair ``idx`` built from both frame names.

        Each entry of ``data_list`` is a two-element list of paths, so the
        name is derived per path and joined with ``-``. Passing the list
        itself to ``os.path.basename`` raised ``TypeError``.
        """
        pair = self.data_list[idx % len(self.data_list)]
        return "-".join(self._frame_name(path) for path in pair)

    def prepare_train_data(self, idx):
        """Transform both frames of pair ``idx`` and merge them into one dict."""
        # load data
        twin1_dict, twin2_dict = self.get_data(idx)
        twin1_dict = self.twin1_transform(twin1_dict)
        twin2_dict = self.twin2_transform(twin2_dict)
        data_dict = dict()
        for key, value in twin1_dict.items():
            data_dict["twin1_" + key] = value
        for key, value in twin2_dict.items():
            data_dict["twin2_" + key] = value
        return data_dict

    def prepare_test_data(self, idx):
        raise NotImplementedError

    def __getitem__(self, idx):
        return self.prepare_train_data(idx)

    def __len__(self):
        return len(self.data_list) * self.loop
82 | 


--------------------------------------------------------------------------------
/pcr/datasets/semantic_kitti.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Semantic KITTI dataset
  3 | 
  4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com)
  5 | Please cite our work if the code is helpful to you.
  6 | """
  7 | 
  8 | import os
  9 | import glob
 10 | import numpy as np
 11 | import torch
 12 | from torch.utils.data import Dataset
 13 | 
 14 | from pcr.utils.logger import get_root_logger
 15 | from .builder import DATASETS
 16 | from .transform import Compose, TRANSFORMS
 17 | 
 18 | 
 19 | @DATASETS.register_module()
 20 | class SemanticKITTIDataset(Dataset):
 21 |     def __init__(self,
 22 |                  split='train',
 23 |                  data_root='data/semantic_kitti',
 24 |                  learning_map=None,
 25 |                  transform=None,
 26 |                  test_mode=False,
 27 |                  test_cfg=None,
 28 |                  loop=1):
 29 |         super(SemanticKITTIDataset, self).__init__()
 30 |         self.data_root = data_root
 31 |         self.split = split
 32 |         self.learning_map = learning_map
 33 |         self.split2seq = dict(
 34 |             train=[0, 1, 2, 3, 4, 5, 6, 7, 9, 10],
 35 |             val=[8],
 36 |             test=[11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
 37 |         )
 38 |         self.transform = Compose(transform)
 39 |         self.loop = loop if not test_mode else 1    # force make loop = 1 while in test mode
 40 |         self.test_mode = test_mode
 41 |         self.test_cfg = test_cfg if test_mode else None
 42 | 
 43 |         if test_mode:
 44 |             self.test_voxelize = TRANSFORMS.build(self.test_cfg.voxelize)
 45 |             self.test_crop = TRANSFORMS.build(self.test_cfg.crop)
 46 |             self.post_transform = Compose(self.test_cfg.post_transform)
 47 |             self.aug_transform = [Compose(aug) for aug in self.test_cfg.aug_transform]
 48 | 
 49 |         if isinstance(self.split, str):
 50 |             seq_list = self.split2seq[split]
 51 |         elif isinstance(self.split, list):
 52 |             seq_list = []
 53 |             for split in self.split:
 54 |                 seq_list += self.split2seq[split]
 55 |         else:
 56 |             raise NotImplementedError
 57 | 
 58 |         self.data_list = []
 59 |         for seq in seq_list:
 60 |             seq = str(seq).zfill(2)
 61 |             seq_folder = os.path.join(self.data_root, "sequences", seq)
 62 |             seq_files = sorted(
 63 |                 os.listdir(os.path.join(seq_folder, "velodyne")))
 64 |             self.data_list += [os.path.join(seq_folder, "velodyne", file) for file in seq_files]
 65 |         logger = get_root_logger()
 66 |         logger.info("Totally {} x {} samples in {} set.".format(len(self.data_list), self.loop, split))
 67 | 
 68 |     def prepare_train_data(self, idx):
 69 |         # load data
 70 |         data_idx = idx % len(self.data_list)
 71 |         with open(self.data_list[data_idx], 'rb') as b:
 72 |             scan = np.fromfile(b, dtype=np.float32).reshape(-1, 4)
 73 |         coord = scan[:, :3]
 74 |         strength = scan[:, -1].reshape([-1, 1])
 75 | 
 76 |         label_file = self.data_list[data_idx].replace('velodyne', 'labels').replace('.bin', '.label')
 77 |         if os.path.exists(label_file):
 78 |             with open(label_file, 'rb') as a:
 79 |                 label = np.fromfile(a, dtype=np.int32).reshape(-1)
 80 |         else:
 81 |             label = np.zeros(coord.shape[0]).astype(np.int32)
 82 |         label = np.vectorize(self.learning_map.__getitem__)(label & 0xFFFF).astype(np.int64)
 83 |         data_dict = dict(coord=coord, strength=strength, label=label)
 84 |         data_dict = self.transform(data_dict)
 85 |         return data_dict
 86 | 
 87 |     def prepare_test_data(self, idx):
 88 |         raise NotImplementedError
 89 | 
 90 |     def get_data_name(self, idx):
 91 |         return self.data_list[self.data_list[idx % len(self.data_list)]]
 92 | 
 93 |     def __getitem__(self, idx):
 94 |         if self.test_mode:
 95 |             return self.prepare_test_data(idx)
 96 |         else:
 97 |             return self.prepare_train_data(idx)
 98 | 
 99 |     def __len__(self):
100 |         return len(self.data_list) * self.loop
101 | 


--------------------------------------------------------------------------------
/pcr/datasets/utils.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Utils for Datasets
 3 | 
 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com)
 5 | Please cite our work if the code is helpful to you.
 6 | """
 7 | 
 8 | import random
 9 | from collections.abc import Mapping, Sequence
10 | import SharedArray as SA
11 | import numpy as np
12 | import torch
13 | from torch.utils.data.dataloader import default_collate
14 | 
15 | from pcr.utils.logger import get_root_logger
16 | 
17 | 
def collate_fn(batch):
    """
    collate function for point cloud which support dict and list,
    'coord' is necessary to determine 'offset'

    Dispatches on the type of the first element of ``batch``:

    - tensors are concatenated along dim 0;
    - strings are returned unchanged as a list;
    - sequences get the row count of their first item appended (the input
      lists are modified in place), are collated field by field, and the
      appended counts are turned into cumulative int offsets;
    - mappings are collated key by key, and every key containing "offset"
      is replaced by its cumulative sum;
    - anything else goes through torch's ``default_collate``.

    Raises:
        TypeError: if ``batch`` is not a sequence.
    """
    if not isinstance(batch, Sequence):
        # Report the container type; arbitrary objects have no ``dtype``.
        raise TypeError(f'{type(batch)} is not supported.')

    if isinstance(batch[0], torch.Tensor):
        return torch.cat(list(batch))
    elif isinstance(batch[0], str):
        # str is itself a Sequence, so it must be caught before the generic
        # branch below, which would try to append to it.
        return list(batch)
    elif isinstance(batch[0], Sequence):
        for data in batch:
            data.append(torch.tensor([data[0].shape[0]]))
        batch = [collate_fn(samples) for samples in zip(*batch)]
        batch[-1] = torch.cumsum(batch[-1], dim=0).int()
        return batch
    elif isinstance(batch[0], Mapping):
        batch = {key: collate_fn([d[key] for d in batch]) for key in batch[0]}
        for key in batch.keys():
            if "offset" in key:
                batch[key] = torch.cumsum(batch[key], dim=0)
        return batch
    else:
        return default_collate(batch)
43 | 
44 | 
def point_collate_fn(batch, max_batch_points=1e10, mix_prob=0):
    """Collate dict samples, cap the batch by point count, and optionally mix scans.

    After ``collate_fn``, if the batch has an "offset" entry:

    - scans are dropped from the end once their cumulative offset exceeds
      ``max_batch_points`` (the first scan must fit), and every other entry
      is cut to the remaining number of points;
    - with probability ``mix_prob`` the offsets are thinned to every second
      boundary plus the final one (Mix3d).
    """
    assert isinstance(batch[0], Mapping)  # currently, only support input_dict, rather than input_list
    batch = collate_fn(batch)
    if "offset" not in batch:
        return batch

    # at least the first scan can be added to batch
    assert batch["offset"][0] <= max_batch_points
    scan_count = len(batch["offset"])
    for keep in range(1, scan_count):
        if batch["offset"][keep] <= max_batch_points:
            continue
        # Scan `keep` is the first one over the limit: keep only the scans before it.
        batch["offset"] = batch["offset"][:keep]
        point_count = batch["offset"][-1]
        for key in batch.keys():
            if key == "offset":
                continue
            # TODO: bug for data_metas
            batch[key] = batch[key][:point_count]
        break

    # Mix3d (https://arxiv.org/pdf/2110.02210.pdf)
    if random.random() < mix_prob:
        every_second = batch["offset"][1:-1:2]
        final = batch["offset"][-1].unsqueeze(0)
        batch["offset"] = torch.cat([every_second, final], dim=0)
    return batch
67 | 
68 | 
def sa_create(name, var):
    """
    Create a SharedArray called ``name`` with the shape and dtype of ``var``,
    copy ``var`` into it, mark it read-only and return it.
    """
    shared = SA.create(name, var.shape, dtype=var.dtype)
    shared[...] = var[...]
    # lock the array once it has been filled
    shared.flags.writeable = False
    return shared
74 | 
75 | 
def gaussian_kernel(dist2: np.ndarray, a: float = 1, c: float = 5):
    """
    Evaluate ``a * exp(-dist2 / (2 * c ** 2))`` element-wise.

    Args:
        dist2: input values; judging by the name these are squared
            distances (TODO confirm with callers).
        a: multiplicative factor applied to the exponential.
        c: width parameter; it is squared inside the exponent.

    Returns:
        The kernel values, with the same shape as ``dist2``.
    """
    return a * np.exp(-dist2 / (2 * c ** 2))


--------------------------------------------------------------------------------
/pcr/engines/__init__.py:
--------------------------------------------------------------------------------
1 | from .test import SegmentationTest, ClassificationTest, PartSegmentationTest


--------------------------------------------------------------------------------
/pcr/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .builder import build_model
2 | from .point_transformer import *
3 | from .point_transformer2 import *
4 | # from .stratified_transformer import *
5 | from .sparse_unet import *
6 | # from .spvcnn import *
7 | 


--------------------------------------------------------------------------------
/pcr/models/builder.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Model Builder
 3 | 
 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com)
 5 | Please cite our work if the code is helpful to you.
 6 | """
 7 | 
 8 | from pcr.utils.registry import Registry
 9 | 
# Registries created under the names 'models' and 'modules'; model classes
# register themselves on MODELS and are built from configs through it.
MODELS = Registry('models')
MODULES = Registry('modules')


def build_model(cfg):
    """Build a model from ``cfg`` through the ``MODELS`` registry."""
    return MODELS.build(cfg)
17 | 


--------------------------------------------------------------------------------
/pcr/models/point_transformer/__init__.py:
--------------------------------------------------------------------------------
1 | from .point_transformer_seg import *
2 | from .point_transformer_partseg import *
3 | from .point_transformer_cls import *
4 | 


--------------------------------------------------------------------------------
/pcr/models/point_transformer/point_transformer_cls.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Point Transformer V1 for Object Classification
 3 | 
 4 | Might be a bit different from the original paper
 5 | 
 6 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com)
 7 | Please cite our work if the code is helpful to you.
 8 | """
 9 | 
10 | import torch
11 | import torch.nn as nn
12 | 
13 | from .point_transformer_seg import TransitionDown, Bottleneck
14 | from ..builder import MODELS
15 | 
16 | 
class PointTransformerCls(nn.Module):
    """
    Point Transformer V1 classifier: five encoder stages followed by a
    per-sample average pooling and an MLP classification head.

    Args:
        block: block class appended after each ``TransitionDown``; it must
            provide an ``expansion`` attribute.
        blocks: number of layers per stage (five entries). The first layer of
            a stage is always ``TransitionDown``, so a value of ``k`` adds
            ``k - 1`` instances of ``block``.
        in_channels: number of input feature channels. With ``3`` only the
            coordinates are used as features; otherwise coordinates and
            ``feat`` are concatenated.
        num_classes: size of the classification output.
    """

    def __init__(self, block, blocks, in_channels=6, num_classes=40):
        super().__init__()
        self.in_channels = in_channels
        self.in_planes, planes = in_channels, [32, 64, 128, 256, 512]
        share_planes = 8
        stride, nsample = [1, 4, 4, 4, 4], [8, 16, 16, 16, 16]
        self.enc1 = self._make_enc(block, planes[0], blocks[0], share_planes,
                                   stride=stride[0], nsample=nsample[0])  # N/1
        self.enc2 = self._make_enc(block, planes[1], blocks[1], share_planes,
                                   stride=stride[1], nsample=nsample[1])  # N/4
        self.enc3 = self._make_enc(block, planes[2], blocks[2], share_planes,
                                   stride=stride[2], nsample=nsample[2])  # N/16
        self.enc4 = self._make_enc(block, planes[3], blocks[3], share_planes,
                                   stride=stride[3], nsample=nsample[3])  # N/64
        self.enc5 = self._make_enc(block, planes[4], blocks[4], share_planes,
                                   stride=stride[4], nsample=nsample[4])  # N/256
        self.cls = nn.Sequential(nn.Linear(planes[4], 256),
                                 nn.BatchNorm1d(256),
                                 nn.ReLU(inplace=True),
                                 nn.Dropout(p=0.5),
                                 nn.Linear(256, 128),
                                 nn.BatchNorm1d(128),
                                 nn.ReLU(inplace=True),
                                 nn.Dropout(p=0.5),
                                 nn.Linear(128, num_classes))

    def _make_enc(self, block, planes, blocks, share_planes=8, stride=1, nsample=16):
        """Build one encoder stage and update ``self.in_planes`` for the next one."""
        layers = [TransitionDown(self.in_planes, planes * block.expansion, stride, nsample)]
        self.in_planes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.in_planes, self.in_planes, share_planes, nsample=nsample))
        return nn.Sequential(*layers)

    def forward(self, input_dict):
        """
        Classify every sample of the batch.

        Args:
            input_dict: mapping with ``"coord"``, ``"offset"`` and, unless
                ``in_channels == 3``, ``"feat"``. ``offset`` holds the
                cumulative end index of each sample.

        Returns:
            The output of the classification head, one row per sample.
        """
        p0 = input_dict["coord"]
        o0 = input_dict["offset"].int()
        # "feat" is only looked up when it is actually used, so coordinate-only
        # inputs do not need to carry it.
        x0 = p0 if self.in_channels == 3 else torch.cat((p0, input_dict["feat"]), 1)
        p1, x1, o1 = self.enc1([p0, x0, o0])
        p2, x2, o2 = self.enc2([p1, x1, o1])
        p3, x3, o3 = self.enc3([p2, x2, o2])
        p4, x4, o4 = self.enc4([p3, x3, o3])
        p5, x5, o5 = self.enc5([p4, x4, o4])
        # average the remaining point features of every sample separately
        pooled = []
        start = 0
        for end in o5:
            pooled.append(x5[start:end, :].sum(0, True) / (end - start))
            start = end
        x = torch.cat(pooled, 0)
        x = self.cls(x)
        return x
72 | 
73 | 
@MODELS.register_module("PointTransformer-Cls26")
class PointTransformerCls26(PointTransformerCls):
    """``PointTransformerCls`` built from ``Bottleneck`` with stage sizes [1, 1, 1, 1, 1]."""

    def __init__(self, **kwargs):
        stage_sizes = [1, 1, 1, 1, 1]
        super().__init__(Bottleneck, stage_sizes, **kwargs)
78 | 
79 | 
@MODELS.register_module("PointTransformer-Cls38")
class PointTransformerCls38(PointTransformerCls):
    """``PointTransformerCls`` built from ``Bottleneck`` with stage sizes [1, 2, 2, 2, 2]."""

    def __init__(self, **kwargs):
        stage_sizes = [1, 2, 2, 2, 2]
        super().__init__(Bottleneck, stage_sizes, **kwargs)
84 | 
85 | 
@MODELS.register_module("PointTransformer-Cls50")
class PointTransformerCls50(PointTransformerCls):
    """``PointTransformerCls`` built from ``Bottleneck`` with stage sizes [1, 2, 3, 5, 2]."""

    def __init__(self, **kwargs):
        stage_sizes = [1, 2, 3, 5, 2]
        super().__init__(Bottleneck, stage_sizes, **kwargs)


--------------------------------------------------------------------------------
/pcr/models/point_transformer/utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | 
class LayerNorm1d(nn.BatchNorm1d):
    """
    ``nn.BatchNorm1d`` applied to inputs whose channels are on the last axis.

    Despite the name this is batch normalisation: the input is transposed so
    that axis 2 becomes axis 1 (the channel axis ``BatchNorm1d`` expects),
    normalised, and transposed back.
    """

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        """Normalise a 3-D ``input`` and return a tensor of the same layout."""
        channels_first = input.transpose(1, 2).contiguous()
        normalized = super().forward(channels_first)
        return normalized.transpose(1, 2).contiguous()
9 | 


--------------------------------------------------------------------------------
/pcr/models/point_transformer2/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Point Transformer V2
 3 | 
 4 | Copyright (c) Xiaoyang Wu (xiaoyang.wu@connect.hku.hk). All Rights Reserved.
 5 | Please cite our work if you use any part of the code.
 6 | """
 7 | 
 8 | from .point_transformer_v2m1_origin import PointTransformerV2
 9 | from .point_transformer_v2m2_base import PointTransformerV2
10 | 


--------------------------------------------------------------------------------
/pcr/models/sparse_unet/__init__.py:
--------------------------------------------------------------------------------
1 | # from .mink_unet import *
2 | from .spconv_unet import *
3 | 


--------------------------------------------------------------------------------
/pcr/models/spvcnn/__init__.py:
--------------------------------------------------------------------------------
1 | from .ts_spvcnn import *
2 | 


--------------------------------------------------------------------------------
/pcr/models/stratified_transformer/__init__.py:
--------------------------------------------------------------------------------
1 | from .stratified_transformer_v1m1_origin import StratifiedTransformer
2 | from .stratified_transformer_v1m2_refine import StratifiedTransformer
3 | 


--------------------------------------------------------------------------------
/pcr/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pointcept/PointTransformerV2/5386c4d71f3d6c42c24a8105fce8750e9355dc54/pcr/utils/__init__.py


--------------------------------------------------------------------------------
/pcr/utils/env.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Environment Utils
 3 | 
 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com)
 5 | Please cite our work if the code is helpful to you.
 6 | """
 7 | 
 8 | import os
 9 | import random
10 | import numpy as np
11 | import torch
12 | import torch.backends.cudnn as cudnn
13 | 
14 | from datetime import datetime
15 | 
16 | 
def get_random_seed():
    """
    Return an integer seed derived from the process id, the seconds and
    microseconds of the current time, and two bytes from ``os.urandom``.
    """
    pid_part = os.getpid()
    clock_part = int(datetime.now().strftime("%S%f"))
    urandom_part = int.from_bytes(os.urandom(2), "big")
    return pid_part + clock_part + urandom_part
24 | 
25 | 
def set_seed(seed=None):
    """
    Seed ``random``, NumPy and torch (CPU and CUDA) with ``seed``.

    A seed from ``get_random_seed`` is used when ``seed`` is ``None``. cuDNN is
    switched to deterministic mode with benchmarking disabled, and
    ``PYTHONHASHSEED`` is written to the environment.
    """
    seed = get_random_seed() if seed is None else seed
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seeder in seeders:
        seeder(seed)
    cudnn.deterministic = True
    cudnn.benchmark = False
    os.environ["PYTHONHASHSEED"] = str(seed)


--------------------------------------------------------------------------------
/pcr/utils/optimizer.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Optimizer
 3 | 
 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com)
 5 | Please cite our work if the code is helpful to you.
 6 | """
 7 | 
 8 | import torch
 9 | from .registry import Registry
10 | 
# Registry used by build_optimizer to construct an optimizer from a config.
OPTIMIZERS = Registry("optimizers")


# Register the torch optimizers under the names "SGD", "Adam" and "AdamW".
OPTIMIZERS.register_module(module=torch.optim.SGD, name="SGD")
OPTIMIZERS.register_module(module=torch.optim.Adam, name="Adam")
OPTIMIZERS.register_module(module=torch.optim.AdamW, name="AdamW")
17 | 
18 | 
def build_optimizer(cfg, model, params_dicts=None):
    """
    Build an optimizer for ``model`` through the ``OPTIMIZERS`` registry.

    Without ``params_dicts`` all model parameters form a single group. With
    it, one extra group is created per entry, using the learning rate
    ``entry.lr_scale * cfg.lr``; a parameter goes to the first entry whose
    ``keyword`` occurs in its name and to the default group (index 0) when
    none matches. The result is written to ``cfg.params`` before building.
    """
    if params_dicts is None:
        cfg.params = model.parameters()
        return OPTIMIZERS.build(cfg=cfg)

    # group 0 collects everything that matches no keyword
    cfg.params = [dict(params=[])]
    for group_cfg in params_dicts:
        cfg.params.append(dict(params=[], lr=group_cfg.lr_scale * cfg.lr))

    for param_name, param in model.named_parameters():
        for group_idx, group_cfg in enumerate(params_dicts, start=1):
            if group_cfg.keyword in param_name:
                cfg.params[group_idx]["params"].append(param)
                break
        else:
            cfg.params[0]["params"].append(param)
    return OPTIMIZERS.build(cfg=cfg)
37 | 


--------------------------------------------------------------------------------
/pcr/utils/path.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) OpenMMLab. All rights reserved.
  2 | import os
  3 | import os.path as osp
  4 | from pathlib import Path
  5 | 
  6 | from .misc import is_str
  7 | 
  8 | 
  9 | def is_filepath(x):
 10 |     return is_str(x) or isinstance(x, Path)
 11 | 
 12 | 
 13 | def fopen(filepath, *args, **kwargs):
 14 |     if is_str(filepath):
 15 |         return open(filepath, *args, **kwargs)
 16 |     elif isinstance(filepath, Path):
 17 |         return filepath.open(*args, **kwargs)
 18 |     raise ValueError('`filepath` should be a string or a Path')
 19 | 
 20 | 
 21 | def check_file_exist(filename, msg_tmpl='file "{}" does not exist'):
 22 |     if not osp.isfile(filename):
 23 |         raise FileNotFoundError(msg_tmpl.format(filename))
 24 | 
 25 | 
 26 | def mkdir_or_exist(dir_name, mode=0o777):
 27 |     if dir_name == '':
 28 |         return
 29 |     dir_name = osp.expanduser(dir_name)
 30 |     os.makedirs(dir_name, mode=mode, exist_ok=True)
 31 | 
 32 | 
 33 | def symlink(src, dst, overwrite=True, **kwargs):
 34 |     if os.path.lexists(dst) and overwrite:
 35 |         os.remove(dst)
 36 |     os.symlink(src, dst, **kwargs)
 37 | 
 38 | 
 39 | def scandir(dir_path, suffix=None, recursive=False, case_sensitive=True):
 40 |     """Scan a directory to find the interested files.
 41 | 
 42 |     Args:
 43 |         dir_path (str | obj:`Path`): Path of the directory.
 44 |         suffix (str | tuple(str), optional): File suffix that we are
 45 |             interested in. Default: None.
 46 |         recursive (bool, optional): If set to True, recursively scan the
 47 |             directory. Default: False.
 48 |         case_sensitive (bool, optional) : If set to False, ignore the case of
 49 |             suffix. Default: True.
 50 | 
 51 |     Returns:
 52 |         A generator for all the interested files with relative paths.
 53 |     """
 54 |     if isinstance(dir_path, (str, Path)):
 55 |         dir_path = str(dir_path)
 56 |     else:
 57 |         raise TypeError('"dir_path" must be a string or Path object')
 58 | 
 59 |     if (suffix is not None) and not isinstance(suffix, (str, tuple)):
 60 |         raise TypeError('"suffix" must be a string or tuple of strings')
 61 | 
 62 |     if suffix is not None and not case_sensitive:
 63 |         suffix = suffix.lower() if isinstance(suffix, str) else tuple(
 64 |             item.lower() for item in suffix)
 65 | 
 66 |     root = dir_path
 67 | 
 68 |     def _scandir(dir_path, suffix, recursive, case_sensitive):
 69 |         for entry in os.scandir(dir_path):
 70 |             if not entry.name.startswith('.') and entry.is_file():
 71 |                 rel_path = osp.relpath(entry.path, root)
 72 |                 _rel_path = rel_path if case_sensitive else rel_path.lower()
 73 |                 if suffix is None or _rel_path.endswith(suffix):
 74 |                     yield rel_path
 75 |             elif recursive and os.path.isdir(entry.path):
 76 |                 # scan recursively if entry.path is a directory
 77 |                 yield from _scandir(entry.path, suffix, recursive,
 78 |                                     case_sensitive)
 79 | 
 80 |     return _scandir(dir_path, suffix, recursive, case_sensitive)
 81 | 
 82 | 
 83 | def find_vcs_root(path, markers=('.git', )):
 84 |     """Finds the root directory (including itself) of specified markers.
 85 | 
 86 |     Args:
 87 |         path (str): Path of directory or file.
 88 |         markers (list[str], optional): List of file or directory names.
 89 | 
 90 |     Returns:
 91 |         The directory contained one of the markers or None if not found.
 92 |     """
 93 |     if osp.isfile(path):
 94 |         path = osp.dirname(path)
 95 | 
 96 |     prev, cur = None, osp.abspath(osp.expanduser(path))
 97 |     while cur != prev:
 98 |         if any(osp.exists(osp.join(cur, marker)) for marker in markers):
 99 |             return cur
100 |         prev, cur = cur, osp.split(cur)[0]
101 |     return None
102 | 


--------------------------------------------------------------------------------
/pcr/utils/visualization.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Visualization Utils
 3 | 
 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com)
 5 | Please cite our work if the code is helpful to you.
 6 | """
 7 | 
 8 | import os
 9 | import open3d as o3d
10 | import numpy as np
11 | import torch
12 | 
13 | 
def to_numpy(x):
    """
    Return ``x`` as a NumPy array.

    Tensors are cloned, detached and moved to the CPU before conversion;
    arrays are returned as they are. Anything else fails the assertion.
    """
    array = x.clone().detach().cpu().numpy() if isinstance(x, torch.Tensor) else x
    assert isinstance(array, np.ndarray)
    return array
19 | 
20 | 
def save_point_cloud(coord, color=None, file_path="pc.ply", logger=None):
    """
    Write a point cloud to ``file_path`` with Open3D.

    Args:
        coord: point coordinates (tensor or array) passed to
            ``Vector3dVector``.
        color: optional per-point colors; all-ones colors shaped like
            ``coord`` are used when omitted.
        file_path: output file; its parent directory is created if needed.
        logger: optional logger that receives a message once the file is
            written.
    """
    # A bare file name such as the default "pc.ply" has an empty dirname and
    # os.makedirs("") raises FileNotFoundError, so only create a real parent.
    parent_dir = os.path.dirname(file_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    coord = to_numpy(coord)
    if color is not None:
        color = to_numpy(color)
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(coord)
    pcd.colors = o3d.utility.Vector3dVector(np.ones_like(coord) if color is None else color)
    o3d.io.write_point_cloud(file_path, pcd)
    if logger is not None:
        logger.info(f"Save Point Cloud to: {file_path}")
32 | 
33 | 
def save_bounding_boxes(bboxes_corners, color=(1., 0., 0.), file_path="bbox.ply", logger=None):
    """
    Write the wireframes of bounding boxes to ``file_path`` as an Open3D line set.

    Args:
        bboxes_corners: box corners (tensor or array), iterated box by box
            and flattened to a list of 3-D points; eight corners per box are
            assumed by the edge indexing.
        color: single color applied to every line.
        file_path: output file.
        logger: optional logger that receives a message once the file is
            written.
    """
    bboxes_corners = to_numpy(bboxes_corners)
    # point list
    points = bboxes_corners.reshape(-1, 3)
    # line list: loop 0-1-2-3, loop 4-5-6-7 and the four edges joining them.
    # The second loop closes with [7, 4]; the former [7, 0] drew a diagonal
    # and left the 7-4 edge out.
    # NOTE(review): assumes corner k + 4 is the counterpart of corner k, as
    # the joining edges imply - confirm against the corner layout in use.
    box_lines = np.array([
        [0, 1], [1, 2], [2, 3], [3, 0],
        [4, 5], [5, 6], [6, 7], [7, 4],
        [0, 4], [1, 5], [2, 6], [3, 7]
    ])
    # shift the template by eight point indices per box
    lines = np.concatenate([box_lines + i * 8 for i in range(len(bboxes_corners))])
    # color list
    color = np.array([color for _ in range(len(lines))])
    # generate line set
    line_set = o3d.geometry.LineSet()
    line_set.points = o3d.utility.Vector3dVector(points)
    line_set.lines = o3d.utility.Vector2iVector(lines)
    line_set.colors = o3d.utility.Vector3dVector(color)
    o3d.io.write_line_set(file_path, line_set)

    if logger is not None:
        logger.info(f"Save Boxes to: {file_path}")
59 | 
60 | 
def save_lines(points, lines, color=(1., 0., 0.), file_path="lines.ply", logger=None):
    """
    Write a set of line segments to ``file_path`` as an Open3D line set.

    ``points`` and ``lines`` may be tensors or arrays; ``lines`` is handed to
    ``Vector2iVector`` and ``color`` is repeated once per line. A message is
    logged when a ``logger`` is given.
    """
    point_array = to_numpy(points)
    line_array = to_numpy(lines)
    per_line_colors = np.array([color] * len(line_array))

    line_set = o3d.geometry.LineSet()
    line_set.points = o3d.utility.Vector3dVector(point_array)
    line_set.lines = o3d.utility.Vector2iVector(line_array)
    line_set.colors = o3d.utility.Vector3dVector(per_line_colors)
    o3d.io.write_line_set(file_path, line_set)

    if logger is not None:
        logger.info(f"Save Lines to: {file_path}")


--------------------------------------------------------------------------------
/scripts/pretrain.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | cd $(dirname $(dirname "$0")) || exit
 4 | ROOT_DIR=$(pwd)
 5 | PYTHON=python
 6 | 
 7 | TRAIN_CODE=pretrain.py
 8 | 
 9 | DATASET=s3dis
10 | CONFIG=point_transformer_v2
11 | EXP_NAME=debug
12 | WEIGHT="None"
13 | 
14 | RESUME=false
15 | GPU=false
16 | 
17 | 
18 | while getopts "p:d:c:n:w:g:r:" opt; do
19 |   case $opt in
20 |     p)
21 |       PYTHON=$OPTARG
22 |       ;;
23 |     d)
24 |       DATASET=$OPTARG
25 |       ;;
26 |     c)
27 |       CONFIG=$OPTARG
28 |       ;;
29 |     n)
30 |       EXP_NAME=$OPTARG
31 |       ;;
32 |     w)
33 |       WEIGHT=$OPTARG
34 |       ;;
35 |     r)
36 |       RESUME=$OPTARG
37 |       ;;
38 |     g)
39 |       GPU=$OPTARG
40 |       ;;
41 |     \?)
42 |       echo "Invalid option: -$OPTARG"
43 |       ;;
44 |   esac
45 | done
46 | 
47 | echo "Experiment name: $EXP_NAME"
48 | echo "Python interpreter dir: $PYTHON"
49 | echo "Dataset: $DATASET"
50 | echo "Config: $CONFIG"
51 | 
52 | 
53 | 
54 | EXP_DIR=exp/${DATASET}/${EXP_NAME}
55 | MODEL_DIR=${EXP_DIR}/model
56 | CODE_DIR=${EXP_DIR}/code
57 | CONFIG_DIR=configs/${DATASET}/${CONFIG}.py
58 | 
59 | 
60 | echo " =========> CREATE EXP DIR <========="
61 | echo "Experiment dir: $ROOT_DIR/$EXP_DIR"
62 | if ${RESUME}
63 | then
64 |   CONFIG_DIR=${EXP_DIR}/config.py
65 |   WEIGHT=$MODEL_DIR/model_last.pth
66 | else
67 |   mkdir -p "$MODEL_DIR" "$CODE_DIR"
68 |   cp -r scripts tools pcr "$CODE_DIR"
69 | fi
70 | 
71 | echo "Loading config in:" $CONFIG_DIR
72 | export PYTHONPATH=./$CODE_DIR
73 | echo "Running code in: $CODE_DIR"
74 | 
75 | 
76 | echo " =========> RUN TASK <========="
77 | 
78 | if [ "${WEIGHT}" = "None" ]
79 | then
80 |     $PYTHON "$CODE_DIR"/tools/$TRAIN_CODE \
81 |     --config-file "$CONFIG_DIR" \
82 |     --num-gpus "$GPU" \
83 |     --options save_path="$EXP_DIR"
84 | else
85 |     $PYTHON "$CODE_DIR"/tools/$TRAIN_CODE \
86 |     --config-file "$CONFIG_DIR" \
87 |     --num-gpus "$GPU" \
88 |     --options save_path="$EXP_DIR" resume="$RESUME" weight="$WEIGHT"
89 | fi


--------------------------------------------------------------------------------
/scripts/test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | cd $(dirname $(dirname "$0")) || exit
 4 | export PYTHONPATH=./
 5 | PYTHON=python
 6 | 
 7 | TEST_CODE=test.py
 8 | 
 9 | DATASET=s3dis
10 | CONFIG="None"
11 | EXP_NAME=debug
12 | WEIGHT=model_best
13 | 
14 | while getopts "p:d:c:n:w:" opt; do
15 |   case $opt in
16 |     p)
17 |       PYTHON=$OPTARG
18 |       ;;
19 |     d)
20 |       DATASET=$OPTARG
21 |       ;;
22 |     c)
23 |       CONFIG=$OPTARG
24 |       ;;
25 |     n)
26 |       EXP_NAME=$OPTARG
27 |       ;;
28 |     w)
29 |       WEIGHT=$OPTARG
30 |       ;;
31 |     \?)
32 |       echo "Invalid option: -$OPTARG"
33 |       ;;
34 |   esac
35 | done
36 | 
37 | echo "Experiment name: $EXP_NAME"
38 | echo "Python interpreter dir: $PYTHON"
39 | echo "Dataset: $DATASET"
40 | 
41 | EXP_DIR=exp/${DATASET}/${EXP_NAME}
42 | MODEL_DIR=${EXP_DIR}/model
43 | CODE_DIR=${EXP_DIR}/code
44 | CONFIG_DIR=${EXP_DIR}/config.py
45 | 
46 | if [ "${CONFIG}" = "None" ]
47 | then
48 |     CONFIG_DIR=${EXP_DIR}/config.py
49 | else
50 |     CONFIG_DIR=configs/${DATASET}/${CONFIG}.py
51 | fi
52 | 
53 | echo " =========> RUN TASK <========="
54 | 
55 | #$PYTHON -u tools/$TEST_CODE \
56 | $PYTHON -u "$CODE_DIR"/tools/$TEST_CODE \
57 |   --config-file "$CONFIG_DIR" \
58 |   --options save_path="$EXP_DIR" weight="${MODEL_DIR}"/"${WEIGHT}".pth
59 | 


--------------------------------------------------------------------------------
/scripts/train.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | cd $(dirname $(dirname "$0")) || exit
 4 | ROOT_DIR=$(pwd)
 5 | PYTHON=python
 6 | 
 7 | TRAIN_CODE=train.py
 8 | 
 9 | DATASET=scannet
10 | CONFIG=point_transformer_v2
11 | EXP_NAME=debug
12 | WEIGHT="None"
13 | RESUME=false
14 | GPU=4
15 | 
16 | 
17 | while getopts "p:d:c:n:w:g:r:" opt; do
18 |   case $opt in
19 |     p)
20 |       PYTHON=$OPTARG
21 |       ;;
22 |     d)
23 |       DATASET=$OPTARG
24 |       ;;
25 |     c)
26 |       CONFIG=$OPTARG
27 |       ;;
28 |     n)
29 |       EXP_NAME=$OPTARG
30 |       ;;
31 |     w)
32 |       WEIGHT=$OPTARG
33 |       ;;
34 |     r)
35 |       RESUME=$OPTARG
36 |       ;;
37 |     g)
38 |       GPU=$OPTARG
39 |       ;;
40 |     \?)
41 |       echo "Invalid option: -$OPTARG"
42 |       ;;
43 |   esac
44 | done
45 | 
46 | echo "Experiment name: $EXP_NAME"
47 | echo "Python interpreter dir: $PYTHON"
48 | echo "Dataset: $DATASET"
49 | echo "Config: $CONFIG"
50 | 
51 | 
52 | 
53 | EXP_DIR=exp/${DATASET}/${EXP_NAME}
54 | MODEL_DIR=${EXP_DIR}/model
55 | CODE_DIR=${EXP_DIR}/code
56 | CONFIG_DIR=configs/${DATASET}/${CONFIG}.py
57 | 
58 | 
59 | echo " =========> CREATE EXP DIR <========="
60 | echo "Experiment dir: $ROOT_DIR/$EXP_DIR"
61 | if ${RESUME}
62 | then
63 |   CONFIG_DIR=${EXP_DIR}/config.py
64 |   WEIGHT=$MODEL_DIR/model_last.pth
65 | else
66 |   mkdir -p "$MODEL_DIR" "$CODE_DIR"
67 |   cp -r scripts tools pcr "$CODE_DIR"
68 | fi
69 | 
70 | echo "Loading config in:" $CONFIG_DIR
71 | export PYTHONPATH=./$CODE_DIR
72 | echo "Running code in: $CODE_DIR"
73 | 
74 | 
75 | echo " =========> RUN TASK <========="
76 | 
77 | if [ "${WEIGHT}" = "None" ]
78 | then
79 |     $PYTHON "$CODE_DIR"/tools/$TRAIN_CODE \
80 |     --config-file "$CONFIG_DIR" \
81 |     --num-gpus "$GPU" \
82 |     --options save_path="$EXP_DIR"
83 | else
84 |     $PYTHON "$CODE_DIR"/tools/$TRAIN_CODE \
85 |     --config-file "$CONFIG_DIR" \
86 |     --num-gpus "$GPU" \
87 |     --options save_path="$EXP_DIR" resume="$RESUME" weight="$WEIGHT"
88 | fi


--------------------------------------------------------------------------------
/tools/test.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Main Testing Script
 3 | 
 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com)
 5 | Please cite our work if the code is helpful to you.
 6 | """
 7 | 
 8 | import os
 9 | import random
10 | import numpy as np
11 | import argparse
12 | import collections
13 | 
14 | import torch
15 | import torch.nn.parallel
16 | import torch.optim
17 | import torch.utils.data
18 | 
19 | from pcr.models import build_model
20 | from pcr.datasets import build_dataset
21 | from pcr.datasets.utils import collate_fn
22 | from pcr.utils.config import Config, DictAction
23 | from pcr.utils.logger import get_root_logger
24 | from pcr.utils.env import get_random_seed, set_seed
25 | from pcr.engines.test import TEST
26 | 
27 | 
def get_parser():
    """
    Parse the command line of the test script.

    Returns the parsed namespace (not the parser) with ``config_file`` and
    ``options``.
    """
    cli = argparse.ArgumentParser(description='PCR Test Process')
    cli.add_argument('--config-file', default="", metavar="FILE", help="path to config file")
    cli.add_argument('--options', nargs='+', action=DictAction, help='custom options')
    return cli.parse_args()
34 | 
35 | 
def main():
    """
    Run the test pipeline: load the config, build the model and the test
    data loader, restore the checkpoint named by ``cfg.weight`` and hand
    everything to the tester selected by ``cfg.test``.

    Raises:
        RuntimeError: if ``cfg.weight`` is not an existing file.
    """
    args = get_parser()

    # config_parser
    cfg = Config.fromfile(args.config_file)
    if args.options is not None:
        cfg.merge_from_dict(args.options)

    if cfg.seed is None:
        cfg.seed = get_random_seed()

    os.makedirs(cfg.save_path, exist_ok=True)

    # default_setup
    set_seed(cfg.seed)
    cfg.batch_size_val_per_gpu = cfg.batch_size_test  # TODO: add support to multi gpu test
    cfg.num_worker_per_gpu = cfg.num_worker  # TODO: add support to multi gpu test

    # tester init
    weight_name = os.path.basename(cfg.weight).split(".")[0]
    logger = get_root_logger(log_file=os.path.join(cfg.save_path, "test-{}.log".format(weight_name)))
    logger.info("=> Loading config ...")
    logger.info(f"Save path: {cfg.save_path}")
    logger.info(f"Config:\n{cfg.pretty_text}")

    # build model
    logger.info("=> Building model ...")
    model = build_model(cfg.model).cuda()
    n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
    logger.info(f"Num params: {n_parameters}")

    # build dataset
    logger.info("=> Building test dataset & dataloader ...")
    test_dataset = build_dataset(cfg.data.test)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=cfg.batch_size_val_per_gpu,
                                              shuffle=False,
                                              num_workers=cfg.num_worker_per_gpu,
                                              pin_memory=True,
                                              collate_fn=collate_fn)

    # load checkpoint
    if not os.path.isfile(cfg.weight):
        raise RuntimeError("=> no checkpoint found at '{}'".format(cfg.weight))
    checkpoint = torch.load(cfg.weight)
    wrapper_prefix = "module."
    new_state_dict = collections.OrderedDict()
    for k, v in checkpoint['state_dict'].items():
        # module.xxx.xxx -> xxx.xxx; keys saved without the wrapper prefix
        # are kept as they are instead of losing their first characters.
        name = k[len(wrapper_prefix):] if k.startswith(wrapper_prefix) else k
        new_state_dict[name] = v
    model.load_state_dict(new_state_dict, strict=True)
    logger.info("=> loaded weight '{}' (epoch {})".format(cfg.weight, checkpoint['epoch']))
    cfg.epochs = checkpoint['epoch']  # TODO: move to self
    TEST.build(cfg.test)(cfg, test_loader, model)
91 | 
92 | 
# Run the test pipeline only when this file is executed as a script.
if __name__ == '__main__':
    main()
95 | 


--------------------------------------------------------------------------------
/tools/train.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Main Training Script
 3 | 
 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com)
 5 | Please cite our work if the code is helpful to you.
 6 | """
 7 | 
 8 | from pcr.engines.defaults import default_argument_parser, default_config_parser, default_setup, Trainer
 9 | from pcr.engines.launch import launch
10 | import os
11 | 
12 | 
def main_worker(cfg):
    """Worker entry point: run ``default_setup`` on ``cfg``, then train with a ``Trainer``."""
    Trainer(default_setup(cfg)).train()
17 | 
18 | 
def main():
    """
    Parse the command line, load the config and start ``main_worker``
    through ``launch`` with the GPU and machine settings from the arguments.
    """
    args = default_argument_parser().parse_args()
    cfg = default_config_parser(args.config_file, args.options)

    launch_settings = dict(
        num_gpus_per_machine=args.num_gpus,
        num_machines=args.num_machines,
        machine_rank=args.machine_rank,
        dist_url=args.dist_url,
        cfg=(cfg,),
    )
    launch(main_worker, **launch_settings)
31 | 
32 | 
# Start training only when this file is executed as a script.
if __name__ == "__main__":
    main()
35 | 


--------------------------------------------------------------------------------