├── .gitignore ├── LICENSE.txt ├── README.md ├── assets └── overview.png ├── configs ├── _base_ │ └── default_runtime.py └── semantic_kitti │ └── semseg_mambamos.py ├── exp └── semantic_kitti │ └── mambamos │ └── config.py ├── libs ├── pointgroup_ops │ ├── functions │ │ ├── __init__.py │ │ └── functions.py │ ├── setup.py │ └── src │ │ ├── bfs_cluster.cpp │ │ └── bfs_cluster_kernel.cu ├── pointops │ ├── __init__.py │ ├── build │ │ ├── lib.linux-x86_64-cpython-38 │ │ │ └── pointops │ │ │ │ ├── _C.cpython-38-x86_64-linux-gnu.so │ │ │ │ ├── __init__.py │ │ │ │ ├── aggregation.py │ │ │ │ ├── attention.py │ │ │ │ ├── grouping.py │ │ │ │ ├── interpolation.py │ │ │ │ ├── query.py │ │ │ │ ├── sampling.py │ │ │ │ ├── subtraction.py │ │ │ │ └── utils.py │ │ └── temp.linux-x86_64-cpython-38 │ │ │ ├── .ninja_deps │ │ │ ├── .ninja_log │ │ │ ├── build.ninja │ │ │ └── src │ │ │ ├── aggregation │ │ │ ├── aggregation_cuda.o │ │ │ └── aggregation_cuda_kernel.o │ │ │ ├── attention │ │ │ ├── attention_cuda.o │ │ │ └── attention_cuda_kernel.o │ │ │ ├── ball_query │ │ │ ├── ball_query_cuda.o │ │ │ └── ball_query_cuda_kernel.o │ │ │ ├── grouping │ │ │ ├── grouping_cuda.o │ │ │ └── grouping_cuda_kernel.o │ │ │ ├── interpolation │ │ │ ├── interpolation_cuda.o │ │ │ └── interpolation_cuda_kernel.o │ │ │ ├── knn_query │ │ │ ├── knn_query_cuda.o │ │ │ └── knn_query_cuda_kernel.o │ │ │ ├── pointops_api.o │ │ │ ├── random_ball_query │ │ │ ├── random_ball_query_cuda.o │ │ │ └── random_ball_query_cuda_kernel.o │ │ │ ├── sampling │ │ │ ├── sampling_cuda.o │ │ │ └── sampling_cuda_kernel.o │ │ │ └── subtraction │ │ │ ├── subtraction_cuda.o │ │ │ └── subtraction_cuda_kernel.o │ ├── dist │ │ └── pointops-1.0-py3.8-linux-x86_64.egg │ ├── functions │ │ ├── __init__.py │ │ ├── aggregation.py │ │ ├── attention.py │ │ ├── grouping.py │ │ ├── interpolation.py │ │ ├── query.py │ │ ├── sampling.py │ │ ├── subtraction.py │ │ └── utils.py │ ├── pointops.egg-info │ │ ├── PKG-INFO │ │ ├── SOURCES.txt │ │ ├── dependency_links.txt │ │ ├── requires.txt │ │ └── top_level.txt │ ├── setup.py │ └── src │ │ ├── __init__.py │ │ ├── aggregation │ │ ├── aggregation_cuda.cpp │ │ ├── aggregation_cuda_kernel.cu │ │ └── aggregation_cuda_kernel.h │ │ ├── attention │ │ ├── attention_cuda.cpp │ │ ├── attention_cuda_kernel.cu │ │ └── attention_cuda_kernel.h │ │ ├── ball_query │ │ ├── ball_query_cuda.cpp │ │ ├── ball_query_cuda_kernel.cu │ │ └── ball_query_cuda_kernel.h │ │ ├── cuda_utils.h │ │ ├── grouping │ │ ├── grouping_cuda.cpp │ │ ├── grouping_cuda_kernel.cu │ │ └── grouping_cuda_kernel.h │ │ ├── interpolation │ │ ├── interpolation_cuda.cpp │ │ ├── interpolation_cuda_kernel.cu │ │ └── interpolation_cuda_kernel.h │ │ ├── knn_query │ │ ├── knn_query_cuda.cpp │ │ ├── knn_query_cuda_kernel.cu │ │ └── knn_query_cuda_kernel.h │ │ ├── pointops_api.cpp │ │ ├── random_ball_query │ │ ├── random_ball_query_cuda.cpp │ │ ├── random_ball_query_cuda_kernel.cu │ │ └── random_ball_query_cuda_kernel.h │ │ ├── sampling │ │ ├── sampling_cuda.cpp │ │ ├── sampling_cuda_kernel.cu │ │ └── sampling_cuda_kernel.h │ │ └── subtraction │ │ ├── subtraction_cuda.cpp │ │ ├── subtraction_cuda_kernel.cu │ │ └── subtraction_cuda_kernel.h └── pointops2 │ ├── __init__.py │ ├── functions │ ├── __init__.py │ ├── pointops.py │ ├── pointops2.py │ ├── pointops_ablation.py │ ├── test_attention_op_step1.py │ ├── test_attention_op_step1_v2.py │ ├── test_attention_op_step2.py │ ├── test_relative_pos_encoding_op_step1.py │ ├── test_relative_pos_encoding_op_step1_v2.py │ ├── 
test_relative_pos_encoding_op_step1_v3.py │ ├── test_relative_pos_encoding_op_step2.py │ └── test_relative_pos_encoding_op_step2_v2.py │ ├── setup.py │ └── src │ ├── __init__.py │ ├── aggregation │ ├── aggregation_cuda.cpp │ ├── aggregation_cuda_kernel.cu │ └── aggregation_cuda_kernel.h │ ├── attention │ ├── attention_cuda.cpp │ ├── attention_cuda_kernel.cu │ └── attention_cuda_kernel.h │ ├── attention_v2 │ ├── attention_cuda_kernel_v2.cu │ ├── attention_cuda_kernel_v2.h │ └── attention_cuda_v2.cpp │ ├── cuda_utils.h │ ├── grouping │ ├── grouping_cuda.cpp │ ├── grouping_cuda_kernel.cu │ └── grouping_cuda_kernel.h │ ├── interpolation │ ├── interpolation_cuda.cpp │ ├── interpolation_cuda_kernel.cu │ └── interpolation_cuda_kernel.h │ ├── knnquery │ ├── knnquery_cuda.cpp │ ├── knnquery_cuda_kernel.cu │ └── knnquery_cuda_kernel.h │ ├── pointops_api.cpp │ ├── rpe │ ├── relative_pos_encoding_cuda.cpp │ ├── relative_pos_encoding_cuda_kernel.cu │ └── relative_pos_encoding_cuda_kernel.h │ ├── rpe_v2 │ ├── relative_pos_encoding_cuda_kernel_v2.cu │ ├── relative_pos_encoding_cuda_kernel_v2.h │ └── relative_pos_encoding_cuda_v2.cpp │ ├── sampling │ ├── sampling_cuda.cpp │ ├── sampling_cuda_kernel.cu │ └── sampling_cuda_kernel.h │ └── subtraction │ ├── subtraction_cuda.cpp │ ├── subtraction_cuda_kernel.cu │ └── subtraction_cuda_kernel.h ├── pointcept ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-38.pyc │ └── __init__.cpython-39.pyc ├── datasets │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── builder.cpython-38.pyc │ │ ├── dataloader.cpython-38.pyc │ │ ├── defaults.cpython-38.pyc │ │ ├── semantic_kitti_multi_scans.cpython-38.pyc │ │ ├── transform.cpython-38.pyc │ │ └── utils.cpython-38.pyc │ ├── builder.py │ ├── dataloader.py │ ├── defaults.py │ ├── semantic_kitti_multi_scans.py │ ├── train_split_dynamic_pointnumber.txt │ ├── transform.py │ └── utils.py ├── engines │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── defaults.cpython-38.pyc │ │ ├── defaults.cpython-39.pyc │ │ ├── launch.cpython-38.pyc │ │ ├── test.cpython-38.pyc │ │ └── train.cpython-38.pyc │ ├── defaults.py │ ├── hooks │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── builder.cpython-38.pyc │ │ │ ├── default.cpython-38.pyc │ │ │ ├── evaluator.cpython-38.pyc │ │ │ └── misc.cpython-38.pyc │ │ ├── builder.py │ │ ├── default.py │ │ ├── evaluator.py │ │ └── misc.py │ ├── launch.py │ ├── test.py │ └── train.py ├── models │ ├── MambaMOS │ │ ├── MambaMOS.py │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ └── __init__.cpython-38.pyc │ │ └── mssm.py │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── builder.cpython-38.pyc │ │ ├── default.cpython-38.pyc │ │ └── modules.cpython-38.pyc │ ├── builder.py │ ├── default.py │ ├── losses │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── builder.cpython-38.pyc │ │ │ ├── lovasz.cpython-38.pyc │ │ │ └── misc.cpython-38.pyc │ │ ├── builder.py │ │ ├── lovasz.py │ │ └── misc.py │ ├── modules.py │ └── utils │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── checkpoint.cpython-38.pyc │ │ ├── misc.cpython-38.pyc │ │ └── structure.cpython-38.pyc │ │ ├── checkpoint.py │ │ ├── misc.py │ │ ├── serialization │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── default.cpython-38.pyc │ │ │ ├── hilbert.cpython-38.pyc │ │ │ └── z_order.cpython-38.pyc │ │ ├── default.py │ │ ├── 
hilbert.py │ │ └── z_order.py │ │ └── structure.py └── utils │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-38.pyc │ ├── cache.cpython-38.pyc │ ├── comm.cpython-38.pyc │ ├── config.cpython-38.pyc │ ├── env.cpython-38.pyc │ ├── events.cpython-38.pyc │ ├── logger.cpython-38.pyc │ ├── misc.cpython-38.pyc │ ├── optimizer.cpython-38.pyc │ ├── path.cpython-38.pyc │ ├── registry.cpython-38.pyc │ ├── scheduler.cpython-38.pyc │ └── timer.cpython-38.pyc │ ├── cache.py │ ├── comm.py │ ├── config.py │ ├── env.py │ ├── events.py │ ├── logger.py │ ├── misc.py │ ├── optimizer.py │ ├── path.py │ ├── registry.py │ ├── scheduler.py │ ├── timer.py │ └── visualization.py ├── scripts ├── build_image.sh ├── test.sh └── train.sh └── tools ├── test.py ├── test_s3dis_6fold.py └── train.py /.gitignore: -------------------------------------------------------------------------------- 1 | image/ 2 | __pycache__ 3 | **/build/ 4 | **/*.egg-info/ 5 | **/dist/ 6 | *.so 7 | exp 8 | weights 9 | data 10 | log 11 | outputs/ 12 | .vscode 13 | .idea 14 | */.DS_Store 15 | **/*.out 16 | Dockerfile 17 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Pointcept 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /assets/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/assets/overview.png -------------------------------------------------------------------------------- /configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | weight = None # path to model weight 2 | resume = False # whether to resume training process 3 | evaluate = True # evaluate after each epoch training process 4 | test_only = False # test process 5 | 6 | seed = None # train process will init a random seed and record 7 | save_path = "exp/default" 8 | num_worker = 16 # total worker in all gpu 9 | batch_size = 16 # total batch size in all gpu 10 | batch_size_val = None # auto adapt to bs 1 for each gpu 11 | batch_size_test = None # auto adapt to bs 1 for each gpu 12 | epoch = 100 # total epoch, data loop = epoch // eval_epoch 13 | eval_epoch = 100 # sche total eval & checkpoint epoch 14 | 15 | sync_bn = False 16 | enable_amp = False 17 | empty_cache = False 18 | find_unused_parameters = False 19 | 20 | mix_prob = 0 21 | param_dicts = None # example: param_dicts = [dict(keyword="block", lr_scale=0.1)] 22 | 23 | # hook 24 | hooks = [ 25 | dict(type="CheckpointLoader"), 26 | dict(type="IterationTimer", warmup_iter=2), 27 | dict(type="InformationWriter"), 28 | dict(type="SemSegEvaluator"), 29 | dict(type="CheckpointSaver", save_freq=None), 30 | dict(type="PreciseEvaluator", test_last=False), 31 | ] 32 | 33 | # Trainer 34 | train = dict(type="DefaultTrainer") 35 | 36 | # Tester 37 | test = dict(type="SemSegTester", verbose=True) 38 | -------------------------------------------------------------------------------- /libs/pointgroup_ops/functions/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions import bfs_cluster, ballquery_batch_p, Clustering 2 | -------------------------------------------------------------------------------- /libs/pointgroup_ops/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from sys import argv 3 | from setuptools import setup 4 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 5 | from distutils.sysconfig import get_config_vars 6 | 7 | (opt,) = get_config_vars("OPT") 8 | os.environ["OPT"] = " ".join( 9 | flag for flag in opt.split() if flag != "-Wstrict-prototypes" 10 | ) 11 | 12 | 13 | def _argparse(pattern, argv, is_flag=True, is_list=False): 14 | if is_flag: 15 | found = pattern in argv 16 | if found: 17 | argv.remove(pattern) 18 | return found, argv 19 | else: 20 | arr = [arg for arg in argv if pattern == arg.split("=")[0]] 21 | if is_list: 22 | if len(arr) == 0: # not found 23 | return False, argv 24 | else: 25 | assert "=" in arr[0], f"{arr[0]} requires a value." 26 | argv.remove(arr[0]) 27 | val = arr[0].split("=")[1] 28 | if "," in val: 29 | return val.split(","), argv 30 | else: 31 | return [val], argv 32 | else: 33 | if len(arr) == 0: # not found 34 | return False, argv 35 | else: 36 | assert "=" in arr[0], f"{arr[0]} requires a value." 
37 | argv.remove(arr[0]) 38 | return arr[0].split("=")[1], argv 39 | 40 | 41 | INCLUDE_DIRS, argv = _argparse("--include_dirs", argv, False, is_list=True) 42 | include_dirs = [] 43 | if not (INCLUDE_DIRS is False): 44 | include_dirs += INCLUDE_DIRS 45 | 46 | setup( 47 | name="pointgroup_ops", 48 | packages=["pointgroup_ops"], 49 | package_dir={"pointgroup_ops": "functions"}, 50 | ext_modules=[ 51 | CUDAExtension( 52 | name="pointgroup_ops_cuda", 53 | sources=["src/bfs_cluster.cpp", "src/bfs_cluster_kernel.cu"], 54 | extra_compile_args={"cxx": ["-g"], "nvcc": ["-O2"]}, 55 | ) 56 | ], 57 | include_dirs=[*include_dirs], 58 | cmdclass={"build_ext": BuildExtension}, 59 | ) 60 | -------------------------------------------------------------------------------- /libs/pointgroup_ops/src/bfs_cluster_kernel.cu: -------------------------------------------------------------------------------- 1 | /* 2 | Ball Query with BatchIdx 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 5 | */ 6 | #include <stdio.h> 7 | #include <stdlib.h> 8 | #include <cuda_runtime.h> 9 | 10 | #define TOTAL_THREADS 1024 11 | #define THREADS_PER_BLOCK 512 12 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 13 | 14 | 15 | /* ================================== ballquery_batch_p ================================== */ 16 | __global__ void ballquery_batch_p_cuda_(int n, int meanActive, float radius, const float *xyz, const int *batch_idxs, const int *batch_offsets, int *idx, int *start_len, int *cumsum) { 17 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 18 | if (pt_idx >= n) return; 19 | 20 | start_len += (pt_idx * 2); 21 | int idx_temp[1000]; 22 | 23 | float radius2 = radius * radius; 24 | float o_x = xyz[pt_idx * 3 + 0]; 25 | float o_y = xyz[pt_idx * 3 + 1]; 26 | float o_z = xyz[pt_idx * 3 + 2]; 27 | 28 | int batch_idx = batch_idxs[pt_idx]; 29 | int start = batch_offsets[batch_idx]; 30 | int end = batch_offsets[batch_idx + 1]; 31 | 32 | int cnt = 0; 33 | for(int k = start; k < end; k++){ 34 | float x = xyz[k * 3 + 0]; 35 | float y = xyz[k * 3 + 1]; 36 | float z = xyz[k * 3 + 2]; 37 | float d2 = (o_x - x) * (o_x - x) + (o_y - y) * (o_y - y) + (o_z - z) * (o_z - z); 38 | if(d2 < radius2){ 39 | if(cnt < 1000){ 40 | idx_temp[cnt] = k; 41 | } 42 | else{ 43 | break; 44 | } 45 | ++cnt; 46 | } 47 | } 48 | 49 | start_len[0] = atomicAdd(cumsum, cnt); 50 | start_len[1] = cnt; 51 | 52 | int thre = n * meanActive; 53 | if(start_len[0] >= thre) return; 54 | 55 | idx += start_len[0]; 56 | if(start_len[0] + cnt >= thre) cnt = thre - start_len[0]; 57 | 58 | for(int k = 0; k < cnt; k++){ 59 | idx[k] = idx_temp[k]; 60 | } 61 | } 62 | 63 | 64 | int ballquery_batch_p_cuda(int n, int meanActive, float radius, const float *xyz, const int *batch_idxs, const int *batch_offsets, int *idx, int *start_len, cudaStream_t stream) { 65 | // param xyz: (n, 3) 66 | // param batch_idxs: (n) 67 | // param batch_offsets: (B + 1) 68 | // output idx: (n * meanActive) dim 0 for number of points in the ball, idx in n 69 | // output start_len: (n, 2), int 70 | 71 | cudaError_t err; 72 | 73 | dim3 blocks(DIVUP(n, THREADS_PER_BLOCK)); 74 | dim3 threads(THREADS_PER_BLOCK); 75 | 76 | int cumsum = 0; 77 | int* p_cumsum; 78 | cudaMalloc((void**)&p_cumsum, sizeof(int)); 79 | cudaMemcpy(p_cumsum, &cumsum, sizeof(int), cudaMemcpyHostToDevice); 80 | 81 | ballquery_batch_p_cuda_<<<blocks, threads, 0, stream>>>(n, meanActive, radius, xyz, batch_idxs, batch_offsets, idx, start_len, p_cumsum); 82 | 83 | err = cudaGetLastError(); 84 | if (cudaSuccess != err) { 85 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 86 |
exit(-1); 87 | } 88 | 89 | cudaMemcpy(&cumsum, p_cumsum, sizeof(int), cudaMemcpyDeviceToHost); 90 | return cumsum; 91 | } 92 | -------------------------------------------------------------------------------- /libs/pointops/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions import * 2 | --------------------------------------------------------------------------------
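Usage note: every op re-exported by "from .functions import *" relies on offset-based batching, where each entry of offset is the cumulative point count of a batch item (see batch2offset/offset2batch in functions/utils.py and the docstrings in the functions/ modules below). A minimal, hypothetical usage sketch, assuming the pointops CUDA extension has been built and a GPU is available; the point counts and feature width are arbitrary example values:

import torch
import pointops

# Two point clouds (1000 and 1200 points) packed into one tensor; offset
# stores cumulative per-cloud point counts, as expected by the ops below.
xyz = torch.rand(2200, 3, device="cuda").contiguous()
offset = torch.tensor([1000, 2200], device="cuda", dtype=torch.int32)

# 16 nearest neighbours of every point, restricted to its own point cloud.
idx, dist = pointops.knn_query(16, xyz, offset)   # idx: (2200, 16), dist: (2200, 16)

# Gather each query point's neighbour features.
feat = torch.rand(2200, 32, device="cuda").contiguous()
grouped = pointops.grouping(idx, feat, xyz)       # (2200, 16, 32)

--------------------------------------------------------------------------------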
/libs/pointops/functions/__init__.py: -------------------------------------------------------------------------------- 1 | from .query import knn_query, ball_query, random_ball_query 2 | from .sampling import farthest_point_sampling 3 | from .grouping import grouping, grouping2 4 | from .interpolation import interpolation, interpolation2 5 | from .subtraction import subtraction 6 | from .aggregation import aggregation 7 | from .attention import attention_relation_step, attention_fusion_step 8 | from .utils import ( 9 | query_and_group, 10 | knn_query_and_group, 11 | ball_query_and_group, 12 | batch2offset, 13 | offset2batch, 14 | ) 15 | -------------------------------------------------------------------------------- /libs/pointops/functions/aggregation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from pointops._C import aggregation_forward_cuda, aggregation_backward_cuda 5 | 6 | 7 | class Aggregation(Function): 8 | @staticmethod 9 | def forward(ctx, input, position, weight, idx): 10 | """ 11 | input: input: (n, c), position: (n, nsample, c), weight : (n, nsample, c'), idx: (n, nsample) 12 | output: (n, c) 13 | """ 14 | assert ( 15 | input.is_contiguous() 16 | and position.is_contiguous() 17 | and weight.is_contiguous() 18 | ) 19 | n, nsample, c = position.shape 20 | w_c = weight.shape[-1] 21 | output = torch.cuda.FloatTensor(n, c).zero_() 22 | aggregation_forward_cuda( 23 | n, nsample, c, w_c, input, position, weight, idx, output 24 | ) 25 | ctx.save_for_backward(input, position, weight, idx) 26 | return
output 27 | 28 | @staticmethod 29 | def backward(ctx, grad_output): 30 | """ 31 | input: grad_out: (n, c) 32 | output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight : (n, nsample, c') 33 | """ 34 | input, position, weight, idx = ctx.saved_tensors 35 | n, nsample, c = position.shape 36 | w_c = weight.shape[-1] 37 | grad_input = torch.cuda.FloatTensor(n, c).zero_() 38 | grad_position = torch.cuda.FloatTensor(n, nsample, c).zero_() 39 | grad_weight = torch.cuda.FloatTensor(n, nsample, w_c).zero_() 40 | aggregation_backward_cuda( 41 | n, 42 | nsample, 43 | c, 44 | w_c, 45 | input, 46 | position, 47 | weight, 48 | idx, 49 | grad_output, 50 | grad_input, 51 | grad_position, 52 | grad_weight, 53 | ) 54 | return grad_input, grad_position, grad_weight, None 55 | 56 | 57 | aggregation = Aggregation.apply 58 | -------------------------------------------------------------------------------- /libs/pointops/functions/grouping.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from pointops._C import grouping_forward_cuda, grouping_backward_cuda 5 | 6 | 7 | class Grouping(Function): 8 | @staticmethod 9 | def forward(ctx, input, idx): 10 | """ 11 | input: input: (n, c), idx : (m, nsample) 12 | output: (m, nsample, c) 13 | """ 14 | assert input.is_contiguous() and idx.is_contiguous() 15 | m, nsample, n, c = idx.shape[0], idx.shape[1], input.shape[0], input.shape[1] 16 | output = torch.cuda.FloatTensor(m, nsample, c) 17 | grouping_forward_cuda(m, nsample, c, input, idx, output) 18 | ctx.n = n 19 | ctx.save_for_backward(idx) 20 | return output 21 | 22 | @staticmethod 23 | def backward(ctx, grad_output): 24 | """ 25 | input: grad_out: (m, c, nsample) 26 | output: (n, c), None 27 | """ 28 | n = ctx.n 29 | (idx,) = ctx.saved_tensors 30 | m, nsample, c = grad_output.shape 31 | grad_input = torch.cuda.FloatTensor(n, c).zero_() 32 | grouping_backward_cuda(m, nsample, c, grad_output, idx, grad_input) 33 | return grad_input, None 34 | 35 | 36 | def grouping(idx, feat, xyz, new_xyz=None, with_xyz=False): 37 | if new_xyz is None: 38 | new_xyz = xyz 39 | assert xyz.is_contiguous() and feat.is_contiguous() 40 | m, nsample, c = idx.shape[0], idx.shape[1], feat.shape[1] 41 | xyz = torch.cat([xyz, torch.zeros([1, 3]).to(xyz.device)], dim=0) 42 | feat = torch.cat([feat, torch.zeros([1, c]).to(feat.device)], dim=0) 43 | grouped_feat = feat[idx.view(-1).long(), :].view( 44 | m, nsample, c 45 | ) # (m, num_sample, c) 46 | 47 | if with_xyz: 48 | assert new_xyz.is_contiguous() 49 | mask = torch.sign(idx + 1) 50 | grouped_xyz = xyz[idx.view(-1).long(), :].view( 51 | m, nsample, 3 52 | ) - new_xyz.unsqueeze( 53 | 1 54 | ) # (m, num_sample, 3) 55 | grouped_xyz = torch.einsum( 56 | "n s c, n s -> n s c", grouped_xyz, mask 57 | ) # (m, num_sample, 3) 58 | return torch.cat((grouped_xyz, grouped_feat), -1) 59 | else: 60 | return grouped_feat 61 | 62 | 63 | grouping2 = Grouping.apply 64 | -------------------------------------------------------------------------------- /libs/pointops/functions/interpolation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from pointops._C import interpolation_forward_cuda, interpolation_backward_cuda 5 | from .query import knn_query 6 | 7 | 8 | def interpolation(xyz, new_xyz, feat, offset, new_offset, k=3): 9 | """ 10 | input: coords: (m, 3), new_xyz: (n, 3), color: (m, c), offset: (b), 
new_offset: (b) 11 | output: (n, c) 12 | """ 13 | assert xyz.is_contiguous() and new_xyz.is_contiguous() and feat.is_contiguous() 14 | idx, dist = knn_query(k, xyz, offset, new_xyz, new_offset) # (n, 3), (n, 3) 15 | dist_recip = 1.0 / (dist + 1e-8) # (n, 3) 16 | norm = torch.sum(dist_recip, dim=1, keepdim=True) 17 | weight = dist_recip / norm # (n, 3) 18 | 19 | new_feat = torch.cuda.FloatTensor(new_xyz.shape[0], feat.shape[1]).zero_() 20 | for i in range(k): 21 | new_feat += feat[idx[:, i].long(), :] * weight[:, i].unsqueeze(-1) 22 | return new_feat 23 | 24 | 25 | class Interpolation(Function): 26 | @staticmethod 27 | def forward(ctx, xyz, new_xyz, input, offset, new_offset, k=3): 28 | """ 29 | input: coords: (m, 3), new_xyz: (n, 3), input: (m, c), offset: (b), new_offset: (b) 30 | output: (n, c) 31 | """ 32 | assert xyz.is_contiguous() and new_xyz.is_contiguous() and input.is_contiguous() 33 | idx, dist = knn_query(k, xyz, offset, new_xyz, new_offset) # (n, k), (n, k) 34 | dist_recip = 1.0 / (dist + 1e-8) # (n, k) 35 | norm = torch.sum(dist_recip, dim=1, keepdim=True) 36 | weight = dist_recip / norm # (n, k) 37 | 38 | n, c, m = new_xyz.shape[0], input.shape[1], input.shape[0] 39 | output = torch.cuda.FloatTensor(n, c).zero_() 40 | interpolation_forward_cuda(n, c, k, input, idx, weight, output) 41 | ctx.m, ctx.k = m, k 42 | ctx.save_for_backward(idx, weight) 43 | return output 44 | 45 | @staticmethod 46 | def backward(ctx, grad_output): 47 | """ 48 | input: coords: (m, 3), new_xyz: (n, 3), input: (m, c), offset: (b), new_offset: (b) 49 | output: (n, c) 50 | """ 51 | m, k = ctx.m, ctx.k 52 | idx, weight = ctx.saved_tensors 53 | n, c = grad_output.shape 54 | grad_input = torch.cuda.FloatTensor(m, c).zero_() 55 | interpolation_backward_cuda(n, c, k, grad_output, idx, weight, grad_input) 56 | return None, None, grad_input, None, None, None 57 | 58 | 59 | interpolation2 = Interpolation.apply 60 | -------------------------------------------------------------------------------- /libs/pointops/functions/query.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from pointops._C import knn_query_cuda, random_ball_query_cuda, ball_query_cuda 5 | 6 | 7 | class KNNQuery(Function): 8 | @staticmethod 9 | def forward(ctx, nsample, xyz, offset, new_xyz=None, new_offset=None): 10 | """ 11 | input: coords: (n, 3), new_xyz: (m, 3), offset: (b), new_offset: (b) 12 | output: idx: (m, nsample) -1 is placeholder, dist2: (m, nsample) 13 | """ 14 | if new_xyz is None or new_offset is None: 15 | new_xyz = xyz 16 | new_offset = offset 17 | assert xyz.is_contiguous() and new_xyz.is_contiguous() 18 | m = new_xyz.shape[0] 19 | idx = torch.cuda.IntTensor(m, nsample).zero_() 20 | dist2 = torch.cuda.FloatTensor(m, nsample).zero_() 21 | knn_query_cuda( 22 | m, nsample, xyz, new_xyz, offset.int(), new_offset.int(), idx, dist2 23 | ) 24 | return idx, torch.sqrt(dist2) 25 | 26 | 27 | class RandomBallQuery(Function): 28 | """Random Ball Query. 29 | 30 | Find nearby points in spherical space. 
31 | """ 32 | 33 | @staticmethod 34 | def forward( 35 | ctx, nsample, max_radius, min_radius, xyz, offset, new_xyz=None, new_offset=None 36 | ): 37 | """ 38 | input: coords: (n, 3), new_xyz: (m, 3), offset: (b), new_offset: (b) 39 | output: idx: (m, nsample), dist2: (m, nsample) 40 | """ 41 | if new_xyz is None or new_offset is None: 42 | new_xyz = xyz 43 | new_offset = offset 44 | assert xyz.is_contiguous() and new_xyz.is_contiguous() 45 | assert min_radius < max_radius 46 | 47 | m = new_xyz.shape[0] 48 | order = [] 49 | for k in range(offset.shape[0]): 50 | s_k, e_k = (0, offset[0]) if k == 0 else (offset[k - 1], offset[k]) 51 | order.append( 52 | torch.randperm(e_k - s_k, dtype=torch.int32, device=offset.device) + s_k 53 | ) 54 | order = torch.cat(order, dim=0) 55 | idx = torch.cuda.IntTensor(m, nsample).zero_() 56 | dist2 = torch.cuda.FloatTensor(m, nsample).zero_() 57 | random_ball_query_cuda( 58 | m, 59 | nsample, 60 | min_radius, 61 | max_radius, 62 | order, 63 | xyz, 64 | new_xyz, 65 | offset.int(), 66 | new_offset.int(), 67 | idx, 68 | dist2, 69 | ) 70 | return idx, torch.sqrt(dist2) 71 | 72 | 73 | class BallQuery(Function): 74 | """Ball Query. 75 | 76 | Find nearby points in spherical space. 77 | """ 78 | 79 | @staticmethod 80 | def forward( 81 | ctx, nsample, max_radius, min_radius, xyz, offset, new_xyz=None, new_offset=None 82 | ): 83 | """ 84 | input: coords: (n, 3), new_xyz: (m, 3), offset: (b), new_offset: (b) 85 | output: idx: (m, nsample), dist2: (m, nsample) 86 | """ 87 | if new_xyz is None or new_offset is None: 88 | new_xyz = xyz 89 | new_offset = offset 90 | assert xyz.is_contiguous() and new_xyz.is_contiguous() 91 | assert min_radius < max_radius 92 | 93 | m = new_xyz.shape[0] 94 | idx = torch.cuda.IntTensor(m, nsample).zero_() 95 | dist2 = torch.cuda.FloatTensor(m, nsample).zero_() 96 | ball_query_cuda( 97 | m, 98 | nsample, 99 | min_radius, 100 | max_radius, 101 | xyz, 102 | new_xyz, 103 | offset.int(), 104 | new_offset.int(), 105 | idx, 106 | dist2, 107 | ) 108 | return idx, torch.sqrt(dist2) 109 | 110 | 111 | knn_query = KNNQuery.apply 112 | ball_query = BallQuery.apply 113 | random_ball_query = RandomBallQuery.apply 114 | -------------------------------------------------------------------------------- /libs/pointops/functions/sampling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from pointops._C import farthest_point_sampling_cuda 5 | 6 | 7 | class FarthestPointSampling(Function): 8 | @staticmethod 9 | def forward(ctx, xyz, offset, new_offset): 10 | """ 11 | input: coords: (n, 3), offset: (b), new_offset: (b) 12 | output: idx: (m) 13 | """ 14 | assert xyz.is_contiguous() 15 | n, b, n_max = xyz.shape[0], offset.shape[0], offset[0] 16 | for i in range(1, b): 17 | n_max = max(offset[i] - offset[i - 1], n_max) 18 | idx = torch.cuda.IntTensor(new_offset[b - 1].item()).zero_() 19 | tmp = torch.cuda.FloatTensor(n).fill_(1e10) 20 | farthest_point_sampling_cuda( 21 | b, n_max, xyz, offset.int(), new_offset.int(), tmp, idx 22 | ) 23 | del tmp 24 | return idx 25 | 26 | 27 | farthest_point_sampling = FarthestPointSampling.apply 28 | -------------------------------------------------------------------------------- /libs/pointops/functions/subtraction.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from pointops._C import subtraction_forward_cuda, subtraction_backward_cuda 5 
| 6 | 7 | class Subtraction(Function): 8 | @staticmethod 9 | def forward(ctx, input1, input2, idx): 10 | """ 11 | input: input1: (n, c), input2: (n, c), idx: (n, nsample) 12 | output: (n, nsample, c) 13 | """ 14 | assert input1.is_contiguous() and input2.is_contiguous() 15 | n, c = input1.shape 16 | nsample = idx.shape[-1] 17 | output = torch.cuda.FloatTensor(n, nsample, c).zero_() 18 | subtraction_forward_cuda(n, nsample, c, input1, input2, idx, output) 19 | ctx.save_for_backward(idx) 20 | return output 21 | 22 | @staticmethod 23 | def backward(ctx, grad_output): 24 | """ 25 | input: grad_out: (n, nsample, c) 26 | output: grad_input1: (n, c), grad_input2: (n, c) 27 | """ 28 | (idx,) = ctx.saved_tensors 29 | n, nsample, c = grad_output.shape 30 | grad_input1 = torch.cuda.FloatTensor(n, c).zero_() 31 | grad_input2 = torch.cuda.FloatTensor(n, c).zero_() 32 | subtraction_backward_cuda( 33 | n, nsample, c, idx, grad_output, grad_input1, grad_input2 34 | ) 35 | return grad_input1, grad_input2, None 36 | 37 | 38 | subtraction = Subtraction.apply 39 | -------------------------------------------------------------------------------- /libs/pointops/pointops.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 2.1 2 | Name: pointops 3 | Version: 1.0 4 | Requires-Dist: torch 5 | Requires-Dist: numpy 6 | -------------------------------------------------------------------------------- /libs/pointops/pointops.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | setup.py 2 | functions/__init__.py 3 | functions/aggregation.py 4 | functions/attention.py 5 | functions/grouping.py 6 | functions/interpolation.py 7 | functions/query.py 8 | functions/sampling.py 9 | functions/subtraction.py 10 | functions/utils.py 11 | pointops.egg-info/PKG-INFO 12 | pointops.egg-info/SOURCES.txt 13 | pointops.egg-info/dependency_links.txt 14 | pointops.egg-info/requires.txt 15 | pointops.egg-info/top_level.txt 16 | src/pointops_api.cpp 17 | src/aggregation/aggregation_cuda.cpp 18 | src/aggregation/aggregation_cuda_kernel.cu 19 | src/attention/attention_cuda.cpp 20 | src/attention/attention_cuda_kernel.cu 21 | src/ball_query/ball_query_cuda.cpp 22 | src/ball_query/ball_query_cuda_kernel.cu 23 | src/grouping/grouping_cuda.cpp 24 | src/grouping/grouping_cuda_kernel.cu 25 | src/interpolation/interpolation_cuda.cpp 26 | src/interpolation/interpolation_cuda_kernel.cu 27 | src/knn_query/knn_query_cuda.cpp 28 | src/knn_query/knn_query_cuda_kernel.cu 29 | src/random_ball_query/random_ball_query_cuda.cpp 30 | src/random_ball_query/random_ball_query_cuda_kernel.cu 31 | src/sampling/sampling_cuda.cpp 32 | src/sampling/sampling_cuda_kernel.cu 33 | src/subtraction/subtraction_cuda.cpp 34 | src/subtraction/subtraction_cuda_kernel.cu -------------------------------------------------------------------------------- /libs/pointops/pointops.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /libs/pointops/pointops.egg-info/requires.txt: -------------------------------------------------------------------------------- 1 | torch 2 | numpy 3 | -------------------------------------------------------------------------------- /libs/pointops/pointops.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | pointops 2 | 
-------------------------------------------------------------------------------- /libs/pointops/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup 3 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 4 | from distutils.sysconfig import get_config_vars 5 | 6 | (opt,) = get_config_vars("OPT") 7 | os.environ["OPT"] = " ".join( 8 | flag for flag in opt.split() if flag != "-Wstrict-prototypes" 9 | ) 10 | 11 | src = "src" 12 | sources = [ 13 | os.path.join(root, file) 14 | for root, dirs, files in os.walk(src) 15 | for file in files 16 | if file.endswith(".cpp") or file.endswith(".cu") 17 | ] 18 | 19 | setup( 20 | name="pointops", 21 | version="1.0", 22 | install_requires=["torch", "numpy"], 23 | packages=["pointops"], 24 | package_dir={"pointops": "functions"}, 25 | ext_modules=[ 26 | CUDAExtension( 27 | name="pointops._C", 28 | sources=sources, 29 | extra_compile_args={"cxx": ["-g"], "nvcc": ["-O2"]}, 30 | ) 31 | ], 32 | cmdclass={"build_ext": BuildExtension}, 33 | ) 34 | -------------------------------------------------------------------------------- /libs/pointops/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/libs/pointops/src/__init__.py -------------------------------------------------------------------------------- /libs/pointops/src/aggregation/aggregation_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "aggregation_cuda_kernel.h" 5 | 6 | 7 | void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor) 8 | { 9 | const float *input = input_tensor.data_ptr(); 10 | const float *position = position_tensor.data_ptr(); 11 | const float *weight = weight_tensor.data_ptr(); 12 | const int *idx = idx_tensor.data_ptr(); 13 | float *output = output_tensor.data_ptr(); 14 | aggregation_forward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, output); 15 | } 16 | 17 | void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor) 18 | { 19 | const float *input = input_tensor.data_ptr(); 20 | const float *position = position_tensor.data_ptr(); 21 | const float *weight = weight_tensor.data_ptr(); 22 | const int *idx = idx_tensor.data_ptr(); 23 | const float *grad_output = grad_output_tensor.data_ptr(); 24 | float *grad_input = grad_input_tensor.data_ptr(); 25 | float *grad_position = grad_position_tensor.data_ptr(); 26 | float *grad_weight = grad_weight_tensor.data_ptr(); 27 | aggregation_backward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight); 28 | } 29 | -------------------------------------------------------------------------------- /libs/pointops/src/aggregation/aggregation_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "aggregation_cuda_kernel.h" 3 | 4 | 5 | __global__ void aggregation_forward_cuda_kernel(int n, int nsample, 
int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) { 6 | // input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c) 7 | int index = blockIdx.x * blockDim.x + threadIdx.x; 8 | if (index >= n * c) return; 9 | const int c_idx = index % c; 10 | const int n_idx = index / c; 11 | const int w_c_idx = c_idx % w_c; 12 | for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++) 13 | { 14 | int idx_idx = n_idx * nsample + nsample_idx; 15 | int input_idx = idx[idx_idx] * c + c_idx; 16 | int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx; 17 | int weight_idx = n_idx * nsample * w_c + nsample_idx * w_c + w_c_idx; 18 | output[index] += (input[input_idx] + position[position_idx]) * weight[weight_idx]; 19 | } 20 | } 21 | 22 | __global__ void aggregation_backward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) { 23 | // input: grad_output: (n, c), output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight: (n, nsample, w_c) 24 | int index = blockIdx.x * blockDim.x + threadIdx.x; 25 | if (index >= n * c) return; 26 | const int c_idx = index % c; 27 | const int n_idx = index / c; 28 | const int w_c_idx = c_idx % w_c; 29 | for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++) 30 | { 31 | int idx_idx = n_idx * nsample + nsample_idx; 32 | int input_idx = idx[idx_idx] * c + c_idx; 33 | int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx; 34 | int weight_idx = n_idx * nsample * w_c + nsample_idx * w_c + w_c_idx; 35 | atomicAdd(grad_input + input_idx, grad_output[index] * weight[weight_idx]); 36 | grad_position[position_idx] = grad_output[index] * weight[weight_idx]; 37 | atomicAdd(grad_weight + weight_idx, grad_output[index] * (input[input_idx] + position[position_idx])); 38 | } 39 | } 40 | 41 | void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) { 42 | // input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c) 43 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)); 44 | dim3 threads(THREADS_PER_BLOCK); 45 | aggregation_forward_cuda_kernel<<>>(n, nsample, c, w_c, input, position, weight, idx, output); 46 | } 47 | 48 | void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) { 49 | // input: grad_output: (n, c), output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight: (n, nsample, w_c) 50 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)); 51 | dim3 threads(THREADS_PER_BLOCK); 52 | aggregation_backward_cuda_kernel<<>>(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight); 53 | } 54 | -------------------------------------------------------------------------------- /libs/pointops/src/aggregation/aggregation_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _AGGREGATION_CUDA_KERNEL 2 | #define _AGGREGATION_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void aggregation_forward_cuda(int n, int nsample, int c, int 
w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor); 8 | void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output); 15 | void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /libs/pointops/src/attention/attention_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ATTENTION_CUDA_KERNEL 2 | #define _ATTENTION_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void attention_relation_step_forward_cuda(int m, int g, int c, 8 | at::Tensor query_tensor, at::Tensor key_tensor, at::Tensor weight_tensor, 9 | at::Tensor index_target_tensor, at::Tensor index_refer_tensor, 10 | at::Tensor output_tensor); 11 | void attention_relation_step_backward_cuda(int m, int g, int c, 12 | at::Tensor query_tensor, at::Tensor grad_query_tensor, 13 | at::Tensor key_tensor, at::Tensor grad_key_tensor, 14 | at::Tensor weight_tensor, at::Tensor grad_weight_tensor, 15 | at::Tensor index_target_tensor, at::Tensor index_refer_tensor, 16 | at::Tensor grad_output_tensor); 17 | void attention_fusion_step_forward_cuda(int m, int g, int c, 18 | at::Tensor weight_tensor, at::Tensor value_tensor, 19 | at::Tensor index_target_tensor, at::Tensor index_refer_tensor, 20 | at::Tensor output_tensor); 21 | void attention_fusion_step_backward_cuda(int m, int g, int c, 22 | at::Tensor weight_tensor, at::Tensor grad_weight_tensor, 23 | at::Tensor value_tensor, at::Tensor grad_value_tensor, 24 | at::Tensor index_target_tensor, at::Tensor index_refer_tensor, 25 | at::Tensor grad_output_tensor); 26 | 27 | #ifdef __cplusplus 28 | extern "C" { 29 | #endif 30 | 31 | void attention_relation_step_forward_cuda_launcher(int m, int g, int c, 32 | const float *query, const float *key, const float *weight, 33 | const int *index_target, const int *index_refer, 34 | float *output); 35 | void attention_relation_step_backward_cuda_launcher(int m, int g, int c, 36 | const float *query, float *grad_query, 37 | const float *key, float *grad_key, 38 | const float *weight, float *grad_weight, 39 | const int *index_target, const int *index_refer, 40 | const float *grad_output); 41 | void attention_fusion_step_forward_cuda_launcher(int m, int g, int c, 42 | const float *weight, const float *value, 43 | const int *index_target, const int *index_refer, 44 | float *output); 45 | void attention_fusion_step_backward_cuda_launcher(int m, int g, int c, 46 | const float *weight, float *grad_weight, 47 | const float *value, float *grad_value, 48 | const int *index_target, const int *index_refer, 49 | const float *grad_output); 50 | 51 | #ifdef __cplusplus 52 | } 53 | #endif 54 | #endif 55 | 
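For reference, the aggregation forward kernel above can be re-expressed in a few lines of PyTorch, which is convenient for sanity-checking the compiled extension. This is an illustrative re-derivation from the kernel's index arithmetic, not code shipped with the repository; the function name is invented, and it assumes c is a multiple of w_c, as the c % w_c indexing in the kernel implies.

import torch

def aggregation_forward_reference(input, position, weight, idx):
    # input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample) -> (n, c)
    c = input.shape[1]
    w_c = weight.shape[-1]
    gathered = input[idx.long()]                   # (n, nsample, c): mirrors input[idx[idx_idx] * c + c_idx]
    w = weight.repeat(1, 1, c // w_c)              # channel c_idx uses weight channel c_idx % w_c
    return ((gathered + position) * w).sum(dim=1)  # accumulate over the nsample neighbours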
-------------------------------------------------------------------------------- /libs/pointops/src/ball_query/ball_query_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "ball_query_cuda_kernel.h" 5 | 6 | 7 | void ball_query_cuda(int m, int nsample, 8 | float min_radius, float max_radius, 9 | at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, 10 | at::Tensor offset_tensor, at::Tensor new_offset_tensor, 11 | at::Tensor idx_tensor, at::Tensor dist2_tensor) 12 | { 13 | const float *xyz = xyz_tensor.data_ptr(); 14 | const float *new_xyz = new_xyz_tensor.data_ptr(); 15 | const int *offset = offset_tensor.data_ptr(); 16 | const int *new_offset = new_offset_tensor.data_ptr(); 17 | int *idx = idx_tensor.data_ptr(); 18 | float *dist2 = dist2_tensor.data_ptr(); 19 | ball_query_cuda_launcher(m, nsample, min_radius, max_radius, xyz, new_xyz, offset, new_offset, idx, dist2); 20 | } 21 | -------------------------------------------------------------------------------- /libs/pointops/src/ball_query/ball_query_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _BALL_QUERY_CUDA_KERNEL 2 | #define _BALL_QUERY_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void ball_query_cuda(int m, int nsample, 8 | float min_radius, float max_radius, 9 | at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, 10 | at::Tensor offset_tensor, at::Tensor new_offset_tensor, 11 | at::Tensor idx_tensor, at::Tensor dist2_tensor); 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | void ball_query_cuda_launcher(int m, int nsample, 18 | float min_radius, float max_radius, 19 | const float *xyz, const float *new_xyz, 20 | const int *offset, const int *new_offset, 21 | int *idx, float *dist2); 22 | 23 | #ifdef __cplusplus 24 | } 25 | #endif 26 | #endif 27 | -------------------------------------------------------------------------------- /libs/pointops/src/cuda_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef _CUDA_UTILS_H 2 | #define _CUDA_UTILS_H 3 | 4 | #include 5 | #include 6 | 7 | #define TOTAL_THREADS 1024 8 | #define THREADS_PER_BLOCK 512 9 | #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) 10 | 11 | inline int opt_n_threads(int work_size) { 12 | const int pow_2 = std::log(static_cast(work_size)) / std::log(2.0); 13 | return std::max(std::min(1 << pow_2, TOTAL_THREADS), 1); 14 | } 15 | 16 | inline dim3 opt_block_config(int x, int y) { 17 | const int x_threads = opt_n_threads(x); 18 | const int y_threads = std::max(std::min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1); 19 | dim3 block_config(x_threads, y_threads, 1); 20 | return block_config; 21 | } 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /libs/pointops/src/grouping/grouping_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "grouping_cuda_kernel.h" 5 | 6 | 7 | void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor) 8 | { 9 | const float *input = input_tensor.data_ptr(); 10 | const int *idx = idx_tensor.data_ptr(); 11 | float *output = output_tensor.data_ptr(); 12 | grouping_forward_cuda_launcher(m, nsample, c, input, idx, output); 13 | } 14 | 15 | void grouping_backward_cuda(int m, int nsample, int c, at::Tensor 
grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor) 16 | { 17 | const float *grad_output = grad_output_tensor.data_ptr(); 18 | const int *idx = idx_tensor.data_ptr(); 19 | float *grad_input = grad_input_tensor.data_ptr(); 20 | grouping_backward_cuda_launcher(m, nsample, c, grad_output, idx, grad_input); 21 | } 22 | -------------------------------------------------------------------------------- /libs/pointops/src/grouping/grouping_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "grouping_cuda_kernel.h" 3 | 4 | 5 | __global__ void grouping_forward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ input, const int *__restrict__ idx, float *__restrict__ output) { 6 | // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c) 7 | int index = blockIdx.x * blockDim.x + threadIdx.x; 8 | if (index >= m * nsample * c) return; 9 | const int c_idx = index % c; 10 | const int nsample_idx = (index / c) % nsample; 11 | const int m_idx = index / nsample / c; 12 | const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx; 13 | output[index] = input[input_idx]; 14 | } 15 | 16 | __global__ void grouping_backward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ grad_output, const int *__restrict__ idx, float *__restrict__ grad_input) { 17 | // input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c) 18 | int index = blockIdx.x * blockDim.x + threadIdx.x; 19 | if (index >= m * nsample * c) return; 20 | const int c_idx = index % c; 21 | const int nsample_idx = (index / c) % nsample; 22 | const int m_idx = index / nsample / c; 23 | const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx; 24 | atomicAdd(grad_input + input_idx, grad_output[index]); 25 | } 26 | 27 | void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output) { 28 | // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c) 29 | dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK)); 30 | dim3 threads(THREADS_PER_BLOCK); 31 | grouping_forward_cuda_kernel<<>>(m, nsample, c, input, idx, output); 32 | } 33 | 34 | void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input) 35 | { 36 | // input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c) 37 | dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK)); 38 | dim3 threads(THREADS_PER_BLOCK); 39 | grouping_backward_cuda_kernel<<>>(m, nsample, c, grad_output, idx, grad_input); 40 | } 41 | -------------------------------------------------------------------------------- /libs/pointops/src/grouping/grouping_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _GROUPING_CUDA_KERNEL 2 | #define _GROUPING_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor); 8 | void grouping_backward_cuda(int m, int nsample, int c, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output); 15 | void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, 
const int *idx, float *grad_input); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /libs/pointops/src/interpolation/interpolation_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "interpolation_cuda_kernel.h" 5 | 6 | 7 | void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor) 8 | { 9 | const float *input = input_tensor.data_ptr(); 10 | const int *idx = idx_tensor.data_ptr(); 11 | const float *weight = weight_tensor.data_ptr(); 12 | float *output = output_tensor.data_ptr(); 13 | interpolation_forward_cuda_launcher(n, c, k, input, idx, weight, output); 14 | } 15 | 16 | void interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_input_tensor) 17 | { 18 | const float *grad_output = grad_output_tensor.data_ptr(); 19 | const int *idx = idx_tensor.data_ptr(); 20 | const float *weight = weight_tensor.data_ptr(); 21 | float *grad_input = grad_input_tensor.data_ptr(); 22 | interpolation_backward_cuda_launcher(n, c, k, grad_output, idx, weight, grad_input); 23 | } 24 | -------------------------------------------------------------------------------- /libs/pointops/src/interpolation/interpolation_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "interpolation_cuda_kernel.h" 3 | 4 | 5 | __global__ void interpolation_forward_cuda_kernel(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output) 6 | { 7 | // input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c) 8 | int index = blockIdx.x * blockDim.x + threadIdx.x; 9 | if (index >= n * c) return; 10 | int c_idx = index % c; 11 | int n_idx = index / c; 12 | for (int i = 0; i < k; i++) 13 | { 14 | int idx_idx = n_idx * k + i; 15 | int input_idx = idx[idx_idx] * c + c_idx; 16 | output[index] += input[input_idx] * weight[idx_idx]; 17 | } 18 | } 19 | 20 | __global__ void interpolation_backward_cuda_kernel(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input) 21 | { 22 | // input: grad_output: (n, c), idx: (n, k), weight: (n, k), output: grad_input (m, c) 23 | int index = blockIdx.x * blockDim.x + threadIdx.x; 24 | if (index >= n * c) return; 25 | int c_idx = index % c; 26 | int n_idx = index / c; 27 | for (int i = 0; i < k; i++) 28 | { 29 | int idx_idx = n_idx * k + i; 30 | int input_idx = idx[idx_idx] * c + c_idx; 31 | atomicAdd(grad_input + input_idx, grad_output[index] * weight[idx_idx]); 32 | } 33 | } 34 | 35 | void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output) { 36 | // input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c) 37 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)); 38 | dim3 threads(THREADS_PER_BLOCK); 39 | interpolation_forward_cuda_kernel<<>>(n, c, k, input, idx, weight, output); 40 | } 41 | 42 | void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input) { 43 | // input: grad_output: (n, c), idx: (n, k), weight: (n, k), output: grad_input (m, c) 44 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)); 45 
| dim3 threads(THREADS_PER_BLOCK); 46 | interpolation_backward_cuda_kernel<<>>(n, c, k, grad_output, idx, weight, grad_input); 47 | } 48 | -------------------------------------------------------------------------------- /libs/pointops/src/interpolation/interpolation_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _INTERPOLATION_CUDA_KERNEL 2 | #define _INTERPOLATION_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor); 8 | void interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_input_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output); 15 | void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /libs/pointops/src/knn_query/knn_query_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "knn_query_cuda_kernel.h" 5 | 6 | 7 | void knn_query_cuda(int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor) 8 | { 9 | const float *xyz = xyz_tensor.data_ptr(); 10 | const float *new_xyz = new_xyz_tensor.data_ptr(); 11 | const int *offset = offset_tensor.data_ptr(); 12 | const int *new_offset = new_offset_tensor.data_ptr(); 13 | int *idx = idx_tensor.data_ptr(); 14 | float *dist2 = dist2_tensor.data_ptr(); 15 | knn_query_cuda_launcher(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2); 16 | } 17 | -------------------------------------------------------------------------------- /libs/pointops/src/knn_query/knn_query_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "knn_query_cuda_kernel.h" 3 | 4 | 5 | namespace knn_query_utils{ 6 | 7 | template 8 | __device__ void swap(DType *x, DType *y) 9 | { 10 | DType tmp = *x; 11 | *x = *y; 12 | *y = tmp; 13 | } 14 | 15 | __device__ void reheap(float *dist, int *idx, int k) 16 | { 17 | int root = 0; 18 | int child = root * 2 + 1; 19 | while (child < k) 20 | { 21 | if(child + 1 < k && dist[child+1] > dist[child]) 22 | child++; 23 | if(dist[root] > dist[child]) 24 | return; 25 | swap(&dist[root], &dist[child]); 26 | swap(&idx[root], &idx[child]); 27 | root = child; 28 | child = root * 2 + 1; 29 | } 30 | } 31 | 32 | 33 | __device__ void heap_sort(float *dist, int *idx, int k) 34 | { 35 | int i; 36 | for (i = k - 1; i > 0; i--) 37 | { 38 | swap(&dist[0], &dist[i]); 39 | swap(&idx[0], &idx[i]); 40 | reheap(dist, idx, i); 41 | } 42 | } 43 | 44 | 45 | __device__ int get_bt_idx(int idx, const int *offset) 46 | { 47 | int i = 0; 48 | while (1) 49 | { 50 | if (idx < offset[i]) 51 | break; 52 | else 53 | i++; 54 | } 55 | return i; 56 | } 57 | } // namespace knn_query_utils 58 | 59 | 60 | __global__ void knn_query_cuda_kernel(int m, int nsample, const float *__restrict__ xyz, const 
float *__restrict__ new_xyz, const int *__restrict__ offset, const int *__restrict__ new_offset, int *__restrict__ idx, float *__restrict__ dist2) { 61 | // input: xyz (n, 3) new_xyz (m, 3) 62 | // output: idx (m, nsample) dist2 (m, nsample) 63 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 64 | if (pt_idx >= m) return; 65 | 66 | new_xyz += pt_idx * 3; 67 | idx += pt_idx * nsample; 68 | dist2 += pt_idx * nsample; 69 | 70 | int bt_idx = knn_query_utils::get_bt_idx(pt_idx, new_offset); 71 | int start; 72 | if (bt_idx == 0) 73 | start = 0; 74 | else 75 | start = offset[bt_idx - 1]; 76 | int end = offset[bt_idx]; 77 | 78 | float new_x = new_xyz[0]; 79 | float new_y = new_xyz[1]; 80 | float new_z = new_xyz[2]; 81 | 82 | float best_dist[128]; 83 | int best_idx[128]; 84 | for(int i = 0; i < nsample; i++){ 85 | best_dist[i] = 1e10; 86 | best_idx[i] = -1; 87 | } 88 | for(int i = start; i < end; i++){ 89 | float x = xyz[i * 3 + 0]; 90 | float y = xyz[i * 3 + 1]; 91 | float z = xyz[i * 3 + 2]; 92 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z); 93 | if (d2 < best_dist[0]){ 94 | best_dist[0] = d2; 95 | best_idx[0] = i; 96 | knn_query_utils::reheap(best_dist, best_idx, nsample); 97 | } 98 | } 99 | knn_query_utils::heap_sort(best_dist, best_idx, nsample); 100 | for(int i = 0; i < nsample; i++){ 101 | idx[i] = best_idx[i]; 102 | dist2[i] = best_dist[i]; 103 | } 104 | } 105 | 106 | 107 | void knn_query_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2) { 108 | // input: new_xyz: (m, 3), xyz: (n, 3), idx: (m, nsample) 109 | dim3 blocks(DIVUP(m, THREADS_PER_BLOCK)); 110 | dim3 threads(THREADS_PER_BLOCK); 111 | knn_query_cuda_kernel<<>>(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2); 112 | } 113 | -------------------------------------------------------------------------------- /libs/pointops/src/knn_query/knn_query_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _KNN_QUERY_CUDA_KERNEL 2 | #define _KNN_QUERY_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void knn_query_cuda(int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor); 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | void knn_query_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2); 14 | 15 | #ifdef __cplusplus 16 | } 17 | #endif 18 | #endif 19 | -------------------------------------------------------------------------------- /libs/pointops/src/pointops_api.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "knn_query/knn_query_cuda_kernel.h" 5 | #include "ball_query/ball_query_cuda_kernel.h" 6 | #include "random_ball_query/random_ball_query_cuda_kernel.h" 7 | #include "sampling/sampling_cuda_kernel.h" 8 | #include "grouping/grouping_cuda_kernel.h" 9 | #include "interpolation/interpolation_cuda_kernel.h" 10 | #include "aggregation/aggregation_cuda_kernel.h" 11 | #include "subtraction/subtraction_cuda_kernel.h" 12 | #include "attention/attention_cuda_kernel.h" 13 | 14 | 15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 16 | m.def("knn_query_cuda", &knn_query_cuda, "knn_query_cuda"); 17 | m.def("ball_query_cuda", &ball_query_cuda, 
"ball_query_cuda"); 18 | m.def("random_ball_query_cuda", &random_ball_query_cuda, "random_ball_query_cuda"); 19 | m.def("farthest_point_sampling_cuda", &farthest_point_sampling_cuda, "farthest_point_sampling_cuda"); 20 | m.def("grouping_forward_cuda", &grouping_forward_cuda, "grouping_forward_cuda"); 21 | m.def("grouping_backward_cuda", &grouping_backward_cuda, "grouping_backward_cuda"); 22 | m.def("interpolation_forward_cuda", &interpolation_forward_cuda, "interpolation_forward_cuda"); 23 | m.def("interpolation_backward_cuda", &interpolation_backward_cuda, "interpolation_backward_cuda"); 24 | m.def("subtraction_forward_cuda", &subtraction_forward_cuda, "subtraction_forward_cuda"); 25 | m.def("subtraction_backward_cuda", &subtraction_backward_cuda, "subtraction_backward_cuda"); 26 | m.def("aggregation_forward_cuda", &aggregation_forward_cuda, "aggregation_forward_cuda"); 27 | m.def("aggregation_backward_cuda", &aggregation_backward_cuda, "aggregation_backward_cuda"); 28 | m.def("attention_relation_step_forward_cuda", &attention_relation_step_forward_cuda, "attention_relation_step_forward_cuda"); 29 | m.def("attention_relation_step_backward_cuda", &attention_relation_step_backward_cuda, "attention_relation_step_backward_cuda"); 30 | m.def("attention_fusion_step_forward_cuda", &attention_fusion_step_forward_cuda, "attention_fusion_step_forward_cuda"); 31 | m.def("attention_fusion_step_backward_cuda", &attention_fusion_step_backward_cuda, "attention_fusion_step_backward_cuda"); 32 | } 33 | -------------------------------------------------------------------------------- /libs/pointops/src/random_ball_query/random_ball_query_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "random_ball_query_cuda_kernel.h" 5 | 6 | 7 | void random_ball_query_cuda(int m, int nsample, 8 | float min_radius, float max_radius, at::Tensor order_tensor, 9 | at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, 10 | at::Tensor offset_tensor, at::Tensor new_offset_tensor, 11 | at::Tensor idx_tensor, at::Tensor dist2_tensor) 12 | { 13 | const int *order = order_tensor.data_ptr(); 14 | const float *xyz = xyz_tensor.data_ptr(); 15 | const float *new_xyz = new_xyz_tensor.data_ptr(); 16 | const int *offset = offset_tensor.data_ptr(); 17 | const int *new_offset = new_offset_tensor.data_ptr(); 18 | int *idx = idx_tensor.data_ptr(); 19 | float *dist2 = dist2_tensor.data_ptr(); 20 | random_ball_query_cuda_launcher(m, nsample, min_radius, max_radius, order, xyz, new_xyz, offset, new_offset, idx, dist2); 21 | } 22 | -------------------------------------------------------------------------------- /libs/pointops/src/random_ball_query/random_ball_query_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _RANDOM_BALL_QUERY_CUDA_KERNEL 2 | #define _RANDOM_BALL_QUERY_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void random_ball_query_cuda(int m, int nsample, 8 | float min_radius, float max_radius, at::Tensor order_tensor, 9 | at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, 10 | at::Tensor offset_tensor, at::Tensor new_offset_tensor, 11 | at::Tensor idx_tensor, at::Tensor dist2_tensor); 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | void random_ball_query_cuda_launcher(int m, int nsample, 18 | float min_radius, float max_radius, const int *order, 19 | const float *xyz, const float *new_xyz, 20 | const int *offset, const int *new_offset, 21 | int *idx, float 
*dist2); 22 | 23 | #ifdef __cplusplus 24 | } 25 | #endif 26 | #endif 27 | -------------------------------------------------------------------------------- /libs/pointops/src/sampling/sampling_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "sampling_cuda_kernel.h" 5 | 6 | 7 | void farthest_point_sampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor) 8 | { 9 | const float *xyz = xyz_tensor.data_ptr(); 10 | const int *offset = offset_tensor.data_ptr(); 11 | const int *new_offset = new_offset_tensor.data_ptr(); 12 | float *tmp = tmp_tensor.data_ptr(); 13 | int *idx = idx_tensor.data_ptr(); 14 | farthest_point_sampling_cuda_launcher(b, n, xyz, offset, new_offset, tmp, idx); 15 | } 16 | -------------------------------------------------------------------------------- /libs/pointops/src/sampling/sampling_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _SAMPLING_CUDA_KERNEL 2 | #define _SAMPLING_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void farthest_point_sampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor); 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | void farthest_point_sampling_cuda_launcher(int b, int n, const float *xyz, const int *offset, const int *new_offset, float *tmp, int *idx); 14 | 15 | #ifdef __cplusplus 16 | } 17 | #endif 18 | #endif 19 | -------------------------------------------------------------------------------- /libs/pointops/src/subtraction/subtraction_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "subtraction_cuda_kernel.h" 5 | 6 | 7 | void subtraction_forward_cuda(int n, int nsample, int c, at::Tensor input1_tensor, at::Tensor input2_tensor, at::Tensor idx_tensor, at::Tensor output_tensor) 8 | { 9 | const float *input1 = input1_tensor.data_ptr(); 10 | const float *input2 = input2_tensor.data_ptr(); 11 | const int *idx = idx_tensor.data_ptr(); 12 | float *output = output_tensor.data_ptr(); 13 | subtraction_forward_cuda_launcher(n, nsample, c, input1, input2, idx, output); 14 | } 15 | 16 | void subtraction_backward_cuda(int n, int nsample, int c, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input1_tensor, at::Tensor grad_input2_tensor) 17 | { 18 | const int *idx = idx_tensor.data_ptr(); 19 | const float *grad_output = grad_output_tensor.data_ptr(); 20 | float *grad_input1 = grad_input1_tensor.data_ptr(); 21 | float *grad_input2 = grad_input2_tensor.data_ptr(); 22 | subtraction_backward_cuda_launcher(n, nsample, c, idx, grad_output, grad_input1, grad_input2); 23 | } 24 | -------------------------------------------------------------------------------- /libs/pointops/src/subtraction/subtraction_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "subtraction_cuda_kernel.h" 3 | 4 | 5 | __global__ void subtraction_forward_cuda_kernel(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) { 6 | // input: input1: (n, c), input2: (n, c), idx: (n, nsample), output: (n, nsample, c) 7 | int index = blockIdx.x * blockDim.x + threadIdx.x; 8 | if 
(index >= n * nsample * c) return; 9 | const int c_idx = index % c; 10 | const int nsample_idx = (index / c) % nsample; 11 | const int n_idx = index / nsample / c; 12 | const int idx_idx = n_idx * nsample + nsample_idx; 13 | const int input1_idx = n_idx * c + c_idx; 14 | const int input2_idx = idx[idx_idx] * c + c_idx; 15 | output[index] = input1[input1_idx] - input2[input2_idx]; 16 | } 17 | 18 | __global__ void subtraction_backward_cuda_kernel(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) { 19 | // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c) 20 | int index = blockIdx.x * blockDim.x + threadIdx.x; 21 | if (index >= n * nsample * c) return; 22 | const int c_idx = index % c; 23 | const int nsample_idx = (index / c) % nsample; 24 | const int n_idx = index / nsample / c; 25 | const int idx_idx = n_idx * nsample + nsample_idx; 26 | const int input1_idx = n_idx * c + c_idx; 27 | const int input2_idx = idx[idx_idx] * c + c_idx; 28 | atomicAdd(grad_input1 + input1_idx, grad_output[index]); 29 | atomicAdd(grad_input2 + input2_idx, -grad_output[index]); 30 | } 31 | 32 | void subtraction_forward_cuda_launcher(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) { 33 | // input: input1: (n, c), input2: (n, c), idx: (n, nsample), output: (n, nsample, c) 34 | dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK)); 35 | dim3 threads(THREADS_PER_BLOCK); 36 | subtraction_forward_cuda_kernel<<>>(n, nsample, c, input1, input2, idx, output); 37 | } 38 | 39 | void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) { 40 | // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c) 41 | dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK)); 42 | dim3 threads(THREADS_PER_BLOCK); 43 | subtraction_backward_cuda_kernel<<>>(n, nsample, c, idx, grad_output, grad_input1, grad_input2); 44 | } 45 | -------------------------------------------------------------------------------- /libs/pointops/src/subtraction/subtraction_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _SUBTRACTION_CUDA_KERNEL 2 | #define _SUBTRACTION_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void subtraction_forward_cuda(int n, int nsample, int c, at::Tensor input1_tensor, at::Tensor input2_tensor, at::Tensor idx_tensor, at::Tensor output_tensor); 8 | void subtraction_backward_cuda(int n, int nsample, int c, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input1_tensor, at::Tensor grad_input2_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void subtraction_forward_cuda_launcher(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output); 15 | void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /libs/pointops2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/libs/pointops2/__init__.py 
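The subtraction forward kernel above has a compact PyTorch equivalent that is useful for verifying the built extension. This is an illustrative sketch derived from the kernel's indexing, not code from the repository, and the function name is invented.

import torch

def subtraction_forward_reference(input1, input2, idx):
    # input1, input2: (n, c); idx: (n, nsample)  ->  output: (n, nsample, c)
    # mirrors output[n_idx, nsample_idx, c_idx] = input1[n_idx, c_idx] - input2[idx[n_idx, nsample_idx], c_idx]
    return input1.unsqueeze(1) - input2[idx.long()]

Comparing this against the compiled `subtraction` autograd function on small random CUDA tensors is a quick consistency check.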
-------------------------------------------------------------------------------- /libs/pointops2/functions/__init__.py: -------------------------------------------------------------------------------- 1 | from pointops2 import * 2 | -------------------------------------------------------------------------------- /libs/pointops2/functions/test_attention_op_step1.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pointops 3 | from torch_scatter import ( 4 | scatter_max, 5 | scatter_mean, 6 | scatter_add, 7 | scatter_min, 8 | scatter_sum, 9 | ) 10 | 11 | torch.manual_seed(1) 12 | 13 | M = 800000 14 | N = 35000 15 | C = 96 16 | h = 6 17 | query = torch.rand(N, h, C // h).cuda() 18 | key = torch.rand(N, h, C // h).cuda() 19 | 20 | index_0 = torch.rand(M) 21 | index_0[index_0 < 0] = 0 22 | index_0 = (index_0 * N).long().cuda() 23 | 24 | index_1 = torch.rand(M) 25 | index_1[index_1 < 0] = 0 26 | index_1 = (index_1 * N).long().cuda() 27 | 28 | query.requires_grad = True 29 | key.requires_grad = True 30 | 31 | # rearrange index for acceleration 32 | index_0, indices = torch.sort(index_0) # [M,] 33 | index_1 = index_1[indices] # [M,] 34 | index_0_counts = index_0.bincount() 35 | 36 | print("index_0_counts.shape: ", index_0_counts.shape) 37 | 38 | n_max = index_0_counts.max() 39 | index_0_offsets = index_0_counts.cumsum(dim=-1) # [N] 40 | 41 | print("v1 index_0_offsets.shape: ", index_0_offsets.shape) 42 | 43 | index_0_offsets = torch.cat( 44 | [torch.zeros(1, dtype=torch.long).cuda(), index_0_offsets], 0 45 | ) # [N+1] 46 | 47 | # print("index_0[:100]: ", index_0[:100]) 48 | print("n_max: ", n_max) 49 | print("index_0_offsets.shape: ", index_0_offsets.shape) 50 | # input() 51 | 52 | print("index_0_offsets[:100]: ", index_0_offsets[:100]) 53 | print("index_1[300:320]: ", index_1[300:320]) 54 | 55 | 56 | attn_flat = pointops.attention_step1( 57 | query.float(), key.float(), index_0.int(), index_1.int() 58 | ) 59 | # loss = attn_flat.sum() 60 | # loss.backward() 61 | print( 62 | "attn_flat.shape: {}, attn_flat[300:320,:10]: {}".format( 63 | attn_flat.shape, attn_flat[300:320, :10] 64 | ) 65 | ) 66 | # print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 67 | # print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 68 | # input() 69 | 70 | print("query.is_contiguous(): ", query.is_contiguous()) 71 | print("key.is_contiguous(): ", key.is_contiguous()) 72 | print("index_0.is_contiguous(): ", index_0.is_contiguous()) 73 | print("index_1.is_contiguous(): ", index_1.is_contiguous()) 74 | 75 | attn_flat_v2 = pointops.attention_step1_v2( 76 | query.float(), key.float(), index_1.int(), index_0_offsets.int(), n_max 77 | ) 78 | # loss = attn_flat_v2.sum() 79 | # loss.backward() 80 | print( 81 | "attn_flat_v2.shape: {}, attn_flat_v2[300:320,:10]: {}".format( 82 | attn_flat_v2.shape, attn_flat_v2[300:320, :10] 83 | ) 84 | ) 85 | # print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 86 | # print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 87 | # input() 88 | 89 | mask = attn_flat_v2.sum(-1) != 0 90 | print("mask.sum(): ", mask.sum()) 91 | print( 92 | "attn_flat_v2[mask] - attn_flat[mask]: ", 93 | ((attn_flat_v2[mask] - attn_flat[mask]) ** 2).max(), 94 | ) 95 | 96 | 97 | print( 98 | "((attn_flat-attn_flat_v2)**2 < 1e-8).all(): ", 99 | ((attn_flat - attn_flat_v2) ** 2 < 1e-8).all(), 100 | ) 101 | 102 | selected = 10000 103 | print( 104 | "torch.max((attn_flat[:selected]-attn_flat_v2[:selected])**2, 0): ", 105 | torch.max((attn_flat[:selected] 
- attn_flat_v2[:selected]) ** 2, 0), 106 | ) 107 | -------------------------------------------------------------------------------- /libs/pointops2/functions/test_attention_op_step1_v2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pointops 3 | from torch_scatter import ( 4 | scatter_max, 5 | scatter_mean, 6 | scatter_add, 7 | scatter_min, 8 | scatter_sum, 9 | ) 10 | 11 | torch.manual_seed(1) 12 | 13 | M = 800000 14 | N = 35000 15 | C = 96 16 | h = 6 17 | query = torch.rand(N, h, C // h).cuda() 18 | key = torch.rand(N, h, C // h).cuda() 19 | 20 | index_0 = torch.rand(M) 21 | index_0[index_0 < 0] = 0 22 | index_0 = (index_0 * N).long().cuda() 23 | 24 | index_1 = torch.rand(M) 25 | index_1[index_1 < 0] = 0 26 | index_1 = (index_1 * N).long().cuda() 27 | 28 | query.requires_grad = True 29 | key.requires_grad = True 30 | 31 | 32 | attn_flat = pointops.attention_step1( 33 | query.float(), key.float(), index_0.int(), index_1.int() 34 | ) 35 | loss = attn_flat.sum() 36 | loss.backward() 37 | print( 38 | "attn_flat.shape: {}, attn_flat[:20,:10]: {}".format( 39 | attn_flat.shape, attn_flat[:20, :10] 40 | ) 41 | ) 42 | print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 43 | print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 44 | input() 45 | 46 | 47 | # rearrange index for acceleration 48 | index_0, indices = torch.sort(index_0) # [M,] 49 | index_1 = index_1[indices] # [M,] 50 | index_0_counts = index_0.bincount() 51 | 52 | print("index_0_counts.shape: ", index_0_counts.shape) 53 | 54 | n_max = index_0_counts.max() 55 | index_0_offsets = index_0_counts.cumsum(dim=-1) # [N] 56 | 57 | print("v1 index_0_offsets.shape: ", index_0_offsets.shape) 58 | 59 | index_0_offsets = torch.cat( 60 | [torch.zeros(1, dtype=torch.long).cuda(), index_0_offsets], 0 61 | ) # [N+1] 62 | 63 | # print("index_0[:100]: ", index_0[:100]) 64 | print("n_max: ", n_max) 65 | print("index_0_offsets.shape: ", index_0_offsets.shape) 66 | # input() 67 | 68 | print("index_0_offsets[:100]: ", index_0_offsets[:100]) 69 | print("index_1[:20]: ", index_1[:20]) 70 | 71 | 72 | attn_flat = pointops.attention_step1( 73 | query.float(), key.float(), index_0.int(), index_1.int() 74 | ) 75 | # loss = attn_flat.sum() 76 | # loss.backward() 77 | # # attn_flat = pointops.attention_step1(query.float(), key.float(), index_0.int(), index_1.int()) 78 | # # loss = attn_flat.sum() 79 | # # loss.backward() 80 | # print("attn_flat.shape: {}, attn_flat[:20,:10]: {}".format(attn_flat.shape, attn_flat[:20,:10])) 81 | # print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 82 | # print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 83 | # input() 84 | 85 | print("query.is_contiguous(): ", query.is_contiguous()) 86 | print("key.is_contiguous(): ", key.is_contiguous()) 87 | print("index_0.is_contiguous(): ", index_0.is_contiguous()) 88 | print("index_1.is_contiguous(): ", index_1.is_contiguous()) 89 | 90 | attn_flat_v2 = pointops.attention_step1_v2( 91 | query.float(), key.float(), index_1.int(), index_0_offsets.int(), n_max 92 | ) 93 | loss = attn_flat_v2.sum() 94 | loss.backward() 95 | 96 | # attn_flat_v2 = pointops.attention_step1_v2(query.float(), key.float(), index_1.int(), index_0_offsets.int(), n_max) 97 | # loss = attn_flat_v2.sum() 98 | # loss.backward() 99 | 100 | print( 101 | "attn_flat_v2.shape: {}, attn_flat_v2[:20,:10]: {}".format( 102 | attn_flat_v2.shape, attn_flat_v2[:20, :10] 103 | ) 104 | ) 105 | print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 106 | 
print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 107 | # input() 108 | 109 | # mask = attn_flat_v2.sum(-1) != 0 110 | # print("mask.sum(): ", mask.sum()) 111 | # print("attn_flat_v2[mask] - attn_flat[mask]: ", ((attn_flat_v2[mask] - attn_flat[mask])**2).max()) 112 | 113 | 114 | print( 115 | "((attn_flat-attn_flat_v2)**2 < 1e-8).all(): ", 116 | ((attn_flat - attn_flat_v2) ** 2 < 1e-8).all(), 117 | ) 118 | 119 | selected = 10000 120 | print( 121 | "torch.max((attn_flat[:selected]-attn_flat_v2[:selected])**2, 0): ", 122 | torch.max((attn_flat[:selected] - attn_flat_v2[:selected]) ** 2, 0), 123 | ) 124 | -------------------------------------------------------------------------------- /libs/pointops2/functions/test_attention_op_step2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pointops 3 | from torch_scatter import ( 4 | scatter_max, 5 | scatter_mean, 6 | scatter_add, 7 | scatter_min, 8 | scatter_sum, 9 | ) 10 | 11 | torch.manual_seed(1) 12 | 13 | M = 800000 14 | N = 35000 15 | C = 96 16 | h = 6 17 | softmax_attn_flat = torch.rand(M, h).cuda() 18 | value = torch.rand(N, h, C // h).cuda() 19 | 20 | index_0 = torch.rand(M) 21 | index_0[index_0 < 0] = 0 22 | index_0 = (index_0 * N).long().cuda() 23 | 24 | index_1 = torch.rand(M) 25 | index_1[index_1 < 0] = 0 26 | index_1 = (index_1 * N).long().cuda() 27 | 28 | softmax_attn_flat.requires_grad = True 29 | value.requires_grad = True 30 | 31 | # value_flat = value[index_1] #[M, num_heads, C // num_heads] 32 | # x = (softmax_attn_flat.unsqueeze(-1) * value_flat).reshape(M, C) 33 | # x = scatter_sum(src=x, index=index_0, dim=0, dim_size=N) #[N, C] 34 | # loss = x.sum() 35 | # loss.backward() 36 | 37 | # print("x.shape: {}, x[:5,:10]: {}".format(x.shape, x[:5,:10])) 38 | # print("softmax_attn_flat.grad[:5, :10]: ", softmax_attn_flat.grad[:5, :10]) 39 | # print("value.grad[:5, :3, :5]: ", value.grad[:5, :3, :5]) 40 | # input() 41 | 42 | print("softmax_attn_flat.is_contiguous(): ", softmax_attn_flat.is_contiguous()) 43 | print("value.is_contiguous(): ", value.is_contiguous()) 44 | print("index_0.is_contiguous(): ", index_0.is_contiguous()) 45 | print("index_1.is_contiguous(): ", index_1.is_contiguous()) 46 | 47 | x_v2 = pointops.attention_step2( 48 | softmax_attn_flat.float(), value.float(), index_0.int(), index_1.int() 49 | ) 50 | x_v2 = x_v2.view(N, C) 51 | loss = x_v2.sum() 52 | loss.backward() 53 | 54 | print("x_v2.shape: {}, x_v2[:5,:10]: {}".format(x_v2.shape, x_v2[:5, :10])) 55 | 56 | print("softmax_attn_flat.grad[:5, :10]: ", softmax_attn_flat.grad[:5, :10]) 57 | print("value.grad[:5, :3, :5]: ", value.grad[:5, :3, :5]) 58 | input() 59 | 60 | print("((x-x_v2)**2 < 1e-8).all(): ", ((x - x_v2) ** 2 < 1e-8).all()) 61 | 62 | print("torch.max((x-x_v2)**2): ", torch.max((x - x_v2) ** 2)) 63 | -------------------------------------------------------------------------------- /libs/pointops2/functions/test_relative_pos_encoding_op_step1.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pointops 3 | from torch_scatter import ( 4 | scatter_max, 5 | scatter_mean, 6 | scatter_add, 7 | scatter_min, 8 | scatter_sum, 9 | ) 10 | 11 | torch.manual_seed(1) 12 | 13 | M = 80000 14 | N = 3500 15 | hdim = 16 16 | h = 6 17 | L = 31 18 | query = torch.rand(N, h, hdim).cuda() 19 | table = torch.rand(L, h, hdim, 3).cuda() 20 | 21 | index = torch.rand(M) 22 | index[index < 0] = 0 23 | index = (index * N).long().cuda() 24 | 25 | rel_index = 
torch.rand(M, 3) 26 | rel_index[rel_index < 0] = 0 27 | rel_index = (rel_index * L).long().cuda() 28 | 29 | query.requires_grad = True 30 | table.requires_grad = True 31 | 32 | # query_flat = query[index] #[M, h, hdim] 33 | # table_x, table_y, table_z = table[:,:,:,0], table[:,:,:,1], table[:,:,:,2] #[L, h, hdim] 34 | # rel_index_x, rel_index_y, rel_index_z = rel_index[:,0], rel_index[:,1], rel_index[:,2] #[M] 35 | # rel_pos_encoding = table_x[rel_index_x] + table_y[rel_index_y] + table_z[rel_index_z] #[M, h, hdim] 36 | # output = (query_flat * rel_pos_encoding).sum(-1) #[M, h] 37 | # loss = output.mean() 38 | # loss.backward() 39 | 40 | # print("output.shape: {}, output[:5,:10]: {}".format(output.shape, output[:5,:10])) 41 | # print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 42 | # print("table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2]) 43 | # input() 44 | 45 | # print("query.is_contiguous(): ", query.is_contiguous()) 46 | # print("key.is_contiguous(): ", key.is_contiguous()) 47 | # print("index_0.is_contiguous(): ", index_0.is_contiguous()) 48 | # print("index_1.is_contiguous(): ", index_1.is_contiguous()) 49 | 50 | output_v2 = pointops.dot_prod_with_idx(query, index.int(), table, rel_index.int()) 51 | loss = output_v2.mean() 52 | loss.backward() 53 | 54 | print( 55 | "output_v2.shape: {}, output_v2[:5,:10]: {}".format( 56 | output_v2.shape, output_v2[:5, :10] 57 | ) 58 | ) 59 | print("v2: query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 60 | print("v2: table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2]) 61 | input() 62 | 63 | # print("((output-output_v2)**2).max(): ", ((output-output_v2)**2).max()) 64 | 65 | # print("torch.max((attn_flat-attn_flat_v2)**2): ", torch.max((attn_flat-attn_flat_v2)**2)) 66 | -------------------------------------------------------------------------------- /libs/pointops2/functions/test_relative_pos_encoding_op_step1_v2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pointops 3 | from torch_scatter import ( 4 | scatter_max, 5 | scatter_mean, 6 | scatter_add, 7 | scatter_min, 8 | scatter_sum, 9 | ) 10 | 11 | torch.manual_seed(1) 12 | 13 | M = 80000 14 | N = 3500 15 | hdim = 16 16 | h = 6 17 | L = 31 18 | query = torch.rand(N, h, hdim).cuda() 19 | table_q = torch.rand(L, h, hdim, 3).cuda() 20 | key = torch.rand(N, h, hdim).cuda() 21 | table_k = torch.rand(L, h, hdim, 3).cuda() 22 | 23 | index_q = torch.rand(M) 24 | index_q[index_q < 0] = 0 25 | index_q = (index_q * N).long().cuda() 26 | 27 | index_k = torch.rand(M) 28 | index_k[index_k < 0] = 0 29 | index_k = (index_k * N).long().cuda() 30 | 31 | rel_index = torch.rand(M, 3) 32 | rel_index[rel_index < 0] = 0 33 | rel_index = (rel_index * L).long().cuda() 34 | 35 | query.requires_grad = True 36 | table_q.requires_grad = True 37 | key.requires_grad = True 38 | table_k.requires_grad = True 39 | 40 | output1 = pointops.dot_prod_with_idx(query, index_q.int(), table_q, rel_index.int()) 41 | output2 = pointops.dot_prod_with_idx(key, index_k.int(), table_k, rel_index.int()) 42 | output = output1 + output2 43 | # loss = output.mean() 44 | # loss.backward() 45 | 46 | # print("output.shape: {}, output[:5,:10]: {}".format(output.shape, output[:5,:10])) 47 | # print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 48 | # print("table_q.grad[:5, :3, :5, :2]: ", table_q.grad[:5, :3, :5, :2]) 49 | # print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 50 | # print("table_k.grad[:5, :3, :5, :2]: ", table_k.grad[:5, :3, :5, :2]) 51 | 
# input() 52 | 53 | # print("query.is_contiguous(): ", query.is_contiguous()) 54 | # print("key.is_contiguous(): ", key.is_contiguous()) 55 | # print("index_0.is_contiguous(): ", index_0.is_contiguous()) 56 | # print("index_1.is_contiguous(): ", index_1.is_contiguous()) 57 | 58 | output_v2 = pointops.dot_prod_with_idx_v2( 59 | query, index_q.int(), key, index_k.int(), table_q, table_k, rel_index.int() 60 | ) 61 | loss = output_v2.mean() 62 | loss.backward() 63 | 64 | print( 65 | "output_v2.shape: {}, output_v2[:5,:10]: {}".format( 66 | output_v2.shape, output_v2[:5, :10] 67 | ) 68 | ) 69 | print("v2 query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 70 | print("v2 table_q.grad[:5, :3, :5, :2]: ", table_q.grad[:5, :3, :5, :2]) 71 | print("v2 key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 72 | print("v2 table_k.grad[:5, :3, :5, :2]: ", table_k.grad[:5, :3, :5, :2]) 73 | # input() 74 | 75 | print("((output-output_v2)**2).max(): ", ((output - output_v2) ** 2).max()) 76 | -------------------------------------------------------------------------------- /libs/pointops2/functions/test_relative_pos_encoding_op_step1_v3.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pointops 3 | from torch_scatter import ( 4 | scatter_max, 5 | scatter_mean, 6 | scatter_add, 7 | scatter_min, 8 | scatter_sum, 9 | ) 10 | 11 | torch.manual_seed(1) 12 | 13 | M = 80000 14 | N = 3500 15 | # M = 80 16 | # N = 5 17 | hdim = 16 18 | h = 6 19 | L = 31 20 | query = torch.rand(N, h, hdim).cuda() 21 | table_q = torch.rand(L, h, hdim, 3).cuda() 22 | key = torch.rand(N, h, hdim).cuda() 23 | table_k = torch.rand(L, h, hdim, 3).cuda() 24 | 25 | index_q = torch.rand(M) 26 | index_q[index_q < 0] = 0 27 | index_q = (index_q * N).long().cuda() 28 | 29 | index_k = torch.rand(M) 30 | index_k[index_k < 0] = 0 31 | index_k = (index_k * N).long().cuda() 32 | 33 | rel_index = torch.rand(M, 3) 34 | rel_index[rel_index < 0] = 0 35 | rel_index = (rel_index * L).long().cuda() 36 | 37 | 38 | # rearrange index for acceleration 39 | index_q, indices = torch.sort(index_q) # [M,] 40 | index_k = index_k[indices] # [M,] 41 | rel_index = rel_index[indices] 42 | index_q_counts = index_q.bincount() 43 | 44 | print("index_q_counts.shape: ", index_q_counts.shape) 45 | 46 | n_max = index_q_counts.max() 47 | index_q_offsets = index_q_counts.cumsum(dim=-1) # [N] 48 | 49 | print("v1 index_q_offsets.shape: ", index_q_offsets.shape) 50 | 51 | index_q_offsets = torch.cat( 52 | [torch.zeros(1, dtype=torch.long).cuda(), index_q_offsets], 0 53 | ) # [N+1] 54 | 55 | # print("index_q[:100]: ", index_q[:100]) 56 | print("n_max: ", n_max) 57 | print("index_q_offsets.shape: ", index_q_offsets.shape) 58 | # input() 59 | 60 | print("index_q_offsets[:100]: ", index_q_offsets[:100]) 61 | print("index_k[:20]: ", index_k[:20]) 62 | 63 | query.requires_grad = True 64 | table_q.requires_grad = True 65 | key.requires_grad = True 66 | table_k.requires_grad = True 67 | 68 | output1 = pointops.dot_prod_with_idx(query, index_q.int(), table_q, rel_index.int()) 69 | output2 = pointops.dot_prod_with_idx(key, index_k.int(), table_k, rel_index.int()) 70 | output = output1 + output2 71 | loss = output.mean() 72 | loss.backward() 73 | 74 | # print("output.shape: {}, output[:5,:10]: {}".format(output.shape, output[:5,:10])) 75 | # print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 76 | # print("table_q.grad[:5, :3, :5, :2]: ", table_q.grad[:5, :3, :5, :2]) 77 | # print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 78 | # 
print("table_k.grad[:5, :3, :5, :2]: ", table_k.grad[:5, :3, :5, :2]) 79 | # input() 80 | 81 | # print("query.is_contiguous(): ", query.is_contiguous()) 82 | # print("key.is_contiguous(): ", key.is_contiguous()) 83 | # print("index_q.is_contiguous(): ", index_q.is_contiguous()) 84 | # print("index_k.is_contiguous(): ", index_k.is_contiguous()) 85 | 86 | output_v2 = pointops.dot_prod_with_idx_v3( 87 | query, 88 | index_q_offsets.int(), 89 | n_max, 90 | key, 91 | index_k.int(), 92 | table_q, 93 | table_k, 94 | rel_index.int(), 95 | ) 96 | # loss = output_v2.mean() 97 | # loss.backward() 98 | 99 | # print("output_v2.shape: {}, output_v2[:5,:10]: {}".format(output_v2.shape, output_v2[:5,:10])) 100 | # print("v2 query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 101 | # print("v2 table_q.grad[:5, :3, :5, :2]: ", table_q.grad[:5, :3, :5, :2]) 102 | # print("v2 key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 103 | # print("v2 table_k.grad[:5, :3, :5, :2]: ", table_k.grad[:5, :3, :5, :2]) 104 | # input() 105 | 106 | print("((output-output_v2)**2).max(): ", ((output - output_v2) ** 2).max()) 107 | -------------------------------------------------------------------------------- /libs/pointops2/functions/test_relative_pos_encoding_op_step2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pointops 3 | from torch_scatter import ( 4 | scatter_max, 5 | scatter_mean, 6 | scatter_add, 7 | scatter_min, 8 | scatter_sum, 9 | ) 10 | 11 | torch.manual_seed(1) 12 | 13 | M = 80000 14 | N = 3500 15 | hdim = 16 16 | h = 6 17 | L = 31 18 | attn = torch.rand(M, h).cuda() 19 | v = torch.rand(N, h, hdim).cuda() 20 | table = torch.rand(L, h, hdim, 3).cuda() 21 | 22 | index_0 = torch.rand(M) 23 | index_0[index_0 < 0] = 0 24 | index_0 = (index_0 * N).long().cuda() 25 | 26 | index_1 = torch.rand(M) 27 | index_1[index_1 < 0] = 0 28 | index_1 = (index_1 * N).long().cuda() 29 | 30 | rel_index = torch.rand(M, 3) 31 | rel_index[rel_index < 0] = 0 32 | rel_index = (rel_index * L).long().cuda() 33 | 34 | attn.requires_grad = True 35 | v.requires_grad = True 36 | table.requires_grad = True 37 | 38 | v_flat = v[index_1] # [M, h, hdim] 39 | table_x, table_y, table_z = ( 40 | table[:, :, :, 0], 41 | table[:, :, :, 1], 42 | table[:, :, :, 2], 43 | ) # [L, h, hdim] 44 | rel_index_x, rel_index_y, rel_index_z = ( 45 | rel_index[:, 0], 46 | rel_index[:, 1], 47 | rel_index[:, 2], 48 | ) # [M] 49 | rel_pos_encoding = ( 50 | table_x[rel_index_x] + table_y[rel_index_y] + table_z[rel_index_z] 51 | ) # [M, h, hdim] 52 | v_flat_new = v_flat + rel_pos_encoding # [M, h, hdim] 53 | output = attn.unsqueeze(-1) * v_flat_new # [M, h, hdim] 54 | output = scatter_sum(src=output, index=index_0, dim=0, dim_size=N) # [N, h, hdim] 55 | loss = output.mean() 56 | loss.backward() 57 | 58 | print( 59 | "output.shape: {}, output[:5,:10,:5]: {}".format(output.shape, output[:5, :10, :5]) 60 | ) 61 | print("attn.grad[:5, :3]: ", attn.grad[:5, :3]) 62 | print("v.grad[:5, :3, :5]: ", v.grad[:5, :3, :5]) 63 | print("table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2]) 64 | input() 65 | 66 | # print("query.is_contiguous(): ", query.is_contiguous()) 67 | # print("key.is_contiguous(): ", key.is_contiguous()) 68 | # print("index_0.is_contiguous(): ", index_0.is_contiguous()) 69 | # print("index_1.is_contiguous(): ", index_1.is_contiguous()) 70 | 71 | # output_v2 = pointops.attention_step2_with_rel_pos_value(attn, v, index_0.int(), index_1.int(), table, rel_index.int()) 72 | # loss = output_v2.mean() 73 | 
# loss.backward() 74 | 75 | # print("output_v2.shape: {}, output_v2[:5,:10,:5]: {}".format(output_v2.shape, output_v2[:5,:10,:5])) 76 | # print("v2 attn.grad[:5, :3]: ", attn.grad[:5, :3]) 77 | # print("v2 v.grad[:5, :3, :5]: ", v.grad[:5, :3, :5]) 78 | # print("v2 table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2]) 79 | # input() 80 | 81 | # print("((output-output_v2)**2).max(): ", ((output-output_v2)**2).max()) 82 | 83 | # print("torch.max((attn_flat-attn_flat_v2)**2): ", torch.max((attn_flat-attn_flat_v2)**2)) 84 | -------------------------------------------------------------------------------- /libs/pointops2/functions/test_relative_pos_encoding_op_step2_v2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pointops 3 | from torch_scatter import ( 4 | scatter_max, 5 | scatter_mean, 6 | scatter_add, 7 | scatter_min, 8 | scatter_sum, 9 | ) 10 | 11 | torch.manual_seed(1) 12 | 13 | M = 80000 14 | N = 3500 15 | hdim = 16 16 | h = 6 17 | L = 31 18 | attn = torch.rand(M, h).cuda() 19 | v = torch.rand(N, h, hdim).cuda() 20 | table = torch.rand(L, h, hdim, 3).cuda() 21 | 22 | index_0 = torch.rand(M) 23 | index_0[index_0 < 0] = 0 24 | index_0 = (index_0 * N).long().cuda() 25 | 26 | index_1 = torch.rand(M) 27 | index_1[index_1 < 0] = 0 28 | index_1 = (index_1 * N).long().cuda() 29 | 30 | rel_index = torch.rand(M, 3) 31 | rel_index[rel_index < 0] = 0 32 | rel_index = (rel_index * L).long().cuda() 33 | 34 | 35 | # rearrange index for acceleration 36 | index_0, indices = torch.sort(index_0) # [M,] 37 | index_1 = index_1[indices] # [M,] 38 | rel_index = rel_index[indices] 39 | index_0_counts = index_0.bincount() 40 | 41 | print("index_0_counts.shape: ", index_0_counts.shape) 42 | 43 | n_max = index_0_counts.max() 44 | index_0_offsets = index_0_counts.cumsum(dim=-1) # [N] 45 | 46 | print("v1 index_0_offsets.shape: ", index_0_offsets.shape) 47 | 48 | index_0_offsets = torch.cat( 49 | [torch.zeros(1, dtype=torch.long).cuda(), index_0_offsets], 0 50 | ) # [N+1] 51 | 52 | 53 | attn.requires_grad = True 54 | v.requires_grad = True 55 | table.requires_grad = True 56 | 57 | 58 | output = pointops.attention_step2_with_rel_pos_value( 59 | attn, v, index_0.int(), index_1.int(), table, rel_index.int() 60 | ) 61 | loss = output.mean() 62 | loss.backward() 63 | 64 | print( 65 | "output.shape: {}, output[:5,:10,:5]: {}".format(output.shape, output[:5, :10, :5]) 66 | ) 67 | print("attn.grad[:5, :3]: ", attn.grad[:5, :3]) 68 | print("v.grad[:5, :3, :5]: ", v.grad[:5, :3, :5]) 69 | print("table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2]) 70 | # input() 71 | 72 | attn_grad = attn.grad.clone() 73 | v_grad = v.grad.clone() 74 | table_grad = table.grad.clone() 75 | 76 | attn.grad.zero_() 77 | v.grad.zero_() 78 | table.grad.zero_() 79 | 80 | # print("query.is_contiguous(): ", query.is_contiguous()) 81 | # print("key.is_contiguous(): ", key.is_contiguous()) 82 | # print("index_0.is_contiguous(): ", index_0.is_contiguous()) 83 | # print("index_1.is_contiguous(): ", index_1.is_contiguous()) 84 | 85 | output_v2 = pointops.attention_step2_with_rel_pos_value_v2( 86 | attn, v, index_0_offsets.int(), n_max, index_1.int(), table, rel_index.int() 87 | ) 88 | loss = output_v2.mean() 89 | loss.backward() 90 | 91 | print( 92 | "output_v2.shape: {}, output_v2[:5,:10,:5]: {}".format( 93 | output_v2.shape, output_v2[:5, :10, :5] 94 | ) 95 | ) 96 | print("v2 attn.grad[:5, :3]: ", attn.grad[:5, :3]) 97 | print("v2 v.grad[:5, :3, :5]: ", v.grad[:5, :3, :5]) 98 | 
print("v2 table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2]) 99 | # input() 100 | 101 | print("((output-output_v2)**2).max(): ", ((output - output_v2) ** 2).max()) 102 | 103 | print("((attn_grad-attn.grad)**2).max(): ", ((attn_grad - attn.grad) ** 2).max()) 104 | 105 | print("((v_grad-v.grad)**2).max(): ", ((v_grad - v.grad) ** 2).max()) 106 | 107 | print("((table_grad-table.grad)**2).max(): ", ((table_grad - table.grad) ** 2).max()) 108 | 109 | # print("torch.max((attn_flat-attn_flat_v2)**2): ", torch.max((attn_flat-attn_flat_v2)**2)) 110 | -------------------------------------------------------------------------------- /libs/pointops2/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup 3 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 4 | from distutils.sysconfig import get_config_vars 5 | 6 | (opt,) = get_config_vars("OPT") 7 | os.environ["OPT"] = " ".join( 8 | flag for flag in opt.split() if flag != "-Wstrict-prototypes" 9 | ) 10 | 11 | src = "src" 12 | sources = [ 13 | os.path.join(root, file) 14 | for root, dirs, files in os.walk(src) 15 | for file in files 16 | if file.endswith(".cpp") or file.endswith(".cu") 17 | ] 18 | 19 | setup( 20 | name="pointops2", 21 | version="1.0", 22 | install_requires=["torch", "numpy"], 23 | packages=["pointops2"], 24 | package_dir={"pointops2": "functions"}, 25 | ext_modules=[ 26 | CUDAExtension( 27 | name="pointops2_cuda", 28 | sources=sources, 29 | extra_compile_args={"cxx": ["-g"], "nvcc": ["-O2"]}, 30 | ) 31 | ], 32 | cmdclass={"build_ext": BuildExtension}, 33 | ) 34 | -------------------------------------------------------------------------------- /libs/pointops2/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/libs/pointops2/src/__init__.py -------------------------------------------------------------------------------- /libs/pointops2/src/aggregation/aggregation_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "aggregation_cuda_kernel.h" 5 | 6 | 7 | void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor) 8 | { 9 | const float *input = input_tensor.data_ptr(); 10 | const float *position = position_tensor.data_ptr(); 11 | const float *weight = weight_tensor.data_ptr(); 12 | const int *idx = idx_tensor.data_ptr(); 13 | float *output = output_tensor.data_ptr(); 14 | aggregation_forward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, output); 15 | } 16 | 17 | void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor) 18 | { 19 | const float *input = input_tensor.data_ptr(); 20 | const float *position = position_tensor.data_ptr(); 21 | const float *weight = weight_tensor.data_ptr(); 22 | const int *idx = idx_tensor.data_ptr(); 23 | const float *grad_output = grad_output_tensor.data_ptr(); 24 | float *grad_input = grad_input_tensor.data_ptr(); 25 | float *grad_position = grad_position_tensor.data_ptr(); 26 | float 
*grad_weight = grad_weight_tensor.data_ptr(); 27 | aggregation_backward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight); 28 | } 29 | -------------------------------------------------------------------------------- /libs/pointops2/src/aggregation/aggregation_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "aggregation_cuda_kernel.h" 3 | 4 | 5 | __global__ void aggregation_forward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) { 6 | // input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c) 7 | int index = blockIdx.x * blockDim.x + threadIdx.x; 8 | if (index >= n * c) return; 9 | const int c_idx = index % c; 10 | const int n_idx = index / c; 11 | const int w_c_idx = c_idx % w_c; 12 | for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++) 13 | { 14 | int idx_idx = n_idx * nsample + nsample_idx; 15 | int input_idx = idx[idx_idx] * c + c_idx; 16 | int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx; 17 | int weight_idx = n_idx * nsample * w_c + nsample_idx * w_c + w_c_idx; 18 | output[index] += (input[input_idx] + position[position_idx]) * weight[weight_idx]; 19 | } 20 | } 21 | 22 | __global__ void aggregation_backward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) { 23 | // input: grad_output: (n, c), output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight: (n, nsample, w_c) 24 | int index = blockIdx.x * blockDim.x + threadIdx.x; 25 | if (index >= n * c) return; 26 | const int c_idx = index % c; 27 | const int n_idx = index / c; 28 | const int w_c_idx = c_idx % w_c; 29 | for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++) 30 | { 31 | int idx_idx = n_idx * nsample + nsample_idx; 32 | int input_idx = idx[idx_idx] * c + c_idx; 33 | int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx; 34 | int weight_idx = n_idx * nsample * w_c + nsample_idx * w_c + w_c_idx; 35 | atomicAdd(grad_input + input_idx, grad_output[index] * weight[weight_idx]); 36 | grad_position[position_idx] = grad_output[index] * weight[weight_idx]; 37 | atomicAdd(grad_weight + weight_idx, grad_output[index] * (input[input_idx] + position[position_idx])); 38 | } 39 | } 40 | 41 | void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) { 42 | // input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c) 43 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)); 44 | dim3 threads(THREADS_PER_BLOCK); 45 | aggregation_forward_cuda_kernel<<>>(n, nsample, c, w_c, input, position, weight, idx, output); 46 | } 47 | 48 | void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) { 49 | // input: grad_output: (n, c), output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight: (n, nsample, w_c) 50 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)); 51 | dim3 
threads(THREADS_PER_BLOCK); 52 | aggregation_backward_cuda_kernel<<>>(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight); 53 | } 54 | -------------------------------------------------------------------------------- /libs/pointops2/src/aggregation/aggregation_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _AGGREGATION_CUDA_KERNEL 2 | #define _AGGREGATION_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor); 8 | void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output); 15 | void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /libs/pointops2/src/attention/attention_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "attention_cuda_kernel.h" 5 | 6 | void attention_step1_forward_cuda(int N, int M, int h, int C, at::Tensor q_tensor, at::Tensor k_tensor, 7 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor) 8 | { 9 | const float *q = q_tensor.data_ptr(); 10 | const float *k = k_tensor.data_ptr(); 11 | const int *index0 = index0_tensor.data_ptr(); 12 | const int *index1 = index1_tensor.data_ptr(); 13 | float *attn = attn_tensor.data_ptr(); 14 | attention_step1_forward_cuda_launcher(N, M, h, C, q, k, index0, index1, attn); 15 | } 16 | 17 | void attention_step1_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor, 18 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, 19 | at::Tensor grad_q_tensor, at::Tensor grad_k_tensor) 20 | { 21 | const float *grad_out = grad_out_tensor.data_ptr(); 22 | const int *index0 = index0_tensor.data_ptr(); 23 | const int *index1 = index1_tensor.data_ptr(); 24 | const float *q = q_tensor.data_ptr(); 25 | const float *k = k_tensor.data_ptr(); 26 | float *grad_q = grad_q_tensor.data_ptr(); 27 | float *grad_k = grad_k_tensor.data_ptr(); 28 | attention_step1_backward_cuda_launcher(N, M, h, C, grad_out, index0, index1, q, k, grad_q, grad_k); 29 | } 30 | 31 | void attention_step2_forward_cuda(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, 32 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor) 33 | { 34 | const float *attn = attn_tensor.data_ptr(); 35 | const float *v = v_tensor.data_ptr(); 36 | const int *index0 = index0_tensor.data_ptr(); 37 | const int *index1 = index1_tensor.data_ptr(); 38 | float *output = output_tensor.data_ptr(); 39 | 
attention_step2_forward_cuda_launcher(N, M, h, C, attn, v, index0, index1, output); 40 | } 41 | 42 | 43 | void attention_step2_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor, 44 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, 45 | at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor) 46 | { 47 | const float *grad_out = grad_out_tensor.data_ptr(); 48 | const int *index0 = index0_tensor.data_ptr(); 49 | const int *index1 = index1_tensor.data_ptr(); 50 | const float *attn = attn_tensor.data_ptr(); 51 | const float *v = v_tensor.data_ptr(); 52 | float *grad_attn = grad_attn_tensor.data_ptr(); 53 | float *grad_v = grad_v_tensor.data_ptr(); 54 | attention_step2_backward_cuda_launcher(N, M, h, C, grad_out, index0, index1, attn, v, grad_attn, grad_v); 55 | } 56 | -------------------------------------------------------------------------------- /libs/pointops2/src/attention/attention_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ATTENTION_CUDA_KERNEL 2 | #define _ATTENTION_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void attention_step1_forward_cuda(int N, int M, int h, int C, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor); 8 | void attention_step1_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor); 9 | 10 | void attention_step2_forward_cuda(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor); 11 | void attention_step2_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor); 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | void attention_step1_forward_cuda_launcher(int N, int M, int h, int C, const float *q, const float *k, const int *index0, const int *index1, float *attn); 18 | void attention_step1_backward_cuda_launcher(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *q, const float *k, float *grad_q, float *grad_k); 19 | 20 | void attention_step2_forward_cuda_launcher(int N, int M, int h, int C, const float *attn, const float *v, const int *index0, const int *index1, float *output); 21 | void attention_step2_backward_cuda_launcher(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *attn, const float *v, float *grad_attn, float *grad_v); 22 | 23 | #ifdef __cplusplus 24 | } 25 | #endif 26 | #endif 27 | -------------------------------------------------------------------------------- /libs/pointops2/src/attention_v2/attention_cuda_kernel_v2.h: -------------------------------------------------------------------------------- 1 | #ifndef _ATTENTION_V2_CUDA_KERNEL 2 | #define _ATTENTION_V2_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void attention_step1_forward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor attn_tensor); 8 | void attention_step1_backward_cuda_v2(int N, int M, int h, int C, const 
unsigned int n_max, at::Tensor grad_out_tensor, at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor); 9 | 10 | void attention_step2_forward_cuda_v2(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor); 11 | void attention_step2_backward_cuda_v2(int N, int M, int h, int C, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor); 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | void attention_step1_forward_cuda_launcher_v2(int N, int M, int h, int C, const unsigned int n_max, const float *q, const float *k, const int *index0_offsets, const int *index1, float *attn); 18 | void attention_step1_backward_cuda_launcher_v2(int N, int M, int h, int C, const unsigned int n_max, const float *grad_out, const int *index0_offsets, const int *index1, const float *q, const float *k, float *grad_q, float *grad_k); 19 | 20 | void attention_step2_forward_cuda_launcher_v2(int N, int M, int h, int C, const float *attn, const float *v, const int *index0, const int *index1, float *output); 21 | void attention_step2_backward_cuda_launcher_v2(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *attn, const float *v, float *grad_attn, float *grad_v); 22 | 23 | #ifdef __cplusplus 24 | } 25 | #endif 26 | #endif 27 | -------------------------------------------------------------------------------- /libs/pointops2/src/attention_v2/attention_cuda_v2.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "attention_cuda_kernel_v2.h" 5 | 6 | void attention_step1_forward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor q_tensor, at::Tensor k_tensor, 7 | at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor attn_tensor) 8 | { 9 | const float *q = q_tensor.data_ptr(); 10 | const float *k = k_tensor.data_ptr(); 11 | const int *index0_offsets = index0_tensor_offsets.data_ptr(); 12 | const int *index1 = index1_tensor.data_ptr(); 13 | float *attn = attn_tensor.data_ptr(); 14 | attention_step1_forward_cuda_launcher_v2(N, M, h, C, n_max, q, k, index0_offsets, index1, attn); 15 | } 16 | 17 | void attention_step1_backward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor grad_out_tensor, 18 | at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, 19 | at::Tensor grad_q_tensor, at::Tensor grad_k_tensor) 20 | { 21 | const float *grad_out = grad_out_tensor.data_ptr(); 22 | const int *index0_offsets = index0_tensor_offsets.data_ptr(); 23 | const int *index1 = index1_tensor.data_ptr(); 24 | const float *q = q_tensor.data_ptr(); 25 | const float *k = k_tensor.data_ptr(); 26 | float *grad_q = grad_q_tensor.data_ptr(); 27 | float *grad_k = grad_k_tensor.data_ptr(); 28 | attention_step1_backward_cuda_launcher_v2(N, M, h, C, n_max, grad_out, index0_offsets, index1, q, k, grad_q, grad_k); 29 | } 30 | 31 | void attention_step2_forward_cuda_v2(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, 32 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor) 33 | { 34 | const float *attn = attn_tensor.data_ptr(); 35 | const float *v 
= v_tensor.data_ptr<float>(); 36 | const int *index0 = index0_tensor.data_ptr<int>(); 37 | const int *index1 = index1_tensor.data_ptr<int>(); 38 | float *output = output_tensor.data_ptr<float>(); 39 | attention_step2_forward_cuda_launcher_v2(N, M, h, C, attn, v, index0, index1, output); 40 | } 41 | 42 | 43 | void attention_step2_backward_cuda_v2(int N, int M, int h, int C, at::Tensor grad_out_tensor, 44 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, 45 | at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor) 46 | { 47 | const float *grad_out = grad_out_tensor.data_ptr<float>(); 48 | const int *index0 = index0_tensor.data_ptr<int>(); 49 | const int *index1 = index1_tensor.data_ptr<int>(); 50 | const float *attn = attn_tensor.data_ptr<float>(); 51 | const float *v = v_tensor.data_ptr<float>(); 52 | float *grad_attn = grad_attn_tensor.data_ptr<float>(); 53 | float *grad_v = grad_v_tensor.data_ptr<float>(); 54 | attention_step2_backward_cuda_launcher_v2(N, M, h, C, grad_out, index0, index1, attn, v, grad_attn, grad_v); 55 | } 56 |
-------------------------------------------------------------------------------- /libs/pointops2/src/cuda_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef _CUDA_UTILS_H 2 | #define _CUDA_UTILS_H 3 | 4 | #include <cmath> 5 | #include <algorithm> 6 | 7 | #define TOTAL_THREADS 1024 8 | #define THREADS_PER_BLOCK 256 9 | #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) 10 | 11 | inline int opt_n_threads(int work_size) { 12 | const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0); 13 | return std::max(std::min(1 << pow_2, TOTAL_THREADS), 1); 14 | } 15 | 16 | inline dim3 opt_block_config(int x, int y) { 17 | const int x_threads = opt_n_threads(x); 18 | const int y_threads = std::max(std::min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1); 19 | dim3 block_config(x_threads, y_threads, 1); 20 | return block_config; 21 | } 22 | 23 | #endif 24 |
-------------------------------------------------------------------------------- /libs/pointops2/src/grouping/grouping_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include <vector> 2 | #include <torch/serialize/tensor.h> 3 | #include <torch/extension.h> 4 | #include "grouping_cuda_kernel.h" 5 | 6 | 7 | void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor) 8 | { 9 | const float *input = input_tensor.data_ptr<float>(); 10 | const int *idx = idx_tensor.data_ptr<int>(); 11 | float *output = output_tensor.data_ptr<float>(); 12 | grouping_forward_cuda_launcher(m, nsample, c, input, idx, output); 13 | } 14 | 15 | void grouping_backward_cuda(int m, int nsample, int c, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor) 16 | { 17 | const float *grad_output = grad_output_tensor.data_ptr<float>(); 18 | const int *idx = idx_tensor.data_ptr<int>(); 19 | float *grad_input = grad_input_tensor.data_ptr<float>(); 20 | grouping_backward_cuda_launcher(m, nsample, c, grad_output, idx, grad_input); 21 | } 22 |
-------------------------------------------------------------------------------- /libs/pointops2/src/grouping/grouping_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "grouping_cuda_kernel.h" 3 | 4 | 5 | __global__ void grouping_forward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ input, const int *__restrict__ idx, float *__restrict__ output) { 6 | // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c) 7 | int index = blockIdx.x *
blockDim.x + threadIdx.x; 8 | if (index >= m * nsample * c) return; 9 | const int c_idx = index % c; 10 | const int nsample_idx = (index / c) % nsample; 11 | const int m_idx = index / nsample / c; 12 | const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx; 13 | output[index] = input[input_idx]; 14 | } 15 | 16 | __global__ void grouping_backward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ grad_output, const int *__restrict__ idx, float *__restrict__ grad_input) { 17 | // input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c) 18 | int index = blockIdx.x * blockDim.x + threadIdx.x; 19 | if (index >= m * nsample * c) return; 20 | const int c_idx = index % c; 21 | const int nsample_idx = (index / c) % nsample; 22 | const int m_idx = index / nsample / c; 23 | const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx; 24 | atomicAdd(grad_input + input_idx, grad_output[index]); 25 | } 26 | 27 | void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output) { 28 | // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c) 29 | dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK)); 30 | dim3 threads(THREADS_PER_BLOCK); 31 | grouping_forward_cuda_kernel<<>>(m, nsample, c, input, idx, output); 32 | } 33 | 34 | void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input) 35 | { 36 | // input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c) 37 | dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK)); 38 | dim3 threads(THREADS_PER_BLOCK); 39 | grouping_backward_cuda_kernel<<>>(m, nsample, c, grad_output, idx, grad_input); 40 | } 41 | -------------------------------------------------------------------------------- /libs/pointops2/src/grouping/grouping_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _GROUPING_CUDA_KERNEL 2 | #define _GROUPING_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor); 8 | void grouping_backward_cuda(int m, int nsample, int c, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output); 15 | void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /libs/pointops2/src/interpolation/interpolation_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "interpolation_cuda_kernel.h" 5 | 6 | 7 | void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor) 8 | { 9 | const float *input = input_tensor.data_ptr(); 10 | const int *idx = idx_tensor.data_ptr(); 11 | const float *weight = weight_tensor.data_ptr(); 12 | float *output = output_tensor.data_ptr(); 13 | interpolation_forward_cuda_launcher(n, c, k, input, idx, weight, output); 14 | } 15 | 16 | void 
interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_input_tensor) 17 | { 18 | const float *grad_output = grad_output_tensor.data_ptr(); 19 | const int *idx = idx_tensor.data_ptr(); 20 | const float *weight = weight_tensor.data_ptr(); 21 | float *grad_input = grad_input_tensor.data_ptr(); 22 | interpolation_backward_cuda_launcher(n, c, k, grad_output, idx, weight, grad_input); 23 | } 24 | -------------------------------------------------------------------------------- /libs/pointops2/src/interpolation/interpolation_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "interpolation_cuda_kernel.h" 3 | 4 | 5 | __global__ void interpolation_forward_cuda_kernel(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output) 6 | { 7 | // input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c) 8 | int index = blockIdx.x * blockDim.x + threadIdx.x; 9 | if (index >= n * c) return; 10 | int c_idx = index % c; 11 | int n_idx = index / c; 12 | for (int i = 0; i < k; i++) 13 | { 14 | int idx_idx = n_idx * k + i; 15 | int input_idx = idx[idx_idx] * c + c_idx; 16 | output[index] += input[input_idx] * weight[idx_idx]; 17 | } 18 | } 19 | 20 | __global__ void interpolation_backward_cuda_kernel(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input) 21 | { 22 | // input: grad_output: (n, c), idx: (n, k), weight: (n, k), output: grad_input (m, c) 23 | int index = blockIdx.x * blockDim.x + threadIdx.x; 24 | if (index >= n * c) return; 25 | int c_idx = index % c; 26 | int n_idx = index / c; 27 | for (int i = 0; i < k; i++) 28 | { 29 | int idx_idx = n_idx * k + i; 30 | int input_idx = idx[idx_idx] * c + c_idx; 31 | atomicAdd(grad_input + input_idx, grad_output[index] * weight[idx_idx]); 32 | } 33 | } 34 | 35 | void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output) { 36 | // input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c) 37 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)); 38 | dim3 threads(THREADS_PER_BLOCK); 39 | interpolation_forward_cuda_kernel<<>>(n, c, k, input, idx, weight, output); 40 | } 41 | 42 | void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input) { 43 | // input: grad_output: (n, c), idx: (n, k), weight: (n, k), output: grad_input (m, c) 44 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)); 45 | dim3 threads(THREADS_PER_BLOCK); 46 | interpolation_backward_cuda_kernel<<>>(n, c, k, grad_output, idx, weight, grad_input); 47 | } 48 | -------------------------------------------------------------------------------- /libs/pointops2/src/interpolation/interpolation_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _INTERPOLATION_CUDA_KERNEL 2 | #define _INTERPOLATION_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor); 8 | void interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_input_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | 
#endif 13 | 14 | void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output); 15 | void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /libs/pointops2/src/knnquery/knnquery_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "knnquery_cuda_kernel.h" 5 | 6 | 7 | void knnquery_cuda(int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor) 8 | { 9 | const float *xyz = xyz_tensor.data_ptr(); 10 | const float *new_xyz = new_xyz_tensor.data_ptr(); 11 | const int *offset = offset_tensor.data_ptr(); 12 | const int *new_offset = new_offset_tensor.data_ptr(); 13 | int *idx = idx_tensor.data_ptr(); 14 | float *dist2 = dist2_tensor.data_ptr(); 15 | knnquery_cuda_launcher(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2); 16 | } 17 | -------------------------------------------------------------------------------- /libs/pointops2/src/knnquery/knnquery_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "knnquery_cuda_kernel.h" 3 | 4 | 5 | __device__ void swap_float(float *x, float *y) 6 | { 7 | float tmp = *x; 8 | *x = *y; 9 | *y = tmp; 10 | } 11 | 12 | 13 | __device__ void swap_int(int *x, int *y) 14 | { 15 | int tmp = *x; 16 | *x = *y; 17 | *y = tmp; 18 | } 19 | 20 | 21 | __device__ void reheap(float *dist, int *idx, int k) 22 | { 23 | int root = 0; 24 | int child = root * 2 + 1; 25 | while (child < k) 26 | { 27 | if(child + 1 < k && dist[child+1] > dist[child]) 28 | child++; 29 | if(dist[root] > dist[child]) 30 | return; 31 | swap_float(&dist[root], &dist[child]); 32 | swap_int(&idx[root], &idx[child]); 33 | root = child; 34 | child = root * 2 + 1; 35 | } 36 | } 37 | 38 | 39 | __device__ void heap_sort(float *dist, int *idx, int k) 40 | { 41 | int i; 42 | for (i = k - 1; i > 0; i--) 43 | { 44 | swap_float(&dist[0], &dist[i]); 45 | swap_int(&idx[0], &idx[i]); 46 | reheap(dist, idx, i); 47 | } 48 | } 49 | 50 | 51 | __device__ int get_bt_idx(int idx, const int *offset) 52 | { 53 | int i = 0; 54 | while (1) 55 | { 56 | if (idx < offset[i]) 57 | break; 58 | else 59 | i++; 60 | } 61 | return i; 62 | } 63 | 64 | 65 | __global__ void knnquery_cuda_kernel(int m, int nsample, const float *__restrict__ xyz, const float *__restrict__ new_xyz, const int *__restrict__ offset, const int *__restrict__ new_offset, int *__restrict__ idx, float *__restrict__ dist2) { 66 | // input: xyz (n, 3) new_xyz (m, 3) 67 | // output: idx (m, nsample) dist2 (m, nsample) 68 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 69 | if (pt_idx >= m) return; 70 | 71 | new_xyz += pt_idx * 3; 72 | idx += pt_idx * nsample; 73 | dist2 += pt_idx * nsample; 74 | int bt_idx = get_bt_idx(pt_idx, new_offset); 75 | int start; 76 | if (bt_idx == 0) 77 | start = 0; 78 | else 79 | start = offset[bt_idx - 1]; 80 | int end = offset[bt_idx]; 81 | 82 | float new_x = new_xyz[0]; 83 | float new_y = new_xyz[1]; 84 | float new_z = new_xyz[2]; 85 | 86 | float best_dist[100]; 87 | int best_idx[100]; 88 | for(int i = 0; i < nsample; i++){ 89 | 
best_dist[i] = 1e10; 90 | best_idx[i] = start; 91 | } 92 | for(int i = start; i < end; i++){ 93 | float x = xyz[i * 3 + 0]; 94 | float y = xyz[i * 3 + 1]; 95 | float z = xyz[i * 3 + 2]; 96 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z); 97 | if (d2 < best_dist[0]){ 98 | best_dist[0] = d2; 99 | best_idx[0] = i; 100 | reheap(best_dist, best_idx, nsample); 101 | } 102 | } 103 | heap_sort(best_dist, best_idx, nsample); 104 | for(int i = 0; i < nsample; i++){ 105 | idx[i] = best_idx[i]; 106 | dist2[i] = best_dist[i]; 107 | } 108 | } 109 | 110 | 111 | void knnquery_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2) { 112 | // input: new_xyz: (m, 3), xyz: (n, 3), idx: (m, nsample) 113 | dim3 blocks(DIVUP(m, THREADS_PER_BLOCK)); 114 | dim3 threads(THREADS_PER_BLOCK); 115 | knnquery_cuda_kernel<<<blocks, threads, 0, 0>>>(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2); 116 | } 117 |
-------------------------------------------------------------------------------- /libs/pointops2/src/knnquery/knnquery_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _KNNQUERY_CUDA_KERNEL 2 | #define _KNNQUERY_CUDA_KERNEL 3 | #include <vector> 4 | #include <torch/serialize/tensor.h> 5 | #include <ATen/cuda/CUDAContext.h> 6 | 7 | void knnquery_cuda(int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor); 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | void knnquery_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2); 14 | 15 | #ifdef __cplusplus 16 | } 17 | #endif 18 | #endif 19 |
-------------------------------------------------------------------------------- /libs/pointops2/src/pointops_api.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/serialize/tensor.h> 2 | #include <torch/extension.h> 3 | 4 | #include "knnquery/knnquery_cuda_kernel.h" 5 | #include "sampling/sampling_cuda_kernel.h" 6 | #include "grouping/grouping_cuda_kernel.h" 7 | #include "interpolation/interpolation_cuda_kernel.h" 8 | #include "aggregation/aggregation_cuda_kernel.h" 9 | #include "subtraction/subtraction_cuda_kernel.h" 10 | #include "attention/attention_cuda_kernel.h" 11 | #include "rpe/relative_pos_encoding_cuda_kernel.h" 12 | #include "attention_v2/attention_cuda_kernel_v2.h" 13 | #include "rpe_v2/relative_pos_encoding_cuda_kernel_v2.h" 14 | 15 | 16 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 17 | m.def("knnquery_cuda", &knnquery_cuda, "knnquery_cuda"); 18 | m.def("furthestsampling_cuda", &furthestsampling_cuda, "furthestsampling_cuda"); 19 | m.def("grouping_forward_cuda", &grouping_forward_cuda, "grouping_forward_cuda"); 20 | m.def("grouping_backward_cuda", &grouping_backward_cuda, "grouping_backward_cuda"); 21 | m.def("interpolation_forward_cuda", &interpolation_forward_cuda, "interpolation_forward_cuda"); 22 | m.def("interpolation_backward_cuda", &interpolation_backward_cuda, "interpolation_backward_cuda"); 23 | m.def("subtraction_forward_cuda", &subtraction_forward_cuda, "subtraction_forward_cuda"); 24 | m.def("subtraction_backward_cuda", &subtraction_backward_cuda, "subtraction_backward_cuda"); 25 | m.def("aggregation_forward_cuda", &aggregation_forward_cuda, "aggregation_forward_cuda"); 26 | m.def("aggregation_backward_cuda", &aggregation_backward_cuda, "aggregation_backward_cuda"); 27
| m.def("attention_step1_forward_cuda", &attention_step1_forward_cuda, "attention_step1_forward_cuda"); 28 | m.def("attention_step1_backward_cuda", &attention_step1_backward_cuda, "attention_step1_backward_cuda"); 29 | m.def("attention_step2_forward_cuda", &attention_step2_forward_cuda, "attention_step2_forward_cuda"); 30 | m.def("attention_step2_backward_cuda", &attention_step2_backward_cuda, "attention_step2_backward_cuda"); 31 | m.def("dot_prod_with_idx_forward_cuda", &dot_prod_with_idx_forward_cuda, "dot_prod_with_idx_forward_cuda"); 32 | m.def("dot_prod_with_idx_backward_cuda", &dot_prod_with_idx_backward_cuda, "dot_prod_with_idx_backward_cuda"); 33 | m.def("attention_step2_with_rel_pos_value_forward_cuda", &attention_step2_with_rel_pos_value_forward_cuda, "attention_step2_with_rel_pos_value_forward_cuda"); 34 | m.def("attention_step2_with_rel_pos_value_backward_cuda", &attention_step2_with_rel_pos_value_backward_cuda, "attention_step2_with_rel_pos_value_backward_cuda"); 35 | m.def("attention_step1_forward_cuda_v2", &attention_step1_forward_cuda_v2, "attention_step1_forward_cuda_v2"); 36 | m.def("attention_step1_backward_cuda_v2", &attention_step1_backward_cuda_v2, "attention_step1_backward_cuda_v2"); 37 | m.def("attention_step2_forward_cuda_v2", &attention_step2_forward_cuda_v2, "attention_step2_forward_cuda_v2"); 38 | m.def("attention_step2_backward_cuda_v2", &attention_step2_backward_cuda_v2, "attention_step2_backward_cuda_v2"); 39 | m.def("dot_prod_with_idx_forward_cuda_v2", &dot_prod_with_idx_forward_cuda_v2, "dot_prod_with_idx_forward_cuda_v2"); 40 | m.def("dot_prod_with_idx_backward_cuda_v2", &dot_prod_with_idx_backward_cuda_v2, "dot_prod_with_idx_backward_cuda_v2"); 41 | m.def("attention_step2_with_rel_pos_value_forward_cuda_v2", &attention_step2_with_rel_pos_value_forward_cuda_v2, "attention_step2_with_rel_pos_value_forward_cuda_v2"); 42 | m.def("attention_step2_with_rel_pos_value_backward_cuda_v2", &attention_step2_with_rel_pos_value_backward_cuda_v2, "attention_step2_with_rel_pos_value_backward_cuda_v2"); 43 | m.def("dot_prod_with_idx_forward_cuda_v3", &dot_prod_with_idx_forward_cuda_v3, "dot_prod_with_idx_forward_cuda_v3"); 44 | m.def("dot_prod_with_idx_backward_cuda_v3", &dot_prod_with_idx_backward_cuda_v3, "dot_prod_with_idx_backward_cuda_v3"); 45 | } 46 | -------------------------------------------------------------------------------- /libs/pointops2/src/rpe/relative_pos_encoding_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "relative_pos_encoding_cuda_kernel.h" 5 | 6 | void dot_prod_with_idx_forward_cuda(int N, int M, int h, int hdim, at::Tensor q_tensor, at::Tensor index_tensor, 7 | at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor) 8 | { 9 | const float *q = q_tensor.data_ptr(); 10 | const float *table = table_tensor.data_ptr(); 11 | const int *index = index_tensor.data_ptr(); 12 | const int *rel_idx = rel_idx_tensor.data_ptr(); 13 | float *output = output_tensor.data_ptr(); 14 | dot_prod_with_idx_forward_cuda_launcher(N, M, h, hdim, q, index, table, rel_idx, output); 15 | } 16 | 17 | void dot_prod_with_idx_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, 18 | at::Tensor q_tensor, at::Tensor index_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, 19 | at::Tensor grad_q_tensor, at::Tensor grad_table_tensor) 20 | { 21 | const float *grad_out = grad_out_tensor.data_ptr(); 22 | const float *q = q_tensor.data_ptr(); 
23 | const int *index = index_tensor.data_ptr(); 24 | const float *table = table_tensor.data_ptr(); 25 | const int *rel_idx = rel_idx_tensor.data_ptr(); 26 | float *grad_q = grad_q_tensor.data_ptr(); 27 | float *grad_table = grad_table_tensor.data_ptr(); 28 | dot_prod_with_idx_backward_cuda_launcher(N, M, h, hdim, grad_out, q, index, table, rel_idx, grad_q, grad_table); 29 | } 30 | 31 | void attention_step2_with_rel_pos_value_forward_cuda(int N, int M, int h, int hdim, at::Tensor attn_tensor, at::Tensor v_tensor, 32 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor) 33 | { 34 | const float *attn = attn_tensor.data_ptr(); 35 | const float *v = v_tensor.data_ptr(); 36 | const int *index0 = index0_tensor.data_ptr(); 37 | const int *index1 = index1_tensor.data_ptr(); 38 | const float *table = table_tensor.data_ptr(); 39 | const int *rel_idx = rel_idx_tensor.data_ptr(); 40 | float *output = output_tensor.data_ptr(); 41 | attention_step2_with_rel_pos_value_forward_cuda_launcher(N, M, h, hdim, attn, v, index0, index1, table, rel_idx, output); 42 | } 43 | 44 | void attention_step2_with_rel_pos_value_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, 45 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor table_tensor, 46 | at::Tensor rel_idx_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor, at::Tensor grad_table_tensor) 47 | { 48 | const float *grad_out = grad_out_tensor.data_ptr(); 49 | const int *index0 = index0_tensor.data_ptr(); 50 | const int *index1 = index1_tensor.data_ptr(); 51 | const float *attn = attn_tensor.data_ptr(); 52 | const float *v = v_tensor.data_ptr(); 53 | const float *table = table_tensor.data_ptr(); 54 | const int *rel_idx = rel_idx_tensor.data_ptr(); 55 | float *grad_attn = grad_attn_tensor.data_ptr(); 56 | float *grad_v = grad_v_tensor.data_ptr(); 57 | float *grad_table = grad_table_tensor.data_ptr(); 58 | attention_step2_with_rel_pos_value_backward_cuda_launcher(N, M, h, hdim, grad_out, index0, index1, attn, v, table, rel_idx, grad_attn, grad_v, grad_table); 59 | } 60 | -------------------------------------------------------------------------------- /libs/pointops2/src/rpe/relative_pos_encoding_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _RPE_CUDA_KERNEL 2 | #define _RPE_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void dot_prod_with_idx_forward_cuda(int N, int M, int h, int hdim, at::Tensor q_tensor, at::Tensor index_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor); 8 | void dot_prod_with_idx_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, at::Tensor q_tensor, at::Tensor index_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor grad_q_tensor, at::Tensor grad_table_tensor); 9 | 10 | void attention_step2_with_rel_pos_value_forward_cuda(int N, int M, int h, int hdim, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor); 11 | void attention_step2_with_rel_pos_value_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor grad_attn_tensor, at::Tensor 
grad_v_tensor, at::Tensor grad_table_tensor); 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | void dot_prod_with_idx_forward_cuda_launcher(int N, int M, int h, int hdim, const float *q, const int *index, const float *table, const int *rel_idx, float *output); 18 | void dot_prod_with_idx_backward_cuda_launcher(int N, int M, int h, int hdim, const float *grad_out, const float *q, const int *index, const float *table, const int *rel_idx, float *grad_q, float *grad_table); 19 | 20 | void attention_step2_with_rel_pos_value_forward_cuda_launcher(int N, int M, int h, int hdim, const float *attn, const float *v, const int *index0, const int *index1, const float *table, const int *rel_idx, float *output); 21 | void attention_step2_with_rel_pos_value_backward_cuda_launcher(int N, int M, int h, int hdim, const float *grad_out, const int *index0, const int *index1, const float *attn, const float *v, const float *table, const int *rel_idx, float *grad_attn, float *grad_v, float *grad_table); 22 | 23 | #ifdef __cplusplus 24 | } 25 | #endif 26 | #endif 27 | -------------------------------------------------------------------------------- /libs/pointops2/src/sampling/sampling_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "sampling_cuda_kernel.h" 5 | 6 | 7 | void furthestsampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor) 8 | { 9 | const float *xyz = xyz_tensor.data_ptr(); 10 | const int *offset = offset_tensor.data_ptr(); 11 | const int *new_offset = new_offset_tensor.data_ptr(); 12 | float *tmp = tmp_tensor.data_ptr(); 13 | int *idx = idx_tensor.data_ptr(); 14 | furthestsampling_cuda_launcher(b, n, xyz, offset, new_offset, tmp, idx); 15 | } 16 | -------------------------------------------------------------------------------- /libs/pointops2/src/sampling/sampling_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _SAMPLING_CUDA_KERNEL 2 | #define _SAMPLING_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void furthestsampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor); 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | void furthestsampling_cuda_launcher(int b, int n, const float *xyz, const int *offset, const int *new_offset, float *tmp, int *idx); 14 | 15 | #ifdef __cplusplus 16 | } 17 | #endif 18 | #endif 19 | -------------------------------------------------------------------------------- /libs/pointops2/src/subtraction/subtraction_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "subtraction_cuda_kernel.h" 5 | 6 | 7 | void subtraction_forward_cuda(int n, int nsample, int c, at::Tensor input1_tensor, at::Tensor input2_tensor, at::Tensor idx_tensor, at::Tensor output_tensor) 8 | { 9 | const float *input1 = input1_tensor.data_ptr(); 10 | const float *input2 = input2_tensor.data_ptr(); 11 | const int *idx = idx_tensor.data_ptr(); 12 | float *output = output_tensor.data_ptr(); 13 | subtraction_forward_cuda_launcher(n, nsample, c, input1, input2, idx, output); 14 | } 15 | 16 | void subtraction_backward_cuda(int n, int nsample, int c, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input1_tensor, 
at::Tensor grad_input2_tensor) 17 | { 18 | const int *idx = idx_tensor.data_ptr(); 19 | const float *grad_output = grad_output_tensor.data_ptr(); 20 | float *grad_input1 = grad_input1_tensor.data_ptr(); 21 | float *grad_input2 = grad_input2_tensor.data_ptr(); 22 | subtraction_backward_cuda_launcher(n, nsample, c, idx, grad_output, grad_input1, grad_input2); 23 | } 24 | -------------------------------------------------------------------------------- /libs/pointops2/src/subtraction/subtraction_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "subtraction_cuda_kernel.h" 3 | 4 | 5 | __global__ void subtraction_forward_cuda_kernel(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) { 6 | // input: input1: (n, c), input2: (n, c), idx: (n, nsample), output: (n, nsample, c) 7 | int index = blockIdx.x * blockDim.x + threadIdx.x; 8 | if (index >= n * nsample * c) return; 9 | const int c_idx = index % c; 10 | const int nsample_idx = (index / c) % nsample; 11 | const int n_idx = index / nsample / c; 12 | const int idx_idx = n_idx * nsample + nsample_idx; 13 | const int input1_idx = n_idx * c + c_idx; 14 | const int input2_idx = idx[idx_idx] * c + c_idx; 15 | output[index] = input1[input1_idx] - input2[input2_idx]; 16 | } 17 | 18 | __global__ void subtraction_backward_cuda_kernel(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) { 19 | // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c) 20 | int index = blockIdx.x * blockDim.x + threadIdx.x; 21 | if (index >= n * nsample * c) return; 22 | const int c_idx = index % c; 23 | const int nsample_idx = (index / c) % nsample; 24 | const int n_idx = index / nsample / c; 25 | const int idx_idx = n_idx * nsample + nsample_idx; 26 | const int input1_idx = n_idx * c + c_idx; 27 | const int input2_idx = idx[idx_idx] * c + c_idx; 28 | atomicAdd(grad_input1 + input1_idx, grad_output[index]); 29 | atomicAdd(grad_input2 + input2_idx, -grad_output[index]); 30 | } 31 | 32 | void subtraction_forward_cuda_launcher(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) { 33 | // input: input1: (n, c), input2: (n, c), idx: (n, nsample), output: (n, nsample, c) 34 | dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK)); 35 | dim3 threads(THREADS_PER_BLOCK); 36 | subtraction_forward_cuda_kernel<<>>(n, nsample, c, input1, input2, idx, output); 37 | } 38 | 39 | void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) { 40 | // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c) 41 | dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK)); 42 | dim3 threads(THREADS_PER_BLOCK); 43 | subtraction_backward_cuda_kernel<<>>(n, nsample, c, idx, grad_output, grad_input1, grad_input2); 44 | } 45 | -------------------------------------------------------------------------------- /libs/pointops2/src/subtraction/subtraction_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _SUBTRACTION_CUDA_KERNEL 2 | #define _SUBTRACTION_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void subtraction_forward_cuda(int n, int nsample, int c, at::Tensor input1_tensor, at::Tensor input2_tensor, at::Tensor idx_tensor, at::Tensor output_tensor); 8 
| void subtraction_backward_cuda(int n, int nsample, int c, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input1_tensor, at::Tensor grad_input2_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void subtraction_forward_cuda_launcher(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output); 15 | void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /pointcept/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/__init__.py -------------------------------------------------------------------------------- /pointcept/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /pointcept/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .defaults import DefaultDataset, ConcatDataset 2 | from .builder import build_dataset 3 | from .utils import point_collate_fn, collate_fn 4 | 5 | # outdoor scene 6 | from .semantic_kitti_multi_scans import SemanticKITTIMultiScansDataset 7 | # dataloader 8 | from .dataloader import MultiDatasetDataloader 9 | -------------------------------------------------------------------------------- /pointcept/datasets/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/datasets/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/datasets/__pycache__/builder.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/datasets/__pycache__/builder.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/datasets/__pycache__/dataloader.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/datasets/__pycache__/dataloader.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/datasets/__pycache__/defaults.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/datasets/__pycache__/defaults.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/datasets/__pycache__/semantic_kitti_multi_scans.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/datasets/__pycache__/semantic_kitti_multi_scans.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/datasets/__pycache__/transform.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/datasets/__pycache__/transform.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/datasets/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/datasets/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/datasets/builder.py: -------------------------------------------------------------------------------- 1 | """ 2 | Dataset Builder 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 6 | """ 7 | 8 | 9 | from pointcept.utils.registry import Registry 10 | 11 | DATASETS = Registry("datasets") 12 | 13 | 14 | def build_dataset(cfg): 15 | """Build datasets.""" 16 | return DATASETS.build(cfg) 17 | -------------------------------------------------------------------------------- /pointcept/datasets/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utils for Datasets 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 
6 | """ 7 | 8 | import random 9 | from collections.abc import Mapping, Sequence 10 | import numpy as np 11 | import torch 12 | from torch.utils.data.dataloader import default_collate 13 | 14 | 15 | def collate_fn(batch): 16 | """ 17 | collate function for point cloud which support dict and list, 18 | 'coord' is necessary to determine 'offset' 19 | """ 20 | if not isinstance(batch, Sequence): 21 | raise TypeError(f"{batch.dtype} is not supported.") 22 | 23 | if isinstance(batch[0], torch.Tensor): 24 | return torch.cat(list(batch)) 25 | elif isinstance(batch[0], str): 26 | # str is also a kind of Sequence, judgement should before Sequence 27 | return list(batch) 28 | elif isinstance(batch[0], Sequence): 29 | for data in batch: 30 | data.append(torch.tensor([data[0].shape[0]])) 31 | batch = [collate_fn(samples) for samples in zip(*batch)] 32 | batch[-1] = torch.cumsum(batch[-1], dim=0).int() 33 | return batch 34 | elif isinstance(batch[0], Mapping): 35 | batch = {key: collate_fn([d[key] for d in batch]) for key in batch[0]} 36 | for key in batch.keys(): 37 | if "offset" in key: 38 | batch[key] = torch.cumsum(batch[key], dim=0) 39 | return batch 40 | else: 41 | return default_collate(batch) 42 | 43 | 44 | def point_collate_fn(batch, mix_prob=0): 45 | assert isinstance( 46 | batch[0], Mapping 47 | ) # currently, only support input_dict, rather than input_list 48 | batch = collate_fn(batch) 49 | if "offset" in batch.keys(): 50 | # Mix3d (https://arxiv.org/pdf/2110.02210.pdf) 51 | if random.random() < mix_prob: 52 | batch["offset"] = torch.cat( 53 | [batch["offset"][1:-1:2], batch["offset"][-1].unsqueeze(0)], dim=0 54 | ) 55 | return batch 56 | 57 | 58 | def gaussian_kernel(dist2: np.array, a: float = 1, c: float = 5): 59 | return a * np.exp(-dist2 / (2 * c**2)) 60 | -------------------------------------------------------------------------------- /pointcept/engines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/engines/__init__.py -------------------------------------------------------------------------------- /pointcept/engines/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/engines/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/engines/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/engines/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /pointcept/engines/__pycache__/defaults.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/engines/__pycache__/defaults.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/engines/__pycache__/defaults.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/engines/__pycache__/defaults.cpython-39.pyc -------------------------------------------------------------------------------- /pointcept/engines/__pycache__/launch.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/engines/__pycache__/launch.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/engines/__pycache__/test.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/engines/__pycache__/test.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/engines/__pycache__/train.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/engines/__pycache__/train.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/engines/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | from .default import HookBase 2 | from .misc import * 3 | from .evaluator import * 4 | 5 | from .builder import build_hooks 6 | -------------------------------------------------------------------------------- /pointcept/engines/hooks/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/engines/hooks/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/engines/hooks/__pycache__/builder.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/engines/hooks/__pycache__/builder.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/engines/hooks/__pycache__/default.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/engines/hooks/__pycache__/default.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/engines/hooks/__pycache__/evaluator.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/engines/hooks/__pycache__/evaluator.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/engines/hooks/__pycache__/misc.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/engines/hooks/__pycache__/misc.cpython-38.pyc -------------------------------------------------------------------------------- 
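A usage sketch for the collate helpers in pointcept/datasets/utils.py above. The key name "strength" and the point counts are illustrative assumptions; only the concatenate-then-cumsum behaviour for "offset" is taken from collate_fn / point_collate_fn themselves.

import torch
from pointcept.datasets.utils import point_collate_fn

# two toy scans with 4 and 6 points; in the real pipeline a transform adds the
# per-sample "offset" entry (assumed here), and "strength" is a hypothetical feature key
a = dict(coord=torch.rand(4, 3), strength=torch.rand(4, 1), offset=torch.tensor([4]))
b = dict(coord=torch.rand(6, 3), strength=torch.rand(6, 1), offset=torch.tensor([6]))

batch = point_collate_fn([a, b], mix_prob=0.0)
print(batch["coord"].shape)  # torch.Size([10, 3]) -- samples are concatenated, not stacked
print(batch["offset"])       # tensor([ 4, 10])    -- cumulative point counts per sample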
/pointcept/engines/hooks/builder.py: -------------------------------------------------------------------------------- 1 | """ 2 | Hook Builder 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 6 | """ 7 | 8 | from pointcept.utils.registry import Registry 9 | 10 | 11 | HOOKS = Registry("hooks") 12 | 13 | 14 | def build_hooks(cfg): 15 | hooks = [] 16 | for hook_cfg in cfg: 17 | hooks.append(HOOKS.build(hook_cfg)) 18 | return hooks 19 | -------------------------------------------------------------------------------- /pointcept/engines/hooks/default.py: -------------------------------------------------------------------------------- 1 | """ 2 | Default Hook 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 6 | """ 7 | 8 | 9 | class HookBase: 10 | """ 11 | Base class for hooks that can be registered with :class:`TrainerBase`. 12 | """ 13 | 14 | trainer = None # A weak reference to the trainer object. 15 | 16 | def before_train(self): 17 | pass 18 | 19 | def before_epoch(self): 20 | pass 21 | 22 | def before_step(self): 23 | pass 24 | 25 | def after_step(self): 26 | pass 27 | 28 | def after_epoch(self): 29 | pass 30 | 31 | def after_train(self): 32 | pass 33 | -------------------------------------------------------------------------------- /pointcept/models/MambaMOS/__init__.py: -------------------------------------------------------------------------------- 1 | from .MambaMOS import * -------------------------------------------------------------------------------- /pointcept/models/MambaMOS/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/models/MambaMOS/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_model 2 | from .default import DefaultSegmentor, DefaultClassifier 3 | 4 | # Backbones 5 | from .MambaMOS import * -------------------------------------------------------------------------------- /pointcept/models/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/models/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/models/__pycache__/builder.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/models/__pycache__/builder.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/models/__pycache__/default.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/models/__pycache__/default.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/models/__pycache__/modules.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/models/__pycache__/modules.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/models/builder.py: -------------------------------------------------------------------------------- 1 | """ 2 | Model Builder 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 6 | """ 7 | 8 | from pointcept.utils.registry import Registry 9 | 10 | MODELS = Registry("models") 11 | MODULES = Registry("modules") 12 | 13 | 14 | def build_model(cfg): 15 | """Build models.""" 16 | return MODELS.build(cfg) 17 | -------------------------------------------------------------------------------- /pointcept/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_criteria 2 | 3 | from .misc import CrossEntropyLoss, SmoothCELoss, DiceLoss, FocalLoss, BinaryFocalLoss 4 | from .lovasz import LovaszLoss 5 | -------------------------------------------------------------------------------- /pointcept/models/losses/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/models/losses/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/models/losses/__pycache__/builder.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/models/losses/__pycache__/builder.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/models/losses/__pycache__/lovasz.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/models/losses/__pycache__/lovasz.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/models/losses/__pycache__/misc.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/models/losses/__pycache__/misc.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/models/losses/builder.py: -------------------------------------------------------------------------------- 1 | """ 2 | Criteria Builder 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 
6 | """ 7 | 8 | from pointcept.utils.registry import Registry 9 | 10 | LOSSES = Registry("losses") 11 | 12 | 13 | class Criteria(object): 14 | def __init__(self, cfg=None): 15 | self.cfg = cfg if cfg is not None else [] 16 | self.criteria = [] 17 | for loss_cfg in self.cfg: 18 | self.criteria.append(LOSSES.build(cfg=loss_cfg)) 19 | 20 | def __call__(self, pred, target): 21 | if len(self.criteria) == 0: 22 | # loss computation occur in model 23 | return pred 24 | loss = 0 25 | for c in self.criteria: 26 | loss += c(pred, target) 27 | return loss 28 | 29 | 30 | def build_criteria(cfg): 31 | return Criteria(cfg) 32 | -------------------------------------------------------------------------------- /pointcept/models/modules.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import torch.nn as nn 3 | import spconv.pytorch as spconv 4 | from collections import OrderedDict 5 | from pointcept.models.utils.structure import Point 6 | 7 | 8 | class PointModule(nn.Module): 9 | r"""PointModule 10 | placeholder, all module subclass from this will take Point in PointSequential. 11 | """ 12 | 13 | def __init__(self, *args, **kwargs): 14 | super().__init__(*args, **kwargs) 15 | 16 | 17 | class PointSequential(PointModule): 18 | r"""A sequential container. 19 | Modules will be added to it in the order they are passed in the constructor. 20 | Alternatively, an ordered dict of modules can also be passed in. 21 | """ 22 | 23 | def __init__(self, *args, **kwargs): 24 | super().__init__() 25 | if len(args) == 1 and isinstance(args[0], OrderedDict): 26 | for key, module in args[0].items(): 27 | self.add_module(key, module) 28 | else: 29 | for idx, module in enumerate(args): 30 | self.add_module(str(idx), module) 31 | for name, module in kwargs.items(): 32 | if sys.version_info < (3, 6): 33 | raise ValueError("kwargs only supported in py36+") 34 | if name in self._modules: 35 | raise ValueError("name exists.") 36 | self.add_module(name, module) 37 | 38 | def __getitem__(self, idx): 39 | if not (-len(self) <= idx < len(self)): 40 | raise IndexError("index {} is out of range".format(idx)) 41 | if idx < 0: 42 | idx += len(self) 43 | it = iter(self._modules.values()) 44 | for i in range(idx): 45 | next(it) 46 | return next(it) 47 | 48 | def __len__(self): 49 | return len(self._modules) 50 | 51 | def add(self, module, name=None): 52 | if name is None: 53 | name = str(len(self._modules)) 54 | if name in self._modules: 55 | raise KeyError("name exists") 56 | self.add_module(name, module) 57 | 58 | def forward(self, input): 59 | for k, module in self._modules.items(): 60 | # Point module 61 | if isinstance(module, PointModule): 62 | input = module(input) 63 | # Spconv module 64 | elif spconv.modules.is_spconv_module(module): 65 | if isinstance(input, Point): 66 | input.sparse_conv_feat = module(input.sparse_conv_feat) 67 | input.feat = input.sparse_conv_feat.features 68 | else: 69 | input = module(input) 70 | # PyTorch module 71 | else: 72 | if isinstance(input, Point): 73 | input.feat = module(input.feat) 74 | if "sparse_conv_feat" in input.keys(): 75 | input.sparse_conv_feat = input.sparse_conv_feat.replace_feature( 76 | input.feat 77 | ) 78 | elif isinstance(input, spconv.SparseConvTensor): 79 | if input.indices.shape[0] != 0: 80 | input = input.replace_feature(module(input.features)) 81 | else: 82 | input = module(input) 83 | return input 84 | -------------------------------------------------------------------------------- /pointcept/models/utils/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .misc import offset2batch, offset2bincount, batch2offset, off_diagonal 2 | from .checkpoint import checkpoint 3 | from .serialization import encode, decode 4 | from .structure import Point 5 | -------------------------------------------------------------------------------- /pointcept/models/utils/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/models/utils/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/models/utils/__pycache__/checkpoint.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/models/utils/__pycache__/checkpoint.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/models/utils/__pycache__/misc.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/models/utils/__pycache__/misc.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/models/utils/__pycache__/structure.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/models/utils/__pycache__/structure.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/models/utils/checkpoint.py: -------------------------------------------------------------------------------- 1 | """ 2 | Checkpoint Utils for Models 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 6 | """ 7 | 8 | 9 | import torch 10 | 11 | 12 | class CheckpointFunction(torch.autograd.Function): 13 | @staticmethod 14 | def forward(ctx, run_function, length, *args): 15 | ctx.run_function = run_function 16 | ctx.input_tensors = list(args[:length]) 17 | ctx.input_params = list(args[length:]) 18 | 19 | with torch.no_grad(): 20 | output_tensors = ctx.run_function(*ctx.input_tensors) 21 | return output_tensors 22 | 23 | @staticmethod 24 | def backward(ctx, *output_grads): 25 | ctx.input_tensors = [x.detach().requires_grad_(True) for x in ctx.input_tensors] 26 | with torch.enable_grad(): 27 | # Fixes a bug where the first op in run_function modifies the 28 | # Tensor storage in place, which is not allowed for detach()'d 29 | # Tensors. 30 | shallow_copies = [x.view_as(x) for x in ctx.input_tensors] 31 | output_tensors = ctx.run_function(*shallow_copies) 32 | input_grads = torch.autograd.grad( 33 | output_tensors, 34 | ctx.input_tensors + ctx.input_params, 35 | output_grads, 36 | allow_unused=True, 37 | ) 38 | del ctx.input_tensors 39 | del ctx.input_params 40 | del output_tensors 41 | return (None, None) + input_grads 42 | 43 | 44 | def checkpoint(func, inputs, params, flag): 45 | """ 46 | Evaluate a function without caching intermediate activations, allowing for 47 | reduced memory at the expense of extra compute in the backward pass. 48 | :param func: the function to evaluate. 
49 | :param inputs: the argument sequence to pass to `func`. 50 | :param params: a sequence of parameters `func` depends on but does not 51 | explicitly take as arguments. 52 | :param flag: if False, disable gradient checkpointing. 53 | """ 54 | if flag: 55 | args = tuple(inputs) + tuple(params) 56 | return CheckpointFunction.apply(func, len(inputs), *args) 57 | else: 58 | return func(*inputs) 59 | -------------------------------------------------------------------------------- /pointcept/models/utils/misc.py: -------------------------------------------------------------------------------- 1 | """ 2 | General Utils for Models 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 6 | """ 7 | 8 | import torch 9 | 10 | 11 | @torch.inference_mode() 12 | def offset2bincount(offset): 13 | return torch.diff( 14 | offset, prepend=torch.tensor([0], device=offset.device, dtype=torch.long) 15 | ) 16 | 17 | 18 | @torch.inference_mode() 19 | def offset2batch(offset): 20 | bincount = offset2bincount(offset) 21 | return torch.arange( 22 | len(bincount), device=offset.device, dtype=torch.long 23 | ).repeat_interleave(bincount) 24 | 25 | 26 | @torch.inference_mode() 27 | def batch2offset(batch): 28 | return torch.cumsum(batch.bincount(), dim=0).long() 29 | 30 | 31 | def off_diagonal(x): 32 | # return a flattened view of the off-diagonal elements of a square matrix 33 | n, m = x.shape 34 | assert n == m 35 | return x.flatten()[:-1].view(n - 1, n + 1)[:, 1:].flatten() 36 | -------------------------------------------------------------------------------- /pointcept/models/utils/serialization/__init__.py: -------------------------------------------------------------------------------- 1 | from .default import ( 2 | encode, 3 | decode, 4 | z_order_encode, 5 | z_order_decode, 6 | hilbert_encode, 7 | hilbert_decode, 8 | ) 9 | -------------------------------------------------------------------------------- /pointcept/models/utils/serialization/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/models/utils/serialization/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/models/utils/serialization/__pycache__/default.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/models/utils/serialization/__pycache__/default.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/models/utils/serialization/__pycache__/hilbert.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/models/utils/serialization/__pycache__/hilbert.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/models/utils/serialization/__pycache__/z_order.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/models/utils/serialization/__pycache__/z_order.cpython-38.pyc 
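A quick round-trip check for the offset helpers in pointcept/models/utils/misc.py above, using the same toy cumulative sizes as the collate example earlier (values assumed):

import torch
from pointcept.models.utils import offset2bincount, offset2batch, batch2offset

offset = torch.tensor([4, 10])    # cumulative sizes, as produced by collate_fn
print(offset2bincount(offset))    # tensor([4, 6]) -- points per sample
batch = offset2batch(offset)      # tensor([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
print(batch2offset(batch))        # tensor([ 4, 10]) -- inverse mapping back to offsets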
-------------------------------------------------------------------------------- /pointcept/models/utils/serialization/default.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from .z_order import xyz2key as z_order_encode_ 3 | from .z_order import key2xyz as z_order_decode_ 4 | from .hilbert import encode as hilbert_encode_ 5 | from .hilbert import decode as hilbert_decode_ 6 | 7 | @torch.inference_mode() 8 | def encode(grid_coord, batch=None, depth=16, order="z"): 9 | assert order in {"z", "z-trans", "hilbert", "hilbert-trans"} 10 | if order == "z": 11 | code = z_order_encode(grid_coord, depth=depth) 12 | elif order == "z-trans": 13 | code = z_order_encode(grid_coord[:, [1, 0, 2]], depth=depth) 14 | elif order == "hilbert": 15 | code = hilbert_encode(grid_coord, depth=depth) 16 | elif order == "hilbert-trans": 17 | code = hilbert_encode(grid_coord[:, [1, 0, 2]], depth=depth) 18 | else: 19 | raise NotImplementedError 20 | if batch is not None: 21 | batch = batch.long() 22 | code = batch << depth * 3 | code 23 | return code 24 | 25 | 26 | @torch.inference_mode() 27 | def decode(code, depth=16, order="z"): 28 | assert order in {"z", "hilbert"} 29 | batch = code >> depth * 3 30 | code = code & ((1 << depth * 3) - 1) 31 | if order == "z": 32 | grid_coord = z_order_decode(code, depth=depth) 33 | elif order == "hilbert": 34 | grid_coord = hilbert_decode(code, depth=depth) 35 | else: 36 | raise NotImplementedError 37 | return grid_coord, batch 38 | 39 | 40 | def z_order_encode(grid_coord: torch.Tensor, depth: int = 16): 41 | x, y, z = grid_coord[:, 0].long(), grid_coord[:, 1].long(), grid_coord[:, 2].long() 42 | # we block the support to batch, maintain batched code in Point class 43 | code = z_order_encode_(x, y, z, b=None, depth=depth) 44 | return code 45 | 46 | 47 | def z_order_decode(code: torch.Tensor, depth): 48 | x, y, z = z_order_decode_(code, depth=depth) 49 | grid_coord = torch.stack([x, y, z], dim=-1) # (N, 3) 50 | return grid_coord 51 | 52 | 53 | def hilbert_encode(grid_coord: torch.Tensor, depth: int = 16): 54 | return hilbert_encode_(grid_coord, num_dims=3, num_bits=depth) 55 | 56 | 57 | def hilbert_decode(code: torch.Tensor, depth: int = 16): 58 | return hilbert_decode_(code, num_dims=3, num_bits=depth) 59 | -------------------------------------------------------------------------------- /pointcept/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/utils/__init__.py -------------------------------------------------------------------------------- /pointcept/utils/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/utils/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/utils/__pycache__/cache.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/utils/__pycache__/cache.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/utils/__pycache__/comm.cpython-38.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/utils/__pycache__/comm.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/utils/__pycache__/config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/utils/__pycache__/config.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/utils/__pycache__/env.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/utils/__pycache__/env.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/utils/__pycache__/events.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/utils/__pycache__/events.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/utils/__pycache__/logger.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/utils/__pycache__/logger.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/utils/__pycache__/misc.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/utils/__pycache__/misc.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/utils/__pycache__/optimizer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/utils/__pycache__/optimizer.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/utils/__pycache__/path.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/utils/__pycache__/path.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/utils/__pycache__/registry.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/utils/__pycache__/registry.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/utils/__pycache__/scheduler.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/utils/__pycache__/scheduler.cpython-38.pyc -------------------------------------------------------------------------------- 
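A minimal sketch of the serialization entry points defined in pointcept/models/utils/serialization/default.py above (re-exported through pointcept.models.utils). The coordinates are toy values, and the round-trip comment is an expectation rather than an asserted result:

import torch
from pointcept.models.utils import encode, decode

grid_coord = torch.tensor([[0, 0, 0], [1, 2, 3], [5, 4, 7]])  # non-negative voxel indices < 2**depth
batch = torch.tensor([0, 0, 1])

code = encode(grid_coord, batch=batch, depth=16, order="z")  # batch id packed into bits >= depth * 3
grid_back, batch_back = decode(code, depth=16, order="z")    # should recover grid_coord and batch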
/pointcept/utils/__pycache__/timer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terminal-K/MambaMOS/78370d0f022d0e6cb3a56d98342e7ae1014e440c/pointcept/utils/__pycache__/timer.cpython-38.pyc -------------------------------------------------------------------------------- /pointcept/utils/cache.py: -------------------------------------------------------------------------------- 1 | """ 2 | Data Cache Utils 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 6 | """ 7 | 8 | import os 9 | import SharedArray 10 | 11 | try: 12 | from multiprocessing.shared_memory import ShareableList 13 | except ImportError: 14 | import warnings 15 | 16 | warnings.warn("Please update python version >= 3.8 to enable shared_memory") 17 | import numpy as np 18 | 19 | 20 | def shared_array(name, var=None): 21 | if var is not None: 22 | # check exist 23 | if os.path.exists(f"/dev/shm/{name}"): 24 | return SharedArray.attach(f"shm://{name}") 25 | # create shared_array 26 | data = SharedArray.create(f"shm://{name}", var.shape, dtype=var.dtype) 27 | data[...] = var[...] 28 | data.flags.writeable = False 29 | else: 30 | data = SharedArray.attach(f"shm://{name}").copy() 31 | return data 32 | 33 | 34 | def shared_dict(name, var=None): 35 | name = str(name) 36 | assert "." not in name # '.' is used as sep flag 37 | data = {} 38 | if var is not None: 39 | assert isinstance(var, dict) 40 | keys = var.keys() 41 | # current version only cache np.array 42 | keys_valid = [] 43 | for key in keys: 44 | if isinstance(var[key], np.ndarray): 45 | keys_valid.append(key) 46 | keys = keys_valid 47 | 48 | ShareableList(sequence=keys, name=name + ".keys") 49 | for key in keys: 50 | if isinstance(var[key], np.ndarray): 51 | data[key] = shared_array(name=f"{name}.{key}", var=var[key]) 52 | else: 53 | keys = list(ShareableList(name=name + ".keys")) 54 | for key in keys: 55 | data[key] = shared_array(name=f"{name}.{key}") 56 | return data 57 | -------------------------------------------------------------------------------- /pointcept/utils/env.py: -------------------------------------------------------------------------------- 1 | """ 2 | Environment Utils 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 6 | """ 7 | 8 | import os 9 | import random 10 | import numpy as np 11 | import torch 12 | import torch.backends.cudnn as cudnn 13 | 14 | from datetime import datetime 15 | 16 | 17 | def get_random_seed(): 18 | seed = ( 19 | os.getpid() 20 | + int(datetime.now().strftime("%S%f")) 21 | + int.from_bytes(os.urandom(2), "big") 22 | ) 23 | return seed 24 | 25 | 26 | def set_seed(seed=None): 27 | if seed is None: 28 | seed = get_random_seed() 29 | random.seed(seed) 30 | np.random.seed(seed) 31 | torch.manual_seed(seed) 32 | torch.cuda.manual_seed(seed) 33 | torch.cuda.manual_seed_all(seed) 34 | # cudnn.benchmark = False 35 | # cudnn.deterministic = True 36 | os.environ["PYTHONHASHSEED"] = str(seed) 37 | -------------------------------------------------------------------------------- /pointcept/utils/optimizer.py: -------------------------------------------------------------------------------- 1 | """ 2 | Optimizer 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 
6 | """ 7 | 8 | import torch 9 | from pointcept.utils.logger import get_root_logger 10 | from pointcept.utils.registry import Registry 11 | 12 | OPTIMIZERS = Registry("optimizers") 13 | 14 | 15 | OPTIMIZERS.register_module(module=torch.optim.SGD, name="SGD") 16 | OPTIMIZERS.register_module(module=torch.optim.Adam, name="Adam") 17 | OPTIMIZERS.register_module(module=torch.optim.AdamW, name="AdamW") 18 | 19 | 20 | def build_optimizer(cfg, model, param_dicts=None): 21 | if param_dicts is None: 22 | cfg.params = model.parameters() 23 | else: 24 | cfg.params = [dict(names=[], params=[], lr=cfg.lr)] 25 | for i in range(len(param_dicts)): 26 | param_group = dict(names=[], params=[]) 27 | if "lr" in param_dicts[i].keys(): 28 | param_group["lr"] = param_dicts[i].lr 29 | if "momentum" in param_dicts[i].keys(): 30 | param_group["momentum"] = param_dicts[i].momentum 31 | if "weight_decay" in param_dicts[i].keys(): 32 | param_group["weight_decay"] = param_dicts[i].weight_decay 33 | cfg.params.append(param_group) 34 | 35 | for n, p in model.named_parameters(): 36 | flag = False 37 | for i in range(len(param_dicts)): 38 | if param_dicts[i].keyword in n: 39 | cfg.params[i + 1]["names"].append(n) 40 | cfg.params[i + 1]["params"].append(p) 41 | flag = True 42 | break 43 | if not flag: 44 | cfg.params[0]["names"].append(n) 45 | cfg.params[0]["params"].append(p) 46 | 47 | logger = get_root_logger() 48 | 49 | for i in range(len(cfg.params)): 50 | param_names = cfg.params[i].pop("names") 51 | message = "" 52 | for key in cfg.params[i].keys(): 53 | if key != "params": 54 | message += f" {key}: {cfg.params[i][key]};" 55 | logger.info(f"Params Group {i+1} -{message} Params: {param_names}.") 56 | return OPTIMIZERS.build(cfg=cfg) 57 | -------------------------------------------------------------------------------- /pointcept/utils/path.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import os 3 | import os.path as osp 4 | from pathlib import Path 5 | 6 | from .misc import is_str 7 | 8 | 9 | def is_filepath(x): 10 | return is_str(x) or isinstance(x, Path) 11 | 12 | 13 | def fopen(filepath, *args, **kwargs): 14 | if is_str(filepath): 15 | return open(filepath, *args, **kwargs) 16 | elif isinstance(filepath, Path): 17 | return filepath.open(*args, **kwargs) 18 | raise ValueError("`filepath` should be a string or a Path") 19 | 20 | 21 | def check_file_exist(filename, msg_tmpl='file "{}" does not exist'): 22 | if not osp.isfile(filename): 23 | raise FileNotFoundError(msg_tmpl.format(filename)) 24 | 25 | 26 | def mkdir_or_exist(dir_name, mode=0o777): 27 | if dir_name == "": 28 | return 29 | dir_name = osp.expanduser(dir_name) 30 | os.makedirs(dir_name, mode=mode, exist_ok=True) 31 | 32 | 33 | def symlink(src, dst, overwrite=True, **kwargs): 34 | if os.path.lexists(dst) and overwrite: 35 | os.remove(dst) 36 | os.symlink(src, dst, **kwargs) 37 | 38 | 39 | def scandir(dir_path, suffix=None, recursive=False, case_sensitive=True): 40 | """Scan a directory to find the interested files. 41 | 42 | Args: 43 | dir_path (str | obj:`Path`): Path of the directory. 44 | suffix (str | tuple(str), optional): File suffix that we are 45 | interested in. Default: None. 46 | recursive (bool, optional): If set to True, recursively scan the 47 | directory. Default: False. 48 | case_sensitive (bool, optional) : If set to False, ignore the case of 49 | suffix. Default: True. 
50 | 51 | Returns: 52 | A generator for all the interested files with relative paths. 53 | """ 54 | if isinstance(dir_path, (str, Path)): 55 | dir_path = str(dir_path) 56 | else: 57 | raise TypeError('"dir_path" must be a string or Path object') 58 | 59 | if (suffix is not None) and not isinstance(suffix, (str, tuple)): 60 | raise TypeError('"suffix" must be a string or tuple of strings') 61 | 62 | if suffix is not None and not case_sensitive: 63 | suffix = ( 64 | suffix.lower() 65 | if isinstance(suffix, str) 66 | else tuple(item.lower() for item in suffix) 67 | ) 68 | 69 | root = dir_path 70 | 71 | def _scandir(dir_path, suffix, recursive, case_sensitive): 72 | for entry in os.scandir(dir_path): 73 | if not entry.name.startswith(".") and entry.is_file(): 74 | rel_path = osp.relpath(entry.path, root) 75 | _rel_path = rel_path if case_sensitive else rel_path.lower() 76 | if suffix is None or _rel_path.endswith(suffix): 77 | yield rel_path 78 | elif recursive and os.path.isdir(entry.path): 79 | # scan recursively if entry.path is a directory 80 | yield from _scandir(entry.path, suffix, recursive, case_sensitive) 81 | 82 | return _scandir(dir_path, suffix, recursive, case_sensitive) 83 | 84 | 85 | def find_vcs_root(path, markers=(".git",)): 86 | """Finds the root directory (including itself) of specified markers. 87 | 88 | Args: 89 | path (str): Path of directory or file. 90 | markers (list[str], optional): List of file or directory names. 91 | 92 | Returns: 93 | The directory contained one of the markers or None if not found. 94 | """ 95 | if osp.isfile(path): 96 | path = osp.dirname(path) 97 | 98 | prev, cur = None, osp.abspath(osp.expanduser(path)) 99 | while cur != prev: 100 | if any(osp.exists(osp.join(cur, marker)) for marker in markers): 101 | return cur 102 | prev, cur = cur, osp.split(cur)[0] 103 | return None 104 | -------------------------------------------------------------------------------- /pointcept/utils/timer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # -*- coding: utf-8 -*- 3 | 4 | from time import perf_counter 5 | from typing import Optional 6 | 7 | 8 | class Timer: 9 | """ 10 | A timer which computes the time elapsed since the start/reset of the timer. 11 | """ 12 | 13 | def __init__(self) -> None: 14 | self.reset() 15 | 16 | def reset(self) -> None: 17 | """ 18 | Reset the timer. 19 | """ 20 | self._start = perf_counter() 21 | self._paused: Optional[float] = None 22 | self._total_paused = 0 23 | self._count_start = 1 24 | 25 | def pause(self) -> None: 26 | """ 27 | Pause the timer. 28 | """ 29 | if self._paused is not None: 30 | raise ValueError("Trying to pause a Timer that is already paused!") 31 | self._paused = perf_counter() 32 | 33 | def is_paused(self) -> bool: 34 | """ 35 | Returns: 36 | bool: whether the timer is currently paused 37 | """ 38 | return self._paused is not None 39 | 40 | def resume(self) -> None: 41 | """ 42 | Resume the timer. 43 | """ 44 | if self._paused is None: 45 | raise ValueError("Trying to resume a Timer that is not paused!") 46 | # pyre-fixme[58]: `-` is not supported for operand types `float` and 47 | # `Optional[float]`. 48 | self._total_paused += perf_counter() - self._paused 49 | self._paused = None 50 | self._count_start += 1 51 | 52 | def seconds(self) -> float: 53 | """ 54 | Returns: 55 | (float): the total number of seconds since the start/reset of the 56 | timer, excluding the time when the timer is paused. 
57 | """ 58 | if self._paused is not None: 59 | end_time: float = self._paused # type: ignore 60 | else: 61 | end_time = perf_counter() 62 | return end_time - self._start - self._total_paused 63 | 64 | def avg_seconds(self) -> float: 65 | """ 66 | Returns: 67 | (float): the average number of seconds between every start/reset and 68 | pause. 69 | """ 70 | return self.seconds() / self._count_start 71 | -------------------------------------------------------------------------------- /pointcept/utils/visualization.py: -------------------------------------------------------------------------------- 1 | """ 2 | Visualization Utils 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 6 | """ 7 | 8 | import os 9 | import open3d as o3d 10 | import numpy as np 11 | import torch 12 | 13 | 14 | def to_numpy(x): 15 | if isinstance(x, torch.Tensor): 16 | x = x.clone().detach().cpu().numpy() 17 | assert isinstance(x, np.ndarray) 18 | return x 19 | 20 | 21 | def save_point_cloud(coord, color=None, file_path="pc.ply", logger=None): 22 | os.makedirs(os.path.dirname(file_path), exist_ok=True) 23 | coord = to_numpy(coord) 24 | if color is not None: 25 | color = to_numpy(color) 26 | pcd = o3d.geometry.PointCloud() 27 | pcd.points = o3d.utility.Vector3dVector(coord) 28 | pcd.colors = o3d.utility.Vector3dVector( 29 | np.ones_like(coord) if color is None else color 30 | ) 31 | o3d.io.write_point_cloud(file_path, pcd) 32 | if logger is not None: 33 | logger.info(f"Save Point Cloud to: {file_path}") 34 | 35 | 36 | def save_bounding_boxes( 37 | bboxes_corners, color=(1.0, 0.0, 0.0), file_path="bbox.ply", logger=None 38 | ): 39 | bboxes_corners = to_numpy(bboxes_corners) 40 | # point list 41 | points = bboxes_corners.reshape(-1, 3) 42 | # line list 43 | box_lines = np.array( 44 | [ 45 | [0, 1], 46 | [1, 2], 47 | [2, 3], 48 | [3, 0], 49 | [4, 5], 50 | [5, 6], 51 | [6, 7], 52 | [7, 0], 53 | [0, 4], 54 | [1, 5], 55 | [2, 6], 56 | [3, 7], 57 | ] 58 | ) 59 | lines = [] 60 | for i, _ in enumerate(bboxes_corners): 61 | lines.append(box_lines + i * 8) 62 | lines = np.concatenate(lines) 63 | # color list 64 | color = np.array([color for _ in range(len(lines))]) 65 | # generate line set 66 | line_set = o3d.geometry.LineSet() 67 | line_set.points = o3d.utility.Vector3dVector(points) 68 | line_set.lines = o3d.utility.Vector2iVector(lines) 69 | line_set.colors = o3d.utility.Vector3dVector(color) 70 | o3d.io.write_line_set(file_path, line_set) 71 | 72 | if logger is not None: 73 | logger.info(f"Save Boxes to: {file_path}") 74 | 75 | 76 | def save_lines( 77 | points, lines, color=(1.0, 0.0, 0.0), file_path="lines.ply", logger=None 78 | ): 79 | points = to_numpy(points) 80 | lines = to_numpy(lines) 81 | colors = np.array([color for _ in range(len(lines))]) 82 | line_set = o3d.geometry.LineSet() 83 | line_set.points = o3d.utility.Vector3dVector(points) 84 | line_set.lines = o3d.utility.Vector2iVector(lines) 85 | line_set.colors = o3d.utility.Vector3dVector(colors) 86 | o3d.io.write_line_set(file_path, line_set) 87 | 88 | if logger is not None: 89 | logger.info(f"Save Lines to: {file_path}") 90 | -------------------------------------------------------------------------------- /scripts/build_image.sh: -------------------------------------------------------------------------------- 1 | TORCH_VERSION=2.0.1 2 | CUDA_VERSION=11.7 3 | CUDNN_VERSION=8 4 | 5 | ARGS=`getopt -o t:c: -l torch:,cuda:,cudnn: -n "$0" -- "$@"` 6 | [ $? 
!= 0 ] && exit 1 7 | eval set -- "${ARGS}" 8 | while true ; do 9 | case "$1" in 10 | -t | --torch) 11 | TORCH_VERSION=$2 12 | shift 2 13 | ;; 14 | -c | --cuda) 15 | CUDA_VERSION=$2 16 | shift 2 17 | ;; 18 | --cudnn) 19 | CUDNN_VERSION=$2 20 | shift 2 21 | ;; 22 | --) 23 | break 24 | ;; 25 | *) 26 | echo "Invalid option: $1" 27 | exit 1 28 | ;; 29 | esac 30 | done 31 | 32 | CUDA_VERSION_NO_DOT=`echo ${CUDA_VERSION} | tr -d "."` 33 | BASE_TORCH_TAG=${TORCH_VERSION}-cuda${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel 34 | IMG_TAG=pointcept/pointcept:pytorch${BASE_TORCH_TAG} 35 | 36 | echo "TORCH VERSION: ${TORCH_VERSION}" 37 | echo "CUDA VERSION: ${CUDA_VERSION}" 38 | echo "CUDNN VERSION: ${CUDNN_VERSION}" 39 | 40 | 41 | cat > ./Dockerfile <<- EOM 42 | FROM pytorch/pytorch:${BASE_TORCH_TAG} 43 | 44 | # Fix nvidia-key error issue (NO_PUBKEY A4B469963BF863CC) 45 | RUN rm /etc/apt/sources.list.d/*.list 46 | 47 | # Installing apt packages 48 | RUN export DEBIAN_FRONTEND=noninteractive \ 49 | && apt -y update --no-install-recommends \ 50 | && apt -y install --no-install-recommends \ 51 | git wget tmux vim zsh build-essential cmake ninja-build libopenblas-dev libsparsehash-dev \ 52 | && apt autoremove -y \ 53 | && apt clean -y \ 54 | && export DEBIAN_FRONTEND=dialog 55 | 56 | # Install Pointcept environment 57 | RUN conda install h5py pyyaml -c anaconda -y 58 | RUN conda install sharedarray tensorboard tensorboardx yapf addict einops scipy plyfile termcolor timm -c conda-forge -y 59 | RUN conda install pytorch-cluster pytorch-scatter pytorch-sparse -c pyg -y 60 | 61 | RUN pip install --upgrade pip 62 | RUN pip install torch-geometric 63 | RUN pip install spconv-cu${CUDA_VERSION_NO_DOT} 64 | RUN pip install open3d 65 | 66 | # Build MinkowskiEngine 67 | RUN git clone https://github.com/NVIDIA/MinkowskiEngine.git 68 | WORKDIR /workspace/MinkowskiEngine 69 | RUN TORCH_CUDA_ARCH_LIST="5.2 6.0 6.1 7.0+PTX 8.0" python setup.py install --blas=openblas --force_cuda 70 | WORKDIR /workspace 71 | 72 | # Build pointops 73 | RUN git clone https://github.com/Pointcept/Pointcept.git 74 | RUN TORCH_CUDA_ARCH_LIST="5.2 6.0 6.1 7.0+PTX 8.0" pip install Pointcept/libs/pointops -v 75 | 76 | # Build pointgroup_ops 77 | RUN TORCH_CUDA_ARCH_LIST="5.2 6.0 6.1 7.0+PTX 8.0" pip install Pointcept/libs/pointgroup_ops -v 78 | 79 | # Build swin3d 80 | RUN TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX 8.0" pip install -U git+https://github.com/microsoft/Swin3D.git -v 81 | EOM 82 | 83 | docker build . -f ./Dockerfile -t $IMG_TAG -------------------------------------------------------------------------------- /scripts/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | cd $(dirname $(dirname "$0")) || exit 4 | PYTHON=python 5 | 6 | TEST_CODE=test.py 7 | 8 | DATASET=scannet 9 | CONFIG="None" 10 | EXP_NAME=debug 11 | WEIGHT=model_best 12 | GPU=None 13 | 14 | while getopts "p:d:c:n:w:g:" opt; do 15 | case $opt in 16 | p) 17 | PYTHON=$OPTARG 18 | ;; 19 | d) 20 | DATASET=$OPTARG 21 | ;; 22 | c) 23 | CONFIG=$OPTARG 24 | ;; 25 | n) 26 | EXP_NAME=$OPTARG 27 | ;; 28 | w) 29 | WEIGHT=$OPTARG 30 | ;; 31 | g) 32 | GPU=$OPTARG 33 | ;; 34 | \?) 
35 | echo "Invalid option: -$OPTARG" 36 | ;; 37 | esac 38 | done 39 | 40 | if [ "${NUM_GPU}" = 'None' ] 41 | then 42 | NUM_GPU=`$PYTHON -c 'import torch; print(torch.cuda.device_count())'` 43 | fi 44 | 45 | echo "Experiment name: $EXP_NAME" 46 | echo "Python interpreter dir: $PYTHON" 47 | echo "Dataset: $DATASET" 48 | echo "GPU Num: $GPU" 49 | 50 | EXP_DIR=exp/${DATASET}/${EXP_NAME} 51 | MODEL_DIR=${EXP_DIR}/model 52 | CODE_DIR=${EXP_DIR}/code 53 | CONFIG_DIR=${EXP_DIR}/config.py 54 | 55 | if [ "${CONFIG}" = "None" ] 56 | then 57 | CONFIG_DIR=${EXP_DIR}/config.py 58 | else 59 | CONFIG_DIR=configs/${DATASET}/${CONFIG}.py 60 | fi 61 | 62 | echo "Loading config in:" $CONFIG_DIR 63 | #export PYTHONPATH=./$CODE_DIR 64 | export PYTHONPATH=./ 65 | echo "Running code in: $CODE_DIR" 66 | 67 | 68 | echo " =========> RUN TASK <=========" 69 | 70 | #$PYTHON -u "$CODE_DIR"/tools/$TEST_CODE \ 71 | $PYTHON -u tools/$TEST_CODE \ 72 | --config-file "$CONFIG_DIR" \ 73 | --num-gpus "$GPU" \ 74 | --options save_path="$EXP_DIR" weight="${MODEL_DIR}"/"${WEIGHT}".pth 75 | -------------------------------------------------------------------------------- /scripts/train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | cd $(dirname $(dirname "$0")) || exit 4 | ROOT_DIR=$(pwd) 5 | PYTHON=python 6 | 7 | TRAIN_CODE=train.py 8 | 9 | DATASET=scannet 10 | CONFIG="None" 11 | EXP_NAME=debug 12 | WEIGHT="None" 13 | RESUME=false 14 | GPU=None 15 | 16 | 17 | while getopts "p:d:c:n:w:g:r:" opt; do 18 | case $opt in 19 | p) 20 | PYTHON=$OPTARG 21 | ;; 22 | d) 23 | DATASET=$OPTARG 24 | ;; 25 | c) 26 | CONFIG=$OPTARG 27 | ;; 28 | n) 29 | EXP_NAME=$OPTARG 30 | ;; 31 | w) 32 | WEIGHT=$OPTARG 33 | ;; 34 | r) 35 | RESUME=$OPTARG 36 | ;; 37 | g) 38 | GPU=$OPTARG 39 | ;; 40 | \?) 41 | echo "Invalid option: -$OPTARG" 42 | ;; 43 | esac 44 | done 45 | 46 | if [ "${NUM_GPU}" = 'None' ] 47 | then 48 | NUM_GPU=`$PYTHON -c 'import torch; print(torch.cuda.device_count())'` 49 | fi 50 | 51 | echo "Experiment name: $EXP_NAME" 52 | echo "Python interpreter dir: $PYTHON" 53 | echo "Dataset: $DATASET" 54 | echo "Config: $CONFIG" 55 | echo "GPU Num: $GPU" 56 | 57 | EXP_DIR=exp/${DATASET}/${EXP_NAME} 58 | MODEL_DIR=${EXP_DIR}/model 59 | CODE_DIR=${EXP_DIR}/code 60 | CONFIG_DIR=configs/${DATASET}/${CONFIG}.py 61 | 62 | 63 | echo " =========> CREATE EXP DIR <=========" 64 | echo "Experiment dir: $ROOT_DIR/$EXP_DIR" 65 | if ${RESUME} 66 | then 67 | CONFIG_DIR=${EXP_DIR}/config.py 68 | WEIGHT=$MODEL_DIR/model_last.pth 69 | else 70 | mkdir -p "$MODEL_DIR" "$CODE_DIR" 71 | cp -r scripts tools pointcept "$CODE_DIR" 72 | fi 73 | 74 | echo "Loading config in:" $CONFIG_DIR 75 | export PYTHONPATH=./$CODE_DIR 76 | echo "Running code in: $CODE_DIR" 77 | 78 | 79 | echo " =========> RUN TASK <=========" 80 | 81 | if [ "${WEIGHT}" = "None" ] 82 | then 83 | $PYTHON "$CODE_DIR"/tools/$TRAIN_CODE \ 84 | --config-file "$CONFIG_DIR" \ 85 | --num-gpus "$GPU" \ 86 | --options save_path="$EXP_DIR" 87 | else 88 | $PYTHON "$CODE_DIR"/tools/$TRAIN_CODE \ 89 | --config-file "$CONFIG_DIR" \ 90 | --num-gpus "$GPU" \ 91 | --options save_path="$EXP_DIR" resume="$RESUME" weight="$WEIGHT" 92 | fi -------------------------------------------------------------------------------- /tools/test.py: -------------------------------------------------------------------------------- 1 | """ 2 | Main Testing Script 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 
6 | """ 7 | 8 | import os 9 | # os.environ['CUDA_VISIBLE_DEVICES'] = "3" 10 | 11 | from pointcept.engines.defaults import ( 12 | default_argument_parser, 13 | default_config_parser, 14 | default_setup, 15 | ) 16 | from pointcept.engines.test import TESTERS 17 | from pointcept.engines.launch import launch 18 | 19 | 20 | def main_worker(cfg): 21 | cfg = default_setup(cfg) 22 | tester = TESTERS.build(dict(type=cfg.test.type, cfg=cfg)) 23 | tester.test() 24 | 25 | 26 | def main(): 27 | args = default_argument_parser().parse_args() 28 | cfg = default_config_parser(args.config_file, args.options) 29 | 30 | launch( 31 | main_worker, 32 | num_gpus_per_machine=args.num_gpus, 33 | num_machines=args.num_machines, 34 | machine_rank=args.machine_rank, 35 | dist_url=args.dist_url, 36 | cfg=(cfg,), 37 | ) 38 | 39 | 40 | if __name__ == "__main__": 41 | main() 42 | -------------------------------------------------------------------------------- /tools/test_s3dis_6fold.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test script for S3DIS 6-fold cross validation 3 | 4 | Gathering Area_X.pth from result folder of experiment record of each area as follows: 5 | |- RECORDS_PATH 6 | |- Area_1.pth 7 | |- Area_2.pth 8 | |- Area_3.pth 9 | |- Area_4.pth 10 | |- Area_5.pth 11 | |- Area_6.pth 12 | 13 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 14 | Please cite our work if the code is helpful to you. 15 | """ 16 | 17 | import argparse 18 | import os 19 | 20 | import torch 21 | import numpy as np 22 | import glob 23 | from pointcept.utils.logger import get_root_logger 24 | 25 | CLASS_NAMES = [ 26 | "ceiling", 27 | "floor", 28 | "wall", 29 | "beam", 30 | "column", 31 | "window", 32 | "door", 33 | "table", 34 | "chair", 35 | "sofa", 36 | "bookcase", 37 | "board", 38 | "clutter", 39 | ] 40 | 41 | 42 | def evaluation(intersection, union, target, logger=None): 43 | iou_class = intersection / (union + 1e-10) 44 | accuracy_class = intersection / (target + 1e-10) 45 | mIoU = np.mean(iou_class) 46 | mAcc = np.mean(accuracy_class) 47 | allAcc = sum(intersection) / (sum(target) + 1e-10) 48 | 49 | if logger is not None: 50 | logger.info( 51 | "Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}".format( 52 | mIoU, mAcc, allAcc 53 | ) 54 | ) 55 | for i in range(len(CLASS_NAMES)): 56 | logger.info( 57 | "Class_{idx} - {name} Result: iou/accuracy {iou:.4f}/{accuracy:.4f}".format( 58 | idx=i, 59 | name=CLASS_NAMES[i], 60 | iou=iou_class[i], 61 | accuracy=accuracy_class[i], 62 | ) 63 | ) 64 | 65 | 66 | def main(): 67 | parser = argparse.ArgumentParser() 68 | parser.add_argument( 69 | "--record_root", 70 | required=True, 71 | help="Path to the S3DIS record of each split", 72 | ) 73 | config = parser.parse_args() 74 | logger = get_root_logger( 75 | log_file=os.path.join(config.record_root, "6-fold.log"), 76 | file_mode="w", 77 | ) 78 | 79 | records = sorted(glob.glob(os.path.join(config.record_root, "Area_*.pth"))) 80 | assert len(records) == 6 81 | intersection_ = np.zeros(len(CLASS_NAMES), dtype=int) 82 | union_ = np.zeros(len(CLASS_NAMES), dtype=int) 83 | target_ = np.zeros(len(CLASS_NAMES), dtype=int) 84 | 85 | for record in records: 86 | area = os.path.basename(record).split(".")[0] 87 | info = torch.load(record) 88 | logger.info(f"<<<<<<<<<<<<<<<<< Parsing {area} <<<<<<<<<<<<<<<<<") 89 | intersection = info["intersection"] 90 | union = info["union"] 91 | target = info["target"] 92 | evaluation(intersection, union, target, logger=logger) 93 | intersection_ += intersection 94 | union_ 
+= union 95 | target_ += target 96 | 97 | logger.info(f"<<<<<<<<<<<<<<<<< Parsing 6-fold <<<<<<<<<<<<<<<<<") 98 | evaluation(intersection_, union_, target_, logger=logger) 99 | 100 | 101 | if __name__ == "__main__": 102 | main() 103 | -------------------------------------------------------------------------------- /tools/train.py: -------------------------------------------------------------------------------- 1 | """ 2 | Main Training Script 3 | 4 | Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) 5 | Please cite our work if the code is helpful to you. 6 | """ 7 | 8 | # import os 9 | # os.environ['CUDA_VISIBLE_DEVICES'] = "1" 10 | 11 | from pointcept.engines.defaults import ( 12 | default_argument_parser, 13 | default_config_parser, 14 | default_setup, 15 | ) 16 | from pointcept.engines.train import TRAINERS 17 | from pointcept.engines.launch import launch 18 | 19 | 20 | def main_worker(cfg): 21 | cfg = default_setup(cfg) 22 | trainer = TRAINERS.build(dict(type=cfg.train.type, cfg=cfg)) 23 | trainer.train() 24 | 25 | 26 | def main(): 27 | args = default_argument_parser().parse_args() 28 | cfg = default_config_parser(args.config_file, args.options) 29 | 30 | launch( 31 | main_worker, 32 | num_gpus_per_machine=args.num_gpus, 33 | num_machines=args.num_machines, 34 | machine_rank=args.machine_rank, 35 | dist_url=args.dist_url, 36 | cfg=(cfg,), 37 | ) 38 | 39 | 40 | if __name__ == "__main__": 41 | main() 42 | --------------------------------------------------------------------------------
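For reference, the metric arithmetic performed by evaluation() in tools/test_s3dis_6fold.py above, applied to hand-picked toy counts for a 3-class problem (the numbers are assumptions; only the formulas come from the script):

import numpy as np

intersection = np.array([80, 45, 10])
union = np.array([100, 90, 40])
target = np.array([90, 60, 20])

iou_class = intersection / (union + 1e-10)          # [0.8, 0.5, 0.25]
accuracy_class = intersection / (target + 1e-10)    # [0.889, 0.75, 0.5]
print(iou_class.mean())                             # mIoU   ~= 0.517
print(accuracy_class.mean())                        # mAcc   ~= 0.713
print(intersection.sum() / (target.sum() + 1e-10))  # allAcc ~= 0.794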