├── .gitignore
├── LICENSE.txt
├── README.md
├── assets
│   └── teaser.png
├── classification
│   ├── README.md
│   ├── dataset
│   │   ├── ScanObjectNNDataLoader.py
│   │   └── __init__.py
│   ├── init.sh
│   ├── models
│   │   ├── __init__.py
│   │   └── repsurf
│   │       ├── __init__.py
│   │       ├── repsurf_ssg_umb.py
│   │       └── repsurf_ssg_umb_2x.py
│   ├── modules
│   │   ├── __init__.py
│   │   ├── pointnet2_utils.py
│   │   ├── pointops
│   │   │   ├── __init__.py
│   │   │   ├── functions
│   │   │   │   ├── __init__.py
│   │   │   │   └── pointops.py
│   │   │   ├── setup.py
│   │   │   └── src
│   │   │       ├── __init__.py
│   │   │       ├── ballquery
│   │   │       │   ├── ballquery_cuda.cpp
│   │   │       │   ├── ballquery_cuda_kernel.cu
│   │   │       │   └── ballquery_cuda_kernel.h
│   │   │       ├── cuda_utils.h
│   │   │       ├── grouping
│   │   │       │   ├── grouping_cuda.cpp
│   │   │       │   ├── grouping_cuda_kernel.cu
│   │   │       │   └── grouping_cuda_kernel.h
│   │   │       ├── grouping_int
│   │   │       │   ├── grouping_int_cuda.cpp
│   │   │       │   ├── grouping_int_cuda_kernel.cu
│   │   │       │   └── grouping_int_cuda_kernel.h
│   │   │       ├── interpolation
│   │   │       │   ├── interpolation_cuda.cpp
│   │   │       │   ├── interpolation_cuda_kernel.cu
│   │   │       │   └── interpolation_cuda_kernel.h
│   │   │       ├── knnquery
│   │   │       │   ├── __init__.py
│   │   │       │   ├── knnquery_cuda.cpp
│   │   │       │   ├── knnquery_cuda_kernel.cu
│   │   │       │   └── knnquery_cuda_kernel.h
│   │   │       ├── knnquery_heap
│   │   │       │   ├── __init__.py
│   │   │       │   ├── knnquery_heap_cuda.cpp
│   │   │       │   ├── knnquery_heap_cuda_kernel.cu
│   │   │       │   └── knnquery_heap_cuda_kernel.h
│   │   │       ├── pointops_api.cpp
│   │   │       └── sampling
│   │   │           ├── sampling_cuda.cpp
│   │   │           ├── sampling_cuda_kernel.cu
│   │   │           └── sampling_cuda_kernel.h
│   │   ├── polar_utils.py
│   │   ├── ptaug_utils.py
│   │   ├── recons_utils.py
│   │   └── repsurface_utils.py
│   ├── scripts
│   │   └── scanobjectnn
│   │       ├── repsurf_ssg_umb.sh
│   │       └── repsurf_ssg_umb_2x.sh
│   ├── tool
│   │   └── train_cls_scanobjectnn.py
│   └── util
│       ├── __init__.py
│       └── utils.py
├── segmentation
│   ├── README.md
│   ├── dataset
│   │   ├── S3DISDataLoader.py
│   │   └── __init__.py
│   ├── init.sh
│   ├── models
│   │   ├── __init__.py
│   │   ├── pointnet2
│   │   │   ├── __init__.py
│   │   │   └── pointnet2_ssg.py
│   │   ├── pointtransformer
│   │   │   ├── __init__.py
│   │   │   └── pointtransformer.py
│   │   └── repsurf
│   │       ├── __init__.py
│   │       └── repsurf_umb_ssg.py
│   ├── modules
│   │   ├── __init__.py
│   │   ├── aug_utils.py
│   │   ├── pointnet2_utils.py
│   │   ├── pointops
│   │   │   ├── __init__.py
│   │   │   ├── functions
│   │   │   │   ├── __init__.py
│   │   │   │   └── pointops.py
│   │   │   ├── setup.py
│   │   │   └── src
│   │   │       ├── __init__.py
│   │   │       ├── aggregation
│   │   │       │   ├── aggregation_cuda.cpp
│   │   │       │   ├── aggregation_cuda_kernel.cu
│   │   │       │   └── aggregation_cuda_kernel.h
│   │   │       ├── cuda_utils.h
│   │   │       ├── grouping
│   │   │       │   ├── grouping_cuda.cpp
│   │   │       │   ├── grouping_cuda_kernel.cu
│   │   │       │   └── grouping_cuda_kernel.h
│   │   │       ├── interpolation
│   │   │       │   ├── interpolation_cuda.cpp
│   │   │       │   ├── interpolation_cuda_kernel.cu
│   │   │       │   └── interpolation_cuda_kernel.h
│   │   │       ├── knnquery
│   │   │       │   ├── knnquery_cuda.cpp
│   │   │       │   ├── knnquery_cuda_kernel.cu
│   │   │       │   └── knnquery_cuda_kernel.h
│   │   │       ├── pointops_api.cpp
│   │   │       ├── sampling
│   │   │       │   ├── sampling_cuda.cpp
│   │   │       │   ├── sampling_cuda_kernel.cu
│   │   │       │   └── sampling_cuda_kernel.h
│   │   │       └── subtraction
│   │   │           ├── subtraction_cuda.cpp
│   │   │           ├── subtraction_cuda_kernel.cu
│   │   │           └── subtraction_cuda_kernel.h
│   │   ├── pointtransformer_utils.py
│   │   ├── polar_utils.py
│   │   ├── recons_utils.py
│   │   ├── repsurface_utils.py
│   │   └── voxelize_utils.py
│   ├── scripts
│   │   └── s3dis
│   │       ├── test_pointnet2.sh
│   │       ├── test_pointtransformer.sh
│   │       ├── test_repsurf_umb.sh
│   │       ├── train_pointnet2.sh
│   │       ├── train_pointtransformer.sh
│   │       └── train_repsurf_umb.sh
│   ├── tool
│   │   ├── test_s3dis.py
│   │   └── train.py
│   └── util
│       ├── __init__.py
│       ├── data_util.py
│       └── utils.py
└── visualization
    ├── airplane_0001.txt
    ├── bed_0001.txt
    ├── cup_0001.txt
    ├── table_0250.txt
    ├── triangled_airplane.obj
    ├── triangled_bed.obj
    ├── triangled_cup.obj
    └── triangled_table.obj
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 | .idea/
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright 2022 Haoxi Ran.
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # RepSurf - Surface Representation for Point Clouds [CVPR 2022 Oral]
2 |
3 | By *[Haoxi Ran\*](https://hancyran.github.io/), Jun Liu, Chengjie Wang* (\*: corresponding contact)
4 |
5 | [SOTA: 3D Point Cloud Classification on ScanObjectNN](https://paperswithcode.com/sota/3d-point-cloud-classification-on-scanobjectnn?p=surface-representation-for-point-clouds)
6 | [SOTA: 3D Object Detection on SUN-RGBD val](https://paperswithcode.com/sota/3d-object-detection-on-sun-rgbd-val?p=surface-representation-for-point-clouds)
7 | [SOTA: 3D Point Cloud Classification on ModelNet40](https://paperswithcode.com/sota/3d-point-cloud-classification-on-modelnet40?p=surface-representation-for-point-clouds)
8 | [SOTA: Semantic Segmentation on S3DIS](https://paperswithcode.com/sota/semantic-segmentation-on-s3dis?p=surface-representation-for-point-clouds)
9 | [SOTA: 3D Object Detection on ScanNetV2](https://paperswithcode.com/sota/3d-object-detection-on-scannetv2?p=surface-representation-for-point-clouds)
10 | [SOTA: Semantic Segmentation on S3DIS Area5](https://paperswithcode.com/sota/semantic-segmentation-on-s3dis-area5?p=surface-representation-for-point-clouds)
11 |
12 | ### The official PyTorch implementation of "[Surface Representation for Point Clouds](http://arxiv.org/abs/2205.05740)"
13 | ### [PDF](https://openaccess.thecvf.com/content/CVPR2022/papers/Ran_Surface_Representation_for_Point_Clouds_CVPR_2022_paper.pdf) | [arXiv](http://arxiv.org/abs/2205.05740)
14 |
15 |
16 |
17 | ![RepSurf teaser](./assets/teaser.png)
18 |
19 |
20 |
21 | ## News:
22 | - (**Sep 10** NEW :fire:) We have uploaded the implementation of RepSurf on S3DIS along with its training log and pretrained weights.
23 | - (**June 24** :fire:) We successfully finished our Oral presentation at CVPR 2022!
24 | - (**May 11**) We have uploaded the implementation of RepSurf on ScanObjectNN along with its training log and pretrained weights.
25 |
26 | ## Tasks:
27 |
28 | ### We conduct the experiments for the different tasks on different codebases:
29 |
30 | > Classification: **[3D Object Classification](./classification)**
31 | > Segmentation: **[3D Semantic Segmentation](./segmentation)**
32 |
33 |
34 | ## Visualization
35 |
36 | We provide several visualization results in the folder **./visualization** for a closer look at the construction of
37 | RepSurf.
38 |
39 |
40 | ## License
41 |
42 | RepSurf is under the Apache-2.0 license. Please contact the primary author **Haoxi Ran (ranhaoxi@gmail.com)** for
43 | commercial use.
44 |
--------------------------------------------------------------------------------
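As a quick way to inspect the samples mentioned in the Visualization section of the README above: a minimal sketch, assuming each `.txt` row holds comma-separated coordinates (ModelNet-style `x,y,z[,nx,ny,nz]`) — the layout is an assumption here, so verify it against the actual files:

```python
import numpy as np

# Hypothetical loader for the files in ./visualization; the comma-separated
# x,y,z[,nx,ny,nz] row layout is an assumption, not documented by the repo.
pts = np.loadtxt('visualization/airplane_0001.txt', delimiter=',')
print(pts.shape)   # expect (N, 3) or (N, 6)
xyz = pts[:, :3]   # point coordinates
```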
/assets/teaser.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/assets/teaser.png
--------------------------------------------------------------------------------
/classification/README.md:
--------------------------------------------------------------------------------
1 | # RepSurf for Classification
2 |
3 | By *[Haoxi Ran\*](https://hancyran.github.io/), Jun Liu, Chengjie Wang* (\*: corresponding contact)
4 |
5 | ### [PDF](https://openaccess.thecvf.com/content/CVPR2022/papers/Ran_Surface_Representation_for_Point_Clouds_CVPR_2022_paper.pdf) | [arXiv](http://arxiv.org/abs/2205.05740)
6 |
7 |
8 | ## Preparation
9 |
10 | ### Environment
11 |
12 | We tested under the following environment:
13 |
14 | * python 3.7
15 | * pytorch 1.6.0
16 | * cuda 10.1
17 | * gcc 7.2.0
18 | * h5py
19 |
20 | For Anaconda users, initialize the conda environment **repsurf-cls** by:
21 |
22 | ```
23 | sh init.sh
24 | ```
25 |
26 | ## Experiments
27 |
28 | ### ScanObjectNN (Data & Logs: [Google Drive](https://drive.google.com/drive/folders/1DGWT9W46MKVI0-lu18hJhB-R3BFVWuCs?usp=sharing))
29 |
30 | * Performance:
31 |
32 | | Model | Accuracy | #Params | Augment | Code | Log | Checkpoint |
33 | |---|---|---|---|---|---|---|
34 | | MVTN | 82.8 | 4.24M | None | link | N/A | link |
35 | | PointMLP | 85.7 | 12.6M | Scale, Shift | link | link | link |
36 | | PointNet++ SSG | 77.9 | 1.475M | Rotate, Jitter | link | N/A | N/A |
37 | | Umbrella RepSurf (PointNet++ SSG) | 84.87 | 1.483M | None | link | google drive | google drive (6MB) |
38 | | Umbrella RepSurf (PointNet++ SSG, 2x) | 86.05 | 6.806M | None | link | google drive | google drive (27MB) |
39 |
94 | * To download the dataset:
95 |
96 | ```
97 | wget https://download.cs.stanford.edu/orion/scanobjectnn/h5_files.zip
98 | unzip h5_files.zip
99 | ln -s [PATH]/h5_files data/ScanObjectNN
100 | ```
101 |
102 | **Note**: We conduct all experiments on the hardest variant of ScanObjectNN (**PB_T50_RS**).
103 |
104 |
105 | * To train **Umbrella RepSurf** on ScanObjectNN:
106 |
107 | ```
108 | sh scripts/scanobjectnn/repsurf_ssg_umb.sh
109 | ```
110 |
111 | * To train **Umbrella RepSurf (2x setting)** on ScanObjectNN:
112 |
113 | ```
114 | sh scripts/scanobjectnn/repsurf_ssg_umb_2x.sh
115 | ```
116 |
117 | ## Acknowledgment
118 |
119 | We use part of the library [pointops](https://github.com/hszhao/PointWeb/tree/master/lib/pointops)
120 | from [PointWeb](https://github.com/hszhao/PointWeb).
121 |
122 | ## License
123 |
124 | RepSurf is under the Apache-2.0 license. Please contact the primary author **Haoxi Ran (ranhaoxi@gmail.com)** for
125 | commercial use.
126 |
--------------------------------------------------------------------------------
/classification/dataset/ScanObjectNNDataLoader.py:
--------------------------------------------------------------------------------
1 | """
2 | Author: Haoxi Ran
3 | Date: 05/10/2022
4 | """
5 |
6 | import h5py
7 | import warnings
8 | from torch.utils.data import Dataset
9 |
10 | warnings.filterwarnings('ignore')
11 |
12 |
13 | class ScanObjectNNDataLoader(Dataset):
14 | def __init__(self, root, split='training', bg=True):
15 | self.root = root
16 |
17 | assert (split == 'training' or split == 'test')
18 | if bg:
19 | print('Use data with background points')
20 | dir_name = 'main_split'
21 | else:
22 | print('Use data without background points')
23 | dir_name = 'main_split_nobg'
24 | file_name = '_objectdataset_augmentedrot_scale75.h5'
25 | h5_name = '{}/{}/{}'.format(self.root, dir_name, split + file_name)
26 | with h5py.File(h5_name, mode="r") as f:
27 | self.data = f['data'][:].astype('float32')
28 | self.label = f['label'][:].astype('int64')
29 | print('The size of %s data is %d' % (split, self.data.shape[0]))
30 |
31 | def __len__(self):
32 | return self.data.shape[0]
33 |
34 | def __getitem__(self, index):
35 | return self.data[index].T, self.label[index]
36 |
--------------------------------------------------------------------------------
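A minimal usage sketch for the loader above (hedged: `data/ScanObjectNN` is the symlink created per the README, and the batch size and worker count are illustrative):

```python
import torch
from torch.utils.data import DataLoader
from dataset.ScanObjectNNDataLoader import ScanObjectNNDataLoader

# Assumes data/ScanObjectNN links to the unzipped h5_files directory.
train_set = ScanObjectNNDataLoader(root='data/ScanObjectNN', split='training', bg=True)
loader = DataLoader(train_set, batch_size=16, shuffle=True, num_workers=4)

points, label = next(iter(loader))
print(points.shape, label.shape)  # __getitem__ transposes, so points are [B, 3, N]
```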
/classification/dataset/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/classification/dataset/__init__.py
--------------------------------------------------------------------------------
/classification/init.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | mkdir -p log/PointAnalysis/log/ScanObjectNN
4 | mkdir -p data/
5 |
6 | conda create -n repsurf-cls python=3.7 -y
7 | conda activate repsurf-cls
8 |
9 | conda install pytorch=1.6.0 torchvision=0.7.0 cudatoolkit=10.1 -c pytorch -c conda-forge -y
10 | conda install -c anaconda h5py -y
11 |
12 | cd modules/pointops
13 | python3 setup.py install
14 | cd -
15 |
--------------------------------------------------------------------------------
/classification/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/classification/models/__init__.py
--------------------------------------------------------------------------------
/classification/models/repsurf/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/classification/models/repsurf/__init__.py
--------------------------------------------------------------------------------
/classification/models/repsurf/repsurf_ssg_umb.py:
--------------------------------------------------------------------------------
1 | """
2 | Author: Haoxi Ran
3 | Date: 05/10/2022
4 | """
5 |
6 | import torch.nn as nn
7 | import torch.nn.functional as F
8 | from modules.repsurface_utils import SurfaceAbstractionCD, UmbrellaSurfaceConstructor
9 |
10 |
11 | class Model(nn.Module):
12 | def __init__(self, args):
13 | super(Model, self).__init__()
14 | center_channel = 0 if not args.return_center else (6 if args.return_polar else 3)
15 | repsurf_channel = 10
16 |
17 | self.init_nsample = args.num_point
18 | self.return_dist = args.return_dist
19 | self.surface_constructor = UmbrellaSurfaceConstructor(args.group_size + 1, repsurf_channel,
20 | return_dist=args.return_dist, aggr_type=args.umb_pool,
21 | cuda=args.cuda_ops)
22 | self.sa1 = SurfaceAbstractionCD(npoint=512, radius=0.2, nsample=32, feat_channel=repsurf_channel,
23 | pos_channel=center_channel, mlp=[64, 64, 128], group_all=False,
24 | return_polar=args.return_polar, cuda=args.cuda_ops)
25 | self.sa2 = SurfaceAbstractionCD(npoint=128, radius=0.4, nsample=64, feat_channel=128 + repsurf_channel,
26 | pos_channel=center_channel, mlp=[128, 128, 256], group_all=False,
27 | return_polar=args.return_polar, cuda=args.cuda_ops)
28 | self.sa3 = SurfaceAbstractionCD(npoint=None, radius=None, nsample=None, feat_channel=256 + repsurf_channel,
29 | pos_channel=center_channel, mlp=[256, 512, 1024], group_all=True,
30 | return_polar=args.return_polar, cuda=args.cuda_ops)
31 | # modelnet40
32 | self.classfier = nn.Sequential(
33 | nn.Linear(1024, 512),
34 | nn.BatchNorm1d(512),
35 | nn.ReLU(True),
36 | nn.Dropout(0.4),
37 | nn.Linear(512, 256),
38 | nn.BatchNorm1d(256),
39 | nn.ReLU(True),
40 | nn.Dropout(0.4),
41 | nn.Linear(256, args.num_class))
42 |
43 | def forward(self, points):
44 | # init
45 | center = points[:, :3, :]
46 |
47 | normal = self.surface_constructor(center)
48 |
49 | center, normal, feature = self.sa1(center, normal, None)
50 | center, normal, feature = self.sa2(center, normal, feature)
51 | center, normal, feature = self.sa3(center, normal, feature)
52 |
53 | feature = feature.view(-1, 1024)
54 | feature = self.classfier(feature)
55 | feature = F.log_softmax(feature, -1)
56 |
57 | return feature
58 |
--------------------------------------------------------------------------------
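A hedged construction sketch for the model above: every `args` field below is read by the constructor, but the concrete values (including the `umb_pool` choice) are illustrative rather than the paper's training configuration; with `cuda_ops=False` the pure-PyTorch fallbacks in `modules` are used instead of the compiled pointops extension:

```python
import argparse
import torch
from models.repsurf.repsurf_ssg_umb import Model

args = argparse.Namespace(
    num_point=1024, num_class=15,          # ScanObjectNN has 15 classes
    return_center=True, return_polar=True, return_dist=True,
    group_size=8, umb_pool='sum',          # illustrative values, not the official config
    cuda_ops=False,                        # True requires the built pointops extension
)
model = Model(args)

points = torch.rand(8, 3, args.num_point)  # [B, 3, N]; forward reads the xyz channels
log_probs = model(points)                   # [B, num_class] log-softmax scores
print(log_probs.shape)
```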
/classification/models/repsurf/repsurf_ssg_umb_2x.py:
--------------------------------------------------------------------------------
1 | """
2 | Author: Haoxi Ran
3 | Date: 05/10/2022
4 | """
5 |
6 | import torch.nn as nn
7 | import torch.nn.functional as F
8 | from modules.repsurface_utils import SurfaceAbstractionCD, UmbrellaSurfaceConstructor
9 |
10 |
11 | class Model(nn.Module):
12 | def __init__(self, args):
13 | super(Model, self).__init__()
14 | center_channel = 0 if not args.return_center else (6 if args.return_polar else 3)
15 | repsurf_channel = 10
16 |
17 | self.init_nsample = args.num_point
18 | self.return_dist = args.return_dist
19 | self.surface_constructor = UmbrellaSurfaceConstructor(args.group_size + 1, repsurf_channel,
20 | return_dist=args.return_dist, aggr_type=args.umb_pool,
21 | cuda=args.cuda_ops)
22 | self.sa1 = SurfaceAbstractionCD(npoint=512, radius=0.1, nsample=24, feat_channel=repsurf_channel,
23 | pos_channel=center_channel, mlp=[128, 128, 256], group_all=False,
24 | return_polar=args.return_polar, cuda=args.cuda_ops)
25 | self.sa2 = SurfaceAbstractionCD(npoint=128, radius=0.2, nsample=24, feat_channel=256 + repsurf_channel,
26 | pos_channel=center_channel, mlp=[256, 256, 512], group_all=False,
27 | return_polar=args.return_polar, cuda=args.cuda_ops)
28 | self.sa3 = SurfaceAbstractionCD(npoint=32, radius=0.4, nsample=24, feat_channel=512 + repsurf_channel,
29 | pos_channel=center_channel, mlp=[512, 512, 1024], group_all=False,
30 | return_polar=args.return_polar, cuda=args.cuda_ops)
31 | self.sa4 = SurfaceAbstractionCD(npoint=None, radius=None, nsample=None, feat_channel=1024 + repsurf_channel,
32 | pos_channel=center_channel, mlp=[1024, 1024, 2048], group_all=True,
33 | return_polar=args.return_polar, cuda=args.cuda_ops)
34 | # modelnet40
35 | self.classfier = nn.Sequential(
36 | nn.Linear(2048, 512),
37 | nn.BatchNorm1d(512),
38 | nn.ReLU(True),
39 | nn.Dropout(0.4),
40 | nn.Linear(512, 256),
41 | nn.BatchNorm1d(256),
42 | nn.ReLU(True),
43 | nn.Dropout(0.4),
44 | nn.Linear(256, args.num_class))
45 |
46 | def forward(self, points):
47 | # init
48 | center = points[:, :3, :]
49 |
50 | normal = self.surface_constructor(center)
51 |
52 | center, normal, feature = self.sa1(center, normal, None)
53 | center, normal, feature = self.sa2(center, normal, feature)
54 | center, normal, feature = self.sa3(center, normal, feature)
55 | center, normal, feature = self.sa4(center, normal, feature)
56 |
57 | feature = feature.view(-1, 2048)
58 | feature = self.classfier(feature)
59 | feature = F.log_softmax(feature, -1)
60 |
61 | return feature
62 |
--------------------------------------------------------------------------------
/classification/modules/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/classification/modules/__init__.py
--------------------------------------------------------------------------------
/classification/modules/pointnet2_utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Author: Haoxi Ran
3 | Date: 05/10/2022
4 | """
5 |
6 | import torch
7 |
8 | try:
9 | from modules.pointops.functions.pointops import furthestsampling, gathering, ballquery, knnquery, \
10 | grouping, interpolation, nearestneighbor
11 | except:
12 | raise Exception('Failed to load pointops')
13 |
14 |
15 | def square_distance(src, dst):
16 | """
17 |     Compute the squared Euclidean distance between every pair of points,
18 |     via the expansion ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2. src: [B, N, C], dst: [B, M, C] -> dist: [B, N, M].
19 | """
20 | B, N, _ = src.shape
21 | _, M, _ = dst.shape
22 | dist = -2 * torch.matmul(src, dst.permute(0, 2, 1))
23 | dist += torch.sum(src ** 2, -1).view(B, N, 1)
24 | dist += torch.sum(dst ** 2, -1).view(B, 1, M)
25 | return dist
26 |
27 |
28 | def index_points(points, idx, cuda=False, is_group=False):
29 | if cuda:
30 | if is_group:
31 | points = grouping(points.transpose(1, 2).contiguous(), idx)
32 | return points.permute(0, 2, 3, 1).contiguous()
33 | else:
34 | points = gathering(points.transpose(1, 2).contiguous(), idx)
35 | return points.permute(0, 2, 1).contiguous()
36 | device = points.device
37 | B = points.shape[0]
38 | view_shape = list(idx.shape)
39 | view_shape[1:] = [1] * (len(view_shape) - 1)
40 | repeat_shape = list(idx.shape)
41 | repeat_shape[0] = 1
42 | batch_indices = torch.arange(B, dtype=torch.long).to(device).view(view_shape).repeat(repeat_shape)
43 | new_points = points[batch_indices, idx, :]
44 | return new_points
45 |
46 |
47 | def farthest_point_sample(xyz, npoint, cuda=False):
48 | """
49 | Input:
50 | xyz: pointcloud data, [B, N, 3]
51 | npoint: number of samples
52 | Return:
53 | centroids: sampled pointcloud index, [B, npoint]
54 |
55 | FLOPs:
56 | S * (3 + 3 + 2)
57 | """
58 | if cuda:
59 | if not xyz.is_contiguous():
60 | xyz = xyz.contiguous()
61 | return furthestsampling(xyz, npoint)
62 | device = xyz.device
63 | B, N, C = xyz.shape
64 | centroids = torch.zeros(B, npoint, dtype=torch.long).to(device)
65 | distance = torch.ones(B, N).to(device) * 1e10
66 | farthest = torch.randint(0, N, (B,), dtype=torch.long).to(device)
67 | batch_indices = torch.arange(B, dtype=torch.long).to(device)
68 | for i in range(npoint):
69 | centroids[:, i] = farthest
70 | centroid = xyz[batch_indices, farthest, :].view(B, 1, 3)
71 | dist = torch.sum((xyz - centroid) ** 2, -1)
72 | mask = dist < distance
73 | distance[mask] = dist[mask]
74 | farthest = torch.max(distance, -1)[1]
75 | return centroids
76 |
77 |
78 | def query_ball_point(radius, nsample, xyz, new_xyz, debug=False, cuda=False):
79 | if cuda:
80 | if not xyz.is_contiguous():
81 | xyz = xyz.contiguous()
82 | if not new_xyz.is_contiguous():
83 | new_xyz = new_xyz.contiguous()
84 | return ballquery(radius, nsample, xyz, new_xyz)
85 | device = xyz.device
86 | B, N, C = xyz.shape
87 | _, S, _ = new_xyz.shape
88 | group_idx = torch.arange(N, dtype=torch.long).to(device).view(1, 1, N).repeat([B, S, 1])
89 | sqrdists = square_distance(new_xyz, xyz)
90 | group_idx[sqrdists > radius ** 2] = N
91 | group_idx = group_idx.sort(dim=-1)[0][:, :, :nsample]
92 | group_first = group_idx[:, :, 0].view(B, S, 1).repeat([1, 1, nsample])
93 | mask = group_idx == N
94 | group_idx[mask] = group_first[mask]
95 | if debug:
96 | num_miss = torch.sum(mask)
97 | num_over = torch.sum(torch.clamp(torch.sum(sqrdists < radius ** 2, dim=2) - nsample, min=0))
98 | return num_miss, num_over
99 | return group_idx
100 |
101 |
102 | def query_knn_point(k, xyz, new_xyz, cuda=False):
103 | if cuda:
104 | if not xyz.is_contiguous():
105 | xyz = xyz.contiguous()
106 | if not new_xyz.is_contiguous():
107 | new_xyz = new_xyz.contiguous()
108 | return knnquery(k, xyz, new_xyz)
109 | dist = square_distance(new_xyz, xyz)
110 | group_idx = dist.sort(descending=False, dim=-1)[1][:, :, :k]
111 | return group_idx
112 |
113 |
114 | def sample(nsample, feature, cuda=False):
115 | feature = feature.permute(0, 2, 1)
116 | xyz = feature[:, :, :3]
117 |
118 | fps_idx = farthest_point_sample(xyz, nsample, cuda=cuda) # [B, npoint, C]
119 | torch.cuda.empty_cache()
120 | feature = index_points(feature, fps_idx, cuda=cuda, is_group=False)
121 | torch.cuda.empty_cache()
122 | feature = feature.permute(0, 2, 1)
123 |
124 | return feature
125 |
--------------------------------------------------------------------------------
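The helpers above all accept a `cuda` flag; a short sketch of the pure-PyTorch fallback path (no compiled extension required), chaining FPS, ball query, and grouping:

```python
import torch
from modules.pointnet2_utils import farthest_point_sample, index_points, query_ball_point

xyz = torch.rand(2, 1024, 3)                           # [B, N, 3] toy point cloud
fps_idx = farthest_point_sample(xyz, 128)              # [B, 128] FPS indices
new_xyz = index_points(xyz, fps_idx)                   # [B, 128, 3] sampled centers
group_idx = query_ball_point(0.2, 32, xyz, new_xyz)    # [B, 128, 32] neighbor indices
grouped = index_points(xyz, group_idx, is_group=True)  # [B, 128, 32, 3] local groups
print(grouped.shape)
```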
/classification/modules/pointops/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/classification/modules/pointops/__init__.py
--------------------------------------------------------------------------------
/classification/modules/pointops/functions/__init__.py:
--------------------------------------------------------------------------------
1 | from .pointops import *
2 |
--------------------------------------------------------------------------------
/classification/modules/pointops/setup.py:
--------------------------------------------------------------------------------
1 | #python3 setup.py install
2 |
3 | from setuptools import setup
4 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
5 | import os
6 | from distutils.sysconfig import get_config_vars
7 |
8 | (opt,) = get_config_vars('OPT')
9 | os.environ['OPT'] = " ".join(
10 | flag for flag in opt.split() if flag != '-Wstrict-prototypes'
11 | )
12 |
13 | setup(
14 | name='pointops',
15 | ext_modules=[
16 | CUDAExtension('pointops_cuda', [
17 | 'src/pointops_api.cpp',
18 |
19 | 'src/ballquery/ballquery_cuda.cpp',
20 | 'src/ballquery/ballquery_cuda_kernel.cu',
21 | 'src/knnquery/knnquery_cuda.cpp',
22 | 'src/knnquery/knnquery_cuda_kernel.cu',
23 | 'src/knnquery_heap/knnquery_heap_cuda.cpp',
24 | 'src/knnquery_heap/knnquery_heap_cuda_kernel.cu',
25 | 'src/grouping/grouping_cuda.cpp',
26 | 'src/grouping/grouping_cuda_kernel.cu',
27 | 'src/grouping_int/grouping_int_cuda.cpp',
28 | 'src/grouping_int/grouping_int_cuda_kernel.cu',
29 | 'src/interpolation/interpolation_cuda.cpp',
30 | 'src/interpolation/interpolation_cuda_kernel.cu',
31 | 'src/sampling/sampling_cuda.cpp',
32 | 'src/sampling/sampling_cuda_kernel.cu',
33 | ],
34 | extra_compile_args={'cxx': ['-g'],
35 | 'nvcc': ['-O2']})
36 | ],
37 | cmdclass={'build_ext': BuildExtension})
38 |
--------------------------------------------------------------------------------
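After running `python3 setup.py install` from `modules/pointops` (as `init.sh` does), the compiled module should be importable under the name passed to `CUDAExtension`; a quick smoke test:

```python
import torch
import pointops_cuda  # module name from CUDAExtension('pointops_cuda', ...)

print(torch.cuda.is_available())  # the kernels themselves require a CUDA device
print(pointops_cuda.__file__)
```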
/classification/modules/pointops/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/classification/modules/pointops/src/__init__.py
--------------------------------------------------------------------------------
/classification/modules/pointops/src/ballquery/ballquery_cuda.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/serialize/tensor.h>
2 | #include <ATen/cuda/CUDAContext.h>
3 | #include <vector>
4 | #include <THC/THC.h>
5 |
6 | #include "ballquery_cuda_kernel.h"
7 |
8 | extern THCState *state;
9 |
10 | #define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x, " must be a CUDAtensor ")
11 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
12 | #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x)
13 |
14 | void ballquery_cuda(int b, int n, int m, float radius, int nsample, at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor idx_tensor)
15 | {
16 |     const float *new_xyz = new_xyz_tensor.data_ptr<float>();
17 |     const float *xyz = xyz_tensor.data_ptr<float>();
18 |     int *idx = idx_tensor.data_ptr<int>();
19 |
20 | ballquery_cuda_launcher(b, n, m, radius, nsample, new_xyz, xyz, idx);
21 | }
22 |
23 |
24 | void ballquery_cuda_fast(int b, int n, int m, float radius, int nsample, at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor idx_tensor)
25 | {
26 | CHECK_INPUT(new_xyz_tensor);
27 | CHECK_INPUT(xyz_tensor);
28 |
29 |     const float *new_xyz = new_xyz_tensor.data_ptr<float>();
30 |     const float *xyz = xyz_tensor.data_ptr<float>();
31 |     int *idx = idx_tensor.data_ptr<int>();
32 |
33 | cudaStream_t stream = at::cuda::getCurrentCUDAStream();
34 |
35 | ballquery_cuda_launcher_fast(b, n, m, radius, nsample, new_xyz, xyz, idx, stream);
36 | }
37 |
--------------------------------------------------------------------------------
/classification/modules/pointops/src/ballquery/ballquery_cuda_kernel.cu:
--------------------------------------------------------------------------------
1 | #include "../cuda_utils.h"
2 | #include "ballquery_cuda_kernel.h"
3 |
4 | // input: new_xyz(b, m, 3) xyz(b, n, 3)
5 | // output: idx(b, m, nsample)
6 | __global__ void ballquery_cuda_kernel(int b, int n, int m, float radius, int nsample, const float *new_xyz, const float *xyz, int *idx)
7 | {
8 | int batch_index = blockIdx.x;
9 | xyz += batch_index * n * 3;
10 | new_xyz += batch_index * m * 3;
11 | idx += m * nsample * batch_index;
12 | int index = threadIdx.x;
13 | int stride = blockDim.x;
14 |
15 | float radius2 = radius * radius;
16 | for (int j = index; j < m; j += stride)
17 | {
18 | float new_x = new_xyz[j * 3 + 0];
19 | float new_y = new_xyz[j * 3 + 1];
20 | float new_z = new_xyz[j * 3 + 2];
21 | for (int k = 0, cnt = 0; k < n && cnt < nsample; ++k)
22 | {
23 | float x = xyz[k * 3 + 0];
24 | float y = xyz[k * 3 + 1];
25 | float z = xyz[k * 3 + 2];
26 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z);
27 | if (d2 < radius2)
28 | {
29 | if (cnt == 0)
30 | {
31 | for (int l = 0; l < nsample; ++l)
32 | idx[j * nsample + l] = k;
33 | }
34 | idx[j * nsample + cnt] = k;
35 | ++cnt;
36 | }
37 | }
38 | }
39 | }
40 |
41 | void ballquery_cuda_launcher(int b, int n, int m, float radius, int nsample, const float *new_xyz, const float *xyz, int *idx)
42 | {
43 |     ballquery_cuda_kernel<<<b, opt_n_threads(m), 0>>>(b, n, m, radius, nsample, new_xyz, xyz, idx);
44 | }
45 |
46 |
47 | __global__ void ballquery_cuda_kernel_fast(int b, int n, int m, float radius, int nsample, const float *__restrict__ new_xyz, const float *__restrict__ xyz, int *__restrict__ idx) {
48 | int bs_idx = blockIdx.y;
49 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
50 | if (bs_idx >= b || pt_idx >= m) return;
51 |
52 | new_xyz += bs_idx * m * 3 + pt_idx * 3;
53 | xyz += bs_idx * n * 3;
54 | idx += bs_idx * m * nsample + pt_idx * nsample;
55 |
56 | float radius2 = radius * radius;
57 | float new_x = new_xyz[0];
58 | float new_y = new_xyz[1];
59 | float new_z = new_xyz[2];
60 |
61 | int cnt = 0;
62 | for (int k = 0; k < n; ++k) {
63 | float x = xyz[k * 3 + 0];
64 | float y = xyz[k * 3 + 1];
65 | float z = xyz[k * 3 + 2];
66 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z);
67 | if (d2 < radius2){
68 | if (cnt == 0){
69 | for (int l = 0; l < nsample; ++l) {
70 | idx[l] = k;
71 | }
72 | }
73 | idx[cnt] = k;
74 | ++cnt;
75 | if (cnt >= nsample){
76 | break;
77 | }
78 | }
79 | }
80 | }
81 |
82 |
83 | void ballquery_cuda_launcher_fast(int b, int n, int m, float radius, int nsample, const float *new_xyz, const float *xyz, int *idx, cudaStream_t stream) {
84 | // param new_xyz: (B, m, 3)
85 | // param xyz: (B, n, 3)
86 | // param idx: (B, m, nsample)
87 |
88 | cudaError_t err;
89 |
90 | dim3 blocks(DIVUP(m, THREADS_PER_BLOCK), b); // blockIdx.x(col), blockIdx.y(row)
91 | dim3 threads(THREADS_PER_BLOCK);
92 |
93 |     ballquery_cuda_kernel_fast<<<blocks, threads, 0, stream>>>(b, n, m, radius, nsample, new_xyz, xyz, idx);
94 | // cudaDeviceSynchronize(); // for using printf in kernel function
95 |
96 | err = cudaGetLastError();
97 | if (cudaSuccess != err) {
98 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
99 | exit(-1);
100 | }
101 | }
102 |
--------------------------------------------------------------------------------
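For reference, the semantics of the kernels above in plain NumPy, including the padding rule (every slot is pre-filled with the first in-radius neighbor, so a query with fewer than `nsample` hits still yields a full, valid row); an explanatory sketch, not a performance path:

```python
import numpy as np

def ballquery_ref(new_xyz, xyz, radius, nsample):
    """new_xyz: (m, 3), xyz: (n, 3) -> idx: (m, nsample); single batch."""
    m, n = len(new_xyz), len(xyz)
    idx = np.zeros((m, nsample), dtype=np.int32)
    for j in range(m):
        cnt = 0
        for k in range(n):
            if np.sum((new_xyz[j] - xyz[k]) ** 2) < radius ** 2:
                if cnt == 0:
                    idx[j, :] = k      # pad every slot with the first hit
                idx[j, cnt] = k
                cnt += 1
                if cnt >= nsample:
                    break
    return idx
```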
/classification/modules/pointops/src/ballquery/ballquery_cuda_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _BALLQUERY_CUDA_KERNEL
2 | #define _BALLQUERY_CUDA_KERNEL
3 | #include <torch/serialize/tensor.h>
4 | #include <vector>
5 | #include <cuda_runtime.h>
6 |
7 | void ballquery_cuda(int b, int n, int m, float radius, int nsample, at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor idx_tensor);
8 |
9 | void ballquery_cuda_fast(int b, int n, int m, float radius, int nsample, at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor idx_tensor);
10 |
11 | #ifdef __cplusplus
12 | extern "C" {
13 | #endif
14 |
15 | void ballquery_cuda_launcher(int b, int n, int m, float radius, int nsample, const float *xyz, const float *new_xyz, int *idx);
16 |
17 | void ballquery_cuda_launcher_fast(int b, int n, int m, float radius, int nsample, const float *new_xyz, const float *xyz, int *idx, cudaStream_t stream);
18 |
19 | #ifdef __cplusplus
20 | }
21 | #endif
22 |
23 | #endif
24 |
--------------------------------------------------------------------------------
/classification/modules/pointops/src/cuda_utils.h:
--------------------------------------------------------------------------------
1 | #ifndef _CUDA_UTILS_H
2 | #define _CUDA_UTILS_H
3 |
4 | #include <cmath>
5 |
6 | #define TOTAL_THREADS 1024
7 |
8 | #define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x, " must be a CUDAtensor ")
9 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
10 | #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x)
11 |
12 | #define THREADS_PER_BLOCK 256
13 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
14 |
15 | inline int opt_n_threads(int work_size) {
16 |     const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0);
17 | return max(min(1 << pow_2, TOTAL_THREADS), 1);
18 | }
19 |
20 | inline dim3 opt_block_config(int x, int y) {
21 | const int x_threads = opt_n_threads(x);
22 | const int y_threads = max(min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1);
23 | dim3 block_config(x_threads, y_threads, 1);
24 | return block_config;
25 | }
26 |
27 | #endif
--------------------------------------------------------------------------------
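The launch-configuration helpers above are simple integer arithmetic; mirrored here in Python for clarity: `DIVUP` is a ceiling division, and `opt_n_threads` rounds the work size down to a power of two, capped at `TOTAL_THREADS` (1024):

```python
import math

def divup(m, n):
    """Ceiling division, mirroring the DIVUP macro."""
    return m // n + (1 if m % n else 0)

def opt_n_threads(work_size):
    """Largest power of two <= work_size, clamped to [1, 1024]."""
    pow_2 = int(math.log(work_size) / math.log(2.0))  # truncates, like the C code
    return max(min(1 << pow_2, 1024), 1)

assert divup(1000, 256) == 4      # 4 blocks of 256 threads cover 1000 items
assert opt_n_threads(300) == 256  # 2^8 is the largest power of two <= 300
```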
/classification/modules/pointops/src/grouping/grouping_cuda.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/serialize/tensor.h>
2 | #include <ATen/cuda/CUDAContext.h>
3 | #include <vector>
4 | #include <THC/THC.h>
5 |
6 | #include "grouping_cuda_kernel.h"
7 |
8 | extern THCState *state;
9 |
10 | void grouping_forward_cuda(int b, int c, int n, int m, int nsample, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor)
11 | {
12 |     const float *points = points_tensor.data_ptr<float>();
13 |     const int *idx = idx_tensor.data_ptr<int>();
14 |     float *out = out_tensor.data_ptr<float>();
15 | grouping_forward_cuda_launcher(b, c, n, m, nsample, points, idx, out);
16 | }
17 |
18 | void grouping_backward_cuda(int b, int c, int n, int m, int nsample, at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor)
19 | {
20 |     float *grad_points = grad_points_tensor.data_ptr<float>();
21 |     const int *idx = idx_tensor.data_ptr<int>();
22 |     const float *grad_out = grad_out_tensor.data_ptr<float>();
23 | grouping_backward_cuda_launcher(b, c, n, m, nsample, grad_out, idx, grad_points);
24 | }
25 |
26 | void grouping_forward_cuda_fast(int b, int c, int n, int npoints, int nsample, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor) {
27 |
28 |     const float *points = points_tensor.data_ptr<float>();
29 |     const int *idx = idx_tensor.data_ptr<int>();
30 |     float *out = out_tensor.data_ptr<float>();
31 | grouping_forward_cuda_launcher_fast(b, c, n, npoints, nsample, points, idx, out);
32 | }
--------------------------------------------------------------------------------
/classification/modules/pointops/src/grouping/grouping_cuda_kernel.cu:
--------------------------------------------------------------------------------
1 | #include "../cuda_utils.h"
2 | #include "grouping_cuda_kernel.h"
3 |
4 | // input: points(b, c, n) idx(b, m, nsample)
5 | // output: out(b, c, m, nsample)
6 | __global__ void grouping_forward_cuda_kernel(int b, int c, int n, int m, int nsample, const float *points, const int *idx, float *out)
7 | {
8 | int batch_index = blockIdx.x;
9 | points += batch_index * n * c;
10 | idx += batch_index * m * nsample;
11 | out += batch_index * m * nsample * c;
12 | const int index = threadIdx.y * blockDim.x + threadIdx.x;
13 | const int stride = blockDim.y * blockDim.x;
14 | for (int i = index; i < c * m; i += stride)
15 | {
16 | const int l = i / m;
17 | const int j = i % m;
18 | for (int k = 0; k < nsample; ++k)
19 | {
20 | int ii = idx[j * nsample + k];
21 | out[(l * m + j) * nsample + k] = points[l * n + ii];
22 | }
23 | }
24 | }
25 |
26 | // input: grad_out(b, c, m, nsample), idx(b, m, nsample)
27 | // output: grad_points(b, c, n)
28 | __global__ void grouping_backward_cuda_kernel(int b, int c, int n, int m, int nsample, const float *grad_out, const int *idx, float *grad_points)
29 | {
30 | int batch_index = blockIdx.x;
31 | grad_out += batch_index * m * nsample * c;
32 | idx += batch_index * m * nsample;
33 | grad_points += batch_index * n * c;
34 | const int index = threadIdx.y * blockDim.x + threadIdx.x;
35 | const int stride = blockDim.y * blockDim.x;
36 | for (int i = index; i < c * m; i += stride)
37 | {
38 | const int l = i / m;
39 | const int j = i % m;
40 | for (int k = 0; k < nsample; ++k)
41 | {
42 | int ii = idx[j * nsample + k];
43 | atomicAdd(grad_points + l * n + ii, grad_out[(l * m + j) * nsample + k]);
44 | }
45 | }
46 | }
47 |
48 | void grouping_forward_cuda_launcher(int b, int c, int n, int m, int nsample, const float *points, const int *idx, float *out)
49 | {
50 |     grouping_forward_cuda_kernel<<<b, opt_block_config(m, c), 0>>>(b, c, n, m, nsample, points, idx, out);
51 | }
52 |
53 | void grouping_backward_cuda_launcher(int b, int c, int n, int m, int nsample, const float *grad_out, const int *idx, float *grad_points)
54 | {
55 |     grouping_backward_cuda_kernel<<<b, opt_block_config(m, c), 0>>>(b, c, n, m, nsample, grad_out, idx, grad_points);
56 | }
57 |
58 | // input: points(b, c, n) idx(b, npoints, nsample)
59 | // output: out(b, c, npoints, nsample)
60 | __global__ void grouping_forward_cuda_kernel_fast(int b, int c, int n, int npoints, int nsample, const float *__restrict__ points, const int *__restrict__ idx, float *__restrict__ out) {
61 | int bs_idx = blockIdx.z;
62 | int c_idx = blockIdx.y;
63 | int index = blockIdx.x * blockDim.x + threadIdx.x;
64 | int pt_idx = index / nsample;
65 | if (bs_idx >= b || c_idx >= c || pt_idx >= npoints) return;
66 |
67 | int sample_idx = index % nsample;
68 |
69 | idx += bs_idx * npoints * nsample + pt_idx * nsample + sample_idx;
70 | int in_idx = bs_idx * c * n + c_idx * n + idx[0];
71 | int out_idx = bs_idx * c * npoints * nsample + c_idx * npoints * nsample + pt_idx * nsample + sample_idx;
72 |
73 | out[out_idx] = points[in_idx];
74 | }
75 |
76 | // input: points(b, c, n) idx(b, npoints, nsample)
77 | // output: out(b, c, npoints, nsample)
78 | void grouping_forward_cuda_launcher_fast(int b, int c, int n, int npoints, int nsample, const float *points, const int *idx, float *out) {
79 |
80 | cudaError_t err;
81 |
82 | dim3 blocks(DIVUP(npoints * nsample, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row)
83 | dim3 threads(THREADS_PER_BLOCK);
84 |
85 |     grouping_forward_cuda_kernel_fast<<<blocks, threads>>>(b, c, n, npoints, nsample, points, idx, out);
86 | // cudaDeviceSynchronize(); // for using printf in kernel function
87 | err = cudaGetLastError();
88 | if (cudaSuccess != err) {
89 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
90 | exit(-1);
91 | }
92 | }
93 |
94 |
95 |
--------------------------------------------------------------------------------
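The forward pass above is a pure gather, and the backward kernel's `atomicAdd` is exactly a scatter-add; a hedged PyTorch equivalent of the same operation, for reference:

```python
import torch

b, c, n, m, nsample = 2, 8, 1024, 256, 32
points = torch.rand(b, c, n, requires_grad=True)   # (b, c, n) input features
idx = torch.randint(0, n, (b, m, nsample))         # (b, m, nsample) neighbor ids

# out[b, c, m, k] = points[b, c, idx[b, m, k]]
flat = idx.view(b, 1, m * nsample).expand(-1, c, -1)      # (b, c, m*nsample)
out = torch.gather(points, 2, flat).view(b, c, m, nsample)

out.sum().backward()  # scatter-adds into points.grad, like the backward kernel
print(out.shape, points.grad.shape)
```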
/classification/modules/pointops/src/grouping/grouping_cuda_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _GROUPING_CUDA_KERNEL
2 | #define _GROUPING_CUDA_KERNEL
3 | #include <torch/serialize/tensor.h>
4 | #include <vector>
5 | #include <cuda_runtime.h>
6 |
7 | void grouping_forward_cuda(int b, int c, int n, int m, int nsample, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out);
8 | void grouping_backward_cuda(int b, int c, int n, int m, int nsample, at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor);
9 |
10 | void grouping_forward_cuda_fast(int b, int c, int n, int npoints, int nsample, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor);
11 |
12 | #ifdef __cplusplus
13 | extern "C" {
14 | #endif
15 |
16 | void grouping_forward_cuda_launcher(int b, int c, int n, int m, int nsample, const float *points, const int *idx, float *out);
17 | void grouping_backward_cuda_launcher(int b, int c, int n, int m, int nsample, const float *grad_out, const int *idx, float *grad_points);
18 |
19 | void grouping_forward_cuda_launcher_fast(int b, int c, int n, int npoints, int nsample, const float *points, const int *idx, float *out);
20 |
21 | #ifdef __cplusplus
22 | }
23 | #endif
24 | #endif
25 |
--------------------------------------------------------------------------------
/classification/modules/pointops/src/grouping_int/grouping_int_cuda.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/serialize/tensor.h>
2 | #include <ATen/cuda/CUDAContext.h>
3 | #include <vector>
4 | #include <THC/THC.h>
5 |
6 | #include "grouping_int_cuda_kernel.h"
7 |
8 | extern THCState *state;
9 |
10 | void grouping_int_forward_cuda(int b, int c, int n, int m, int nsample, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor)
11 | {
12 |     const long int *points = points_tensor.data_ptr<long>();
13 |     const int *idx = idx_tensor.data_ptr<int>();
14 |     long int *out = out_tensor.data_ptr<long>();
15 | grouping_int_forward_cuda_launcher(b, c, n, m, nsample, points, idx, out);
16 | }
17 |
18 | void grouping_int_forward_cuda_fast(int b, int c, int n, int m, int nsample, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor)
19 | {
20 |     const long int *points = points_tensor.data_ptr<long>();
21 |     const int *idx = idx_tensor.data_ptr<int>();
22 |     long int *out = out_tensor.data_ptr<long>();
23 | grouping_int_forward_cuda_launcher_fast(b, c, n, m, nsample, points, idx, out);
24 | }
--------------------------------------------------------------------------------
/classification/modules/pointops/src/grouping_int/grouping_int_cuda_kernel.cu:
--------------------------------------------------------------------------------
1 | #include "../cuda_utils.h"
2 | #include "grouping_int_cuda_kernel.h"
3 |
4 | // input: points(b, c, n) idx(b, m, nsample)
5 | // output: out(b, c, m, nsample)
6 | __global__ void grouping_int_forward_cuda_kernel(int b, int c, int n, int m, int nsample, const long int *points, const int *idx, long int *out)
7 | {
8 | int batch_index = blockIdx.x;
9 | points += batch_index * n * c;
10 | idx += batch_index * m * nsample;
11 | out += batch_index * m * nsample * c;
12 | const int index = threadIdx.y * blockDim.x + threadIdx.x;
13 | const int stride = blockDim.y * blockDim.x;
14 | for (int i = index; i < c * m; i += stride)
15 | {
16 | const int l = i / m;
17 | const int j = i % m;
18 | for (int k = 0; k < nsample; ++k)
19 | {
20 | int ii = idx[j * nsample + k];
21 | out[(l * m + j) * nsample + k] = points[l * n + ii];
22 | }
23 | }
24 | }
25 |
26 |
27 | void grouping_int_forward_cuda_launcher(int b, int c, int n, int m, int nsample, const long int *points, const int *idx, long int *out)
28 | {
29 |     grouping_int_forward_cuda_kernel<<<b, opt_block_config(m, c), 0>>>(b, c, n, m, nsample, points, idx, out);
30 | }
31 |
32 |
33 | __global__ void grouping_int_forward_cuda_kernel_fast(int b, int c, int n, int npoints, int nsample, const long int *__restrict__ points, const int *__restrict__ idx, long int *__restrict__ out)
34 | {
35 | int bs_idx = blockIdx.z;
36 | int c_idx = blockIdx.y;
37 | int index = blockIdx.x * blockDim.x + threadIdx.x;
38 | int pt_idx = index / nsample;
39 | if (bs_idx >= b || c_idx >= c || pt_idx >= npoints) return;
40 |
41 | int sample_idx = index % nsample;
42 |
43 | idx += bs_idx * npoints * nsample + pt_idx * nsample + sample_idx;
44 | int in_idx = bs_idx * c * n + c_idx * n + idx[0];
45 | int out_idx = bs_idx * c * npoints * nsample + c_idx * npoints * nsample + pt_idx * nsample + sample_idx;
46 |
47 | out[out_idx] = points[in_idx];
48 | }
49 |
50 |
51 | void grouping_int_forward_cuda_launcher_fast(int b, int c, int n, int npoints, int nsample, const long int *points, const int *idx, long int *out)
52 | {
53 | cudaError_t err;
54 |
55 | dim3 blocks(DIVUP(npoints * nsample, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row)
56 | dim3 threads(THREADS_PER_BLOCK);
57 |
58 |     grouping_int_forward_cuda_kernel_fast<<<blocks, threads>>>(b, c, n, npoints, nsample, points, idx, out);
59 | // cudaDeviceSynchronize(); // for using printf in kernel function
60 | err = cudaGetLastError();
61 | if (cudaSuccess != err) {
62 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
63 | exit(-1);
64 | }
65 | }
--------------------------------------------------------------------------------
/classification/modules/pointops/src/grouping_int/grouping_int_cuda_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _GROUPING_INT_CUDA_KERNEL
2 | #define _GROUPING_INT_CUDA_KERNEL
3 | #include <torch/serialize/tensor.h>
4 | #include <vector>
5 | #include <cuda_runtime.h>
6 |
7 | void grouping_int_forward_cuda(int b, int c, int n, int m, int nsample, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out);
8 |
9 | void grouping_int_forward_cuda_fast(int b, int c, int n, int m, int nsample, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor);
10 |
11 | #ifdef __cplusplus
12 | extern "C" {
13 | #endif
14 |
15 | void grouping_int_forward_cuda_launcher(int b, int c, int n, int m, int nsample, const long int *points, const int *idx, long int *out);
16 |
17 | void grouping_int_forward_cuda_launcher_fast(int b, int c, int n, int npoints, int nsample, const long int *points, const int *idx, long int *out);
18 |
19 | #ifdef __cplusplus
20 | }
21 | #endif
22 | #endif
23 |
--------------------------------------------------------------------------------
/classification/modules/pointops/src/interpolation/interpolation_cuda.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/serialize/tensor.h>
2 | #include <vector>
3 | #include <THC/THC.h>
4 | #include <ATen/cuda/CUDAContext.h>
5 | #include "interpolation_cuda_kernel.h"
6 |
7 | extern THCState *state;
8 |
9 | void nearestneighbor_cuda(int b, int n, int m, at::Tensor unknown_tensor, at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor)
10 | {
11 | const float *unknown = unknown_tensor.data_ptr<float>();
12 | const float *known = known_tensor.data_ptr<float>();
13 | float *dist2 = dist2_tensor.data_ptr<float>();
14 | int *idx = idx_tensor.data_ptr<int>();
15 | nearestneighbor_cuda_launcher(b, n, m, unknown, known, dist2, idx);
16 | }
17 |
18 | void interpolation_forward_cuda(int b, int c, int m, int n, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor out_tensor)
19 | {
20 | const float *points = points_tensor.data_ptr<float>();
21 | const float *weight = weight_tensor.data_ptr<float>();
22 | float *out = out_tensor.data_ptr<float>();
23 | const int *idx = idx_tensor.data_ptr<int>();
24 | interpolation_forward_cuda_launcher(b, c, m, n, points, idx, weight, out);
25 | }
26 |
27 | void interpolation_backward_cuda(int b, int c, int n, int m, at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_points_tensor)
28 | {
29 | const float *grad_out = grad_out_tensor.data_ptr<float>();
30 | const float *weight = weight_tensor.data_ptr<float>();
31 | float *grad_points = grad_points_tensor.data_ptr<float>();
32 | const int *idx = idx_tensor.data_ptr<int>();
33 | interpolation_backward_cuda_launcher(b, c, n, m, grad_out, idx, weight, grad_points);
34 | }
35 |
36 | void nearestneighbor_cuda_fast(int b, int n, int m, at::Tensor unknown_tensor, at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor) {
37 | const float *unknown = unknown_tensor.data_ptr<float>();
38 | const float *known = known_tensor.data_ptr<float>();
39 | float *dist2 = dist2_tensor.data_ptr<float>();
40 | int *idx = idx_tensor.data_ptr<int>();
41 | nearestneighbor_cuda_launcher_fast(b, n, m, unknown, known, dist2, idx);
42 | }
43 |
44 | void interpolation_forward_cuda_fast(int b, int c, int m, int n, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor out_tensor) {
45 |
46 | const float *points = points_tensor.data_ptr<float>();
47 | const float *weight = weight_tensor.data_ptr<float>();
48 | float *out = out_tensor.data_ptr<float>();
49 | const int *idx = idx_tensor.data_ptr<int>();
50 | interpolation_forward_cuda_launcher_fast(b, c, m, n, points, idx, weight, out);
51 | }
--------------------------------------------------------------------------------
/classification/modules/pointops/src/interpolation/interpolation_cuda_kernel.cu:
--------------------------------------------------------------------------------
1 | #include "../cuda_utils.h"
2 | #include "interpolation_cuda_kernel.h"
3 |
4 | // input: unknown(b, n, 3) known(b, m, 3)
5 | // output: dist2(b, n, 3), idx(b, n, 3)
6 | __global__ void nearestneighbor_cuda_kernel(int b, int n, int m, const float *unknown, const float *known, float *dist2, int *idx)
7 | {
8 | int batch_index = blockIdx.x;
9 | unknown += batch_index * n * 3;
10 | known += batch_index * m * 3;
11 | dist2 += batch_index * n * 3;
12 | idx += batch_index * n * 3;
13 |
14 | int index = threadIdx.x;
15 | int stride = blockDim.x;
16 | for (int j = index; j < n; j += stride)
17 | {
18 | float ux = unknown[j * 3 + 0];
19 | float uy = unknown[j * 3 + 1];
20 | float uz = unknown[j * 3 + 2];
21 |
22 | double best1 = 1e40, best2 = 1e40, best3 = 1e40;
23 | int besti1 = 0, besti2 = 0, besti3 = 0;
24 | for (int k = 0; k < m; ++k)
25 | {
26 | float x = known[k * 3 + 0];
27 | float y = known[k * 3 + 1];
28 | float z = known[k * 3 + 2];
29 | float d =
30 | (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z);
31 | if (d < best1)
32 | {
33 | best3 = best2;
34 | besti3 = besti2;
35 | best2 = best1;
36 | besti2 = besti1;
37 | best1 = d;
38 | besti1 = k;
39 | }
40 | else if (d < best2)
41 | {
42 | best3 = best2;
43 | besti3 = besti2;
44 | best2 = d;
45 | besti2 = k;
46 | }
47 | else if (d < best3)
48 | {
49 | best3 = d;
50 | besti3 = k;
51 | }
52 | }
53 | dist2[j * 3 + 0] = best1;
54 | dist2[j * 3 + 1] = best2;
55 | dist2[j * 3 + 2] = best3;
56 | idx[j * 3 + 0] = besti1;
57 | idx[j * 3 + 1] = besti2;
58 | idx[j * 3 + 2] = besti3;
59 | }
60 | }
61 |
62 | // input: points(b, c, m), idx(b, n, 3), weight(b, n, 3)
63 | // output: out(b, c, n)
64 | __global__ void interpolation_forward_cuda_kernel(int b, int c, int m, int n, const float *points, const int *idx, const float *weight, float *out)
65 | {
66 | int batch_index = blockIdx.x;
67 | points += batch_index * m * c;
68 | idx += batch_index * n * 3;
69 | weight += batch_index * n * 3;
70 | out += batch_index * n * c;
71 |
72 | const int index = threadIdx.y * blockDim.x + threadIdx.x;
73 | const int stride = blockDim.y * blockDim.x;
74 | for (int i = index; i < c * n; i += stride)
75 | {
76 | const int l = i / n;
77 | const int j = i % n;
78 | float w1 = weight[j * 3 + 0];
79 | float w2 = weight[j * 3 + 1];
80 | float w3 = weight[j * 3 + 2];
81 | int i1 = idx[j * 3 + 0];
82 | int i2 = idx[j * 3 + 1];
83 | int i3 = idx[j * 3 + 2];
84 | out[i] = points[l * m + i1] * w1 + points[l * m + i2] * w2 + points[l * m + i3] * w3;
85 | }
86 | }
87 |
88 | // input: grad_out(b, c, n), idx(b, n, 3), weight(b, n, 3)
89 | // output: grad_points(b, c, m)
90 | __global__ void interpolation_backward_cuda_kernel( int b, int c, int n, int m, const float *grad_out, const int *idx, const float *weight, float *grad_points)
91 | {
92 | int batch_index = blockIdx.x;
93 | grad_out += batch_index * n * c;
94 | idx += batch_index * n * 3;
95 | weight += batch_index * n * 3;
96 | grad_points += batch_index * m * c;
97 |
98 | const int index = threadIdx.y * blockDim.x + threadIdx.x;
99 | const int stride = blockDim.y * blockDim.x;
100 | for (int i = index; i < c * n; i += stride)
101 | {
102 | const int l = i / n;
103 | const int j = i % n;
104 | float w1 = weight[j * 3 + 0];
105 | float w2 = weight[j * 3 + 1];
106 | float w3 = weight[j * 3 + 2];
107 | int i1 = idx[j * 3 + 0];
108 | int i2 = idx[j * 3 + 1];
109 | int i3 = idx[j * 3 + 2];
110 | atomicAdd(grad_points + l * m + i1, grad_out[i] * w1);
111 | atomicAdd(grad_points + l * m + i2, grad_out[i] * w2);
112 | atomicAdd(grad_points + l * m + i3, grad_out[i] * w3);
113 | }
114 | }
115 |
116 | void nearestneighbor_cuda_launcher(int b, int n, int m, const float *unknown, const float *known, float *dist2, int *idx)
117 | {
118 | nearestneighbor_cuda_kernel<<<b, opt_n_threads(n), 0>>>(b, n, m, unknown, known, dist2, idx);
119 | }
120 |
121 | void interpolation_forward_cuda_launcher(int b, int c, int m, int n, const float *points, const int *idx, const float *weight, float *out)
122 | {
123 | interpolation_forward_cuda_kernel<<<b, opt_block_config(n, c), 0>>>(b, c, m, n, points, idx, weight, out);
124 | }
125 |
126 | void interpolation_backward_cuda_launcher(int b, int c, int n, int m, const float *grad_out, const int *idx, const float *weight, float *grad_points)
127 | {
128 | interpolation_backward_cuda_kernel<<<b, opt_block_config(n, c), 0>>>(b, c, n, m, grad_out, idx, weight, grad_points);
129 | }
130 |
131 |
132 | // input: unknown(b, n, 3) known(b, m, 3)
133 | // output: dist2(b, n, 3), idx(b, n, 3)
134 | __global__ void nearestneighbor_cuda_kernel_fast(int b, int n, int m, const float *__restrict__ unknown, const float *__restrict__ known, float *__restrict__ dist2, int *__restrict__ idx) {
135 |
136 | int bs_idx = blockIdx.y;
137 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
138 | if (bs_idx >= b || pt_idx >= n) return;
139 |
140 | unknown += bs_idx * n * 3 + pt_idx * 3;
141 | known += bs_idx * m * 3;
142 | dist2 += bs_idx * n * 3 + pt_idx * 3;
143 | idx += bs_idx * n * 3 + pt_idx * 3;
144 |
145 | float ux = unknown[0];
146 | float uy = unknown[1];
147 | float uz = unknown[2];
148 |
149 | double best1 = 1e40, best2 = 1e40, best3 = 1e40;
150 | int besti1 = 0, besti2 = 0, besti3 = 0;
151 | for (int k = 0; k < m; ++k) {
152 | float x = known[k * 3 + 0];
153 | float y = known[k * 3 + 1];
154 | float z = known[k * 3 + 2];
155 | float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z);
156 | if (d < best1) {
157 | best3 = best2; besti3 = besti2;
158 | best2 = best1; besti2 = besti1;
159 | best1 = d; besti1 = k;
160 | }
161 | else if (d < best2) {
162 | best3 = best2; besti3 = besti2;
163 | best2 = d; besti2 = k;
164 | }
165 | else if (d < best3) {
166 | best3 = d; besti3 = k;
167 | }
168 | }
169 | dist2[0] = best1;
170 | dist2[1] = best2;
171 | dist2[2] = best3;
172 |
173 | idx[0] = besti1;
174 | idx[1] = besti2;
175 | idx[2] = besti3;
176 | }
177 |
178 |
179 | // input: points(b, c, m), idx(b, n, 3), weight(b, n, 3)
180 | // output: out(b, c, n)
181 | __global__ void interpolation_forward_cuda_kernel_fast(int b, int c, int m, int n, const float *__restrict__ points, const int *__restrict__ idx, const float *__restrict__ weight, float *__restrict__ out) {
182 |
183 | int bs_idx = blockIdx.z;
184 | int c_idx = blockIdx.y;
185 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
186 |
187 | if (bs_idx >= b || c_idx >= c || pt_idx >= n) return;
188 |
189 | weight += bs_idx * n * 3 + pt_idx * 3;
190 | points += bs_idx * c * m + c_idx * m;
191 | idx += bs_idx * n * 3 + pt_idx * 3;
192 | out += bs_idx * c * n + c_idx * n;
193 |
194 | out[pt_idx] = weight[0] * points[idx[0]] + weight[1] * points[idx[1]] + weight[2] * points[idx[2]];
195 | }
196 |
197 |
198 | void nearestneighbor_cuda_launcher_fast(int b, int n, int m, const float *unknown, const float *known, float *dist2, int *idx)
199 | {
200 | cudaError_t err;
201 |
202 | dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), b); // blockIdx.x(col), blockIdx.y(row)
203 | dim3 threads(THREADS_PER_BLOCK);
204 |
205 | nearestneighbor_cuda_kernel_fast<<<blocks, threads>>>(b, n, m, unknown, known, dist2, idx);
206 |
207 | err = cudaGetLastError();
208 | if (cudaSuccess != err) {
209 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
210 | exit(-1);
211 | }
212 | }
213 |
214 | void interpolation_forward_cuda_launcher_fast(int b, int c, int m, int n, const float *points, const int *idx, const float *weight, float *out) {
215 |
216 | cudaError_t err;
217 |
218 | dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row)
219 | dim3 threads(THREADS_PER_BLOCK);
220 | interpolation_forward_cuda_kernel_fast<<<blocks, threads>>>(b, c, m, n, points, idx, weight, out);
221 |
222 | err = cudaGetLastError();
223 | if (cudaSuccess != err) {
224 | fprintf(stderr, "CUDA kernel failed : %s\n",
225 | cudaGetErrorString(err));
226 | exit(-1);
227 | }
228 | }
229 |
--------------------------------------------------------------------------------
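
Taken together, `nearestneighbor` produces the three nearest known points per query (sorted by squared distance) and `interpolation_forward` blends their features with caller-supplied weights. A compact PyTorch sketch of the combined semantics; the CUDA path receives the weights precomputed, so the inverse-distance weighting shown here is an assumption for illustration:

```python
import torch

def three_nn_interpolate_reference(unknown, known, feats):
    """unknown: (B, n, 3), known: (B, m, 3), feats: (B, C, m) -> (B, C, n)."""
    d2 = torch.cdist(unknown, known).pow(2)          # (B, n, m) squared distances
    dist2, idx = d2.topk(3, dim=-1, largest=False)   # 3-NN, ascending like the kernel
    w = 1.0 / (dist2 + 1e-8)
    w = w / w.sum(dim=-1, keepdim=True)              # inverse-distance weights (assumed)
    B, C, m = feats.shape
    n = unknown.shape[1]
    gathered = torch.gather(feats.unsqueeze(2).expand(B, C, n, m), 3,
                            idx.unsqueeze(1).expand(B, C, n, 3))
    return (gathered * w.unsqueeze(1)).sum(-1)       # weighted sum over 3 neighbors
```
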
/classification/modules/pointops/src/interpolation/interpolation_cuda_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _INTERPOLATION_CUDA_KERNEL
2 | #define _INTERPOLATION_CUDA_KERNEL
3 | #include <torch/serialize/tensor.h>
4 | #include <vector>
5 | #include <ATen/cuda/CUDAContext.h>
6 |
7 | void nearestneighbor_cuda(int b, int n, int m, at::Tensor unknown_tensor, at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor);
8 | void interpolation_forward_cuda(int b, int c, int m, int n, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor out_tensor);
9 | void interpolation_backward_cuda(int b, int c, int n, int m, at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_points_tensor);
10 |
11 | void nearestneighbor_cuda_fast(int b, int n, int m, at::Tensor unknown_tensor, at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor);
12 | void interpolation_forward_cuda_fast(int b, int c, int m, int n, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor out_tensor);
13 |
14 | #ifdef __cplusplus
15 | extern "C" {
16 | #endif
17 |
18 | void nearestneighbor_cuda_launcher(int b, int n, int m, const float *unknown, const float *known, float *dist2, int *idx);
19 | void interpolation_forward_cuda_launcher(int b, int c, int m, int n, const float *points, const int *idx, const float *weight, float *out);
20 | void interpolation_backward_cuda_launcher(int b, int c, int n, int m, const float *grad_out, const int *idx, const float *weight, float *grad_points);
21 |
22 | void nearestneighbor_cuda_launcher_fast(int b, int n, int m, const float *unknown, const float *known, float *dist2, int *idx);
23 | void interpolation_forward_cuda_launcher_fast(int b, int c, int m, int n, const float *points, const int *idx, const float *weight, float *out);
24 |
25 | #ifdef __cplusplus
26 | }
27 | #endif
28 | #endif
29 |
--------------------------------------------------------------------------------
/classification/modules/pointops/src/knnquery/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/classification/modules/pointops/src/knnquery/__init__.py
--------------------------------------------------------------------------------
/classification/modules/pointops/src/knnquery/knnquery_cuda.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/serialize/tensor.h>
2 | #include <vector>
3 | #include <THC/THC.h>
4 | #include <ATen/cuda/CUDAContext.h>
5 |
6 | #include "knnquery_cuda_kernel.h"
7 |
8 | extern THCState *state;
9 |
10 | #define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x, " must be a CUDA tensor ")
11 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
12 | #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x)
13 |
14 |
15 | void knnquery_cuda(int b, int n, int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor)
16 | {
17 | CHECK_INPUT(new_xyz_tensor);
18 | CHECK_INPUT(xyz_tensor);
19 |
20 | const float *new_xyz = new_xyz_tensor.data_ptr<float>();
21 | const float *xyz = xyz_tensor.data_ptr<float>();
22 | int *idx = idx_tensor.data_ptr<int>();
23 | float *dist2 = dist2_tensor.data_ptr<float>();
24 |
25 | cudaStream_t stream = at::cuda::getCurrentCUDAStream();
26 |
27 | knnquery_cuda_launcher(b, n, m, nsample, xyz, new_xyz, idx, dist2, stream);
28 | }
29 |
--------------------------------------------------------------------------------
/classification/modules/pointops/src/knnquery/knnquery_cuda_kernel.cu:
--------------------------------------------------------------------------------
1 | #include "../cuda_utils.h"
2 | #include "knnquery_cuda_kernel.h"
3 |
4 | // input: xyz (b, n, 3) new_xyz (b, m, 3)
5 | // output: idx (b, m, nsample) dist2 (b, m, nsample)
6 | __global__ void knnquery_cuda_kernel(int b, int n, int m, int nsample, const float *__restrict__ xyz, const float *__restrict__ new_xyz, int *__restrict__ idx, float *__restrict__ dist2) {
7 | int bs_idx = blockIdx.y;
8 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
9 | if (bs_idx >= b || pt_idx >= m) return;
10 |
11 | new_xyz += bs_idx * m * 3 + pt_idx * 3;
12 | xyz += bs_idx * n * 3;
13 | idx += bs_idx * m * nsample + pt_idx * nsample;
14 | dist2 += bs_idx * m * nsample + pt_idx * nsample;
15 | float new_x = new_xyz[0];
16 | float new_y = new_xyz[1];
17 | float new_z = new_xyz[2];
18 |
19 | //double* best = new double[nsample];
20 | //int* besti = new int[nsample];
21 | double best[200];
22 | int besti[200];
23 | for(int i = 0; i < nsample; i++){
24 | best[i] = 1e40;
25 | besti[i] = 0;
26 | }
27 | for(int k = 0; k < n; k++){
28 | float x = xyz[k * 3 + 0];
29 | float y = xyz[k * 3 + 1];
30 | float z = xyz[k * 3 + 2];
31 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z);
32 | for(int j = 0; j < nsample; j++){
33 | if(d2 < best[j]){
34 | for(int i = nsample - 1; i > j; i--){
35 | best[i] = best[i - 1];
36 | besti[i] = besti[i - 1];
37 | }
38 | best[j] = d2;
39 | besti[j] = k;
40 | break;
41 | }
42 | }
43 | }
44 | for(int i = 0; i < nsample; i++){
45 | idx[i] = besti[i];
46 | dist2[i] = best[i];
47 | }
48 | //delete []best;
49 | //delete []besti;
50 | }
51 |
52 |
53 | void knnquery_cuda_launcher(int b, int n, int m, int nsample, const float *xyz, const float *new_xyz, int *idx, float *dist2, cudaStream_t stream) {
54 | // param new_xyz: (B, m, 3)
55 | // param xyz: (B, n, 3)
56 | // param idx: (B, m, nsample)
57 |
58 | cudaError_t err;
59 |
60 | dim3 blocks(DIVUP(m, THREADS_PER_BLOCK), b); // blockIdx.x(col), blockIdx.y(row)
61 | dim3 threads(THREADS_PER_BLOCK);
62 |
63 | // fprintf('%d, %d', blocks, threads);
64 | knnquery_cuda_kernel<<<blocks, threads, 0, stream>>>(b, n, m, nsample, xyz, new_xyz, idx, dist2);
65 | // cudaDeviceSynchronize(); // for using printf in kernel function
66 |
67 | // err = cudaGetLastError();
68 | // if (cudaSuccess != err) {
69 | // fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
70 | // exit(-1);
71 | // }
72 | }
--------------------------------------------------------------------------------
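
Note the fixed `best[200]` buffers above: this kernel assumes nsample <= 200. Its output contract is easy to state in PyTorch, which is handy when unit-testing the extension (reference only, not part of the library):

```python
import torch

def knnquery_reference(nsample, xyz, new_xyz):
    """xyz: (B, n, 3), new_xyz: (B, m, 3) -> idx (B, m, nsample), dist2 (B, m, nsample)."""
    d2 = torch.cdist(new_xyz, xyz).pow(2)                 # (B, m, n)
    dist2, idx = d2.topk(nsample, dim=-1, largest=False)  # ascending squared distances
    return idx.int(), dist2
```
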
/classification/modules/pointops/src/knnquery/knnquery_cuda_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _KNNQUERY_CUDA_KERNEL
2 | #define _KNNQUERY_CUDA_KERNEL
3 |
4 | #include <torch/serialize/tensor.h>
5 | #include <vector>
6 | #include <ATen/cuda/CUDAContext.h>
7 |
8 | void knnquery_cuda(int b, int n, int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor);
9 |
10 | #ifdef __cplusplus
11 | extern "C" {
12 | #endif
13 |
14 | void knnquery_cuda_launcher(int b, int n, int m, int nsample, const float *xyz, const float *new_xyz, int *idx, float *dist2, cudaStream_t stream);
15 |
16 | #ifdef __cplusplus
17 | }
18 | #endif
19 |
20 | #endif
--------------------------------------------------------------------------------
/classification/modules/pointops/src/knnquery_heap/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/classification/modules/pointops/src/knnquery_heap/__init__.py
--------------------------------------------------------------------------------
/classification/modules/pointops/src/knnquery_heap/knnquery_heap_cuda.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/serialize/tensor.h>
2 | #include <vector>
3 | #include <THC/THC.h>
4 | #include <ATen/cuda/CUDAContext.h>
5 |
6 | #include "knnquery_heap_cuda_kernel.h"
7 |
8 | extern THCState *state;
9 |
10 | #define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x, " must be a CUDA tensor ")
11 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
12 | #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x)
13 |
14 |
15 | void knnquery_heap_cuda(int b, int n, int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor)
16 | {
17 | CHECK_INPUT(new_xyz_tensor);
18 | CHECK_INPUT(xyz_tensor);
19 |
20 | const float *new_xyz = new_xyz_tensor.data_ptr<float>();
21 | const float *xyz = xyz_tensor.data_ptr<float>();
22 | int *idx = idx_tensor.data_ptr<int>();
23 | float *dist2 = dist2_tensor.data_ptr<float>();
24 |
25 | cudaStream_t stream = at::cuda::getCurrentCUDAStream();
26 |
27 | knnquery_heap_cuda_launcher(b, n, m, nsample, xyz, new_xyz, idx, dist2, stream);
28 | }
29 |
--------------------------------------------------------------------------------
/classification/modules/pointops/src/knnquery_heap/knnquery_heap_cuda_kernel.cu:
--------------------------------------------------------------------------------
1 | #include "../cuda_utils.h"
2 | #include "knnquery_heap_cuda_kernel.h"
3 |
4 |
5 | __device__ void swap_float(float *x, float *y)
6 | {
7 | float tmp = *x;
8 | *x = *y;
9 | *y = tmp;
10 | }
11 |
12 |
13 | __device__ void swap_int(int *x, int *y)
14 | {
15 | int tmp = *x;
16 | *x = *y;
17 | *y = tmp;
18 | }
19 |
20 |
21 | __device__ void reheap(float *dist, int *idx, int k)
22 | {
23 | int root = 0;
24 | int child = root * 2 + 1;
25 | while (child < k)
26 | {
27 | if(child + 1 < k && dist[child+1] > dist[child])
28 | child++;
29 | if(dist[root] > dist[child])
30 | return;
31 | swap_float(&dist[root], &dist[child]);
32 | swap_int(&idx[root], &idx[child]);
33 | root = child;
34 | child = root * 2 + 1;
35 | }
36 | }
37 |
38 |
39 | __device__ void heap_sort(float *dist, int *idx, int k)
40 | {
41 | int i;
42 | for (i = k - 1; i > 0; i--)
43 | {
44 | swap_float(&dist[0], &dist[i]);
45 | swap_int(&idx[0], &idx[i]);
46 | reheap(dist, idx, i);
47 | }
48 | }
49 |
50 |
51 | // input: xyz (b, n, 3) new_xyz (b, m, 3)
52 | // output: idx (b, m, nsample) dist2 (b, m, nsample)
53 | __global__ void knnquery_heap_cuda_kernel(int b, int n, int m, int nsample, const float *__restrict__ xyz, const float *__restrict__ new_xyz, int *__restrict__ idx, float *__restrict__ dist2) {
54 | int bs_idx = blockIdx.y;
55 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
56 | if (bs_idx >= b || pt_idx >= m) return;
57 |
58 | new_xyz += bs_idx * m * 3 + pt_idx * 3;
59 | xyz += bs_idx * n * 3;
60 | idx += bs_idx * m * nsample + pt_idx * nsample;
61 | dist2 += bs_idx * m * nsample + pt_idx * nsample;
62 |
63 | float new_x = new_xyz[0];
64 | float new_y = new_xyz[1];
65 | float new_z = new_xyz[2];
66 |
67 | float best_dist[100];
68 | int best_idx[100];
69 | for(int i = 0; i < nsample; i++){
70 | best_dist[i] = 1e10;
71 | best_idx[i] = 0;
72 | }
73 | for(int i = 0; i < n; i++){
74 | float x = xyz[i * 3 + 0];
75 | float y = xyz[i * 3 + 1];
76 | float z = xyz[i * 3 + 2];
77 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z);
78 | if (d2 < best_dist[0]){
79 | best_dist[0] = d2;
80 | best_idx[0] = i;
81 | reheap(best_dist, best_idx, nsample);
82 | }
83 | }
84 | heap_sort(best_dist, best_idx, nsample);
85 | for(int i = 0; i < nsample; i++){
86 | idx[i] = best_idx[i];
87 | dist2[i] = best_dist[i];
88 | }
89 | }
90 |
91 |
92 | void knnquery_heap_cuda_launcher(int b, int n, int m, int nsample, const float *xyz, const float *new_xyz, int *idx, float *dist2, cudaStream_t stream) {
93 | // param new_xyz: (B, m, 3)
94 | // param xyz: (B, n, 3)
95 | // param idx: (B, m, nsample)
96 |
97 | cudaError_t err;
98 |
99 | dim3 blocks(DIVUP(m, THREADS_PER_BLOCK), b); // blockIdx.x(col), blockIdx.y(row)
100 | dim3 threads(THREADS_PER_BLOCK);
101 |
102 | knnquery_heap_cuda_kernel<<<blocks, threads, 0, stream>>>(b, n, m, nsample, xyz, new_xyz, idx, dist2);
103 | // cudaDeviceSynchronize(); // for using printf in kernel function
104 |
105 | err = cudaGetLastError();
106 | if (cudaSuccess != err) {
107 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
108 | exit(-1);
109 | }
110 | }
--------------------------------------------------------------------------------
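
Compared to the insertion sort in `knnquery`, this variant maintains a max-heap of the current nsample best squared distances (the root holds the worst kept candidate; the `best_dist[100]` buffer caps nsample at 100), so each of the n candidates costs O(log nsample) instead of O(nsample); `heap_sort` then emits the results in ascending order. The same selection logic in plain Python, for illustration only:

```python
import heapq

def knn_heap_reference(nsample, query, points):
    """query: (x, y, z); points: iterable of (x, y, z) -> (idx, dist2), ascending."""
    heap = []  # min-heap of (-dist2, index), emulating the kernel's max-heap
    for i, (x, y, z) in enumerate(points):
        d2 = (query[0] - x) ** 2 + (query[1] - y) ** 2 + (query[2] - z) ** 2
        if len(heap) < nsample:
            heapq.heappush(heap, (-d2, i))
        elif -d2 > heap[0][0]:                  # closer than the current worst
            heapq.heapreplace(heap, (-d2, i))
    pairs = sorted((-nd, i) for nd, i in heap)  # ascending by distance
    return [i for _, i in pairs], [d for d, _ in pairs]
```
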
/classification/modules/pointops/src/knnquery_heap/knnquery_heap_cuda_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _KNNQUERY_HEAP_CUDA_KERNEL
2 | #define _KNNQUERY_HEAP_CUDA_KERNEL
3 |
4 | #include <torch/serialize/tensor.h>
5 | #include <vector>
6 | #include <ATen/cuda/CUDAContext.h>
7 |
8 | void knnquery_heap_cuda(int b, int n, int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor);
9 |
10 | #ifdef __cplusplus
11 | extern "C" {
12 | #endif
13 |
14 | void knnquery_heap_cuda_launcher(int b, int n, int m, int nsample, const float *xyz, const float *new_xyz, int *idx, float *dist2, cudaStream_t stream);
15 |
16 | #ifdef __cplusplus
17 | }
18 | #endif
19 |
20 | #endif
--------------------------------------------------------------------------------
/classification/modules/pointops/src/pointops_api.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/serialize/tensor.h>
2 | #include <torch/extension.h>
3 |
4 | #include "ballquery/ballquery_cuda_kernel.h"
5 | #include "grouping/grouping_cuda_kernel.h"
6 | #include "grouping_int/grouping_int_cuda_kernel.h"
7 | #include "sampling/sampling_cuda_kernel.h"
8 | #include "interpolation/interpolation_cuda_kernel.h"
9 | #include "knnquery/knnquery_cuda_kernel.h"
10 | #include "knnquery_heap/knnquery_heap_cuda_kernel.h"
11 |
12 |
13 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
14 | m.def("ballquery_cuda", &ballquery_cuda_fast, "ballquery_cuda_fast"); // name in python, cpp function address, docs
15 |
16 | m.def("knnquery_cuda", &knnquery_cuda, "knnquery_cuda");
17 | m.def("knnquery_heap_cuda", &knnquery_heap_cuda, "knnquery_heap_cuda");
18 |
19 | m.def("grouping_forward_cuda", &grouping_forward_cuda_fast, "grouping_forward_cuda_fast");
20 | m.def("grouping_backward_cuda", &grouping_backward_cuda, "grouping_backward_cuda");
21 |
22 | m.def("grouping_int_forward_cuda", &grouping_int_forward_cuda_fast, "grouping_int_forward_cuda_fast");
23 |
24 | m.def("gathering_forward_cuda", &gathering_forward_cuda, "gathering_forward_cuda");
25 | m.def("gathering_backward_cuda", &gathering_backward_cuda, "gathering_backward_cuda");
26 | m.def("furthestsampling_cuda", &furthestsampling_cuda, "furthestsampling_cuda");
27 |
28 | m.def("nearestneighbor_cuda", &nearestneighbor_cuda_fast, "nearestneighbor_cuda_fast");
29 | m.def("interpolation_forward_cuda", &interpolation_forward_cuda_fast, "interpolation_forward_cuda_fast");
30 | m.def("interpolation_backward_cuda", &interpolation_backward_cuda, "interpolation_backward_cuda");
31 | }
32 |
--------------------------------------------------------------------------------
/classification/modules/pointops/src/sampling/sampling_cuda.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/serialize/tensor.h>
2 | #include <vector>
3 | #include <THC/THC.h>
4 | #include <ATen/cuda/CUDAContext.h>
5 | #include "sampling_cuda_kernel.h"
6 |
7 | extern THCState *state;
8 |
9 | void gathering_forward_cuda(int b, int c, int n, int m, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor)
10 | {
11 | const float *points = points_tensor.data_ptr<float>();
12 | const int *idx = idx_tensor.data_ptr<int>();
13 | float *out = out_tensor.data_ptr<float>();
14 | gathering_forward_cuda_launcher(b, c, n, m, points, idx, out);
15 | }
16 |
17 | void gathering_backward_cuda(int b, int c, int n, int m, at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor)
18 | {
19 |
20 | const float *grad_out = grad_out_tensor.data_ptr<float>();
21 | const int *idx = idx_tensor.data_ptr<int>();
22 | float *grad_points = grad_points_tensor.data_ptr<float>();
23 | gathering_backward_cuda_launcher(b, c, n, m, grad_out, idx, grad_points);
24 | }
25 |
26 | void furthestsampling_cuda(int b, int n, int m, at::Tensor points_tensor, at::Tensor temp_tensor, at::Tensor idx_tensor)
27 | {
28 | const float *points = points_tensor.data_ptr<float>();
29 | float *temp = temp_tensor.data_ptr<float>();
30 | int *idx = idx_tensor.data_ptr<int>();
31 | furthestsampling_cuda_launcher(b, n, m, points, temp, idx);
32 | }
33 |
--------------------------------------------------------------------------------
/classification/modules/pointops/src/sampling/sampling_cuda_kernel.cu:
--------------------------------------------------------------------------------
1 | #include "../cuda_utils.h"
2 | #include "sampling_cuda_kernel.h"
3 |
4 | // input: points(b, c, n) idx(b, m)
5 | // output: out(b, c, m)
6 | __global__ void gathering_forward_cuda_kernel(int b, int c, int n, int m, const float *points, const int *idx, float *out)
7 | {
8 | for (int i = blockIdx.x; i < b; i += gridDim.x)
9 | {
10 | for (int l = blockIdx.y; l < c; l += gridDim.y)
11 | {
12 | for (int j = threadIdx.x; j < m; j += blockDim.x)
13 | {
14 | int a = idx[i * m + j];
15 | out[(i * c + l) * m + j] = points[(i * c + l) * n + a];
16 | }
17 | }
18 | }
19 | }
20 |
21 | // input: grad_out(b, c, m) idx(b, m)
22 | // output: grad_points(b, c, n)
23 | __global__ void gathering_backward_cuda_kernel(int b, int c, int n, int m, const float *grad_out, const int *idx, float *grad_points)
24 | {
25 | for (int i = blockIdx.x; i < b; i += gridDim.x)
26 | {
27 | for (int l = blockIdx.y; l < c; l += gridDim.y)
28 | {
29 | for (int j = threadIdx.x; j < m; j += blockDim.x)
30 | {
31 | int a = idx[i * m + j];
32 | atomicAdd(grad_points + (i * c + l) * n + a, grad_out[(i * c + l) * m + j]);
33 | }
34 | }
35 | }
36 | }
37 |
38 | void gathering_forward_cuda_launcher(int b, int c, int n, int m, const float *points, const int *idx, float *out)
39 | {
40 | gathering_forward_cuda_kernel<<<dim3(b, c, 1), opt_n_threads(m), 0>>>(b, c, n, m, points, idx, out);
41 | }
42 |
43 | void gathering_backward_cuda_launcher(int b, int c, int n, int m, const float *grad_out, const int *idx, float *grad_points)
44 | {
45 | gathering_backward_cuda_kernel<<<dim3(b, c, 1), opt_n_threads(m), 0>>>(b, c, n, m, grad_out, idx, grad_points);
46 | }
47 |
48 | __device__ void __update(float *dists, int *dists_i,
49 | int idx1, int idx2) {
50 | const float v1 = dists[idx1], v2 = dists[idx2];
51 | const int i1 = dists_i[idx1], i2 = dists_i[idx2];
52 | dists[idx1] = max(v1, v2);
53 | dists_i[idx1] = v2 > v1 ? i2 : i1;
54 | }
55 |
56 | // Input dataset: (b, n, 3), tmp: (b, n)
57 | // Output idxs (b, m)
58 | template <unsigned int block_size>
59 | __global__ void furthestsampling_cuda_kernel(int b, int n, int m, const float *dataset, float *temp, int *idxs)
60 | {
61 | if (m <= 0)
62 | return;
63 | __shared__ float dists[block_size];
64 | __shared__ int dists_i[block_size];
65 |
66 | int batch_index = blockIdx.x;
67 | dataset += batch_index * n * 3;
68 | temp += batch_index * n;
69 | idxs += batch_index * m;
70 | int tid = threadIdx.x;
71 | const int stride = block_size;
72 | int old = 0;
73 | if (threadIdx.x == 0)
74 | idxs[0] = old;
75 |
76 | __syncthreads();
77 | for (int j = 1; j < m; j++)
78 | {
79 | int besti = 0;
80 | float best = -1;
81 | float x1 = dataset[old * 3 + 0];
82 | float y1 = dataset[old * 3 + 1];
83 | float z1 = dataset[old * 3 + 2];
84 | for (int k = tid; k < n; k += stride)
85 | {
86 | float x2, y2, z2;
87 | x2 = dataset[k * 3 + 0];
88 | y2 = dataset[k * 3 + 1];
89 | z2 = dataset[k * 3 + 2];
90 | //float mag = (x2 * x2) + (y2 * y2) + (z2 * z2);
91 | //if (mag <= 1e-3)
92 | // continue;
93 | float d = (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1);
94 | float d2 = min(d, temp[k]);
95 | temp[k] = d2;
96 | besti = d2 > best ? k : besti;
97 | best = d2 > best ? d2 : best;
98 | }
99 | dists[tid] = best;
100 | dists_i[tid] = besti;
101 | __syncthreads();
102 |
103 | if (block_size >= 1024) {
104 | if (tid < 512) {
105 | __update(dists, dists_i, tid, tid + 512);
106 | }
107 | __syncthreads();
108 | }
109 | if (block_size >= 512) {
110 | if (tid < 256) {
111 | __update(dists, dists_i, tid, tid + 256);
112 | }
113 | __syncthreads();
114 | }
115 | if (block_size >= 256) {
116 | if (tid < 128) {
117 | __update(dists, dists_i, tid, tid + 128);
118 | }
119 | __syncthreads();
120 | }
121 | if (block_size >= 128) {
122 | if (tid < 64) {
123 | __update(dists, dists_i, tid, tid + 64);
124 | }
125 | __syncthreads();
126 | }
127 | if (block_size >= 64) {
128 | if (tid < 32) {
129 | __update(dists, dists_i, tid, tid + 32);
130 | }
131 | __syncthreads();
132 | }
133 | if (block_size >= 32) {
134 | if (tid < 16) {
135 | __update(dists, dists_i, tid, tid + 16);
136 | }
137 | __syncthreads();
138 | }
139 | if (block_size >= 16) {
140 | if (tid < 8) {
141 | __update(dists, dists_i, tid, tid + 8);
142 | }
143 | __syncthreads();
144 | }
145 | if (block_size >= 8) {
146 | if (tid < 4) {
147 | __update(dists, dists_i, tid, tid + 4);
148 | }
149 | __syncthreads();
150 | }
151 | if (block_size >= 4) {
152 | if (tid < 2) {
153 | __update(dists, dists_i, tid, tid + 2);
154 | }
155 | __syncthreads();
156 | }
157 | if (block_size >= 2) {
158 | if (tid < 1) {
159 | __update(dists, dists_i, tid, tid + 1);
160 | }
161 | __syncthreads();
162 | }
163 |
164 | old = dists_i[0];
165 | if (tid == 0)
166 | idxs[j] = old;
167 | }
168 | }
169 |
170 | void furthestsampling_cuda_launcher(int b, int n, int m, const float *dataset, float *temp, int *idxs)
171 | {
172 | unsigned int n_threads = opt_n_threads(n);
173 | switch (n_threads) {
174 | case 1024:
175 | furthestsampling_cuda_kernel<1024><<<b, n_threads>>>(b, n, m, dataset, temp, idxs);
176 | break;
177 | case 512:
178 | furthestsampling_cuda_kernel<512><<<b, n_threads>>>(b, n, m, dataset, temp, idxs);
179 | break;
180 | case 256:
181 | furthestsampling_cuda_kernel<256><<<b, n_threads>>>(b, n, m, dataset, temp, idxs);
182 | break;
183 | case 128:
184 | furthestsampling_cuda_kernel<128><<<b, n_threads>>>(b, n, m, dataset, temp, idxs);
185 | break;
186 | case 64:
187 | furthestsampling_cuda_kernel<64><<<b, n_threads>>>(b, n, m, dataset, temp, idxs);
188 | break;
189 | case 32:
190 | furthestsampling_cuda_kernel<32><<<b, n_threads>>>(b, n, m, dataset, temp, idxs);
191 | break;
192 | case 16:
193 | furthestsampling_cuda_kernel<16><<<b, n_threads>>>(b, n, m, dataset, temp, idxs);
194 | break;
195 | case 8:
196 | furthestsampling_cuda_kernel<8><<<b, n_threads>>>(b, n, m, dataset, temp, idxs);
197 | break;
198 | case 4:
199 | furthestsampling_cuda_kernel<4><<<b, n_threads>>>(b, n, m, dataset, temp, idxs);
200 | break;
201 | case 2:
202 | furthestsampling_cuda_kernel<2><<<b, n_threads>>>(b, n, m, dataset, temp, idxs);
203 | break;
204 | case 1:
205 | furthestsampling_cuda_kernel<1><<<b, n_threads>>>(b, n, m, dataset, temp, idxs);
206 | break;
207 | default:
208 | furthestsampling_cuda_kernel<512><<<b, n_threads>>>(b, n, m, dataset, temp, idxs);
209 | }
210 | }
211 |
--------------------------------------------------------------------------------
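
The FPS kernel above is the classic greedy farthest-point loop, parallelized per batch: `temp` caches each point's running minimum squared distance to the already-selected set, and the unrolled shared-memory reduction finds the argmax. A single-batch PyTorch reference of the same algorithm (illustrative, not part of the extension):

```python
import torch

def fps_reference(xyz: torch.Tensor, m: int) -> torch.Tensor:
    """xyz: (n, 3) -> (m,) indices of farthest-point samples, seeded at index 0."""
    n = xyz.shape[0]
    idxs = torch.zeros(m, dtype=torch.long)
    temp = torch.full((n,), float('inf'))   # running min distance to the picked set
    old = 0
    for j in range(1, m):
        d = ((xyz - xyz[old]) ** 2).sum(-1)  # squared distance to the latest pick
        temp = torch.min(temp, d)
        old = int(temp.argmax())             # farthest from all picks so far
        idxs[j] = old
    return idxs
```
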
/classification/modules/pointops/src/sampling/sampling_cuda_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _SAMPLING_CUDA_KERNEL
2 | #define _SAMPLING_CUDA_KERNEL
3 | #include <torch/serialize/tensor.h>
4 | #include <vector>
5 | #include <ATen/cuda/CUDAContext.h>
6 |
7 | void gathering_forward_cuda(int b, int c, int n, int m, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor);
8 | void gathering_backward_cuda(int b, int c, int n, int m, at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor);
9 | void furthestsampling_cuda(int b, int n, int m, at::Tensor points_tensor, at::Tensor temp_tensor, at::Tensor idx_tensor);
10 |
11 | #ifdef __cplusplus
12 | extern "C" {
13 | #endif
14 |
15 | void gathering_forward_cuda_launcher(int b, int c, int n, int m, const float *points, const int *idx, float *out);
16 | void gathering_backward_cuda_launcher(int b, int c, int n, int m, const float *grad_out, const int *idx, float *grad_points);
17 | void furthestsampling_cuda_launcher(int b, int n, int m, const float *dataset, float *temp, int *idxs);
18 |
19 | #ifdef __cplusplus
20 | }
21 | #endif
22 | #endif
23 |
--------------------------------------------------------------------------------
/classification/modules/polar_utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Author: Haoxi Ran
3 | Date: 05/10/2022
4 | """
5 |
6 | import torch
7 | import numpy as np
8 |
9 |
10 | def xyz2sphere(xyz, normalize=True):
11 | """
12 | Convert XYZ to Spherical Coordinate
13 |
14 | reference: https://en.wikipedia.org/wiki/Spherical_coordinate_system
15 |
16 | :param xyz: [B, N, 3] / [B, N, G, 3]
17 | :return: (rho, theta, phi) [B, N, 3] / [B, N, G, 3]
18 | """
19 | rho = torch.sqrt(torch.sum(torch.pow(xyz, 2), dim=-1, keepdim=True))
20 | rho = torch.clamp(rho, min=0) # range: [0, inf]
21 | theta = torch.acos(xyz[..., 2, None] / rho) # range: [0, pi]
22 | phi = torch.atan2(xyz[..., 1, None], xyz[..., 0, None]) # range: [-pi, pi]
23 | # check nan
24 | idx = rho == 0
25 | theta[idx] = 0
26 |
27 | if normalize:
28 | theta = theta / np.pi # [0, 1]
29 | phi = phi / (2 * np.pi) + .5 # [0, 1]
30 | out = torch.cat([rho, theta, phi], dim=-1)
31 | return out
32 |
33 |
34 | def xyz2cylind(xyz, normalize=True):
35 | """
36 | Convert XYZ to Cylindrical Coordinate
37 |
38 | reference: https://en.wikipedia.org/wiki/Cylindrical_coordinate_system
39 |
40 | :param normalize: Normalize phi & z
41 | :param xyz: [B, N, 3] / [B, N, G, 3]
42 | :return: (rho, phi, z) [B, N, 3]
43 | """
44 | rho = torch.sqrt(torch.sum(torch.pow(xyz[..., :2], 2), dim=-1, keepdim=True))
45 | rho = torch.clamp(rho, 0, 1) # range: [0, 1]
46 | phi = torch.atan2(xyz[..., 1, None], xyz[..., 0, None]) # range: [-pi, pi]
47 | z = xyz[..., 2, None]
48 | z = torch.clamp(z, -1, 1) # range: [-1, 1]
49 |
50 | if normalize:
51 | phi = phi / (2 * np.pi) + .5
52 | z = (z + 1.) / 2.
53 | out = torch.cat([rho, phi, z], dim=-1)
54 | return out
55 |
--------------------------------------------------------------------------------
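
With `normalize=True`, `xyz2sphere` divides theta by pi and maps phi through phi / (2*pi) + 0.5, so both land in [0, 1] while rho keeps its raw scale. A quick shape-and-range check (illustrative):

```python
import torch

xyz = torch.randn(2, 1024, 3)
sph = xyz2sphere(xyz)               # (2, 1024, 3) -> (rho, theta, phi)
assert (sph[..., 0] >= 0).all()                               # rho in [0, inf)
assert (sph[..., 1] >= 0).all() and (sph[..., 1] <= 1).all()  # theta / pi
assert (sph[..., 2] >= 0).all() and (sph[..., 2] <= 1).all()  # phi / (2*pi) + 0.5
```
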
/classification/modules/ptaug_utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Author: Haoxi Ran
3 | Date: 05/10/2022
4 | """
5 |
6 | import torch
7 |
8 |
9 | #################
10 | # MAIN
11 | #################
12 |
13 | def get_aug_args(args):
14 | dataset = args.dataset
15 | if dataset == 'ScanObjectNN':
16 | aug_args = {'scale_factor': 0.5, 'shift_factor': 0.3}
17 | return aug_args
18 | else:
19 | raise Exception('No such dataset')
20 |
21 |
22 | def transform_point_cloud(batch, args, aug_args, train=True, label=None):
23 | """batch: B x 3/6 x N"""
24 | if args.aug_scale:
25 | batch[:, 0:3] = scale_point_cloud(batch[:, 0:3], aug_args['scale_factor'])
26 | if args.aug_shift:
27 | batch[:, 0:3] = shift_point_cloud(batch[:, 0:3], shift_range=aug_args['shift_factor'])
28 | if label is not None:
29 | return batch, label
30 | return batch
31 |
32 |
33 | #################
34 | # Shift
35 | #################
36 |
37 | def shift_point_cloud(batch_data, shift_range=0.2):
38 | """ Randomly shift point cloud. Shift is per point cloud.
39 | Input:
40 | B x C x N array, original batch of point clouds
41 | Return:
42 | B x C x N array, shifted batch of point clouds
43 | """
44 | shifts = (torch.rand(batch_data.shape[0], 3, 1, device=batch_data.device) * 2. - 1.) * shift_range
45 | batch_data += shifts
46 | return batch_data
47 |
48 |
49 | #################
50 | # Scale
51 | #################
52 |
53 | def scale_point_cloud(batch_data, scale_range=0.2):
54 | """ Randomly scale the point cloud. Scale is per point cloud.
55 | Input:
56 | B x C x N array, original batch of point clouds
57 | Return:
58 | B x C x N array, scaled batch of point clouds
59 | """
60 | scales = (torch.rand(batch_data.shape[0], 3, 1, device=batch_data.device) * 2. - 1.) * scale_range + 1.
61 | batch_data *= scales
62 | return batch_data
63 |
--------------------------------------------------------------------------------
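
Both augmentations draw a single (B, 3, 1) random tensor, so the scale and shift are sampled per cloud and per axis (i.e. anisotropic) and applied in place to the whole batch. Typical usage, with illustrative shapes:

```python
import torch

batch = torch.randn(8, 3, 1024)                    # B x 3 x N point clouds
batch = scale_point_cloud(batch, scale_range=0.5)  # per-axis scales in [0.5, 1.5]
batch = shift_point_cloud(batch, shift_range=0.3)  # per-axis shifts in [-0.3, 0.3]
```
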
/classification/modules/recons_utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Author: Haoxi Ran
3 | Date: 05/10/2022
4 | """
5 |
6 | import torch
7 | from torch import nn
8 | from modules.pointnet2_utils import query_knn_point, index_points
9 |
10 |
11 | def _recons_factory(type):
12 | if type == 'knn':
13 | return knn_recons
14 | else:
15 | raise Exception('Not Implemented Reconstruction Type')
16 |
17 |
18 | def knn_recons(k, center, context, cuda=False):
19 | idx = query_knn_point(k, context, center, cuda=cuda)
20 | torch.cuda.empty_cache()
21 |
22 | group_xyz = index_points(context, idx, cuda=cuda, is_group=True) # [B, N, K, C]
23 | torch.cuda.empty_cache()
24 | return group_xyz
25 |
26 |
27 | def cal_normal(group_xyz, random_inv=False, is_group=False):
28 | """
29 | Calculate Normal Vector (Unit Form + First Term Positive)
30 |
31 | :param group_xyz: [B, N, K=3, 3] / [B, N, G, K=3, 3]
32 | :param random_inv: randomly flip the normal direction per batch (prob: 0.5)
33 | :param is_group: whether group_xyz carries an extra group dimension G
34 | (i.e. input of shape [B, N, G, K, 3])
35 | :return: [B, N, 3] / [B, N, G, 3]
36 | """
37 | edge_vec1 = group_xyz[..., 1, :] - group_xyz[..., 0, :] # [B, N, 3]
38 | edge_vec2 = group_xyz[..., 2, :] - group_xyz[..., 0, :] # [B, N, 3]
39 |
40 | nor = torch.cross(edge_vec1, edge_vec2, dim=-1)
41 | unit_nor = nor / torch.norm(nor, dim=-1, keepdim=True) # [B, N, 3] / [B, N, G, 3]
42 | if not is_group:
43 | pos_mask = (unit_nor[..., 0] > 0).float() * 2. - 1. # keep x_n positive
44 | else:
45 | pos_mask = (unit_nor[..., 0:1, 0] > 0).float() * 2. - 1.
46 | unit_nor = unit_nor * pos_mask.unsqueeze(-1)
47 |
48 | # batch-wise random inverse normal vector (prob: 0.5)
49 | if random_inv:
50 | random_mask = torch.randint(0, 2, (group_xyz.size(0), 1, 1)).float() * 2. - 1.
51 | random_mask = random_mask.to(unit_nor.device)
52 | if not is_group:
53 | unit_nor = unit_nor * random_mask
54 | else:
55 | unit_nor = unit_nor * random_mask.unsqueeze(-1)
56 |
57 | return unit_nor
58 |
59 |
60 | def pca(X, k, center=True):
61 | """
62 | Principal Components Analysis impl. with SVD function
63 |
64 | :param X: [n, d] data matrix (one sample per row)
65 | :param k: number of principal components to keep
66 | :param center: mean-center X before the SVD
67 | :return: dict with 'components' [k, d] and 'explained_variance' [k]
68 | """
69 |
70 | n = X.size()[0]
71 | ones = torch.ones(n).view([n, 1])
72 | h = ((1 / n) * torch.mm(ones, ones.t())) if center else torch.zeros(n * n).view([n, n])
73 | H = torch.eye(n) - h
74 | X_center = torch.mm(H.double(), X.double())
75 | u, s, v = torch.svd(X_center)
76 | components = v[:, :k].t()  # principal axes as rows, shape [k, d]
77 | explained_variance = torch.mul(s[:k], s[:k]) / (n - 1)
78 | return {'X': X, 'k': k, 'components': components,
79 | 'explained_variance': explained_variance}
80 |
81 |
82 | def cal_center(group_xyz):
83 | """
84 | Calculate Global Coordinates of the Center of Triangle
85 |
86 | :param group_xyz: [B, N, K, 3] / [B, N, G, K, 3]; K >= 3
87 | :return: [B, N, 3] / [B, N, G, 3]
88 | """
89 | center = torch.mean(group_xyz, dim=-2)
90 | return center
91 |
92 |
93 | def cal_area(group_xyz):
94 | """
95 | Calculate Area of Triangle
96 |
97 | :param group_xyz: [B, N, K, 3] / [B, N, G, K, 3]; K = 3
98 | :return: [B, N, 1] / [B, N, G, 1]
99 | """
100 | pad_shape = group_xyz[..., 0, None].shape
101 | det_xy = torch.det(torch.cat([group_xyz[..., 0, None], group_xyz[..., 1, None], torch.ones(pad_shape, device=group_xyz.device)], dim=-1))
102 | det_yz = torch.det(torch.cat([group_xyz[..., 1, None], group_xyz[..., 2, None], torch.ones(pad_shape, device=group_xyz.device)], dim=-1))
103 | det_zx = torch.det(torch.cat([group_xyz[..., 2, None], group_xyz[..., 0, None], torch.ones(pad_shape, device=group_xyz.device)], dim=-1))
104 | area = torch.sqrt(det_xy ** 2 + det_yz ** 2 + det_zx ** 2).unsqueeze(-1)
105 | return area
106 |
107 |
108 | def cal_const(normal, center, is_normalize=True):
109 | """
110 | Calculate Constant Term (Standard Version, with x_normal to be 1)
111 |
112 | math::
113 | const = x_nor * x_0 + y_nor * y_0 + z_nor * z_0
114 |
115 | :param is_normalize:
116 | :param normal: [B, N, 3] / [B, N, G, 3]
117 | :param center: [B, N, 3] / [B, N, G, 3]
118 | :return: [B, N, 1] / [B, N, G, 1]
119 | """
120 | const = torch.sum(normal * center, dim=-1, keepdim=True)
121 | factor = torch.sqrt(torch.Tensor([3])).to(normal.device)
122 | const = const / factor if is_normalize else const
123 |
124 | return const
125 |
126 |
127 | def check_nan(normal, center, pos=None):
128 | """
129 | Check & Remove NaN in normal tensor
130 |
131 | :param pos: [B, N, 1]
132 | :param center: [B, N, 3]
133 | :param normal: [B, N, 3]
134 | :return:
135 | """
136 | B, N, _ = normal.shape
137 | mask = torch.sum(torch.isnan(normal), dim=-1) > 0
138 | mask_first = torch.argmax((~mask).int(), dim=-1)
139 |
140 | normal_first = normal[torch.arange(B), None, mask_first].repeat([1, N, 1])
141 | normal[mask] = normal_first[mask]
142 | center_first = center[torch.arange(B), None, mask_first].repeat([1, N, 1])
143 | center[mask] = center_first[mask]
144 |
145 | if pos is not None:
146 | pos_first = pos[torch.arange(B), None, mask_first].repeat([1, N, 1])
147 | pos[mask] = pos_first[mask]
148 | return normal, center, pos
149 | return normal, center
150 |
151 |
152 | def check_nan_umb(normal, center, pos=None):
153 | """
154 | Check & Remove NaN in normal tensor
155 |
156 | :param pos: [B, N, G, 1]
157 | :param center: [B, N, G, 3]
158 | :param normal: [B, N, G, 3]
159 | :return:
160 | """
161 | B, N, G, _ = normal.shape
162 | mask = torch.sum(torch.isnan(normal), dim=-1) > 0
163 | mask_first = torch.argmax((~mask).int(), dim=-1)
164 | b_idx = torch.arange(B).unsqueeze(1).repeat([1, N])
165 | n_idx = torch.arange(N).unsqueeze(0).repeat([B, 1])
166 |
167 | normal_first = normal[b_idx, n_idx, None, mask_first].repeat([1, 1, G, 1])
168 | normal[mask] = normal_first[mask]
169 | center_first = center[b_idx, n_idx, None, mask_first].repeat([1, 1, G, 1])
170 | center[mask] = center_first[mask]
171 |
172 | if pos is not None:
173 | pos_first = pos[b_idx, n_idx, None, mask_first].repeat([1, 1, G, 1])
174 | pos[mask] = pos_first[mask]
175 | return normal, center, pos
176 | return normal, center
177 |
178 |
179 | class SurfaceConstructor(nn.Module):
180 | """
181 | Surface Constructor for Point Clouds
182 |
183 | Formulation of A Surface:
184 | A * (x - x_0) + B * (y - y_0) + C * (z - z_0) = 0,
185 | where A^2 + B^2 + C^2 = 1 & A > 0
186 | """
187 |
188 | def __init__(self, r=None, k=3, recons_type='knn', return_dist=False, random_inv=True, cuda=False):
189 | super(SurfaceConstructor, self).__init__()
190 | self.K = k
191 | self.R = r
192 | self.recons = _recons_factory(recons_type)
193 | self.cuda = cuda
194 |
195 | self.return_dist = return_dist
196 | self.random_inv = random_inv
197 |
198 | def forward(self, center, context):
199 | """
200 | Input:
201 | center: input points position as centroid points, [B, 3, N]
202 | context: input points position as context points, [B, 3, N']
203 |
204 | Output:
205 | normal: normals of constructed triangles, [B, 3, N]
206 | center: centroids of constructed triangles, [B, 3, N]
207 | pos: position info of constructed triangles, [B, 1, N]
208 | """
209 | center = center.permute(0, 2, 1)
210 | context = context.permute(0, 2, 1)
211 |
212 | group_xyz = self.recons(self.K, center, context, cuda=self.cuda)
213 | normal = cal_normal(group_xyz, random_inv=self.random_inv)
214 | center = cal_center(group_xyz)
215 |
216 | if self.return_dist:
217 | pos = cal_const(normal, center)
218 | normal, center, pos = check_nan(normal, center, pos)
219 | normal = normal.permute(0, 2, 1)
220 | center = center.permute(0, 2, 1)
221 | pos = pos.permute(0, 2, 1)
222 | return normal, center, pos
223 |
224 | normal, center = check_nan(normal, center)
225 | normal = normal.permute(0, 2, 1)
226 | center = center.permute(0, 2, 1)
227 |
228 | return normal, center
229 |
230 |
231 | if __name__ == '__main__':
232 | xyz = torch.rand(1, 3, 1024) * 2. - 1.
233 | constructor = SurfaceConstructor(return_dist=True)
234 |
235 | normal, center, pos = constructor(xyz, xyz)
236 | print(normal.shape)
237 | print(center.shape)
238 |
--------------------------------------------------------------------------------
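
For the plane formulation in the class docstring, the invariants are that `cal_normal` returns unit normals with a non-negative first component (when `random_inv=False`) and `cal_const` returns the plane offset scaled by 1/sqrt(3). A small check on random triangles (illustrative):

```python
import torch

group_xyz = torch.rand(2, 16, 3, 3)   # [B, N, K=3, 3] random triangles
normal = cal_normal(group_xyz)        # unit normal, x-component kept positive
center = cal_center(group_xyz)
pos = cal_const(normal, center)       # (A*x0 + B*y0 + C*z0) / sqrt(3)

assert torch.allclose(normal.norm(dim=-1), torch.ones(2, 16), atol=1e-5)
assert (normal[..., 0] >= 0).all()
```
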
/classification/scripts/scanobjectnn/repsurf_ssg_umb.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -v
3 |
4 | python3 tool/train_cls_scanobjectnn.py \
5 | --cuda_ops \
6 | --batch_size 64 \
7 | --model repsurf.repsurf_ssg_umb \
8 | --epoch 250 \
9 | --log_dir repsurf_cls_ssg_umb \
10 | --gpus 0 \
11 | --n_workers 12 \
12 | --return_center \
13 | --return_dist \
14 | --return_polar \
15 | --group_size 8 \
16 | --umb_pool sum \
17 | --num_point 1024
--------------------------------------------------------------------------------
/classification/scripts/scanobjectnn/repsurf_ssg_umb_2x.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -v
3 |
4 | python3 tool/train_cls_scanobjectnn.py \
5 | --cuda_ops \
6 | --batch_size 64 \
7 | --model repsurf.repsurf_ssg_umb_2x \
8 | --epoch 250 \
9 | --log_dir repsurf_cls_ssg_umb_2x \
10 | --gpus 0 \
11 | --n_workers 12 \
12 | --return_center \
13 | --return_dist \
14 | --return_polar \
15 | --group_size 8 \
16 | --umb_pool sum \
17 | --num_point 1024
--------------------------------------------------------------------------------
/classification/util/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/classification/util/__init__.py
--------------------------------------------------------------------------------
/classification/util/utils.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | import argparse
3 | import random
4 |
5 | import numpy as np
6 | import torch
7 | from torch import nn
8 | import torch.nn.functional as F
9 |
10 |
11 | def set_seed(seed):
12 | """
13 | Setting of Global Seed
14 |
15 | """
16 | torch.backends.cudnn.enabled = True
17 | torch.backends.cudnn.deterministic = True # consistent results on the cpu and gpu
18 | torch.backends.cudnn.benchmark = True
19 |
20 | np.random.seed(seed)
21 | random.seed(seed)
22 | torch.manual_seed(seed) # cpu
23 | torch.cuda.manual_seed(seed)
24 | torch.cuda.manual_seed_all(seed) # gpu
25 |
26 |
27 | def weight_init(m, init_type):
28 | if init_type == 'xavier':
29 | init_func = torch.nn.init.xavier_normal_
30 | elif init_type == 'kaiming':
31 | init_func = torch.nn.init.kaiming_normal_
32 | else:
33 | raise Exception('No such init type')
34 |
35 | if isinstance(m, (torch.nn.Linear, torch.nn.Conv2d, torch.nn.Conv1d)):
36 | init_func(m.weight)
37 | if m.bias is not None:
38 | torch.nn.init.constant_(m.bias, 0)
39 | elif isinstance(m, (torch.nn.BatchNorm2d, torch.nn.BatchNorm1d)):
40 | torch.nn.init.constant_(m.weight, 1) # constant
41 | # torch.nn.init.normal_(m.weight, 1.0, 0.02) # normal
42 | torch.nn.init.constant_(m.bias, 0)
43 |
44 |
45 | class ClsLoss(nn.Module):
46 | def __init__(self):
47 | super(ClsLoss, self).__init__()
48 |
49 | def forward(self, pred, target):
50 | total_loss = F.nll_loss(pred, target)
51 |
52 | return total_loss
53 |
54 |
55 | class SmoothClsLoss(nn.Module):
56 | def __init__(self, smoothing_ratio=0.1):
57 | super(SmoothClsLoss, self).__init__()
58 | self.smoothing_ratio = smoothing_ratio
59 |
60 | def forward(self, pred, target):
61 | eps = self.smoothing_ratio
62 | n_class = pred.size(1)
63 |
64 | one_hot = torch.zeros_like(pred).scatter(1, target.view(-1, 1), 1)
65 | one_hot = one_hot * (1 - eps) + (1 - one_hot) * eps / (n_class - 1)
66 | # log_prb = F.log_softmax(pred, dim=1)
67 |
68 | loss = -(one_hot * pred).sum(dim=1).mean()
69 | return loss
70 |
71 |
72 | def get_model(args):
73 | module = importlib.import_module('models.%s' % args.model)
74 | return module.Model(args)
75 |
76 |
77 | def get_loss():
78 | return SmoothClsLoss()
79 |
80 |
81 | def get_test_args():
82 | return argparse.Namespace()
83 |
--------------------------------------------------------------------------------
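
`SmoothClsLoss` assumes `pred` already holds log-probabilities (note the commented-out log_softmax): it builds a smoothed one-hot target and takes the negative dot product with the log-probs, i.e. label-smoothed cross entropy. A minimal usage sketch; the class count (15, as in ScanObjectNN) is just an example:

```python
import torch
import torch.nn.functional as F

logits = torch.randn(4, 15)                # batch of 4, 15 classes
target = torch.randint(0, 15, (4,))
log_prob = F.log_softmax(logits, dim=1)    # the models end with log_softmax
loss = SmoothClsLoss()(log_prob, target)
```
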
/segmentation/README.md:
--------------------------------------------------------------------------------
1 | # RepSurf for Segmentation
2 |
3 | By *[Haoxi Ran\*](https://hancyran.github.io/), Jun Liu, Chengjie Wang* (\*: corresponding contact)
4 |
5 | ### [PDF](https://openaccess.thecvf.com/content/CVPR2022/papers/Ran_Surface_Representation_for_Point_Clouds_CVPR_2022_paper.pdf) | [arXiv](http://arxiv.org/abs/2205.05740)
6 |
7 |
8 | ## Preparation
9 |
10 | ### Environment
11 |
12 | We tested under the following environment:
13 |
14 | * python 3.7
15 | * pytorch 1.6.0 / 1.8.0
16 | * cuda 10.1 / 11.1
17 | * gcc 7.2.0
18 | * h5py
19 | * sharedarray
20 | * tensorboardx
21 |
22 | For anaconda users, initialize the conda environment **repsurf-seg** by:
23 |
24 | ```
25 | sh init.sh
26 | ```
27 |
28 | ## Experiments
29 |
30 | ### S3DIS Area-5 (Data & Logs: [Google Drive](https://drive.google.com/drive/folders/1jIZuy4RPFJ4YHAE8ScVQgwtBwNGgfKnv?usp=sharing))
31 |
32 | * Performance using the same settings:
33 |
34 | *(performance table omitted: mIoU/mAcc/OA and training time for Umbrella RepSurf, Point Transformer and PointNet++ under the same settings; see the Google Drive logs above for the full numbers)*
89 |
90 | **Note**:
91 | 1. The performance numbers (mIoU/mAcc/OA) come from the final predictions on the whole scenes of S3DIS Area-5, while the results during training are on sub-sampled scenes for fast validation.
92 | 2. The training time of all the above implementations is estimated on four NVIDIA RTX 3090 GPUs. The time in the logs includes both training and validation.
93 | 3. To speed up training, we apply Sectorized FPS (in the first stage) to all the above methods. It saves 30~40% of training time and does not affect the performance.
94 | 4. To lessen the instability of grid sampling during inference, we apply median filtering to all the above implementations. It also slightly improves the results (~0.4 mIoU).
95 |
96 | * To download the dataset (first install gdown via **pip install gdown**):
97 |
98 | ```
99 | cd ./data/S3DIS
100 | gdown https://drive.google.com/u/1/uc?id=1UDM-bjrtqoIR9FWoIRyqLUJGyKEs22fP
101 | tar zxf s3dis.tar.gz && rm s3dis.tar.gz && cd -
102 | ```
103 |
104 | * To train one model (**Umbrella RepSurf, Point Transformer, PointNet2**) for S3DIS Area-5:
105 |
106 | ```
107 | sh scripts/s3dis/train_[MODEL].sh # MODEL: repsurf_umb, pointnet2, pointtransformer
108 | ```
109 |
110 | * To test one model (**Umbrella RepSurf, Point Transformer, PointNet2**) for S3DIS Area-5 on whole scenes:
111 |
112 | ```
113 | sh scripts/s3dis/test_[MODEL].sh # MODEL: repsurf_umb, pointnet2, pointtransformer
114 | ```
115 |
116 | ## Acknowledgment
117 |
118 | We thank the [Point Transformer Implementation](https://github.com/POSTECH-CVLab/point-transformer) for the library pointops.
119 |
120 | ## License
121 |
122 | RepSurf is under the Apache-2.0 license. Please contact the primary author **Haoxi Ran (ranhaoxi@gmail.com)** for
123 | commercial use.
124 |
--------------------------------------------------------------------------------
/segmentation/dataset/S3DISDataLoader.py:
--------------------------------------------------------------------------------
1 | """
2 | Author: Haoxi Ran
3 | Date: 06/30/2022
4 | """
5 |
6 | import os
7 | import numpy as np
8 | import SharedArray as SA
9 | from torch.utils.data import Dataset
10 |
11 | from util.data_util import sa_create, data_prepare
12 |
13 | NUM_CLASS = 13
14 |
15 |
16 | class S3DIS(Dataset):
17 | def __init__(self, args, split, coord_transform=None, rgb_transform=None,
18 | rgb_mean=None, rgb_std=None, shuffle_index=False):
19 | super().__init__()
20 | self.args, self.split, self.coord_transform, self.rgb_transform, self.rgb_mean, self.rgb_std, self.shuffle_index = \
21 | args, split, coord_transform, rgb_transform, rgb_mean, rgb_std, shuffle_index
22 | self.stop_aug = False
23 | data_list = sorted(os.listdir(args.data_dir))
24 | data_list = [item[:-4] for item in data_list if 'Area_' in item]
25 | if split == 'train':
26 | self.data_list = [item for item in data_list if not 'Area_{}'.format(args.test_area) in item]
27 | else:
28 | self.data_list = [item for item in data_list if 'Area_{}'.format(args.test_area) in item]
29 | self.data_idx = np.arange(len(self.data_list))
30 |
31 | for item in self.data_list:
32 | if not os.path.exists("/dev/shm/s3dis_{}".format(item)):
33 | data_path = os.path.join(args.data_dir, item + '.npy')
34 | data = np.load(data_path).astype(np.float32) # xyzrgbl, N*7
35 | sa_create("shm://s3dis_{}".format(item), data)
36 |
37 | def __getitem__(self, idx):
38 | data_idx = self.data_idx[idx % len(self.data_idx)]
39 | data = SA.attach("shm://s3dis_{}".format(self.data_list[data_idx])).copy()
40 | coord, feat, label = data[:, 0:3], data[:, 3:6], data[:, 6]
41 | coord, feat, label = \
42 | data_prepare(coord, feat, label, self.args, self.split, self.coord_transform, self.rgb_transform,
43 | self.rgb_mean, self.rgb_std, self.shuffle_index, self.stop_aug)
44 |
45 | return coord, feat, label
46 |
47 | def __len__(self):
48 | return len(self.data_idx) * self.args.loop
49 |
50 | @staticmethod
51 | def print_weight(data_root, data_list):
52 | print('Computing label weight...')
53 | num_point_list = []
54 | label_freq = np.zeros(NUM_CLASS)
55 | label_total = np.zeros(NUM_CLASS)
56 | # load data
57 | for idx, item in enumerate(data_list):
58 | data_path = os.path.join(data_root, item + '.npy')
59 | data = np.load(data_path)
60 | labels = data[:, 6]
61 | freq = np.histogram(labels, range(NUM_CLASS + 1))[0]
62 | label_freq += freq
63 | label_total += (freq > 0).astype(np.float64) * labels.size
64 | num_point_list.append(labels.size)
65 |
66 | # label weight
67 | label_freq = label_freq / label_total
68 | label_weight = np.median(label_freq) / label_freq
69 | print(label_weight)
70 |
71 | @staticmethod
72 | def print_mean_std(data_root, data_list):
73 | print('Computing color mean & std...')
74 | point_list = []
75 | for idx, item in enumerate(data_list):
76 | data_path = os.path.join(data_root, item + '.npy')
77 | data = np.load(data_path)
78 | point_list.append(data[:, 3:6])
79 |
80 | points = np.vstack(point_list) / 255.
81 | mean = np.mean(points, 0)
82 | std = np.std(points, 0)
83 | print(f'mean: {mean}, std: {std}')
84 |
--------------------------------------------------------------------------------
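
`print_weight` implements median-frequency balancing: each class weight is the median of the class frequencies divided by that class's own frequency, so rare classes are up-weighted. A toy three-class example of the final step:

```python
import numpy as np

label_freq = np.array([0.6, 0.3, 0.1])            # normalized class frequencies
label_weight = np.median(label_freq) / label_freq
print(label_weight)                               # [0.5 1.  3. ]
```
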
/segmentation/dataset/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/segmentation/dataset/__init__.py
--------------------------------------------------------------------------------
/segmentation/init.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | mkdir -p log/PointAnalysis/log/S3DIS
4 | mkdir -p log/PointAnalysis/log/ScanNet
5 | mkdir -p data/S3DIS
6 | mkdir -p data/ScanNet
7 |
8 | conda create -n repsurf-seg python=3.7 -y
9 | conda activate repsurf-seg
10 |
11 | #conda install pytorch=1.6.0 torchvision=0.7.0 cudatoolkit=10.1 -c pytorch -c conda-forge -y
12 | pip install torch==1.8.0+cu111 torchvision==0.9.0+cu111 torchaudio==0.8.0 -f https://download.pytorch.org/whl/torch_stable.html
13 | conda install -c anaconda h5py pyyaml -y
14 | conda install -c conda-forge sharedarray tensorboardx -y
15 |
16 | cd modules/pointops
17 | python3 setup.py install
18 | cd -
19 |
--------------------------------------------------------------------------------
/segmentation/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/segmentation/models/__init__.py
--------------------------------------------------------------------------------
/segmentation/models/pointnet2/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/segmentation/models/pointnet2/__init__.py
--------------------------------------------------------------------------------
/segmentation/models/pointnet2/pointnet2_ssg.py:
--------------------------------------------------------------------------------
1 | """
2 | Author: Haoxi Ran
3 | Date: 06/30/2022
4 | """
5 |
6 | import torch
7 | import torch.nn as nn
8 | from modules.pointnet2_utils import PointNetSetAbstraction, PointNetFeaturePropagation
9 |
10 |
11 | class Model(nn.Module):
12 | def __init__(self, args):
13 | super(Model, self).__init__()
14 | self.sa1 = PointNetSetAbstraction(4, 32, 6 + 3, [32, 32, 64], num_sector=4)
15 | self.sa2 = PointNetSetAbstraction(4, 32, 64 + 3, [64, 64, 128])
16 | self.sa3 = PointNetSetAbstraction(4, 32, 128 + 3, [128, 128, 256])
17 | self.sa4 = PointNetSetAbstraction(4, 32, 256 + 3, [256, 256, 512])
18 |
19 | self.fp4 = PointNetFeaturePropagation(768, [256, 256])
20 | self.fp3 = PointNetFeaturePropagation(384, [256, 256])
21 | self.fp2 = PointNetFeaturePropagation(320, [256, 128])
22 | self.fp1 = PointNetFeaturePropagation(128, [128, 128, 128])
23 |
24 | self.classifier = nn.Sequential(
25 | nn.Linear(128, 128),
26 | nn.BatchNorm1d(128),
27 | nn.ReLU(True),
28 | nn.Dropout(0.5),
29 | nn.Linear(128, args.num_class),
30 | )
31 |
32 | def forward(self, pos_feat_off0):
33 | pos_feat_off0[1] = torch.cat([pos_feat_off0[0], pos_feat_off0[1]], 1)
34 |
35 | pos_feat_off1 = self.sa1(pos_feat_off0)
36 | pos_feat_off2 = self.sa2(pos_feat_off1)
37 | pos_feat_off3 = self.sa3(pos_feat_off2)
38 | pos_feat_off4 = self.sa4(pos_feat_off3)
39 |
40 | pos_feat_off3[1] = self.fp4(pos_feat_off3, pos_feat_off4)
41 | pos_feat_off2[1] = self.fp3(pos_feat_off2, pos_feat_off3)
42 | pos_feat_off1[1] = self.fp2(pos_feat_off1, pos_feat_off2)
43 | pos_feat_off0[1] = self.fp1([pos_feat_off0[0], None, pos_feat_off0[2]], pos_feat_off1)
44 |
45 | feature = self.classifier(pos_feat_off0[1])
46 |
47 | return feature
48 |
--------------------------------------------------------------------------------
/segmentation/models/pointtransformer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/segmentation/models/pointtransformer/__init__.py
--------------------------------------------------------------------------------
/segmentation/models/pointtransformer/pointtransformer.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from modules.pointtransformer_utils import PointTransformerBlock, TransitionDown, TransitionUp
4 |
5 |
6 | class Model(nn.Module):
7 | def __init__(self, args):
8 | super().__init__()
9 | block = PointTransformerBlock
10 | num_block = [2, 3, 4, 6, 3]
11 | self.in_c = args.in_channel
12 | self.in_planes, planes = self.in_c, [32, 64, 128, 256, 512]
13 | fpn_planes, fpnhead_planes, share_planes = 128, 64, 8
14 | stride, nsample = [1, 4, 4, 4, 4], [16, 16, 16, 16, 16]
15 | self.enc1 = self._make_enc(block, planes[0], num_block[0], share_planes, stride=stride[0],
16 | nsample=nsample[0]) # N/1
17 | self.enc2 = self._make_enc(block, planes[1], num_block[1], share_planes, stride=stride[1],
18 | nsample=nsample[1], num_sector=4) # N/4
19 | self.enc3 = self._make_enc(block, planes[2], num_block[2], share_planes, stride=stride[2],
20 | nsample=nsample[2]) # N/16
21 | self.enc4 = self._make_enc(block, planes[3], num_block[3], share_planes, stride=stride[3],
22 | nsample=nsample[3]) # N/64
23 | self.enc5 = self._make_enc(block, planes[4], num_block[4], share_planes, stride=stride[4],
24 | nsample=nsample[4]) # N/256
25 | self.dec5 = self._make_dec(block, planes[4], 2, share_planes, nsample=nsample[4], is_head=True) # transform p5
26 | self.dec4 = self._make_dec(block, planes[3], 2, share_planes, nsample=nsample[3]) # fusion p5 and p4
27 | self.dec3 = self._make_dec(block, planes[2], 2, share_planes, nsample=nsample[2]) # fusion p4 and p3
28 | self.dec2 = self._make_dec(block, planes[1], 2, share_planes, nsample=nsample[1]) # fusion p3 and p2
29 | self.dec1 = self._make_dec(block, planes[0], 2, share_planes, nsample=nsample[0]) # fusion p2 and p1
30 | self.cls = nn.Sequential(nn.Linear(planes[0], planes[0]), nn.BatchNorm1d(planes[0]), nn.ReLU(inplace=True),
31 | nn.Linear(planes[0], args.num_class))
32 |
33 | def _make_enc(self, block, planes, blocks, share_planes=8, stride=1, nsample=16, num_sector=1):
34 | layers = [TransitionDown(self.in_planes, planes * block.expansion, stride, nsample, num_sector)]
35 | self.in_planes = planes * block.expansion
36 | for _ in range(1, blocks):
37 | layers.append(block(self.in_planes, self.in_planes, share_planes, nsample=nsample))
38 | return nn.Sequential(*layers)
39 |
40 | def _make_dec(self, block, planes, blocks, share_planes=8, nsample=16, is_head=False):
41 | layers = [TransitionUp(self.in_planes, None if is_head else planes * block.expansion)]
42 | self.in_planes = planes * block.expansion
43 | for _ in range(1, blocks):
44 | layers.append(block(self.in_planes, self.in_planes, share_planes, nsample=nsample))
45 | return nn.Sequential(*layers)
46 |
47 | def forward(self, pxo, *args):
48 | p0, x0, o0 = pxo # (n, 3), (n, c), (b)
49 | x0 = p0 if self.in_c == 3 else torch.cat((p0, x0), 1)
50 | p1, x1, o1 = self.enc1([p0, x0, o0])
51 | p2, x2, o2 = self.enc2([p1, x1, o1])
52 | p3, x3, o3 = self.enc3([p2, x2, o2])
53 | p4, x4, o4 = self.enc4([p3, x3, o3])
54 | p5, x5, o5 = self.enc5([p4, x4, o4])
55 | x5 = self.dec5[1:]([p5, self.dec5[0]([p5, x5, o5]), o5])[1]
56 | x4 = self.dec4[1:]([p4, self.dec4[0]([p4, x4, o4], [p5, x5, o5]), o4])[1]
57 | x3 = self.dec3[1:]([p3, self.dec3[0]([p3, x3, o3], [p4, x4, o4]), o3])[1]
58 | x2 = self.dec2[1:]([p2, self.dec2[0]([p2, x2, o2], [p3, x3, o3]), o2])[1]
59 | x1 = self.dec1[1:]([p1, self.dec1[0]([p1, x1, o1], [p2, x2, o2]), o1])[1]
60 | x = self.cls(x1)
61 | return x
62 |
--------------------------------------------------------------------------------
/segmentation/models/repsurf/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/segmentation/models/repsurf/__init__.py
--------------------------------------------------------------------------------
/segmentation/models/repsurf/repsurf_umb_ssg.py:
--------------------------------------------------------------------------------
1 | """
2 | Author: Haoxi Ran
3 | Date: 06/30/2022
4 | """
5 |
6 | import torch
7 | import torch.nn as nn
8 | from modules.repsurface_utils import UmbrellaSurfaceConstructor, SurfaceAbstractionCD, SurfaceFeaturePropagationCD
9 |
10 |
11 | class Model(nn.Module):
12 | def __init__(self, args):
13 | super(Model, self).__init__()
14 | center_channel = 6 if args.return_polar else 3
15 | repsurf_in_channel = 10
16 | repsurf_out_channel = 10
17 |
18 | self.sa1 = SurfaceAbstractionCD(4, 32, args.in_channel + repsurf_out_channel, center_channel, [32, 32, 64],
19 | True, args.return_polar, num_sector=4)
20 | self.sa2 = SurfaceAbstractionCD(4, 32, 64 + repsurf_out_channel, center_channel, [64, 64, 128],
21 | True, args.return_polar)
22 | self.sa3 = SurfaceAbstractionCD(4, 32, 128 + repsurf_out_channel, center_channel, [128, 128, 256],
23 | True, args.return_polar)
24 | self.sa4 = SurfaceAbstractionCD(4, 32, 256 + repsurf_out_channel, center_channel, [256, 256, 512],
25 | True, args.return_polar)
26 |
27 | self.fp4 = SurfaceFeaturePropagationCD(512, 256, [256, 256])
28 | self.fp3 = SurfaceFeaturePropagationCD(256, 128, [256, 256])
29 | self.fp2 = SurfaceFeaturePropagationCD(256, 64, [256, 128])
30 | self.fp1 = SurfaceFeaturePropagationCD(128, None, [128, 128, 128])
31 |
32 | self.classifier = nn.Sequential(
33 | nn.Linear(128, 128),
34 | nn.BatchNorm1d(128),
35 | nn.ReLU(True),
36 | nn.Dropout(0.5),
37 | nn.Linear(128, args.num_class),
38 | )
39 |
40 | self.surface_constructor = UmbrellaSurfaceConstructor(args.group_size + 1, repsurf_in_channel, repsurf_out_channel)
41 |
42 | def forward(self, pos_feat_off0):
43 | pos_nor_feat_off0 = [
44 | pos_feat_off0[0],
45 | self.surface_constructor(pos_feat_off0[0], pos_feat_off0[2]),
46 | torch.cat([pos_feat_off0[0], pos_feat_off0[1]], 1),
47 | pos_feat_off0[2]
48 | ]
49 |
50 | pos_nor_feat_off1 = self.sa1(pos_nor_feat_off0)
51 | pos_nor_feat_off2 = self.sa2(pos_nor_feat_off1)
52 | pos_nor_feat_off3 = self.sa3(pos_nor_feat_off2)
53 | pos_nor_feat_off4 = self.sa4(pos_nor_feat_off3)
54 |
55 | del pos_nor_feat_off0[1], pos_nor_feat_off1[1], pos_nor_feat_off2[1], pos_nor_feat_off3[1], pos_nor_feat_off4[1]
56 | pos_nor_feat_off3[1] = self.fp4(pos_nor_feat_off3, pos_nor_feat_off4)
57 | pos_nor_feat_off2[1] = self.fp3(pos_nor_feat_off2, pos_nor_feat_off3)
58 | pos_nor_feat_off1[1] = self.fp2(pos_nor_feat_off1, pos_nor_feat_off2)
59 | pos_nor_feat_off0[1] = self.fp1([pos_nor_feat_off0[0], None, pos_nor_feat_off0[2]], pos_nor_feat_off1)
60 |
61 | feature = self.classifier(pos_nor_feat_off0[1])
62 |
63 | return feature
64 |
--------------------------------------------------------------------------------
/segmentation/modules/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/segmentation/modules/__init__.py
--------------------------------------------------------------------------------
/segmentation/modules/pointnet2_utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Author: Haoxi Ran
3 | Date: 06/30/2022
4 | """
5 |
6 | import torch
7 | import torch.nn as nn
8 | import torch.nn.functional as F
9 |
10 | from modules.pointops.functions import pointops
11 |
12 |
13 | def sample_and_group(stride, nsample, xyz, points, offset, return_idx=False, num_sector=1):
14 | # sample
15 | if stride > 1:
16 | new_offset, sample_idx = [offset[0].item() // stride], offset[0].item() // stride
17 | for i in range(1, offset.shape[0]):
18 | sample_idx += (offset[i].item() - offset[i - 1].item()) // stride
19 | new_offset.append(sample_idx)
20 | new_offset = torch.cuda.IntTensor(new_offset)
21 | if num_sector > 1:
22 | fps_idx = pointops.sectorized_fps(xyz, offset, new_offset, num_sector) # [M]
23 | else:
24 | fps_idx = pointops.furthestsampling(xyz, offset, new_offset) # [M]
25 | new_xyz = xyz[fps_idx.long(), :] # [M, 3]
26 | else:
27 | new_xyz = xyz
28 | new_offset = offset
29 |
30 | # group
31 | N, M = xyz.shape[0], new_xyz.shape[0]
32 | group_idx, _ = pointops.knnquery(nsample, xyz, new_xyz, offset, new_offset) # [M, nsample]
33 | group_xyz = xyz[group_idx.view(-1).long(), :].view(M, nsample, 3) # [M, nsample, 3]
34 | group_xyz_norm = group_xyz - new_xyz.unsqueeze(1)
35 |
36 | if points is not None and not return_idx:
37 | C = points.shape[1]
38 | group_points = points[group_idx.view(-1).long(), :].view(M, nsample, C)
39 | new_points = torch.cat([group_xyz_norm, group_points], dim=-1) # [M, nsample, 3/6+C]
40 | else:
41 | new_points = group_xyz_norm
42 |
43 | if return_idx:
44 | return new_xyz, new_points, new_offset, group_idx
45 | else:
46 | return new_xyz, new_points, new_offset
47 |
48 |
49 | class PointNetSetAbstraction(nn.Module):
50 | """
51 | PointNet2 SA Module
52 |
53 | """
54 |
55 | def __init__(self, stride, nsample, in_channel, mlp, num_sector=1):
56 | super(PointNetSetAbstraction, self).__init__()
57 | self.stride = stride
58 | self.nsample = nsample
59 | self.num_sector = num_sector
60 | self.mlp_convs = nn.ModuleList()
61 | self.mlp_bns = nn.ModuleList()
62 |
63 | last_channel = in_channel
64 | for out_channel in mlp:
65 | self.mlp_convs.append(nn.Conv1d(last_channel, out_channel, 1))
66 | self.mlp_bns.append(nn.BatchNorm1d(out_channel))
67 | last_channel = out_channel
68 |
69 | def forward(self, pos_feat_off):
70 | xyz, points, offset = pos_feat_off # [N, 3], [N, C], [B]
71 |
72 | new_xyz, new_points, new_offset = sample_and_group(self.stride, self.nsample, xyz, points, offset,
73 | num_sector=self.num_sector)
74 |
75 | # new_xyz: sampled points position data, [M, 3]
76 | # new_points: sampled points data, [M, nsample, 3+C]
77 | new_points = new_points.transpose(1, 2).contiguous() # [M, 3+C, nsample]
78 | for i, conv in enumerate(self.mlp_convs):
79 | bn = self.mlp_bns[i]
80 | new_points = F.relu(bn(conv(new_points)))
81 | new_points = torch.max(new_points, 2)[0]
82 |
83 | return [new_xyz, new_points, new_offset]
84 |
85 |
86 | class PointNetFeaturePropagation(nn.Module):
87 | """
88 | PointNet2 FP Module
89 |
90 | """
91 |
92 | def __init__(self, in_channel, mlp):
93 | super(PointNetFeaturePropagation, self).__init__()
94 | self.mlp_convs = nn.ModuleList()
95 | self.mlp_bns = nn.ModuleList()
96 | last_channel = in_channel
97 | for out_channel in mlp:
98 | self.mlp_convs.append(nn.Linear(last_channel, out_channel))
99 | self.mlp_bns.append(nn.BatchNorm1d(out_channel))
100 | last_channel = out_channel
101 |
102 | def forward(self, pos_feat_off1, pos_feat_off2):
103 | xyz1, points1, offset1 = pos_feat_off1 # [N, 3], [N, C], [B]
104 | xyz2, points2, offset2 = pos_feat_off2 # [M, 3], [M, C], [B]
105 |
106 |         idx, dist = pointops.knnquery(3, xyz2, xyz1, offset2, offset1)  # [N, 3], [N, 3]
107 |         dist_recip = 1.0 / (dist + 1e-8)  # [N, 3]
108 |         norm = torch.sum(dist_recip, dim=1, keepdim=True)
109 |         weight = dist_recip / norm  # [N, 3]
110 |
111 | interpolated_points = torch.cuda.FloatTensor(xyz1.shape[0], points2.shape[1]).zero_()
112 | for i in range(3):
113 | interpolated_points += points2[idx[:, i].long(), :] * weight[:, i].unsqueeze(-1)
114 |
115 | # skip connection
116 | if points1 is not None:
117 |             new_points = torch.cat([points1, interpolated_points], dim=1)  # [N, C1+C2]
118 | else:
119 | new_points = interpolated_points
120 |
121 | # mlp
122 | for i, conv in enumerate(self.mlp_convs):
123 | bn = self.mlp_bns[i]
124 | new_points = F.relu(bn(conv(new_points)))
125 |
126 | return new_points
127 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/segmentation/modules/pointops/__init__.py
--------------------------------------------------------------------------------
/segmentation/modules/pointops/functions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/segmentation/modules/pointops/functions/__init__.py
--------------------------------------------------------------------------------
/segmentation/modules/pointops/setup.py:
--------------------------------------------------------------------------------
1 | #python3 setup.py install
2 | from setuptools import setup
3 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
4 | import os
5 | from distutils.sysconfig import get_config_vars
6 |
7 | (opt,) = get_config_vars('OPT')
8 | os.environ['OPT'] = " ".join(
9 | flag for flag in opt.split() if flag != '-Wstrict-prototypes'
10 | )
11 |
12 | setup(
13 | name='pointops_cuda',
14 | author='Hengshuang Zhao',
15 | ext_modules=[
16 | CUDAExtension('pointops_cuda', [
17 | 'src/pointops_api.cpp',
18 | 'src/knnquery/knnquery_cuda.cpp',
19 | 'src/knnquery/knnquery_cuda_kernel.cu',
20 | 'src/sampling/sampling_cuda.cpp',
21 | 'src/sampling/sampling_cuda_kernel.cu',
22 | 'src/grouping/grouping_cuda.cpp',
23 | 'src/grouping/grouping_cuda_kernel.cu',
24 | 'src/interpolation/interpolation_cuda.cpp',
25 | 'src/interpolation/interpolation_cuda_kernel.cu',
26 | 'src/subtraction/subtraction_cuda.cpp',
27 | 'src/subtraction/subtraction_cuda_kernel.cu',
28 | 'src/aggregation/aggregation_cuda.cpp',
29 | 'src/aggregation/aggregation_cuda_kernel.cu',
30 | ],
31 | extra_compile_args={'cxx': ['-g'], 'nvcc': ['-O2']}
32 | )
33 | ],
34 | cmdclass={'build_ext': BuildExtension}
35 | )
36 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/segmentation/modules/pointops/src/__init__.py
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/aggregation/aggregation_cuda.cpp:
--------------------------------------------------------------------------------
1 | #include <vector>
2 | #include <torch/serialize/tensor.h>
3 | #include <ATen/cuda/CUDAContext.h>
4 | #include <cuda_runtime_api.h>
5 | #include "aggregation_cuda_kernel.h"
6 |
7 |
8 | void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor)
9 | {
10 |     const float *input = input_tensor.data_ptr<float>();
11 |     const float *position = position_tensor.data_ptr<float>();
12 |     const float *weight = weight_tensor.data_ptr<float>();
13 |     const int *idx = idx_tensor.data_ptr<int>();
14 |     float *output = output_tensor.data_ptr<float>();
15 | aggregation_forward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, output);
16 | }
17 |
18 | void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor)
19 | {
20 |     const float *input = input_tensor.data_ptr<float>();
21 |     const float *position = position_tensor.data_ptr<float>();
22 |     const float *weight = weight_tensor.data_ptr<float>();
23 |     const int *idx = idx_tensor.data_ptr<int>();
24 |     const float *grad_output = grad_output_tensor.data_ptr<float>();
25 |     float *grad_input = grad_input_tensor.data_ptr<float>();
26 |     float *grad_position = grad_position_tensor.data_ptr<float>();
27 |     float *grad_weight = grad_weight_tensor.data_ptr<float>();
28 | aggregation_backward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight);
29 | }
30 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/aggregation/aggregation_cuda_kernel.cu:
--------------------------------------------------------------------------------
1 | #include "../cuda_utils.h"
2 | #include "aggregation_cuda_kernel.h"
3 |
4 |
5 | __global__ void aggregation_forward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) {
6 | // input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c)
7 | int index = blockIdx.x * blockDim.x + threadIdx.x;
8 | if (index >= n * c) return;
9 | const int c_idx = index % c;
10 | const int n_idx = index / c;
11 | const int w_c_idx = c_idx % w_c;
12 | for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++)
13 | {
14 | int idx_idx = n_idx * nsample + nsample_idx;
15 | int input_idx = idx[idx_idx] * c + c_idx;
16 | int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx;
17 | int weight_idx = n_idx * nsample * w_c + nsample_idx * w_c + w_c_idx;
18 | output[index] += (input[input_idx] + position[position_idx]) * weight[weight_idx];
19 | }
20 | }
21 |
22 | __global__ void aggregation_backward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) {
23 | // input: grad_output: (n, c), output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight: (n, nsample, w_c)
24 | int index = blockIdx.x * blockDim.x + threadIdx.x;
25 | if (index >= n * c) return;
26 | const int c_idx = index % c;
27 | const int n_idx = index / c;
28 | const int w_c_idx = c_idx % w_c;
29 | for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++)
30 | {
31 | int idx_idx = n_idx * nsample + nsample_idx;
32 | int input_idx = idx[idx_idx] * c + c_idx;
33 | int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx;
34 | int weight_idx = n_idx * nsample * w_c + nsample_idx * w_c + w_c_idx;
35 | atomicAdd(grad_input + input_idx, grad_output[index] * weight[weight_idx]);
36 | grad_position[position_idx] = grad_output[index] * weight[weight_idx];
37 | atomicAdd(grad_weight + weight_idx, grad_output[index] * (input[input_idx] + position[position_idx]));
38 | }
39 | }
40 |
41 | void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) {
42 | // input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c)
43 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
44 | dim3 threads(THREADS_PER_BLOCK);
45 |     aggregation_forward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, w_c, input, position, weight, idx, output);
46 | }
47 |
48 | void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) {
49 | // input: grad_output: (n, c), output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight: (n, nsample, w_c)
50 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
51 | dim3 threads(THREADS_PER_BLOCK);
52 |     aggregation_backward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight);
53 | }
54 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/aggregation/aggregation_cuda_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _AGGREGATION_CUDA_KERNEL
2 | #define _AGGREGATION_CUDA_KERNEL
3 | #include <vector>
4 | #include <torch/serialize/tensor.h>
5 | #include <ATen/cuda/CUDAContext.h>
6 |
7 | void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor);
8 | void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor);
9 |
10 | #ifdef __cplusplus
11 | extern "C" {
12 | #endif
13 |
14 | void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output);
15 | void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight);
16 |
17 | #ifdef __cplusplus
18 | }
19 | #endif
20 | #endif
21 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/cuda_utils.h:
--------------------------------------------------------------------------------
1 | #ifndef _CUDA_UTILS_H
2 | #define _CUDA_UTILS_H
3 |
4 | #include <cmath>
5 | #include <algorithm>
6 |
7 | #define TOTAL_THREADS 1024
8 | #define THREADS_PER_BLOCK 256
9 | #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
10 |
11 | inline int opt_n_threads(int work_size) {
12 |     const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0);
13 | return std::max(std::min(1 << pow_2, TOTAL_THREADS), 1);
14 | }
15 |
16 | inline dim3 opt_block_config(int x, int y) {
17 | const int x_threads = opt_n_threads(x);
18 | const int y_threads = std::max(std::min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1);
19 | dim3 block_config(x_threads, y_threads, 1);
20 | return block_config;
21 | }
22 |
23 | #endif
24 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/grouping/grouping_cuda.cpp:
--------------------------------------------------------------------------------
1 | #include <vector>
2 | #include <torch/serialize/tensor.h>
3 | #include <ATen/cuda/CUDAContext.h>
4 | #include <cuda_runtime_api.h>
5 | #include "grouping_cuda_kernel.h"
6 |
7 |
8 | void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor)
9 | {
10 |     const float *input = input_tensor.data_ptr<float>();
11 |     const int *idx = idx_tensor.data_ptr<int>();
12 |     float *output = output_tensor.data_ptr<float>();
13 | grouping_forward_cuda_launcher(m, nsample, c, input, idx, output);
14 | }
15 |
16 | void grouping_backward_cuda(int m, int nsample, int c, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor)
17 | {
18 |     const float *grad_output = grad_output_tensor.data_ptr<float>();
19 |     const int *idx = idx_tensor.data_ptr<int>();
20 |     float *grad_input = grad_input_tensor.data_ptr<float>();
21 | grouping_backward_cuda_launcher(m, nsample, c, grad_output, idx, grad_input);
22 | }
23 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/grouping/grouping_cuda_kernel.cu:
--------------------------------------------------------------------------------
1 | #include "../cuda_utils.h"
2 | #include "grouping_cuda_kernel.h"
3 |
4 |
5 | __global__ void grouping_forward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ input, const int *__restrict__ idx, float *__restrict__ output) {
6 | // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c)
7 | int index = blockIdx.x * blockDim.x + threadIdx.x;
8 | if (index >= m * nsample * c) return;
9 | const int c_idx = index % c;
10 | const int nsample_idx = (index / c) % nsample;
11 | const int m_idx = index / nsample / c;
12 | const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx;
13 | output[index] = input[input_idx];
14 | }
15 |
16 | __global__ void grouping_backward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ grad_output, const int *__restrict__ idx, float *__restrict__ grad_input) {
17 | // input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c)
18 | int index = blockIdx.x * blockDim.x + threadIdx.x;
19 | if (index >= m * nsample * c) return;
20 | const int c_idx = index % c;
21 | const int nsample_idx = (index / c) % nsample;
22 | const int m_idx = index / nsample / c;
23 | const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx;
24 | atomicAdd(grad_input + input_idx, grad_output[index]);
25 | }
26 |
27 | void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output) {
28 | // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c)
29 | dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK));
30 | dim3 threads(THREADS_PER_BLOCK);
31 |     grouping_forward_cuda_kernel<<<blocks, threads, 0>>>(m, nsample, c, input, idx, output);
32 | }
33 |
34 | void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input)
35 | {
36 | // input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c)
37 | dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK));
38 | dim3 threads(THREADS_PER_BLOCK);
39 |     grouping_backward_cuda_kernel<<<blocks, threads, 0>>>(m, nsample, c, grad_output, idx, grad_input);
40 | }
41 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/grouping/grouping_cuda_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _GROUPING_CUDA_KERNEL
2 | #define _GROUPING_CUDA_KERNEL
3 | #include <vector>
4 | #include <torch/serialize/tensor.h>
5 | #include <ATen/cuda/CUDAContext.h>
6 |
7 | void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor);
8 | void grouping_backward_cuda(int m, int nsample, int c, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor);
9 |
10 | #ifdef __cplusplus
11 | extern "C" {
12 | #endif
13 |
14 | void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output);
15 | void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input);
16 |
17 | #ifdef __cplusplus
18 | }
19 | #endif
20 | #endif
21 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/interpolation/interpolation_cuda.cpp:
--------------------------------------------------------------------------------
1 | #include <vector>
2 | #include <torch/serialize/tensor.h>
3 | #include <ATen/cuda/CUDAContext.h>
4 | #include <cuda_runtime_api.h>
5 | #include "interpolation_cuda_kernel.h"
6 |
7 |
8 | void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor)
9 | {
10 |     const float *input = input_tensor.data_ptr<float>();
11 |     const int *idx = idx_tensor.data_ptr<int>();
12 |     const float *weight = weight_tensor.data_ptr<float>();
13 |     float *output = output_tensor.data_ptr<float>();
14 | interpolation_forward_cuda_launcher(n, c, k, input, idx, weight, output);
15 | }
16 |
17 | void interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_input_tensor)
18 | {
19 |     const float *grad_output = grad_output_tensor.data_ptr<float>();
20 |     const int *idx = idx_tensor.data_ptr<int>();
21 |     const float *weight = weight_tensor.data_ptr<float>();
22 |     float *grad_input = grad_input_tensor.data_ptr<float>();
23 | interpolation_backward_cuda_launcher(n, c, k, grad_output, idx, weight, grad_input);
24 | }
25 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/interpolation/interpolation_cuda_kernel.cu:
--------------------------------------------------------------------------------
1 | #include "../cuda_utils.h"
2 | #include "interpolation_cuda_kernel.h"
3 |
4 |
5 | __global__ void interpolation_forward_cuda_kernel(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output)
6 | {
7 | // input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c)
8 | int index = blockIdx.x * blockDim.x + threadIdx.x;
9 | if (index >= n * c) return;
10 | int c_idx = index % c;
11 | int n_idx = index / c;
12 | for (int i = 0; i < k; i++)
13 | {
14 | int idx_idx = n_idx * k + i;
15 | int input_idx = idx[idx_idx] * c + c_idx;
16 | output[index] += input[input_idx] * weight[idx_idx];
17 | }
18 | }
19 |
20 | __global__ void interpolation_backward_cuda_kernel(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input)
21 | {
22 | // input: grad_output: (n, c), idx: (n, k), weight: (n, k), output: grad_input (m, c)
23 | int index = blockIdx.x * blockDim.x + threadIdx.x;
24 | if (index >= n * c) return;
25 | int c_idx = index % c;
26 | int n_idx = index / c;
27 | for (int i = 0; i < k; i++)
28 | {
29 | int idx_idx = n_idx * k + i;
30 | int input_idx = idx[idx_idx] * c + c_idx;
31 | atomicAdd(grad_input + input_idx, grad_output[index] * weight[idx_idx]);
32 | }
33 | }
34 |
35 | void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output) {
36 | // input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c)
37 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
38 | dim3 threads(THREADS_PER_BLOCK);
39 |     interpolation_forward_cuda_kernel<<<blocks, threads, 0>>>(n, c, k, input, idx, weight, output);
40 | }
41 |
42 | void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input) {
43 | // input: grad_output: (n, c), idx: (n, k), weight: (n, k), output: grad_input (m, c)
44 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
45 | dim3 threads(THREADS_PER_BLOCK);
46 |     interpolation_backward_cuda_kernel<<<blocks, threads, 0>>>(n, c, k, grad_output, idx, weight, grad_input);
47 | }
48 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/interpolation/interpolation_cuda_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _INTERPOLATION_CUDA_KERNEL
2 | #define _INTERPOLATION_CUDA_KERNEL
3 | #include <vector>
4 | #include <torch/serialize/tensor.h>
5 | #include <ATen/cuda/CUDAContext.h>
6 |
7 | void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor);
8 | void interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_input_tensor);
9 |
10 | #ifdef __cplusplus
11 | extern "C" {
12 | #endif
13 |
14 | void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output);
15 | void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input);
16 |
17 | #ifdef __cplusplus
18 | }
19 | #endif
20 | #endif
21 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/knnquery/knnquery_cuda.cpp:
--------------------------------------------------------------------------------
1 | #include <vector>
2 | #include <torch/serialize/tensor.h>
3 | #include <ATen/cuda/CUDAContext.h>
4 | #include <cuda_runtime_api.h>
5 | #include "knnquery_cuda_kernel.h"
6 |
7 |
8 | void knnquery_cuda(int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor)
9 | {
10 |     const float *xyz = xyz_tensor.data_ptr<float>();
11 |     const float *new_xyz = new_xyz_tensor.data_ptr<float>();
12 |     const int *offset = offset_tensor.data_ptr<int>();
13 |     const int *new_offset = new_offset_tensor.data_ptr<int>();
14 |     int *idx = idx_tensor.data_ptr<int>();
15 |     float *dist2 = dist2_tensor.data_ptr<float>();
16 | knnquery_cuda_launcher(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2);
17 | }
18 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/knnquery/knnquery_cuda_kernel.cu:
--------------------------------------------------------------------------------
1 | #include "../cuda_utils.h"
2 | #include "knnquery_cuda_kernel.h"
3 |
4 |
5 | __device__ void swap_float(float *x, float *y)
6 | {
7 | float tmp = *x;
8 | *x = *y;
9 | *y = tmp;
10 | }
11 |
12 |
13 | __device__ void swap_int(int *x, int *y)
14 | {
15 | int tmp = *x;
16 | *x = *y;
17 | *y = tmp;
18 | }
19 |
20 |
21 | __device__ void reheap(float *dist, int *idx, int k)
22 | {
23 | int root = 0;
24 | int child = root * 2 + 1;
25 | while (child < k)
26 | {
27 | if(child + 1 < k && dist[child+1] > dist[child])
28 | child++;
29 | if(dist[root] > dist[child])
30 | return;
31 | swap_float(&dist[root], &dist[child]);
32 | swap_int(&idx[root], &idx[child]);
33 | root = child;
34 | child = root * 2 + 1;
35 | }
36 | }
37 |
38 |
39 | __device__ void heap_sort(float *dist, int *idx, int k)
40 | {
41 | int i;
42 | for (i = k - 1; i > 0; i--)
43 | {
44 | swap_float(&dist[0], &dist[i]);
45 | swap_int(&idx[0], &idx[i]);
46 | reheap(dist, idx, i);
47 | }
48 | }
49 |
50 |
51 | __device__ int get_bt_idx(int idx, const int *offset)
52 | {
53 | int i = 0;
54 | while (1)
55 | {
56 | if (idx < offset[i])
57 | break;
58 | else
59 | i++;
60 | }
61 | return i;
62 | }
63 |
64 |
65 | __global__ void knnquery_cuda_kernel(int m, int nsample, const float *__restrict__ xyz, const float *__restrict__ new_xyz, const int *__restrict__ offset, const int *__restrict__ new_offset, int *__restrict__ idx, float *__restrict__ dist2) {
66 | // input: xyz (n, 3) new_xyz (m, 3)
67 | // output: idx (m, nsample) dist2 (m, nsample)
68 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
69 | if (pt_idx >= m) return;
70 |
71 | new_xyz += pt_idx * 3;
72 | idx += pt_idx * nsample;
73 | dist2 += pt_idx * nsample;
74 | int bt_idx = get_bt_idx(pt_idx, new_offset);
75 | int start;
76 | if (bt_idx == 0)
77 | start = 0;
78 | else
79 | start = offset[bt_idx - 1];
80 | int end = offset[bt_idx];
81 |
82 | float new_x = new_xyz[0];
83 | float new_y = new_xyz[1];
84 | float new_z = new_xyz[2];
85 |
86 |     float best_dist[100];  // fixed-size scratch heap: assumes nsample <= 100
87 |     int best_idx[100];
88 | for(int i = 0; i < nsample; i++){
89 | best_dist[i] = 1e10;
90 | best_idx[i] = start;
91 | }
92 | for(int i = start; i < end; i++){
93 | float x = xyz[i * 3 + 0];
94 | float y = xyz[i * 3 + 1];
95 | float z = xyz[i * 3 + 2];
96 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z);
97 | if (d2 < best_dist[0]){
98 | best_dist[0] = d2;
99 | best_idx[0] = i;
100 | reheap(best_dist, best_idx, nsample);
101 | }
102 | }
103 | heap_sort(best_dist, best_idx, nsample);
104 | for(int i = 0; i < nsample; i++){
105 | idx[i] = best_idx[i];
106 | dist2[i] = best_dist[i];
107 | }
108 | }
109 |
110 |
111 | void knnquery_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2) {
112 | // input: new_xyz: (m, 3), xyz: (n, 3), idx: (m, nsample)
113 | dim3 blocks(DIVUP(m, THREADS_PER_BLOCK));
114 | dim3 threads(THREADS_PER_BLOCK);
115 |     knnquery_cuda_kernel<<<blocks, threads, 0>>>(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2);
116 | }
117 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/knnquery/knnquery_cuda_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _KNNQUERY_CUDA_KERNEL
2 | #define _KNNQUERY_CUDA_KERNEL
3 | #include <vector>
4 | #include <torch/serialize/tensor.h>
5 | #include <ATen/cuda/CUDAContext.h>
6 |
7 | void knnquery_cuda(int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor);
8 |
9 | #ifdef __cplusplus
10 | extern "C" {
11 | #endif
12 |
13 | void knnquery_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2);
14 |
15 | #ifdef __cplusplus
16 | }
17 | #endif
18 | #endif
19 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/pointops_api.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/serialize/tensor.h>
2 | #include <torch/extension.h>
3 |
4 | #include "knnquery/knnquery_cuda_kernel.h"
5 | #include "sampling/sampling_cuda_kernel.h"
6 | #include "grouping/grouping_cuda_kernel.h"
7 | #include "interpolation/interpolation_cuda_kernel.h"
8 | #include "aggregation/aggregation_cuda_kernel.h"
9 | #include "subtraction/subtraction_cuda_kernel.h"
10 |
11 |
12 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
13 | m.def("knnquery_cuda", &knnquery_cuda, "knnquery_cuda");
14 | m.def("furthestsampling_cuda", &furthestsampling_cuda, "furthestsampling_cuda");
15 | m.def("grouping_forward_cuda", &grouping_forward_cuda, "grouping_forward_cuda");
16 | m.def("grouping_backward_cuda", &grouping_backward_cuda, "grouping_backward_cuda");
17 | m.def("interpolation_forward_cuda", &interpolation_forward_cuda, "interpolation_forward_cuda");
18 | m.def("interpolation_backward_cuda", &interpolation_backward_cuda, "interpolation_backward_cuda");
19 | m.def("subtraction_forward_cuda", &subtraction_forward_cuda, "subtraction_forward_cuda");
20 | m.def("subtraction_backward_cuda", &subtraction_backward_cuda, "subtraction_backward_cuda");
21 | m.def("aggregation_forward_cuda", &aggregation_forward_cuda, "aggregation_forward_cuda");
22 | m.def("aggregation_backward_cuda", &aggregation_backward_cuda, "aggregation_backward_cuda");
23 | }
24 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/sampling/sampling_cuda.cpp:
--------------------------------------------------------------------------------
1 | #include <vector>
2 | #include <torch/serialize/tensor.h>
3 | #include <ATen/cuda/CUDAContext.h>
4 | #include <cuda_runtime_api.h>
5 | #include "sampling_cuda_kernel.h"
6 |
7 |
8 | void furthestsampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor)
9 | {
10 |     const float *xyz = xyz_tensor.data_ptr<float>();
11 |     const int *offset = offset_tensor.data_ptr<int>();
12 |     const int *new_offset = new_offset_tensor.data_ptr<int>();
13 |     float *tmp = tmp_tensor.data_ptr<float>();
14 |     int *idx = idx_tensor.data_ptr<int>();
15 | furthestsampling_cuda_launcher(b, n, xyz, offset, new_offset, tmp, idx);
16 | }
17 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/sampling/sampling_cuda_kernel.cu:
--------------------------------------------------------------------------------
1 | #include "../cuda_utils.h"
2 | #include "sampling_cuda_kernel.h"
3 |
4 |
5 | __device__ void __update(float *dists, int *dists_i, int idx1, int idx2) {
6 | const float v1 = dists[idx1], v2 = dists[idx2];
7 | const int i1 = dists_i[idx1], i2 = dists_i[idx2];
8 | dists[idx1] = max(v1, v2);
9 | dists_i[idx1] = v2 > v1 ? i2 : i1;
10 | }
11 |
12 | // input xyz: (n, 3), tmp: (b, n_max)
13 | // output: idx (m)
14 | template <unsigned int block_size>
15 | __global__ void furthestsampling_cuda_kernel(const float *xyz, const int *offset, const int *new_offset, float *tmp, int *idx)
16 | {
17 | __shared__ float dists[block_size];
18 | __shared__ int dists_i[block_size];
19 |
20 | int bid = blockIdx.x;
21 | int start_n, end_n, start_m, end_m, old;
22 | if (bid == 0) {
23 | start_n = 0;
24 | end_n = offset[0];
25 | start_m = 0;
26 | end_m = new_offset[0];
27 | old = 0;
28 | }
29 | else {
30 | start_n = offset[bid - 1];
31 | end_n = offset[bid];
32 | start_m = new_offset[bid - 1];
33 | end_m = new_offset[bid];
34 | old = offset[bid - 1];
35 | }
36 |
37 | const int stride = block_size;
38 | int tid = threadIdx.x;
39 | if (tid == 0) idx[start_m] = start_n;
40 |
41 | __syncthreads();
42 | for (int j = start_m + 1; j < end_m; j++)
43 | {
44 | int besti = start_n;
45 | float best = -1;
46 | float x1 = xyz[old * 3 + 0];
47 | float y1 = xyz[old * 3 + 1];
48 | float z1 = xyz[old * 3 + 2];
49 | for (int k = start_n + tid; k < end_n; k += stride)
50 | {
51 | float x2 = xyz[k * 3 + 0];
52 | float y2 = xyz[k * 3 + 1];
53 | float z2 = xyz[k * 3 + 2];
54 | float d = (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1);
55 | float d2 = min(d, tmp[k]);
56 | tmp[k] = d2;
57 | besti = d2 > best ? k : besti;
58 | best = d2 > best ? d2 : best;
59 | }
60 | dists[tid] = best;
61 | dists_i[tid] = besti;
62 | __syncthreads();
63 |
64 | if (block_size >= 1024) {
65 | if (tid < 512) {
66 | __update(dists, dists_i, tid, tid + 512);
67 | }
68 | __syncthreads();
69 | }
70 | if (block_size >= 512) {
71 | if (tid < 256) {
72 | __update(dists, dists_i, tid, tid + 256);
73 | }
74 | __syncthreads();
75 | }
76 | if (block_size >= 256) {
77 | if (tid < 128) {
78 | __update(dists, dists_i, tid, tid + 128);
79 | }
80 | __syncthreads();
81 | }
82 | if (block_size >= 128) {
83 | if (tid < 64) {
84 | __update(dists, dists_i, tid, tid + 64);
85 | }
86 | __syncthreads();
87 | }
88 | if (block_size >= 64) {
89 | if (tid < 32) {
90 | __update(dists, dists_i, tid, tid + 32);
91 | }
92 | __syncthreads();
93 | }
94 | if (block_size >= 32) {
95 | if (tid < 16) {
96 | __update(dists, dists_i, tid, tid + 16);
97 | }
98 | __syncthreads();
99 | }
100 | if (block_size >= 16) {
101 | if (tid < 8) {
102 | __update(dists, dists_i, tid, tid + 8);
103 | }
104 | __syncthreads();
105 | }
106 | if (block_size >= 8) {
107 | if (tid < 4) {
108 | __update(dists, dists_i, tid, tid + 4);
109 | }
110 | __syncthreads();
111 | }
112 | if (block_size >= 4) {
113 | if (tid < 2) {
114 | __update(dists, dists_i, tid, tid + 2);
115 | }
116 | __syncthreads();
117 | }
118 | if (block_size >= 2) {
119 | if (tid < 1) {
120 | __update(dists, dists_i, tid, tid + 1);
121 | }
122 | __syncthreads();
123 | }
124 |
125 | old = dists_i[0];
126 | if (tid == 0)
127 | idx[j] = old;
128 | }
129 | }
130 |
131 | void furthestsampling_cuda_launcher(int b, int n, const float *xyz, const int *offset, const int *new_offset, float *tmp, int *idx)
132 | {
133 | unsigned int n_threads = opt_n_threads(n);
134 | switch (n_threads) {
135 |     case 1024:
136 |         furthestsampling_cuda_kernel<1024><<<b, n_threads>>>(xyz, offset, new_offset, tmp, idx);
137 |         break;
138 |     case 512:
139 |         furthestsampling_cuda_kernel<512><<<b, n_threads>>>(xyz, offset, new_offset, tmp, idx);
140 |         break;
141 |     case 256:
142 |         furthestsampling_cuda_kernel<256><<<b, n_threads>>>(xyz, offset, new_offset, tmp, idx);
143 |         break;
144 |     case 128:
145 |         furthestsampling_cuda_kernel<128><<<b, n_threads>>>(xyz, offset, new_offset, tmp, idx);
146 |         break;
147 |     case 64:
148 |         furthestsampling_cuda_kernel<64><<<b, n_threads>>>(xyz, offset, new_offset, tmp, idx);
149 |         break;
150 |     case 32:
151 |         furthestsampling_cuda_kernel<32><<<b, n_threads>>>(xyz, offset, new_offset, tmp, idx);
152 |         break;
153 |     case 16:
154 |         furthestsampling_cuda_kernel<16><<<b, n_threads>>>(xyz, offset, new_offset, tmp, idx);
155 |         break;
156 |     case 8:
157 |         furthestsampling_cuda_kernel<8><<<b, n_threads>>>(xyz, offset, new_offset, tmp, idx);
158 |         break;
159 |     case 4:
160 |         furthestsampling_cuda_kernel<4><<<b, n_threads>>>(xyz, offset, new_offset, tmp, idx);
161 |         break;
162 |     case 2:
163 |         furthestsampling_cuda_kernel<2><<<b, n_threads>>>(xyz, offset, new_offset, tmp, idx);
164 |         break;
165 |     case 1:
166 |         furthestsampling_cuda_kernel<1><<<b, n_threads>>>(xyz, offset, new_offset, tmp, idx);
167 |         break;
168 |     default:
169 |         furthestsampling_cuda_kernel<512><<<b, n_threads>>>(xyz, offset, new_offset, tmp, idx);
170 | }
171 | }
172 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/sampling/sampling_cuda_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _SAMPLING_CUDA_KERNEL
2 | #define _SAMPLING_CUDA_KERNEL
3 | #include <vector>
4 | #include <torch/serialize/tensor.h>
5 | #include <ATen/cuda/CUDAContext.h>
6 |
7 | void furthestsampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor);
8 |
9 | #ifdef __cplusplus
10 | extern "C" {
11 | #endif
12 |
13 | void furthestsampling_cuda_launcher(int b, int n, const float *xyz, const int *offset, const int *new_offset, float *tmp, int *idx);
14 |
15 | #ifdef __cplusplus
16 | }
17 | #endif
18 | #endif
19 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/subtraction/subtraction_cuda.cpp:
--------------------------------------------------------------------------------
1 | #include <vector>
2 | #include <torch/serialize/tensor.h>
3 | #include <ATen/cuda/CUDAContext.h>
4 | #include <cuda_runtime_api.h>
5 | #include "subtraction_cuda_kernel.h"
6 |
7 |
8 | void subtraction_forward_cuda(int n, int nsample, int c, at::Tensor input1_tensor, at::Tensor input2_tensor, at::Tensor idx_tensor, at::Tensor output_tensor)
9 | {
10 |     const float *input1 = input1_tensor.data_ptr<float>();
11 |     const float *input2 = input2_tensor.data_ptr<float>();
12 |     const int *idx = idx_tensor.data_ptr<int>();
13 |     float *output = output_tensor.data_ptr<float>();
14 | subtraction_forward_cuda_launcher(n, nsample, c, input1, input2, idx, output);
15 | }
16 |
17 | void subtraction_backward_cuda(int n, int nsample, int c, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input1_tensor, at::Tensor grad_input2_tensor)
18 | {
19 |     const int *idx = idx_tensor.data_ptr<int>();
20 |     const float *grad_output = grad_output_tensor.data_ptr<float>();
21 |     float *grad_input1 = grad_input1_tensor.data_ptr<float>();
22 |     float *grad_input2 = grad_input2_tensor.data_ptr<float>();
23 | subtraction_backward_cuda_launcher(n, nsample, c, idx, grad_output, grad_input1, grad_input2);
24 | }
25 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/subtraction/subtraction_cuda_kernel.cu:
--------------------------------------------------------------------------------
1 | #include "../cuda_utils.h"
2 | #include "subtraction_cuda_kernel.h"
3 |
4 |
5 | __global__ void subtraction_forward_cuda_kernel(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) {
6 | // input: input1: (n, c), input2: (n, c), idx: (n, nsample), output: (n, nsample, c)
7 | int index = blockIdx.x * blockDim.x + threadIdx.x;
8 | if (index >= n * nsample * c) return;
9 | const int c_idx = index % c;
10 | const int nsample_idx = (index / c) % nsample;
11 | const int n_idx = index / nsample / c;
12 | const int idx_idx = n_idx * nsample + nsample_idx;
13 | const int input1_idx = n_idx * c + c_idx;
14 | const int input2_idx = idx[idx_idx] * c + c_idx;
15 | output[index] = input1[input1_idx] - input2[input2_idx];
16 | }
17 |
18 | __global__ void subtraction_backward_cuda_kernel(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) {
19 | // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c)
20 | int index = blockIdx.x * blockDim.x + threadIdx.x;
21 | if (index >= n * nsample * c) return;
22 | const int c_idx = index % c;
23 | const int nsample_idx = (index / c) % nsample;
24 | const int n_idx = index / nsample / c;
25 | const int idx_idx = n_idx * nsample + nsample_idx;
26 | const int input1_idx = n_idx * c + c_idx;
27 | const int input2_idx = idx[idx_idx] * c + c_idx;
28 | atomicAdd(grad_input1 + input1_idx, grad_output[index]);
29 | atomicAdd(grad_input2 + input2_idx, -grad_output[index]);
30 | }
31 |
32 | void subtraction_forward_cuda_launcher(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) {
33 | // input: input1: (n, c), input2: (n, c), idx: (n, nsample), output: (n, nsample, c)
34 | dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK));
35 | dim3 threads(THREADS_PER_BLOCK);
36 |     subtraction_forward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, input1, input2, idx, output);
37 | }
38 |
39 | void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) {
40 | // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c)
41 | dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK));
42 | dim3 threads(THREADS_PER_BLOCK);
43 |     subtraction_backward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, idx, grad_output, grad_input1, grad_input2);
44 | }
45 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/subtraction/subtraction_cuda_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _SUBTRACTION_CUDA_KERNEL
2 | #define _SUBTRACTION_CUDA_KERNEL
3 | #include <vector>
4 | #include <torch/serialize/tensor.h>