├── .gitignore
├── LICENSE.txt
├── README.md
├── assets
│   └── teaser.png
├── classification
│   ├── README.md
│   ├── dataset
│   │   ├── ScanObjectNNDataLoader.py
│   │   └── __init__.py
│   ├── init.sh
│   ├── models
│   │   ├── __init__.py
│   │   └── repsurf
│   │       ├── __init__.py
│   │       ├── repsurf_ssg_umb.py
│   │       └── repsurf_ssg_umb_2x.py
│   ├── modules
│   │   ├── __init__.py
│   │   ├── pointnet2_utils.py
│   │   ├── pointops
│   │   │   ├── __init__.py
│   │   │   ├── functions
│   │   │   │   ├── __init__.py
│   │   │   │   └── pointops.py
│   │   │   ├── setup.py
│   │   │   └── src
│   │   │       ├── __init__.py
│   │   │       ├── ballquery
│   │   │       │   ├── ballquery_cuda.cpp
│   │   │       │   ├── ballquery_cuda_kernel.cu
│   │   │       │   └── ballquery_cuda_kernel.h
│   │   │       ├── cuda_utils.h
│   │   │       ├── grouping
│   │   │       │   ├── grouping_cuda.cpp
│   │   │       │   ├── grouping_cuda_kernel.cu
│   │   │       │   └── grouping_cuda_kernel.h
│   │   │       ├── grouping_int
│   │   │       │   ├── grouping_int_cuda.cpp
│   │   │       │   ├── grouping_int_cuda_kernel.cu
│   │   │       │   └── grouping_int_cuda_kernel.h
│   │   │       ├── interpolation
│   │   │       │   ├── interpolation_cuda.cpp
│   │   │       │   ├── interpolation_cuda_kernel.cu
│   │   │       │   └── interpolation_cuda_kernel.h
│   │   │       ├── knnquery
│   │   │       │   ├── __init__.py
│   │   │       │   ├── knnquery_cuda.cpp
│   │   │       │   ├── knnquery_cuda_kernel.cu
│   │   │       │   └── knnquery_cuda_kernel.h
│   │   │       ├── knnquery_heap
│   │   │       │   ├── __init__.py
│   │   │       │   ├── knnquery_heap_cuda.cpp
│   │   │       │   ├── knnquery_heap_cuda_kernel.cu
│   │   │       │   └── knnquery_heap_cuda_kernel.h
│   │   │       ├── pointops_api.cpp
│   │   │       └── sampling
│   │   │           ├── sampling_cuda.cpp
│   │   │           ├── sampling_cuda_kernel.cu
│   │   │           └── sampling_cuda_kernel.h
│   │   ├── polar_utils.py
│   │   ├── ptaug_utils.py
│   │   ├── recons_utils.py
│   │   └── repsurface_utils.py
│   ├── scripts
│   │   └── scanobjectnn
│   │       ├── repsurf_ssg_umb.sh
│   │       └── repsurf_ssg_umb_2x.sh
│   ├── tool
│   │   └── train_cls_scanobjectnn.py
│   └── util
│       ├── __init__.py
│       └── utils.py
├── segmentation
│   ├── README.md
│   ├── dataset
│   │   ├── S3DISDataLoader.py
│   │   └── __init__.py
│   ├── init.sh
│   ├── models
│   │   ├── __init__.py
│   │   ├── pointnet2
│   │   │   ├── __init__.py
│   │   │   └── pointnet2_ssg.py
│   │   ├── pointtransformer
│   │   │   ├── __init__.py
│   │   │   └── pointtransformer.py
│   │   └── repsurf
│   │       ├── __init__.py
│   │       └── repsurf_umb_ssg.py
│   ├── modules
│   │   ├── __init__.py
│   │   ├── aug_utils.py
│   │   ├── pointnet2_utils.py
│   │   ├── pointops
│   │   │   ├── __init__.py
│   │   │   ├── functions
│   │   │   │   ├── __init__.py
│   │   │   │   └── pointops.py
│   │   │   ├── setup.py
│   │   │   └── src
│   │   │       ├── __init__.py
│   │   │       ├── aggregation
│   │   │       │   ├── aggregation_cuda.cpp
│   │   │       │   ├── aggregation_cuda_kernel.cu
│   │   │       │   └── aggregation_cuda_kernel.h
│   │   │       ├── cuda_utils.h
│   │   │       ├── grouping
│   │   │       │   ├── grouping_cuda.cpp
│   │   │       │   ├── grouping_cuda_kernel.cu
│   │   │       │   └── grouping_cuda_kernel.h
│   │   │       ├── interpolation
│   │   │       │   ├── interpolation_cuda.cpp
│   │   │       │   ├── interpolation_cuda_kernel.cu
│   │   │       │   └── interpolation_cuda_kernel.h
│   │   │       ├── knnquery
│   │   │       │   ├── knnquery_cuda.cpp
│   │   │       │   ├── knnquery_cuda_kernel.cu
│   │   │       │   └── knnquery_cuda_kernel.h
│   │   │       ├── pointops_api.cpp
│   │   │       ├── sampling
│   │   │       │   ├── sampling_cuda.cpp
│   │   │       │   ├── sampling_cuda_kernel.cu
│   │   │       │   └── sampling_cuda_kernel.h
│   │   │       └── subtraction
│   │   │           ├── subtraction_cuda.cpp
│   │   │           ├── subtraction_cuda_kernel.cu
│   │   │           └── subtraction_cuda_kernel.h
│   │   ├── pointtransformer_utils.py
│   │   ├── polar_utils.py
│   │   ├── recons_utils.py
│   │   ├── repsurface_utils.py
│   │   └── voxelize_utils.py
│   ├── scripts
│   │   └── s3dis
│   │       ├── test_pointnet2.sh
│   │       ├── test_pointtransformer.sh
│   │       ├── test_repsurf_umb.sh
│   │       ├── train_pointnet2.sh
│   │       ├── train_pointtransformer.sh
│   │       └── train_repsurf_umb.sh
│   ├── tool
│   │   ├── test_s3dis.py
│   │   └── train.py
│   └── util
│       ├── __init__.py
│       ├── data_util.py
│       └── utils.py
└── visualization
    ├── airplane_0001.txt
    ├── bed_0001.txt
    ├── cup_0001.txt
    ├── table_0250.txt
    ├── triangled_airplane.obj
    ├── triangled_bed.obj
    ├── triangled_cup.obj
    └── triangled_table.obj
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 | .idea/
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright 2022 Haoxi Ran.
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # RepSurf - Surface Representation for Point Clouds [CVPR 2022 Oral]
2 |
3 | By *[Haoxi Ran\*](https://hancyran.github.io/), Jun Liu, Chengjie Wang* (\*: corresponding contact)
4 |
5 | [SOTA: 3D Point Cloud Classification on ScanObjectNN](https://paperswithcode.com/sota/3d-point-cloud-classification-on-scanobjectnn?p=surface-representation-for-point-clouds)
6 | [SOTA: 3D Object Detection on SUN-RGBD val](https://paperswithcode.com/sota/3d-object-detection-on-sun-rgbd-val?p=surface-representation-for-point-clouds)
7 | [SOTA: 3D Point Cloud Classification on ModelNet40](https://paperswithcode.com/sota/3d-point-cloud-classification-on-modelnet40?p=surface-representation-for-point-clouds)
8 | [SOTA: Semantic Segmentation on S3DIS](https://paperswithcode.com/sota/semantic-segmentation-on-s3dis?p=surface-representation-for-point-clouds)
9 | [SOTA: 3D Object Detection on ScanNetV2](https://paperswithcode.com/sota/3d-object-detection-on-scannetv2?p=surface-representation-for-point-clouds)
10 | [SOTA: Semantic Segmentation on S3DIS Area5](https://paperswithcode.com/sota/semantic-segmentation-on-s3dis-area5?p=surface-representation-for-point-clouds)
11 |
12 | ### The official PyTorch implementation of "[Surface Representation for Point Clouds](http://arxiv.org/abs/2205.05740)"
13 | ### [PDF](https://openaccess.thecvf.com/content/CVPR2022/papers/Ran_Surface_Representation_for_Point_Clouds_CVPR_2022_paper.pdf) | [arXiv](http://arxiv.org/abs/2205.05740)
14 |
15 |
16 |
17 | ![RepSurf teaser](./assets/teaser.png)
18 |
19 |
20 |
21 | ## News:
22 | - (**Sep 10** NEW :fire:) We have uploaded the implementation of RepSurf on S3DIS along with its training log and pretrained weights.
23 | - (**June 24** :fire:) We successfully finished our Oral presentation at CVPR 2022!
24 | - (**May 11**) We have uploaded the implementation of RepSurf on ScanObjectNN along with its training log and pretrained weights.
25 |
26 | ## Tasks:
27 |
28 | ### We conduct the experiments for the different tasks on different codebases:
29 |
30 | > Classification: **[3D Object Classification](./classification)**
31 | > Segmentation: **[3D Semantic Segmentation](./segmentation)**
32 |
33 |
34 | ## Visualization
35 |
36 | We provide several visualization results in the folder **./visualization** for a closer look at the construction of
37 | RepSurf.
38 |
39 |
40 | ## License
41 |
42 | RepSurf is under the Apache-2.0 license. Please contact the primary author **Haoxi Ran (ranhaoxi@gmail.com)** for
43 | commercial use.
44 |
--------------------------------------------------------------------------------
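As a quick way to inspect the samples mentioned in the Visualization section of the README above: a minimal sketch, assuming each `.txt` row holds comma-separated coordinates (ModelNet-style `x,y,z[,nx,ny,nz]`) — the layout is an assumption here, so verify it against the actual files:

```python
import numpy as np

# Hypothetical loader for the files in ./visualization; the comma-separated
# x,y,z[,nx,ny,nz] row layout is an assumption, not documented by the repo.
pts = np.loadtxt('visualization/airplane_0001.txt', delimiter=',')
print(pts.shape)   # expect (N, 3) or (N, 6)
xyz = pts[:, :3]   # point coordinates
```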
/assets/teaser.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/assets/teaser.png
--------------------------------------------------------------------------------
/classification/README.md:
--------------------------------------------------------------------------------
1 | # RepSurf for Classification
2 |
3 | By *[Haoxi Ran\*](https://hancyran.github.io/), Jun Liu, Chengjie Wang* (\*: corresponding contact)
4 |
5 | ### [PDF](https://openaccess.thecvf.com/content/CVPR2022/papers/Ran_Surface_Representation_for_Point_Clouds_CVPR_2022_paper.pdf) | [arXiv](http://arxiv.org/abs/2205.05740)
6 |
7 |
8 | ## Preparation
9 |
10 | ### Environment
11 |
12 | We tested under the following environment:
13 |
14 | * python 3.7
15 | * pytorch 1.6.0
16 | * cuda 10.1
17 | * gcc 7.2.0
18 | * h5py
19 |
20 | For Anaconda users, initialize the conda environment **repsurf-cls** by:
21 |
22 | ```
23 | sh init.sh
24 | ```
25 |
26 | ## Experiments
27 |
28 | ### ScanObjectNN (Data & Logs: [Google Drive](https://drive.google.com/drive/folders/1DGWT9W46MKVI0-lu18hJhB-R3BFVWuCs?usp=sharing))
29 |
30 | * Performance:
31 |
32 | | Model | Accuracy | #Params | Augment | Code | Log | Checkpoint |
33 | |---|---|---|---|---|---|---|
34 | | MVTN | 82.8 | 4.24M | None | link | N/A | link |
35 | | PointMLP | 85.7 | 12.6M | Scale, Shift | link | link | link |
36 | | PointNet++ SSG | 77.9 | 1.475M | Rotate, Jitter | link | N/A | N/A |
37 | | Umbrella RepSurf (PointNet++ SSG) | 84.87 | 1.483M | None | link | google drive | google drive (6MB) |
38 | | Umbrella RepSurf (PointNet++ SSG, 2x) | 86.05 | 6.806M | None | link | google drive | google drive (27MB) |
39 |
94 | * To download the dataset:
95 |
96 | ```
97 | wget https://download.cs.stanford.edu/orion/scanobjectnn/h5_files.zip
98 | unzip h5_files.zip
99 | ln -s [PATH]/h5_files data/ScanObjectNN
100 | ```
101 |
102 | **Note**: We conduct all experiments on the hardest variant of ScanObjectNN (**PB_T50_RS**).
103 |
104 |
105 | * To train **Umbrella RepSurf** on ScanObjectNN:
106 |
107 | ```
108 | sh scripts/scanobjectnn/repsurf_ssg_umb.sh
109 | ```
110 |
111 | * To train **Umbrella RepSurf (2x setting)** on ScanObjectNN:
112 |
113 | ```
114 | sh scripts/scanobjectnn/repsurf_ssg_umb_2x.sh
115 | ```
116 |
117 | ## Acknowledgment
118 |
119 | We use part of the library [pointops](https://github.com/hszhao/PointWeb/tree/master/lib/pointops)
120 | from [PointWeb](https://github.com/hszhao/PointWeb).
121 |
122 | ## License
123 |
124 | RepSurf is under the Apache-2.0 license. Please contact the primary author **Haoxi Ran (ranhaoxi@gmail.com)** for
125 | commercial use.
126 |
--------------------------------------------------------------------------------
/classification/dataset/ScanObjectNNDataLoader.py:
--------------------------------------------------------------------------------
1 | """
2 | Author: Haoxi Ran
3 | Date: 05/10/2022
4 | """
5 |
6 | import h5py
7 | import warnings
8 | from torch.utils.data import Dataset
9 |
10 | warnings.filterwarnings('ignore')
11 |
12 |
13 | class ScanObjectNNDataLoader(Dataset):
14 | def __init__(self, root, split='training', bg=True):
15 | self.root = root
16 |
17 | assert (split == 'training' or split == 'test')
18 | if bg:
19 | print('Use data with background points')
20 | dir_name = 'main_split'
21 | else:
22 | print('Use data without background points')
23 | dir_name = 'main_split_nobg'
24 | file_name = '_objectdataset_augmentedrot_scale75.h5'
25 | h5_name = '{}/{}/{}'.format(self.root, dir_name, split + file_name)
26 | with h5py.File(h5_name, mode="r") as f:
27 | self.data = f['data'][:].astype('float32')
28 | self.label = f['label'][:].astype('int64')
29 | print('The size of %s data is %d' % (split, self.data.shape[0]))
30 |
31 | def __len__(self):
32 | return self.data.shape[0]
33 |
34 | def __getitem__(self, index):
35 | return self.data[index].T, self.label[index]
36 |
--------------------------------------------------------------------------------
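A minimal usage sketch for the loader above (hedged: `data/ScanObjectNN` is the symlink created per the README, and the batch size and worker count are illustrative):

```python
import torch
from torch.utils.data import DataLoader
from dataset.ScanObjectNNDataLoader import ScanObjectNNDataLoader

# Assumes data/ScanObjectNN links to the unzipped h5_files directory.
train_set = ScanObjectNNDataLoader(root='data/ScanObjectNN', split='training', bg=True)
loader = DataLoader(train_set, batch_size=16, shuffle=True, num_workers=4)

points, label = next(iter(loader))
print(points.shape, label.shape)  # __getitem__ transposes, so points are [B, 3, N]
```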
/classification/dataset/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/classification/dataset/__init__.py
--------------------------------------------------------------------------------
/classification/init.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | mkdir -p log/PointAnalysis/log/ScanObjectNN
4 | mkdir -p data/
5 |
6 | conda create -n repsurf-cls python=3.7 -y
7 | conda activate repsurf-cls
8 |
9 | conda install pytorch=1.6.0 torchvision=0.7.0 cudatoolkit=10.1 -c pytorch -c conda-forge -y
10 | conda install -c anaconda h5py -y
11 |
12 | cd modules/pointops
13 | python3 setup.py install
14 | cd -
15 |
--------------------------------------------------------------------------------
/classification/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/classification/models/__init__.py
--------------------------------------------------------------------------------
/classification/models/repsurf/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/classification/models/repsurf/__init__.py
--------------------------------------------------------------------------------
/classification/models/repsurf/repsurf_ssg_umb.py:
--------------------------------------------------------------------------------
1 | """
2 | Author: Haoxi Ran
3 | Date: 05/10/2022
4 | """
5 |
6 | import torch.nn as nn
7 | import torch.nn.functional as F
8 | from modules.repsurface_utils import SurfaceAbstractionCD, UmbrellaSurfaceConstructor
9 |
10 |
11 | class Model(nn.Module):
12 | def __init__(self, args):
13 | super(Model, self).__init__()
14 | center_channel = 0 if not args.return_center else (6 if args.return_polar else 3)
15 | repsurf_channel = 10
16 |
17 | self.init_nsample = args.num_point
18 | self.return_dist = args.return_dist
19 | self.surface_constructor = UmbrellaSurfaceConstructor(args.group_size + 1, repsurf_channel,
20 | return_dist=args.return_dist, aggr_type=args.umb_pool,
21 | cuda=args.cuda_ops)
22 | self.sa1 = SurfaceAbstractionCD(npoint=512, radius=0.2, nsample=32, feat_channel=repsurf_channel,
23 | pos_channel=center_channel, mlp=[64, 64, 128], group_all=False,
24 | return_polar=args.return_polar, cuda=args.cuda_ops)
25 | self.sa2 = SurfaceAbstractionCD(npoint=128, radius=0.4, nsample=64, feat_channel=128 + repsurf_channel,
26 | pos_channel=center_channel, mlp=[128, 128, 256], group_all=False,
27 | return_polar=args.return_polar, cuda=args.cuda_ops)
28 | self.sa3 = SurfaceAbstractionCD(npoint=None, radius=None, nsample=None, feat_channel=256 + repsurf_channel,
29 | pos_channel=center_channel, mlp=[256, 512, 1024], group_all=True,
30 | return_polar=args.return_polar, cuda=args.cuda_ops)
31 | # modelnet40
32 | self.classfier = nn.Sequential(
33 | nn.Linear(1024, 512),
34 | nn.BatchNorm1d(512),
35 | nn.ReLU(True),
36 | nn.Dropout(0.4),
37 | nn.Linear(512, 256),
38 | nn.BatchNorm1d(256),
39 | nn.ReLU(True),
40 | nn.Dropout(0.4),
41 | nn.Linear(256, args.num_class))
42 |
43 | def forward(self, points):
44 | # init
45 | center = points[:, :3, :]
46 |
47 | normal = self.surface_constructor(center)
48 |
49 | center, normal, feature = self.sa1(center, normal, None)
50 | center, normal, feature = self.sa2(center, normal, feature)
51 | center, normal, feature = self.sa3(center, normal, feature)
52 |
53 | feature = feature.view(-1, 1024)
54 | feature = self.classfier(feature)
55 | feature = F.log_softmax(feature, -1)
56 |
57 | return feature
58 |
--------------------------------------------------------------------------------
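A hedged construction sketch for the model above: every `args` field below is read by the constructor, but the concrete values (including the `umb_pool` choice) are illustrative rather than the paper's training configuration; with `cuda_ops=False` the pure-PyTorch fallbacks in `modules` are used instead of the compiled pointops extension:

```python
import argparse
import torch
from models.repsurf.repsurf_ssg_umb import Model

args = argparse.Namespace(
    num_point=1024, num_class=15,          # ScanObjectNN has 15 classes
    return_center=True, return_polar=True, return_dist=True,
    group_size=8, umb_pool='sum',          # illustrative values, not the official config
    cuda_ops=False,                        # True requires the built pointops extension
)
model = Model(args)

points = torch.rand(8, 3, args.num_point)  # [B, 3, N]; forward reads the xyz channels
log_probs = model(points)                   # [B, num_class] log-softmax scores
print(log_probs.shape)
```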
/classification/models/repsurf/repsurf_ssg_umb_2x.py:
--------------------------------------------------------------------------------
1 | """
2 | Author: Haoxi Ran
3 | Date: 05/10/2022
4 | """
5 |
6 | import torch.nn as nn
7 | import torch.nn.functional as F
8 | from modules.repsurface_utils import SurfaceAbstractionCD, UmbrellaSurfaceConstructor
9 |
10 |
11 | class Model(nn.Module):
12 | def __init__(self, args):
13 | super(Model, self).__init__()
14 | center_channel = 0 if not args.return_center else (6 if args.return_polar else 3)
15 | repsurf_channel = 10
16 |
17 | self.init_nsample = args.num_point
18 | self.return_dist = args.return_dist
19 | self.surface_constructor = UmbrellaSurfaceConstructor(args.group_size + 1, repsurf_channel,
20 | return_dist=args.return_dist, aggr_type=args.umb_pool,
21 | cuda=args.cuda_ops)
22 | self.sa1 = SurfaceAbstractionCD(npoint=512, radius=0.1, nsample=24, feat_channel=repsurf_channel,
23 | pos_channel=center_channel, mlp=[128, 128, 256], group_all=False,
24 | return_polar=args.return_polar, cuda=args.cuda_ops)
25 | self.sa2 = SurfaceAbstractionCD(npoint=128, radius=0.2, nsample=24, feat_channel=256 + repsurf_channel,
26 | pos_channel=center_channel, mlp=[256, 256, 512], group_all=False,
27 | return_polar=args.return_polar, cuda=args.cuda_ops)
28 | self.sa3 = SurfaceAbstractionCD(npoint=32, radius=0.4, nsample=24, feat_channel=512 + repsurf_channel,
29 | pos_channel=center_channel, mlp=[512, 512, 1024], group_all=False,
30 | return_polar=args.return_polar, cuda=args.cuda_ops)
31 | self.sa4 = SurfaceAbstractionCD(npoint=None, radius=None, nsample=None, feat_channel=1024 + repsurf_channel,
32 | pos_channel=center_channel, mlp=[1024, 1024, 2048], group_all=True,
33 | return_polar=args.return_polar, cuda=args.cuda_ops)
34 | # modelnet40
35 | self.classfier = nn.Sequential(
36 | nn.Linear(2048, 512),
37 | nn.BatchNorm1d(512),
38 | nn.ReLU(True),
39 | nn.Dropout(0.4),
40 | nn.Linear(512, 256),
41 | nn.BatchNorm1d(256),
42 | nn.ReLU(True),
43 | nn.Dropout(0.4),
44 | nn.Linear(256, args.num_class))
45 |
46 | def forward(self, points):
47 | # init
48 | center = points[:, :3, :]
49 |
50 | normal = self.surface_constructor(center)
51 |
52 | center, normal, feature = self.sa1(center, normal, None)
53 | center, normal, feature = self.sa2(center, normal, feature)
54 | center, normal, feature = self.sa3(center, normal, feature)
55 | center, normal, feature = self.sa4(center, normal, feature)
56 |
57 | feature = feature.view(-1, 2048)
58 | feature = self.classfier(feature)
59 | feature = F.log_softmax(feature, -1)
60 |
61 | return feature
62 |
--------------------------------------------------------------------------------
/classification/modules/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/classification/modules/__init__.py
--------------------------------------------------------------------------------
/classification/modules/pointnet2_utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Author: Haoxi Ran
3 | Date: 05/10/2022
4 | """
5 |
6 | import torch
7 |
8 | try:
9 | from modules.pointops.functions.pointops import furthestsampling, gathering, ballquery, knnquery, \
10 | grouping, interpolation, nearestneighbor
11 | except:
12 | raise Exception('Failed to load pointops')
13 |
14 |
15 | def square_distance(src, dst):
16 | """
17 |     Compute the squared Euclidean distance between every pair of points,
18 |     via the expansion ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2. src: [B, N, C], dst: [B, M, C] -> dist: [B, N, M].
19 | """
20 | B, N, _ = src.shape
21 | _, M, _ = dst.shape
22 | dist = -2 * torch.matmul(src, dst.permute(0, 2, 1))
23 | dist += torch.sum(src ** 2, -1).view(B, N, 1)
24 | dist += torch.sum(dst ** 2, -1).view(B, 1, M)
25 | return dist
26 |
27 |
28 | def index_points(points, idx, cuda=False, is_group=False):
29 | if cuda:
30 | if is_group:
31 | points = grouping(points.transpose(1, 2).contiguous(), idx)
32 | return points.permute(0, 2, 3, 1).contiguous()
33 | else:
34 | points = gathering(points.transpose(1, 2).contiguous(), idx)
35 | return points.permute(0, 2, 1).contiguous()
36 | device = points.device
37 | B = points.shape[0]
38 | view_shape = list(idx.shape)
39 | view_shape[1:] = [1] * (len(view_shape) - 1)
40 | repeat_shape = list(idx.shape)
41 | repeat_shape[0] = 1
42 | batch_indices = torch.arange(B, dtype=torch.long).to(device).view(view_shape).repeat(repeat_shape)
43 | new_points = points[batch_indices, idx, :]
44 | return new_points
45 |
46 |
47 | def farthest_point_sample(xyz, npoint, cuda=False):
48 | """
49 | Input:
50 | xyz: pointcloud data, [B, N, 3]
51 | npoint: number of samples
52 | Return:
53 | centroids: sampled pointcloud index, [B, npoint]
54 |
55 | FLOPs:
56 | S * (3 + 3 + 2)
57 | """
58 | if cuda:
59 | if not xyz.is_contiguous():
60 | xyz = xyz.contiguous()
61 | return furthestsampling(xyz, npoint)
62 | device = xyz.device
63 | B, N, C = xyz.shape
64 | centroids = torch.zeros(B, npoint, dtype=torch.long).to(device)
65 | distance = torch.ones(B, N).to(device) * 1e10
66 | farthest = torch.randint(0, N, (B,), dtype=torch.long).to(device)
67 | batch_indices = torch.arange(B, dtype=torch.long).to(device)
68 | for i in range(npoint):
69 | centroids[:, i] = farthest
70 | centroid = xyz[batch_indices, farthest, :].view(B, 1, 3)
71 | dist = torch.sum((xyz - centroid) ** 2, -1)
72 | mask = dist < distance
73 | distance[mask] = dist[mask]
74 | farthest = torch.max(distance, -1)[1]
75 | return centroids
76 |
77 |
78 | def query_ball_point(radius, nsample, xyz, new_xyz, debug=False, cuda=False):
79 | if cuda:
80 | if not xyz.is_contiguous():
81 | xyz = xyz.contiguous()
82 | if not new_xyz.is_contiguous():
83 | new_xyz = new_xyz.contiguous()
84 | return ballquery(radius, nsample, xyz, new_xyz)
85 | device = xyz.device
86 | B, N, C = xyz.shape
87 | _, S, _ = new_xyz.shape
88 | group_idx = torch.arange(N, dtype=torch.long).to(device).view(1, 1, N).repeat([B, S, 1])
89 | sqrdists = square_distance(new_xyz, xyz)
90 | group_idx[sqrdists > radius ** 2] = N
91 | group_idx = group_idx.sort(dim=-1)[0][:, :, :nsample]
92 | group_first = group_idx[:, :, 0].view(B, S, 1).repeat([1, 1, nsample])
93 | mask = group_idx == N
94 | group_idx[mask] = group_first[mask]
95 | if debug:
96 | num_miss = torch.sum(mask)
97 | num_over = torch.sum(torch.clamp(torch.sum(sqrdists < radius ** 2, dim=2) - nsample, min=0))
98 | return num_miss, num_over
99 | return group_idx
100 |
101 |
102 | def query_knn_point(k, xyz, new_xyz, cuda=False):
103 | if cuda:
104 | if not xyz.is_contiguous():
105 | xyz = xyz.contiguous()
106 | if not new_xyz.is_contiguous():
107 | new_xyz = new_xyz.contiguous()
108 | return knnquery(k, xyz, new_xyz)
109 | dist = square_distance(new_xyz, xyz)
110 | group_idx = dist.sort(descending=False, dim=-1)[1][:, :, :k]
111 | return group_idx
112 |
113 |
114 | def sample(nsample, feature, cuda=False):
115 | feature = feature.permute(0, 2, 1)
116 | xyz = feature[:, :, :3]
117 |
118 | fps_idx = farthest_point_sample(xyz, nsample, cuda=cuda) # [B, npoint, C]
119 | torch.cuda.empty_cache()
120 | feature = index_points(feature, fps_idx, cuda=cuda, is_group=False)
121 | torch.cuda.empty_cache()
122 | feature = feature.permute(0, 2, 1)
123 |
124 | return feature
125 |
--------------------------------------------------------------------------------
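The helpers above all accept a `cuda` flag; a short sketch of the pure-PyTorch fallback path (no compiled extension required), chaining FPS, ball query, and grouping:

```python
import torch
from modules.pointnet2_utils import farthest_point_sample, index_points, query_ball_point

xyz = torch.rand(2, 1024, 3)                           # [B, N, 3] toy point cloud
fps_idx = farthest_point_sample(xyz, 128)              # [B, 128] FPS indices
new_xyz = index_points(xyz, fps_idx)                   # [B, 128, 3] sampled centers
group_idx = query_ball_point(0.2, 32, xyz, new_xyz)    # [B, 128, 32] neighbor indices
grouped = index_points(xyz, group_idx, is_group=True)  # [B, 128, 32, 3] local groups
print(grouped.shape)
```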
/classification/modules/pointops/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/classification/modules/pointops/__init__.py
--------------------------------------------------------------------------------
/classification/modules/pointops/functions/__init__.py:
--------------------------------------------------------------------------------
1 | from .pointops import *
2 |
--------------------------------------------------------------------------------
/classification/modules/pointops/setup.py:
--------------------------------------------------------------------------------
1 | #python3 setup.py install
2 |
3 | from setuptools import setup
4 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
5 | import os
6 | from distutils.sysconfig import get_config_vars
7 |
8 | (opt,) = get_config_vars('OPT')
9 | os.environ['OPT'] = " ".join(
10 | flag for flag in opt.split() if flag != '-Wstrict-prototypes'
11 | )
12 |
13 | setup(
14 | name='pointops',
15 | ext_modules=[
16 | CUDAExtension('pointops_cuda', [
17 | 'src/pointops_api.cpp',
18 |
19 | 'src/ballquery/ballquery_cuda.cpp',
20 | 'src/ballquery/ballquery_cuda_kernel.cu',
21 | 'src/knnquery/knnquery_cuda.cpp',
22 | 'src/knnquery/knnquery_cuda_kernel.cu',
23 | 'src/knnquery_heap/knnquery_heap_cuda.cpp',
24 | 'src/knnquery_heap/knnquery_heap_cuda_kernel.cu',
25 | 'src/grouping/grouping_cuda.cpp',
26 | 'src/grouping/grouping_cuda_kernel.cu',
27 | 'src/grouping_int/grouping_int_cuda.cpp',
28 | 'src/grouping_int/grouping_int_cuda_kernel.cu',
29 | 'src/interpolation/interpolation_cuda.cpp',
30 | 'src/interpolation/interpolation_cuda_kernel.cu',
31 | 'src/sampling/sampling_cuda.cpp',
32 | 'src/sampling/sampling_cuda_kernel.cu',
33 | ],
34 | extra_compile_args={'cxx': ['-g'],
35 | 'nvcc': ['-O2']})
36 | ],
37 | cmdclass={'build_ext': BuildExtension})
38 |
--------------------------------------------------------------------------------
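After running `python3 setup.py install` from `modules/pointops` (as `init.sh` does), the compiled module should be importable under the name passed to `CUDAExtension`; a quick smoke test:

```python
import torch
import pointops_cuda  # module name from CUDAExtension('pointops_cuda', ...)

print(torch.cuda.is_available())  # the kernels themselves require a CUDA device
print(pointops_cuda.__file__)
```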
/classification/modules/pointops/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/classification/modules/pointops/src/__init__.py
--------------------------------------------------------------------------------
/classification/modules/pointops/src/ballquery/ballquery_cuda.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/serialize/tensor.h>
2 | #include <ATen/cuda/CUDAContext.h>
3 | #include <vector>
4 | #include <THC/THC.h>
5 |
6 | #include "ballquery_cuda_kernel.h"
7 |
8 | extern THCState *state;
9 |
10 | #define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x, " must be a CUDAtensor ")
11 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
12 | #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x)
13 |
14 | void ballquery_cuda(int b, int n, int m, float radius, int nsample, at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor idx_tensor)
15 | {
16 |     const float *new_xyz = new_xyz_tensor.data_ptr<float>();
17 |     const float *xyz = xyz_tensor.data_ptr<float>();
18 |     int *idx = idx_tensor.data_ptr<int>();
19 |
20 | ballquery_cuda_launcher(b, n, m, radius, nsample, new_xyz, xyz, idx);
21 | }
22 |
23 |
24 | void ballquery_cuda_fast(int b, int n, int m, float radius, int nsample, at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor idx_tensor)
25 | {
26 | CHECK_INPUT(new_xyz_tensor);
27 | CHECK_INPUT(xyz_tensor);
28 |
29 |     const float *new_xyz = new_xyz_tensor.data_ptr<float>();
30 |     const float *xyz = xyz_tensor.data_ptr<float>();
31 |     int *idx = idx_tensor.data_ptr<int>();
32 |
33 | cudaStream_t stream = at::cuda::getCurrentCUDAStream();
34 |
35 | ballquery_cuda_launcher_fast(b, n, m, radius, nsample, new_xyz, xyz, idx, stream);
36 | }
37 |
--------------------------------------------------------------------------------
/classification/modules/pointops/src/ballquery/ballquery_cuda_kernel.cu:
--------------------------------------------------------------------------------
1 | #include "../cuda_utils.h"
2 | #include "ballquery_cuda_kernel.h"
3 |
4 | // input: new_xyz(b, m, 3) xyz(b, n, 3)
5 | // output: idx(b, m, nsample)
6 | __global__ void ballquery_cuda_kernel(int b, int n, int m, float radius, int nsample, const float *new_xyz, const float *xyz, int *idx)
7 | {
8 | int batch_index = blockIdx.x;
9 | xyz += batch_index * n * 3;
10 | new_xyz += batch_index * m * 3;
11 | idx += m * nsample * batch_index;
12 | int index = threadIdx.x;
13 | int stride = blockDim.x;
14 |
15 | float radius2 = radius * radius;
16 | for (int j = index; j < m; j += stride)
17 | {
18 | float new_x = new_xyz[j * 3 + 0];
19 | float new_y = new_xyz[j * 3 + 1];
20 | float new_z = new_xyz[j * 3 + 2];
21 | for (int k = 0, cnt = 0; k < n && cnt < nsample; ++k)
22 | {
23 | float x = xyz[k * 3 + 0];
24 | float y = xyz[k * 3 + 1];
25 | float z = xyz[k * 3 + 2];
26 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z);
27 | if (d2 < radius2)
28 | {
29 | if (cnt == 0)
30 | {
31 | for (int l = 0; l < nsample; ++l)
32 | idx[j * nsample + l] = k;
33 | }
34 | idx[j * nsample + cnt] = k;
35 | ++cnt;
36 | }
37 | }
38 | }
39 | }
40 |
41 | void ballquery_cuda_launcher(int b, int n, int m, float radius, int nsample, const float *new_xyz, const float *xyz, int *idx)
42 | {
43 |     ballquery_cuda_kernel<<<b, opt_n_threads(m), 0>>>(b, n, m, radius, nsample, new_xyz, xyz, idx);
44 | }
45 |
46 |
47 | __global__ void ballquery_cuda_kernel_fast(int b, int n, int m, float radius, int nsample, const float *__restrict__ new_xyz, const float *__restrict__ xyz, int *__restrict__ idx) {
48 | int bs_idx = blockIdx.y;
49 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
50 | if (bs_idx >= b || pt_idx >= m) return;
51 |
52 | new_xyz += bs_idx * m * 3 + pt_idx * 3;
53 | xyz += bs_idx * n * 3;
54 | idx += bs_idx * m * nsample + pt_idx * nsample;
55 |
56 | float radius2 = radius * radius;
57 | float new_x = new_xyz[0];
58 | float new_y = new_xyz[1];
59 | float new_z = new_xyz[2];
60 |
61 | int cnt = 0;
62 | for (int k = 0; k < n; ++k) {
63 | float x = xyz[k * 3 + 0];
64 | float y = xyz[k * 3 + 1];
65 | float z = xyz[k * 3 + 2];
66 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z);
67 | if (d2 < radius2){
68 | if (cnt == 0){
69 | for (int l = 0; l < nsample; ++l) {
70 | idx[l] = k;
71 | }
72 | }
73 | idx[cnt] = k;
74 | ++cnt;
75 | if (cnt >= nsample){
76 | break;
77 | }
78 | }
79 | }
80 | }
81 |
82 |
83 | void ballquery_cuda_launcher_fast(int b, int n, int m, float radius, int nsample, const float *new_xyz, const float *xyz, int *idx, cudaStream_t stream) {
84 | // param new_xyz: (B, m, 3)
85 | // param xyz: (B, n, 3)
86 | // param idx: (B, m, nsample)
87 |
88 | cudaError_t err;
89 |
90 | dim3 blocks(DIVUP(m, THREADS_PER_BLOCK), b); // blockIdx.x(col), blockIdx.y(row)
91 | dim3 threads(THREADS_PER_BLOCK);
92 |
93 |     ballquery_cuda_kernel_fast<<<blocks, threads, 0, stream>>>(b, n, m, radius, nsample, new_xyz, xyz, idx);
94 | // cudaDeviceSynchronize(); // for using printf in kernel function
95 |
96 | err = cudaGetLastError();
97 | if (cudaSuccess != err) {
98 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
99 | exit(-1);
100 | }
101 | }
102 |
--------------------------------------------------------------------------------
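For reference, the semantics of the kernels above in plain NumPy, including the padding rule (every slot is pre-filled with the first in-radius neighbor, so a query with fewer than `nsample` hits still yields a full, valid row); an explanatory sketch, not a performance path:

```python
import numpy as np

def ballquery_ref(new_xyz, xyz, radius, nsample):
    """new_xyz: (m, 3), xyz: (n, 3) -> idx: (m, nsample); single batch."""
    m, n = len(new_xyz), len(xyz)
    idx = np.zeros((m, nsample), dtype=np.int32)
    for j in range(m):
        cnt = 0
        for k in range(n):
            if np.sum((new_xyz[j] - xyz[k]) ** 2) < radius ** 2:
                if cnt == 0:
                    idx[j, :] = k      # pad every slot with the first hit
                idx[j, cnt] = k
                cnt += 1
                if cnt >= nsample:
                    break
    return idx
```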
/classification/modules/pointops/src/ballquery/ballquery_cuda_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _BALLQUERY_CUDA_KERNEL
2 | #define _BALLQUERY_CUDA_KERNEL
3 | #include <torch/serialize/tensor.h>
4 | #include <vector>
5 | #include <cuda_runtime.h>
6 |
7 | void ballquery_cuda(int b, int n, int m, float radius, int nsample, at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor idx_tensor);
8 |
9 | void ballquery_cuda_fast(int b, int n, int m, float radius, int nsample, at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor idx_tensor);
10 |
11 | #ifdef __cplusplus
12 | extern "C" {
13 | #endif
14 |
15 | void ballquery_cuda_launcher(int b, int n, int m, float radius, int nsample, const float *xyz, const float *new_xyz, int *idx);
16 |
17 | void ballquery_cuda_launcher_fast(int b, int n, int m, float radius, int nsample, const float *new_xyz, const float *xyz, int *idx, cudaStream_t stream);
18 |
19 | #ifdef __cplusplus
20 | }
21 | #endif
22 |
23 | #endif
24 |
--------------------------------------------------------------------------------
/classification/modules/pointops/src/cuda_utils.h:
--------------------------------------------------------------------------------
1 | #ifndef _CUDA_UTILS_H
2 | #define _CUDA_UTILS_H
3 |
4 | #include <cmath>
5 |
6 | #define TOTAL_THREADS 1024
7 |
8 | #define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x, " must be a CUDAtensor ")
9 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
10 | #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x)
11 |
12 | #define THREADS_PER_BLOCK 256
13 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
14 |
15 | inline int opt_n_threads(int work_size) {
16 |     const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0);
17 | return max(min(1 << pow_2, TOTAL_THREADS), 1);
18 | }
19 |
20 | inline dim3 opt_block_config(int x, int y) {
21 | const int x_threads = opt_n_threads(x);
22 | const int y_threads = max(min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1);
23 | dim3 block_config(x_threads, y_threads, 1);
24 | return block_config;
25 | }
26 |
27 | #endif
--------------------------------------------------------------------------------
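The launch-configuration helpers above are simple integer arithmetic; mirrored here in Python for clarity: `DIVUP` is a ceiling division, and `opt_n_threads` rounds the work size down to a power of two, capped at `TOTAL_THREADS` (1024):

```python
import math

def divup(m, n):
    """Ceiling division, mirroring the DIVUP macro."""
    return m // n + (1 if m % n else 0)

def opt_n_threads(work_size):
    """Largest power of two <= work_size, clamped to [1, 1024]."""
    pow_2 = int(math.log(work_size) / math.log(2.0))  # truncates, like the C code
    return max(min(1 << pow_2, 1024), 1)

assert divup(1000, 256) == 4      # 4 blocks of 256 threads cover 1000 items
assert opt_n_threads(300) == 256  # 2^8 is the largest power of two <= 300
```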
/classification/modules/pointops/src/grouping/grouping_cuda.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/serialize/tensor.h>
2 | #include <ATen/cuda/CUDAContext.h>
3 | #include <vector>
4 | #include <THC/THC.h>
5 |
6 | #include "grouping_cuda_kernel.h"
7 |
8 | extern THCState *state;
9 |
10 | void grouping_forward_cuda(int b, int c, int n, int m, int nsample, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor)
11 | {
12 |     const float *points = points_tensor.data_ptr<float>();
13 |     const int *idx = idx_tensor.data_ptr<int>();
14 |     float *out = out_tensor.data_ptr<float>();
15 | grouping_forward_cuda_launcher(b, c, n, m, nsample, points, idx, out);
16 | }
17 |
18 | void grouping_backward_cuda(int b, int c, int n, int m, int nsample, at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor)
19 | {
20 |     float *grad_points = grad_points_tensor.data_ptr<float>();
21 |     const int *idx = idx_tensor.data_ptr<int>();
22 |     const float *grad_out = grad_out_tensor.data_ptr<float>();
23 | grouping_backward_cuda_launcher(b, c, n, m, nsample, grad_out, idx, grad_points);
24 | }
25 |
26 | void grouping_forward_cuda_fast(int b, int c, int n, int npoints, int nsample, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor) {
27 |
28 |     const float *points = points_tensor.data_ptr<float>();
29 |     const int *idx = idx_tensor.data_ptr<int>();
30 |     float *out = out_tensor.data_ptr<float>();
31 | grouping_forward_cuda_launcher_fast(b, c, n, npoints, nsample, points, idx, out);
32 | }
--------------------------------------------------------------------------------
/classification/modules/pointops/src/grouping/grouping_cuda_kernel.cu:
--------------------------------------------------------------------------------
1 | #include "../cuda_utils.h"
2 | #include "grouping_cuda_kernel.h"
3 |
4 | // input: points(b, c, n) idx(b, m, nsample)
5 | // output: out(b, c, m, nsample)
6 | __global__ void grouping_forward_cuda_kernel(int b, int c, int n, int m, int nsample, const float *points, const int *idx, float *out)
7 | {
8 | int batch_index = blockIdx.x;
9 | points += batch_index * n * c;
10 | idx += batch_index * m * nsample;
11 | out += batch_index * m * nsample * c;
12 | const int index = threadIdx.y * blockDim.x + threadIdx.x;
13 | const int stride = blockDim.y * blockDim.x;
14 | for (int i = index; i < c * m; i += stride)
15 | {
16 | const int l = i / m;
17 | const int j = i % m;
18 | for (int k = 0; k < nsample; ++k)
19 | {
20 | int ii = idx[j * nsample + k];
21 | out[(l * m + j) * nsample + k] = points[l * n + ii];
22 | }
23 | }
24 | }
25 |
26 | // input: grad_out(b, c, m, nsample), idx(b, m, nsample)
27 | // output: grad_points(b, c, n)
28 | __global__ void grouping_backward_cuda_kernel(int b, int c, int n, int m, int nsample, const float *grad_out, const int *idx, float *grad_points)
29 | {
30 | int batch_index = blockIdx.x;
31 | grad_out += batch_index * m * nsample * c;
32 | idx += batch_index * m * nsample;
33 | grad_points += batch_index * n * c;
34 | const int index = threadIdx.y * blockDim.x + threadIdx.x;
35 | const int stride = blockDim.y * blockDim.x;
36 | for (int i = index; i < c * m; i += stride)
37 | {
38 | const int l = i / m;
39 | const int j = i % m;
40 | for (int k = 0; k < nsample; ++k)
41 | {
42 | int ii = idx[j * nsample + k];
43 | atomicAdd(grad_points + l * n + ii, grad_out[(l * m + j) * nsample + k]);
44 | }
45 | }
46 | }
47 |
48 | void grouping_forward_cuda_launcher(int b, int c, int n, int m, int nsample, const float *points, const int *idx, float *out)
49 | {
50 |     grouping_forward_cuda_kernel<<<b, opt_block_config(m, c), 0>>>(b, c, n, m, nsample, points, idx, out);
51 | }
52 |
53 | void grouping_backward_cuda_launcher(int b, int c, int n, int m, int nsample, const float *grad_out, const int *idx, float *grad_points)
54 | {
55 |     grouping_backward_cuda_kernel<<<b, opt_block_config(m, c), 0>>>(b, c, n, m, nsample, grad_out, idx, grad_points);
56 | }
57 |
58 | // input: points(b, c, n) idx(b, npoints, nsample)
59 | // output: out(b, c, npoints, nsample)
60 | __global__ void grouping_forward_cuda_kernel_fast(int b, int c, int n, int npoints, int nsample, const float *__restrict__ points, const int *__restrict__ idx, float *__restrict__ out) {
61 | int bs_idx = blockIdx.z;
62 | int c_idx = blockIdx.y;
63 | int index = blockIdx.x * blockDim.x + threadIdx.x;
64 | int pt_idx = index / nsample;
65 | if (bs_idx >= b || c_idx >= c || pt_idx >= npoints) return;
66 |
67 | int sample_idx = index % nsample;
68 |
69 | idx += bs_idx * npoints * nsample + pt_idx * nsample + sample_idx;
70 | int in_idx = bs_idx * c * n + c_idx * n + idx[0];
71 | int out_idx = bs_idx * c * npoints * nsample + c_idx * npoints * nsample + pt_idx * nsample + sample_idx;
72 |
73 | out[out_idx] = points[in_idx];
74 | }
75 |
76 | // input: points(b, c, n) idx(b, npoints, nsample)
77 | // output: out(b, c, npoints, nsample)
78 | void grouping_forward_cuda_launcher_fast(int b, int c, int n, int npoints, int nsample, const float *points, const int *idx, float *out) {
79 |
80 | cudaError_t err;
81 |
82 | dim3 blocks(DIVUP(npoints * nsample, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row)
83 | dim3 threads(THREADS_PER_BLOCK);
84 |
85 |     grouping_forward_cuda_kernel_fast<<<blocks, threads>>>(b, c, n, npoints, nsample, points, idx, out);
86 | // cudaDeviceSynchronize(); // for using printf in kernel function
87 | err = cudaGetLastError();
88 | if (cudaSuccess != err) {
89 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
90 | exit(-1);
91 | }
92 | }
93 |
94 |
95 |
--------------------------------------------------------------------------------
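The forward pass above is a pure gather, and the backward kernel's `atomicAdd` is exactly a scatter-add; a hedged PyTorch equivalent of the same operation, for reference:

```python
import torch

b, c, n, m, nsample = 2, 8, 1024, 256, 32
points = torch.rand(b, c, n, requires_grad=True)   # (b, c, n) input features
idx = torch.randint(0, n, (b, m, nsample))         # (b, m, nsample) neighbor ids

# out[b, c, m, k] = points[b, c, idx[b, m, k]]
flat = idx.view(b, 1, m * nsample).expand(-1, c, -1)      # (b, c, m*nsample)
out = torch.gather(points, 2, flat).view(b, c, m, nsample)

out.sum().backward()  # scatter-adds into points.grad, like the backward kernel
print(out.shape, points.grad.shape)
```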
/classification/modules/pointops/src/grouping/grouping_cuda_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _GROUPING_CUDA_KERNEL
2 | #define _GROUPING_CUDA_KERNEL
3 | #include <torch/serialize/tensor.h>
4 | #include <vector>
5 | #include <cuda_runtime.h>
6 |
7 | void grouping_forward_cuda(int b, int c, int n, int m, int nsample, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out);
8 | void grouping_backward_cuda(int b, int c, int n, int m, int nsample, at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor);
9 |
10 | void grouping_forward_cuda_fast(int b, int c, int n, int npoints, int nsample, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor);
11 |
12 | #ifdef __cplusplus
13 | extern "C" {
14 | #endif
15 |
16 | void grouping_forward_cuda_launcher(int b, int c, int n, int m, int nsample, const float *points, const int *idx, float *out);
17 | void grouping_backward_cuda_launcher(int b, int c, int n, int m, int nsample, const float *grad_out, const int *idx, float *grad_points);
18 |
19 | void grouping_forward_cuda_launcher_fast(int b, int c, int n, int npoints, int nsample, const float *points, const int *idx, float *out);
20 |
21 | #ifdef __cplusplus
22 | }
23 | #endif
24 | #endif
25 |
--------------------------------------------------------------------------------
/classification/modules/pointops/src/grouping_int/grouping_int_cuda.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/serialize/tensor.h>
2 | #include <ATen/cuda/CUDAContext.h>
3 | #include <vector>
4 | #include <THC/THC.h>
5 |
6 | #include "grouping_int_cuda_kernel.h"
7 |
8 | extern THCState *state;
9 |
10 | void grouping_int_forward_cuda(int b, int c, int n, int m, int nsample, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor)
11 | {
12 |     const long int *points = points_tensor.data_ptr<long>();
13 |     const int *idx = idx_tensor.data_ptr<int>();
14 |     long int *out = out_tensor.data_ptr<long>();
15 | grouping_int_forward_cuda_launcher(b, c, n, m, nsample, points, idx, out);
16 | }
17 |
18 | void grouping_int_forward_cuda_fast(int b, int c, int n, int m, int nsample, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor)
19 | {
20 |     const long int *points = points_tensor.data_ptr<long>();
21 |     const int *idx = idx_tensor.data_ptr<int>();
22 |     long int *out = out_tensor.data_ptr<long>();
23 | grouping_int_forward_cuda_launcher_fast(b, c, n, m, nsample, points, idx, out);
24 | }
--------------------------------------------------------------------------------
/classification/modules/pointops/src/grouping_int/grouping_int_cuda_kernel.cu:
--------------------------------------------------------------------------------
1 | #include "../cuda_utils.h"
2 | #include "grouping_int_cuda_kernel.h"
3 |
4 | // input: points(b, c, n) idx(b, m, nsample)
5 | // output: out(b, c, m, nsample)
6 | __global__ void grouping_int_forward_cuda_kernel(int b, int c, int n, int m, int nsample, const long int *points, const int *idx, long int *out)
7 | {
8 | int batch_index = blockIdx.x;
9 | points += batch_index * n * c;
10 | idx += batch_index * m * nsample;
11 | out += batch_index * m * nsample * c;
12 | const int index = threadIdx.y * blockDim.x + threadIdx.x;
13 | const int stride = blockDim.y * blockDim.x;
14 | for (int i = index; i < c * m; i += stride)
15 | {
16 | const int l = i / m;
17 | const int j = i % m;
18 | for (int k = 0; k < nsample; ++k)
19 | {
20 | int ii = idx[j * nsample + k];
21 | out[(l * m + j) * nsample + k] = points[l * n + ii];
22 | }
23 | }
24 | }
25 |
26 |
27 | void grouping_int_forward_cuda_launcher(int b, int c, int n, int m, int nsample, const long int *points, const int *idx, long int *out)
28 | {
29 |     grouping_int_forward_cuda_kernel<<<b, opt_block_config(m, c), 0>>>(b, c, n, m, nsample, points, idx, out);
30 | }
31 |
32 |
33 | __global__ void grouping_int_forward_cuda_kernel_fast(int b, int c, int n, int npoints, int nsample, const long int *__restrict__ points, const int *__restrict__ idx, long int *__restrict__ out)
34 | {
35 | int bs_idx = blockIdx.z;
36 | int c_idx = blockIdx.y;
37 | int index = blockIdx.x * blockDim.x + threadIdx.x;
38 | int pt_idx = index / nsample;
39 | if (bs_idx >= b || c_idx >= c || pt_idx >= npoints) return;
40 |
41 | int sample_idx = index % nsample;
42 |
43 | idx += bs_idx * npoints * nsample + pt_idx * nsample + sample_idx;
44 | int in_idx = bs_idx * c * n + c_idx * n + idx[0];
45 | int out_idx = bs_idx * c * npoints * nsample + c_idx * npoints * nsample + pt_idx * nsample + sample_idx;
46 |
47 | out[out_idx] = points[in_idx];
48 | }
49 |
50 |
51 | void grouping_int_forward_cuda_launcher_fast(int b, int c, int n, int npoints, int nsample, const long int *points, const int *idx, long int *out)
52 | {
53 | cudaError_t err;
54 |
55 | dim3 blocks(DIVUP(npoints * nsample, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row)
56 | dim3 threads(THREADS_PER_BLOCK);
57 |
58 |     grouping_int_forward_cuda_kernel_fast<<<blocks, threads>>>(b, c, n, npoints, nsample, points, idx, out);
59 | // cudaDeviceSynchronize(); // for using printf in kernel function
60 | err = cudaGetLastError();
61 | if (cudaSuccess != err) {
62 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
63 | exit(-1);
64 | }
65 | }
--------------------------------------------------------------------------------
/classification/modules/pointops/src/grouping_int/grouping_int_cuda_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _GROUPING_INT_CUDA_KERNEL
2 | #define _GROUPING_INT_CUDA_KERNEL
3 | #include <torch/serialize/tensor.h>
4 | #include <vector>
5 | #include <cuda_runtime.h>
6 |
7 | void grouping_int_forward_cuda(int b, int c, int n, int m, int nsample, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out);
8 |
9 | void grouping_int_forward_cuda_fast(int b, int c, int n, int m, int nsample, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor);
10 |
11 | #ifdef __cplusplus
12 | extern "C" {
13 | #endif
14 |
15 | void grouping_int_forward_cuda_launcher(int b, int c, int n, int m, int nsample, const long int *points, const int *idx, long int *out);
16 |
17 | void grouping_int_forward_cuda_launcher_fast(int b, int c, int n, int npoints, int nsample, const long int *points, const int *idx, long int *out);
18 |
19 | #ifdef __cplusplus
20 | }
21 | #endif
22 | #endif
23 |
--------------------------------------------------------------------------------
/classification/modules/pointops/src/interpolation/interpolation_cuda.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/serialize/tensor.h>
2 | #include <vector>
3 | #include <THC/THC.h>
4 | #include <ATen/cuda/CUDAContext.h>
5 | #include "interpolation_cuda_kernel.h"
6 |
7 | extern THCState *state;
8 |
9 | void nearestneighbor_cuda(int b, int n, int m, at::Tensor unknown_tensor, at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor)
10 | {
11 | const float *unknown = unknown_tensor.data_ptr<float>();
12 | const float *known = known_tensor.data_ptr<float>();
13 | float *dist2 = dist2_tensor.data_ptr<float>();
14 | int *idx = idx_tensor.data_ptr<int>();
15 | nearestneighbor_cuda_launcher(b, n, m, unknown, known, dist2, idx);
16 | }
17 |
18 | void interpolation_forward_cuda(int b, int c, int m, int n, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor out_tensor)
19 | {
20 | const float *points = points_tensor.data_ptr<float>();
21 | const float *weight = weight_tensor.data_ptr<float>();
22 | float *out = out_tensor.data_ptr<float>();
23 | const int *idx = idx_tensor.data_ptr<int>();
24 | interpolation_forward_cuda_launcher(b, c, m, n, points, idx, weight, out);
25 | }
26 |
27 | void interpolation_backward_cuda(int b, int c, int n, int m, at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_points_tensor)
28 | {
29 | const float *grad_out = grad_out_tensor.data_ptr<float>();
30 | const float *weight = weight_tensor.data_ptr<float>();
31 | float *grad_points = grad_points_tensor.data_ptr<float>();
32 | const int *idx = idx_tensor.data_ptr<int>();
33 | interpolation_backward_cuda_launcher(b, c, n, m, grad_out, idx, weight, grad_points);
34 | }
35 |
36 | void nearestneighbor_cuda_fast(int b, int n, int m, at::Tensor unknown_tensor, at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor) {
37 | const float *unknown = unknown_tensor.data_ptr<float>();
38 | const float *known = known_tensor.data_ptr<float>();
39 | float *dist2 = dist2_tensor.data_ptr<float>();
40 | int *idx = idx_tensor.data_ptr<int>();
41 | nearestneighbor_cuda_launcher_fast(b, n, m, unknown, known, dist2, idx);
42 | }
43 |
44 | void interpolation_forward_cuda_fast(int b, int c, int m, int n, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor out_tensor) {
45 |
46 | const float *points = points_tensor.data_ptr<float>();
47 | const float *weight = weight_tensor.data_ptr<float>();
48 | float *out = out_tensor.data_ptr<float>();
49 | const int *idx = idx_tensor.data_ptr<int>();
50 | interpolation_forward_cuda_launcher_fast(b, c, m, n, points, idx, weight, out);
51 | }
--------------------------------------------------------------------------------
/classification/modules/pointops/src/interpolation/interpolation_cuda_kernel.cu:
--------------------------------------------------------------------------------
1 | #include "../cuda_utils.h"
2 | #include "interpolation_cuda_kernel.h"
3 |
4 | // input: unknown(b, n, 3) known(b, m, 3)
5 | // output: dist2(b, n, 3), idx(b, n, 3)
6 | __global__ void nearestneighbor_cuda_kernel(int b, int n, int m, const float *unknown, const float *known, float *dist2, int *idx)
7 | {
8 | int batch_index = blockIdx.x;
9 | unknown += batch_index * n * 3;
10 | known += batch_index * m * 3;
11 | dist2 += batch_index * n * 3;
12 | idx += batch_index * n * 3;
13 |
14 | int index = threadIdx.x;
15 | int stride = blockDim.x;
16 | for (int j = index; j < n; j += stride)
17 | {
18 | float ux = unknown[j * 3 + 0];
19 | float uy = unknown[j * 3 + 1];
20 | float uz = unknown[j * 3 + 2];
21 |
22 | double best1 = 1e40, best2 = 1e40, best3 = 1e40;
23 | int besti1 = 0, besti2 = 0, besti3 = 0;
24 | for (int k = 0; k < m; ++k)
25 | {
26 | float x = known[k * 3 + 0];
27 | float y = known[k * 3 + 1];
28 | float z = known[k * 3 + 2];
29 | float d =
30 | (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z);
31 | if (d < best1)
32 | {
33 | best3 = best2;
34 | besti3 = besti2;
35 | best2 = best1;
36 | besti2 = besti1;
37 | best1 = d;
38 | besti1 = k;
39 | }
40 | else if (d < best2)
41 | {
42 | best3 = best2;
43 | besti3 = besti2;
44 | best2 = d;
45 | besti2 = k;
46 | }
47 | else if (d < best3)
48 | {
49 | best3 = d;
50 | besti3 = k;
51 | }
52 | }
53 | dist2[j * 3 + 0] = best1;
54 | dist2[j * 3 + 1] = best2;
55 | dist2[j * 3 + 2] = best3;
56 | idx[j * 3 + 0] = besti1;
57 | idx[j * 3 + 1] = besti2;
58 | idx[j * 3 + 2] = besti3;
59 | }
60 | }
61 |
62 | // input: points(b, c, m), idx(b, n, 3), weight(b, n, 3)
63 | // output: out(b, c, n)
64 | __global__ void interpolation_forward_cuda_kernel(int b, int c, int m, int n, const float *points, const int *idx, const float *weight, float *out)
65 | {
66 | int batch_index = blockIdx.x;
67 | points += batch_index * m * c;
68 | idx += batch_index * n * 3;
69 | weight += batch_index * n * 3;
70 | out += batch_index * n * c;
71 |
72 | const int index = threadIdx.y * blockDim.x + threadIdx.x;
73 | const int stride = blockDim.y * blockDim.x;
74 | for (int i = index; i < c * n; i += stride)
75 | {
76 | const int l = i / n;
77 | const int j = i % n;
78 | float w1 = weight[j * 3 + 0];
79 | float w2 = weight[j * 3 + 1];
80 | float w3 = weight[j * 3 + 2];
81 | int i1 = idx[j * 3 + 0];
82 | int i2 = idx[j * 3 + 1];
83 | int i3 = idx[j * 3 + 2];
84 | out[i] = points[l * m + i1] * w1 + points[l * m + i2] * w2 + points[l * m + i3] * w3;
85 | }
86 | }
87 |
88 | // input: grad_out(b, c, n), idx(b, n, 3), weight(b, n, 3)
89 | // output: grad_points(b, c, m)
90 | __global__ void interpolation_backward_cuda_kernel( int b, int c, int n, int m, const float *grad_out, const int *idx, const float *weight, float *grad_points)
91 | {
92 | int batch_index = blockIdx.x;
93 | grad_out += batch_index * n * c;
94 | idx += batch_index * n * 3;
95 | weight += batch_index * n * 3;
96 | grad_points += batch_index * m * c;
97 |
98 | const int index = threadIdx.y * blockDim.x + threadIdx.x;
99 | const int stride = blockDim.y * blockDim.x;
100 | for (int i = index; i < c * n; i += stride)
101 | {
102 | const int l = i / n;
103 | const int j = i % n;
104 | float w1 = weight[j * 3 + 0];
105 | float w2 = weight[j * 3 + 1];
106 | float w3 = weight[j * 3 + 2];
107 | int i1 = idx[j * 3 + 0];
108 | int i2 = idx[j * 3 + 1];
109 | int i3 = idx[j * 3 + 2];
110 | atomicAdd(grad_points + l * m + i1, grad_out[i] * w1);
111 | atomicAdd(grad_points + l * m + i2, grad_out[i] * w2);
112 | atomicAdd(grad_points + l * m + i3, grad_out[i] * w3);
113 | }
114 | }
115 |
116 | void nearestneighbor_cuda_launcher(int b, int n, int m, const float *unknown, const float *known, float *dist2, int *idx)
117 | {
118 | nearestneighbor_cuda_kernel<<<b, opt_n_threads(n), 0>>>(b, n, m, unknown, known, dist2, idx);
119 | }
120 |
121 | void interpolation_forward_cuda_launcher(int b, int c, int m, int n, const float *points, const int *idx, const float *weight, float *out)
122 | {
123 | interpolation_forward_cuda_kernel<<<b, opt_block_config(n, c), 0>>>(b, c, m, n, points, idx, weight, out);
124 | }
125 |
126 | void interpolation_backward_cuda_launcher(int b, int c, int n, int m, const float *grad_out, const int *idx, const float *weight, float *grad_points)
127 | {
128 | interpolation_backward_cuda_kernel<<<b, opt_block_config(n, c), 0>>>(b, c, n, m, grad_out, idx, weight, grad_points);
129 | }
130 |
131 |
132 | // input: unknown(b, n, 3) known(b, m, 3)
133 | // output: dist2(b, n, 3), idx(b, n, 3)
134 | __global__ void nearestneighbor_cuda_kernel_fast(int b, int n, int m, const float *__restrict__ unknown, const float *__restrict__ known, float *__restrict__ dist2, int *__restrict__ idx) {
135 |
136 | int bs_idx = blockIdx.y;
137 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
138 | if (bs_idx >= b || pt_idx >= n) return;
139 |
140 | unknown += bs_idx * n * 3 + pt_idx * 3;
141 | known += bs_idx * m * 3;
142 | dist2 += bs_idx * n * 3 + pt_idx * 3;
143 | idx += bs_idx * n * 3 + pt_idx * 3;
144 |
145 | float ux = unknown[0];
146 | float uy = unknown[1];
147 | float uz = unknown[2];
148 |
149 | double best1 = 1e40, best2 = 1e40, best3 = 1e40;
150 | int besti1 = 0, besti2 = 0, besti3 = 0;
151 | for (int k = 0; k < m; ++k) {
152 | float x = known[k * 3 + 0];
153 | float y = known[k * 3 + 1];
154 | float z = known[k * 3 + 2];
155 | float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z);
156 | if (d < best1) {
157 | best3 = best2; besti3 = besti2;
158 | best2 = best1; besti2 = besti1;
159 | best1 = d; besti1 = k;
160 | }
161 | else if (d < best2) {
162 | best3 = best2; besti3 = besti2;
163 | best2 = d; besti2 = k;
164 | }
165 | else if (d < best3) {
166 | best3 = d; besti3 = k;
167 | }
168 | }
169 | dist2[0] = best1;
170 | dist2[1] = best2;
171 | dist2[2] = best3;
172 |
173 | idx[0] = besti1;
174 | idx[1] = besti2;
175 | idx[2] = besti3;
176 | }
177 |
178 |
179 | // input: points(b, c, m), idx(b, n, 3), weight(b, n, 3)
180 | // output: out(b, c, n)
181 | __global__ void interpolation_forward_cuda_kernel_fast(int b, int c, int m, int n, const float *__restrict__ points, const int *__restrict__ idx, const float *__restrict__ weight, float *__restrict__ out) {
182 |
183 | int bs_idx = blockIdx.z;
184 | int c_idx = blockIdx.y;
185 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
186 |
187 | if (bs_idx >= b || c_idx >= c || pt_idx >= n) return;
188 |
189 | weight += bs_idx * n * 3 + pt_idx * 3;
190 | points += bs_idx * c * m + c_idx * m;
191 | idx += bs_idx * n * 3 + pt_idx * 3;
192 | out += bs_idx * c * n + c_idx * n;
193 |
194 | out[pt_idx] = weight[0] * points[idx[0]] + weight[1] * points[idx[1]] + weight[2] * points[idx[2]];
195 | }
196 |
197 |
198 | void nearestneighbor_cuda_launcher_fast(int b, int n, int m, const float *unknown, const float *known, float *dist2, int *idx)
199 | {
200 | cudaError_t err;
201 |
202 | dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), b); // blockIdx.x(col), blockIdx.y(row)
203 | dim3 threads(THREADS_PER_BLOCK);
204 |
205 | nearestneighbor_cuda_kernel_fast<<<blocks, threads>>>(b, n, m, unknown, known, dist2, idx);
206 |
207 | err = cudaGetLastError();
208 | if (cudaSuccess != err) {
209 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
210 | exit(-1);
211 | }
212 | }
213 |
214 | void interpolation_forward_cuda_launcher_fast(int b, int c, int m, int n, const float *points, const int *idx, const float *weight, float *out) {
215 |
216 | cudaError_t err;
217 |
218 | dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row)
219 | dim3 threads(THREADS_PER_BLOCK);
220 | interpolation_forward_cuda_kernel_fast<<<blocks, threads>>>(b, c, m, n, points, idx, weight, out);
221 |
222 | err = cudaGetLastError();
223 | if (cudaSuccess != err) {
224 | fprintf(stderr, "CUDA kernel failed : %s\n",
225 | cudaGetErrorString(err));
226 | exit(-1);
227 | }
228 | }
229 |
--------------------------------------------------------------------------------
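
Taken together, `nearestneighbor` produces the three nearest known points per query (sorted by squared distance) and `interpolation_forward` blends their features with caller-supplied weights. A compact PyTorch sketch of the combined semantics; the CUDA path receives the weights precomputed, so the inverse-distance weighting shown here is an assumption for illustration:

```python
import torch

def three_nn_interpolate_reference(unknown, known, feats):
    """unknown: (B, n, 3), known: (B, m, 3), feats: (B, C, m) -> (B, C, n)."""
    d2 = torch.cdist(unknown, known).pow(2)          # (B, n, m) squared distances
    dist2, idx = d2.topk(3, dim=-1, largest=False)   # 3-NN, ascending like the kernel
    w = 1.0 / (dist2 + 1e-8)
    w = w / w.sum(dim=-1, keepdim=True)              # inverse-distance weights (assumed)
    B, C, m = feats.shape
    n = unknown.shape[1]
    gathered = torch.gather(feats.unsqueeze(2).expand(B, C, n, m), 3,
                            idx.unsqueeze(1).expand(B, C, n, 3))
    return (gathered * w.unsqueeze(1)).sum(-1)       # weighted sum over 3 neighbors
```
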
/classification/modules/pointops/src/interpolation/interpolation_cuda_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _INTERPOLATION_CUDA_KERNEL
2 | #define _INTERPOLATION_CUDA_KERNEL
3 | #include <torch/serialize/tensor.h>
4 | #include <vector>
5 | #include <ATen/cuda/CUDAContext.h>
6 |
7 | void nearestneighbor_cuda(int b, int n, int m, at::Tensor unknown_tensor, at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor);
8 | void interpolation_forward_cuda(int b, int c, int m, int n, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor out_tensor);
9 | void interpolation_backward_cuda(int b, int c, int n, int m, at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_points_tensor);
10 |
11 | void nearestneighbor_cuda_fast(int b, int n, int m, at::Tensor unknown_tensor, at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor);
12 | void interpolation_forward_cuda_fast(int b, int c, int m, int n, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor out_tensor);
13 |
14 | #ifdef __cplusplus
15 | extern "C" {
16 | #endif
17 |
18 | void nearestneighbor_cuda_launcher(int b, int n, int m, const float *unknown, const float *known, float *dist2, int *idx);
19 | void interpolation_forward_cuda_launcher(int b, int c, int m, int n, const float *points, const int *idx, const float *weight, float *out);
20 | void interpolation_backward_cuda_launcher(int b, int c, int n, int m, const float *grad_out, const int *idx, const float *weight, float *grad_points);
21 |
22 | void nearestneighbor_cuda_launcher_fast(int b, int n, int m, const float *unknown, const float *known, float *dist2, int *idx);
23 | void interpolation_forward_cuda_launcher_fast(int b, int c, int m, int n, const float *points, const int *idx, const float *weight, float *out);
24 |
25 | #ifdef __cplusplus
26 | }
27 | #endif
28 | #endif
29 |
--------------------------------------------------------------------------------
/classification/modules/pointops/src/knnquery/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/classification/modules/pointops/src/knnquery/__init__.py
--------------------------------------------------------------------------------
/classification/modules/pointops/src/knnquery/knnquery_cuda.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/serialize/tensor.h>
2 | #include <vector>
3 | #include <THC/THC.h>
4 | #include <ATen/cuda/CUDAContext.h>
5 |
6 | #include "knnquery_cuda_kernel.h"
7 |
8 | extern THCState *state;
9 |
10 | #define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x, " must be a CUDA tensor ")
11 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
12 | #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x)
13 |
14 |
15 | void knnquery_cuda(int b, int n, int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor)
16 | {
17 | CHECK_INPUT(new_xyz_tensor);
18 | CHECK_INPUT(xyz_tensor);
19 |
20 | const float *new_xyz = new_xyz_tensor.data_ptr<float>();
21 | const float *xyz = xyz_tensor.data_ptr<float>();
22 | int *idx = idx_tensor.data_ptr<int>();
23 | float *dist2 = dist2_tensor.data_ptr<float>();
24 |
25 | cudaStream_t stream = at::cuda::getCurrentCUDAStream();
26 |
27 | knnquery_cuda_launcher(b, n, m, nsample, xyz, new_xyz, idx, dist2, stream);
28 | }
29 |
--------------------------------------------------------------------------------
/classification/modules/pointops/src/knnquery/knnquery_cuda_kernel.cu:
--------------------------------------------------------------------------------
1 | #include "../cuda_utils.h"
2 | #include "knnquery_cuda_kernel.h"
3 |
4 | // input: xyz (b, n, 3) new_xyz (b, m, 3)
5 | // output: idx (b, m, nsample) dist2 (b, m, nsample)
6 | __global__ void knnquery_cuda_kernel(int b, int n, int m, int nsample, const float *__restrict__ xyz, const float *__restrict__ new_xyz, int *__restrict__ idx, float *__restrict__ dist2) {
7 | int bs_idx = blockIdx.y;
8 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
9 | if (bs_idx >= b || pt_idx >= m) return;
10 |
11 | new_xyz += bs_idx * m * 3 + pt_idx * 3;
12 | xyz += bs_idx * n * 3;
13 | idx += bs_idx * m * nsample + pt_idx * nsample;
14 | dist2 += bs_idx * m * nsample + pt_idx * nsample;
15 | float new_x = new_xyz[0];
16 | float new_y = new_xyz[1];
17 | float new_z = new_xyz[2];
18 |
19 | //double* best = new double[nsample];
20 | //int* besti = new int[nsample];
21 | double best[200];
22 | int besti[200];
23 | for(int i = 0; i < nsample; i++){
24 | best[i] = 1e40;
25 | besti[i] = 0;
26 | }
27 | for(int k = 0; k < n; k++){
28 | float x = xyz[k * 3 + 0];
29 | float y = xyz[k * 3 + 1];
30 | float z = xyz[k * 3 + 2];
31 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z);
32 | for(int j = 0; j < nsample; j++){
33 | if(d2 < best[j]){
34 | for(int i = nsample - 1; i > j; i--){
35 | best[i] = best[i - 1];
36 | besti[i] = besti[i - 1];
37 | }
38 | best[j] = d2;
39 | besti[j] = k;
40 | break;
41 | }
42 | }
43 | }
44 | for(int i = 0; i < nsample; i++){
45 | idx[i] = besti[i];
46 | dist2[i] = best[i];
47 | }
48 | //delete []best;
49 | //delete []besti;
50 | }
51 |
52 |
53 | void knnquery_cuda_launcher(int b, int n, int m, int nsample, const float *xyz, const float *new_xyz, int *idx, float *dist2, cudaStream_t stream) {
54 | // param new_xyz: (B, m, 3)
55 | // param xyz: (B, n, 3)
56 | // param idx: (B, m, nsample)
57 |
58 | cudaError_t err;
59 |
60 | dim3 blocks(DIVUP(m, THREADS_PER_BLOCK), b); // blockIdx.x(col), blockIdx.y(row)
61 | dim3 threads(THREADS_PER_BLOCK);
62 |
63 | // fprintf('%d, %d', blocks, threads);
64 | knnquery_cuda_kernel<<<blocks, threads, 0, stream>>>(b, n, m, nsample, xyz, new_xyz, idx, dist2);
65 | // cudaDeviceSynchronize(); // for using printf in kernel function
66 |
67 | // err = cudaGetLastError();
68 | // if (cudaSuccess != err) {
69 | // fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
70 | // exit(-1);
71 | // }
72 | }
--------------------------------------------------------------------------------
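
Note the fixed `best[200]` buffers above: this kernel assumes nsample <= 200. Its output contract is easy to state in PyTorch, which is handy when unit-testing the extension (reference only, not part of the library):

```python
import torch

def knnquery_reference(nsample, xyz, new_xyz):
    """xyz: (B, n, 3), new_xyz: (B, m, 3) -> idx (B, m, nsample), dist2 (B, m, nsample)."""
    d2 = torch.cdist(new_xyz, xyz).pow(2)                 # (B, m, n)
    dist2, idx = d2.topk(nsample, dim=-1, largest=False)  # ascending squared distances
    return idx.int(), dist2
```
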
/classification/modules/pointops/src/knnquery/knnquery_cuda_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _KNNQUERY_CUDA_KERNEL
2 | #define _KNNQUERY_CUDA_KERNEL
3 |
4 | #include <torch/serialize/tensor.h>
5 | #include <vector>
6 | #include <ATen/cuda/CUDAContext.h>
7 |
8 | void knnquery_cuda(int b, int n, int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor);
9 |
10 | #ifdef __cplusplus
11 | extern "C" {
12 | #endif
13 |
14 | void knnquery_cuda_launcher(int b, int n, int m, int nsample, const float *xyz, const float *new_xyz, int *idx, float *dist2, cudaStream_t stream);
15 |
16 | #ifdef __cplusplus
17 | }
18 | #endif
19 |
20 | #endif
--------------------------------------------------------------------------------
/classification/modules/pointops/src/knnquery_heap/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/classification/modules/pointops/src/knnquery_heap/__init__.py
--------------------------------------------------------------------------------
/classification/modules/pointops/src/knnquery_heap/knnquery_heap_cuda.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/serialize/tensor.h>
2 | #include <vector>
3 | #include <THC/THC.h>
4 | #include <ATen/cuda/CUDAContext.h>
5 |
6 | #include "knnquery_heap_cuda_kernel.h"
7 |
8 | extern THCState *state;
9 |
10 | #define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x, " must be a CUDA tensor ")
11 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
12 | #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x)
13 |
14 |
15 | void knnquery_heap_cuda(int b, int n, int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor)
16 | {
17 | CHECK_INPUT(new_xyz_tensor);
18 | CHECK_INPUT(xyz_tensor);
19 |
20 | const float *new_xyz = new_xyz_tensor.data_ptr<float>();
21 | const float *xyz = xyz_tensor.data_ptr<float>();
22 | int *idx = idx_tensor.data_ptr<int>();
23 | float *dist2 = dist2_tensor.data_ptr<float>();
24 |
25 | cudaStream_t stream = at::cuda::getCurrentCUDAStream();
26 |
27 | knnquery_heap_cuda_launcher(b, n, m, nsample, xyz, new_xyz, idx, dist2, stream);
28 | }
29 |
--------------------------------------------------------------------------------
/classification/modules/pointops/src/knnquery_heap/knnquery_heap_cuda_kernel.cu:
--------------------------------------------------------------------------------
1 | #include "../cuda_utils.h"
2 | #include "knnquery_heap_cuda_kernel.h"
3 |
4 |
5 | __device__ void swap_float(float *x, float *y)
6 | {
7 | float tmp = *x;
8 | *x = *y;
9 | *y = tmp;
10 | }
11 |
12 |
13 | __device__ void swap_int(int *x, int *y)
14 | {
15 | int tmp = *x;
16 | *x = *y;
17 | *y = tmp;
18 | }
19 |
20 |
21 | __device__ void reheap(float *dist, int *idx, int k)
22 | {
23 | int root = 0;
24 | int child = root * 2 + 1;
25 | while (child < k)
26 | {
27 | if(child + 1 < k && dist[child+1] > dist[child])
28 | child++;
29 | if(dist[root] > dist[child])
30 | return;
31 | swap_float(&dist[root], &dist[child]);
32 | swap_int(&idx[root], &idx[child]);
33 | root = child;
34 | child = root * 2 + 1;
35 | }
36 | }
37 |
38 |
39 | __device__ void heap_sort(float *dist, int *idx, int k)
40 | {
41 | int i;
42 | for (i = k - 1; i > 0; i--)
43 | {
44 | swap_float(&dist[0], &dist[i]);
45 | swap_int(&idx[0], &idx[i]);
46 | reheap(dist, idx, i);
47 | }
48 | }
49 |
50 |
51 | // input: xyz (b, n, 3) new_xyz (b, m, 3)
52 | // output: idx (b, m, nsample) dist2 (b, m, nsample)
53 | __global__ void knnquery_heap_cuda_kernel(int b, int n, int m, int nsample, const float *__restrict__ xyz, const float *__restrict__ new_xyz, int *__restrict__ idx, float *__restrict__ dist2) {
54 | int bs_idx = blockIdx.y;
55 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
56 | if (bs_idx >= b || pt_idx >= m) return;
57 |
58 | new_xyz += bs_idx * m * 3 + pt_idx * 3;
59 | xyz += bs_idx * n * 3;
60 | idx += bs_idx * m * nsample + pt_idx * nsample;
61 | dist2 += bs_idx * m * nsample + pt_idx * nsample;
62 |
63 | float new_x = new_xyz[0];
64 | float new_y = new_xyz[1];
65 | float new_z = new_xyz[2];
66 |
67 | float best_dist[100];
68 | int best_idx[100];
69 | for(int i = 0; i < nsample; i++){
70 | best_dist[i] = 1e10;
71 | best_idx[i] = 0;
72 | }
73 | for(int i = 0; i < n; i++){
74 | float x = xyz[i * 3 + 0];
75 | float y = xyz[i * 3 + 1];
76 | float z = xyz[i * 3 + 2];
77 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z);
78 | if (d2 < best_dist[0]){
79 | best_dist[0] = d2;
80 | best_idx[0] = i;
81 | reheap(best_dist, best_idx, nsample);
82 | }
83 | }
84 | heap_sort(best_dist, best_idx, nsample);
85 | for(int i = 0; i < nsample; i++){
86 | idx[i] = best_idx[i];
87 | dist2[i] = best_dist[i];
88 | }
89 | }
90 |
91 |
92 | void knnquery_heap_cuda_launcher(int b, int n, int m, int nsample, const float *xyz, const float *new_xyz, int *idx, float *dist2, cudaStream_t stream) {
93 | // param new_xyz: (B, m, 3)
94 | // param xyz: (B, n, 3)
95 | // param idx: (B, m, nsample)
96 |
97 | cudaError_t err;
98 |
99 | dim3 blocks(DIVUP(m, THREADS_PER_BLOCK), b); // blockIdx.x(col), blockIdx.y(row)
100 | dim3 threads(THREADS_PER_BLOCK);
101 |
102 | knnquery_heap_cuda_kernel<<<blocks, threads, 0, stream>>>(b, n, m, nsample, xyz, new_xyz, idx, dist2);
103 | // cudaDeviceSynchronize(); // for using printf in kernel function
104 |
105 | err = cudaGetLastError();
106 | if (cudaSuccess != err) {
107 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
108 | exit(-1);
109 | }
110 | }
--------------------------------------------------------------------------------
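
Compared to the insertion sort in `knnquery`, this variant maintains a max-heap of the current nsample best squared distances (the root holds the worst kept candidate; the `best_dist[100]` buffer caps nsample at 100), so each of the n candidates costs O(log nsample) instead of O(nsample); `heap_sort` then emits the results in ascending order. The same selection logic in plain Python, for illustration only:

```python
import heapq

def knn_heap_reference(nsample, query, points):
    """query: (x, y, z); points: iterable of (x, y, z) -> (idx, dist2), ascending."""
    heap = []  # min-heap of (-dist2, index), emulating the kernel's max-heap
    for i, (x, y, z) in enumerate(points):
        d2 = (query[0] - x) ** 2 + (query[1] - y) ** 2 + (query[2] - z) ** 2
        if len(heap) < nsample:
            heapq.heappush(heap, (-d2, i))
        elif -d2 > heap[0][0]:                  # closer than the current worst
            heapq.heapreplace(heap, (-d2, i))
    pairs = sorted((-nd, i) for nd, i in heap)  # ascending by distance
    return [i for _, i in pairs], [d for d, _ in pairs]
```
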
/classification/modules/pointops/src/knnquery_heap/knnquery_heap_cuda_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _KNNQUERY_HEAP_CUDA_KERNEL
2 | #define _KNNQUERY_HEAP_CUDA_KERNEL
3 |
4 | #include <torch/serialize/tensor.h>
5 | #include <vector>
6 | #include <ATen/cuda/CUDAContext.h>
7 |
8 | void knnquery_heap_cuda(int b, int n, int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor);
9 |
10 | #ifdef __cplusplus
11 | extern "C" {
12 | #endif
13 |
14 | void knnquery_heap_cuda_launcher(int b, int n, int m, int nsample, const float *xyz, const float *new_xyz, int *idx, float *dist2, cudaStream_t stream);
15 |
16 | #ifdef __cplusplus
17 | }
18 | #endif
19 |
20 | #endif
--------------------------------------------------------------------------------
/classification/modules/pointops/src/pointops_api.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/serialize/tensor.h>
2 | #include <torch/extension.h>
3 |
4 | #include "ballquery/ballquery_cuda_kernel.h"
5 | #include "grouping/grouping_cuda_kernel.h"
6 | #include "grouping_int/grouping_int_cuda_kernel.h"
7 | #include "sampling/sampling_cuda_kernel.h"
8 | #include "interpolation/interpolation_cuda_kernel.h"
9 | #include "knnquery/knnquery_cuda_kernel.h"
10 | #include "knnquery_heap/knnquery_heap_cuda_kernel.h"
11 |
12 |
13 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
14 | m.def("ballquery_cuda", &ballquery_cuda_fast, "ballquery_cuda_fast"); // name in python, cpp function address, docs
15 |
16 | m.def("knnquery_cuda", &knnquery_cuda, "knnquery_cuda");
17 | m.def("knnquery_heap_cuda", &knnquery_heap_cuda, "knnquery_heap_cuda");
18 |
19 | m.def("grouping_forward_cuda", &grouping_forward_cuda_fast, "grouping_forward_cuda_fast");
20 | m.def("grouping_backward_cuda", &grouping_backward_cuda, "grouping_backward_cuda");
21 |
22 | m.def("grouping_int_forward_cuda", &grouping_int_forward_cuda_fast, "grouping_int_forward_cuda_fast");
23 |
24 | m.def("gathering_forward_cuda", &gathering_forward_cuda, "gathering_forward_cuda");
25 | m.def("gathering_backward_cuda", &gathering_backward_cuda, "gathering_backward_cuda");
26 | m.def("furthestsampling_cuda", &furthestsampling_cuda, "furthestsampling_cuda");
27 |
28 | m.def("nearestneighbor_cuda", &nearestneighbor_cuda_fast, "nearestneighbor_cuda_fast");
29 | m.def("interpolation_forward_cuda", &interpolation_forward_cuda_fast, "interpolation_forward_cuda_fast");
30 | m.def("interpolation_backward_cuda", &interpolation_backward_cuda, "interpolation_backward_cuda");
31 | }
32 |
--------------------------------------------------------------------------------
/classification/modules/pointops/src/sampling/sampling_cuda.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/serialize/tensor.h>
2 | #include <vector>
3 | #include <THC/THC.h>
4 | #include <ATen/cuda/CUDAContext.h>
5 | #include "sampling_cuda_kernel.h"
6 |
7 | extern THCState *state;
8 |
9 | void gathering_forward_cuda(int b, int c, int n, int m, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor)
10 | {
11 | const float *points = points_tensor.data_ptr<float>();
12 | const int *idx = idx_tensor.data_ptr<int>();
13 | float *out = out_tensor.data_ptr<float>();
14 | gathering_forward_cuda_launcher(b, c, n, m, points, idx, out);
15 | }
16 |
17 | void gathering_backward_cuda(int b, int c, int n, int m, at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor)
18 | {
19 |
20 | const float *grad_out = grad_out_tensor.data_ptr<float>();
21 | const int *idx = idx_tensor.data_ptr<int>();
22 | float *grad_points = grad_points_tensor.data_ptr<float>();
23 | gathering_backward_cuda_launcher(b, c, n, m, grad_out, idx, grad_points);
24 | }
25 |
26 | void furthestsampling_cuda(int b, int n, int m, at::Tensor points_tensor, at::Tensor temp_tensor, at::Tensor idx_tensor)
27 | {
28 | const float *points = points_tensor.data_ptr<float>();
29 | float *temp = temp_tensor.data_ptr<float>();
30 | int *idx = idx_tensor.data_ptr<int>();
31 | furthestsampling_cuda_launcher(b, n, m, points, temp, idx);
32 | }
33 |
--------------------------------------------------------------------------------
/classification/modules/pointops/src/sampling/sampling_cuda_kernel.cu:
--------------------------------------------------------------------------------
1 | #include "../cuda_utils.h"
2 | #include "sampling_cuda_kernel.h"
3 |
4 | // input: points(b, c, n) idx(b, m)
5 | // output: out(b, c, m)
6 | __global__ void gathering_forward_cuda_kernel(int b, int c, int n, int m, const float *points, const int *idx, float *out)
7 | {
8 | for (int i = blockIdx.x; i < b; i += gridDim.x)
9 | {
10 | for (int l = blockIdx.y; l < c; l += gridDim.y)
11 | {
12 | for (int j = threadIdx.x; j < m; j += blockDim.x)
13 | {
14 | int a = idx[i * m + j];
15 | out[(i * c + l) * m + j] = points[(i * c + l) * n + a];
16 | }
17 | }
18 | }
19 | }
20 |
21 | // input: grad_out(b, c, m) idx(b, m)
22 | // output: grad_points(b, c, n)
23 | __global__ void gathering_backward_cuda_kernel(int b, int c, int n, int m, const float *grad_out, const int *idx, float *grad_points)
24 | {
25 | for (int i = blockIdx.x; i < b; i += gridDim.x)
26 | {
27 | for (int l = blockIdx.y; l < c; l += gridDim.y)
28 | {
29 | for (int j = threadIdx.x; j < m; j += blockDim.x)
30 | {
31 | int a = idx[i * m + j];
32 | atomicAdd(grad_points + (i * c + l) * n + a, grad_out[(i * c + l) * m + j]);
33 | }
34 | }
35 | }
36 | }
37 |
38 | void gathering_forward_cuda_launcher(int b, int c, int n, int m, const float *points, const int *idx, float *out)
39 | {
40 | gathering_forward_cuda_kernel<<<dim3(b, c, 1), opt_n_threads(m), 0>>>(b, c, n, m, points, idx, out);
41 | }
42 |
43 | void gathering_backward_cuda_launcher(int b, int c, int n, int m, const float *grad_out, const int *idx, float *grad_points)
44 | {
45 | gathering_backward_cuda_kernel<<<dim3(b, c, 1), opt_n_threads(m), 0>>>(b, c, n, m, grad_out, idx, grad_points);
46 | }
47 |
48 | __device__ void __update(float *dists, int *dists_i,
49 | int idx1, int idx2) {
50 | const float v1 = dists[idx1], v2 = dists[idx2];
51 | const int i1 = dists_i[idx1], i2 = dists_i[idx2];
52 | dists[idx1] = max(v1, v2);
53 | dists_i[idx1] = v2 > v1 ? i2 : i1;
54 | }
55 |
56 | // Input dataset: (b, n, 3), tmp: (b, n)
57 | // Output idxs (b, m)
58 | template <unsigned int block_size>
59 | __global__ void furthestsampling_cuda_kernel(int b, int n, int m, const float *dataset, float *temp, int *idxs)
60 | {
61 | if (m <= 0)
62 | return;
63 | __shared__ float dists[block_size];
64 | __shared__ int dists_i[block_size];
65 |
66 | int batch_index = blockIdx.x;
67 | dataset += batch_index * n * 3;
68 | temp += batch_index * n;
69 | idxs += batch_index * m;
70 | int tid = threadIdx.x;
71 | const int stride = block_size;
72 | int old = 0;
73 | if (threadIdx.x == 0)
74 | idxs[0] = old;
75 |
76 | __syncthreads();
77 | for (int j = 1; j < m; j++)
78 | {
79 | int besti = 0;
80 | float best = -1;
81 | float x1 = dataset[old * 3 + 0];
82 | float y1 = dataset[old * 3 + 1];
83 | float z1 = dataset[old * 3 + 2];
84 | for (int k = tid; k < n; k += stride)
85 | {
86 | float x2, y2, z2;
87 | x2 = dataset[k * 3 + 0];
88 | y2 = dataset[k * 3 + 1];
89 | z2 = dataset[k * 3 + 2];
90 | //float mag = (x2 * x2) + (y2 * y2) + (z2 * z2);
91 | //if (mag <= 1e-3)
92 | // continue;
93 | float d = (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1);
94 | float d2 = min(d, temp[k]);
95 | temp[k] = d2;
96 | besti = d2 > best ? k : besti;
97 | best = d2 > best ? d2 : best;
98 | }
99 | dists[tid] = best;
100 | dists_i[tid] = besti;
101 | __syncthreads();
102 |
103 | if (block_size >= 1024) {
104 | if (tid < 512) {
105 | __update(dists, dists_i, tid, tid + 512);
106 | }
107 | __syncthreads();
108 | }
109 | if (block_size >= 512) {
110 | if (tid < 256) {
111 | __update(dists, dists_i, tid, tid + 256);
112 | }
113 | __syncthreads();
114 | }
115 | if (block_size >= 256) {
116 | if (tid < 128) {
117 | __update(dists, dists_i, tid, tid + 128);
118 | }
119 | __syncthreads();
120 | }
121 | if (block_size >= 128) {
122 | if (tid < 64) {
123 | __update(dists, dists_i, tid, tid + 64);
124 | }
125 | __syncthreads();
126 | }
127 | if (block_size >= 64) {
128 | if (tid < 32) {
129 | __update(dists, dists_i, tid, tid + 32);
130 | }
131 | __syncthreads();
132 | }
133 | if (block_size >= 32) {
134 | if (tid < 16) {
135 | __update(dists, dists_i, tid, tid + 16);
136 | }
137 | __syncthreads();
138 | }
139 | if (block_size >= 16) {
140 | if (tid < 8) {
141 | __update(dists, dists_i, tid, tid + 8);
142 | }
143 | __syncthreads();
144 | }
145 | if (block_size >= 8) {
146 | if (tid < 4) {
147 | __update(dists, dists_i, tid, tid + 4);
148 | }
149 | __syncthreads();
150 | }
151 | if (block_size >= 4) {
152 | if (tid < 2) {
153 | __update(dists, dists_i, tid, tid + 2);
154 | }
155 | __syncthreads();
156 | }
157 | if (block_size >= 2) {
158 | if (tid < 1) {
159 | __update(dists, dists_i, tid, tid + 1);
160 | }
161 | __syncthreads();
162 | }
163 |
164 | old = dists_i[0];
165 | if (tid == 0)
166 | idxs[j] = old;
167 | }
168 | }
169 |
170 | void furthestsampling_cuda_launcher(int b, int n, int m, const float *dataset, float *temp, int *idxs)
171 | {
172 | unsigned int n_threads = opt_n_threads(n);
173 | switch (n_threads) {
174 | case 1024:
175 | furthestsampling_cuda_kernel<1024><<<b, n_threads>>>(b, n, m, dataset, temp, idxs);
176 | break;
177 | case 512:
178 | furthestsampling_cuda_kernel<512><<<b, n_threads>>>(b, n, m, dataset, temp, idxs);
179 | break;
180 | case 256:
181 | furthestsampling_cuda_kernel<256><<<b, n_threads>>>(b, n, m, dataset, temp, idxs);
182 | break;
183 | case 128:
184 | furthestsampling_cuda_kernel<128><<<b, n_threads>>>(b, n, m, dataset, temp, idxs);
185 | break;
186 | case 64:
187 | furthestsampling_cuda_kernel<64><<<b, n_threads>>>(b, n, m, dataset, temp, idxs);
188 | break;
189 | case 32:
190 | furthestsampling_cuda_kernel<32><<<b, n_threads>>>(b, n, m, dataset, temp, idxs);
191 | break;
192 | case 16:
193 | furthestsampling_cuda_kernel<16><<<b, n_threads>>>(b, n, m, dataset, temp, idxs);
194 | break;
195 | case 8:
196 | furthestsampling_cuda_kernel<8><<<b, n_threads>>>(b, n, m, dataset, temp, idxs);
197 | break;
198 | case 4:
199 | furthestsampling_cuda_kernel<4><<<b, n_threads>>>(b, n, m, dataset, temp, idxs);
200 | break;
201 | case 2:
202 | furthestsampling_cuda_kernel<2><<<b, n_threads>>>(b, n, m, dataset, temp, idxs);
203 | break;
204 | case 1:
205 | furthestsampling_cuda_kernel<1><<<b, n_threads>>>(b, n, m, dataset, temp, idxs);
206 | break;
207 | default:
208 | furthestsampling_cuda_kernel<512><<<b, n_threads>>>(b, n, m, dataset, temp, idxs);
209 | }
210 | }
211 |
--------------------------------------------------------------------------------
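
The FPS kernel above is the classic greedy farthest-point loop, parallelized per batch: `temp` caches each point's running minimum squared distance to the already-selected set, and the unrolled shared-memory reduction finds the argmax. A single-batch PyTorch reference of the same algorithm (illustrative, not part of the extension):

```python
import torch

def fps_reference(xyz: torch.Tensor, m: int) -> torch.Tensor:
    """xyz: (n, 3) -> (m,) indices of farthest-point samples, seeded at index 0."""
    n = xyz.shape[0]
    idxs = torch.zeros(m, dtype=torch.long)
    temp = torch.full((n,), float('inf'))   # running min distance to the picked set
    old = 0
    for j in range(1, m):
        d = ((xyz - xyz[old]) ** 2).sum(-1)  # squared distance to the latest pick
        temp = torch.min(temp, d)
        old = int(temp.argmax())             # farthest from all picks so far
        idxs[j] = old
    return idxs
```
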
/classification/modules/pointops/src/sampling/sampling_cuda_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _SAMPLING_CUDA_KERNEL
2 | #define _SAMPLING_CUDA_KERNEL
3 | #include <torch/serialize/tensor.h>
4 | #include <vector>
5 | #include <ATen/cuda/CUDAContext.h>
6 |
7 | void gathering_forward_cuda(int b, int c, int n, int m, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor);
8 | void gathering_backward_cuda(int b, int c, int n, int m, at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor);
9 | void furthestsampling_cuda(int b, int n, int m, at::Tensor points_tensor, at::Tensor temp_tensor, at::Tensor idx_tensor);
10 |
11 | #ifdef __cplusplus
12 | extern "C" {
13 | #endif
14 |
15 | void gathering_forward_cuda_launcher(int b, int c, int n, int m, const float *points, const int *idx, float *out);
16 | void gathering_backward_cuda_launcher(int b, int c, int n, int m, const float *grad_out, const int *idx, float *grad_points);
17 | void furthestsampling_cuda_launcher(int b, int n, int m, const float *dataset, float *temp, int *idxs);
18 |
19 | #ifdef __cplusplus
20 | }
21 | #endif
22 | #endif
23 |
--------------------------------------------------------------------------------
/classification/modules/polar_utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Author: Haoxi Ran
3 | Date: 05/10/2022
4 | """
5 |
6 | import torch
7 | import numpy as np
8 |
9 |
10 | def xyz2sphere(xyz, normalize=True):
11 | """
12 | Convert XYZ to Spherical Coordinate
13 |
14 | reference: https://en.wikipedia.org/wiki/Spherical_coordinate_system
15 |
16 | :param xyz: [B, N, 3] / [B, N, G, 3]
17 | :return: (rho, theta, phi) [B, N, 3] / [B, N, G, 3]
18 | """
19 | rho = torch.sqrt(torch.sum(torch.pow(xyz, 2), dim=-1, keepdim=True))
20 | rho = torch.clamp(rho, min=0) # range: [0, inf]
21 | theta = torch.acos(xyz[..., 2, None] / rho) # range: [0, pi]
22 | phi = torch.atan2(xyz[..., 1, None], xyz[..., 0, None]) # range: [-pi, pi]
23 | # check nan
24 | idx = rho == 0
25 | theta[idx] = 0
26 |
27 | if normalize:
28 | theta = theta / np.pi # [0, 1]
29 | phi = phi / (2 * np.pi) + .5 # [0, 1]
30 | out = torch.cat([rho, theta, phi], dim=-1)
31 | return out
32 |
33 |
34 | def xyz2cylind(xyz, normalize=True):
35 | """
36 | Convert XYZ to Cylindrical Coordinate
37 |
38 | reference: https://en.wikipedia.org/wiki/Cylindrical_coordinate_system
39 |
40 | :param normalize: Normalize phi & z
41 | :param xyz: [B, N, 3] / [B, N, G, 3]
42 | :return: (rho, phi, z) [B, N, 3]
43 | """
44 | rho = torch.sqrt(torch.sum(torch.pow(xyz[..., :2], 2), dim=-1, keepdim=True))
45 | rho = torch.clamp(rho, 0, 1) # range: [0, 1]
46 | phi = torch.atan2(xyz[..., 1, None], xyz[..., 0, None]) # range: [-pi, pi]
47 | z = xyz[..., 2, None]
48 | z = torch.clamp(z, -1, 1) # range: [-1, 1]
49 |
50 | if normalize:
51 | phi = phi / (2 * np.pi) + .5
52 | z = (z + 1.) / 2.
53 | out = torch.cat([rho, phi, z], dim=-1)
54 | return out
55 |
--------------------------------------------------------------------------------
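
With `normalize=True`, `xyz2sphere` divides theta by pi and maps phi through phi / (2*pi) + 0.5, so both land in [0, 1] while rho keeps its raw scale. A quick shape-and-range check (illustrative):

```python
import torch

xyz = torch.randn(2, 1024, 3)
sph = xyz2sphere(xyz)               # (2, 1024, 3) -> (rho, theta, phi)
assert (sph[..., 0] >= 0).all()                               # rho in [0, inf)
assert (sph[..., 1] >= 0).all() and (sph[..., 1] <= 1).all()  # theta / pi
assert (sph[..., 2] >= 0).all() and (sph[..., 2] <= 1).all()  # phi / (2*pi) + 0.5
```
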
/classification/modules/ptaug_utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Author: Haoxi Ran
3 | Date: 05/10/2022
4 | """
5 |
6 | import torch
7 |
8 |
9 | #################
10 | # MAIN
11 | #################
12 |
13 | def get_aug_args(args):
14 | dataset = args.dataset
15 | if dataset == 'ScanObjectNN':
16 | aug_args = {'scale_factor': 0.5, 'shift_factor': 0.3}
17 | return aug_args
18 | else:
19 | raise Exception('No such dataset')
20 |
21 |
22 | def transform_point_cloud(batch, args, aug_args, train=True, label=None):
23 | """batch: B x 3/6 x N"""
24 | if args.aug_scale:
25 | batch[:, 0:3] = scale_point_cloud(batch[:, 0:3], aug_args['scale_factor'])
26 | if args.aug_shift:
27 | batch[:, 0:3] = shift_point_cloud(batch[:, 0:3], shift_range=aug_args['shift_factor'])
28 | if label is not None:
29 | return batch, label
30 | return batch
31 |
32 |
33 | #################
34 | # Shift
35 | #################
36 |
37 | def shift_point_cloud(batch_data, shift_range=0.2):
38 | """ Randomly shift point cloud. Shift is per point cloud.
39 | Input:
40 | B x C x N array, original batch of point clouds
41 | Return:
42 | B x C x N array, shifted batch of point clouds
43 | """
44 | shifts = (torch.rand(batch_data.shape[0], 3, 1, device=batch_data.device) * 2. - 1.) * shift_range
45 | batch_data += shifts
46 | return batch_data
47 |
48 |
49 | #################
50 | # Scale
51 | #################
52 |
53 | def scale_point_cloud(batch_data, scale_range=0.2):
54 | """ Randomly scale the point cloud. Scale is per point cloud.
55 | Input:
56 | B x C x N array, original batch of point clouds
57 | Return:
58 | B x C x N array, scaled batch of point clouds
59 | """
60 | scales = (torch.rand(batch_data.shape[0], 3, 1, device=batch_data.device) * 2. - 1.) * scale_range + 1.
61 | batch_data *= scales
62 | return batch_data
63 |
--------------------------------------------------------------------------------
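
Both augmentations draw a single (B, 3, 1) random tensor, so the scale and shift are sampled per cloud and per axis (i.e. anisotropic) and applied in place to the whole batch. Typical usage, with illustrative shapes:

```python
import torch

batch = torch.randn(8, 3, 1024)                    # B x 3 x N point clouds
batch = scale_point_cloud(batch, scale_range=0.5)  # per-axis scales in [0.5, 1.5]
batch = shift_point_cloud(batch, shift_range=0.3)  # per-axis shifts in [-0.3, 0.3]
```
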
/classification/modules/recons_utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Author: Haoxi Ran
3 | Date: 05/10/2022
4 | """
5 |
6 | import torch
7 | from torch import nn
8 | from modules.pointnet2_utils import query_knn_point, index_points
9 |
10 |
11 | def _recons_factory(type):
12 | if type == 'knn':
13 | return knn_recons
14 | else:
15 | raise Exception('Not Implemented Reconstruction Type')
16 |
17 |
18 | def knn_recons(k, center, context, cuda=False):
19 | idx = query_knn_point(k, context, center, cuda=cuda)
20 | torch.cuda.empty_cache()
21 |
22 | group_xyz = index_points(context, idx, cuda=cuda, is_group=True) # [B, N, K, C]
23 | torch.cuda.empty_cache()
24 | return group_xyz
25 |
26 |
27 | def cal_normal(group_xyz, random_inv=False, is_group=False):
28 | """
29 | Calculate Normal Vector (Unit Form + First Term Positive)
30 |
31 | :param group_xyz: [B, N, K=3, 3] / [B, N, G, K=3, 3]
32 | :param random_inv: randomly flip the normal direction per batch (prob: 0.5)
33 | :param is_group: whether group_xyz carries an extra group dimension G
34 | (i.e. input of shape [B, N, G, K, 3])
35 | :return: [B, N, 3] / [B, N, G, 3]
36 | """
37 | edge_vec1 = group_xyz[..., 1, :] - group_xyz[..., 0, :] # [B, N, 3]
38 | edge_vec2 = group_xyz[..., 2, :] - group_xyz[..., 0, :] # [B, N, 3]
39 |
40 | nor = torch.cross(edge_vec1, edge_vec2, dim=-1)
41 | unit_nor = nor / torch.norm(nor, dim=-1, keepdim=True) # [B, N, 3] / [B, N, G, 3]
42 | if not is_group:
43 | pos_mask = (unit_nor[..., 0] > 0).float() * 2. - 1. # keep x_n positive
44 | else:
45 | pos_mask = (unit_nor[..., 0:1, 0] > 0).float() * 2. - 1.
46 | unit_nor = unit_nor * pos_mask.unsqueeze(-1)
47 |
48 | # batch-wise random inverse normal vector (prob: 0.5)
49 | if random_inv:
50 | random_mask = torch.randint(0, 2, (group_xyz.size(0), 1, 1)).float() * 2. - 1.
51 | random_mask = random_mask.to(unit_nor.device)
52 | if not is_group:
53 | unit_nor = unit_nor * random_mask
54 | else:
55 | unit_nor = unit_nor * random_mask.unsqueeze(-1)
56 |
57 | return unit_nor
58 |
59 |
60 | def pca(X, k, center=True):
61 | """
62 | Principal Components Analysis impl. with SVD function
63 |
64 | :param X: [n, d] data matrix (one sample per row)
65 | :param k: number of principal components to keep
66 | :param center: mean-center X before the SVD
67 | :return: dict with 'components' [k, d] and 'explained_variance' [k]
68 | """
69 |
70 | n = X.size()[0]
71 | ones = torch.ones(n).view([n, 1])
72 | h = ((1 / n) * torch.mm(ones, ones.t())) if center else torch.zeros(n * n).view([n, n])
73 | H = torch.eye(n) - h
74 | X_center = torch.mm(H.double(), X.double())
75 | u, s, v = torch.svd(X_center)
76 | components = v[:, :k].t()  # principal axes as rows, shape [k, d]
77 | explained_variance = torch.mul(s[:k], s[:k]) / (n - 1)
78 | return {'X': X, 'k': k, 'components': components,
79 | 'explained_variance': explained_variance}
80 |
81 |
82 | def cal_center(group_xyz):
83 | """
84 | Calculate Global Coordinates of the Center of Triangle
85 |
86 | :param group_xyz: [B, N, K, 3] / [B, N, G, K, 3]; K >= 3
87 | :return: [B, N, 3] / [B, N, G, 3]
88 | """
89 | center = torch.mean(group_xyz, dim=-2)
90 | return center
91 |
92 |
93 | def cal_area(group_xyz):
94 | """
95 | Calculate Area of Triangle
96 |
97 | :param group_xyz: [B, N, K, 3] / [B, N, G, K, 3]; K = 3
98 | :return: [B, N, 1] / [B, N, G, 1]
99 | """
100 | pad_shape = group_xyz[..., 0, None].shape
101 | det_xy = torch.det(torch.cat([group_xyz[..., 0, None], group_xyz[..., 1, None], torch.ones(pad_shape, device=group_xyz.device)], dim=-1))
102 | det_yz = torch.det(torch.cat([group_xyz[..., 1, None], group_xyz[..., 2, None], torch.ones(pad_shape, device=group_xyz.device)], dim=-1))
103 | det_zx = torch.det(torch.cat([group_xyz[..., 2, None], group_xyz[..., 0, None], torch.ones(pad_shape, device=group_xyz.device)], dim=-1))
104 | area = torch.sqrt(det_xy ** 2 + det_yz ** 2 + det_zx ** 2).unsqueeze(-1)
105 | return area
106 |
107 |
108 | def cal_const(normal, center, is_normalize=True):
109 | """
110 | Calculate Constant Term (Standard Version, with x_normal to be 1)
111 |
112 | math::
113 | const = x_nor * x_0 + y_nor * y_0 + z_nor * z_0
114 |
115 | :param is_normalize:
116 | :param normal: [B, N, 3] / [B, N, G, 3]
117 | :param center: [B, N, 3] / [B, N, G, 3]
118 | :return: [B, N, 1] / [B, N, G, 1]
119 | """
120 | const = torch.sum(normal * center, dim=-1, keepdim=True)
121 | factor = torch.sqrt(torch.Tensor([3])).to(normal.device)
122 | const = const / factor if is_normalize else const
123 |
124 | return const
125 |
126 |
127 | def check_nan(normal, center, pos=None):
128 | """
129 | Check & Remove NaN in normal tensor
130 |
131 | :param pos: [B, N, 1]
132 | :param center: [B, N, 3]
133 | :param normal: [B, N, 3]
134 | :return:
135 | """
136 | B, N, _ = normal.shape
137 | mask = torch.sum(torch.isnan(normal), dim=-1) > 0
138 | mask_first = torch.argmax((~mask).int(), dim=-1)
139 |
140 | normal_first = normal[torch.arange(B), None, mask_first].repeat([1, N, 1])
141 | normal[mask] = normal_first[mask]
142 | center_first = center[torch.arange(B), None, mask_first].repeat([1, N, 1])
143 | center[mask] = center_first[mask]
144 |
145 | if pos is not None:
146 | pos_first = pos[torch.arange(B), None, mask_first].repeat([1, N, 1])
147 | pos[mask] = pos_first[mask]
148 | return normal, center, pos
149 | return normal, center
150 |
151 |
152 | def check_nan_umb(normal, center, pos=None):
153 | """
154 | Check & Remove NaN in normal tensor
155 |
156 | :param pos: [B, N, G, 1]
157 | :param center: [B, N, G, 3]
158 | :param normal: [B, N, G, 3]
159 | :return:
160 | """
161 | B, N, G, _ = normal.shape
162 | mask = torch.sum(torch.isnan(normal), dim=-1) > 0
163 | mask_first = torch.argmax((~mask).int(), dim=-1)
164 | b_idx = torch.arange(B).unsqueeze(1).repeat([1, N])
165 | n_idx = torch.arange(N).unsqueeze(0).repeat([B, 1])
166 |
167 | normal_first = normal[b_idx, n_idx, None, mask_first].repeat([1, 1, G, 1])
168 | normal[mask] = normal_first[mask]
169 | center_first = center[b_idx, n_idx, None, mask_first].repeat([1, 1, G, 1])
170 | center[mask] = center_first[mask]
171 |
172 | if pos is not None:
173 | pos_first = pos[b_idx, n_idx, None, mask_first].repeat([1, 1, G, 1])
174 | pos[mask] = pos_first[mask]
175 | return normal, center, pos
176 | return normal, center
177 |
178 |
179 | class SurfaceConstructor(nn.Module):
180 | """
181 | Surface Constructor for Point Clouds
182 |
183 | Formulation of A Surface:
184 | A * (x - x_0) + B * (y - y_0) + C * (z - z_0) = 0,
185 | where A^2 + B^2 + C^2 = 1 & A > 0
186 | """
187 |
188 | def __init__(self, r=None, k=3, recons_type='knn', return_dist=False, random_inv=True, cuda=False):
189 | super(SurfaceConstructor, self).__init__()
190 | self.K = k
191 | self.R = r
192 | self.recons = _recons_factory(recons_type)
193 | self.cuda = cuda
194 |
195 | self.return_dist = return_dist
196 | self.random_inv = random_inv
197 |
198 | def forward(self, center, context):
199 | """
200 | Input:
201 | center: input points position as centroid points, [B, 3, N]
202 | context: input points position as context points, [B, 3, N']
203 |
204 | Output:
205 | normal: normals of constructed triangles, [B, 3, N]
206 | center: centroids of constructed triangles, [B, 3, N]
207 | pos: position info of constructed triangles, [B, 1, N]
208 | """
209 | center = center.permute(0, 2, 1)
210 | context = context.permute(0, 2, 1)
211 |
212 | group_xyz = self.recons(self.K, center, context, cuda=self.cuda)
213 | normal = cal_normal(group_xyz, random_inv=self.random_inv)
214 | center = cal_center(group_xyz)
215 |
216 | if self.return_dist:
217 | pos = cal_const(normal, center)
218 | normal, center, pos = check_nan(normal, center, pos)
219 | normal = normal.permute(0, 2, 1)
220 | center = center.permute(0, 2, 1)
221 | pos = pos.permute(0, 2, 1)
222 | return normal, center, pos
223 |
224 | normal, center = check_nan(normal, center)
225 | normal = normal.permute(0, 2, 1)
226 | center = center.permute(0, 2, 1)
227 |
228 | return normal, center
229 |
230 |
231 | if __name__ == '__main__':
232 | xyz = torch.rand(1, 3, 1024) * 2. - 1.
233 | constructor = SurfaceConstructor(return_dist=True)
234 |
235 | normal, center, pos = constructor(xyz, xyz)
236 | print(normal.shape)
237 | print(center.shape)
238 |
--------------------------------------------------------------------------------
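
For the plane formulation in the class docstring, the invariants are that `cal_normal` returns unit normals with a non-negative first component (when `random_inv=False`) and `cal_const` returns the plane offset scaled by 1/sqrt(3). A small check on random triangles (illustrative):

```python
import torch

group_xyz = torch.rand(2, 16, 3, 3)   # [B, N, K=3, 3] random triangles
normal = cal_normal(group_xyz)        # unit normal, x-component kept positive
center = cal_center(group_xyz)
pos = cal_const(normal, center)       # (A*x0 + B*y0 + C*z0) / sqrt(3)

assert torch.allclose(normal.norm(dim=-1), torch.ones(2, 16), atol=1e-5)
assert (normal[..., 0] >= 0).all()
```
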
/classification/scripts/scanobjectnn/repsurf_ssg_umb.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -v
3 |
4 | python3 tool/train_cls_scanobjectnn.py \
5 | --cuda_ops \
6 | --batch_size 64 \
7 | --model repsurf.repsurf_ssg_umb \
8 | --epoch 250 \
9 | --log_dir repsurf_cls_ssg_umb \
10 | --gpus 0 \
11 | --n_workers 12 \
12 | --return_center \
13 | --return_dist \
14 | --return_polar \
15 | --group_size 8 \
16 | --umb_pool sum \
17 | --num_point 1024
--------------------------------------------------------------------------------
/classification/scripts/scanobjectnn/repsurf_ssg_umb_2x.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -v
3 |
4 | python3 tool/train_cls_scanobjectnn.py \
5 | --cuda_ops \
6 | --batch_size 64 \
7 | --model repsurf.repsurf_ssg_umb_2x \
8 | --epoch 250 \
9 | --log_dir repsurf_cls_ssg_umb_2x \
10 | --gpus 0 \
11 | --n_workers 12 \
12 | --return_center \
13 | --return_dist \
14 | --return_polar \
15 | --group_size 8 \
16 | --umb_pool sum \
17 | --num_point 1024
--------------------------------------------------------------------------------
/classification/util/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/classification/util/__init__.py
--------------------------------------------------------------------------------
/classification/util/utils.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | import argparse
3 | import random
4 |
5 | import numpy as np
6 | import torch
7 | from torch import nn
8 | import torch.nn.functional as F
9 |
10 |
11 | def set_seed(seed):
12 | """
13 | Setting of Global Seed
14 |
15 | """
16 | torch.backends.cudnn.enabled = True
17 | torch.backends.cudnn.deterministic = True # consistent results on the cpu and gpu
18 | torch.backends.cudnn.benchmark = True
19 |
20 | np.random.seed(seed)
21 | random.seed(seed)
22 | torch.manual_seed(seed) # cpu
23 | torch.cuda.manual_seed(seed)
24 | torch.cuda.manual_seed_all(seed) # gpu
25 |
26 |
27 | def weight_init(m, init_type):
28 | if init_type == 'xavier':
29 | init_func = torch.nn.init.xavier_normal_
30 | elif init_type == 'kaiming':
31 | init_func = torch.nn.init.kaiming_normal_
32 | else:
33 | raise Exception('No such init type')
34 |
35 | if isinstance(m, (torch.nn.Linear, torch.nn.Conv2d, torch.nn.Conv1d)):
36 | init_func(m.weight)
37 | if m.bias is not None:
38 | torch.nn.init.constant_(m.bias, 0)
39 | elif isinstance(m, (torch.nn.BatchNorm2d, torch.nn.BatchNorm1d)):
40 | torch.nn.init.constant_(m.weight, 1) # constant
41 | # torch.nn.init.normal_(m.weight, 1.0, 0.02) # normal
42 | torch.nn.init.constant_(m.bias, 0)
43 |
44 |
45 | class ClsLoss(nn.Module):
46 | def __init__(self):
47 | super(ClsLoss, self).__init__()
48 |
49 | def forward(self, pred, target):
50 | total_loss = F.nll_loss(pred, target)
51 |
52 | return total_loss
53 |
54 |
55 | class SmoothClsLoss(nn.Module):
56 | def __init__(self, smoothing_ratio=0.1):
57 | super(SmoothClsLoss, self).__init__()
58 | self.smoothing_ratio = smoothing_ratio
59 |
60 | def forward(self, pred, target):
61 | eps = self.smoothing_ratio
62 | n_class = pred.size(1)
63 |
64 | one_hot = torch.zeros_like(pred).scatter(1, target.view(-1, 1), 1)
65 | one_hot = one_hot * (1 - eps) + (1 - one_hot) * eps / (n_class - 1)
66 | # log_prb = F.log_softmax(pred, dim=1)
67 |
68 | loss = -(one_hot * pred).sum(dim=1).mean()
69 | return loss
70 |
71 |
72 | def get_model(args):
73 | module = importlib.import_module('models.%s' % args.model)
74 | return module.Model(args)
75 |
76 |
77 | def get_loss():
78 | return SmoothClsLoss()
79 |
80 |
81 | def get_test_args():
82 | return argparse.Namespace()
83 |
--------------------------------------------------------------------------------
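
`SmoothClsLoss` assumes `pred` already holds log-probabilities (note the commented-out log_softmax): it builds a smoothed one-hot target and takes the negative dot product with the log-probs, i.e. label-smoothed cross entropy. A minimal usage sketch; the class count (15, as in ScanObjectNN) is just an example:

```python
import torch
import torch.nn.functional as F

logits = torch.randn(4, 15)                # batch of 4, 15 classes
target = torch.randint(0, 15, (4,))
log_prob = F.log_softmax(logits, dim=1)    # the models end with log_softmax
loss = SmoothClsLoss()(log_prob, target)
```
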
/segmentation/README.md:
--------------------------------------------------------------------------------
1 | # RepSurf for Segmentation
2 |
3 | By *[Haoxi Ran\*](https://hancyran.github.io/), Jun Liu, Chengjie Wang* (\*: corresponding contact)
4 |
5 | ### [PDF](https://openaccess.thecvf.com/content/CVPR2022/papers/Ran_Surface_Representation_for_Point_Clouds_CVPR_2022_paper.pdf) | [arXiv](http://arxiv.org/abs/2205.05740)
6 |
7 |
8 | ## Preparation
9 |
10 | ### Environment
11 |
12 | We tested under the following environment:
13 |
14 | * python 3.7
15 | * pytorch 1.6.0 / 1.8.0
16 | * cuda 10.1 / 11.1
17 | * gcc 7.2.0
18 | * h5py
19 | * sharedarray
20 | * tensorboardx
21 |
22 | For anaconda users, initialize the conda environment **repsurf-seg** by:
23 |
24 | ```
25 | sh init.sh
26 | ```
27 |
28 | ## Experiments
29 |
30 | ### S3DIS Area-5 (Data & Logs: [Google Drive](https://drive.google.com/drive/folders/1jIZuy4RPFJ4YHAE8ScVQgwtBwNGgfKnv?usp=sharing))
31 |
32 | * Performance using the same settings:
33 |
34 | *(performance table omitted: mIoU/mAcc/OA and training time for Umbrella RepSurf, Point Transformer and PointNet++ under the same settings; see the Google Drive logs above for the full numbers)*
89 |
90 | **Note**:
91 | 1. The performance numbers (mIoU/mAcc/OA) come from the final predictions on the whole scenes of S3DIS Area-5, while the results during training are on sub-sampled scenes for fast validation.
92 | 2. The training time of all the above implementations is estimated on four NVIDIA RTX 3090 GPUs. The time in the logs includes both training and validation.
93 | 3. To speed up training, we apply Sectorized FPS (in the first stage) to all the above methods. It saves 30~40% of training time and does not affect the performance.
94 | 4. To lessen the instability of grid sampling during inference, we apply median filtering to all the above implementations. It also slightly improves the results (~0.4 mIoU).
95 |
96 | * To download the dataset (first install gdown via **pip install gdown**):
97 |
98 | ```
99 | cd ./data/S3DIS
100 | gdown https://drive.google.com/u/1/uc?id=1UDM-bjrtqoIR9FWoIRyqLUJGyKEs22fP
101 | tar zxf s3dis.tar.gz && rm s3dis.tar.gz && cd -
102 | ```
103 |
104 | * To train one model (**Umbrella RepSurf, Point Transformer, PointNet2**) for S3DIS Area-5:
105 |
106 | ```
107 | sh scripts/s3dis/train_[MODEL].sh # MODEL: repsurf_umb, pointnet2, pointtransformer
108 | ```
109 |
110 | * To test one model (**Umbrella RepSurf, Point Transformer, PointNet2**) for S3DIS Area-5 on whole scenes:
111 |
112 | ```
113 | sh scripts/s3dis/test_[MODEL].sh # MODEL: repsurf_umb, pointnet2, pointtransformer
114 | ```
115 |
116 | ## Acknowledgment
117 |
118 | We thank the [Point Transformer Implementation](https://github.com/POSTECH-CVLab/point-transformer) for the library pointops.
119 |
120 | ## License
121 |
122 | RepSurf is under the Apache-2.0 license. Please contact the primary author **Haoxi Ran (ranhaoxi@gmail.com)** for
123 | commercial use.
124 |
--------------------------------------------------------------------------------
/segmentation/dataset/S3DISDataLoader.py:
--------------------------------------------------------------------------------
1 | """
2 | Author: Haoxi Ran
3 | Date: 06/30/2022
4 | """
5 |
6 | import os
7 | import numpy as np
8 | import SharedArray as SA
9 | from torch.utils.data import Dataset
10 |
11 | from util.data_util import sa_create, data_prepare
12 |
13 | NUM_CLASS = 13
14 |
15 |
16 | class S3DIS(Dataset):
17 | def __init__(self, args, split, coord_transform=None, rgb_transform=None,
18 | rgb_mean=None, rgb_std=None, shuffle_index=False):
19 | super().__init__()
20 | self.args, self.split, self.coord_transform, self.rgb_transform, self.rgb_mean, self.rgb_std, self.shuffle_index = \
21 | args, split, coord_transform, rgb_transform, rgb_mean, rgb_std, shuffle_index
22 | self.stop_aug = False
23 | data_list = sorted(os.listdir(args.data_dir))
24 | data_list = [item[:-4] for item in data_list if 'Area_' in item]
25 | if split == 'train':
26 | self.data_list = [item for item in data_list if not 'Area_{}'.format(args.test_area) in item]
27 | else:
28 | self.data_list = [item for item in data_list if 'Area_{}'.format(args.test_area) in item]
29 | self.data_idx = np.arange(len(self.data_list))
30 |
31 | for item in self.data_list:
32 | if not os.path.exists("/dev/shm/s3dis_{}".format(item)):
33 | data_path = os.path.join(args.data_dir, item + '.npy')
34 | data = np.load(data_path).astype(np.float32) # xyzrgbl, N*7
35 | sa_create("shm://s3dis_{}".format(item), data)
36 |
37 | def __getitem__(self, idx):
38 | data_idx = self.data_idx[idx % len(self.data_idx)]
39 | data = SA.attach("shm://s3dis_{}".format(self.data_list[data_idx])).copy()
40 | coord, feat, label = data[:, 0:3], data[:, 3:6], data[:, 6]
41 | coord, feat, label = \
42 | data_prepare(coord, feat, label, self.args, self.split, self.coord_transform, self.rgb_transform,
43 | self.rgb_mean, self.rgb_std, self.shuffle_index, self.stop_aug)
44 |
45 | return coord, feat, label
46 |
47 | def __len__(self):
48 | return len(self.data_idx) * self.args.loop
49 |
50 | @staticmethod
51 | def print_weight(data_root, data_list):
52 | print('Computing label weight...')
53 | num_point_list = []
54 | label_freq = np.zeros(NUM_CLASS)
55 | label_total = np.zeros(NUM_CLASS)
56 | # load data
57 | for idx, item in enumerate(data_list):
58 | data_path = os.path.join(data_root, item + '.npy')
59 | data = np.load(data_path)
60 | labels = data[:, 6]
61 | freq = np.histogram(labels, range(NUM_CLASS + 1))[0]
62 | label_freq += freq
63 | label_total += (freq > 0).astype(np.float64) * labels.size
64 | num_point_list.append(labels.size)
65 |
66 | # label weight
67 | label_freq = label_freq / label_total
68 | label_weight = np.median(label_freq) / label_freq
69 | print(label_weight)
70 |
71 | @staticmethod
72 | def print_mean_std(data_root, data_list):
73 | print('Computing color mean & std...')
74 | point_list = []
75 | for idx, item in enumerate(data_list):
76 | data_path = os.path.join(data_root, item + '.npy')
77 | data = np.load(data_path)
78 | point_list.append(data[:, 3:6])
79 |
80 | points = np.vstack(point_list) / 255.
81 | mean = np.mean(points, 0)
82 | std = np.std(points, 0)
83 | print(f'mean: {mean}, std: {std}')
84 |
--------------------------------------------------------------------------------
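
`print_weight` implements median-frequency balancing: each class weight is the median of the class frequencies divided by that class's own frequency, so rare classes are up-weighted. A toy three-class example of the final step:

```python
import numpy as np

label_freq = np.array([0.6, 0.3, 0.1])            # normalized class frequencies
label_weight = np.median(label_freq) / label_freq
print(label_weight)                               # [0.5 1.  3. ]
```
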
/segmentation/dataset/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/segmentation/dataset/__init__.py
--------------------------------------------------------------------------------
/segmentation/init.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | mkdir -p log/PointAnalysis/log/S3DIS
4 | mkdir -p log/PointAnalysis/log/ScanNet
5 | mkdir -p data/S3DIS
6 | mkdir -p data/ScanNet
7 |
8 | conda create -n repsurf-seg python=3.7 -y
9 | conda activate repsurf-seg
10 |
11 | #conda install pytorch=1.6.0 torchvision=0.7.0 cudatoolkit=10.1 -c pytorch -c conda-forge -y
12 | pip install torch==1.8.0+cu111 torchvision==0.9.0+cu111 torchaudio==0.8.0 -f https://download.pytorch.org/whl/torch_stable.html
13 | conda install -c anaconda h5py pyyaml -y
14 | conda install -c conda-forge sharedarray tensorboardx -y
15 |
16 | cd modules/pointops
17 | python3 setup.py install
18 | cd -
19 |
--------------------------------------------------------------------------------
/segmentation/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/segmentation/models/__init__.py
--------------------------------------------------------------------------------
/segmentation/models/pointnet2/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/segmentation/models/pointnet2/__init__.py
--------------------------------------------------------------------------------
/segmentation/models/pointnet2/pointnet2_ssg.py:
--------------------------------------------------------------------------------
1 | """
2 | Author: Haoxi Ran
3 | Date: 06/30/2022
4 | """
5 |
6 | import torch
7 | import torch.nn as nn
8 | from modules.pointnet2_utils import PointNetSetAbstraction, PointNetFeaturePropagation
9 |
10 |
11 | class Model(nn.Module):
12 | def __init__(self, args):
13 | super(Model, self).__init__()
14 | self.sa1 = PointNetSetAbstraction(4, 32, 6 + 3, [32, 32, 64], num_sector=4)
15 | self.sa2 = PointNetSetAbstraction(4, 32, 64 + 3, [64, 64, 128])
16 | self.sa3 = PointNetSetAbstraction(4, 32, 128 + 3, [128, 128, 256])
17 | self.sa4 = PointNetSetAbstraction(4, 32, 256 + 3, [256, 256, 512])
18 |
19 | self.fp4 = PointNetFeaturePropagation(768, [256, 256])
20 | self.fp3 = PointNetFeaturePropagation(384, [256, 256])
21 | self.fp2 = PointNetFeaturePropagation(320, [256, 128])
22 | self.fp1 = PointNetFeaturePropagation(128, [128, 128, 128])
23 |
24 | self.classifier = nn.Sequential(
25 | nn.Linear(128, 128),
26 | nn.BatchNorm1d(128),
27 | nn.ReLU(True),
28 | nn.Dropout(0.5),
29 | nn.Linear(128, args.num_class),
30 | )
31 |
32 | def forward(self, pos_feat_off0):
33 | pos_feat_off0[1] = torch.cat([pos_feat_off0[0], pos_feat_off0[1]], 1)
34 |
35 | pos_feat_off1 = self.sa1(pos_feat_off0)
36 | pos_feat_off2 = self.sa2(pos_feat_off1)
37 | pos_feat_off3 = self.sa3(pos_feat_off2)
38 | pos_feat_off4 = self.sa4(pos_feat_off3)
39 |
40 | pos_feat_off3[1] = self.fp4(pos_feat_off3, pos_feat_off4)
41 | pos_feat_off2[1] = self.fp3(pos_feat_off2, pos_feat_off3)
42 | pos_feat_off1[1] = self.fp2(pos_feat_off1, pos_feat_off2)
43 | pos_feat_off0[1] = self.fp1([pos_feat_off0[0], None, pos_feat_off0[2]], pos_feat_off1)
44 |
45 | feature = self.classifier(pos_feat_off0[1])
46 |
47 | return feature
48 |
--------------------------------------------------------------------------------
/segmentation/models/pointtransformer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/segmentation/models/pointtransformer/__init__.py
--------------------------------------------------------------------------------
/segmentation/models/pointtransformer/pointtransformer.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from modules.pointtransformer_utils import PointTransformerBlock, TransitionDown, TransitionUp
4 |
5 |
6 | class Model(nn.Module):
7 | def __init__(self, args):
8 | super().__init__()
9 | block = PointTransformerBlock
10 | num_block = [2, 3, 4, 6, 3]
11 | self.in_c = args.in_channel
12 | self.in_planes, planes = self.in_c, [32, 64, 128, 256, 512]
13 | fpn_planes, fpnhead_planes, share_planes = 128, 64, 8
14 | stride, nsample = [1, 4, 4, 4, 4], [16, 16, 16, 16, 16]
15 | self.enc1 = self._make_enc(block, planes[0], num_block[0], share_planes, stride=stride[0],
16 | nsample=nsample[0]) # N/1
17 | self.enc2 = self._make_enc(block, planes[1], num_block[1], share_planes, stride=stride[1],
18 | nsample=nsample[1], num_sector=4) # N/4
19 | self.enc3 = self._make_enc(block, planes[2], num_block[2], share_planes, stride=stride[2],
20 | nsample=nsample[2]) # N/16
21 | self.enc4 = self._make_enc(block, planes[3], num_block[3], share_planes, stride=stride[3],
22 | nsample=nsample[3]) # N/64
23 | self.enc5 = self._make_enc(block, planes[4], num_block[4], share_planes, stride=stride[4],
24 | nsample=nsample[4]) # N/256
25 | self.dec5 = self._make_dec(block, planes[4], 2, share_planes, nsample=nsample[4], is_head=True) # transform p5
26 | self.dec4 = self._make_dec(block, planes[3], 2, share_planes, nsample=nsample[3]) # fusion p5 and p4
27 | self.dec3 = self._make_dec(block, planes[2], 2, share_planes, nsample=nsample[2]) # fusion p4 and p3
28 | self.dec2 = self._make_dec(block, planes[1], 2, share_planes, nsample=nsample[1]) # fusion p3 and p2
29 | self.dec1 = self._make_dec(block, planes[0], 2, share_planes, nsample=nsample[0]) # fusion p2 and p1
30 | self.cls = nn.Sequential(nn.Linear(planes[0], planes[0]), nn.BatchNorm1d(planes[0]), nn.ReLU(inplace=True),
31 | nn.Linear(planes[0], args.num_class))
32 |
33 | def _make_enc(self, block, planes, blocks, share_planes=8, stride=1, nsample=16, num_sector=1):
34 | layers = [TransitionDown(self.in_planes, planes * block.expansion, stride, nsample, num_sector)]
35 | self.in_planes = planes * block.expansion
36 | for _ in range(1, blocks):
37 | layers.append(block(self.in_planes, self.in_planes, share_planes, nsample=nsample))
38 | return nn.Sequential(*layers)
39 |
40 | def _make_dec(self, block, planes, blocks, share_planes=8, nsample=16, is_head=False):
41 | layers = [TransitionUp(self.in_planes, None if is_head else planes * block.expansion)]
42 | self.in_planes = planes * block.expansion
43 | for _ in range(1, blocks):
44 | layers.append(block(self.in_planes, self.in_planes, share_planes, nsample=nsample))
45 | return nn.Sequential(*layers)
46 |
47 | def forward(self, pxo, *args):
48 | p0, x0, o0 = pxo # (n, 3), (n, c), (b)
49 | x0 = p0 if self.in_c == 3 else torch.cat((p0, x0), 1)
50 | p1, x1, o1 = self.enc1([p0, x0, o0])
51 | p2, x2, o2 = self.enc2([p1, x1, o1])
52 | p3, x3, o3 = self.enc3([p2, x2, o2])
53 | p4, x4, o4 = self.enc4([p3, x3, o3])
54 | p5, x5, o5 = self.enc5([p4, x4, o4])
55 | x5 = self.dec5[1:]([p5, self.dec5[0]([p5, x5, o5]), o5])[1]
56 | x4 = self.dec4[1:]([p4, self.dec4[0]([p4, x4, o4], [p5, x5, o5]), o4])[1]
57 | x3 = self.dec3[1:]([p3, self.dec3[0]([p3, x3, o3], [p4, x4, o4]), o3])[1]
58 | x2 = self.dec2[1:]([p2, self.dec2[0]([p2, x2, o2], [p3, x3, o3]), o2])[1]
59 | x1 = self.dec1[1:]([p1, self.dec1[0]([p1, x1, o1], [p2, x2, o2]), o1])[1]
60 | x = self.cls(x1)
61 | return x
62 |
--------------------------------------------------------------------------------
/segmentation/models/repsurf/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/segmentation/models/repsurf/__init__.py
--------------------------------------------------------------------------------
/segmentation/models/repsurf/repsurf_umb_ssg.py:
--------------------------------------------------------------------------------
1 | """
2 | Author: Haoxi Ran
3 | Date: 06/30/2022
4 | """
5 |
6 | import torch
7 | import torch.nn as nn
8 | from modules.repsurface_utils import UmbrellaSurfaceConstructor, SurfaceAbstractionCD, SurfaceFeaturePropagationCD
9 |
10 |
11 | class Model(nn.Module):
12 | def __init__(self, args):
13 | super(Model, self).__init__()
14 | center_channel = 6 if args.return_polar else 3
15 | repsurf_in_channel = 10
16 | repsurf_out_channel = 10
17 |
18 | self.sa1 = SurfaceAbstractionCD(4, 32, args.in_channel + repsurf_out_channel, center_channel, [32, 32, 64],
19 | True, args.return_polar, num_sector=4)
20 | self.sa2 = SurfaceAbstractionCD(4, 32, 64 + repsurf_out_channel, center_channel, [64, 64, 128],
21 | True, args.return_polar)
22 | self.sa3 = SurfaceAbstractionCD(4, 32, 128 + repsurf_out_channel, center_channel, [128, 128, 256],
23 | True, args.return_polar)
24 | self.sa4 = SurfaceAbstractionCD(4, 32, 256 + repsurf_out_channel, center_channel, [256, 256, 512],
25 | True, args.return_polar)
26 |
27 | self.fp4 = SurfaceFeaturePropagationCD(512, 256, [256, 256])
28 | self.fp3 = SurfaceFeaturePropagationCD(256, 128, [256, 256])
29 | self.fp2 = SurfaceFeaturePropagationCD(256, 64, [256, 128])
30 | self.fp1 = SurfaceFeaturePropagationCD(128, None, [128, 128, 128])
31 |
32 | self.classifier = nn.Sequential(
33 | nn.Linear(128, 128),
34 | nn.BatchNorm1d(128),
35 | nn.ReLU(True),
36 | nn.Dropout(0.5),
37 | nn.Linear(128, args.num_class),
38 | )
39 |
40 | self.surface_constructor = UmbrellaSurfaceConstructor(args.group_size + 1, repsurf_in_channel, repsurf_out_channel)
41 |
42 | def forward(self, pos_feat_off0):
43 | pos_nor_feat_off0 = [
44 | pos_feat_off0[0],
45 | self.surface_constructor(pos_feat_off0[0], pos_feat_off0[2]),
46 | torch.cat([pos_feat_off0[0], pos_feat_off0[1]], 1),
47 | pos_feat_off0[2]
48 | ]
49 |
50 | pos_nor_feat_off1 = self.sa1(pos_nor_feat_off0)
51 | pos_nor_feat_off2 = self.sa2(pos_nor_feat_off1)
52 | pos_nor_feat_off3 = self.sa3(pos_nor_feat_off2)
53 | pos_nor_feat_off4 = self.sa4(pos_nor_feat_off3)
54 |
55 | del pos_nor_feat_off0[1], pos_nor_feat_off1[1], pos_nor_feat_off2[1], pos_nor_feat_off3[1], pos_nor_feat_off4[1]
56 | pos_nor_feat_off3[1] = self.fp4(pos_nor_feat_off3, pos_nor_feat_off4)
57 | pos_nor_feat_off2[1] = self.fp3(pos_nor_feat_off2, pos_nor_feat_off3)
58 | pos_nor_feat_off1[1] = self.fp2(pos_nor_feat_off1, pos_nor_feat_off2)
59 | pos_nor_feat_off0[1] = self.fp1([pos_nor_feat_off0[0], None, pos_nor_feat_off0[2]], pos_nor_feat_off1)
60 |
61 | feature = self.classifier(pos_nor_feat_off0[1])
62 |
63 | return feature
64 |
--------------------------------------------------------------------------------
/segmentation/modules/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/segmentation/modules/__init__.py
--------------------------------------------------------------------------------
/segmentation/modules/pointnet2_utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Author: Haoxi Ran
3 | Date: 06/30/2022
4 | """
5 |
6 | import torch
7 | import torch.nn as nn
8 | import torch.nn.functional as F
9 |
10 | from modules.pointops.functions import pointops
11 |
12 |
13 | def sample_and_group(stride, nsample, xyz, points, offset, return_idx=False, num_sector=1):
14 | # sample
15 | if stride > 1:
16 | new_offset, sample_idx = [offset[0].item() // stride], offset[0].item() // stride
17 | for i in range(1, offset.shape[0]):
18 | sample_idx += (offset[i].item() - offset[i - 1].item()) // stride
19 | new_offset.append(sample_idx)
20 | new_offset = torch.cuda.IntTensor(new_offset)
21 | if num_sector > 1:
22 | fps_idx = pointops.sectorized_fps(xyz, offset, new_offset, num_sector) # [M]
23 | else:
24 | fps_idx = pointops.furthestsampling(xyz, offset, new_offset) # [M]
25 | new_xyz = xyz[fps_idx.long(), :] # [M, 3]
26 | else:
27 | new_xyz = xyz
28 | new_offset = offset
29 |
30 | # group
31 | N, M = xyz.shape[0], new_xyz.shape[0]
32 | group_idx, _ = pointops.knnquery(nsample, xyz, new_xyz, offset, new_offset) # [M, nsample]
33 | group_xyz = xyz[group_idx.view(-1).long(), :].view(M, nsample, 3) # [M, nsample, 3]
34 | group_xyz_norm = group_xyz - new_xyz.unsqueeze(1)
35 |
36 | if points is not None and not return_idx:
37 | C = points.shape[1]
38 | group_points = points[group_idx.view(-1).long(), :].view(M, nsample, C)
39 | new_points = torch.cat([group_xyz_norm, group_points], dim=-1) # [M, nsample, 3/6+C]
40 | else:
41 | new_points = group_xyz_norm
42 |
43 | if return_idx:
44 | return new_xyz, new_points, new_offset, group_idx
45 | else:
46 | return new_xyz, new_points, new_offset
47 |
48 |
49 | class PointNetSetAbstraction(nn.Module):
50 | """
51 | PointNet2 SA Module
52 |
53 | """
54 |
55 | def __init__(self, stride, nsample, in_channel, mlp, num_sector=1):
56 | super(PointNetSetAbstraction, self).__init__()
57 | self.stride = stride
58 | self.nsample = nsample
59 | self.num_sector = num_sector
60 | self.mlp_convs = nn.ModuleList()
61 | self.mlp_bns = nn.ModuleList()
62 |
63 | last_channel = in_channel
64 | for out_channel in mlp:
65 | self.mlp_convs.append(nn.Conv1d(last_channel, out_channel, 1))
66 | self.mlp_bns.append(nn.BatchNorm1d(out_channel))
67 | last_channel = out_channel
68 |
69 | def forward(self, pos_feat_off):
70 | xyz, points, offset = pos_feat_off # [N, 3], [N, C], [B]
71 |
72 | new_xyz, new_points, new_offset = sample_and_group(self.stride, self.nsample, xyz, points, offset,
73 | num_sector=self.num_sector)
74 |
75 | # new_xyz: sampled points position data, [M, 3]
76 | # new_points: sampled points data, [M, nsample, 3+C]
77 | new_points = new_points.transpose(1, 2).contiguous() # [M, 3+C, nsample]
78 | for i, conv in enumerate(self.mlp_convs):
79 | bn = self.mlp_bns[i]
80 | new_points = F.relu(bn(conv(new_points)))
81 | new_points = torch.max(new_points, 2)[0]
82 |
83 | return [new_xyz, new_points, new_offset]
84 |
85 |
86 | class PointNetFeaturePropagation(nn.Module):
87 | """
88 | PointNet2 FP Module
89 |
90 | """
91 |
92 | def __init__(self, in_channel, mlp):
93 | super(PointNetFeaturePropagation, self).__init__()
94 | self.mlp_convs = nn.ModuleList()
95 | self.mlp_bns = nn.ModuleList()
96 | last_channel = in_channel
97 | for out_channel in mlp:
98 | self.mlp_convs.append(nn.Linear(last_channel, out_channel))
99 | self.mlp_bns.append(nn.BatchNorm1d(out_channel))
100 | last_channel = out_channel
101 |
102 | def forward(self, pos_feat_off1, pos_feat_off2):
103 | xyz1, points1, offset1 = pos_feat_off1 # [N, 3], [N, C], [B]
104 | xyz2, points2, offset2 = pos_feat_off2 # [M, 3], [M, C], [B]
105 |
106 |         idx, dist = pointops.knnquery(3, xyz2, xyz1, offset2, offset1)  # [N, 3], [N, 3]
107 |         dist_recip = 1.0 / (dist + 1e-8)  # [N, 3]
108 |         norm = torch.sum(dist_recip, dim=1, keepdim=True)
109 |         weight = dist_recip / norm  # [N, 3]
110 |
111 | interpolated_points = torch.cuda.FloatTensor(xyz1.shape[0], points2.shape[1]).zero_()
112 | for i in range(3):
113 | interpolated_points += points2[idx[:, i].long(), :] * weight[:, i].unsqueeze(-1)
114 |
115 | # skip connection
116 | if points1 is not None:
117 |             new_points = torch.cat([points1, interpolated_points], dim=1)  # [N, C1+C2]
118 | else:
119 | new_points = interpolated_points
120 |
121 | # mlp
122 | for i, conv in enumerate(self.mlp_convs):
123 | bn = self.mlp_bns[i]
124 | new_points = F.relu(bn(conv(new_points)))
125 |
126 | return new_points
127 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/segmentation/modules/pointops/__init__.py
--------------------------------------------------------------------------------
/segmentation/modules/pointops/functions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/segmentation/modules/pointops/functions/__init__.py
--------------------------------------------------------------------------------
/segmentation/modules/pointops/setup.py:
--------------------------------------------------------------------------------
1 | #python3 setup.py install
2 | from setuptools import setup
3 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
4 | import os
5 | from distutils.sysconfig import get_config_vars
6 |
7 | (opt,) = get_config_vars('OPT')
8 | os.environ['OPT'] = " ".join(
9 | flag for flag in opt.split() if flag != '-Wstrict-prototypes'
10 | )
11 |
12 | setup(
13 | name='pointops_cuda',
14 | author='Hengshuang Zhao',
15 | ext_modules=[
16 | CUDAExtension('pointops_cuda', [
17 | 'src/pointops_api.cpp',
18 | 'src/knnquery/knnquery_cuda.cpp',
19 | 'src/knnquery/knnquery_cuda_kernel.cu',
20 | 'src/sampling/sampling_cuda.cpp',
21 | 'src/sampling/sampling_cuda_kernel.cu',
22 | 'src/grouping/grouping_cuda.cpp',
23 | 'src/grouping/grouping_cuda_kernel.cu',
24 | 'src/interpolation/interpolation_cuda.cpp',
25 | 'src/interpolation/interpolation_cuda_kernel.cu',
26 | 'src/subtraction/subtraction_cuda.cpp',
27 | 'src/subtraction/subtraction_cuda_kernel.cu',
28 | 'src/aggregation/aggregation_cuda.cpp',
29 | 'src/aggregation/aggregation_cuda_kernel.cu',
30 | ],
31 | extra_compile_args={'cxx': ['-g'], 'nvcc': ['-O2']}
32 | )
33 | ],
34 | cmdclass={'build_ext': BuildExtension}
35 | )
36 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/segmentation/modules/pointops/src/__init__.py
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/aggregation/aggregation_cuda.cpp:
--------------------------------------------------------------------------------
1 | #include <vector>
2 | #include <torch/serialize/tensor.h>
3 | #include <ATen/cuda/CUDAContext.h>
4 | #include <cuda_runtime_api.h>
5 | #include "aggregation_cuda_kernel.h"
6 |
7 |
8 | void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor)
9 | {
10 |     const float *input = input_tensor.data_ptr<float>();
11 |     const float *position = position_tensor.data_ptr<float>();
12 |     const float *weight = weight_tensor.data_ptr<float>();
13 |     const int *idx = idx_tensor.data_ptr<int>();
14 |     float *output = output_tensor.data_ptr<float>();
15 | aggregation_forward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, output);
16 | }
17 |
18 | void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor)
19 | {
20 |     const float *input = input_tensor.data_ptr<float>();
21 |     const float *position = position_tensor.data_ptr<float>();
22 |     const float *weight = weight_tensor.data_ptr<float>();
23 |     const int *idx = idx_tensor.data_ptr<int>();
24 |     const float *grad_output = grad_output_tensor.data_ptr<float>();
25 |     float *grad_input = grad_input_tensor.data_ptr<float>();
26 |     float *grad_position = grad_position_tensor.data_ptr<float>();
27 |     float *grad_weight = grad_weight_tensor.data_ptr<float>();
28 | aggregation_backward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight);
29 | }
30 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/aggregation/aggregation_cuda_kernel.cu:
--------------------------------------------------------------------------------
1 | #include "../cuda_utils.h"
2 | #include "aggregation_cuda_kernel.h"
3 |
4 |
5 | __global__ void aggregation_forward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) {
6 | // input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c)
7 | int index = blockIdx.x * blockDim.x + threadIdx.x;
8 | if (index >= n * c) return;
9 | const int c_idx = index % c;
10 | const int n_idx = index / c;
11 | const int w_c_idx = c_idx % w_c;
12 | for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++)
13 | {
14 | int idx_idx = n_idx * nsample + nsample_idx;
15 | int input_idx = idx[idx_idx] * c + c_idx;
16 | int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx;
17 | int weight_idx = n_idx * nsample * w_c + nsample_idx * w_c + w_c_idx;
18 | output[index] += (input[input_idx] + position[position_idx]) * weight[weight_idx];
19 | }
20 | }
21 |
22 | __global__ void aggregation_backward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) {
23 | // input: grad_output: (n, c), output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight: (n, nsample, w_c)
24 | int index = blockIdx.x * blockDim.x + threadIdx.x;
25 | if (index >= n * c) return;
26 | const int c_idx = index % c;
27 | const int n_idx = index / c;
28 | const int w_c_idx = c_idx % w_c;
29 | for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++)
30 | {
31 | int idx_idx = n_idx * nsample + nsample_idx;
32 | int input_idx = idx[idx_idx] * c + c_idx;
33 | int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx;
34 | int weight_idx = n_idx * nsample * w_c + nsample_idx * w_c + w_c_idx;
35 | atomicAdd(grad_input + input_idx, grad_output[index] * weight[weight_idx]);
36 | grad_position[position_idx] = grad_output[index] * weight[weight_idx];
37 | atomicAdd(grad_weight + weight_idx, grad_output[index] * (input[input_idx] + position[position_idx]));
38 | }
39 | }
40 |
41 | void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) {
42 | // input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c)
43 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
44 | dim3 threads(THREADS_PER_BLOCK);
45 |     aggregation_forward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, w_c, input, position, weight, idx, output);
46 | }
47 |
48 | void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) {
49 | // input: grad_output: (n, c), output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight: (n, nsample, w_c)
50 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
51 | dim3 threads(THREADS_PER_BLOCK);
52 |     aggregation_backward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight);
53 | }
54 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/aggregation/aggregation_cuda_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _AGGREGATION_CUDA_KERNEL
2 | #define _AGGREGATION_CUDA_KERNEL
3 | #include <vector>
4 | #include <torch/serialize/tensor.h>
5 | #include <ATen/cuda/CUDAContext.h>
6 |
7 | void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor);
8 | void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor);
9 |
10 | #ifdef __cplusplus
11 | extern "C" {
12 | #endif
13 |
14 | void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output);
15 | void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight);
16 |
17 | #ifdef __cplusplus
18 | }
19 | #endif
20 | #endif
21 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/cuda_utils.h:
--------------------------------------------------------------------------------
1 | #ifndef _CUDA_UTILS_H
2 | #define _CUDA_UTILS_H
3 |
4 | #include <cmath>
5 | #include <algorithm>
6 |
7 | #define TOTAL_THREADS 1024
8 | #define THREADS_PER_BLOCK 256
9 | #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
10 |
11 | inline int opt_n_threads(int work_size) {
12 |     const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0);
13 | return std::max(std::min(1 << pow_2, TOTAL_THREADS), 1);
14 | }
15 |
16 | inline dim3 opt_block_config(int x, int y) {
17 | const int x_threads = opt_n_threads(x);
18 | const int y_threads = std::max(std::min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1);
19 | dim3 block_config(x_threads, y_threads, 1);
20 | return block_config;
21 | }
22 |
23 | #endif
24 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/grouping/grouping_cuda.cpp:
--------------------------------------------------------------------------------
1 | #include <vector>
2 | #include <torch/serialize/tensor.h>
3 | #include <ATen/cuda/CUDAContext.h>
4 | #include <cuda_runtime_api.h>
5 | #include "grouping_cuda_kernel.h"
6 |
7 |
8 | void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor)
9 | {
10 |     const float *input = input_tensor.data_ptr<float>();
11 |     const int *idx = idx_tensor.data_ptr<int>();
12 |     float *output = output_tensor.data_ptr<float>();
13 | grouping_forward_cuda_launcher(m, nsample, c, input, idx, output);
14 | }
15 |
16 | void grouping_backward_cuda(int m, int nsample, int c, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor)
17 | {
18 |     const float *grad_output = grad_output_tensor.data_ptr<float>();
19 |     const int *idx = idx_tensor.data_ptr<int>();
20 |     float *grad_input = grad_input_tensor.data_ptr<float>();
21 | grouping_backward_cuda_launcher(m, nsample, c, grad_output, idx, grad_input);
22 | }
23 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/grouping/grouping_cuda_kernel.cu:
--------------------------------------------------------------------------------
1 | #include "../cuda_utils.h"
2 | #include "grouping_cuda_kernel.h"
3 |
4 |
5 | __global__ void grouping_forward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ input, const int *__restrict__ idx, float *__restrict__ output) {
6 | // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c)
7 | int index = blockIdx.x * blockDim.x + threadIdx.x;
8 | if (index >= m * nsample * c) return;
9 | const int c_idx = index % c;
10 | const int nsample_idx = (index / c) % nsample;
11 | const int m_idx = index / nsample / c;
12 | const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx;
13 | output[index] = input[input_idx];
14 | }
15 |
16 | __global__ void grouping_backward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ grad_output, const int *__restrict__ idx, float *__restrict__ grad_input) {
17 | // input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c)
18 | int index = blockIdx.x * blockDim.x + threadIdx.x;
19 | if (index >= m * nsample * c) return;
20 | const int c_idx = index % c;
21 | const int nsample_idx = (index / c) % nsample;
22 | const int m_idx = index / nsample / c;
23 | const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx;
24 | atomicAdd(grad_input + input_idx, grad_output[index]);
25 | }
26 |
27 | void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output) {
28 | // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c)
29 | dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK));
30 | dim3 threads(THREADS_PER_BLOCK);
31 |     grouping_forward_cuda_kernel<<<blocks, threads, 0>>>(m, nsample, c, input, idx, output);
32 | }
33 |
34 | void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input)
35 | {
36 | // input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c)
37 | dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK));
38 | dim3 threads(THREADS_PER_BLOCK);
39 |     grouping_backward_cuda_kernel<<<blocks, threads, 0>>>(m, nsample, c, grad_output, idx, grad_input);
40 | }
41 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/grouping/grouping_cuda_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _GROUPING_CUDA_KERNEL
2 | #define _GROUPING_CUDA_KERNEL
3 | #include <vector>
4 | #include <torch/serialize/tensor.h>
5 | #include <ATen/cuda/CUDAContext.h>
6 |
7 | void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor);
8 | void grouping_backward_cuda(int m, int nsample, int c, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor);
9 |
10 | #ifdef __cplusplus
11 | extern "C" {
12 | #endif
13 |
14 | void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output);
15 | void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input);
16 |
17 | #ifdef __cplusplus
18 | }
19 | #endif
20 | #endif
21 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/interpolation/interpolation_cuda.cpp:
--------------------------------------------------------------------------------
1 | #include <vector>
2 | #include <torch/serialize/tensor.h>
3 | #include <ATen/cuda/CUDAContext.h>
4 | #include <cuda_runtime_api.h>
5 | #include "interpolation_cuda_kernel.h"
6 |
7 |
8 | void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor)
9 | {
10 |     const float *input = input_tensor.data_ptr<float>();
11 |     const int *idx = idx_tensor.data_ptr<int>();
12 |     const float *weight = weight_tensor.data_ptr<float>();
13 |     float *output = output_tensor.data_ptr<float>();
14 | interpolation_forward_cuda_launcher(n, c, k, input, idx, weight, output);
15 | }
16 |
17 | void interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_input_tensor)
18 | {
19 |     const float *grad_output = grad_output_tensor.data_ptr<float>();
20 |     const int *idx = idx_tensor.data_ptr<int>();
21 |     const float *weight = weight_tensor.data_ptr<float>();
22 |     float *grad_input = grad_input_tensor.data_ptr<float>();
23 | interpolation_backward_cuda_launcher(n, c, k, grad_output, idx, weight, grad_input);
24 | }
25 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/interpolation/interpolation_cuda_kernel.cu:
--------------------------------------------------------------------------------
1 | #include "../cuda_utils.h"
2 | #include "interpolation_cuda_kernel.h"
3 |
4 |
5 | __global__ void interpolation_forward_cuda_kernel(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output)
6 | {
7 | // input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c)
8 | int index = blockIdx.x * blockDim.x + threadIdx.x;
9 | if (index >= n * c) return;
10 | int c_idx = index % c;
11 | int n_idx = index / c;
12 | for (int i = 0; i < k; i++)
13 | {
14 | int idx_idx = n_idx * k + i;
15 | int input_idx = idx[idx_idx] * c + c_idx;
16 | output[index] += input[input_idx] * weight[idx_idx];
17 | }
18 | }
19 |
20 | __global__ void interpolation_backward_cuda_kernel(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input)
21 | {
22 | // input: grad_output: (n, c), idx: (n, k), weight: (n, k), output: grad_input (m, c)
23 | int index = blockIdx.x * blockDim.x + threadIdx.x;
24 | if (index >= n * c) return;
25 | int c_idx = index % c;
26 | int n_idx = index / c;
27 | for (int i = 0; i < k; i++)
28 | {
29 | int idx_idx = n_idx * k + i;
30 | int input_idx = idx[idx_idx] * c + c_idx;
31 | atomicAdd(grad_input + input_idx, grad_output[index] * weight[idx_idx]);
32 | }
33 | }
34 |
35 | void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output) {
36 | // input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c)
37 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
38 | dim3 threads(THREADS_PER_BLOCK);
39 |     interpolation_forward_cuda_kernel<<<blocks, threads, 0>>>(n, c, k, input, idx, weight, output);
40 | }
41 |
42 | void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input) {
43 | // input: grad_output: (n, c), idx: (n, k), weight: (n, k), output: grad_input (m, c)
44 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
45 | dim3 threads(THREADS_PER_BLOCK);
46 |     interpolation_backward_cuda_kernel<<<blocks, threads, 0>>>(n, c, k, grad_output, idx, weight, grad_input);
47 | }
48 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/interpolation/interpolation_cuda_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _INTERPOLATION_CUDA_KERNEL
2 | #define _INTERPOLATION_CUDA_KERNEL
3 | #include <vector>
4 | #include <torch/serialize/tensor.h>
5 | #include <ATen/cuda/CUDAContext.h>
6 |
7 | void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor);
8 | void interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_input_tensor);
9 |
10 | #ifdef __cplusplus
11 | extern "C" {
12 | #endif
13 |
14 | void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output);
15 | void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input);
16 |
17 | #ifdef __cplusplus
18 | }
19 | #endif
20 | #endif
21 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/knnquery/knnquery_cuda.cpp:
--------------------------------------------------------------------------------
1 | #include <vector>
2 | #include <torch/serialize/tensor.h>
3 | #include <ATen/cuda/CUDAContext.h>
4 | #include <cuda_runtime_api.h>
5 | #include "knnquery_cuda_kernel.h"
6 |
7 |
8 | void knnquery_cuda(int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor)
9 | {
10 |     const float *xyz = xyz_tensor.data_ptr<float>();
11 |     const float *new_xyz = new_xyz_tensor.data_ptr<float>();
12 |     const int *offset = offset_tensor.data_ptr<int>();
13 |     const int *new_offset = new_offset_tensor.data_ptr<int>();
14 |     int *idx = idx_tensor.data_ptr<int>();
15 |     float *dist2 = dist2_tensor.data_ptr<float>();
16 | knnquery_cuda_launcher(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2);
17 | }
18 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/knnquery/knnquery_cuda_kernel.cu:
--------------------------------------------------------------------------------
1 | #include "../cuda_utils.h"
2 | #include "knnquery_cuda_kernel.h"
3 |
4 |
5 | __device__ void swap_float(float *x, float *y)
6 | {
7 | float tmp = *x;
8 | *x = *y;
9 | *y = tmp;
10 | }
11 |
12 |
13 | __device__ void swap_int(int *x, int *y)
14 | {
15 | int tmp = *x;
16 | *x = *y;
17 | *y = tmp;
18 | }
19 |
20 |
21 | __device__ void reheap(float *dist, int *idx, int k)
22 | {
23 | int root = 0;
24 | int child = root * 2 + 1;
25 | while (child < k)
26 | {
27 | if(child + 1 < k && dist[child+1] > dist[child])
28 | child++;
29 | if(dist[root] > dist[child])
30 | return;
31 | swap_float(&dist[root], &dist[child]);
32 | swap_int(&idx[root], &idx[child]);
33 | root = child;
34 | child = root * 2 + 1;
35 | }
36 | }
37 |
38 |
39 | __device__ void heap_sort(float *dist, int *idx, int k)
40 | {
41 | int i;
42 | for (i = k - 1; i > 0; i--)
43 | {
44 | swap_float(&dist[0], &dist[i]);
45 | swap_int(&idx[0], &idx[i]);
46 | reheap(dist, idx, i);
47 | }
48 | }
49 |
50 |
51 | __device__ int get_bt_idx(int idx, const int *offset)
52 | {
53 | int i = 0;
54 | while (1)
55 | {
56 | if (idx < offset[i])
57 | break;
58 | else
59 | i++;
60 | }
61 | return i;
62 | }
63 |
64 |
65 | __global__ void knnquery_cuda_kernel(int m, int nsample, const float *__restrict__ xyz, const float *__restrict__ new_xyz, const int *__restrict__ offset, const int *__restrict__ new_offset, int *__restrict__ idx, float *__restrict__ dist2) {
66 | // input: xyz (n, 3) new_xyz (m, 3)
67 | // output: idx (m, nsample) dist2 (m, nsample)
68 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
69 | if (pt_idx >= m) return;
70 |
71 | new_xyz += pt_idx * 3;
72 | idx += pt_idx * nsample;
73 | dist2 += pt_idx * nsample;
74 | int bt_idx = get_bt_idx(pt_idx, new_offset);
75 | int start;
76 | if (bt_idx == 0)
77 | start = 0;
78 | else
79 | start = offset[bt_idx - 1];
80 | int end = offset[bt_idx];
81 |
82 | float new_x = new_xyz[0];
83 | float new_y = new_xyz[1];
84 | float new_z = new_xyz[2];
85 |
86 |     float best_dist[100];  // fixed-size scratch heap: assumes nsample <= 100
87 |     int best_idx[100];
88 | for(int i = 0; i < nsample; i++){
89 | best_dist[i] = 1e10;
90 | best_idx[i] = start;
91 | }
92 | for(int i = start; i < end; i++){
93 | float x = xyz[i * 3 + 0];
94 | float y = xyz[i * 3 + 1];
95 | float z = xyz[i * 3 + 2];
96 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z);
97 | if (d2 < best_dist[0]){
98 | best_dist[0] = d2;
99 | best_idx[0] = i;
100 | reheap(best_dist, best_idx, nsample);
101 | }
102 | }
103 | heap_sort(best_dist, best_idx, nsample);
104 | for(int i = 0; i < nsample; i++){
105 | idx[i] = best_idx[i];
106 | dist2[i] = best_dist[i];
107 | }
108 | }
109 |
110 |
111 | void knnquery_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2) {
112 | // input: new_xyz: (m, 3), xyz: (n, 3), idx: (m, nsample)
113 | dim3 blocks(DIVUP(m, THREADS_PER_BLOCK));
114 | dim3 threads(THREADS_PER_BLOCK);
115 |     knnquery_cuda_kernel<<<blocks, threads, 0>>>(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2);
116 | }
117 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/knnquery/knnquery_cuda_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _KNNQUERY_CUDA_KERNEL
2 | #define _KNNQUERY_CUDA_KERNEL
3 | #include <vector>
4 | #include <torch/serialize/tensor.h>
5 | #include <ATen/cuda/CUDAContext.h>
6 |
7 | void knnquery_cuda(int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor);
8 |
9 | #ifdef __cplusplus
10 | extern "C" {
11 | #endif
12 |
13 | void knnquery_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2);
14 |
15 | #ifdef __cplusplus
16 | }
17 | #endif
18 | #endif
19 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/pointops_api.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/serialize/tensor.h>
2 | #include <torch/extension.h>
3 |
4 | #include "knnquery/knnquery_cuda_kernel.h"
5 | #include "sampling/sampling_cuda_kernel.h"
6 | #include "grouping/grouping_cuda_kernel.h"
7 | #include "interpolation/interpolation_cuda_kernel.h"
8 | #include "aggregation/aggregation_cuda_kernel.h"
9 | #include "subtraction/subtraction_cuda_kernel.h"
10 |
11 |
12 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
13 | m.def("knnquery_cuda", &knnquery_cuda, "knnquery_cuda");
14 | m.def("furthestsampling_cuda", &furthestsampling_cuda, "furthestsampling_cuda");
15 | m.def("grouping_forward_cuda", &grouping_forward_cuda, "grouping_forward_cuda");
16 | m.def("grouping_backward_cuda", &grouping_backward_cuda, "grouping_backward_cuda");
17 | m.def("interpolation_forward_cuda", &interpolation_forward_cuda, "interpolation_forward_cuda");
18 | m.def("interpolation_backward_cuda", &interpolation_backward_cuda, "interpolation_backward_cuda");
19 | m.def("subtraction_forward_cuda", &subtraction_forward_cuda, "subtraction_forward_cuda");
20 | m.def("subtraction_backward_cuda", &subtraction_backward_cuda, "subtraction_backward_cuda");
21 | m.def("aggregation_forward_cuda", &aggregation_forward_cuda, "aggregation_forward_cuda");
22 | m.def("aggregation_backward_cuda", &aggregation_backward_cuda, "aggregation_backward_cuda");
23 | }
24 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/sampling/sampling_cuda.cpp:
--------------------------------------------------------------------------------
1 | #include <vector>
2 | #include <torch/serialize/tensor.h>
3 | #include <ATen/cuda/CUDAContext.h>
4 | #include <cuda_runtime_api.h>
5 | #include "sampling_cuda_kernel.h"
6 |
7 |
8 | void furthestsampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor)
9 | {
10 |     const float *xyz = xyz_tensor.data_ptr<float>();
11 |     const int *offset = offset_tensor.data_ptr<int>();
12 |     const int *new_offset = new_offset_tensor.data_ptr<int>();
13 |     float *tmp = tmp_tensor.data_ptr<float>();
14 |     int *idx = idx_tensor.data_ptr<int>();
15 | furthestsampling_cuda_launcher(b, n, xyz, offset, new_offset, tmp, idx);
16 | }
17 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/sampling/sampling_cuda_kernel.cu:
--------------------------------------------------------------------------------
1 | #include "../cuda_utils.h"
2 | #include "sampling_cuda_kernel.h"
3 |
4 |
5 | __device__ void __update(float *dists, int *dists_i, int idx1, int idx2) {
6 | const float v1 = dists[idx1], v2 = dists[idx2];
7 | const int i1 = dists_i[idx1], i2 = dists_i[idx2];
8 | dists[idx1] = max(v1, v2);
9 | dists_i[idx1] = v2 > v1 ? i2 : i1;
10 | }
11 |
12 | // input xyz: (n, 3), tmp: (b, n_max)
13 | // output: idx (m)
14 | template <unsigned int block_size>
15 | __global__ void furthestsampling_cuda_kernel(const float *xyz, const int *offset, const int *new_offset, float *tmp, int *idx)
16 | {
17 | __shared__ float dists[block_size];
18 | __shared__ int dists_i[block_size];
19 |
20 | int bid = blockIdx.x;
21 | int start_n, end_n, start_m, end_m, old;
22 | if (bid == 0) {
23 | start_n = 0;
24 | end_n = offset[0];
25 | start_m = 0;
26 | end_m = new_offset[0];
27 | old = 0;
28 | }
29 | else {
30 | start_n = offset[bid - 1];
31 | end_n = offset[bid];
32 | start_m = new_offset[bid - 1];
33 | end_m = new_offset[bid];
34 | old = offset[bid - 1];
35 | }
36 |
37 | const int stride = block_size;
38 | int tid = threadIdx.x;
39 | if (tid == 0) idx[start_m] = start_n;
40 |
41 | __syncthreads();
42 | for (int j = start_m + 1; j < end_m; j++)
43 | {
44 | int besti = start_n;
45 | float best = -1;
46 | float x1 = xyz[old * 3 + 0];
47 | float y1 = xyz[old * 3 + 1];
48 | float z1 = xyz[old * 3 + 2];
49 | for (int k = start_n + tid; k < end_n; k += stride)
50 | {
51 | float x2 = xyz[k * 3 + 0];
52 | float y2 = xyz[k * 3 + 1];
53 | float z2 = xyz[k * 3 + 2];
54 | float d = (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1);
55 | float d2 = min(d, tmp[k]);
56 | tmp[k] = d2;
57 | besti = d2 > best ? k : besti;
58 | best = d2 > best ? d2 : best;
59 | }
60 | dists[tid] = best;
61 | dists_i[tid] = besti;
62 | __syncthreads();
63 |
64 | if (block_size >= 1024) {
65 | if (tid < 512) {
66 | __update(dists, dists_i, tid, tid + 512);
67 | }
68 | __syncthreads();
69 | }
70 | if (block_size >= 512) {
71 | if (tid < 256) {
72 | __update(dists, dists_i, tid, tid + 256);
73 | }
74 | __syncthreads();
75 | }
76 | if (block_size >= 256) {
77 | if (tid < 128) {
78 | __update(dists, dists_i, tid, tid + 128);
79 | }
80 | __syncthreads();
81 | }
82 | if (block_size >= 128) {
83 | if (tid < 64) {
84 | __update(dists, dists_i, tid, tid + 64);
85 | }
86 | __syncthreads();
87 | }
88 | if (block_size >= 64) {
89 | if (tid < 32) {
90 | __update(dists, dists_i, tid, tid + 32);
91 | }
92 | __syncthreads();
93 | }
94 | if (block_size >= 32) {
95 | if (tid < 16) {
96 | __update(dists, dists_i, tid, tid + 16);
97 | }
98 | __syncthreads();
99 | }
100 | if (block_size >= 16) {
101 | if (tid < 8) {
102 | __update(dists, dists_i, tid, tid + 8);
103 | }
104 | __syncthreads();
105 | }
106 | if (block_size >= 8) {
107 | if (tid < 4) {
108 | __update(dists, dists_i, tid, tid + 4);
109 | }
110 | __syncthreads();
111 | }
112 | if (block_size >= 4) {
113 | if (tid < 2) {
114 | __update(dists, dists_i, tid, tid + 2);
115 | }
116 | __syncthreads();
117 | }
118 | if (block_size >= 2) {
119 | if (tid < 1) {
120 | __update(dists, dists_i, tid, tid + 1);
121 | }
122 | __syncthreads();
123 | }
124 |
125 | old = dists_i[0];
126 | if (tid == 0)
127 | idx[j] = old;
128 | }
129 | }
130 |
131 | void furthestsampling_cuda_launcher(int b, int n, const float *xyz, const int *offset, const int *new_offset, float *tmp, int *idx)
132 | {
133 | unsigned int n_threads = opt_n_threads(n);
134 | switch (n_threads) {
135 |     case 1024:
136 |         furthestsampling_cuda_kernel<1024><<<b, n_threads>>>(xyz, offset, new_offset, tmp, idx);
137 |         break;
138 |     case 512:
139 |         furthestsampling_cuda_kernel<512><<<b, n_threads>>>(xyz, offset, new_offset, tmp, idx);
140 |         break;
141 |     case 256:
142 |         furthestsampling_cuda_kernel<256><<<b, n_threads>>>(xyz, offset, new_offset, tmp, idx);
143 |         break;
144 |     case 128:
145 |         furthestsampling_cuda_kernel<128><<<b, n_threads>>>(xyz, offset, new_offset, tmp, idx);
146 |         break;
147 |     case 64:
148 |         furthestsampling_cuda_kernel<64><<<b, n_threads>>>(xyz, offset, new_offset, tmp, idx);
149 |         break;
150 |     case 32:
151 |         furthestsampling_cuda_kernel<32><<<b, n_threads>>>(xyz, offset, new_offset, tmp, idx);
152 |         break;
153 |     case 16:
154 |         furthestsampling_cuda_kernel<16><<<b, n_threads>>>(xyz, offset, new_offset, tmp, idx);
155 |         break;
156 |     case 8:
157 |         furthestsampling_cuda_kernel<8><<<b, n_threads>>>(xyz, offset, new_offset, tmp, idx);
158 |         break;
159 |     case 4:
160 |         furthestsampling_cuda_kernel<4><<<b, n_threads>>>(xyz, offset, new_offset, tmp, idx);
161 |         break;
162 |     case 2:
163 |         furthestsampling_cuda_kernel<2><<<b, n_threads>>>(xyz, offset, new_offset, tmp, idx);
164 |         break;
165 |     case 1:
166 |         furthestsampling_cuda_kernel<1><<<b, n_threads>>>(xyz, offset, new_offset, tmp, idx);
167 |         break;
168 |     default:
169 |         furthestsampling_cuda_kernel<512><<<b, n_threads>>>(xyz, offset, new_offset, tmp, idx);
170 | }
171 | }
172 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/sampling/sampling_cuda_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _SAMPLING_CUDA_KERNEL
2 | #define _SAMPLING_CUDA_KERNEL
3 | #include <vector>
4 | #include <torch/serialize/tensor.h>
5 | #include <ATen/cuda/CUDAContext.h>
6 |
7 | void furthestsampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor);
8 |
9 | #ifdef __cplusplus
10 | extern "C" {
11 | #endif
12 |
13 | void furthestsampling_cuda_launcher(int b, int n, const float *xyz, const int *offset, const int *new_offset, float *tmp, int *idx);
14 |
15 | #ifdef __cplusplus
16 | }
17 | #endif
18 | #endif
19 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/subtraction/subtraction_cuda.cpp:
--------------------------------------------------------------------------------
1 | #include <vector>
2 | #include <torch/serialize/tensor.h>
3 | #include <ATen/cuda/CUDAContext.h>
4 | #include <cuda_runtime_api.h>
5 | #include "subtraction_cuda_kernel.h"
6 |
7 |
8 | void subtraction_forward_cuda(int n, int nsample, int c, at::Tensor input1_tensor, at::Tensor input2_tensor, at::Tensor idx_tensor, at::Tensor output_tensor)
9 | {
10 |     const float *input1 = input1_tensor.data_ptr<float>();
11 |     const float *input2 = input2_tensor.data_ptr<float>();
12 |     const int *idx = idx_tensor.data_ptr<int>();
13 |     float *output = output_tensor.data_ptr<float>();
14 | subtraction_forward_cuda_launcher(n, nsample, c, input1, input2, idx, output);
15 | }
16 |
17 | void subtraction_backward_cuda(int n, int nsample, int c, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input1_tensor, at::Tensor grad_input2_tensor)
18 | {
19 |     const int *idx = idx_tensor.data_ptr<int>();
20 |     const float *grad_output = grad_output_tensor.data_ptr<float>();
21 |     float *grad_input1 = grad_input1_tensor.data_ptr<float>();
22 |     float *grad_input2 = grad_input2_tensor.data_ptr<float>();
23 | subtraction_backward_cuda_launcher(n, nsample, c, idx, grad_output, grad_input1, grad_input2);
24 | }
25 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/subtraction/subtraction_cuda_kernel.cu:
--------------------------------------------------------------------------------
1 | #include "../cuda_utils.h"
2 | #include "subtraction_cuda_kernel.h"
3 |
4 |
5 | __global__ void subtraction_forward_cuda_kernel(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) {
6 | // input: input1: (n, c), input2: (n, c), idx: (n, nsample), output: (n, nsample, c)
7 | int index = blockIdx.x * blockDim.x + threadIdx.x;
8 | if (index >= n * nsample * c) return;
9 | const int c_idx = index % c;
10 | const int nsample_idx = (index / c) % nsample;
11 | const int n_idx = index / nsample / c;
12 | const int idx_idx = n_idx * nsample + nsample_idx;
13 | const int input1_idx = n_idx * c + c_idx;
14 | const int input2_idx = idx[idx_idx] * c + c_idx;
15 | output[index] = input1[input1_idx] - input2[input2_idx];
16 | }
17 |
18 | __global__ void subtraction_backward_cuda_kernel(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) {
19 | // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c)
20 | int index = blockIdx.x * blockDim.x + threadIdx.x;
21 | if (index >= n * nsample * c) return;
22 | const int c_idx = index % c;
23 | const int nsample_idx = (index / c) % nsample;
24 | const int n_idx = index / nsample / c;
25 | const int idx_idx = n_idx * nsample + nsample_idx;
26 | const int input1_idx = n_idx * c + c_idx;
27 | const int input2_idx = idx[idx_idx] * c + c_idx;
28 | atomicAdd(grad_input1 + input1_idx, grad_output[index]);
29 | atomicAdd(grad_input2 + input2_idx, -grad_output[index]);
30 | }
31 |
32 | void subtraction_forward_cuda_launcher(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) {
33 | // input: input1: (n, c), input2: (n, c), idx: (n, nsample), output: (n, nsample, c)
34 | dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK));
35 | dim3 threads(THREADS_PER_BLOCK);
36 |     subtraction_forward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, input1, input2, idx, output);
37 | }
38 |
39 | void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) {
40 | // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c)
41 | dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK));
42 | dim3 threads(THREADS_PER_BLOCK);
43 |     subtraction_backward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, idx, grad_output, grad_input1, grad_input2);
44 | }
45 |
--------------------------------------------------------------------------------
/segmentation/modules/pointops/src/subtraction/subtraction_cuda_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _SUBTRACTION_CUDA_KERNEL
2 | #define _SUBTRACTION_CUDA_KERNEL
3 | #include <vector>
4 | #include <torch/serialize/tensor.h>