├── .gitignore ├── LICENSE.txt ├── README.md ├── assets └── teaser.png ├── classification ├── README.md ├── dataset │ ├── ScanObjectNNDataLoader.py │ └── __init__.py ├── init.sh ├── models │ ├── __init__.py │ └── repsurf │ │ ├── __init__.py │ │ ├── repsurf_ssg_umb.py │ │ └── repsurf_ssg_umb_2x.py ├── modules │ ├── __init__.py │ ├── pointnet2_utils.py │ ├── pointops │ │ ├── __init__.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── pointops.py │ │ ├── setup.py │ │ └── src │ │ │ ├── __init__.py │ │ │ ├── ballquery │ │ │ ├── ballquery_cuda.cpp │ │ │ ├── ballquery_cuda_kernel.cu │ │ │ └── ballquery_cuda_kernel.h │ │ │ ├── cuda_utils.h │ │ │ ├── grouping │ │ │ ├── grouping_cuda.cpp │ │ │ ├── grouping_cuda_kernel.cu │ │ │ └── grouping_cuda_kernel.h │ │ │ ├── grouping_int │ │ │ ├── grouping_int_cuda.cpp │ │ │ ├── grouping_int_cuda_kernel.cu │ │ │ └── grouping_int_cuda_kernel.h │ │ │ ├── interpolation │ │ │ ├── interpolation_cuda.cpp │ │ │ ├── interpolation_cuda_kernel.cu │ │ │ └── interpolation_cuda_kernel.h │ │ │ ├── knnquery │ │ │ ├── __init__.py │ │ │ ├── knnquery_cuda.cpp │ │ │ ├── knnquery_cuda_kernel.cu │ │ │ └── knnquery_cuda_kernel.h │ │ │ ├── knnquery_heap │ │ │ ├── __init__.py │ │ │ ├── knnquery_heap_cuda.cpp │ │ │ ├── knnquery_heap_cuda_kernel.cu │ │ │ └── knnquery_heap_cuda_kernel.h │ │ │ ├── pointops_api.cpp │ │ │ └── sampling │ │ │ ├── sampling_cuda.cpp │ │ │ ├── sampling_cuda_kernel.cu │ │ │ └── sampling_cuda_kernel.h │ ├── polar_utils.py │ ├── ptaug_utils.py │ ├── recons_utils.py │ └── repsurface_utils.py ├── scripts │ └── scanobjectnn │ │ ├── repsurf_ssg_umb.sh │ │ └── repsurf_ssg_umb_2x.sh ├── tool │ └── train_cls_scanobjectnn.py └── util │ ├── __init__.py │ └── utils.py ├── segmentation ├── README.md ├── dataset │ ├── S3DISDataLoader.py │ └── __init__.py ├── init.sh ├── models │ ├── __init__.py │ ├── pointnet2 │ │ ├── __init__.py │ │ └── pointnet2_ssg.py │ ├── pointtransformer │ │ ├── __init__.py │ │ └── pointtransformer.py │ └── repsurf │ │ ├── 
__init__.py │ │ └── repsurf_umb_ssg.py ├── modules │ ├── __init__.py │ ├── aug_utils.py │ ├── pointnet2_utils.py │ ├── pointops │ │ ├── __init__.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── pointops.py │ │ ├── setup.py │ │ └── src │ │ │ ├── __init__.py │ │ │ ├── aggregation │ │ │ ├── aggregation_cuda.cpp │ │ │ ├── aggregation_cuda_kernel.cu │ │ │ └── aggregation_cuda_kernel.h │ │ │ ├── cuda_utils.h │ │ │ ├── grouping │ │ │ ├── grouping_cuda.cpp │ │ │ ├── grouping_cuda_kernel.cu │ │ │ └── grouping_cuda_kernel.h │ │ │ ├── interpolation │ │ │ ├── interpolation_cuda.cpp │ │ │ ├── interpolation_cuda_kernel.cu │ │ │ └── interpolation_cuda_kernel.h │ │ │ ├── knnquery │ │ │ ├── knnquery_cuda.cpp │ │ │ ├── knnquery_cuda_kernel.cu │ │ │ └── knnquery_cuda_kernel.h │ │ │ ├── pointops_api.cpp │ │ │ ├── sampling │ │ │ ├── sampling_cuda.cpp │ │ │ ├── sampling_cuda_kernel.cu │ │ │ └── sampling_cuda_kernel.h │ │ │ └── subtraction │ │ │ ├── subtraction_cuda.cpp │ │ │ ├── subtraction_cuda_kernel.cu │ │ │ └── subtraction_cuda_kernel.h │ ├── pointtransformer_utils.py │ ├── polar_utils.py │ ├── recons_utils.py │ ├── repsurface_utils.py │ └── voxelize_utils.py ├── scripts │ └── s3dis │ │ ├── test_pointnet2.sh │ │ ├── test_pointtransformer.sh │ │ ├── test_repsurf_umb.sh │ │ ├── train_pointnet2.sh │ │ ├── train_pointtransformer.sh │ │ └── train_repsurf_umb.sh ├── tool │ ├── test_s3dis.py │ └── train.py └── util │ ├── __init__.py │ ├── data_util.py │ └── utils.py └── visualization ├── airplane_0001.txt ├── bed_0001.txt ├── cup_0001.txt ├── table_0250.txt ├── triangled_airplane.obj ├── triangled_bed.obj ├── triangled_cup.obj └── triangled_table.obj /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | 
.eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | .idea/ -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2022 Haoxi Ran. 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RepSurf - Surface Representation for Point Clouds
[CVPR 2022 Oral] 2 | 3 | By *[Haoxi Ran\*](https://hancyran.github.io/) , Jun Liu, Chengjie Wang* ( * : corresponding contact) 4 | 5 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/surface-representation-for-point-clouds/3d-point-cloud-classification-on-scanobjectnn)](https://paperswithcode.com/sota/3d-point-cloud-classification-on-scanobjectnn?p=surface-representation-for-point-clouds)
6 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/surface-representation-for-point-clouds/3d-object-detection-on-sun-rgbd-val)](https://paperswithcode.com/sota/3d-object-detection-on-sun-rgbd-val?p=surface-representation-for-point-clouds)
7 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/surface-representation-for-point-clouds/3d-point-cloud-classification-on-modelnet40)](https://paperswithcode.com/sota/3d-point-cloud-classification-on-modelnet40?p=surface-representation-for-point-clouds)
8 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/surface-representation-for-point-clouds/semantic-segmentation-on-s3dis)](https://paperswithcode.com/sota/semantic-segmentation-on-s3dis?p=surface-representation-for-point-clouds)
9 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/surface-representation-for-point-clouds/3d-object-detection-on-scannetv2)](https://paperswithcode.com/sota/3d-object-detection-on-scannetv2?p=surface-representation-for-point-clouds)
10 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/surface-representation-for-point-clouds/semantic-segmentation-on-s3dis-area5)](https://paperswithcode.com/sota/semantic-segmentation-on-s3dis-area5?p=surface-representation-for-point-clouds) 11 | 12 | ### The pytorch official implementation of "[Surface Representation for Point Clouds](http://arxiv.org/abs/2205.05740)" 13 | ### [PDF](https://openaccess.thecvf.com/content/CVPR2022/papers/Ran_Surface_Representation_for_Point_Clouds_CVPR_2022_paper.pdf) | [arXiv](http://arxiv.org/abs/2205.05740) 14 | 15 | 16 |
17 | 18 |
19 | 20 | 21 | ## News: 22 | - (**Sep 10** NEW :fire:) We have uploaded the implementation of RepSurf on S3DIS along with its training log and pretrained weights. 23 | - (**June 24** :fire:) We successfully finished our Oral presentation at CVPR 2022! 24 | - (**May 11**) We have uploaded the implementation of RepSurf on ScanObjectNN along with its training log and pretrained weights. 25 | 26 | ## Tasks: 27 | 28 | ### We conduct experiments of different tasks on different codebases: 29 | 30 | > Classification: **[3D Object Classification](./classification)**
31 | > Segmentation: **[3D Semantic Segmentation](./segmentation)** 32 | 33 | 34 | ## Visualization 35 | 36 | We provide several visualization results in the folder **./visualization** for a closer look at the construction of 37 | RepSurf. 38 | 39 | 40 | ## License 41 | 42 | RepSurf is under the Apache-2.0 license. Please contact the primary author **Haoxi Ran (ranhaoxi@gmail.com)** for 43 | commercial use. 44 | -------------------------------------------------------------------------------- /assets/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/assets/teaser.png -------------------------------------------------------------------------------- /classification/README.md: -------------------------------------------------------------------------------- 1 | # RepSurf for Classification
2 | 3 | By *[Haoxi Ran\*](https://hancyran.github.io/) , Jun Liu, Chengjie Wang* ( * : corresponding contact) 4 | 5 | ### [PDF](https://openaccess.thecvf.com/content/CVPR2022/papers/Ran_Surface_Representation_for_Point_Clouds_CVPR_2022_paper.pdf) | [arXiv](http://arxiv.org/abs/2205.05740) 6 | 7 | 8 | ## Preparation 9 | 10 | ### Environment 11 | 12 | We tested under the environment: 13 | 14 | * python 3.7 15 | * pytorch 1.6.0 16 | * cuda 10.1 17 | * gcc 7.2.0 18 | * h5py 19 | 20 | For anaconda user, initialize the conda environment **repsurf-cls** by: 21 | 22 | ``` 23 | sh init.sh 24 | ``` 25 | 26 | ## Experiments 27 | 28 | ### ScanObjectNN (Data & Logs: [Google Drive](https://drive.google.com/drive/folders/1DGWT9W46MKVI0-lu18hJhB-R3BFVWuCs?usp=sharing)) 29 | 30 | * Performance: 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 |
ModelAccuracy#ParamsAugmentCodeLogCheckpoint
MVTN82.84.24MNonelinkN/Alink
PointMLP85.712.6MScale, Shiftlinklinklink
PointNet++ SSG77.91.475MRotate, JitterlinkN/AN/A
Umbrella RepSurf (PointNet++ SSG)84.871.483MNonelinkgoogle drivegoogle drive (6MB)
Umbrella RepSurf (PointNet++ SSG, 2x)86.056.806MNonelinkgoogle drivegoogle drive (27MB)
92 |
93 | 94 | * To download dataset: 95 | 96 | ``` 97 | wget https://download.cs.stanford.edu/orion/scanobjectnn/h5_files.zip 98 | unzip h5_files.zip 99 | ln -s [PATH]/h5_files data/ScanObjectNN 100 | ``` 101 | 102 | **Note**: We conduct all experiments on the hardest variant of ScanObjectNN (**PB_T50_RS**). 103 |
104 | 105 | * To train **Umbrella RepSurf** on ScanObjectNN: 106 | 107 | ``` 108 | sh scripts/scanobjectnn/repsurf_ssg_umb.sh 109 | ``` 110 | 111 | * To train **Umbrella RepSurf (2x setting)** on ScanObjectNN: 112 | 113 | ``` 114 | sh scripts/scanobjectnn/repsurf_ssg_umb_2x.sh 115 | ``` 116 | 117 | ## Acknowledgment 118 | 119 | We use part of the library [pointops](https://github.com/hszhao/PointWeb/tree/master/lib/pointops) 120 | from [PointWeb](https://github.com/hszhao/PointWeb). 121 | 122 | ## License 123 | 124 | RepSurf is under the Apache-2.0 license. Please contact the primary author **Haoxi Ran (ranhaoxi@gmail.com)** for 125 | commercial use. 126 | -------------------------------------------------------------------------------- /classification/dataset/ScanObjectNNDataLoader.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: Haoxi Ran 3 | Date: 05/10/2022 4 | """ 5 | 6 | import h5py 7 | import warnings 8 | from torch.utils.data import Dataset 9 | 10 | warnings.filterwarnings('ignore') 11 | 12 | 13 | class ScanObjectNNDataLoader(Dataset): 14 | def __init__(self, root, split='training', bg=True): 15 | self.root = root 16 | 17 | assert (split == 'training' or split == 'test') 18 | if bg: 19 | print('Use data with background points') 20 | dir_name = 'main_split' 21 | else: 22 | print('Use data without background points') 23 | dir_name = 'main_split_nobg' 24 | file_name = '_objectdataset_augmentedrot_scale75.h5' 25 | h5_name = '{}/{}/{}'.format(self.root, dir_name, split + file_name) 26 | with h5py.File(h5_name, mode="r") as f: 27 | self.data = f['data'][:].astype('float32') 28 | self.label = f['label'][:].astype('int64') 29 | print('The size of %s data is %d' % (split, self.data.shape[0])) 30 | 31 | def __len__(self): 32 | return self.data.shape[0] 33 | 34 | def __getitem__(self, index): 35 | return self.data[index].T, self.label[index] 36 | 
-------------------------------------------------------------------------------- /classification/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/classification/dataset/__init__.py -------------------------------------------------------------------------------- /classification/init.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | mkdir -p log/PointAnalysis/log/ScanObjectNN 4 | mkdir -p data/ 5 | 6 | conda create -n repsurf-cls python=3.7 -y 7 | conda activate repsurf-cls 8 | 9 | conda install pytorch=1.6.0 torchvision=0.7.0 cudatoolkit=10.1 -c pytorch -c conda-forge -y 10 | conda install -c anaconda h5py -y 11 | 12 | cd modules/pointops 13 | python3 setup.py install 14 | cd - 15 | -------------------------------------------------------------------------------- /classification/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/classification/models/__init__.py -------------------------------------------------------------------------------- /classification/models/repsurf/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/classification/models/repsurf/__init__.py -------------------------------------------------------------------------------- /classification/models/repsurf/repsurf_ssg_umb.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: Haoxi Ran 3 | Date: 05/10/2022 4 | """ 5 | 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from modules.repsurface_utils import SurfaceAbstractionCD, UmbrellaSurfaceConstructor 9 | 10 | 11 | 
class Model(nn.Module):
    """RepSurf SSG (umbrella) classification network.

    Umbrella surface features are computed from the raw xyz input, then
    three set-abstraction stages feed an MLP head that emits per-class
    log-probabilities.
    """

    def __init__(self, args):
        super(Model, self).__init__()
        # 0 when centers are not consumed; 3 for xyz, 6 when polar
        # coordinates are appended to them.
        center_channel = 0 if not args.return_center else (6 if args.return_polar else 3)
        repsurf_channel = 10

        self.init_nsample = args.num_point
        self.return_dist = args.return_dist
        self.surface_constructor = UmbrellaSurfaceConstructor(
            args.group_size + 1, repsurf_channel,
            return_dist=args.return_dist, aggr_type=args.umb_pool,
            cuda=args.cuda_ops)

        # Keyword arguments shared by every abstraction stage.
        shared = dict(pos_channel=center_channel,
                      return_polar=args.return_polar,
                      cuda=args.cuda_ops)
        self.sa1 = SurfaceAbstractionCD(npoint=512, radius=0.2, nsample=32,
                                        feat_channel=repsurf_channel,
                                        mlp=[64, 64, 128], group_all=False, **shared)
        self.sa2 = SurfaceAbstractionCD(npoint=128, radius=0.4, nsample=64,
                                        feat_channel=128 + repsurf_channel,
                                        mlp=[128, 128, 256], group_all=False, **shared)
        self.sa3 = SurfaceAbstractionCD(npoint=None, radius=None, nsample=None,
                                        feat_channel=256 + repsurf_channel,
                                        mlp=[256, 512, 1024], group_all=True, **shared)
        # modelnet40
        # Classification head; attribute name kept as-is ('classfier') so
        # existing checkpoints keep loading.
        self.classfier = nn.Sequential(
            nn.Linear(1024, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(True),
            nn.Dropout(0.4),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(True),
            nn.Dropout(0.4),
            nn.Linear(256, args.num_class))

    def forward(self, points):
        # Only the xyz channels of the input are used.
        center = points[:, :3, :]

        normal = self.surface_constructor(center)

        feature = None
        for stage in (self.sa1, self.sa2, self.sa3):
            center, normal, feature = stage(center, normal, feature)

        logits = self.classfier(feature.view(-1, 1024))
        return F.log_softmax(logits, -1)
-------------------------------------------------------------------------------- /classification/models/repsurf/repsurf_ssg_umb_2x.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: Haoxi Ran 3 | Date: 05/10/2022 4 | """ 5 | 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from modules.repsurface_utils import SurfaceAbstractionCD, UmbrellaSurfaceConstructor 9 | 10 | 11 | class Model(nn.Module): 12 | def __init__(self, args): 13 | super(Model, self).__init__() 14 | center_channel = 0 if not args.return_center else (6 if args.return_polar else 3) 15 | repsurf_channel = 10 16 | 17 | self.init_nsample = args.num_point 18 | self.return_dist = args.return_dist 19 | self.surface_constructor = UmbrellaSurfaceConstructor(args.group_size + 1, repsurf_channel, 20 | return_dist=args.return_dist, aggr_type=args.umb_pool, 21 | cuda=args.cuda_ops) 22 | self.sa1 = SurfaceAbstractionCD(npoint=512, radius=0.1, nsample=24, feat_channel=repsurf_channel, 23 | pos_channel=center_channel, mlp=[128, 128, 256], group_all=False, 24 | return_polar=args.return_polar, cuda=args.cuda_ops) 25 | self.sa2 = SurfaceAbstractionCD(npoint=128, radius=0.2, nsample=24, feat_channel=256 + repsurf_channel, 26 | pos_channel=center_channel, mlp=[256, 256, 512], group_all=False, 27 | return_polar=args.return_polar, cuda=args.cuda_ops) 28 | self.sa3 = SurfaceAbstractionCD(npoint=32, radius=0.4, nsample=24, feat_channel=512 + repsurf_channel, 29 | pos_channel=center_channel, mlp=[512, 512, 1024], group_all=False, 30 | return_polar=args.return_polar, cuda=args.cuda_ops) 31 | self.sa4 = SurfaceAbstractionCD(npoint=None, radius=None, nsample=None, feat_channel=1024 + repsurf_channel, 32 | pos_channel=center_channel, mlp=[1024, 1024, 2048], group_all=True, 33 | return_polar=args.return_polar, cuda=args.cuda_ops) 34 | # modelnet40 35 | self.classfier = nn.Sequential( 36 | nn.Linear(2048, 512), 37 | nn.BatchNorm1d(512), 38 | nn.ReLU(True), 39 | 
nn.Dropout(0.4), 40 | nn.Linear(512, 256), 41 | nn.BatchNorm1d(256), 42 | nn.ReLU(True), 43 | nn.Dropout(0.4), 44 | nn.Linear(256, args.num_class)) 45 | 46 | def forward(self, points): 47 | # init 48 | center = points[:, :3, :] 49 | 50 | normal = self.surface_constructor(center) 51 | 52 | center, normal, feature = self.sa1(center, normal, None) 53 | center, normal, feature = self.sa2(center, normal, feature) 54 | center, normal, feature = self.sa3(center, normal, feature) 55 | center, normal, feature = self.sa4(center, normal, feature) 56 | 57 | feature = feature.view(-1, 2048) 58 | feature = self.classfier(feature) 59 | feature = F.log_softmax(feature, -1) 60 | 61 | return feature 62 | -------------------------------------------------------------------------------- /classification/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/classification/modules/__init__.py -------------------------------------------------------------------------------- /classification/modules/pointnet2_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: Haoxi Ran 3 | Date: 05/10/2022 4 | """ 5 | 6 | import torch 7 | 8 | try: 9 | from modules.pointops.functions.pointops import furthestsampling, gathering, ballquery, knnquery, \ 10 | grouping, interpolation, nearestneighbor 11 | except: 12 | raise Exception('Failed to load pointops') 13 | 14 | 15 | def square_distance(src, dst): 16 | """ 17 | Calculate Squared distance between each two points. 
18 | 19 | """ 20 | B, N, _ = src.shape 21 | _, M, _ = dst.shape 22 | dist = -2 * torch.matmul(src, dst.permute(0, 2, 1)) 23 | dist += torch.sum(src ** 2, -1).view(B, N, 1) 24 | dist += torch.sum(dst ** 2, -1).view(B, 1, M) 25 | return dist 26 | 27 | 28 | def index_points(points, idx, cuda=False, is_group=False): 29 | if cuda: 30 | if is_group: 31 | points = grouping(points.transpose(1, 2).contiguous(), idx) 32 | return points.permute(0, 2, 3, 1).contiguous() 33 | else: 34 | points = gathering(points.transpose(1, 2).contiguous(), idx) 35 | return points.permute(0, 2, 1).contiguous() 36 | device = points.device 37 | B = points.shape[0] 38 | view_shape = list(idx.shape) 39 | view_shape[1:] = [1] * (len(view_shape) - 1) 40 | repeat_shape = list(idx.shape) 41 | repeat_shape[0] = 1 42 | batch_indices = torch.arange(B, dtype=torch.long).to(device).view(view_shape).repeat(repeat_shape) 43 | new_points = points[batch_indices, idx, :] 44 | return new_points 45 | 46 | 47 | def farthest_point_sample(xyz, npoint, cuda=False): 48 | """ 49 | Input: 50 | xyz: pointcloud data, [B, N, 3] 51 | npoint: number of samples 52 | Return: 53 | centroids: sampled pointcloud index, [B, npoint] 54 | 55 | FLOPs: 56 | S * (3 + 3 + 2) 57 | """ 58 | if cuda: 59 | if not xyz.is_contiguous(): 60 | xyz = xyz.contiguous() 61 | return furthestsampling(xyz, npoint) 62 | device = xyz.device 63 | B, N, C = xyz.shape 64 | centroids = torch.zeros(B, npoint, dtype=torch.long).to(device) 65 | distance = torch.ones(B, N).to(device) * 1e10 66 | farthest = torch.randint(0, N, (B,), dtype=torch.long).to(device) 67 | batch_indices = torch.arange(B, dtype=torch.long).to(device) 68 | for i in range(npoint): 69 | centroids[:, i] = farthest 70 | centroid = xyz[batch_indices, farthest, :].view(B, 1, 3) 71 | dist = torch.sum((xyz - centroid) ** 2, -1) 72 | mask = dist < distance 73 | distance[mask] = dist[mask] 74 | farthest = torch.max(distance, -1)[1] 75 | return centroids 76 | 77 | 78 | def query_ball_point(radius, 
nsample, xyz, new_xyz, debug=False, cuda=False): 79 | if cuda: 80 | if not xyz.is_contiguous(): 81 | xyz = xyz.contiguous() 82 | if not new_xyz.is_contiguous(): 83 | new_xyz = new_xyz.contiguous() 84 | return ballquery(radius, nsample, xyz, new_xyz) 85 | device = xyz.device 86 | B, N, C = xyz.shape 87 | _, S, _ = new_xyz.shape 88 | group_idx = torch.arange(N, dtype=torch.long).to(device).view(1, 1, N).repeat([B, S, 1]) 89 | sqrdists = square_distance(new_xyz, xyz) 90 | group_idx[sqrdists > radius ** 2] = N 91 | group_idx = group_idx.sort(dim=-1)[0][:, :, :nsample] 92 | group_first = group_idx[:, :, 0].view(B, S, 1).repeat([1, 1, nsample]) 93 | mask = group_idx == N 94 | group_idx[mask] = group_first[mask] 95 | if debug: 96 | num_miss = torch.sum(mask) 97 | num_over = torch.sum(torch.clamp(torch.sum(sqrdists < radius ** 2, dim=2) - nsample, min=0)) 98 | return num_miss, num_over 99 | return group_idx 100 | 101 | 102 | def query_knn_point(k, xyz, new_xyz, cuda=False): 103 | if cuda: 104 | if not xyz.is_contiguous(): 105 | xyz = xyz.contiguous() 106 | if not new_xyz.is_contiguous(): 107 | new_xyz = new_xyz.contiguous() 108 | return knnquery(k, xyz, new_xyz) 109 | dist = square_distance(new_xyz, xyz) 110 | group_idx = dist.sort(descending=False, dim=-1)[1][:, :, :k] 111 | return group_idx 112 | 113 | 114 | def sample(nsample, feature, cuda=False): 115 | feature = feature.permute(0, 2, 1) 116 | xyz = feature[:, :, :3] 117 | 118 | fps_idx = farthest_point_sample(xyz, nsample, cuda=cuda) # [B, npoint, C] 119 | torch.cuda.empty_cache() 120 | feature = index_points(feature, fps_idx, cuda=cuda, is_group=False) 121 | torch.cuda.empty_cache() 122 | feature = feature.permute(0, 2, 1) 123 | 124 | return feature 125 | -------------------------------------------------------------------------------- /classification/modules/pointops/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/classification/modules/pointops/__init__.py -------------------------------------------------------------------------------- /classification/modules/pointops/functions/__init__.py: -------------------------------------------------------------------------------- 1 | from .pointops import * 2 | -------------------------------------------------------------------------------- /classification/modules/pointops/setup.py: -------------------------------------------------------------------------------- 1 | #python3 setup.py install 2 | 3 | from setuptools import setup 4 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 5 | import os 6 | from distutils.sysconfig import get_config_vars 7 | 8 | (opt,) = get_config_vars('OPT') 9 | os.environ['OPT'] = " ".join( 10 | flag for flag in opt.split() if flag != '-Wstrict-prototypes' 11 | ) 12 | 13 | setup( 14 | name='pointops', 15 | ext_modules=[ 16 | CUDAExtension('pointops_cuda', [ 17 | 'src/pointops_api.cpp', 18 | 19 | 'src/ballquery/ballquery_cuda.cpp', 20 | 'src/ballquery/ballquery_cuda_kernel.cu', 21 | 'src/knnquery/knnquery_cuda.cpp', 22 | 'src/knnquery/knnquery_cuda_kernel.cu', 23 | 'src/knnquery_heap/knnquery_heap_cuda.cpp', 24 | 'src/knnquery_heap/knnquery_heap_cuda_kernel.cu', 25 | 'src/grouping/grouping_cuda.cpp', 26 | 'src/grouping/grouping_cuda_kernel.cu', 27 | 'src/grouping_int/grouping_int_cuda.cpp', 28 | 'src/grouping_int/grouping_int_cuda_kernel.cu', 29 | 'src/interpolation/interpolation_cuda.cpp', 30 | 'src/interpolation/interpolation_cuda_kernel.cu', 31 | 'src/sampling/sampling_cuda.cpp', 32 | 'src/sampling/sampling_cuda_kernel.cu', 33 | ], 34 | extra_compile_args={'cxx': ['-g'], 35 | 'nvcc': ['-O2']}) 36 | ], 37 | cmdclass={'build_ext': BuildExtension}) 38 | -------------------------------------------------------------------------------- /classification/modules/pointops/src/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/classification/modules/pointops/src/__init__.py -------------------------------------------------------------------------------- /classification/modules/pointops/src/ballquery/ballquery_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "ballquery_cuda_kernel.h" 7 | 8 | extern THCState *state; 9 | 10 | #define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x, " must be a CUDAtensor ") 11 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") 12 | #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x) 13 | 14 | void ballquery_cuda(int b, int n, int m, float radius, int nsample, at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor idx_tensor) 15 | { 16 | const float *new_xyz = new_xyz_tensor.data_ptr(); 17 | const float *xyz = xyz_tensor.data_ptr(); 18 | int *idx = idx_tensor.data_ptr(); 19 | 20 | ballquery_cuda_launcher(b, n, m, radius, nsample, new_xyz, xyz, idx); 21 | } 22 | 23 | 24 | void ballquery_cuda_fast(int b, int n, int m, float radius, int nsample, at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor idx_tensor) 25 | { 26 | CHECK_INPUT(new_xyz_tensor); 27 | CHECK_INPUT(xyz_tensor); 28 | 29 | const float *new_xyz = new_xyz_tensor.data_ptr(); 30 | const float *xyz = xyz_tensor.data_ptr(); 31 | int *idx = idx_tensor.data_ptr(); 32 | 33 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 34 | 35 | ballquery_cuda_launcher_fast(b, n, m, radius, nsample, new_xyz, xyz, idx, stream); 36 | } 37 | -------------------------------------------------------------------------------- /classification/modules/pointops/src/ballquery/ballquery_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include 
"../cuda_utils.h" 2 | #include "ballquery_cuda_kernel.h" 3 | 4 | // input: new_xyz(b, m, 3) xyz(b, n, 3) 5 | // output: idx(b, m, nsample) 6 | __global__ void ballquery_cuda_kernel(int b, int n, int m, float radius, int nsample, const float *new_xyz, const float *xyz, int *idx) 7 | { 8 | int batch_index = blockIdx.x; 9 | xyz += batch_index * n * 3; 10 | new_xyz += batch_index * m * 3; 11 | idx += m * nsample * batch_index; 12 | int index = threadIdx.x; 13 | int stride = blockDim.x; 14 | 15 | float radius2 = radius * radius; 16 | for (int j = index; j < m; j += stride) 17 | { 18 | float new_x = new_xyz[j * 3 + 0]; 19 | float new_y = new_xyz[j * 3 + 1]; 20 | float new_z = new_xyz[j * 3 + 2]; 21 | for (int k = 0, cnt = 0; k < n && cnt < nsample; ++k) 22 | { 23 | float x = xyz[k * 3 + 0]; 24 | float y = xyz[k * 3 + 1]; 25 | float z = xyz[k * 3 + 2]; 26 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z); 27 | if (d2 < radius2) 28 | { 29 | if (cnt == 0) 30 | { 31 | for (int l = 0; l < nsample; ++l) 32 | idx[j * nsample + l] = k; 33 | } 34 | idx[j * nsample + cnt] = k; 35 | ++cnt; 36 | } 37 | } 38 | } 39 | } 40 | 41 | void ballquery_cuda_launcher(int b, int n, int m, float radius, int nsample, const float *new_xyz, const float *xyz, int *idx) 42 | { 43 | ballquery_cuda_kernel<<>>(b, n, m, radius, nsample, new_xyz, xyz, idx); 44 | } 45 | 46 | 47 | __global__ void ballquery_cuda_kernel_fast(int b, int n, int m, float radius, int nsample, const float *__restrict__ new_xyz, const float *__restrict__ xyz, int *__restrict__ idx) { 48 | int bs_idx = blockIdx.y; 49 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 50 | if (bs_idx >= b || pt_idx >= m) return; 51 | 52 | new_xyz += bs_idx * m * 3 + pt_idx * 3; 53 | xyz += bs_idx * n * 3; 54 | idx += bs_idx * m * nsample + pt_idx * nsample; 55 | 56 | float radius2 = radius * radius; 57 | float new_x = new_xyz[0]; 58 | float new_y = new_xyz[1]; 59 | float new_z = new_xyz[2]; 60 | 61 | 
int cnt = 0; 62 | for (int k = 0; k < n; ++k) { 63 | float x = xyz[k * 3 + 0]; 64 | float y = xyz[k * 3 + 1]; 65 | float z = xyz[k * 3 + 2]; 66 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z); 67 | if (d2 < radius2){ 68 | if (cnt == 0){ 69 | for (int l = 0; l < nsample; ++l) { 70 | idx[l] = k; 71 | } 72 | } 73 | idx[cnt] = k; 74 | ++cnt; 75 | if (cnt >= nsample){ 76 | break; 77 | } 78 | } 79 | } 80 | } 81 | 82 | 83 | void ballquery_cuda_launcher_fast(int b, int n, int m, float radius, int nsample, const float *new_xyz, const float *xyz, int *idx, cudaStream_t stream) { 84 | // param new_xyz: (B, m, 3) 85 | // param xyz: (B, n, 3) 86 | // param idx: (B, m, nsample) 87 | 88 | cudaError_t err; 89 | 90 | dim3 blocks(DIVUP(m, THREADS_PER_BLOCK), b); // blockIdx.x(col), blockIdx.y(row) 91 | dim3 threads(THREADS_PER_BLOCK); 92 | 93 | ballquery_cuda_kernel_fast<<>>(b, n, m, radius, nsample, new_xyz, xyz, idx); 94 | // cudaDeviceSynchronize(); // for using printf in kernel function 95 | 96 | err = cudaGetLastError(); 97 | if (cudaSuccess != err) { 98 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 99 | exit(-1); 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /classification/modules/pointops/src/ballquery/ballquery_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _BALLQUERY_CUDA_KERNEL 2 | #define _BALLQUERY_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void ballquery_cuda(int b, int n, int m, float radius, int nsample, at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor idx_tensor); 8 | 9 | void ballquery_cuda_fast(int b, int n, int m, float radius, int nsample, at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor idx_tensor); 10 | 11 | #ifdef __cplusplus 12 | extern "C" { 13 | #endif 14 | 15 | void ballquery_cuda_launcher(int b, int n, int m, float radius, int 
nsample, const float *xyz, const float *new_xyz, int *idx); 16 | 17 | void ballquery_cuda_launcher_fast(int b, int n, int m, float radius, int nsample, const float *new_xyz, const float *xyz, int *idx, cudaStream_t stream); 18 | 19 | #ifdef __cplusplus 20 | } 21 | #endif 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /classification/modules/pointops/src/cuda_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef _CUDA_UTILS_H 2 | #define _CUDA_UTILS_H 3 | 4 | #include 5 | 6 | #define TOTAL_THREADS 1024 7 | 8 | #define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x, " must be a CUDAtensor ") 9 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") 10 | #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x) 11 | 12 | #define THREADS_PER_BLOCK 256 13 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 14 | 15 | inline int opt_n_threads(int work_size) { 16 | const int pow_2 = std::log(static_cast(work_size)) / std::log(2.0); 17 | return max(min(1 << pow_2, TOTAL_THREADS), 1); 18 | } 19 | 20 | inline dim3 opt_block_config(int x, int y) { 21 | const int x_threads = opt_n_threads(x); 22 | const int y_threads = max(min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1); 23 | dim3 block_config(x_threads, y_threads, 1); 24 | return block_config; 25 | } 26 | 27 | #endif -------------------------------------------------------------------------------- /classification/modules/pointops/src/grouping/grouping_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "grouping_cuda_kernel.h" 7 | 8 | extern THCState *state; 9 | 10 | void grouping_forward_cuda(int b, int c, int n, int m, int nsample, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor) 11 | { 12 | const float *points = points_tensor.data_ptr(); 13 | const int *idx = 
idx_tensor.data_ptr(); 14 | float *out = out_tensor.data_ptr(); 15 | grouping_forward_cuda_launcher(b, c, n, m, nsample, points, idx, out); 16 | } 17 | 18 | void grouping_backward_cuda(int b, int c, int n, int m, int nsample, at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor) 19 | { 20 | float *grad_points = grad_points_tensor.data_ptr(); 21 | const int *idx = idx_tensor.data_ptr(); 22 | const float *grad_out = grad_out_tensor.data_ptr(); 23 | grouping_backward_cuda_launcher(b, c, n, m, nsample, grad_out, idx, grad_points); 24 | } 25 | 26 | void grouping_forward_cuda_fast(int b, int c, int n, int npoints, int nsample, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor) { 27 | 28 | const float *points = points_tensor.data_ptr(); 29 | const int *idx = idx_tensor.data_ptr(); 30 | float *out = out_tensor.data_ptr(); 31 | grouping_forward_cuda_launcher_fast(b, c, n, npoints, nsample, points, idx, out); 32 | } -------------------------------------------------------------------------------- /classification/modules/pointops/src/grouping/grouping_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "grouping_cuda_kernel.h" 3 | 4 | // input: points(b, c, n) idx(b, m, nsample) 5 | // output: out(b, c, m, nsample) 6 | __global__ void grouping_forward_cuda_kernel(int b, int c, int n, int m, int nsample, const float *points, const int *idx, float *out) 7 | { 8 | int batch_index = blockIdx.x; 9 | points += batch_index * n * c; 10 | idx += batch_index * m * nsample; 11 | out += batch_index * m * nsample * c; 12 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 13 | const int stride = blockDim.y * blockDim.x; 14 | for (int i = index; i < c * m; i += stride) 15 | { 16 | const int l = i / m; 17 | const int j = i % m; 18 | for (int k = 0; k < nsample; ++k) 19 | { 20 | int ii = idx[j * nsample + k]; 21 | out[(l * m + j) * nsample + k] = 
points[l * n + ii]; 22 | } 23 | } 24 | } 25 | 26 | // input: grad_out(b, c, m, nsample), idx(b, m, nsample) 27 | // output: grad_points(b, c, n) 28 | __global__ void grouping_backward_cuda_kernel(int b, int c, int n, int m, int nsample, const float *grad_out, const int *idx, float *grad_points) 29 | { 30 | int batch_index = blockIdx.x; 31 | grad_out += batch_index * m * nsample * c; 32 | idx += batch_index * m * nsample; 33 | grad_points += batch_index * n * c; 34 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 35 | const int stride = blockDim.y * blockDim.x; 36 | for (int i = index; i < c * m; i += stride) 37 | { 38 | const int l = i / m; 39 | const int j = i % m; 40 | for (int k = 0; k < nsample; ++k) 41 | { 42 | int ii = idx[j * nsample + k]; 43 | atomicAdd(grad_points + l * n + ii, grad_out[(l * m + j) * nsample + k]); 44 | } 45 | } 46 | } 47 | 48 | void grouping_forward_cuda_launcher(int b, int c, int n, int m, int nsample, const float *points, const int *idx, float *out) 49 | { 50 | grouping_forward_cuda_kernel<<>>(b, c, n, m, nsample, points, idx, out); 51 | } 52 | 53 | void grouping_backward_cuda_launcher(int b, int c, int n, int m, int nsample, const float *grad_out, const int *idx, float *grad_points) 54 | { 55 | grouping_backward_cuda_kernel<<>>(b, c, n, m, nsample, grad_out, idx, grad_points); 56 | } 57 | 58 | // input: points(b, c, n) idx(b, npoints, nsample) 59 | // output: out(b, c, npoints, nsample) 60 | __global__ void grouping_forward_cuda_kernel_fast(int b, int c, int n, int npoints, int nsample, const float *__restrict__ points, const int *__restrict__ idx, float *__restrict__ out) { 61 | int bs_idx = blockIdx.z; 62 | int c_idx = blockIdx.y; 63 | int index = blockIdx.x * blockDim.x + threadIdx.x; 64 | int pt_idx = index / nsample; 65 | if (bs_idx >= b || c_idx >= c || pt_idx >= npoints) return; 66 | 67 | int sample_idx = index % nsample; 68 | 69 | idx += bs_idx * npoints * nsample + pt_idx * nsample + sample_idx; 70 | int in_idx = 
bs_idx * c * n + c_idx * n + idx[0]; 71 | int out_idx = bs_idx * c * npoints * nsample + c_idx * npoints * nsample + pt_idx * nsample + sample_idx; 72 | 73 | out[out_idx] = points[in_idx]; 74 | } 75 | 76 | // input: points(b, c, n) idx(b, npoints, nsample) 77 | // output: out(b, c, npoints, nsample) 78 | void grouping_forward_cuda_launcher_fast(int b, int c, int n, int npoints, int nsample, const float *points, const int *idx, float *out) { 79 | 80 | cudaError_t err; 81 | 82 | dim3 blocks(DIVUP(npoints * nsample, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row) 83 | dim3 threads(THREADS_PER_BLOCK); 84 | 85 | grouping_forward_cuda_kernel_fast<<>>(b, c, n, npoints, nsample, points, idx, out); 86 | // cudaDeviceSynchronize(); // for using printf in kernel function 87 | err = cudaGetLastError(); 88 | if (cudaSuccess != err) { 89 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 90 | exit(-1); 91 | } 92 | } 93 | 94 | 95 | -------------------------------------------------------------------------------- /classification/modules/pointops/src/grouping/grouping_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _GROUPING_CUDA_KERNEL 2 | #define _GROUPING_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void grouping_forward_cuda(int b, int c, int n, int m, int nsample, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out); 8 | void grouping_backward_cuda(int b, int c, int n, int m, int nsample, at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor); 9 | 10 | void grouping_forward_cuda_fast(int b, int c, int n, int npoints, int nsample, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor); 11 | 12 | #ifdef __cplusplus 13 | extern "C" { 14 | #endif 15 | 16 | void grouping_forward_cuda_launcher(int b, int c, int n, int m, int nsample, const float *points, const int *idx, float *out); 17 | void 
grouping_backward_cuda_launcher(int b, int c, int n, int m, int nsample, const float *grad_out, const int *idx, float *grad_points); 18 | 19 | void grouping_forward_cuda_launcher_fast(int b, int c, int n, int npoints, int nsample, const float *points, const int *idx, float *out); 20 | 21 | #ifdef __cplusplus 22 | } 23 | #endif 24 | #endif 25 | -------------------------------------------------------------------------------- /classification/modules/pointops/src/grouping_int/grouping_int_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "grouping_int_cuda_kernel.h" 7 | 8 | extern THCState *state; 9 | 10 | void grouping_int_forward_cuda(int b, int c, int n, int m, int nsample, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor) 11 | { 12 | const long int *points = points_tensor.data_ptr(); 13 | const int *idx = idx_tensor.data_ptr(); 14 | long int *out = out_tensor.data_ptr(); 15 | grouping_int_forward_cuda_launcher(b, c, n, m, nsample, points, idx, out); 16 | } 17 | 18 | void grouping_int_forward_cuda_fast(int b, int c, int n, int m, int nsample, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor) 19 | { 20 | const long int *points = points_tensor.data_ptr(); 21 | const int *idx = idx_tensor.data_ptr(); 22 | long int *out = out_tensor.data_ptr(); 23 | grouping_int_forward_cuda_launcher_fast(b, c, n, m, nsample, points, idx, out); 24 | } -------------------------------------------------------------------------------- /classification/modules/pointops/src/grouping_int/grouping_int_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "grouping_int_cuda_kernel.h" 3 | 4 | // input: points(b, c, n) idx(b, m, nsample) 5 | // output: out(b, c, m, nsample) 6 | __global__ void grouping_int_forward_cuda_kernel(int b, int c, int n, int m, int 
nsample, const long int *points, const int *idx, long int *out) 7 | { 8 | int batch_index = blockIdx.x; 9 | points += batch_index * n * c; 10 | idx += batch_index * m * nsample; 11 | out += batch_index * m * nsample * c; 12 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 13 | const int stride = blockDim.y * blockDim.x; 14 | for (int i = index; i < c * m; i += stride) 15 | { 16 | const int l = i / m; 17 | const int j = i % m; 18 | for (int k = 0; k < nsample; ++k) 19 | { 20 | int ii = idx[j * nsample + k]; 21 | out[(l * m + j) * nsample + k] = points[l * n + ii]; 22 | } 23 | } 24 | } 25 | 26 | 27 | void grouping_int_forward_cuda_launcher(int b, int c, int n, int m, int nsample, const long int *points, const int *idx, long int *out) 28 | { 29 | grouping_int_forward_cuda_kernel<<>>(b, c, n, m, nsample, points, idx, out); 30 | } 31 | 32 | 33 | __global__ void grouping_int_forward_cuda_kernel_fast(int b, int c, int n, int npoints, int nsample, const long int *__restrict__ points, const int *__restrict__ idx, long int *__restrict__ out) 34 | { 35 | int bs_idx = blockIdx.z; 36 | int c_idx = blockIdx.y; 37 | int index = blockIdx.x * blockDim.x + threadIdx.x; 38 | int pt_idx = index / nsample; 39 | if (bs_idx >= b || c_idx >= c || pt_idx >= npoints) return; 40 | 41 | int sample_idx = index % nsample; 42 | 43 | idx += bs_idx * npoints * nsample + pt_idx * nsample + sample_idx; 44 | int in_idx = bs_idx * c * n + c_idx * n + idx[0]; 45 | int out_idx = bs_idx * c * npoints * nsample + c_idx * npoints * nsample + pt_idx * nsample + sample_idx; 46 | 47 | out[out_idx] = points[in_idx]; 48 | } 49 | 50 | 51 | void grouping_int_forward_cuda_launcher_fast(int b, int c, int n, int npoints, int nsample, const long int *points, const int *idx, long int *out) 52 | { 53 | cudaError_t err; 54 | 55 | dim3 blocks(DIVUP(npoints * nsample, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row) 56 | dim3 threads(THREADS_PER_BLOCK); 57 | 58 | 
grouping_int_forward_cuda_kernel_fast<<>>(b, c, n, npoints, nsample, points, idx, out); 59 | // cudaDeviceSynchronize(); // for using printf in kernel function 60 | err = cudaGetLastError(); 61 | if (cudaSuccess != err) { 62 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 63 | exit(-1); 64 | } 65 | } -------------------------------------------------------------------------------- /classification/modules/pointops/src/grouping_int/grouping_int_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _GROUPING_INT_CUDA_KERNEL 2 | #define _GROUPING_INT_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void grouping_int_forward_cuda(int b, int c, int n, int m, int nsample, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out); 8 | 9 | void grouping_int_forward_cuda_fast(int b, int c, int n, int m, int nsample, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor); 10 | 11 | #ifdef __cplusplus 12 | extern "C" { 13 | #endif 14 | 15 | void grouping_int_forward_cuda_launcher(int b, int c, int n, int m, int nsample, const long int *points, const int *idx, long int *out); 16 | 17 | void grouping_int_forward_cuda_launcher_fast(int b, int c, int n, int npoints, int nsample, const long int *points, const int *idx, long int *out); 18 | 19 | #ifdef __cplusplus 20 | } 21 | #endif 22 | #endif 23 | -------------------------------------------------------------------------------- /classification/modules/pointops/src/interpolation/interpolation_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "interpolation_cuda_kernel.h" 6 | 7 | extern THCState *state; 8 | 9 | void nearestneighbor_cuda(int b, int n, int m, at::Tensor unknown_tensor, at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor) 10 | { 11 | const float *unknown = unknown_tensor.data_ptr(); 12 
| const float *known = known_tensor.data_ptr(); 13 | float *dist2 = dist2_tensor.data_ptr(); 14 | int *idx = idx_tensor.data_ptr(); 15 | nearestneighbor_cuda_launcher(b, n, m, unknown, known, dist2, idx); 16 | } 17 | 18 | void interpolation_forward_cuda(int b, int c, int m, int n, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor out_tensor) 19 | { 20 | const float *points = points_tensor.data_ptr(); 21 | const float *weight = weight_tensor.data_ptr(); 22 | float *out = out_tensor.data_ptr(); 23 | const int *idx = idx_tensor.data_ptr(); 24 | interpolation_forward_cuda_launcher(b, c, m, n, points, idx, weight, out); 25 | } 26 | 27 | void interpolation_backward_cuda(int b, int c, int n, int m, at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_points_tensor) 28 | { 29 | const float *grad_out = grad_out_tensor.data_ptr(); 30 | const float *weight = weight_tensor.data_ptr(); 31 | float *grad_points = grad_points_tensor.data_ptr(); 32 | const int *idx = idx_tensor.data_ptr(); 33 | interpolation_backward_cuda_launcher(b, c, n, m, grad_out, idx, weight, grad_points); 34 | } 35 | 36 | void nearestneighbor_cuda_fast(int b, int n, int m, at::Tensor unknown_tensor, at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor) { 37 | const float *unknown = unknown_tensor.data_ptr(); 38 | const float *known = known_tensor.data_ptr(); 39 | float *dist2 = dist2_tensor.data_ptr(); 40 | int *idx = idx_tensor.data_ptr(); 41 | nearestneighbor_cuda_launcher_fast(b, n, m, unknown, known, dist2, idx); 42 | } 43 | 44 | void interpolation_forward_cuda_fast(int b, int c, int m, int n, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor out_tensor) { 45 | 46 | const float *points = points_tensor.data_ptr(); 47 | const float *weight = weight_tensor.data_ptr(); 48 | float *out = out_tensor.data_ptr(); 49 | const int *idx = idx_tensor.data_ptr(); 50 | 
interpolation_forward_cuda_launcher_fast(b, c, m, n, points, idx, weight, out); 51 | } -------------------------------------------------------------------------------- /classification/modules/pointops/src/interpolation/interpolation_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "interpolation_cuda_kernel.h" 3 | 4 | // input: unknown(b, n, 3) known(b, m, 3) 5 | // output: dist2(b, n, 3), idx(b, n, 3) 6 | __global__ void nearestneighbor_cuda_kernel(int b, int n, int m, const float *unknown, const float *known, float *dist2, int *idx) 7 | { 8 | int batch_index = blockIdx.x; 9 | unknown += batch_index * n * 3; 10 | known += batch_index * m * 3; 11 | dist2 += batch_index * n * 3; 12 | idx += batch_index * n * 3; 13 | 14 | int index = threadIdx.x; 15 | int stride = blockDim.x; 16 | for (int j = index; j < n; j += stride) 17 | { 18 | float ux = unknown[j * 3 + 0]; 19 | float uy = unknown[j * 3 + 1]; 20 | float uz = unknown[j * 3 + 2]; 21 | 22 | double best1 = 1e40, best2 = 1e40, best3 = 1e40; 23 | int besti1 = 0, besti2 = 0, besti3 = 0; 24 | for (int k = 0; k < m; ++k) 25 | { 26 | float x = known[k * 3 + 0]; 27 | float y = known[k * 3 + 1]; 28 | float z = known[k * 3 + 2]; 29 | float d = 30 | (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z); 31 | if (d < best1) 32 | { 33 | best3 = best2; 34 | besti3 = besti2; 35 | best2 = best1; 36 | besti2 = besti1; 37 | best1 = d; 38 | besti1 = k; 39 | } 40 | else if (d < best2) 41 | { 42 | best3 = best2; 43 | besti3 = besti2; 44 | best2 = d; 45 | besti2 = k; 46 | } 47 | else if (d < best3) 48 | { 49 | best3 = d; 50 | besti3 = k; 51 | } 52 | } 53 | dist2[j * 3 + 0] = best1; 54 | dist2[j * 3 + 1] = best2; 55 | dist2[j * 3 + 2] = best3; 56 | idx[j * 3 + 0] = besti1; 57 | idx[j * 3 + 1] = besti2; 58 | idx[j * 3 + 2] = besti3; 59 | } 60 | } 61 | 62 | // input: points(b, c, m), idx(b, n, 3), weight(b, n, 3) 63 | // output: out(b, c, 
n) 64 | __global__ void interpolation_forward_cuda_kernel(int b, int c, int m, int n, const float *points, const int *idx, const float *weight, float *out) 65 | { 66 | int batch_index = blockIdx.x; 67 | points += batch_index * m * c; 68 | idx += batch_index * n * 3; 69 | weight += batch_index * n * 3; 70 | out += batch_index * n * c; 71 | 72 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 73 | const int stride = blockDim.y * blockDim.x; 74 | for (int i = index; i < c * n; i += stride) 75 | { 76 | const int l = i / n; 77 | const int j = i % n; 78 | float w1 = weight[j * 3 + 0]; 79 | float w2 = weight[j * 3 + 1]; 80 | float w3 = weight[j * 3 + 2]; 81 | int i1 = idx[j * 3 + 0]; 82 | int i2 = idx[j * 3 + 1]; 83 | int i3 = idx[j * 3 + 2]; 84 | out[i] = points[l * m + i1] * w1 + points[l * m + i2] * w2 + points[l * m + i3] * w3; 85 | } 86 | } 87 | 88 | // input: grad_out(b, c, n), idx(b, n, 3), weight(b, n, 3) 89 | // output: grad_points(b, c, m) 90 | __global__ void interpolation_backward_cuda_kernel( int b, int c, int n, int m, const float *grad_out, const int *idx, const float *weight, float *grad_points) 91 | { 92 | int batch_index = blockIdx.x; 93 | grad_out += batch_index * n * c; 94 | idx += batch_index * n * 3; 95 | weight += batch_index * n * 3; 96 | grad_points += batch_index * m * c; 97 | 98 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 99 | const int stride = blockDim.y * blockDim.x; 100 | for (int i = index; i < c * n; i += stride) 101 | { 102 | const int l = i / n; 103 | const int j = i % n; 104 | float w1 = weight[j * 3 + 0]; 105 | float w2 = weight[j * 3 + 1]; 106 | float w3 = weight[j * 3 + 2]; 107 | int i1 = idx[j * 3 + 0]; 108 | int i2 = idx[j * 3 + 1]; 109 | int i3 = idx[j * 3 + 2]; 110 | atomicAdd(grad_points + l * m + i1, grad_out[i] * w1); 111 | atomicAdd(grad_points + l * m + i2, grad_out[i] * w2); 112 | atomicAdd(grad_points + l * m + i3, grad_out[i] * w3); 113 | } 114 | } 115 | 116 | void 
nearestneighbor_cuda_launcher(int b, int n, int m, const float *unknown, const float *known, float *dist2, int *idx) 117 | { 118 | nearestneighbor_cuda_kernel<<>>(b, n, m, unknown, known, dist2, idx); 119 | } 120 | 121 | void interpolation_forward_cuda_launcher(int b, int c, int m, int n, const float *points, const int *idx, const float *weight, float *out) 122 | { 123 | interpolation_forward_cuda_kernel<<>>(b, c, m, n, points, idx, weight, out); 124 | } 125 | 126 | void interpolation_backward_cuda_launcher(int b, int n, int c, int m, const float *grad_out, const int *idx, const float *weight, float *grad_points) 127 | { 128 | interpolation_backward_cuda_kernel<<>>(b, n, c, m, grad_out, idx, weight, grad_points); 129 | } 130 | 131 | 132 | // input: unknown(b, n, 3) known(b, m, 3) 133 | // output: dist2(b, n, 3), idx(b, n, 3) 134 | __global__ void nearestneighbor_cuda_kernel_fast(int b, int n, int m, const float *__restrict__ unknown, const float *__restrict__ known, float *__restrict__ dist2, int *__restrict__ idx) { 135 | 136 | int bs_idx = blockIdx.y; 137 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 138 | if (bs_idx >= b || pt_idx >= n) return; 139 | 140 | unknown += bs_idx * n * 3 + pt_idx * 3; 141 | known += bs_idx * m * 3; 142 | dist2 += bs_idx * n * 3 + pt_idx * 3; 143 | idx += bs_idx * n * 3 + pt_idx * 3; 144 | 145 | float ux = unknown[0]; 146 | float uy = unknown[1]; 147 | float uz = unknown[2]; 148 | 149 | double best1 = 1e40, best2 = 1e40, best3 = 1e40; 150 | int besti1 = 0, besti2 = 0, besti3 = 0; 151 | for (int k = 0; k < m; ++k) { 152 | float x = known[k * 3 + 0]; 153 | float y = known[k * 3 + 1]; 154 | float z = known[k * 3 + 2]; 155 | float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z); 156 | if (d < best1) { 157 | best3 = best2; besti3 = besti2; 158 | best2 = best1; besti2 = besti1; 159 | best1 = d; besti1 = k; 160 | } 161 | else if (d < best2) { 162 | best3 = best2; besti3 = besti2; 163 | best2 = d; besti2 = k; 164 | 
} 165 | else if (d < best3) { 166 | best3 = d; besti3 = k; 167 | } 168 | } 169 | dist2[0] = best1; 170 | dist2[1] = best2; 171 | dist2[2] = best3; 172 | 173 | idx[0] = besti1; 174 | idx[1] = besti2; 175 | idx[2] = besti3; 176 | } 177 | 178 | 179 | // input: points(b, c, m), idx(b, n, 3), weight(b, n, 3) 180 | // output: out(b, c, n) 181 | __global__ void interpolation_forward_cuda_kernel_fast(int b, int c, int m, int n, const float *__restrict__ points, const int *__restrict__ idx, const float *__restrict__ weight, float *__restrict__ out) { 182 | 183 | int bs_idx = blockIdx.z; 184 | int c_idx = blockIdx.y; 185 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 186 | 187 | if (bs_idx >= b || c_idx >= c || pt_idx >= n) return; 188 | 189 | weight += bs_idx * n * 3 + pt_idx * 3; 190 | points += bs_idx * c * m + c_idx * m; 191 | idx += bs_idx * n * 3 + pt_idx * 3; 192 | out += bs_idx * c * n + c_idx * n; 193 | 194 | out[pt_idx] = weight[0] * points[idx[0]] + weight[1] * points[idx[1]] + weight[2] * points[idx[2]]; 195 | } 196 | 197 | 198 | void nearestneighbor_cuda_launcher_fast(int b, int n, int m, const float *unknown, const float *known, float *dist2, int *idx) 199 | { 200 | cudaError_t err; 201 | 202 | dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), b); // blockIdx.x(col), blockIdx.y(row) 203 | dim3 threads(THREADS_PER_BLOCK); 204 | 205 | nearestneighbor_cuda_kernel_fast<<>>(b, n, m, unknown, known, dist2, idx); 206 | 207 | err = cudaGetLastError(); 208 | if (cudaSuccess != err) { 209 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 210 | exit(-1); 211 | } 212 | } 213 | 214 | void interpolation_forward_cuda_launcher_fast(int b, int c, int m, int n, const float *points, const int *idx, const float *weight, float *out) { 215 | 216 | cudaError_t err; 217 | 218 | dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row) 219 | dim3 threads(THREADS_PER_BLOCK); 220 | interpolation_forward_cuda_kernel_fast<<>>(b, c, m, n, 
points, idx, weight, out); 221 | 222 | err = cudaGetLastError(); 223 | if (cudaSuccess != err) { 224 | fprintf(stderr, "CUDA kernel failed : %s\n", 225 | cudaGetErrorString(err)); 226 | exit(-1); 227 | } 228 | } 229 | -------------------------------------------------------------------------------- /classification/modules/pointops/src/interpolation/interpolation_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _INTERPOLATION_CUDA_KERNEL 2 | #define _INTERPOLATION_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void nearestneighbor_cuda(int b, int n, int m, at::Tensor unknown_tensor, at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor); 8 | void interpolation_forward_cuda(int b, int c, int m, int n, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor out_tensor); 9 | void interpolation_backward_cuda(int b, int c, int n, int m, at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_points_tensor); 10 | 11 | void nearestneighbor_cuda_fast(int b, int n, int m, at::Tensor unknown_tensor, at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor); 12 | void interpolation_forward_cuda_fast(int b, int c, int m, int n, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor out_tensor); 13 | 14 | #ifdef __cplusplus 15 | extern "C" { 16 | #endif 17 | 18 | void nearestneighbor_cuda_launcher(int b, int n, int m, const float *unknown, const float *known, float *dist2, int *idx); 19 | void interpolation_forward_cuda_launcher(int b, int c, int m, int n, const float *points, const int *idx, const float *weight, float *out); 20 | void interpolation_backward_cuda_launcher(int b, int c, int n, int m, const float *grad_out, const int *idx, const float *weight, float *grad_points); 21 | 22 | void nearestneighbor_cuda_launcher_fast(int b, int n, int m, const float *unknown, const float 
*known, float *dist2, int *idx); 23 | void interpolation_forward_cuda_launcher_fast(int b, int c, int m, int n, const float *points, const int *idx, const float *weight, float *out); 24 | 25 | #ifdef __cplusplus 26 | } 27 | #endif 28 | #endif 29 | -------------------------------------------------------------------------------- /classification/modules/pointops/src/knnquery/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/classification/modules/pointops/src/knnquery/__init__.py -------------------------------------------------------------------------------- /classification/modules/pointops/src/knnquery/knnquery_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "knnquery_cuda_kernel.h" 7 | 8 | extern THCState *state; 9 | 10 | #define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x, " must be a CUDAtensor ") 11 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") 12 | #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x) 13 | 14 | 15 | void knnquery_cuda(int b, int n, int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor) 16 | { 17 | CHECK_INPUT(new_xyz_tensor); 18 | CHECK_INPUT(xyz_tensor); 19 | 20 | const float *new_xyz = new_xyz_tensor.data_ptr(); 21 | const float *xyz = xyz_tensor.data_ptr(); 22 | int *idx = idx_tensor.data_ptr(); 23 | float *dist2 = dist2_tensor.data_ptr(); 24 | 25 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 26 | 27 | knnquery_cuda_launcher(b, n, m, nsample, xyz, new_xyz, idx, dist2, stream); 28 | } 29 | -------------------------------------------------------------------------------- /classification/modules/pointops/src/knnquery/knnquery_cuda_kernel.cu: 
--------------------------------------------------------------------------------
#include "../cuda_utils.h"
#include "knnquery_cuda_kernel.h"

// Brute-force k-nearest-neighbour search, one thread per query point.
// input: xyz (b, n, 3) new_xyz (b, m, 3)
// output: idx (b, m, nsample) dist2 (b, m, nsample)
__global__ void knnquery_cuda_kernel(int b, int n, int m, int nsample, const float *__restrict__ xyz, const float *__restrict__ new_xyz, int *__restrict__ idx, float *__restrict__ dist2) {
    int bs_idx = blockIdx.y;
    int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (bs_idx >= b || pt_idx >= m) return;

    // advance all pointers to this (batch, query point) slot
    new_xyz += bs_idx * m * 3 + pt_idx * 3;
    xyz += bs_idx * n * 3;
    idx += bs_idx * m * nsample + pt_idx * nsample;
    // BUG FIX: dist2 was never advanced, so every thread in every batch raced
    // on dist2[0 .. nsample-1] of batch 0 while idx was written correctly.
    // Offset it exactly like idx (the heap variant of this kernel does so).
    dist2 += bs_idx * m * nsample + pt_idx * nsample;

    float new_x = new_xyz[0];
    float new_y = new_xyz[1];
    float new_z = new_xyz[2];

    // Insertion sort of squared distances into fixed-size local arrays.
    // NOTE(review): silently assumes nsample <= 200 — confirm callers respect this.
    double best[200];
    int besti[200];
    for (int i = 0; i < nsample; i++) {
        best[i] = 1e40;
        besti[i] = 0;
    }
    for (int k = 0; k < n; k++) {
        float x = xyz[k * 3 + 0];
        float y = xyz[k * 3 + 1];
        float z = xyz[k * 3 + 2];
        float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z);
        for (int j = 0; j < nsample; j++) {
            if (d2 < best[j]) {
                // shift the tail down one slot and insert the new candidate
                for (int i = nsample - 1; i > j; i--) {
                    best[i] = best[i - 1];
                    besti[i] = besti[i - 1];
                }
                best[j] = d2;
                besti[j] = k;
                break;
            }
        }
    }
    for (int i = 0; i < nsample; i++) {
        idx[i] = besti[i];
        dist2[i] = best[i];
    }
}


void knnquery_cuda_launcher(int b, int n, int m, int nsample, const float *xyz, const float *new_xyz, int *idx, float *dist2, cudaStream_t stream) {
    // param new_xyz: (B, m, 3)
    // param xyz: (B, n, 3)
    // param idx: (B, m, nsample)

    cudaError_t err;

    dim3 blocks(DIVUP(m, THREADS_PER_BLOCK), b); // blockIdx.x(col),
blockIdx.y(row) 61 | dim3 threads(THREADS_PER_BLOCK); 62 | 63 | // fprintf('%d, %d', blocks, threads); 64 | knnquery_cuda_kernel<<>>(b, n, m, nsample, xyz, new_xyz, idx, dist2); 65 | // cudaDeviceSynchronize(); // for using printf in kernel function 66 | 67 | // err = cudaGetLastError(); 68 | // if (cudaSuccess != err) { 69 | // fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 70 | // exit(-1); 71 | // } 72 | } -------------------------------------------------------------------------------- /classification/modules/pointops/src/knnquery/knnquery_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _KNNQUERY_CUDA_KERNEL 2 | #define _KNNQUERY_CUDA_KERNEL 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | void knnquery_cuda(int b, int n, int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void knnquery_cuda_launcher(int b, int n, int m, int nsample, const float *xyz, const float *new_xyz, int *idx, float *dist2, cudaStream_t stream); 15 | 16 | #ifdef __cplusplus 17 | } 18 | #endif 19 | 20 | #endif -------------------------------------------------------------------------------- /classification/modules/pointops/src/knnquery_heap/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/classification/modules/pointops/src/knnquery_heap/__init__.py -------------------------------------------------------------------------------- /classification/modules/pointops/src/knnquery_heap/knnquery_heap_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "knnquery_heap_cuda_kernel.h" 7 | 8 | extern THCState *state; 9 | 10 | #define CHECK_CUDA(x) 
TORCH_CHECK(x.is_cuda(), #x, " must be a CUDAtensor ") 11 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") 12 | #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x) 13 | 14 | 15 | void knnquery_heap_cuda(int b, int n, int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor) 16 | { 17 | CHECK_INPUT(new_xyz_tensor); 18 | CHECK_INPUT(xyz_tensor); 19 | 20 | const float *new_xyz = new_xyz_tensor.data_ptr(); 21 | const float *xyz = xyz_tensor.data_ptr(); 22 | int *idx = idx_tensor.data_ptr(); 23 | float *dist2 = dist2_tensor.data_ptr(); 24 | 25 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 26 | 27 | knnquery_heap_cuda_launcher(b, n, m, nsample, xyz, new_xyz, idx, dist2, stream); 28 | } 29 | -------------------------------------------------------------------------------- /classification/modules/pointops/src/knnquery_heap/knnquery_heap_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "knnquery_heap_cuda_kernel.h" 3 | 4 | 5 | __device__ void swap_float(float *x, float *y) 6 | { 7 | float tmp = *x; 8 | *x = *y; 9 | *y = tmp; 10 | } 11 | 12 | 13 | __device__ void swap_int(int *x, int *y) 14 | { 15 | int tmp = *x; 16 | *x = *y; 17 | *y = tmp; 18 | } 19 | 20 | 21 | __device__ void reheap(float *dist, int *idx, int k) 22 | { 23 | int root = 0; 24 | int child = root * 2 + 1; 25 | while (child < k) 26 | { 27 | if(child + 1 < k && dist[child+1] > dist[child]) 28 | child++; 29 | if(dist[root] > dist[child]) 30 | return; 31 | swap_float(&dist[root], &dist[child]); 32 | swap_int(&idx[root], &idx[child]); 33 | root = child; 34 | child = root * 2 + 1; 35 | } 36 | } 37 | 38 | 39 | __device__ void heap_sort(float *dist, int *idx, int k) 40 | { 41 | int i; 42 | for (i = k - 1; i > 0; i--) 43 | { 44 | swap_float(&dist[0], &dist[i]); 45 | swap_int(&idx[0], &idx[i]); 46 | reheap(dist, 
idx, i);
    }
}


// Heap-based kNN: one thread per query point maintains a max-heap of the
// nsample closest reference points, then heap-sorts it ascending.
// input: xyz (b, n, 3) new_xyz (b, m, 3)
// output: idx (b, m, nsample) dist2 (b, m, nsample)
__global__ void knnquery_heap_cuda_kernel(int b, int n, int m, int nsample, const float *__restrict__ xyz, const float *__restrict__ new_xyz, int *__restrict__ idx, float *__restrict__ dist2) {
    int bs_idx = blockIdx.y;
    int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (bs_idx >= b || pt_idx >= m) return;

    // advance all pointers to this (batch, query point) slot
    new_xyz += bs_idx * m * 3 + pt_idx * 3;
    xyz += bs_idx * n * 3;
    idx += bs_idx * m * nsample + pt_idx * nsample;
    dist2 += bs_idx * m * nsample + pt_idx * nsample;

    float new_x = new_xyz[0];
    float new_y = new_xyz[1];
    float new_z = new_xyz[2];

    // NOTE(review): fixed-size local heap silently assumes nsample <= 100 —
    // confirm callers respect this bound.
    float best_dist[100];
    int best_idx[100];
    for (int i = 0; i < nsample; i++) {
        best_dist[i] = 1e10;
        best_idx[i] = 0;
    }
    for (int i = 0; i < n; i++) {
        float x = xyz[i * 3 + 0];
        float y = xyz[i * 3 + 1];
        float z = xyz[i * 3 + 2];
        float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z);
        // best_dist[0] is the current worst of the kept candidates (heap root)
        if (d2 < best_dist[0]) {
            best_dist[0] = d2;
            best_idx[0] = i;
            reheap(best_dist, best_idx, nsample);
        }
    }
    heap_sort(best_dist, best_idx, nsample);
    for (int i = 0; i < nsample; i++) {
        idx[i] = best_idx[i];
        dist2[i] = best_dist[i];
    }
}


void knnquery_heap_cuda_launcher(int b, int n, int m, int nsample, const float *xyz, const float *new_xyz, int *idx, float *dist2, cudaStream_t stream) {
    // param new_xyz: (B, m, 3)
    // param xyz: (B, n, 3)
    // param idx: (B, m, nsample)

    cudaError_t err;

    dim3 blocks(DIVUP(m, THREADS_PER_BLOCK), b); // blockIdx.x(col), blockIdx.y(row)
    dim3 threads(THREADS_PER_BLOCK);

    // FIX: the launch configuration was destroyed by extraction ("<<>>");
    // restored to launch on the caller-provided stream, which is otherwise unused.
    knnquery_heap_cuda_kernel<<<blocks, threads, 0, stream>>>(b, n, m, nsample, xyz, new_xyz, idx, dist2);
    // cudaDeviceSynchronize(); // for using printf in kernel function
104 | 105 | err = cudaGetLastError(); 106 | if (cudaSuccess != err) { 107 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 108 | exit(-1); 109 | } 110 | } -------------------------------------------------------------------------------- /classification/modules/pointops/src/knnquery_heap/knnquery_heap_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _KNNQUERY_HEAP_CUDA_KERNEL 2 | #define _KNNQUERY_HEAP_CUDA_KERNEL 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | void knnquery_heap_cuda(int b, int n, int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void knnquery_heap_cuda_launcher(int b, int n, int m, int nsample, const float *xyz, const float *new_xyz, int *idx, float *dist2, cudaStream_t stream); 15 | 16 | #ifdef __cplusplus 17 | } 18 | #endif 19 | 20 | #endif -------------------------------------------------------------------------------- /classification/modules/pointops/src/pointops_api.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "ballquery/ballquery_cuda_kernel.h" 5 | #include "grouping/grouping_cuda_kernel.h" 6 | #include "grouping_int/grouping_int_cuda_kernel.h" 7 | #include "sampling/sampling_cuda_kernel.h" 8 | #include "interpolation/interpolation_cuda_kernel.h" 9 | #include "knnquery/knnquery_cuda_kernel.h" 10 | #include "knnquery_heap/knnquery_heap_cuda_kernel.h" 11 | 12 | 13 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 14 | m.def("ballquery_cuda", &ballquery_cuda_fast, "ballquery_cuda_fast"); // name in python, cpp function address, docs 15 | 16 | m.def("knnquery_cuda", &knnquery_cuda, "knnquery_cuda"); 17 | m.def("knnquery_heap_cuda", &knnquery_heap_cuda, "knnquery_heap_cuda"); 18 | 19 | m.def("grouping_forward_cuda", &grouping_forward_cuda_fast, 
"grouping_forward_cuda_fast"); 20 | m.def("grouping_backward_cuda", &grouping_backward_cuda, "grouping_backward_cuda"); 21 | 22 | m.def("grouping_int_forward_cuda", &grouping_int_forward_cuda_fast, "grouping_int_forward_cuda_fast"); 23 | 24 | m.def("gathering_forward_cuda", &gathering_forward_cuda, "gathering_forward_cuda"); 25 | m.def("gathering_backward_cuda", &gathering_backward_cuda, "gathering_backward_cuda"); 26 | m.def("furthestsampling_cuda", &furthestsampling_cuda, "furthestsampling_cuda"); 27 | 28 | m.def("nearestneighbor_cuda", &nearestneighbor_cuda_fast, "nearestneighbor_cuda_fast"); 29 | m.def("interpolation_forward_cuda", &interpolation_forward_cuda_fast, "interpolation_forward_cuda_fast"); 30 | m.def("interpolation_backward_cuda", &interpolation_backward_cuda, "interpolation_backward_cuda"); 31 | } 32 | -------------------------------------------------------------------------------- /classification/modules/pointops/src/sampling/sampling_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "sampling_cuda_kernel.h" 6 | 7 | extern THCState *state; 8 | 9 | void gathering_forward_cuda(int b, int c, int n, int m, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor) 10 | { 11 | const float *points = points_tensor.data_ptr(); 12 | const int *idx = idx_tensor.data_ptr(); 13 | float *out = out_tensor.data_ptr(); 14 | gathering_forward_cuda_launcher(b, c, n, m, points, idx, out); 15 | } 16 | 17 | void gathering_backward_cuda(int b, int c, int n, int m, at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor) 18 | { 19 | 20 | const float *grad_out = grad_out_tensor.data_ptr(); 21 | const int *idx = idx_tensor.data_ptr(); 22 | float *grad_points = grad_points_tensor.data_ptr(); 23 | gathering_backward_cuda_launcher(b, c, n, m, grad_out, idx, grad_points); 24 | } 25 | 26 | void furthestsampling_cuda(int b, int n, int m, 
at::Tensor points_tensor, at::Tensor temp_tensor, at::Tensor idx_tensor) 27 | { 28 | const float *points = points_tensor.data_ptr(); 29 | float *temp = temp_tensor.data_ptr(); 30 | int *idx = idx_tensor.data_ptr(); 31 | furthestsampling_cuda_launcher(b, n, m, points, temp, idx); 32 | } 33 | -------------------------------------------------------------------------------- /classification/modules/pointops/src/sampling/sampling_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "sampling_cuda_kernel.h" 3 | 4 | // input: points(b, c, n) idx(b, m) 5 | // output: out(b, c, m) 6 | __global__ void gathering_forward_cuda_kernel(int b, int c, int n, int m, const float *points, const int *idx, float *out) 7 | { 8 | for (int i = blockIdx.x; i < b; i += gridDim.x) 9 | { 10 | for (int l = blockIdx.y; l < c; l += gridDim.y) 11 | { 12 | for (int j = threadIdx.x; j < m; j += blockDim.x) 13 | { 14 | int a = idx[i * m + j]; 15 | out[(i * c + l) * m + j] = points[(i * c + l) * n + a]; 16 | } 17 | } 18 | } 19 | } 20 | 21 | // input: grad_out(b, c, m) idx(b, m) 22 | // output: grad_points(b, c, n) 23 | __global__ void gathering_backward_cuda_kernel(int b, int c, int n, int m, const float *grad_out, const int *idx, float *grad_points) 24 | { 25 | for (int i = blockIdx.x; i < b; i += gridDim.x) 26 | { 27 | for (int l = blockIdx.y; l < c; l += gridDim.y) 28 | { 29 | for (int j = threadIdx.x; j < m; j += blockDim.x) 30 | { 31 | int a = idx[i * m + j]; 32 | atomicAdd(grad_points + (i * c + l) * n + a, grad_out[(i * c + l) * m + j]); 33 | } 34 | } 35 | } 36 | } 37 | 38 | void gathering_forward_cuda_launcher(int b, int c, int n, int m, const float *points, const int *idx, float *out) 39 | { 40 | gathering_forward_cuda_kernel<<>>(b, c, n, m, points, idx, out); 41 | } 42 | 43 | void gathering_backward_cuda_launcher(int b, int c, int n, int m, const float *grad_out, const int *idx, float *grad_points) 44 | { 
45 | gathering_backward_cuda_kernel<<>>(b, c, n, m, grad_out, idx, grad_points); 46 | } 47 | 48 | __device__ void __update(float *dists, int *dists_i, 49 | int idx1, int idx2) { 50 | const float v1 = dists[idx1], v2 = dists[idx2]; 51 | const int i1 = dists_i[idx1], i2 = dists_i[idx2]; 52 | dists[idx1] = max(v1, v2); 53 | dists_i[idx1] = v2 > v1 ? i2 : i1; 54 | } 55 | 56 | // Input dataset: (b, n, 3), tmp: (b, n) 57 | // Ouput idxs (b, m) 58 | template 59 | __global__ void furthestsampling_cuda_kernel(int b, int n, int m, const float *dataset, float *temp, int *idxs) 60 | { 61 | if (m <= 0) 62 | return; 63 | __shared__ float dists[block_size]; 64 | __shared__ int dists_i[block_size]; 65 | 66 | int batch_index = blockIdx.x; 67 | dataset += batch_index * n * 3; 68 | temp += batch_index * n; 69 | idxs += batch_index * m; 70 | int tid = threadIdx.x; 71 | const int stride = block_size; 72 | int old = 0; 73 | if (threadIdx.x == 0) 74 | idxs[0] = old; 75 | 76 | __syncthreads(); 77 | for (int j = 1; j < m; j++) 78 | { 79 | int besti = 0; 80 | float best = -1; 81 | float x1 = dataset[old * 3 + 0]; 82 | float y1 = dataset[old * 3 + 1]; 83 | float z1 = dataset[old * 3 + 2]; 84 | for (int k = tid; k < n; k += stride) 85 | { 86 | float x2, y2, z2; 87 | x2 = dataset[k * 3 + 0]; 88 | y2 = dataset[k * 3 + 1]; 89 | z2 = dataset[k * 3 + 2]; 90 | //float mag = (x2 * x2) + (y2 * y2) + (z2 * z2); 91 | //if (mag <= 1e-3) 92 | // continue; 93 | float d = (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1); 94 | float d2 = min(d, temp[k]); 95 | temp[k] = d2; 96 | besti = d2 > best ? k : besti; 97 | best = d2 > best ? 
d2 : best; 98 | } 99 | dists[tid] = best; 100 | dists_i[tid] = besti; 101 | __syncthreads(); 102 | 103 | if (block_size >= 1024) { 104 | if (tid < 512) { 105 | __update(dists, dists_i, tid, tid + 512); 106 | } 107 | __syncthreads(); 108 | } 109 | if (block_size >= 512) { 110 | if (tid < 256) { 111 | __update(dists, dists_i, tid, tid + 256); 112 | } 113 | __syncthreads(); 114 | } 115 | if (block_size >= 256) { 116 | if (tid < 128) { 117 | __update(dists, dists_i, tid, tid + 128); 118 | } 119 | __syncthreads(); 120 | } 121 | if (block_size >= 128) { 122 | if (tid < 64) { 123 | __update(dists, dists_i, tid, tid + 64); 124 | } 125 | __syncthreads(); 126 | } 127 | if (block_size >= 64) { 128 | if (tid < 32) { 129 | __update(dists, dists_i, tid, tid + 32); 130 | } 131 | __syncthreads(); 132 | } 133 | if (block_size >= 32) { 134 | if (tid < 16) { 135 | __update(dists, dists_i, tid, tid + 16); 136 | } 137 | __syncthreads(); 138 | } 139 | if (block_size >= 16) { 140 | if (tid < 8) { 141 | __update(dists, dists_i, tid, tid + 8); 142 | } 143 | __syncthreads(); 144 | } 145 | if (block_size >= 8) { 146 | if (tid < 4) { 147 | __update(dists, dists_i, tid, tid + 4); 148 | } 149 | __syncthreads(); 150 | } 151 | if (block_size >= 4) { 152 | if (tid < 2) { 153 | __update(dists, dists_i, tid, tid + 2); 154 | } 155 | __syncthreads(); 156 | } 157 | if (block_size >= 2) { 158 | if (tid < 1) { 159 | __update(dists, dists_i, tid, tid + 1); 160 | } 161 | __syncthreads(); 162 | } 163 | 164 | old = dists_i[0]; 165 | if (tid == 0) 166 | idxs[j] = old; 167 | } 168 | } 169 | 170 | void furthestsampling_cuda_launcher(int b, int n, int m, const float *dataset, float *temp, int *idxs) 171 | { 172 | unsigned int n_threads = opt_n_threads(n); 173 | switch (n_threads) { 174 | case 1024: 175 | furthestsampling_cuda_kernel<1024><<>>(b, n, m, dataset, temp, idxs); 176 | break; 177 | case 512: 178 | furthestsampling_cuda_kernel<512><<>>(b, n, m, dataset, temp, idxs); 179 | break; 180 | case 256: 181 | 
furthestsampling_cuda_kernel<256><<>>(b, n, m, dataset, temp, idxs); 182 | break; 183 | case 128: 184 | furthestsampling_cuda_kernel<128><<>>(b, n, m, dataset, temp, idxs); 185 | break; 186 | case 64: 187 | furthestsampling_cuda_kernel<64><<>>(b, n, m, dataset, temp, idxs); 188 | break; 189 | case 32: 190 | furthestsampling_cuda_kernel<32><<>>(b, n, m, dataset, temp, idxs); 191 | break; 192 | case 16: 193 | furthestsampling_cuda_kernel<16><<>>(b, n, m, dataset, temp, idxs); 194 | break; 195 | case 8: 196 | furthestsampling_cuda_kernel<8><<>>(b, n, m, dataset, temp, idxs); 197 | break; 198 | case 4: 199 | furthestsampling_cuda_kernel<4><<>>(b, n, m, dataset, temp, idxs); 200 | break; 201 | case 2: 202 | furthestsampling_cuda_kernel<2><<>>(b, n, m, dataset, temp, idxs); 203 | break; 204 | case 1: 205 | furthestsampling_cuda_kernel<1><<>>(b, n, m, dataset, temp, idxs); 206 | break; 207 | default: 208 | furthestsampling_cuda_kernel<512><<>>(b, n, m, dataset, temp, idxs); 209 | } 210 | } 211 | -------------------------------------------------------------------------------- /classification/modules/pointops/src/sampling/sampling_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _SAMPLING_CUDA_KERNEL 2 | #define _SAMPLING_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void gathering_forward_cuda(int b, int c, int n, int m, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor); 8 | void gathering_backward_cuda(int b, int c, int n, int m, at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor); 9 | void furthestsampling_cuda(int b, int n, int m, at::Tensor points_tensor, at::Tensor temp_tensor, at::Tensor idx_tensor); 10 | 11 | #ifdef __cplusplus 12 | extern "C" { 13 | #endif 14 | 15 | void gathering_forward_cuda_launcher(int b, int c, int n, int m, const float *points, const int *idx, float *out); 16 | void gathering_backward_cuda_launcher(int b, int c, int n, 
int m, const float *grad_out, const int *idx, float *grad_points); 17 | void furthestsampling_cuda_launcher(int b, int n, int m, const float *dataset, float *temp, int *idxs); 18 | 19 | #ifdef __cplusplus 20 | } 21 | #endif 22 | #endif 23 | -------------------------------------------------------------------------------- /classification/modules/polar_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: Haoxi Ran 3 | Date: 05/10/2022 4 | """ 5 | 6 | import torch 7 | import numpy as np 8 | 9 | 10 | def xyz2sphere(xyz, normalize=True): 11 | """ 12 | Convert XYZ to Spherical Coordinate 13 | 14 | reference: https://en.wikipedia.org/wiki/Spherical_coordinate_system 15 | 16 | :param xyz: [B, N, 3] / [B, N, G, 3] 17 | :return: (rho, theta, phi) [B, N, 3] / [B, N, G, 3] 18 | """ 19 | rho = torch.sqrt(torch.sum(torch.pow(xyz, 2), dim=-1, keepdim=True)) 20 | rho = torch.clamp(rho, min=0) # range: [0, inf] 21 | theta = torch.acos(xyz[..., 2, None] / rho) # range: [0, pi] 22 | phi = torch.atan2(xyz[..., 1, None], xyz[..., 0, None]) # range: [-pi, pi] 23 | # check nan 24 | idx = rho == 0 25 | theta[idx] = 0 26 | 27 | if normalize: 28 | theta = theta / np.pi # [0, 1] 29 | phi = phi / (2 * np.pi) + .5 # [0, 1] 30 | out = torch.cat([rho, theta, phi], dim=-1) 31 | return out 32 | 33 | 34 | def xyz2cylind(xyz, normalize=True): 35 | """ 36 | Convert XYZ to Cylindrical Coordinate 37 | 38 | reference: https://en.wikipedia.org/wiki/Cylindrical_coordinate_system 39 | 40 | :param normalize: Normalize phi & z 41 | :param xyz: [B, N, 3] / [B, N, G, 3] 42 | :return: (rho, phi, z) [B, N, 3] 43 | """ 44 | rho = torch.sqrt(torch.sum(torch.pow(xyz[..., :2], 2), dim=-1, keepdim=True)) 45 | rho = torch.clamp(rho, 0, 1) # range: [0, 1] 46 | phi = torch.atan2(xyz[..., 1, None], xyz[..., 0, None]) # range: [-pi, pi] 47 | z = xyz[..., 2, None] 48 | z = torch.clamp(z, -1, 1) # range: [-1, 1] 49 | 50 | if normalize: 51 | phi = phi / (2 * np.pi) + 
.5 52 | z = (z + 1.) / 2. 53 | out = torch.cat([rho, phi, z], dim=-1) 54 | return out 55 | -------------------------------------------------------------------------------- /classification/modules/ptaug_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: Haoxi Ran 3 | Date: 05/10/2022 4 | """ 5 | 6 | import torch 7 | 8 | 9 | ################# 10 | # MAIN 11 | ################# 12 | 13 | def get_aug_args(args): 14 | dataset = args.dataset 15 | if dataset == 'ScanObjectNN': 16 | aug_args = {'scale_factor': 0.5, 'shift_factor': 0.3} 17 | return aug_args 18 | else: 19 | raise Exception('No such dataset') 20 | 21 | 22 | def transform_point_cloud(batch, args, aug_args, train=True, label=None): 23 | """batch: B x 3/6 x N""" 24 | if args.aug_scale: 25 | batch[:, 0:3] = scale_point_cloud(batch[:, 0:3], aug_args['scale_factor']) 26 | if args.aug_shift: 27 | batch[:, 0:3] = shift_point_cloud(batch[:, 0:3], shift_range=aug_args['shift_factor']) 28 | if label is not None: 29 | return batch, label 30 | return batch 31 | 32 | 33 | ################# 34 | # Shift 35 | ################# 36 | 37 | def shift_point_cloud(batch_data, shift_range=0.2): 38 | """ Randomly shift point cloud. Shift is per point cloud. 39 | Input: 40 | B x C x N array, original batch of point clouds 41 | Return: 42 | B x C x N array, shifted batch of point clouds 43 | """ 44 | shifts = (torch.rand(batch_data.shape[0], 3, 1, device=batch_data.device) * 2. - 1.) * shift_range 45 | batch_data += shifts 46 | return batch_data 47 | 48 | 49 | ################# 50 | # Scale 51 | ################# 52 | 53 | def scale_point_cloud(batch_data, scale_range=0.2): 54 | """ Randomly scale the point cloud. Scale is per point cloud. 55 | Input: 56 | B x C x N array, original batch of point clouds 57 | Return: 58 | B x C x N array, scaled batch of point clouds 59 | """ 60 | scales = (torch.rand(batch_data.shape[0], 3, 1, device=batch_data.device) * 2. - 1.) 
def cal_normal(group_xyz, random_inv=False, is_group=False):
    """
    Compute the unit normal of each triangle, keeping the first (x)
    component positive; optionally apply a batch-wise random flip.

    :param group_xyz: [B, N, K=3, 3] / [B, N, G, K=3, 3]
    :param random_inv: flip all normals of a batch element with prob 0.5
    :param is_group: whether an extra group dimension G is present
    :return: [B, N, 3] / [B, N, G, 3]
    """
    # two edge vectors spanning each triangle
    v1 = group_xyz[..., 1, :] - group_xyz[..., 0, :]  # [B, N, 3]
    v2 = group_xyz[..., 2, :] - group_xyz[..., 0, :]  # [B, N, 3]

    raw = torch.cross(v1, v2, dim=-1)
    unit = raw / torch.norm(raw, dim=-1, keepdim=True)  # [B, N, 3] / [B, N, G, 3]

    # orient so the x-component (shared across a group) is positive
    if is_group:
        sign = (unit[..., 0:1, 0] > 0).float() * 2. - 1.
    else:
        sign = (unit[..., 0] > 0).float() * 2. - 1.
    unit = unit * sign.unsqueeze(-1)

    # batch-wise random inversion of the normal direction (prob: 0.5)
    if random_inv:
        flip = torch.randint(0, 2, (group_xyz.size(0), 1, 1)).float() * 2. - 1.
        flip = flip.to(unit.device)
        unit = unit * (flip.unsqueeze(-1) if is_group else flip)

    return unit
math:: 113 | const = x_nor * x_0 + y_nor * y_0 + z_nor * z_0 114 | 115 | :param is_normalize: 116 | :param normal: [B, N, 3] / [B, N, G, 3] 117 | :param center: [B, N, 3] / [B, N, G, 3] 118 | :return: [B, N, 1] / [B, N, G, 1] 119 | """ 120 | const = torch.sum(normal * center, dim=-1, keepdim=True) 121 | factor = torch.sqrt(torch.Tensor([3])).to(normal.device) 122 | const = const / factor if is_normalize else const 123 | 124 | return const 125 | 126 | 127 | def check_nan(normal, center, pos=None): 128 | """ 129 | Check & Remove NaN in normal tensor 130 | 131 | :param pos: [B, N, 1] 132 | :param center: [B, N, 3] 133 | :param normal: [B, N, 3] 134 | :return: 135 | """ 136 | B, N, _ = normal.shape 137 | mask = torch.sum(torch.isnan(normal), dim=-1) > 0 138 | mask_first = torch.argmax((~mask).int(), dim=-1) 139 | 140 | normal_first = normal[torch.arange(B), None, mask_first].repeat([1, N, 1]) 141 | normal[mask] = normal_first[mask] 142 | center_first = center[torch.arange(B), None, mask_first].repeat([1, N, 1]) 143 | center[mask] = center_first[mask] 144 | 145 | if pos is not None: 146 | pos_first = pos[torch.arange(B), None, mask_first].repeat([1, N, 1]) 147 | pos[mask] = pos_first[mask] 148 | return normal, center, pos 149 | return normal, center 150 | 151 | 152 | def check_nan_umb(normal, center, pos=None): 153 | """ 154 | Check & Remove NaN in normal tensor 155 | 156 | :param pos: [B, N, G, 1] 157 | :param center: [B, N, G, 3] 158 | :param normal: [B, N, G, 3] 159 | :return: 160 | """ 161 | B, N, G, _ = normal.shape 162 | mask = torch.sum(torch.isnan(normal), dim=-1) > 0 163 | mask_first = torch.argmax((~mask).int(), dim=-1) 164 | b_idx = torch.arange(B).unsqueeze(1).repeat([1, N]) 165 | n_idx = torch.arange(N).unsqueeze(0).repeat([B, 1]) 166 | 167 | normal_first = normal[b_idx, n_idx, None, mask_first].repeat([1, 1, G, 1]) 168 | normal[mask] = normal_first[mask] 169 | center_first = center[b_idx, n_idx, None, mask_first].repeat([1, 1, G, 1]) 170 | center[mask] = 
center_first[mask] 171 | 172 | if pos is not None: 173 | pos_first = pos[b_idx, n_idx, None, mask_first].repeat([1, 1, G, 1]) 174 | pos[mask] = pos_first[mask] 175 | return normal, center, pos 176 | return normal, center 177 | 178 | 179 | class SurfaceConstructor(nn.Module): 180 | """ 181 | Surface Constructor for Point Clouds 182 | 183 | Formulation of A Surface: 184 | A * (x - x_0) + B * (y - y_0) + C * (z - z_0) = 0, 185 | where A^2 + B^2 + C^2 = 1 & A > 0 186 | """ 187 | 188 | def __init__(self, r=None, k=3, recons_type='knn', return_dist=False, random_inv=True, cuda=False): 189 | super(SurfaceConstructor, self).__init__() 190 | self.K = k 191 | self.R = r 192 | self.recons = _recons_factory(recons_type) 193 | self.cuda = cuda 194 | 195 | self.return_dist = return_dist 196 | self.random_inv = random_inv 197 | 198 | def forward(self, center, context): 199 | """ 200 | Input: 201 | center: input points position as centroid points, [B, 3, N] 202 | context: input points position as context points, [B, 3, N'] 203 | 204 | Output: 205 | normal: normals of constructed triangles, [B, 3, N] 206 | center: centroids of constructed triangles, [B, 3, N] 207 | pos: position info of constructed triangles, [B, 1, N] 208 | """ 209 | center = center.permute(0, 2, 1) 210 | context = context.permute(0, 2, 1) 211 | 212 | group_xyz = self.recons(self.K, center, context, cuda=self.cuda) 213 | normal = cal_normal(group_xyz, random_inv=self.random_inv) 214 | center = cal_center(group_xyz) 215 | 216 | if self.return_dist: 217 | pos = cal_const(normal, center) 218 | normal, center, pos = check_nan(normal, center, pos) 219 | normal = normal.permute(0, 2, 1) 220 | center = center.permute(0, 2, 1) 221 | pos = pos.permute(0, 2, 1) 222 | return normal, center, pos 223 | 224 | normal, center = check_nan(normal, center) 225 | normal = normal.permute(0, 2, 1) 226 | center = center.permute(0, 2, 1) 227 | 228 | return normal, center 229 | 230 | 231 | if __name__ == '__main__': 232 | xyz = 
torch.rand(1, 3, 1024) * 2. - 1. 233 | constructor = SurfaceConstructor(return_dist=True) 234 | 235 | normal, center, pos = constructor(xyz, xyz) 236 | print(normal.shape) 237 | print(center.shape) 238 | -------------------------------------------------------------------------------- /classification/scripts/scanobjectnn/repsurf_ssg_umb.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -v 3 | 4 | python3 tool/train_cls_scanobjectnn.py \ 5 | --cuda_ops \ 6 | --batch_size 64 \ 7 | --model repsurf.repsurf_ssg_umb \ 8 | --epoch 250 \ 9 | --log_dir repsurf_cls_ssg_umb \ 10 | --gpus 0 \ 11 | --n_workers 12 \ 12 | --return_center \ 13 | --return_dist \ 14 | --return_polar \ 15 | --group_size 8 \ 16 | --umb_pool sum \ 17 | --num_point 1024 -------------------------------------------------------------------------------- /classification/scripts/scanobjectnn/repsurf_ssg_umb_2x.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -v 3 | 4 | python3 tool/train_cls_scanobjectnn.py \ 5 | --cuda_ops \ 6 | --batch_size 64 \ 7 | --model repsurf.repsurf_ssg_umb_2x \ 8 | --epoch 250 \ 9 | --log_dir repsurf_cls_ssg_umb_2x \ 10 | --gpus 0 \ 11 | --n_workers 12 \ 12 | --return_center \ 13 | --return_dist \ 14 | --return_polar \ 15 | --group_size 8 \ 16 | --umb_pool sum \ 17 | --num_point 1024 -------------------------------------------------------------------------------- /classification/util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/classification/util/__init__.py -------------------------------------------------------------------------------- /classification/util/utils.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import argparse 3 | import random 4 | 5 
| import numpy as np 6 | import torch 7 | from torch import nn 8 | import torch.nn.functional as F 9 | 10 | 11 | def set_seed(seed): 12 | """ 13 | Setting of Global Seed 14 | 15 | """ 16 | torch.backends.cudnn.enabled = True 17 | torch.backends.cudnn.deterministic = True # consistent results on the cpu and gpu 18 | torch.backends.cudnn.benchmark = True 19 | 20 | np.random.seed(seed) 21 | random.seed(seed) 22 | torch.manual_seed(seed) # cpu 23 | torch.cuda.manual_seed(seed) 24 | torch.cuda.manual_seed_all(seed) # gpu 25 | 26 | 27 | def weight_init(m, init_type): 28 | if init_type == 'xavier': 29 | init_func = torch.nn.init.xavier_normal_ 30 | elif init_type == 'kaiming': 31 | init_func = torch.nn.init.kaiming_normal_ 32 | else: 33 | raise Exception('No such init type') 34 | 35 | if isinstance(m, (torch.nn.Linear, torch.nn.Conv2d, torch.nn.Conv1d)): 36 | init_func(m.weight) 37 | if m.bias is not None: 38 | torch.nn.init.constant_(m.bias, 0) 39 | elif isinstance(m, (torch.nn.BatchNorm2d, torch.nn.BatchNorm1d)): 40 | torch.nn.init.constant_(m.weight, 1) # constant 41 | # torch.nn.init.normal_(m.weight, 1.0, 0.02) # normal 42 | torch.nn.init.constant_(m.bias, 0) 43 | 44 | 45 | class ClsLoss(nn.Module): 46 | def __init__(self): 47 | super(ClsLoss, self).__init__() 48 | 49 | def forward(self, pred, target): 50 | total_loss = F.nll_loss(pred, target) 51 | 52 | return total_loss 53 | 54 | 55 | class SmoothClsLoss(nn.Module): 56 | def __init__(self, smoothing_ratio=0.1): 57 | super(SmoothClsLoss, self).__init__() 58 | self.smoothing_ratio = smoothing_ratio 59 | 60 | def forward(self, pred, target): 61 | eps = self.smoothing_ratio 62 | n_class = pred.size(1) 63 | 64 | one_hot = torch.zeros_like(pred).scatter(1, target.view(-1, 1), 1) 65 | one_hot = one_hot * (1 - eps) + (1 - one_hot) * eps / (n_class - 1) 66 | # log_prb = F.log_softmax(pred, dim=1) 67 | 68 | loss = -(one_hot * pred).sum(dim=1).mean() 69 | return loss 70 | 71 | 72 | def get_model(args): 73 | module = 
importlib.import_module('models.%s' % args.model) 74 | return module.Model(args) 75 | 76 | 77 | def get_loss(): 78 | return SmoothClsLoss() 79 | 80 | 81 | def get_test_args(): 82 | return argparse.Namespace() 83 | -------------------------------------------------------------------------------- /segmentation/README.md: -------------------------------------------------------------------------------- 1 | # RepSurf for Segmentation
2 | 3 | By *[Haoxi Ran\*](https://hancyran.github.io/) , Jun Liu, Chengjie Wang* ( * : corresponding contact) 4 | 5 | ### [PDF](https://openaccess.thecvf.com/content/CVPR2022/papers/Ran_Surface_Representation_for_Point_Clouds_CVPR_2022_paper.pdf) | [arXiv](http://arxiv.org/abs/2205.05740) 6 | 7 | 8 | ## Preparation 9 | 10 | ### Environment 11 | 12 | We tested under the environment: 13 | 14 | * python 3.7 15 | * pytorch 1.6.0 / 1.8.0 16 | * cuda 10.1 / 11.1 17 | * gcc 7.2.0 18 | * h5py 19 | * sharedarray 20 | * tensorboardx 21 | 22 | For anaconda user, initialize the conda environment **repsurf-seg** by: 23 | 24 | ``` 25 | sh init.sh 26 | ``` 27 | 28 | ## Experiments 29 | 30 | ### S3DIS Area-5 (Data & Logs: [Google Drive](https://drive.google.com/drive/folders/1jIZuy4RPFJ4YHAE8ScVQgwtBwNGgfKnv?usp=sharing)) 31 | 32 | * Performance using the same settings: 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 |
ModelmIoUmAccOA#ParamsTraining TimeCodeTraining LogTest LogCheckpoint
Point Transformer
(our settings)
70.37 (official: 70.4)77.02 (official: 76.5)90.80 (official: 90.8)7.767M19.91hpointtransformer.pygoogle drivegoogle drivegoogle drive
(30 MB)
PointNet++ SSG (our settings)64.0571.5287.920.968M9.08hpointnet2_ssg.pygoogle drivegoogle drivegoogle drive
(4 MB)
PointNet++ SSG w/ Umbrella RepSurf (ours)68.8676.5490.220.976M9.18hrepsurf_umb_ssg.pygoogle drivegoogle drivegoogle drive
(4 MB)
88 |
89 |
 90 | **Note**: 91 | 1. The performance (mIoU/mAcc/OA) is from the final predictions on the whole scenes of S3DIS Area-5, while the results during training are on sub-sampled scenes for fast validation. 92 | 2. The training time of all above implementations is estimated on four NVIDIA RTX 3090 GPUs. The time in the logs contains both training and validating time. 93 | 3. To speed up the training process, we apply Sectorized FPS (in the first stage) for all above methods. It can save 30~40% of training time and does not affect the performance. 94 | 4. To lessen the instability from grid sampling during inference, we apply median filtering to all the above implementations. Besides, it can slightly improve the results (~0.4 mIoU). 95 | 96 | * To download the dataset (first install gdown via **pip install gdown**): 97 | 98 | ``` 99 | cd ./data/S3DIS 100 | gdown https://drive.google.com/u/1/uc?id=1UDM-bjrtqoIR9FWoIRyqLUJGyKEs22fP 101 | tar zxf s3dis.tar.gz && rm s3dis.tar.gz && cd - 102 | ``` 103 | 104 | * To train one model (**Umbrella RepSurf, Point Transformer, PointNet2**) for S3DIS Area-5: 105 | 106 | ``` 107 | sh scripts/s3dis/train_[MODEL].sh # MODEL: repsurf_umb, pointnet2, pointtransformer 108 | ``` 109 | 110 | * To test one model (**Our Umbrella RepSurf, Point Transformer, PointNet2**) for S3DIS Area-5 on whole scenes: 111 | 112 | ``` 113 | sh scripts/s3dis/test_[MODEL].sh # MODEL: repsurf_umb, pointnet2, pointtransformer 114 | ``` 115 | 116 | ## Acknowledgment 117 | 118 | We thank the [Point Transformer implementation](https://github.com/POSTECH-CVLab/point-transformer) for the library pointops. 119 | 120 | ## License 121 | 122 | RepSurf is under the Apache-2.0 license. Please contact the primary author **Haoxi Ran (ranhaoxi@gmail.com)** for 123 | commercial use.
124 | -------------------------------------------------------------------------------- /segmentation/dataset/S3DISDataLoader.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: Haoxi Ran 3 | Date: 06/30/2022 4 | """ 5 | 6 | import os 7 | import numpy as np 8 | import SharedArray as SA 9 | from torch.utils.data import Dataset 10 | 11 | from util.data_util import sa_create, data_prepare 12 | 13 | NUM_CLASS = 13 14 | 15 | 16 | class S3DIS(Dataset): 17 | def __init__(self, args, split, coord_transform=None, rgb_transform=None, 18 | rgb_mean=None, rgb_std=None, shuffle_index=False): 19 | super().__init__() 20 | self.args, self.split, self.coord_transform, self.rgb_transform, self.rgb_mean, self.rgb_std, self.shuffle_index = \ 21 | args, split, coord_transform, rgb_transform, rgb_mean, rgb_std, shuffle_index 22 | self.stop_aug = False 23 | data_list = sorted(os.listdir(args.data_dir)) 24 | data_list = [item[:-4] for item in data_list if 'Area_' in item] 25 | if split == 'train': 26 | self.data_list = [item for item in data_list if not 'Area_{}'.format(args.test_area) in item] 27 | else: 28 | self.data_list = [item for item in data_list if 'Area_{}'.format(args.test_area) in item] 29 | self.data_idx = np.arange(len(self.data_list)) 30 | 31 | for item in self.data_list: 32 | if not os.path.exists("/dev/shm/s3dis_{}".format(item)): 33 | data_path = os.path.join(args.data_dir, item + '.npy') 34 | data = np.load(data_path).astype(np.float32) # xyzrgbl, N*7 35 | sa_create("shm://s3dis_{}".format(item), data) 36 | 37 | def __getitem__(self, idx): 38 | data_idx = self.data_idx[idx % len(self.data_idx)] 39 | data = SA.attach("shm://s3dis_{}".format(self.data_list[data_idx])).copy() 40 | coord, feat, label = data[:, 0:3], data[:, 3:6], data[:, 6] 41 | coord, feat, label = \ 42 | data_prepare(coord, feat, label, self.args, self.split, self.coord_transform, self.rgb_transform, 43 | self.rgb_mean, self.rgb_std, self.shuffle_index, 
self.stop_aug) 44 | 45 | return coord, feat, label 46 | 47 | def __len__(self): 48 | return len(self.data_idx) * self.args.loop 49 | 50 | @staticmethod 51 | def print_weight(data_root, data_list): 52 | print('Computing label weight...') 53 | num_point_list = [] 54 | label_freq = np.zeros(NUM_CLASS) 55 | label_total = np.zeros(NUM_CLASS) 56 | # load data 57 | for idx, item in enumerate(data_list): 58 | data_path = os.path.join(data_root, item + '.npy') 59 | data = np.load(data_path) 60 | labels = data[:, 6] 61 | freq = np.histogram(labels, range(NUM_CLASS + 1))[0] 62 | label_freq += freq 63 | label_total += (freq > 0).astype(np.float) * labels.size 64 | num_point_list.append(labels.size) 65 | 66 | # label weight 67 | label_freq = label_freq / label_total 68 | label_weight = np.median(label_freq) / label_freq 69 | print(label_weight) 70 | 71 | @staticmethod 72 | def print_mean_std(data_root, data_list): 73 | print('Computing color mean & std...') 74 | point_list = [] 75 | for idx, item in enumerate(data_list): 76 | data_path = os.path.join(data_root, item + '.npy') 77 | data = np.load(data_path) 78 | point_list.append(data[:, 3:6]) 79 | 80 | points = np.vstack(point_list) / 255. 
81 | mean = np.mean(points, 0) 82 | std = np.std(points, 0) 83 | print(f'mean: {mean}, std:{std}') 84 | -------------------------------------------------------------------------------- /segmentation/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/segmentation/dataset/__init__.py -------------------------------------------------------------------------------- /segmentation/init.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | mkdir -p log/PointAnalysis/log/S3DIS 4 | mkdir -p log/PointAnalysis/log/ScanNet 5 | mkdir -p data/S3DIS 6 | mkdir -p data/ScanNet 7 | 8 | conda create -n repsurf-seg python=3.7 -y 9 | conda activate repsurf-seg 10 | 11 | #conda install pytorch=1.6.0 torchvision=0.7.0 cudatoolkit=10.1 -c pytorch -c conda-forge -y 12 | pip install torch==1.8.0+cu111 torchvision==0.9.0+cu111 torchaudio==0.8.0 -f https://download.pytorch.org/whl/torch_stable.html 13 | conda install -c anaconda h5py pyyaml -y 14 | conda install -c conda-forge sharedarray tensorboardx -y 15 | 16 | cd modules/pointops 17 | python3 setup.py install 18 | cd - 19 | -------------------------------------------------------------------------------- /segmentation/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/segmentation/models/__init__.py -------------------------------------------------------------------------------- /segmentation/models/pointnet2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/segmentation/models/pointnet2/__init__.py 
-------------------------------------------------------------------------------- /segmentation/models/pointnet2/pointnet2_ssg.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: Haoxi Ran 3 | Date: 06/30/2022 4 | """ 5 | 6 | import torch 7 | import torch.nn as nn 8 | from modules.pointnet2_utils import PointNetSetAbstraction, PointNetFeaturePropagation 9 | 10 | 11 | class Model(nn.Module): 12 | def __init__(self, args): 13 | super(Model, self).__init__() 14 | self.sa1 = PointNetSetAbstraction(4, 32, 6 + 3, [32, 32, 64], num_sector=4) 15 | self.sa2 = PointNetSetAbstraction(4, 32, 64 + 3, [64, 64, 128]) 16 | self.sa3 = PointNetSetAbstraction(4, 32, 128 + 3, [128, 128, 256]) 17 | self.sa4 = PointNetSetAbstraction(4, 32, 256 + 3, [256, 256, 512]) 18 | 19 | self.fp4 = PointNetFeaturePropagation(768, [256, 256]) 20 | self.fp3 = PointNetFeaturePropagation(384, [256, 256]) 21 | self.fp2 = PointNetFeaturePropagation(320, [256, 128]) 22 | self.fp1 = PointNetFeaturePropagation(128, [128, 128, 128]) 23 | 24 | self.classifier = nn.Sequential( 25 | nn.Linear(128, 128), 26 | nn.BatchNorm1d(128), 27 | nn.ReLU(True), 28 | nn.Dropout(0.5), 29 | nn.Linear(128, args.num_class), 30 | ) 31 | 32 | def forward(self, pos_feat_off0): 33 | pos_feat_off0[1] = torch.cat([pos_feat_off0[0], pos_feat_off0[1]], 1) 34 | 35 | pos_feat_off1 = self.sa1(pos_feat_off0) 36 | pos_feat_off2 = self.sa2(pos_feat_off1) 37 | pos_feat_off3 = self.sa3(pos_feat_off2) 38 | pos_feat_off4 = self.sa4(pos_feat_off3) 39 | 40 | pos_feat_off3[1] = self.fp4(pos_feat_off3, pos_feat_off4) 41 | pos_feat_off2[1] = self.fp3(pos_feat_off2, pos_feat_off3) 42 | pos_feat_off1[1] = self.fp2(pos_feat_off1, pos_feat_off2) 43 | pos_feat_off0[1] = self.fp1([pos_feat_off0[0], None, pos_feat_off0[2]], pos_feat_off1) 44 | 45 | feature = self.classifier(pos_feat_off0[1]) 46 | 47 | return feature 48 | -------------------------------------------------------------------------------- 
class Model(nn.Module):
    """Point Transformer U-Net for point-cloud semantic segmentation.

    Five encoder stages (TransitionDown + transformer blocks) and five
    decoder stages (TransitionUp + transformer blocks) with skip fusion,
    followed by a per-point classifier.
    """

    def __init__(self, args):
        super().__init__()
        block = PointTransformerBlock
        num_block = [2, 3, 4, 6, 3]
        self.in_c = args.in_channel
        self.in_planes, planes = self.in_c, [32, 64, 128, 256, 512]
        fpn_planes, fpnhead_planes, share_planes = 128, 64, 8
        # stage 1 keeps resolution; stages 2-5 each downsample by 4x
        stride, nsample = [1, 4, 4, 4, 4], [16, 16, 16, 16, 16]
        self.enc1 = self._make_enc(block, planes[0], num_block[0], share_planes, stride=stride[0],
                                   nsample=nsample[0])  # N/1
        self.enc2 = self._make_enc(block, planes[1], num_block[1], share_planes, stride=stride[1],
                                   nsample=nsample[1], num_sector=4)  # N/4
        self.enc3 = self._make_enc(block, planes[2], num_block[2], share_planes, stride=stride[2],
                                   nsample=nsample[2])  # N/16
        self.enc4 = self._make_enc(block, planes[3], num_block[3], share_planes, stride=stride[3],
                                   nsample=nsample[3])  # N/64
        self.enc5 = self._make_enc(block, planes[4], num_block[4], share_planes, stride=stride[4],
                                   nsample=nsample[4])  # N/256
        self.dec5 = self._make_dec(block, planes[4], 2, share_planes, nsample=nsample[4], is_head=True)  # transform p5
        self.dec4 = self._make_dec(block, planes[3], 2, share_planes, nsample=nsample[3])  # fusion p5 and p4
        self.dec3 = self._make_dec(block, planes[2], 2, share_planes, nsample=nsample[2])  # fusion p4 and p3
        self.dec2 = self._make_dec(block, planes[1], 2, share_planes, nsample=nsample[1])  # fusion p3 and p2
        self.dec1 = self._make_dec(block, planes[0], 2, share_planes, nsample=nsample[0])  # fusion p2 and p1
        self.cls = nn.Sequential(nn.Linear(planes[0], planes[0]), nn.BatchNorm1d(planes[0]), nn.ReLU(inplace=True),
                                 nn.Linear(planes[0], args.num_class))

    def _make_enc(self, block, planes, blocks, share_planes=8, stride=1, nsample=16, num_sector=1):
        # one downsampling transition followed by (blocks - 1) transformer blocks
        layers = [TransitionDown(self.in_planes, planes * block.expansion, stride, nsample, num_sector)]
        self.in_planes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.in_planes, self.in_planes, share_planes, nsample=nsample))
        return nn.Sequential(*layers)

    def _make_dec(self, block, planes, blocks, share_planes=8, nsample=16, is_head=False):
        # one upsampling transition followed by (blocks - 1) transformer blocks;
        # the head decoder (is_head=True) transforms the coarsest level in place
        layers = [TransitionUp(self.in_planes, None if is_head else planes * block.expansion)]
        self.in_planes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.in_planes, self.in_planes, share_planes, nsample=nsample))
        return nn.Sequential(*layers)

    def forward(self, pxo, *args):
        p0, x0, o0 = pxo  # (n, 3), (n, c), (b)
        # when in_channel == 3 the coordinates themselves are the features
        x0 = p0 if self.in_c == 3 else torch.cat((p0, x0), 1)
        p1, x1, o1 = self.enc1([p0, x0, o0])
        p2, x2, o2 = self.enc2([p1, x1, o1])
        p3, x3, o3 = self.enc3([p2, x2, o2])
        p4, x4, o4 = self.enc4([p3, x3, o3])
        p5, x5, o5 = self.enc5([p4, x4, o4])
        # dec[0] is the transition (fuses the coarser level), dec[1:] the blocks
        x5 = self.dec5[1:]([p5, self.dec5[0]([p5, x5, o5]), o5])[1]
        x4 = self.dec4[1:]([p4, self.dec4[0]([p4, x4, o4], [p5, x5, o5]), o4])[1]
        x3 = self.dec3[1:]([p3, self.dec3[0]([p3, x3, o3], [p4, x4, o4]), o3])[1]
        x2 = self.dec2[1:]([p2, self.dec2[0]([p2, x2, o2], [p3, x3, o3]), o2])[1]
        x1 = self.dec1[1:]([p1, self.dec1[0]([p1, x1, o1], [p2, x2, o2]), o1])[1]
        x = self.cls(x1)
        return x
class Model(nn.Module):
    """RepSurf-U (umbrella) on a PointNet++ SSG backbone for segmentation.

    An umbrella surface constructor derives per-point representative-surface
    features from the raw coordinates; these are concatenated with point
    features at every set-abstraction level.
    """

    def __init__(self, args):
        super(Model, self).__init__()
        # centers carry polar coordinates too when return_polar is set
        center_channel = 6 if args.return_polar else 3
        repsurf_in_channel = 10
        repsurf_out_channel = 10

        # encoder: each level consumes point features + repsurf features
        self.sa1 = SurfaceAbstractionCD(4, 32, args.in_channel + repsurf_out_channel, center_channel, [32, 32, 64],
                                        True, args.return_polar, num_sector=4)
        self.sa2 = SurfaceAbstractionCD(4, 32, 64 + repsurf_out_channel, center_channel, [64, 64, 128],
                                        True, args.return_polar)
        self.sa3 = SurfaceAbstractionCD(4, 32, 128 + repsurf_out_channel, center_channel, [128, 128, 256],
                                        True, args.return_polar)
        self.sa4 = SurfaceAbstractionCD(4, 32, 256 + repsurf_out_channel, center_channel, [256, 256, 512],
                                        True, args.return_polar)

        # decoder: feature propagation with skip connections
        self.fp4 = SurfaceFeaturePropagationCD(512, 256, [256, 256])
        self.fp3 = SurfaceFeaturePropagationCD(256, 128, [256, 256])
        self.fp2 = SurfaceFeaturePropagationCD(256, 64, [256, 128])
        self.fp1 = SurfaceFeaturePropagationCD(128, None, [128, 128, 128])

        # per-point classification head
        self.classifier = nn.Sequential(
            nn.Linear(128, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(True),
            nn.Dropout(0.5),
            nn.Linear(128, args.num_class),
        )

        # builds umbrella repsurf features from coords (+1 for the center point)
        self.surface_constructor = UmbrellaSurfaceConstructor(args.group_size + 1, repsurf_in_channel, repsurf_out_channel)

    def forward(self, pos_feat_off0):
        # [coords, repsurf features, coords+features, offsets]
        pos_nor_feat_off0 = [
            pos_feat_off0[0],
            self.surface_constructor(pos_feat_off0[0], pos_feat_off0[2]),
            torch.cat([pos_feat_off0[0], pos_feat_off0[1]], 1),
            pos_feat_off0[2]
        ]

        pos_nor_feat_off1 = self.sa1(pos_nor_feat_off0)
        pos_nor_feat_off2 = self.sa2(pos_nor_feat_off1)
        pos_nor_feat_off3 = self.sa3(pos_nor_feat_off2)
        pos_nor_feat_off4 = self.sa4(pos_nor_feat_off3)

        # repsurf features (index 1) are only needed by the encoder; drop them
        # so index 1 can be reused for propagated features in the decoder
        del pos_nor_feat_off0[1], pos_nor_feat_off1[1], pos_nor_feat_off2[1], pos_nor_feat_off3[1], pos_nor_feat_off4[1]
        pos_nor_feat_off3[1] = self.fp4(pos_nor_feat_off3, pos_nor_feat_off4)
        pos_nor_feat_off2[1] = self.fp3(pos_nor_feat_off2, pos_nor_feat_off3)
        pos_nor_feat_off1[1] = self.fp2(pos_nor_feat_off1, pos_nor_feat_off2)
        pos_nor_feat_off0[1] = self.fp1([pos_nor_feat_off0[0], None, pos_nor_feat_off0[2]], pos_nor_feat_off1)

        feature = self.classifier(pos_nor_feat_off0[1])

        return feature
class PointNetSetAbstraction(nn.Module):
    """PointNet++ set-abstraction (SA) layer for stacked-batch point clouds.

    Subsamples the input by ``stride``, groups ``nsample`` neighbors around
    each sampled point via ``sample_and_group``, runs a shared per-point MLP
    (Conv1d + BN + ReLU), and max-pools over each neighborhood.
    """

    def __init__(self, stride, nsample, in_channel, mlp, num_sector=1):
        super(PointNetSetAbstraction, self).__init__()
        self.stride = stride
        self.nsample = nsample
        self.num_sector = num_sector
        self.mlp_convs = nn.ModuleList()
        self.mlp_bns = nn.ModuleList()

        # Chain the MLP widths: in_channel -> mlp[0] -> ... -> mlp[-1].
        widths = [in_channel] + list(mlp)
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            self.mlp_convs.append(nn.Conv1d(c_in, c_out, 1))
            self.mlp_bns.append(nn.BatchNorm1d(c_out))

    def forward(self, pos_feat_off):
        """Abstract ``[pos, feat, offset]`` into a coarser triple."""
        xyz, feats, offset = pos_feat_off  # [N, 3], [N, C], [B]

        new_xyz, grouped, new_offset = sample_and_group(
            self.stride, self.nsample, xyz, feats, offset, num_sector=self.num_sector)

        # [M, nsample, 3+C] -> [M, 3+C, nsample] so Conv1d mixes channels
        # per neighbor sample.
        grouped = grouped.transpose(1, 2).contiguous()
        for conv, bn in zip(self.mlp_convs, self.mlp_bns):
            grouped = F.relu(bn(conv(grouped)))

        # Max-pool across the neighborhood axis -> [M, mlp[-1]].
        pooled = torch.max(grouped, 2)[0]

        return [new_xyz, pooled, new_offset]
#python3 setup.py install
from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
import os
from distutils.sysconfig import get_config_vars

# Strip '-Wstrict-prototypes' from the interpreter's default compile flags:
# the flag is C-only and is rejected/warned about when compiling the C++
# and CUDA sources below.
(opt,) = get_config_vars('OPT')
os.environ['OPT'] = " ".join(
    flag for flag in opt.split() if flag != '-Wstrict-prototypes'
)

# Build a single fused extension module 'pointops_cuda' from all CUDA
# operators (knn query, furthest-point sampling, grouping, interpolation,
# subtraction, aggregation); bindings are registered in src/pointops_api.cpp.
setup(
    name='pointops_cuda',
    author='Hengshuang Zhao',
    ext_modules=[
        CUDAExtension('pointops_cuda', [
            'src/pointops_api.cpp',
            'src/knnquery/knnquery_cuda.cpp',
            'src/knnquery/knnquery_cuda_kernel.cu',
            'src/sampling/sampling_cuda.cpp',
            'src/sampling/sampling_cuda_kernel.cu',
            'src/grouping/grouping_cuda.cpp',
            'src/grouping/grouping_cuda_kernel.cu',
            'src/interpolation/interpolation_cuda.cpp',
            'src/interpolation/interpolation_cuda_kernel.cu',
            'src/subtraction/subtraction_cuda.cpp',
            'src/subtraction/subtraction_cuda_kernel.cu',
            'src/aggregation/aggregation_cuda.cpp',
            'src/aggregation/aggregation_cuda_kernel.cu',
        ],
        # Debug symbols for the C++ host code; -O2 for device code.
        extra_compile_args={'cxx': ['-g'], 'nvcc': ['-O2']}
        )
    ],
    cmdclass={'build_ext': BuildExtension}
)
#include "../cuda_utils.h"
#include "aggregation_cuda_kernel.h"


// Forward aggregation.
// input: input (n, c), position (n, nsample, c), weight (n, nsample, w_c),
//        idx (n, nsample); output: output (n, c).
// One thread per output element (n_idx, c_idx); each thread accumulates
//   output[n][c] += (input[idx[n][k]][c] + position[n][k][c]) * weight[n][k][c % w_c]
// over the nsample neighbors.  Weights are w_c wide and shared across groups
// of c / w_c channels.
// NOTE: output is accumulated with '+=', so the caller must pass a
// zero-initialized output tensor.
__global__ void aggregation_forward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) {
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index >= n * c) return;
    const int c_idx = index % c;
    const int n_idx = index / c;
    const int w_c_idx = c_idx % w_c;
    for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++)
    {
        int idx_idx = n_idx * nsample + nsample_idx;
        int input_idx = idx[idx_idx] * c + c_idx;
        int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx;
        int weight_idx = n_idx * nsample * w_c + nsample_idx * w_c + w_c_idx;
        output[index] += (input[input_idx] + position[position_idx]) * weight[weight_idx];
    }
}

// Backward aggregation.
// input: grad_output (n, c); output: grad_input (n, c),
//        grad_position (n, nsample, c), grad_weight (n, nsample, w_c).
// grad_input rows and grad_weight slots can be written by several threads
// (multiple outputs gather the same input row; one weight slot serves
// c / w_c channels), hence atomicAdd.  Each grad_position element is owned
// by exactly one thread, so a plain store suffices.
__global__ void aggregation_backward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) {
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index >= n * c) return;
    const int c_idx = index % c;
    const int n_idx = index / c;
    const int w_c_idx = c_idx % w_c;
    for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++)
    {
        int idx_idx = n_idx * nsample + nsample_idx;
        int input_idx = idx[idx_idx] * c + c_idx;
        int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx;
        int weight_idx = n_idx * nsample * w_c + nsample_idx * w_c + w_c_idx;
        atomicAdd(grad_input + input_idx, grad_output[index] * weight[weight_idx]);
        grad_position[position_idx] = grad_output[index] * weight[weight_idx];
        atomicAdd(grad_weight + weight_idx, grad_output[index] * (input[input_idx] + position[position_idx]));
    }
}

void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) {
    // One thread per (n, c) output element.
    dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    // NOTE(review): the '<<<blocks, threads>>>' launch configuration was lost
    // during text extraction and has been restored — verify against upstream.
    aggregation_forward_cuda_kernel<<<blocks, threads>>>(n, nsample, c, w_c, input, position, weight, idx, output);
}

void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) {
    dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    // NOTE(review): launch configuration restored as above.
    aggregation_backward_cuda_kernel<<<blocks, threads>>>(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight);
}
-------------------------------------------------------------------------------- /segmentation/modules/pointops/src/aggregation/aggregation_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _AGGREGATION_CUDA_KERNEL 2 | #define _AGGREGATION_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor); 8 | void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output); 15 | void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /segmentation/modules/pointops/src/cuda_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef _CUDA_UTILS_H 2 | #define _CUDA_UTILS_H 3 | 4 | #include 5 | #include 6 | 7 | #define TOTAL_THREADS 1024 8 | #define THREADS_PER_BLOCK 256 9 | #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) 10 | 11 | inline int opt_n_threads(int work_size) { 12 | const int pow_2 = std::log(static_cast(work_size)) / std::log(2.0); 13 | return std::max(std::min(1 << pow_2, 
TOTAL_THREADS), 1); 14 | } 15 | 16 | inline dim3 opt_block_config(int x, int y) { 17 | const int x_threads = opt_n_threads(x); 18 | const int y_threads = std::max(std::min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1); 19 | dim3 block_config(x_threads, y_threads, 1); 20 | return block_config; 21 | } 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /segmentation/modules/pointops/src/grouping/grouping_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "grouping_cuda_kernel.h" 6 | 7 | 8 | void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor) 9 | { 10 | const float *input = input_tensor.data_ptr(); 11 | const int *idx = idx_tensor.data_ptr(); 12 | float *output = output_tensor.data_ptr(); 13 | grouping_forward_cuda_launcher(m, nsample, c, input, idx, output); 14 | } 15 | 16 | void grouping_backward_cuda(int m, int nsample, int c, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor) 17 | { 18 | const float *grad_output = grad_output_tensor.data_ptr(); 19 | const int *idx = idx_tensor.data_ptr(); 20 | float *grad_input = grad_input_tensor.data_ptr(); 21 | grouping_backward_cuda_launcher(m, nsample, c, grad_output, idx, grad_input); 22 | } 23 | -------------------------------------------------------------------------------- /segmentation/modules/pointops/src/grouping/grouping_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "grouping_cuda_kernel.h" 3 | 4 | 5 | __global__ void grouping_forward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ input, const int *__restrict__ idx, float *__restrict__ output) { 6 | // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c) 7 | int index = blockIdx.x * 
blockDim.x + threadIdx.x; 8 | if (index >= m * nsample * c) return; 9 | const int c_idx = index % c; 10 | const int nsample_idx = (index / c) % nsample; 11 | const int m_idx = index / nsample / c; 12 | const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx; 13 | output[index] = input[input_idx]; 14 | } 15 | 16 | __global__ void grouping_backward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ grad_output, const int *__restrict__ idx, float *__restrict__ grad_input) { 17 | // input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c) 18 | int index = blockIdx.x * blockDim.x + threadIdx.x; 19 | if (index >= m * nsample * c) return; 20 | const int c_idx = index % c; 21 | const int nsample_idx = (index / c) % nsample; 22 | const int m_idx = index / nsample / c; 23 | const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx; 24 | atomicAdd(grad_input + input_idx, grad_output[index]); 25 | } 26 | 27 | void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output) { 28 | // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c) 29 | dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK)); 30 | dim3 threads(THREADS_PER_BLOCK); 31 | grouping_forward_cuda_kernel<<>>(m, nsample, c, input, idx, output); 32 | } 33 | 34 | void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input) 35 | { 36 | // input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c) 37 | dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK)); 38 | dim3 threads(THREADS_PER_BLOCK); 39 | grouping_backward_cuda_kernel<<>>(m, nsample, c, grad_output, idx, grad_input); 40 | } 41 | -------------------------------------------------------------------------------- /segmentation/modules/pointops/src/grouping/grouping_cuda_kernel.h: 
-------------------------------------------------------------------------------- 1 | #ifndef _GROUPING_CUDA_KERNEL 2 | #define _GROUPING_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor); 8 | void grouping_backward_cuda(int m, int nsample, int c, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output); 15 | void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /segmentation/modules/pointops/src/interpolation/interpolation_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "interpolation_cuda_kernel.h" 6 | 7 | 8 | void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor) 9 | { 10 | const float *input = input_tensor.data_ptr(); 11 | const int *idx = idx_tensor.data_ptr(); 12 | const float *weight = weight_tensor.data_ptr(); 13 | float *output = output_tensor.data_ptr(); 14 | interpolation_forward_cuda_launcher(n, c, k, input, idx, weight, output); 15 | } 16 | 17 | void interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_input_tensor) 18 | { 19 | const float *grad_output = grad_output_tensor.data_ptr(); 20 | const int *idx = idx_tensor.data_ptr(); 21 | const float *weight = weight_tensor.data_ptr(); 22 | float *grad_input = 
grad_input_tensor.data_ptr(); 23 | interpolation_backward_cuda_launcher(n, c, k, grad_output, idx, weight, grad_input); 24 | } 25 | -------------------------------------------------------------------------------- /segmentation/modules/pointops/src/interpolation/interpolation_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "interpolation_cuda_kernel.h" 3 | 4 | 5 | __global__ void interpolation_forward_cuda_kernel(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output) 6 | { 7 | // input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c) 8 | int index = blockIdx.x * blockDim.x + threadIdx.x; 9 | if (index >= n * c) return; 10 | int c_idx = index % c; 11 | int n_idx = index / c; 12 | for (int i = 0; i < k; i++) 13 | { 14 | int idx_idx = n_idx * k + i; 15 | int input_idx = idx[idx_idx] * c + c_idx; 16 | output[index] += input[input_idx] * weight[idx_idx]; 17 | } 18 | } 19 | 20 | __global__ void interpolation_backward_cuda_kernel(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input) 21 | { 22 | // input: grad_output: (n, c), idx: (n, k), weight: (n, k), output: grad_input (m, c) 23 | int index = blockIdx.x * blockDim.x + threadIdx.x; 24 | if (index >= n * c) return; 25 | int c_idx = index % c; 26 | int n_idx = index / c; 27 | for (int i = 0; i < k; i++) 28 | { 29 | int idx_idx = n_idx * k + i; 30 | int input_idx = idx[idx_idx] * c + c_idx; 31 | atomicAdd(grad_input + input_idx, grad_output[index] * weight[idx_idx]); 32 | } 33 | } 34 | 35 | void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output) { 36 | // input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c) 37 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)); 38 | dim3 threads(THREADS_PER_BLOCK); 39 | 
interpolation_forward_cuda_kernel<<>>(n, c, k, input, idx, weight, output); 40 | } 41 | 42 | void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input) { 43 | // input: grad_output: (n, c), idx: (n, k), weight: (n, k), output: grad_input (m, c) 44 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)); 45 | dim3 threads(THREADS_PER_BLOCK); 46 | interpolation_backward_cuda_kernel<<>>(n, c, k, grad_output, idx, weight, grad_input); 47 | } 48 | -------------------------------------------------------------------------------- /segmentation/modules/pointops/src/interpolation/interpolation_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _INTERPOLATION_CUDA_KERNEL 2 | #define _INTERPOLATION_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor); 8 | void interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_input_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output); 15 | void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /segmentation/modules/pointops/src/knnquery/knnquery_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "knnquery_cuda_kernel.h" 6 | 7 | 8 | void knnquery_cuda(int m, int nsample, 
#include "../cuda_utils.h"
#include "knnquery_cuda_kernel.h"


// In-place swap helpers for the heap routines below.
__device__ void swap_float(float *x, float *y)
{
    float tmp = *x;
    *x = *y;
    *y = tmp;
}


__device__ void swap_int(int *x, int *y)
{
    int tmp = *x;
    *x = *y;
    *y = tmp;
}


// Sift the root of a k-element max-heap (keyed on dist) down to restore the
// heap property; idx is permuted in lockstep with dist.
__device__ void reheap(float *dist, int *idx, int k)
{
    int root = 0;
    int child = root * 2 + 1;
    while (child < k)
    {
        if(child + 1 < k && dist[child+1] > dist[child])
            child++;
        if(dist[root] > dist[child])
            return;
        swap_float(&dist[root], &dist[child]);
        swap_int(&idx[root], &idx[child]);
        root = child;
        child = root * 2 + 1;
    }
}


// Classic heap-sort tail phase: repeatedly move the current max to the end,
// leaving dist (and idx) sorted by ascending distance.
__device__ void heap_sort(float *dist, int *idx, int k)
{
    int i;
    for (i = k - 1; i > 0; i--)
    {
        swap_float(&dist[0], &dist[i]);
        swap_int(&idx[0], &idx[i]);
        reheap(dist, idx, i);
    }
}


// Map a flat point index to its batch id by scanning the cumulative offset
// array (offset[i] is the exclusive end index of batch i).
__device__ int get_bt_idx(int idx, const int *offset)
{
    int i = 0;
    while (1)
    {
        if (idx < offset[i])
            break;
        else
            i++;
    }
    return i;
}


// kNN query: one thread per query point.  The current nsample best squared
// distances are kept in a max-heap so the worst candidate sits at the root
// and can be replaced in O(log nsample).
// input: xyz (n, 3) new_xyz (m, 3)
// output: idx (m, nsample) dist2 (m, nsample), sorted by increasing distance
// WARNING: best_dist/best_idx are fixed 100-element stack arrays — this
// kernel silently assumes nsample <= 100.
__global__ void knnquery_cuda_kernel(int m, int nsample, const float *__restrict__ xyz, const float *__restrict__ new_xyz, const int *__restrict__ offset, const int *__restrict__ new_offset, int *__restrict__ idx, float *__restrict__ dist2) {
    int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (pt_idx >= m) return;

    // Advance the per-query pointers to this thread's row.
    new_xyz += pt_idx * 3;
    idx += pt_idx * nsample;
    dist2 += pt_idx * nsample;
    // Restrict the search to the query's own batch: the [start, end) slice
    // of xyz that belongs to the same sample as this query point.
    int bt_idx = get_bt_idx(pt_idx, new_offset);
    int start;
    if (bt_idx == 0)
        start = 0;
    else
        start = offset[bt_idx - 1];
    int end = offset[bt_idx];

    float new_x = new_xyz[0];
    float new_y = new_xyz[1];
    float new_z = new_xyz[2];

    float best_dist[100];
    int best_idx[100];
    for(int i = 0; i < nsample; i++){
        best_dist[i] = 1e10;
        best_idx[i] = start;
    }
    for(int i = start; i < end; i++){
        float x = xyz[i * 3 + 0];
        float y = xyz[i * 3 + 1];
        float z = xyz[i * 3 + 2];
        float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z);
        if (d2 < best_dist[0]){
            // Replace the current worst neighbor and restore the heap.
            best_dist[0] = d2;
            best_idx[0] = i;
            reheap(best_dist, best_idx, nsample);
        }
    }
    // Emit neighbors ordered by increasing squared distance.
    heap_sort(best_dist, best_idx, nsample);
    for(int i = 0; i < nsample; i++){
        idx[i] = best_idx[i];
        dist2[i] = best_dist[i];
    }
}


void knnquery_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2) {
    // input: new_xyz: (m, 3), xyz: (n, 3), idx: (m, nsample)
    dim3 blocks(DIVUP(m, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    // NOTE(review): the '<<<blocks, threads>>>' launch configuration was lost
    // during text extraction and has been restored — verify against upstream.
    knnquery_cuda_kernel<<<blocks, threads>>>(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2);
}
/segmentation/modules/pointops/src/knnquery/knnquery_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _KNNQUERY_CUDA_KERNEL 2 | #define _KNNQUERY_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void knnquery_cuda(int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor); 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | void knnquery_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2); 14 | 15 | #ifdef __cplusplus 16 | } 17 | #endif 18 | #endif 19 | -------------------------------------------------------------------------------- /segmentation/modules/pointops/src/pointops_api.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "knnquery/knnquery_cuda_kernel.h" 5 | #include "sampling/sampling_cuda_kernel.h" 6 | #include "grouping/grouping_cuda_kernel.h" 7 | #include "interpolation/interpolation_cuda_kernel.h" 8 | #include "aggregation/aggregation_cuda_kernel.h" 9 | #include "subtraction/subtraction_cuda_kernel.h" 10 | 11 | 12 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 13 | m.def("knnquery_cuda", &knnquery_cuda, "knnquery_cuda"); 14 | m.def("furthestsampling_cuda", &furthestsampling_cuda, "furthestsampling_cuda"); 15 | m.def("grouping_forward_cuda", &grouping_forward_cuda, "grouping_forward_cuda"); 16 | m.def("grouping_backward_cuda", &grouping_backward_cuda, "grouping_backward_cuda"); 17 | m.def("interpolation_forward_cuda", &interpolation_forward_cuda, "interpolation_forward_cuda"); 18 | m.def("interpolation_backward_cuda", &interpolation_backward_cuda, "interpolation_backward_cuda"); 19 | m.def("subtraction_forward_cuda", &subtraction_forward_cuda, "subtraction_forward_cuda"); 20 | 
m.def("subtraction_backward_cuda", &subtraction_backward_cuda, "subtraction_backward_cuda"); 21 | m.def("aggregation_forward_cuda", &aggregation_forward_cuda, "aggregation_forward_cuda"); 22 | m.def("aggregation_backward_cuda", &aggregation_backward_cuda, "aggregation_backward_cuda"); 23 | } 24 | -------------------------------------------------------------------------------- /segmentation/modules/pointops/src/sampling/sampling_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "sampling_cuda_kernel.h" 6 | 7 | 8 | void furthestsampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor) 9 | { 10 | const float *xyz = xyz_tensor.data_ptr(); 11 | const int *offset = offset_tensor.data_ptr(); 12 | const int *new_offset = new_offset_tensor.data_ptr(); 13 | float *tmp = tmp_tensor.data_ptr(); 14 | int *idx = idx_tensor.data_ptr(); 15 | furthestsampling_cuda_launcher(b, n, xyz, offset, new_offset, tmp, idx); 16 | } 17 | -------------------------------------------------------------------------------- /segmentation/modules/pointops/src/sampling/sampling_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "sampling_cuda_kernel.h" 3 | 4 | 5 | __device__ void __update(float *dists, int *dists_i, int idx1, int idx2) { 6 | const float v1 = dists[idx1], v2 = dists[idx2]; 7 | const int i1 = dists_i[idx1], i2 = dists_i[idx2]; 8 | dists[idx1] = max(v1, v2); 9 | dists_i[idx1] = v2 > v1 ? 
// ---- segmentation/modules/pointops/src/sampling/sampling_cuda_kernel.cu ----
// Iterative farthest point sampling; one CUDA block handles one batch sample.

// Merge step of the shared-memory arg-max reduction: slot idx1 keeps the
// larger of the two candidate distances and its point index.
__device__ void __update(float *dists, int *dists_i, int idx1, int idx2) {
    const float v1 = dists[idx1], v2 = dists[idx2];
    const int i1 = dists_i[idx1], i2 = dists_i[idx2];
    dists[idx1] = max(v1, v2);
    dists_i[idx1] = v2 > v1 ? i2 : i1;
}

// input xyz: (n, 3), tmp: (b, n_max)
// output idx: (m)
// offset / new_offset are inclusive prefix sums delimiting each batch
// sample's input points / sampling quota.
// NOTE(review): the template parameter list was stripped in this dump;
// reconstructed as <unsigned int block_size> to match its uses below.
template <unsigned int block_size>
__global__ void furthestsampling_cuda_kernel(const float *xyz, const int *offset, const int *new_offset, float *tmp, int *idx)
{
    __shared__ float dists[block_size];
    __shared__ int dists_i[block_size];

    int bid = blockIdx.x;
    // [start_n, end_n): this sample's points; [start_m, end_m): its output
    // slots; old: index of the most recently selected point.
    int start_n, end_n, start_m, end_m, old;
    if (bid == 0) {
        start_n = 0;
        end_n = offset[0];
        start_m = 0;
        end_m = new_offset[0];
        old = 0;
    }
    else {
        start_n = offset[bid - 1];
        end_n = offset[bid];
        start_m = new_offset[bid - 1];
        end_m = new_offset[bid];
        old = offset[bid - 1];
    }

    const int stride = block_size;
    int tid = threadIdx.x;
    // seed: the segment's first point is always selected
    if (tid == 0) idx[start_m] = start_n;

    __syncthreads();
    for (int j = start_m + 1; j < end_m; j++)
    {
        int besti = start_n;
        float best = -1;
        float x1 = xyz[old * 3 + 0];
        float y1 = xyz[old * 3 + 1];
        float z1 = xyz[old * 3 + 2];
        // strided scan: fold the latest selection into each point's
        // distance-to-set (kept in tmp) and track this thread's max
        for (int k = start_n + tid; k < end_n; k += stride)
        {
            float x2 = xyz[k * 3 + 0];
            float y2 = xyz[k * 3 + 1];
            float z2 = xyz[k * 3 + 2];
            float d = (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1);
            float d2 = min(d, tmp[k]);
            tmp[k] = d2;
            besti = d2 > best ? k : besti;
            best = d2 > best ? d2 : best;
        }
        dists[tid] = best;
        dists_i[tid] = besti;
        __syncthreads();

        // tree reduction over shared memory; the block-wide arg-max lands
        // in slot 0 (branches on the compile-time block_size unroll away)
        if (block_size >= 1024) {
            if (tid < 512) { __update(dists, dists_i, tid, tid + 512); }
            __syncthreads();
        }
        if (block_size >= 512) {
            if (tid < 256) { __update(dists, dists_i, tid, tid + 256); }
            __syncthreads();
        }
        if (block_size >= 256) {
            if (tid < 128) { __update(dists, dists_i, tid, tid + 128); }
            __syncthreads();
        }
        if (block_size >= 128) {
            if (tid < 64) { __update(dists, dists_i, tid, tid + 64); }
            __syncthreads();
        }
        if (block_size >= 64) {
            if (tid < 32) { __update(dists, dists_i, tid, tid + 32); }
            __syncthreads();
        }
        if (block_size >= 32) {
            if (tid < 16) { __update(dists, dists_i, tid, tid + 16); }
            __syncthreads();
        }
        if (block_size >= 16) {
            if (tid < 8) { __update(dists, dists_i, tid, tid + 8); }
            __syncthreads();
        }
        if (block_size >= 8) {
            if (tid < 4) { __update(dists, dists_i, tid, tid + 4); }
            __syncthreads();
        }
        if (block_size >= 4) {
            if (tid < 2) { __update(dists, dists_i, tid, tid + 2); }
            __syncthreads();
        }
        if (block_size >= 2) {
            if (tid < 1) { __update(dists, dists_i, tid, tid + 1); }
            __syncthreads();
        }

        // every thread reads the winner so all use the same "old" next round
        old = dists_i[0];
        if (tid == 0)
            idx[j] = old;
    }
}
// Host-side dispatcher: pick the block size via opt_n_threads(n) and
// instantiate the matching kernel template; grid of b blocks, one per
// batch sample.
// NOTE(review): the launch chevrons were stripped in this dump;
// reconstructed as <<<b, n_threads, 0>>> -- confirm against the repository.
void furthestsampling_cuda_launcher(int b, int n, const float *xyz, const int *offset, const int *new_offset, float *tmp, int *idx)
{
    unsigned int n_threads = opt_n_threads(n);
    switch (n_threads) {
        case 1024:
            furthestsampling_cuda_kernel<1024><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
            break;
        case 512:
            furthestsampling_cuda_kernel<512><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
            break;
        case 256:
            furthestsampling_cuda_kernel<256><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
            break;
        case 128:
            furthestsampling_cuda_kernel<128><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
            break;
        case 64:
            furthestsampling_cuda_kernel<64><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
            break;
        case 32:
            furthestsampling_cuda_kernel<32><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
            break;
        case 16:
            furthestsampling_cuda_kernel<16><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
            break;
        case 8:
            furthestsampling_cuda_kernel<8><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
            break;
        case 4:
            furthestsampling_cuda_kernel<4><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
            break;
        case 2:
            furthestsampling_cuda_kernel<2><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
            break;
        case 1:
            furthestsampling_cuda_kernel<1><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
            break;
        default:
            furthestsampling_cuda_kernel<512><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
    }
}


// ---- segmentation/modules/pointops/src/sampling/sampling_cuda_kernel.h ----
// NOTE(review): include targets were stripped in this dump; reconstructed
// from the sibling pointops headers -- confirm.
#ifndef _SAMPLING_CUDA_KERNEL
#define _SAMPLING_CUDA_KERNEL
#include <torch/serialize/tensor.h>
#include <vector>
#include <ATen/cuda/CUDAContext.h>

// Tensor-level wrapper (defined in sampling_cuda.cpp).
void furthestsampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor);

#ifdef __cplusplus
extern "C" {
#endif

// Raw-pointer launcher (defined in sampling_cuda_kernel.cu).
void furthestsampling_cuda_launcher(int b, int n, const float *xyz, const int *offset, const int *new_offset, float *tmp, int *idx);

#ifdef __cplusplus
}
#endif
#endif
// ---- segmentation/modules/pointops/src/subtraction/subtraction_cuda.cpp ----
// NOTE(review): include targets and data_ptr template arguments were stripped
// in this dump; reconstructed from the sibling pointops wrappers -- confirm.
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>
#include <vector>
#include "subtraction_cuda_kernel.h"


// Forward wrapper: output[i, k, c] = input1[i, c] - input2[idx[i, k], c].
void subtraction_forward_cuda(int n, int nsample, int c, at::Tensor input1_tensor, at::Tensor input2_tensor, at::Tensor idx_tensor, at::Tensor output_tensor)
{
    const float *input1 = input1_tensor.data_ptr<float>();
    const float *input2 = input2_tensor.data_ptr<float>();
    const int *idx = idx_tensor.data_ptr<int>();
    float *output = output_tensor.data_ptr<float>();
    subtraction_forward_cuda_launcher(n, nsample, c, input1, input2, idx, output);
}

// Backward wrapper: scatter grad_output into the two input gradients.
void subtraction_backward_cuda(int n, int nsample, int c, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input1_tensor, at::Tensor grad_input2_tensor)
{
    const int *idx = idx_tensor.data_ptr<int>();
    const float *grad_output = grad_output_tensor.data_ptr<float>();
    float *grad_input1 = grad_input1_tensor.data_ptr<float>();
    float *grad_input2 = grad_input2_tensor.data_ptr<float>();
    subtraction_backward_cuda_launcher(n, nsample, c, idx, grad_output, grad_input1, grad_input2);
}


// ---- segmentation/modules/pointops/src/subtraction/subtraction_cuda_kernel.cu ----
#include "../cuda_utils.h"
#include "subtraction_cuda_kernel.h"


// One thread per output element; the flat index decomposes as
// (n_idx, nsample_idx, c_idx) over the (n, nsample, c) output.
__global__ void subtraction_forward_cuda_kernel(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) {
    // input: input1: (n, c), input2: (n, c), idx: (n, nsample), output: (n, nsample, c)
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index >= n * nsample * c) return;
    const int c_idx = index % c;
    const int nsample_idx = (index / c) % nsample;
    const int n_idx = index / nsample / c;
    const int idx_idx = n_idx * nsample + nsample_idx;
    const int input1_idx = n_idx * c + c_idx;
    const int input2_idx = idx[idx_idx] * c + c_idx;
    output[index] = input1[input1_idx] - input2[input2_idx];
}

// Mirror of the forward indexing; atomicAdd because many (point, neighbour)
// pairs can touch the same input row.
__global__ void subtraction_backward_cuda_kernel(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) {
    // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c)
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index >= n * nsample * c) return;
    const int c_idx = index % c;
    const int nsample_idx = (index / c) % nsample;
    const int n_idx = index / nsample / c;
    const int idx_idx = n_idx * nsample + nsample_idx;
    const int input1_idx = n_idx * c + c_idx;
    const int input2_idx = idx[idx_idx] * c + c_idx;
    atomicAdd(grad_input1 + input1_idx, grad_output[index]);
    atomicAdd(grad_input2 + input2_idx, -grad_output[index]);
}

// NOTE(review): launch chevrons below were stripped in this dump;
// reconstructed as <<<blocks, threads, 0>>> -- confirm.
void subtraction_forward_cuda_launcher(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) {
    // input: input1: (n, c), input2: (n, c), idx: (n, nsample), output: (n, nsample, c)
    dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    subtraction_forward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, input1, input2, idx, output);
}

void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) {
    // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c)
    dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    subtraction_backward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, idx, grad_output, grad_input1, grad_input2);
}
// ---- segmentation/modules/pointops/src/subtraction/subtraction_cuda_kernel.h ----
// NOTE(review): include targets were stripped in this dump; reconstructed
// from the sibling pointops headers -- confirm.
#ifndef _SUBTRACTION_CUDA_KERNEL
#define _SUBTRACTION_CUDA_KERNEL
#include <torch/serialize/tensor.h>
#include <vector>
#include <ATen/cuda/CUDAContext.h>

// Tensor-level wrappers (subtraction_cuda.cpp):
// forward: output[i, k, c] = input1[i, c] - input2[idx[i, k], c]
// backward: scatter grad_output into grad_input1 (+) and grad_input2 (-)
void subtraction_forward_cuda(int n, int nsample, int c, at::Tensor input1_tensor, at::Tensor input2_tensor, at::Tensor idx_tensor, at::Tensor output_tensor);
void subtraction_backward_cuda(int n, int nsample, int c, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input1_tensor, at::Tensor grad_input2_tensor);

#ifdef __cplusplus
extern "C" {
#endif

// Raw-pointer launchers (subtraction_cuda_kernel.cu).
void subtraction_forward_cuda_launcher(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output);
void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2);

#ifdef __cplusplus
}
#endif
#endif
class PointTransformerLayer(nn.Module):
    """Vector self-attention over local k-NN neighbourhoods (Point Transformer).

    Operates on the stacked-batch layout: points p (n, 3), features x (n, c),
    and per-sample offsets o (b) as used by the pointops CUDA kernels.
    """

    def __init__(self, in_planes, out_planes, share_planes=8, nsample=16):
        super().__init__()
        # mid_planes == out_planes here (divisor is 1), so q/k have the same
        # width as v.
        self.mid_planes = mid_planes = out_planes // 1
        self.out_planes = out_planes
        self.share_planes = share_planes
        self.nsample = nsample
        self.linear_q = nn.Linear(in_planes, mid_planes)
        self.linear_k = nn.Linear(in_planes, mid_planes)
        self.linear_v = nn.Linear(in_planes, out_planes)
        # positional-encoding MLP: relative xyz (3) -> 3 -> out_planes
        self.linear_p = nn.Sequential(nn.Linear(3, 3), nn.BatchNorm1d(3), nn.ReLU(inplace=True),
                                      nn.Linear(3, out_planes))
        # attention-weight MLP; NOTE(review): the last layer is declared with
        # out_planes // share_planes, which equals mid_planes // share_planes
        # only because mid_planes == out_planes above.
        self.linear_w = nn.Sequential(nn.BatchNorm1d(mid_planes), nn.ReLU(inplace=True),
                                      nn.Linear(mid_planes, mid_planes // share_planes),
                                      nn.BatchNorm1d(mid_planes // share_planes), nn.ReLU(inplace=True),
                                      nn.Linear(out_planes // share_planes, out_planes // share_planes))
        # softmax over the nsample neighbour dimension
        self.softmax = nn.Softmax(dim=1)

    def forward(self, pxo) -> torch.Tensor:
        p, x, o = pxo  # (n, 3), (n, c), (b)
        x_q, x_k, x_v = self.linear_q(x), self.linear_k(x), self.linear_v(x)  # (n, c)
        x_k = pointops.queryandgroup(self.nsample, p, p, x_k, None, o, o, use_xyz=True)  # (n, nsample, 3+c)
        x_v = pointops.queryandgroup(self.nsample, p, p, x_v, None, o, o, use_xyz=False)  # (n, nsample, c)
        # split grouped keys into relative coordinates and features
        p_r, x_k = x_k[:, :, 0:3], x_k[:, :, 3:]
        # BatchNorm1d inside linear_p expects channels-first (n, 3, nsample),
        # hence the transpose around the i == 1 entry only
        for i, layer in enumerate(self.linear_p):
            # (n, nsample, c)
            p_r = layer(p_r.transpose(1, 2).contiguous()).transpose(1, 2).contiguous() if i == 1 else layer(p_r)
        w = x_k - x_q.unsqueeze(1) + p_r.view(p_r.shape[0], p_r.shape[1], self.out_planes // self.mid_planes,
                                              self.mid_planes).sum(2)  # (n, nsample, c)
        # same channels-first trick for each BatchNorm1d in linear_w
        for i, layer in enumerate(self.linear_w):
            w = layer(w.transpose(1, 2).contiguous()).transpose(1, 2).contiguous() if i % 3 == 0 else layer(w)
        w = self.softmax(w)  # (n, nsample, c)
        n, nsample, c = x_v.shape
        s = self.share_planes
        # each attention weight is shared by a group of s channels
        x = ((x_v + p_r).view(n, nsample, s, c // s) * w.unsqueeze(2)).sum(1).view(n, c)
        return x
class TransitionDown(nn.Module):
    """Downsampling stage: farthest-point-sample a 1/stride subset, then
    max-pool grouped neighbour features onto the kept points (stride == 1
    degenerates to a shared pointwise MLP)."""

    def __init__(self, in_planes, out_planes, stride=1, nsample=16, num_sector=1):
        super().__init__()
        self.stride, self.nsample, self.num_sector = stride, nsample, num_sector
        if stride != 1:
            # +3: grouped features are concatenated with relative xyz
            self.linear = nn.Linear(3 + in_planes, out_planes, bias=False)
            self.pool = nn.MaxPool1d(nsample)
        else:
            self.linear = nn.Linear(in_planes, out_planes, bias=False)
        self.bn = nn.BatchNorm1d(out_planes)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, pxo):
        p, x, o = pxo  # (n, 3), (n, c), (b)
        if self.stride != 1:
            # build the downsampled offset vector: each sample keeps
            # 1/stride of its points
            n_o, count = [o[0].item() // self.stride], o[0].item() // self.stride
            for i in range(1, o.shape[0]):
                count += (o[i].item() - o[i - 1].item()) // self.stride
                n_o.append(count)
            n_o = torch.cuda.IntTensor(n_o)
            # sector-constrained FPS is only used during training
            if self.num_sector > 1 and self.training:
                idx = pointops.sectorized_fps(p, o, n_o, self.num_sector)  # [M]
            else:
                idx = pointops.furthestsampling(p, o, n_o)  # [M]
            n_p = p[idx.long(), :]  # (m, 3)
            x = pointops.queryandgroup(self.nsample, p, n_p, x, None, o, n_o, use_xyz=True)  # (m, 3+c, nsample)
            x = self.relu(self.bn(self.linear(x).transpose(1, 2).contiguous()))  # (m, c, nsample)
            x = self.pool(x).squeeze(-1)  # (m, c)
            p, o = n_p, n_o
        else:
            x = self.relu(self.bn(self.linear(x)))  # (n, c)
        return [p, x, o]


class TransitionUp(nn.Module):
    """Upsampling stage: interpolate coarse features back to the finer level
    and fuse with a skip connection; with out_planes=None (top of the
    decoder) it instead concatenates a per-sample global mean feature."""

    def __init__(self, in_planes, out_planes=None):
        super().__init__()
        if out_planes is None:
            # summary branch: feature + per-sample mean -> in_planes
            self.linear1 = nn.Sequential(nn.Linear(2 * in_planes, in_planes), nn.BatchNorm1d(in_planes),
                                         nn.ReLU(inplace=True))
            self.linear2 = nn.Sequential(nn.Linear(in_planes, in_planes), nn.ReLU(inplace=True))
        else:
            # skip branch (fine level) and interpolation branch (coarse level)
            self.linear1 = nn.Sequential(nn.Linear(out_planes, out_planes), nn.BatchNorm1d(out_planes),
                                         nn.ReLU(inplace=True))
            self.linear2 = nn.Sequential(nn.Linear(in_planes, out_planes), nn.BatchNorm1d(out_planes),
                                         nn.ReLU(inplace=True))

    def forward(self, pxo1, pxo2=None):
        if pxo2 is None:
            _, x, o = pxo1  # (n, 3), (n, c), (b)
            x_tmp = []
            # append each sample's mean feature to every one of its points
            for i in range(o.shape[0]):
                if i == 0:
                    s_i, e_i, cnt = 0, o[0], o[0]
                else:
                    s_i, e_i, cnt = o[i - 1], o[i], o[i] - o[i - 1]
                x_b = x[s_i:e_i, :]
                x_b = torch.cat((x_b, self.linear2(x_b.sum(0, True) / cnt).repeat(cnt, 1)), 1)
                x_tmp.append(x_b)
            x = torch.cat(x_tmp, 0)
            x = self.linear1(x)
        else:
            p1, x1, o1 = pxo1
            p2, x2, o2 = pxo2
            # fine skip features + coarse features interpolated onto p1
            x = self.linear1(x1) + pointops.interpolation(p2, p1, self.linear2(x2), o2, o1)
        return x
class PointTransformerBlock(nn.Module):
    """Residual block: linear -> point-transformer attention -> linear, each
    followed by BatchNorm, with an identity skip around the whole stack."""

    expansion = 1

    def __init__(self, in_planes, planes, share_planes=8, nsample=16):
        super(PointTransformerBlock, self).__init__()
        self.linear1 = nn.Linear(in_planes, planes, bias=False)
        self.bn1 = nn.BatchNorm1d(planes)
        self.transformer2 = PointTransformerLayer(planes, planes, share_planes, nsample)
        self.bn2 = nn.BatchNorm1d(planes)
        self.linear3 = nn.Linear(planes, planes * self.expansion, bias=False)
        self.bn3 = nn.BatchNorm1d(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, pxo):
        coords, feats, offsets = pxo  # (n, 3), (n, c), (b)
        shortcut = feats
        feats = self.relu(self.bn1(self.linear1(feats)))
        feats = self.relu(self.bn2(self.transformer2([coords, feats, offsets])))
        feats = self.bn3(self.linear3(feats))
        feats += shortcut
        return [coords, self.relu(feats), offsets]


def xyz2sphere(xyz, normalize=True):
    """
    Convert XYZ to spherical coordinates (rho, theta, phi).

    reference: https://en.wikipedia.org/wiki/Spherical_coordinate_system

    :param xyz: [N, 3] / [N, G, 3]
    :param normalize: rescale theta to [0, 1] and phi to [0, 1]
    :return: (rho, theta, phi) [N, 3] / [N, G, 3]
    """
    radius = torch.clamp(xyz.pow(2).sum(dim=-1, keepdim=True).sqrt(), min=0)  # [0, inf]
    polar = torch.acos(xyz[..., 2, None] / radius)  # [0, pi]
    azimuth = torch.atan2(xyz[..., 1, None], xyz[..., 0, None])  # [-pi, pi]
    # points at the origin yield 0/0 -> NaN in acos; define their polar as 0
    polar[radius == 0] = 0

    if normalize:
        polar = polar / np.pi            # [0, 1]
        azimuth = azimuth / (2 * np.pi) + .5  # [0, 1]
    return torch.cat([radius, polar, azimuth], dim=-1)
def xyz2cylind(xyz, normalize=True):
    """
    Convert XYZ to cylindrical coordinates (rho, phi, z).

    reference: https://en.wikipedia.org/wiki/Cylindrical_coordinate_system

    :param normalize: rescale phi and z to [0, 1]
    :param xyz: [N, 3] / [N, G, 3]
    :return: (rho, phi, z) [N, 3] / [N, G, 3]
    """
    radial = torch.sqrt((xyz[..., :2] ** 2).sum(dim=-1, keepdim=True))
    radial = torch.clamp(radial, 0, 1)  # [0, 1]
    angle = torch.atan2(xyz[..., 1, None], xyz[..., 0, None])  # [-pi, pi]
    height = torch.clamp(xyz[..., 2, None], -1, 1)  # [-1, 1]

    if normalize:
        angle = angle / (2 * np.pi) + .5
        height = (height + 1.) / 2.
    return torch.cat([radial, angle, height], dim=-1)


def cal_normal(group_xyz, offset, random_inv=False, is_group=False):
    """
    Unit normal of each triangle, sign-fixed so the x component of the
    (group-leading) normal is positive; optionally flipped per batch sample
    with probability 0.5.

    :param group_xyz: [N, K=3, 3] / [N, G, K=3, 3]
    :param offset: per-sample prefix sums delimiting the stacked batch
    :param random_inv: flip each sample's normals with prob 0.5 (train-time aug)
    :param is_group: True for the grouped [N, G, K, 3] layout
    """
    v01 = group_xyz[..., 1, :] - group_xyz[..., 0, :]  # [N, 3]
    v02 = group_xyz[..., 2, :] - group_xyz[..., 0, :]  # [N, 3]

    raw = torch.cross(v01, v02, dim=-1)
    unit = raw / torch.norm(raw, dim=-1, keepdim=True)  # [N, 3] / [N, G, 3]
    if is_group:
        # the group shares the sign of its leading normal
        sign = (unit[..., 0:1, 0] > 0).float() * 2. - 1.
    else:
        sign = (unit[..., 0] > 0).float() * 2. - 1.  # keep x_n positive
    unit = unit * sign.unsqueeze(-1)

    # per-sample random inversion of the normal direction (prob 0.5)
    if random_inv:
        flip = np.random.rand(offset.shape[0]) < 0.5
        bounds = [0] + list(offset.cpu().numpy())
        pieces = []
        for k in range(len(bounds) - 1):
            piece = torch.ones((bounds[k + 1] - bounds[k], 1), dtype=torch.float32)
            if not flip[k]:
                piece *= -1
            pieces.append(piece)
        mask = torch.cat(pieces, dim=0).to(unit.device)
        unit = unit * (mask if not is_group else mask.unsqueeze(-1))

    return unit
def cal_center(group_xyz):
    """
    Calculate Global Coordinates of the Center of Triangle

    :param group_xyz: [N, K, 3] / [N, G, K, 3]; K >= 3
    :return: [N, 3] / [N, G, 3]
    """
    center = torch.mean(group_xyz, dim=-2)
    return center


def cal_area(group_xyz):
    """
    Calculate Area of Triangle

    Combines the three signed projected-plane determinants; NOTE this equals
    twice the geometric triangle area — kept as-is for backward compatibility
    (callers use it as a relative score).

    :param group_xyz: [N, K, 3] / [N, G, K, 3]; K = 3
    :return: [N, 1] / [N, G, 1]
    """
    # Fix: build the constant-ones column with ones_like so it matches the
    # input's device AND dtype; torch.ones(pad_shape) allocated on the default
    # device in float32, making torch.cat/det fail for CUDA or float64 inputs.
    ones = torch.ones_like(group_xyz[..., 0, None])
    det_xy = torch.det(torch.cat([group_xyz[..., 0, None], group_xyz[..., 1, None], ones], dim=-1))
    det_yz = torch.det(torch.cat([group_xyz[..., 1, None], group_xyz[..., 2, None], ones], dim=-1))
    det_zx = torch.det(torch.cat([group_xyz[..., 2, None], group_xyz[..., 0, None], ones], dim=-1))
    area = torch.sqrt(det_xy ** 2 + det_yz ** 2 + det_zx ** 2).unsqueeze(-1)
    return area
def cal_const(normal, center, is_normalize=True):
    """
    Calculate Constant Term (Standard Version, with x_normal to be 1)

    math::
        const = x_nor * x_0 + y_nor * y_0 + z_nor * z_0

    :param is_normalize: divide by sqrt(3) to bound the magnitude
    :param normal: [N, 3] / [N, G, 3]
    :param center: [N, 3] / [N, G, 3]
    :return: [N, 1] / [N, G, 1]
    """
    const = torch.sum(normal * center, dim=-1, keepdim=True)
    factor = torch.sqrt(torch.Tensor([3])).to(normal.device)
    const = const / factor if is_normalize else const

    return const


def check_nan(normal, center, pos=None):
    """
    Check & Remove NaN in normal tensor: every row containing a NaN is
    replaced (IN PLACE) by the first NaN-free row of the batch, and the
    matching rows of center / pos are substituted the same way.

    :param pos: [N, 1]
    :param center: [N, 3]
    :param normal: [N, 3]
    """
    N, _ = normal.shape
    # rows where any component is NaN
    mask = torch.sum(torch.isnan(normal), dim=-1) > 0
    # index of the first valid (NaN-free) row
    mask_first = torch.argmax((~mask).int(), dim=-1)

    normal_first = normal[None, mask_first].repeat([N, 1])
    normal[mask] = normal_first[mask]
    center_first = center[None, mask_first].repeat([N, 1])
    center[mask] = center_first[mask]

    if pos is not None:
        pos_first = pos[None, mask_first].repeat([N, 1])
        pos[mask] = pos_first[mask]
        return normal, center, pos
    return normal, center


def check_nan_umb(normal, center, pos=None):
    """
    Check & Remove NaN in normal tensor (umbrella/grouped variant): within
    each of the N umbrellas, NaN group-rows are replaced (IN PLACE) by that
    umbrella's first NaN-free group.

    :param pos: [N, G, 1]
    :param center: [N, G, 3]
    :param normal: [N, G, 3]
    """
    N, G, _ = normal.shape
    mask = torch.sum(torch.isnan(normal), dim=-1) > 0
    # per-umbrella index of the first valid group
    mask_first = torch.argmax((~mask).int(), dim=-1)

    normal_first = normal[torch.arange(N), None, mask_first].repeat([1, G, 1])
    normal[mask] = normal_first[mask]
    center_first = center[torch.arange(N), None, mask_first].repeat([1, G, 1])
    center[mask] = center_first[mask]

    if pos is not None:
        pos_first = pos[torch.arange(N), None, mask_first].repeat([1, G, 1])
        pos[mask] = pos_first[mask]
        return normal, center, pos
    return normal, center
def fnv_hash_vec(arr):
    """
    FNV64-1A: hash each row of a 2-D integer array into one uint64 key.
    """
    assert arr.ndim == 2
    # Floor first for negative coordinates
    cols = arr.copy().astype(np.uint64, copy=False)
    prime = np.uint64(1099511628211)
    hashed = np.full(arr.shape[0], np.uint64(14695981039346656037), dtype=np.uint64)
    for column in cols.T:
        hashed = np.bitwise_xor(hashed * prime, column)
    return hashed


def ravel_hash_vec(arr):
    """
    Ravel the coordinates after subtracting the min coordinates
    (Fortran-style mixed-radix key over the per-column extents).
    """
    assert arr.ndim == 2
    shifted = arr.copy()
    shifted -= shifted.min(0)
    shifted = shifted.astype(np.uint64, copy=False)
    extents = shifted.max(0).astype(np.uint64) + 1

    keys = np.zeros(shifted.shape[0], dtype=np.uint64)
    for col in range(shifted.shape[1] - 1):
        keys += shifted[:, col]
        keys *= extents[col + 1]
    keys += shifted[:, -1]
    return keys


def voxelize(coord, voxel_size=0.05, hash_type='fnv', mode=0):
    """Group points into voxels of edge voxel_size.

    mode 0 (train): return the index of one random point per voxel.
    otherwise (val): return the key-sorted point order plus per-voxel counts.
    """
    grid = np.floor(coord / np.array(voxel_size))

    key = ravel_hash_vec(grid) if hash_type == 'ravel' else fnv_hash_vec(grid)

    order = np.argsort(key)
    sorted_key = key[order]
    _, count = np.unique(sorted_key, return_counts=True)
    if mode == 0:  # train mode: random representative per voxel
        starts = np.cumsum(np.insert(count, 0, 0)[0:-1])
        pick = starts + np.random.randint(0, count.max(), count.size) % count
        return order[pick]
    # val mode
    return order, count
# ---- segmentation/scripts/s3dis/test_pointnet2.sh ----
#!/bin/bash
# Evaluate a trained PointNet++ (SSG) checkpoint on S3DIS Area 5,
# with median filtering of predictions enabled (--filter).

export PYTHONPATH=./

log_dir='pointnet2_A5'

python3 tool/test_s3dis.py --log_dir ${log_dir} \
                           --batch_size_test 12 \
                           --gpu_id 0 \
                           --model pointnet2.pointnet2_ssg \
                           --test_area 5 \
                           --filter

# ---- segmentation/scripts/s3dis/test_pointtransformer.sh ----
#!/bin/bash
# Evaluate a trained Point Transformer checkpoint on S3DIS Area 5.

export PYTHONPATH=./

log_dir='pointtransformer_A5'

python3 tool/test_s3dis.py --log_dir ${log_dir} \
                           --batch_size_test 12 \
                           --gpu_id 0 \
                           --model pointtransformer.pointtransformer \
                           --test_area 5 \
                           --filter

# ---- segmentation/scripts/s3dis/test_repsurf_umb.sh ----
#!/bin/bash
# Evaluate a trained RepSurf (umbrella, SSG) checkpoint on S3DIS Area 5.

export PYTHONPATH=./

log_dir='repsurf_umb_A5'

python3 tool/test_s3dis.py --log_dir ${log_dir} \
                           --batch_size_test 12 \
                           --gpu_id 0 \
                           --model repsurf.repsurf_umb_ssg \
                           --test_area 5 \
                           --filter
# ---- segmentation/scripts/s3dis/train_pointnet2.sh ----
#!/bin/bash
# Train PointNet++ (SSG) on S3DIS (test on Area 5), 4 GPUs, AdamW,
# step LR decay at epochs 60/80, with color/scale augmentations.

export PYTHONPATH=./

log_dir='pointnet2_A5'

python3 tool/train.py --log_dir ${log_dir} --dataset S3DIS \
                      --batch_size 8 \
                      --batch_size_val 24 \
                      --workers 24 \
                      --gpus 0 1 2 3 \
                      --model pointnet2.pointnet2_ssg \
                      --optimizer AdamW \
                      --min_val 60 \
                      --epoch 100 \
                      --lr_decay_epochs 60 80 \
                      --test_area 5 \
                      --learning_rate 0.006 \
                      --lr_decay 0.1 \
                      --weight_decay 1e-2 \
                      --aug_scale \
                      --color_contrast \
                      --color_shift \
                      --color_jitter \
                      --hs_shift

# ---- segmentation/scripts/s3dis/train_pointtransformer.sh ----
#!/bin/bash
# Train Point Transformer on S3DIS (test on Area 5); same schedule as above.

export PYTHONPATH=./

log_dir='pointtransformer_A5'

python3 tool/train.py --log_dir ${log_dir} --dataset S3DIS \
                      --batch_size 8 \
                      --batch_size_val 24 \
                      --workers 24 \
                      --gpus 0 1 2 3 \
                      --model pointtransformer.pointtransformer \
                      --optimizer AdamW \
                      --min_val 60 \
                      --epoch 100 \
                      --lr_decay_epochs 60 80 \
                      --test_area 5 \
                      --learning_rate 0.006 \
                      --lr_decay 0.1 \
                      --weight_decay 1e-2 \
                      --aug_scale \
                      --color_contrast \
                      --color_shift \
                      --color_jitter \
                      --hs_shift

# ---- segmentation/scripts/s3dis/train_repsurf_umb.sh ----
#!/bin/bash
# Train RepSurf (umbrella, SSG) on S3DIS (test on Area 5); note: uses
# --freeze_epoch 10 and no --aug_scale, unlike the two scripts above.

export PYTHONPATH=./

log_dir='repsurf_umb_A5'

python3 tool/train.py --log_dir ${log_dir} --dataset S3DIS \
                      --batch_size 8 \
                      --batch_size_val 24 \
                      --workers 24 \
                      --gpus 0 1 2 3 \
                      --model repsurf.repsurf_umb_ssg \
                      --optimizer AdamW \
                      --min_val 60 \
                      --epoch 100 \
                      --lr_decay_epochs 60 80 \
                      --test_area 5 \
                      --learning_rate 0.006 \
                      --lr_decay 0.1 \
                      --weight_decay 1e-2 \
                      --freeze_epoch 10 \
                      --color_contrast \
                      --color_shift \
                      --color_jitter \
                      --hs_shift
import time 9 | import random 10 | import numpy as np 11 | import argparse 12 | import collections 13 | from pathlib import Path 14 | 15 | import torch 16 | import torch.nn.parallel 17 | import torch.optim 18 | import torch.utils.data 19 | 20 | from util.utils import AverageMeter, intersectionAndUnion, get_rgb_stat, pc_median_filter_gpu 21 | from util.utils import get_model, get_logger 22 | from modules.voxelize_utils import voxelize 23 | 24 | 25 | LABEL2COLOR = collections.OrderedDict([ 26 | ('ceiling', [0, 255, 0]), ('floor', [0, 0, 255]), ('wall', [0, 255, 255]), ('beam', [255, 255, 0]), 27 | ('column', [255, 0, 255]), ('window', [100, 100, 255]), ('door', [200, 200, 100]), ('chair', [170, 120, 200]), 28 | ('table', [255, 0, 0]), ('bookcase', [200, 100, 100]), ('sofa', [10, 200, 100]), ('board', [200, 200, 200]), 29 | ('clutter', [50, 50, 50])]) 30 | LABEL2CLASS = list(LABEL2COLOR.keys()) 31 | PALETTE = np.array(list(LABEL2COLOR.values()), dtype=np.int64) 32 | 33 | 34 | def parse_args(): 35 | parser = argparse.ArgumentParser('Model') 36 | 37 | # Basic 38 | parser.add_argument('--log_dir', type=str, default=None, help='experiment root') 39 | parser.add_argument('--data_dir', type=str, default='./data/S3DIS/trainval_fullarea', help='data dir') 40 | parser.add_argument('--log_root', type=str, default='./log', help='log root dir') 41 | parser.add_argument('--model_path', type=str, default=None, help='saved model weight') 42 | parser.add_argument('--model', default='pointnet_sem_seg', help='model name [default: pointnet_sem_seg]') 43 | parser.add_argument('--gpu_id', type=str, default='0') 44 | parser.add_argument('--seed', type=int, default=1000, help='Test Seed') 45 | 46 | # Test 47 | parser.add_argument('--batch_size_test', type=int, default=12, help='batch size in test [default: 24]') 48 | parser.add_argument('--test_area', type=int, default=5, help='Which area to use for test [default: 5]') 49 | parser.add_argument('--filter', action='store_true', default=False, 
help='Apply median filter [default: False]') 50 | parser.add_argument('--data_norm', type=str, default='mean', help='initializer for model [mean, min, z_min]') 51 | parser.add_argument('--visual', action='store_true', default=False, help='Output visual results [default: False]') 52 | 53 | # Modeling 54 | parser.add_argument('--group_size', type=int, default=8, help='Size of umbrella group [default: 8]') 55 | parser.add_argument('--return_polar', action='store_true', default=False, 56 | help='Whether to return polar coordinate in surface abstraction [default: False]') 57 | 58 | return parser.parse_args() 59 | 60 | 61 | def main(): 62 | global args, logger 63 | 64 | args = parse_args() 65 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id 66 | random.seed(args.seed) 67 | np.random.seed(args.seed) 68 | 69 | args.dataset, args.num_class, args.voxel_max, args.voxel_size, args.in_channel, args.ignore_label = \ 70 | 'S3DIS', 13, 80000, 0.04, 6, 255 71 | 72 | experiment_dir = Path(os.path.join(args.log_root, 'PointAnalysis', 'log', 'S3DIS')) 73 | experiment_dir = experiment_dir.joinpath(args.log_dir) 74 | checkpoints_dir = experiment_dir.joinpath('checkpoints/') 75 | args.ckpt_dir = str(checkpoints_dir) 76 | log_dir = experiment_dir.joinpath('logs/') 77 | args.log_dir = str(log_dir) 78 | result_dir = experiment_dir.joinpath('visual/') 79 | result_dir.mkdir(exist_ok=True) 80 | args.result_dir = str(result_dir) 81 | 82 | logger = get_logger(args.log_dir, 'test_%s' % args.model) 83 | logger.info(json.dumps(vars(args), indent=4, sort_keys=True)) 84 | logger.info("=> creating models ...") 85 | model = get_model(args).cuda() 86 | logger.info(model) 87 | 88 | ckpt_file = os.path.join(args.ckpt_dir, 'model_best.pth') if args.model_path is None else args.model_path 89 | if os.path.isfile(ckpt_file): 90 | logger.info("=> loading checkpoint '{}'".format(ckpt_file)) 91 | checkpoint = torch.load(ckpt_file) 92 | state_dict = checkpoint['state_dict'] 93 | new_state_dict = 
collections.OrderedDict() 94 | for k, v in state_dict.items(): 95 | name = k[7:] 96 | new_state_dict[name] = v.cpu() 97 | model.load_state_dict(new_state_dict, strict=True) 98 | logger.info("=> loaded checkpoint '{}'".format(ckpt_file)) 99 | else: 100 | raise RuntimeError("=> no checkpoint found at '{}'".format(ckpt_file)) 101 | 102 | test(model) 103 | 104 | 105 | def data_prepare(): 106 | """ Return area names of the test dataset """ 107 | data_list = sorted(os.listdir(args.data_dir)) 108 | data_list = [item[:-4] for item in data_list if 'Area_{}'.format(args.test_area) in item] 109 | print("Totally {} samples in val set.".format(len(data_list))) 110 | 111 | return data_list 112 | 113 | 114 | def data_load(data_name): 115 | """ Load data by area name """ 116 | data_path = os.path.join(args.data_dir, data_name + '.npy') 117 | data = np.load(data_path) # xyzrgbl, N*7 118 | coord, feat, label = data[:, :3], data[:, 3:6], data[:, 6] 119 | 120 | idx_data = [] 121 | if args.voxel_size: 122 | idx_sort, count = voxelize(coord - np.min(coord, 0), args.voxel_size, mode=1) 123 | for i in range(count.max()): 124 | idx_select = np.cumsum(np.insert(count, 0, 0)[0:-1]) + i % count 125 | idx_part = idx_sort[idx_select] 126 | idx_data.append(idx_part) 127 | else: 128 | idx_data.append(np.arange(label.shape[0])) 129 | 130 | return coord, feat, label, idx_data 131 | 132 | 133 | def data_process(coord, feat, idx_data): 134 | """ Split points into batches by index """ 135 | idx_size = len(idx_data) 136 | idx_list, coord_list, feat_list, offset_list = [], [], [], [] 137 | for i in range(idx_size): 138 | idx_part = idx_data[i] 139 | coord_part, feat_part = coord[idx_part], feat[idx_part] 140 | if args.voxel_max and coord_part.shape[0] > args.voxel_max: 141 | coord_p, idx_uni, cnt = np.random.rand(coord_part.shape[0]) * 1e-3, np.array([]), 0 142 | while idx_uni.size != idx_part.shape[0]: 143 | init_idx = np.argmin(coord_p) 144 | dist = np.sum(np.power(coord_part - coord_part[init_idx], 
2), 1) 145 | idx_crop = np.argsort(dist)[:args.voxel_max] 146 | coord_sub, feat_sub, idx_sub = coord_part[idx_crop], feat_part[idx_crop], idx_part[idx_crop] 147 | dist = dist[idx_crop] 148 | delta = np.square(1 - dist / np.max(dist)) 149 | coord_p[idx_crop] += delta 150 | coord_sub, feat_sub = input_normalize(coord_sub, feat_sub) 151 | idx_list.append(idx_sub), coord_list.append(coord_sub), feat_list.append( 152 | feat_sub), offset_list.append(idx_sub.size) 153 | idx_uni = np.unique(np.concatenate((idx_uni, idx_sub))) 154 | else: 155 | coord_part, feat_part = input_normalize(coord_part, feat_part) 156 | idx_list.append(idx_part), coord_list.append(coord_part), feat_list.append( 157 | feat_part), offset_list.append(idx_part.size) 158 | 159 | return idx_list, coord_list, feat_list, offset_list 160 | 161 | 162 | def input_normalize(coord, feat): 163 | # normalize 164 | if args.data_norm == 'mean': 165 | coord -= np.mean(coord, 0) 166 | elif args.data_norm == 'min': 167 | coord -= np.min(coord, 0) 168 | else: 169 | raise Exception('No such data norm type') 170 | 171 | feat = feat / 255. 
172 | if args.color_mean is not None and args.color_std is not None: 173 | feat = (feat - args.color_mean) / args.color_std 174 | return coord, feat 175 | 176 | 177 | def visualize_scene(coord, pred, label, name): 178 | color_pred = PALETTE[pred.astype(np.int64)] 179 | color_gt = PALETTE[label.astype(np.int64)] 180 | pred_save_path = os.path.join(args.result_dir, '{}_pred.txt'.format(name)) 181 | label_save_path = os.path.join(args.result_dir, '{}_label.txt'.format(name)) 182 | np.savetxt(pred_save_path, np.hstack([coord, color_pred]), fmt="%f " * 3 + "%d " * 3) 183 | np.savetxt(label_save_path, np.hstack([coord, color_gt]), fmt="%f " * 3 + "%d " * 3) 184 | 185 | 186 | def test(model): 187 | logger.info('>>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>') 188 | batch_time = AverageMeter() 189 | model.eval() 190 | 191 | pred_list, label_list = [], [] 192 | data_list = data_prepare() 193 | args.color_mean, args.color_std = get_rgb_stat(args) 194 | 195 | for idx_scene, scene_name in enumerate(data_list): 196 | end = time.time() 197 | coord, feat, label, idx_data = data_load(scene_name) 198 | idx_list, coord_list, feat_list, offset_list = data_process(coord, feat, idx_data) 199 | 200 | pred = torch.zeros((label.size, args.num_class)).cuda(non_blocking=True) 201 | pred_count = torch.zeros((label.size, args.num_class)).cuda(non_blocking=True) 202 | num_batch = int(np.ceil(len(idx_list) / args.batch_size_test)) 203 | for idx_batch in range(num_batch): 204 | idx_start = idx_batch * args.batch_size_test 205 | idx_end = min((idx_batch + 1) * args.batch_size_test, len(idx_list)) 206 | idx_part, coord_part, feat_part, offset_part = \ 207 | idx_list[idx_start:idx_end], coord_list[idx_start:idx_end], \ 208 | feat_list[idx_start:idx_end], offset_list[idx_start:idx_end] 209 | 210 | idx_part = np.concatenate(idx_part) 211 | coord_part = torch.FloatTensor(np.concatenate(coord_part)).cuda(non_blocking=True) 212 | feat_part = 
torch.FloatTensor(np.concatenate(feat_part)).cuda(non_blocking=True) 213 | offset_part = torch.IntTensor(np.cumsum(offset_part)).cuda(non_blocking=True) 214 | 215 | with torch.no_grad(): 216 | pred_part = torch.nn.functional.softmax(model([coord_part, feat_part, offset_part]), dim=1) # (n, k) 217 | torch.cuda.empty_cache() 218 | 219 | pred[idx_part, :] += pred_part 220 | pred_count[idx_part, :] += 1. 221 | logger.info('Scene {}/{}, {}/{}, {}/{}'.format(idx_scene + 1, len(data_list), idx_end, len(idx_list), args.voxel_max, idx_part.shape[0])) 222 | 223 | # IoU per scene 224 | pred_choice = np.argmax((pred/pred_count).cpu().numpy(), 1) 225 | coord = coord 226 | label = label 227 | 228 | # median filter 229 | if args.filter: 230 | coord_gpu = torch.from_numpy(coord).float().cuda(non_blocking=True) 231 | pred_gpu = torch.from_numpy(pred_choice).int().cuda(non_blocking=True) 232 | pred_choice = pc_median_filter_gpu(coord_gpu, pred_gpu, 32) 233 | 234 | batch_time.update(time.time() - end) 235 | logger.info('Test: [{}/{}]-{} Batch {batch_time.val:.3f} ({batch_time.avg:.3f})'.format( 236 | idx_scene + 1, len(data_list), label.size, batch_time=batch_time)) 237 | pred_list.append(pred_choice) 238 | label_list.append(label) 239 | 240 | if args.visual: 241 | visualize_scene(coord, pred_choice, label, scene_name) 242 | 243 | # mIoU 244 | intersection, union, target = intersectionAndUnion(np.concatenate(pred_list), np.concatenate(label_list), 245 | args.num_class, args.ignore_label) 246 | iou_class = intersection / (union + 1e-10) 247 | accuracy_class = intersection / (target + 1e-10) 248 | mIoU = np.mean(iou_class) 249 | mAcc = np.mean(accuracy_class) 250 | allAcc = sum(intersection) / (sum(target) + 1e-10) 251 | logger.info('Val result: mIoU / mAcc / OA {:.2f} / {:.2f} / {:.2f}.'.format(mIoU * 100, mAcc * 100, allAcc * 100)) 252 | 253 | for i in range(args.num_class): 254 | logger.info('Class_{} Result: IoU / Acc {:.2f} / {:.2f}, name: {}.'.format( 255 | i, iou_class[i] * 100, 
accuracy_class[i] * 100, LABEL2CLASS[i])) 256 | logger.info('<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<') 257 | 258 | 259 | if __name__ == '__main__': 260 | main() 261 | -------------------------------------------------------------------------------- /segmentation/util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hancyran/RepSurf/29bacd52fe61ffb034c076be13163d415de6684a/segmentation/util/__init__.py -------------------------------------------------------------------------------- /segmentation/util/data_util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import SharedArray as SA 3 | import torch 4 | 5 | from modules.voxelize_utils import voxelize 6 | 7 | 8 | def sa_create(name, var): 9 | x = SA.create(name, var.shape, dtype=var.dtype) 10 | x[...] = var[...] 11 | x.flags.writeable = False 12 | return x 13 | 14 | 15 | def collate_fn(batch): 16 | coord, feat, label = list(zip(*batch)) 17 | offset, count = [], 0 18 | for item in coord: 19 | count += item.shape[0] 20 | offset.append(count) 21 | 22 | return torch.cat(coord), torch.cat(feat), torch.cat(label) if label[0] is not None else None, torch.IntTensor( 23 | offset) 24 | 25 | 26 | def data_prepare(coord, feat, label, args, split, coord_transform, rgb_transform, 27 | rgb_mean=None, rgb_std=None, shuffle_index=True, stop_transform=False): 28 | dataset = args.dataset.split('_')[0] 29 | 30 | # coordinate augment 31 | if coord_transform and not stop_transform: 32 | coord, _, _ = coord_transform(coord, None, None) 33 | 34 | # rgb augment 35 | if rgb_transform and not stop_transform: 36 | _, feat, _ = rgb_transform(None, feat, None) 37 | 38 | # grid sampling 39 | if args.voxel_size: 40 | uniq_idx = voxelize(coord - np.min(coord, 0), args.voxel_size) 41 | coord, feat = coord[uniq_idx], feat[uniq_idx] 42 | if label is not None: 43 | label = label[uniq_idx] 44 | 45 | # 
drop points when overflow 46 | if split != 'val' and args.voxel_max and coord.shape[0] > args.voxel_max: 47 | init_idx = np.random.randint(coord.shape[0]) if 'train' in split else coord.shape[0] // 2 48 | crop_idx = np.argsort(np.sum(np.square(coord - coord[init_idx]), 1))[:args.voxel_max] 49 | coord, feat = coord[crop_idx], feat[crop_idx] 50 | if label is not None: 51 | label = label[crop_idx] 52 | 53 | # shuffle points 54 | if shuffle_index: 55 | shuf_idx = np.arange(coord.shape[0]) 56 | np.random.shuffle(shuf_idx) 57 | coord, feat = coord[shuf_idx], feat[shuf_idx] 58 | if label is not None: 59 | label = label[shuf_idx] 60 | 61 | # coord norm 62 | if args.data_norm == 'mean': 63 | coord -= np.mean(coord, 0) 64 | elif args.data_norm == 'min': 65 | coord -= np.min(coord, 0) 66 | 67 | # rgb norm 68 | if dataset in ['S3DIS', 'ScanNet']: 69 | feat = feat / 255. 70 | if rgb_mean is not None and rgb_std is not None: 71 | feat = (feat - rgb_mean) / rgb_std 72 | 73 | return torch.FloatTensor(coord), torch.FloatTensor(feat), torch.LongTensor(label) if label is not None else None 74 | -------------------------------------------------------------------------------- /segmentation/util/utils.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import argparse 3 | import random 4 | import logging 5 | import sys 6 | 7 | import numpy as np 8 | import torch 9 | from torch import nn 10 | import torch.optim.lr_scheduler as lr_scheduler 11 | 12 | from dataset.S3DISDataLoader import S3DIS 13 | from modules.pointops.functions import pointops 14 | 15 | 16 | def main_process(args): 17 | """ 18 | Determine whether the main process 19 | 20 | """ 21 | return not args.multiprocessing_distributed or ( 22 | args.multiprocessing_distributed and args.rank % args.ngpus_per_node == 0) 23 | 24 | 25 | class AverageMeter(object): 26 | """Computes and stores the average and current value""" 27 | 28 | def __init__(self): 29 | self.reset() 30 | 31 
| def reset(self): 32 | self.val = 0 33 | self.avg = 0 34 | self.sum = 0 35 | self.count = 0 36 | 37 | def update(self, val, n=1): 38 | self.val = val 39 | self.sum += val * n 40 | self.count += n 41 | self.avg = self.sum / self.count 42 | 43 | 44 | def intersectionAndUnion(output, target, K, ignore_index=255): 45 | # 'K' classes, output and target sizes are N or N * L or N * H * W, each value in range 0 to K - 1. 46 | assert (output.ndim in [1, 2, 3]) 47 | assert output.shape == target.shape 48 | output = output.reshape(output.size).copy() 49 | target = target.reshape(target.size) 50 | output[np.where(target == ignore_index)[0]] = ignore_index 51 | intersection = output[np.where(output == target)[0]] 52 | area_intersection, _ = np.histogram(intersection, bins=np.arange(K + 1)) 53 | area_output, _ = np.histogram(output, bins=np.arange(K + 1)) 54 | area_target, _ = np.histogram(target, bins=np.arange(K + 1)) 55 | area_union = area_output + area_target - area_intersection 56 | return area_intersection, area_union, area_target 57 | 58 | 59 | def intersectionAndUnionGPU(output, target, K, ignore_index=255): 60 | # 'K' classes, output and target sizes are N or N * L or N * H * W, each value in range 0 to K - 1. 
61 | assert (output.dim() in [1, 2, 3]) 62 | assert output.shape == target.shape 63 | output = output.view(-1) 64 | target = target.view(-1) 65 | output[target == ignore_index] = ignore_index 66 | intersection = output[output == target] 67 | area_intersection = torch.histc(intersection, bins=K, min=0, max=K - 1) 68 | area_output = torch.histc(output, bins=K, min=0, max=K - 1) 69 | area_target = torch.histc(target, bins=K, min=0, max=K - 1) 70 | area_union = area_output + area_target - area_intersection 71 | return area_intersection, area_union, area_target 72 | 73 | 74 | def find_free_port(): 75 | import socket 76 | sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 77 | # Binding to port 0 will cause the OS to find an available port for us 78 | sock.bind(("", 0)) 79 | port = sock.getsockname()[1] 80 | sock.close() 81 | # NOTE: there is still a chance the port could be taken by other processes. 82 | return port 83 | 84 | 85 | def set_seed(seed): 86 | """ 87 | Setting of Global Seed 88 | 89 | """ 90 | np.random.seed(seed) 91 | random.seed(seed) 92 | torch.manual_seed(seed) # cpu 93 | torch.cuda.manual_seed(seed) 94 | 95 | torch.backends.cudnn.deterministic = True # consistent results on the cpu and gpu 96 | torch.backends.cudnn.benchmark = False 97 | 98 | 99 | def worker_init_fn(worker_id, seed=None): 100 | if seed is not None: 101 | random.seed(seed + worker_id) 102 | np.random.seed(seed + worker_id) 103 | torch.manual_seed(seed + worker_id) 104 | torch.cuda.manual_seed(seed + worker_id) 105 | torch.cuda.manual_seed_all(seed + worker_id) 106 | 107 | 108 | def get_logger(log_dir, model): 109 | logger_name = "main-logger" 110 | logger = logging.getLogger(logger_name) 111 | logging.root.handlers = [] 112 | 113 | logging.basicConfig( 114 | level=logging.INFO, 115 | format="[%(asctime)s %(levelname)s %(filename)s line %(lineno)d %(process)d] %(message)s", 116 | handlers=[ 117 | logging.FileHandler('%s/%s.txt' % (log_dir, model)), 118 | 
logging.StreamHandler(sys.stdout) 119 | ] 120 | ) 121 | 122 | return logger 123 | 124 | 125 | def get_aug_args(args): 126 | dataset = args.dataset 127 | if 'S3DIS' in dataset: 128 | aug_args = {'scale_factor': 0.1, 'scale_ani': True, 'scale_prob': 1., 129 | 'pert_factor': 0.03, 'pert_prob': 1., 'rot_prob': 0.5, 130 | 'shifts': [0.1, 0.1, 0.1], 'shift_prob': 1.} 131 | return aug_args 132 | else: 133 | raise Exception('No such dataset') 134 | 135 | 136 | def get_dataset_obj(args): 137 | dataset_name = args.dataset 138 | if 'S3DIS' in dataset_name: 139 | return S3DIS 140 | 141 | 142 | def get_dataset_description(args): 143 | dataset_name = args.dataset 144 | if 'S3DIS' in dataset_name: 145 | return '%s_A%d' % (dataset_name, args.test_area) 146 | if 'ScanNet' in dataset_name: 147 | return dataset_name 148 | 149 | 150 | def get_loop(args): 151 | if 'S3DIS' in args.dataset: 152 | return 30 153 | if 'ScanNet' in args.dataset: 154 | return 6 155 | else: 156 | raise Exception('No Fixed Loop for the Dataset') 157 | 158 | 159 | def get_class_weights(dataset_name): 160 | # pre-calculate the class weight 161 | if dataset_name == 'S3DIS_A1': 162 | num_per_class = [0.27362621, 0.3134626, 0.18798782, 1.38965602, 1.44210271, 0.86639497, 1.07227331, 163 | 1., 1.05912352, 1.92726327, 0.52329938, 2.04783419, 0.5104427] 164 | elif dataset_name == 'S3DIS_A2': 165 | num_per_class = [0.29036634, 0.34709631, 0.19514767, 1.20129272, 1.39663689, 0.87889087, 1.11586938, 166 | 1., 1.54599972, 1.87057415, 0.56458097, 1.87316536, 0.51576885] 167 | elif dataset_name == 'S3DIS_A3': 168 | num_per_class = [0.27578885, 0.32039725, 0.19055443, 1.14914046, 1.46885687, 0.85450877, 1.05414776, 169 | 1., 1.09680025, 2.09280004, 0.59355243, 1.95746691, 0.50429199] 170 | elif dataset_name == 'S3DIS_A4': 171 | num_per_class = [0.27667177, 0.32612854, 0.19886974, 1.18282174, 1.52145143, 0.8793782, 1.14202999, 172 | 1., 1.0857859, 1.89738584, 0.5964717, 1.95820557, 0.52113351] 173 | elif dataset_name == 
'S3DIS_A5': 174 | num_per_class = [0.28459923, 0.32990557, 0.1999722, 1.20798185, 1.33784535, 1., 0.93323316, 1.0753585, 175 | 1.00199521, 1.53657772, 0.7987055, 1.82384844, 0.48565471] 176 | elif dataset_name == 'S3DIS_A6': 177 | num_per_class = [0.29442441, 0.37941846, 0.21360804, 0.9812721, 1.40968965, 0.88577139, 1., 178 | 1.09387107, 1.53238009, 1.61365643, 1.15693894, 1.57821041, 0.47342451] 179 | elif dataset_name == 'ScanNet_train': 180 | num_per_class = [0.32051547, 0.1980627, 0.2621471, 0.74563083, 0.52141879, 0.65918949, 0.73560561, 1.03624985, 181 | 1.00063147, 0.90604468, 0.43435155, 3.91494446, 1.94558718, 1., 0.54871637, 2.13587716, 182 | 1.13931665, 2.06423695, 5.59103054, 1.08557339, 1.35027497] 183 | elif dataset_name == 'ScanNet_trainval': 184 | num_per_class = [0.32051547, 0.1980627, 0.2621471, 0.74563083, 0.52141879, 0.65918949, 0.73560561, 1.03624985, 185 | 1.00063147, 0.90604468, 0.43435155, 3.91494446, 1.94558718, 1., 0.54871637, 2.13587716, 186 | 1.13931665, 2.06423695, 5.59103054, 1.08557339, 1.35027497] 187 | else: 188 | raise Exception('No Prepared Class Weights of Dataset') 189 | return torch.FloatTensor(num_per_class) 190 | 191 | 192 | def get_rgb_stat(args): 193 | if 'S3DIS' in args.dataset: 194 | mean, std = [0.52146571, 0.50457911, 0.44939377], [0.19645595, 0.19576158, 0.20104336] 195 | elif 'ScanNet' in args.dataset: 196 | mean, std = [0.08400667, 0.08400667, 0.08400667], [0.28983903, 0.28983903, 0.28983903] 197 | else: 198 | return None, None 199 | return np.array(mean, dtype=np.float32), np.array(std, dtype=np.float32) 200 | 201 | 202 | def get_model(args): 203 | module = importlib.import_module('models.%s' % args.model) 204 | return module.Model(args) 205 | 206 | 207 | def get_optimizer(args, model): 208 | param_dicts = model.parameters() 209 | if args.optimizer == 'SGD': 210 | optimizer = torch.optim.SGD(param_dicts, lr=args.learning_rate, momentum=args.momentum, 211 | weight_decay=args.weight_decay) 212 | elif args.optimizer 
== 'AdamW': 213 | optimizer = torch.optim.AdamW(model.parameters(), lr=args.learning_rate, weight_decay=args.weight_decay) 214 | else: 215 | raise Exception('Not impl. such optimizer') 216 | return optimizer 217 | 218 | 219 | def get_scheduler(args, optimizer): 220 | if args.scheduler == 'step': 221 | scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_decay_epochs, gamma=args.lr_decay) 222 | else: 223 | raise Exception('Not impl. such scheduler') 224 | return scheduler 225 | 226 | 227 | def get_loss(weight=None, ignore_label=None): 228 | return nn.CrossEntropyLoss(weight=weight, ignore_index=ignore_label) 229 | 230 | 231 | def get_test_args(): 232 | return argparse.Namespace() 233 | 234 | 235 | def pc_median_filter_gpu(coord, label, group_size=16): 236 | """ 237 | :param coord: coordinates of a whole point cloud [N, 3] 238 | :param label: segmentation results of a whole point cloud [N,] 239 | :param group_size: num of neighbors for filtering 240 | """ 241 | offset = torch.IntTensor([coord.shape[0]]).to(coord.device) 242 | group_idx, _ = pointops.knnquery(group_size, coord, coord, offset, offset) # [N, group_size] 243 | group_label = label[group_idx.view(-1).long()].view(coord.shape[0], group_size) # [N, group_size] 244 | median_label = torch.median(group_label, 1)[0] 245 | return median_label.cpu().numpy() 246 | --------------------------------------------------------------------------------