├── .gitignore ├── LM ├── dataloader_test_LM.py ├── dataloader_test_LMO.py └── dataloader_train_LM.py ├── README.md ├── YCBV ├── CADs │ ├── 002_master_chef_can_pc.ply │ ├── 003_cracker_box_pc.ply │ ├── 004_sugar_box_pc.ply │ ├── 005_tomato_soup_can_pc.ply │ ├── 006_mustard_bottle_pc.ply │ ├── 007_tuna_fish_can_pc.ply │ ├── 008_pudding_box_pc.ply │ ├── 009_gelatin_box_pc.ply │ ├── 010_potted_meat_can_pc.ply │ ├── 011_banana_pc.ply │ ├── 019_pitcher_base_pc.ply │ ├── 021_bleach_cleanser_pc.ply │ ├── 024_bowl_pc.ply │ ├── 025_mug_pc.ply │ ├── 035_power_drill_pc.ply │ ├── 036_wood_block_pc.ply │ ├── 037_scissors_pc.ply │ ├── 040_large_marker_pc.ply │ ├── 051_large_clamp_pc.ply │ ├── 052_extra_large_clamp_pc.ply │ └── 061_foam_brick_pc.ply ├── dataloader_test_YCBV.py ├── dataloader_train_YCBV.py └── utils_YCBV │ ├── classes.txt │ ├── test_data_list.txt │ └── train_data_list.txt ├── configs ├── config_LM.yaml ├── config_YCBV_bs32.yaml └── config_YCBV_bs40.yaml ├── figs └── framework.png ├── libs ├── pointgroup_ops │ ├── functions │ │ └── pointgroup_ops.py │ ├── install.sh │ ├── setup.py │ └── src │ │ ├── bfs_cluster │ │ ├── bfs_cluster.cpp │ │ ├── bfs_cluster.cu │ │ └── bfs_cluster.h │ │ ├── cuda.cu │ │ ├── cuda_utils.h │ │ ├── datatype │ │ ├── datatype.cpp │ │ └── datatype.h │ │ ├── get_iou │ │ ├── get_iou.cpp │ │ ├── get_iou.cu │ │ └── get_iou.h │ │ ├── pointgroup_ops.cpp │ │ ├── pointgroup_ops.h │ │ ├── pointgroup_ops_api.cpp │ │ ├── roipool │ │ ├── roipool.cpp │ │ ├── roipool.cu │ │ └── roipool.h │ │ ├── sec_mean │ │ ├── sec_mean.cpp │ │ ├── sec_mean.cu │ │ └── sec_mean.h │ │ └── voxelize │ │ ├── voxelize.cpp │ │ ├── voxelize.cu │ │ └── voxelize.h ├── pointnet_lib │ ├── install.sh │ ├── pointnet2_modules.py │ ├── pointnet2_utils.py │ ├── pytorch_utils.py │ ├── setup.py │ └── src │ │ ├── ball_query.cpp │ │ ├── ball_query_gpu.cu │ │ ├── ball_query_gpu.h │ │ ├── cuda_utils.h │ │ ├── group_points.cpp │ │ ├── group_points_gpu.cu │ │ ├── group_points_gpu.h │ │ ├── interpolate.cpp │ │ ├── interpolate_gpu.cu │ │ ├── interpolate_gpu.h │ │ ├── pointnet2_api.cpp │ │ ├── sampling.cpp │ │ ├── sampling_gpu.cu │ │ └── sampling_gpu.h ├── pointnet_sp │ ├── install.sh │ ├── pointnet2_utils.py │ ├── setup.py │ └── src │ │ ├── cuda_utils.h │ │ ├── interpolate.cpp │ │ ├── interpolate_gpu.cu │ │ ├── interpolate_gpu.h │ │ └── pointnet2_api.cpp └── spconv │ ├── CMakeLists.txt │ ├── LICENSE │ ├── README.md │ ├── include │ ├── paramsgrid.h │ ├── prettyprint.h │ ├── pybind11_utils.h │ ├── spconv │ │ ├── avgpool.h │ │ ├── box_iou.h │ │ ├── geometry.h │ │ ├── indice.cu.h │ │ ├── indice.h │ │ ├── maxpool.h │ │ ├── mp_helper.h │ │ ├── nms.h │ │ ├── nms_gpu.h │ │ ├── point2voxel.h │ │ ├── pool_ops.h │ │ ├── reordering.cu.h │ │ ├── reordering.h │ │ ├── spconv_ops.h │ │ └── summaryRF.h │ ├── tensorview │ │ ├── helper_kernel.cu.h │ │ ├── helper_launch.h │ │ └── tensorview.h │ ├── torch_utils.h │ └── utility │ │ └── timer.h │ ├── install.sh │ ├── setup.py │ ├── spconv │ ├── __init__.py │ ├── conv.py │ ├── functional.py │ ├── modules.py │ ├── ops.py │ ├── pool.py │ ├── test_utils.py │ └── utils │ │ └── __init__.py │ ├── src │ ├── spconv │ │ ├── CMakeLists.txt │ │ ├── all.cc │ │ ├── avgpool.cu │ │ ├── indice.cc │ │ ├── indice.cu │ │ ├── maxpool.cc │ │ ├── maxpool.cu │ │ ├── reordering.cc │ │ ├── reordering.cu │ │ └── summaryRF.cu │ └── utils │ │ ├── CMakeLists.txt │ │ ├── all.cc │ │ └── nms.cu │ └── third_party │ ├── catch2 │ └── catch.hpp │ └── pybind11 │ ├── .appveyor.yml │ ├── .readthedocs.yml │ ├── .travis.yml │ ├── 
CMakeLists.txt │ ├── CONTRIBUTING.md │ ├── ISSUE_TEMPLATE.md │ ├── LICENSE │ ├── MANIFEST.in │ ├── README.md │ ├── docs │ ├── Doxyfile │ ├── Makefile │ ├── _static │ │ └── theme_overrides.css │ ├── advanced │ │ ├── cast │ │ │ ├── chrono.rst │ │ │ ├── custom.rst │ │ │ ├── eigen.rst │ │ │ ├── functional.rst │ │ │ ├── index.rst │ │ │ ├── overview.rst │ │ │ ├── stl.rst │ │ │ └── strings.rst │ │ ├── classes.rst │ │ ├── embedding.rst │ │ ├── exceptions.rst │ │ ├── functions.rst │ │ ├── misc.rst │ │ ├── pycpp │ │ │ ├── index.rst │ │ │ ├── numpy.rst │ │ │ ├── object.rst │ │ │ └── utilities.rst │ │ └── smart_ptrs.rst │ ├── basics.rst │ ├── benchmark.py │ ├── benchmark.rst │ ├── changelog.rst │ ├── classes.rst │ ├── compiling.rst │ ├── conf.py │ ├── faq.rst │ ├── index.rst │ ├── intro.rst │ ├── limitations.rst │ ├── pybind11-logo.png │ ├── pybind11_vs_boost_python1.png │ ├── pybind11_vs_boost_python1.svg │ ├── pybind11_vs_boost_python2.png │ ├── pybind11_vs_boost_python2.svg │ ├── reference.rst │ ├── release.rst │ ├── requirements.txt │ └── upgrade.rst │ ├── include │ └── pybind11 │ │ ├── attr.h │ │ ├── buffer_info.h │ │ ├── cast.h │ │ ├── chrono.h │ │ ├── common.h │ │ ├── complex.h │ │ ├── detail │ │ ├── class.h │ │ ├── common.h │ │ ├── descr.h │ │ ├── init.h │ │ ├── internals.h │ │ └── typeid.h │ │ ├── eigen.h │ │ ├── embed.h │ │ ├── eval.h │ │ ├── functional.h │ │ ├── iostream.h │ │ ├── numpy.h │ │ ├── operators.h │ │ ├── options.h │ │ ├── pybind11.h │ │ ├── pytypes.h │ │ ├── stl.h │ │ └── stl_bind.h │ ├── pybind11 │ ├── __init__.py │ ├── __main__.py │ └── _version.py │ ├── setup.cfg │ ├── setup.py │ ├── tests │ ├── CMakeLists.txt │ ├── conftest.py │ ├── constructor_stats.h │ ├── local_bindings.h │ ├── object.h │ ├── pybind11_cross_module_tests.cpp │ ├── pybind11_tests.cpp │ ├── pybind11_tests.h │ ├── pytest.ini │ ├── test_buffers.cpp │ ├── test_buffers.py │ ├── test_builtin_casters.cpp │ ├── test_builtin_casters.py │ ├── test_call_policies.cpp │ ├── test_call_policies.py │ ├── test_callbacks.cpp │ ├── test_callbacks.py │ ├── test_chrono.cpp │ ├── test_chrono.py │ ├── test_class.cpp │ ├── test_class.py │ ├── test_cmake_build │ │ ├── CMakeLists.txt │ │ ├── embed.cpp │ │ ├── installed_embed │ │ │ └── CMakeLists.txt │ │ ├── installed_function │ │ │ └── CMakeLists.txt │ │ ├── installed_target │ │ │ └── CMakeLists.txt │ │ ├── main.cpp │ │ ├── subdirectory_embed │ │ │ └── CMakeLists.txt │ │ ├── subdirectory_function │ │ │ └── CMakeLists.txt │ │ ├── subdirectory_target │ │ │ └── CMakeLists.txt │ │ └── test.py │ ├── test_constants_and_functions.cpp │ ├── test_constants_and_functions.py │ ├── test_copy_move.cpp │ ├── test_copy_move.py │ ├── test_docstring_options.cpp │ ├── test_docstring_options.py │ ├── test_eigen.cpp │ ├── test_eigen.py │ ├── test_embed │ │ ├── CMakeLists.txt │ │ ├── catch.cpp │ │ ├── external_module.cpp │ │ ├── test_interpreter.cpp │ │ └── test_interpreter.py │ ├── test_enum.cpp │ ├── test_enum.py │ ├── test_eval.cpp │ ├── test_eval.py │ ├── test_eval_call.py │ ├── test_exceptions.cpp │ ├── test_exceptions.py │ ├── test_factory_constructors.cpp │ ├── test_factory_constructors.py │ ├── test_gil_scoped.cpp │ ├── test_gil_scoped.py │ ├── test_iostream.cpp │ ├── test_iostream.py │ ├── test_kwargs_and_defaults.cpp │ ├── test_kwargs_and_defaults.py │ ├── test_local_bindings.cpp │ ├── test_local_bindings.py │ ├── test_methods_and_attributes.cpp │ ├── test_methods_and_attributes.py │ ├── test_modules.cpp │ ├── test_modules.py │ ├── test_multiple_inheritance.cpp │ ├── 
test_multiple_inheritance.py │ ├── test_numpy_array.cpp │ ├── test_numpy_array.py │ ├── test_numpy_dtypes.cpp │ ├── test_numpy_dtypes.py │ ├── test_numpy_vectorize.cpp │ ├── test_numpy_vectorize.py │ ├── test_opaque_types.cpp │ ├── test_opaque_types.py │ ├── test_operator_overloading.cpp │ ├── test_operator_overloading.py │ ├── test_pickling.cpp │ ├── test_pickling.py │ ├── test_pytypes.cpp │ ├── test_pytypes.py │ ├── test_sequences_and_iterators.cpp │ ├── test_sequences_and_iterators.py │ ├── test_smart_ptr.cpp │ ├── test_smart_ptr.py │ ├── test_stl.cpp │ ├── test_stl.py │ ├── test_stl_binders.cpp │ ├── test_stl_binders.py │ ├── test_tagbased_polymorphic.cpp │ ├── test_tagbased_polymorphic.py │ ├── test_virtual_functions.cpp │ └── test_virtual_functions.py │ └── tools │ ├── FindCatch.cmake │ ├── FindEigen3.cmake │ ├── FindPythonLibsNew.cmake │ ├── check-style.sh │ ├── clang │ ├── LICENSE.TXT │ ├── README.md │ ├── __init__.py │ ├── cindex.py │ └── enumerations.py │ ├── libsize.py │ ├── mkdoc.py │ ├── pybind11Config.cmake.in │ └── pybind11Tools.cmake ├── models ├── DCL_Net.py ├── Modules.py └── refiner.py ├── scripts ├── script_eval_LM.sh ├── script_eval_LMO.sh ├── script_eval_YCBV_stage1.sh ├── script_eval_YCBV_stage2.sh ├── script_train_LM.sh ├── script_train_YCBV_stage1.sh └── script_train_YCBV_stage2.sh ├── tools ├── test_LM.py ├── test_LMO.py ├── test_YCBV_stage1.py ├── test_YCBV_stage2.py ├── train_LM.py ├── train_YCBV_stage1.py └── train_YCBV_stage2.py └── utils ├── __init__.py ├── new_constants.pt ├── rotation.py ├── tools_train.py └── transform3D.py /.gitignore: -------------------------------------------------------------------------------- 1 | datasets/* 2 | build/ 3 | dist/ 4 | *.so 5 | *.so.1.0 6 | *.so.1 7 | *.egg-info 8 | *.pyc 9 | DCL_Net_config_LM_id0/ 10 | DCL_Net_config_YCBV_bs32_id0/ 11 | refiner_refiner_config_YCBV_bs40_id0_model_DCL_Net_config_YCBV_bs32_id0_epoch_84/ 12 | log -------------------------------------------------------------------------------- /YCBV/CADs/002_master_chef_can_pc.ply: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gorilla-Lab-SCUT/DCL-Net/7bf5fa8fb27b205098018b8d046d8876302723fd/YCBV/CADs/002_master_chef_can_pc.ply -------------------------------------------------------------------------------- /YCBV/CADs/003_cracker_box_pc.ply: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gorilla-Lab-SCUT/DCL-Net/7bf5fa8fb27b205098018b8d046d8876302723fd/YCBV/CADs/003_cracker_box_pc.ply -------------------------------------------------------------------------------- /YCBV/CADs/004_sugar_box_pc.ply: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gorilla-Lab-SCUT/DCL-Net/7bf5fa8fb27b205098018b8d046d8876302723fd/YCBV/CADs/004_sugar_box_pc.ply -------------------------------------------------------------------------------- /YCBV/CADs/005_tomato_soup_can_pc.ply: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gorilla-Lab-SCUT/DCL-Net/7bf5fa8fb27b205098018b8d046d8876302723fd/YCBV/CADs/005_tomato_soup_can_pc.ply -------------------------------------------------------------------------------- /YCBV/CADs/006_mustard_bottle_pc.ply: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Gorilla-Lab-SCUT/DCL-Net/7bf5fa8fb27b205098018b8d046d8876302723fd/YCBV/CADs/006_mustard_bottle_pc.ply -------------------------------------------------------------------------------- /YCBV/CADs/007_tuna_fish_can_pc.ply: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gorilla-Lab-SCUT/DCL-Net/7bf5fa8fb27b205098018b8d046d8876302723fd/YCBV/CADs/007_tuna_fish_can_pc.ply -------------------------------------------------------------------------------- /YCBV/CADs/008_pudding_box_pc.ply: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gorilla-Lab-SCUT/DCL-Net/7bf5fa8fb27b205098018b8d046d8876302723fd/YCBV/CADs/008_pudding_box_pc.ply -------------------------------------------------------------------------------- /YCBV/CADs/009_gelatin_box_pc.ply: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gorilla-Lab-SCUT/DCL-Net/7bf5fa8fb27b205098018b8d046d8876302723fd/YCBV/CADs/009_gelatin_box_pc.ply -------------------------------------------------------------------------------- /YCBV/CADs/010_potted_meat_can_pc.ply: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gorilla-Lab-SCUT/DCL-Net/7bf5fa8fb27b205098018b8d046d8876302723fd/YCBV/CADs/010_potted_meat_can_pc.ply -------------------------------------------------------------------------------- /YCBV/CADs/011_banana_pc.ply: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gorilla-Lab-SCUT/DCL-Net/7bf5fa8fb27b205098018b8d046d8876302723fd/YCBV/CADs/011_banana_pc.ply -------------------------------------------------------------------------------- /YCBV/CADs/019_pitcher_base_pc.ply: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gorilla-Lab-SCUT/DCL-Net/7bf5fa8fb27b205098018b8d046d8876302723fd/YCBV/CADs/019_pitcher_base_pc.ply -------------------------------------------------------------------------------- /YCBV/CADs/021_bleach_cleanser_pc.ply: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gorilla-Lab-SCUT/DCL-Net/7bf5fa8fb27b205098018b8d046d8876302723fd/YCBV/CADs/021_bleach_cleanser_pc.ply -------------------------------------------------------------------------------- /YCBV/CADs/024_bowl_pc.ply: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gorilla-Lab-SCUT/DCL-Net/7bf5fa8fb27b205098018b8d046d8876302723fd/YCBV/CADs/024_bowl_pc.ply -------------------------------------------------------------------------------- /YCBV/CADs/025_mug_pc.ply: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gorilla-Lab-SCUT/DCL-Net/7bf5fa8fb27b205098018b8d046d8876302723fd/YCBV/CADs/025_mug_pc.ply -------------------------------------------------------------------------------- /YCBV/CADs/035_power_drill_pc.ply: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gorilla-Lab-SCUT/DCL-Net/7bf5fa8fb27b205098018b8d046d8876302723fd/YCBV/CADs/035_power_drill_pc.ply -------------------------------------------------------------------------------- /YCBV/CADs/036_wood_block_pc.ply: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gorilla-Lab-SCUT/DCL-Net/7bf5fa8fb27b205098018b8d046d8876302723fd/YCBV/CADs/036_wood_block_pc.ply -------------------------------------------------------------------------------- /YCBV/CADs/037_scissors_pc.ply: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gorilla-Lab-SCUT/DCL-Net/7bf5fa8fb27b205098018b8d046d8876302723fd/YCBV/CADs/037_scissors_pc.ply -------------------------------------------------------------------------------- /YCBV/CADs/040_large_marker_pc.ply: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gorilla-Lab-SCUT/DCL-Net/7bf5fa8fb27b205098018b8d046d8876302723fd/YCBV/CADs/040_large_marker_pc.ply -------------------------------------------------------------------------------- /YCBV/CADs/051_large_clamp_pc.ply: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gorilla-Lab-SCUT/DCL-Net/7bf5fa8fb27b205098018b8d046d8876302723fd/YCBV/CADs/051_large_clamp_pc.ply -------------------------------------------------------------------------------- /YCBV/CADs/052_extra_large_clamp_pc.ply: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gorilla-Lab-SCUT/DCL-Net/7bf5fa8fb27b205098018b8d046d8876302723fd/YCBV/CADs/052_extra_large_clamp_pc.ply -------------------------------------------------------------------------------- /YCBV/CADs/061_foam_brick_pc.ply: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gorilla-Lab-SCUT/DCL-Net/7bf5fa8fb27b205098018b8d046d8876302723fd/YCBV/CADs/061_foam_brick_pc.ply -------------------------------------------------------------------------------- /YCBV/utils_YCBV/classes.txt: -------------------------------------------------------------------------------- 1 | 002_master_chef_can 2 | 003_cracker_box 3 | 004_sugar_box 4 | 005_tomato_soup_can 5 | 006_mustard_bottle 6 | 007_tuna_fish_can 7 | 008_pudding_box 8 | 009_gelatin_box 9 | 010_potted_meat_can 10 | 011_banana 11 | 019_pitcher_base 12 | 021_bleach_cleanser 13 | 024_bowl 14 | 025_mug 15 | 035_power_drill 16 | 036_wood_block 17 | 037_scissors 18 | 040_large_marker 19 | 051_large_clamp 20 | 052_extra_large_clamp 21 | 061_foam_brick 22 | -------------------------------------------------------------------------------- /configs/config_LM.yaml: -------------------------------------------------------------------------------- 1 | NAME_PROJECT: DCL-Net for LM/LMO 2 | optimizer: 3 | type : Adam 4 | lr : 0.001 5 | betas: [0.5, 0.999] 6 | eps : 0.000001 7 | 8 | lr_scheduler: 9 | type : StepLR 10 | step_size: 60 11 | gamma : 0.5 12 | 13 | max_epoch : 400 14 | 15 | model: 16 | voxelization_mode: 4 17 | unit_voxel_extent: [0.005, 0.005, 0.005] 18 | voxel_num_limit : [64, 64, 64] 19 | n_inp : 1024 20 | n_tmp : 1024 21 | backbone: 22 | downsample_by_pooling: True 23 | kernel_size : 3 24 | bias : False 25 | loss: 26 | 27 | 28 | 29 | hyper_dataset_train: 30 | name : dataloader_train_LM 31 | input_size : 1024 32 | tmp_size : 1024 33 | unit_voxel_extent: [0.005, 0.005, 0.005] 34 | voxel_num_limit : [64, 64, 64] 35 | voxelization_mode: 4 36 | 37 | hyper_dataloader_train: 38 | bs : 32 39 | num_workers : 10 40 | shuffle : True 41 | drop_last : True 42 | pin_memory : False 43 | 44 | 
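The optimizer, lr_scheduler, model, and dataloader blocks in this config are plain hyper-parameter dictionaries, presumably read by the training scripts under tools/. As a minimal sketch (not the repository's actual training code; build_optimizer and model are hypothetical names), the Adam/StepLR settings above could be consumed like this:

import yaml
import torch

def build_optimizer(model, cfg_path="configs/config_LM.yaml"):
    # Load the hyper-parameter dictionary from the YAML config.
    with open(cfg_path) as f:
        cfg = yaml.safe_load(f)
    opt_cfg, sch_cfg = cfg["optimizer"], cfg["lr_scheduler"]
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=opt_cfg["lr"],
                                 betas=tuple(opt_cfg["betas"]),
                                 eps=opt_cfg["eps"])
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=sch_cfg["step_size"],
                                                gamma=sch_cfg["gamma"])
    return optimizer, scheduler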
hyper_dataset_test: 45 | name : [dataloader_test_LM, dataloader_test_LMO] 46 | input_size : 1024 47 | tmp_size : 1024 48 | unit_voxel_extent: [0.005, 0.005, 0.005] 49 | voxel_num_limit : [64, 64, 64] 50 | voxelization_mode: 4 51 | 52 | hyper_dataloader_test: 53 | bs : 1 # should be 1 54 | num_workers : 0 55 | shuffle : False 56 | drop_last : False 57 | pin_memory : False 58 | 59 | 60 | per_val: 1 61 | per_write: 10 62 | rd_seed: 1 63 | -------------------------------------------------------------------------------- /configs/config_YCBV_bs32.yaml: -------------------------------------------------------------------------------- 1 | NAME_PROJECT: DCL-Net for YCBV 2 | optimizer: 3 | type : Adam 4 | lr : 0.001 5 | betas: [0.5, 0.999] 6 | eps : 0.000001 7 | 8 | lr_scheduler: 9 | type : WarmupCyclicLR 10 | max_lr : 0.001 11 | base_lr : 0.00001 12 | lr_scheduler_cyc: 13 | max_lr : 0.001 14 | base_lr : 0.000001 15 | step_size_up: 63105 # 21 epoch 16 | step_size_down: 63105 17 | 18 | max_epoch : 210 19 | 20 | model: 21 | voxelization_mode: 4 22 | unit_voxel_extent: [0.006, 0.006, 0.006] 23 | voxel_num_limit : [64, 64, 64] 24 | n_inp : 1024 25 | n_tmp : 1024 26 | backbone: 27 | downsample_by_pooling: True 28 | kernel_size : 3 29 | bias : False 30 | loss: 31 | 32 | 33 | 34 | hyper_dataset_train: 35 | name : dataloader_train_YCBV 36 | input_size : 1024 37 | tmp_size : 1024 38 | unit_voxel_extent: [0.006, 0.006, 0.006] 39 | voxel_num_limit : [64, 64, 64] 40 | voxelization_mode: 4 41 | 42 | hyper_dataloader_train: 43 | bs : 32 44 | num_workers : 10 45 | shuffle : True 46 | drop_last : True 47 | pin_memory : False 48 | 49 | hyper_dataset_test: 50 | name : dataloader_test_YCBV 51 | input_size : 1024 52 | tmp_size : 1024 53 | unit_voxel_extent: [0.006, 0.006, 0.006] 54 | voxel_num_limit : [64, 64, 64] 55 | voxelization_mode: 4 56 | 57 | hyper_dataloader_test: 58 | bs : 1 59 | num_workers : 8 60 | shuffle : False 61 | drop_last : False 62 | pin_memory : False 63 | 64 | 65 | per_val: 1 66 | per_write: 10 67 | rd_seed: 1 68 | per_save : 1 -------------------------------------------------------------------------------- /configs/config_YCBV_bs40.yaml: -------------------------------------------------------------------------------- 1 | NAME_PROJECT: DCL-Net for YCBV 2 | optimizer: 3 | type : Adam 4 | lr : 0.001 5 | betas: [0.5, 0.999] 6 | eps : 0.000001 7 | 8 | lr_scheduler: 9 | type : WarmupCyclicLR 10 | max_lr : 0.001 11 | base_lr : 0.00001 12 | lr_scheduler_cyc: 13 | max_lr : 0.001 14 | base_lr : 0.000001 15 | step_size_up: 50484 # 21 epoch 16 | step_size_down: 50484 17 | 18 | max_epoch : 210 19 | 20 | model: 21 | voxelization_mode: 4 22 | unit_voxel_extent: [0.006, 0.006, 0.006] 23 | voxel_num_limit : [64, 64, 64] 24 | n_inp : 1024 25 | n_tmp : 1024 26 | backbone: 27 | downsample_by_pooling: True 28 | kernel_size : 3 29 | bias : False 30 | loss: 31 | 32 | 33 | 34 | hyper_dataset_train: 35 | name : dataloader_train_YCBV 36 | input_size : 1024 37 | tmp_size : 1024 38 | unit_voxel_extent: [0.006, 0.006, 0.006] 39 | voxel_num_limit : [64, 64, 64] 40 | voxelization_mode: 4 41 | 42 | hyper_dataloader_train: 43 | bs : 40 44 | num_workers : 10 45 | shuffle : True 46 | drop_last : True 47 | pin_memory : False 48 | 49 | hyper_dataset_test: 50 | name : dataloader_test_YCBV 51 | input_size : 1024 52 | tmp_size : 1024 53 | unit_voxel_extent: [0.006, 0.006, 0.006] 54 | voxel_num_limit : [64, 64, 64] 55 | voxelization_mode: 4 56 | 57 | hyper_dataloader_test: 58 | bs : 1 59 | num_workers : 8 60 | shuffle : False 61 | 
drop_last : False 62 | pin_memory : False 63 | 64 | 65 | per_val: 1 66 | per_write: 10 67 | rd_seed: 1 68 | per_save : 1 -------------------------------------------------------------------------------- /figs/framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gorilla-Lab-SCUT/DCL-Net/7bf5fa8fb27b205098018b8d046d8876302723fd/figs/framework.png -------------------------------------------------------------------------------- /libs/pointgroup_ops/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python setup.py build_ext # if any head file not found add the include dir of this env to the CPLUS_INCLUDE_PATH 3 | # eg: export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/home/anaconda3/envs/env_A/include 4 | python setup.py develop -------------------------------------------------------------------------------- /libs/pointgroup_ops/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='PG_OP', 6 | ext_modules=[ 7 | CUDAExtension('PG_OP', [ 8 | 'src/pointgroup_ops_api.cpp', 9 | 10 | 'src/pointgroup_ops.cpp', 11 | 'src/cuda.cu' 12 | ], extra_compile_args={'cxx': ['-g'], 'nvcc': ['-O2']}) 13 | ], 14 | cmdclass={'build_ext': BuildExtension} 15 | ) -------------------------------------------------------------------------------- /libs/pointgroup_ops/src/bfs_cluster/bfs_cluster.cu: -------------------------------------------------------------------------------- 1 | /* 2 | Ball Query with BatchIdx 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 5 | */ 6 | #include "bfs_cluster.h" 7 | #include "../cuda_utils.h" 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | 14 | /* ================================== ballquery_batch_p ================================== */ 15 | __global__ void ballquery_batch_p_cuda_(int n, int meanActive, float radius, const float *xyz, const int *batch_idxs, const int *batch_offsets, int *idx, int *start_len, int *cumsum) { 16 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 17 | if (pt_idx >= n) return; 18 | 19 | start_len += (pt_idx * 2); 20 | int idx_temp[1000]; 21 | 22 | float radius2 = radius * radius; 23 | float o_x = xyz[pt_idx * 3 + 0]; 24 | float o_y = xyz[pt_idx * 3 + 1]; 25 | float o_z = xyz[pt_idx * 3 + 2]; 26 | 27 | int batch_idx = batch_idxs[pt_idx]; 28 | int start = batch_offsets[batch_idx]; 29 | int end = batch_offsets[batch_idx + 1]; 30 | 31 | int cnt = 0; 32 | for(int k = start; k < end; k++){ 33 | float x = xyz[k * 3 + 0]; 34 | float y = xyz[k * 3 + 1]; 35 | float z = xyz[k * 3 + 2]; 36 | float d2 = (o_x - x) * (o_x - x) + (o_y - y) * (o_y - y) + (o_z - z) * (o_z - z); 37 | if(d2 < radius2){ 38 | if(cnt < 1000){ 39 | idx_temp[cnt] = k; 40 | } 41 | else{ 42 | break; 43 | } 44 | ++cnt; 45 | } 46 | } 47 | 48 | start_len[0] = atomicAdd(cumsum, cnt); 49 | start_len[1] = cnt; 50 | 51 | int thre = n * meanActive; 52 | if(start_len[0] >= thre) return; 53 | 54 | idx += start_len[0]; 55 | if(start_len[0] + cnt >= thre) cnt = thre - start_len[0]; 56 | 57 | for(int k = 0; k < cnt; k++){ 58 | idx[k] = idx_temp[k]; 59 | } 60 | } 61 | 62 | 63 | int ballquery_batch_p_cuda(int n, int meanActive, float radius, const float *xyz, const int *batch_idxs, const int *batch_offsets, int *idx, int *start_len, cudaStream_t stream) { 64 | // param xyz: (n, 3) 65 | // param 
batch_idxs: (n) 66 | // param batch_offsets: (B + 1) 67 | // output idx: (n * meanActive) dim 0 for number of points in the ball, idx in n 68 | // output start_len: (n, 2), int 69 | 70 | cudaError_t err; 71 | 72 | dim3 blocks(DIVUP(n, THREADS_PER_BLOCK)); 73 | dim3 threads(THREADS_PER_BLOCK); 74 | 75 | int cumsum = 0; 76 | int* p_cumsum; 77 | cudaMalloc((void**)&p_cumsum, sizeof(int)); 78 | cudaMemcpy(p_cumsum, &cumsum, sizeof(int), cudaMemcpyHostToDevice); 79 | 80 | ballquery_batch_p_cuda_<<>>(n, meanActive, radius, xyz, batch_idxs, batch_offsets, idx, start_len, p_cumsum); 81 | 82 | err = cudaGetLastError(); 83 | if (cudaSuccess != err) { 84 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 85 | exit(-1); 86 | } 87 | 88 | cudaMemcpy(&cumsum, p_cumsum, sizeof(int), cudaMemcpyDeviceToHost); 89 | return cumsum; 90 | } -------------------------------------------------------------------------------- /libs/pointgroup_ops/src/bfs_cluster/bfs_cluster.h: -------------------------------------------------------------------------------- 1 | /* 2 | Ball Query with BatchIdx & Clustering Algorithm 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 5 | */ 6 | 7 | #ifndef BFS_CLUSTER_H 8 | #define BFS_CLUSTER_H 9 | #include 10 | #include 11 | #include 12 | 13 | #include "../datatype/datatype.h" 14 | 15 | int ballquery_batch_p(at::Tensor xyz_tensor, at::Tensor batch_idxs_tensor, at::Tensor batch_offsets_tensor, at::Tensor idx_tensor, at::Tensor start_len_tensor, int n, int meanActive, float radius); 16 | int ballquery_batch_p_cuda(int n, int meanActive, float radius, const float *xyz, const int *batch_idxs, const int *batch_offsets, int *idx, int *start_len, cudaStream_t stream); 17 | 18 | void bfs_cluster(at::Tensor semantic_label_tensor, at::Tensor ball_query_idxs_tensor, at::Tensor start_len_tensor, at::Tensor cluster_idxs_tensor, at::Tensor cluster_offsets_tensor, const int N, int threshold); 19 | 20 | #endif //BFS_CLUSTER_H -------------------------------------------------------------------------------- /libs/pointgroup_ops/src/cuda.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include "datatype/datatype.h" 3 | 4 | #include "voxelize/voxelize.cu" 5 | #include "bfs_cluster/bfs_cluster.cu" 6 | #include "roipool/roipool.cu" 7 | #include "get_iou/get_iou.cu" 8 | #include "sec_mean/sec_mean.cu" 9 | 10 | template void voxelize_fp_cuda(Int nOutputRows, Int maxActive, Int nPlanes, float *feats, float *output_feats, Int *rules, bool average); 11 | 12 | template void voxelize_bp_cuda(Int nOutputRows, Int maxActive, Int nPlanes, float *d_output_feats, float *d_feats, Int *rules, bool average); -------------------------------------------------------------------------------- /libs/pointgroup_ops/src/cuda_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef _CUDA_UTILS_H 2 | #define _CUDA_UTILS_H 3 | 4 | #include 5 | 6 | #define TOTAL_THREADS 1024 7 | 8 | #define THREADS_PER_BLOCK 512 9 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 10 | 11 | inline int opt_n_threads(int work_size) { 12 | const int pow_2 = std::log(static_cast(work_size)) / std::log(2.0); 13 | return max(min(1 << pow_2, TOTAL_THREADS), 1); 14 | } 15 | 16 | inline dim3 opt_block_config(int x, int y) { 17 | const int x_threads = opt_n_threads(x); 18 | const int y_threads = max(min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1); 19 | dim3 block_config(x_threads, y_threads, 1); 20 | return block_config; 21 | } 22 | 
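The ballquery_batch_p kernel above gathers, for each point, the indices of all points from the same batch element that fall within radius, writing them into a flat idx buffer and a per-point (start, length) pair in start_len; the returned cumsum is the total number of neighbour entries. A rough NumPy rendering of that semantics (a sketch only, with a hypothetical name; it omits the kernel's 1000-neighbour scratch buffer and the n * meanActive cap):

import numpy as np

def ballquery_batch_p_ref(xyz, batch_idxs, batch_offsets, radius):
    # xyz: (n, 3) float, batch_idxs: (n,) int, batch_offsets: (B + 1,) int
    n = xyz.shape[0]
    start_len = np.zeros((n, 2), dtype=np.int32)
    idx_chunks, cumsum = [], 0
    for i in range(n):
        b = batch_idxs[i]
        s, e = batch_offsets[b], batch_offsets[b + 1]      # points of the same scene
        d2 = ((xyz[s:e] - xyz[i]) ** 2).sum(axis=1)
        neigh = np.nonzero(d2 < radius * radius)[0] + s    # global point indices in the ball
        start_len[i] = (cumsum, len(neigh))
        idx_chunks.append(neigh)
        cumsum += len(neigh)
    idx = np.concatenate(idx_chunks).astype(np.int32)
    return idx, start_len, cumsum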
23 | #endif -------------------------------------------------------------------------------- /libs/pointgroup_ops/src/datatype/datatype.cpp: -------------------------------------------------------------------------------- 1 | #include "datatype.h" 2 | 3 | template SparseGrid::SparseGrid() : ctr(0) { 4 | // Sparsehash needs a key to be set aside and never used 5 | Point empty_key; 6 | for(Int i = 0; i < dimension; i++){ 7 | empty_key[i] = std::numeric_limits::min(); 8 | } 9 | mp.set_empty_key(empty_key); 10 | } 11 | 12 | ConnectedComponent::ConnectedComponent(){} 13 | 14 | void ConnectedComponent::addPoint(Int pt_idx){ 15 | pt_idxs.push_back(pt_idx); 16 | } 17 | -------------------------------------------------------------------------------- /libs/pointgroup_ops/src/datatype/datatype.h: -------------------------------------------------------------------------------- 1 | #ifndef DATATYPE_H 2 | #define DATATYPE_H 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | using Int = int32_t; 10 | 11 | template using Point = std::array; 12 | 13 | template struct IntArrayHash{ 14 | std::size_t operator()(Point const &p) const{ 15 | Int hash = 16777619; 16 | for(auto x : p){ 17 | hash *= 2166136261; 18 | hash ^= x; 19 | } 20 | return hash; 21 | } 22 | }; 23 | 24 | template using SparseGridMap = google::dense_hash_map, Int, IntArrayHash, std::equal_to>>; // 25 | 26 | template class SparseGrid{ 27 | public: 28 | Int ctr; 29 | SparseGridMap mp; 30 | SparseGrid(); 31 | }; 32 | 33 | template using SparseGrids = std::vector>; 34 | 35 | using RuleBook = std::vector>; 36 | 37 | class ConnectedComponent{ 38 | public: 39 | std::vector pt_idxs; 40 | 41 | ConnectedComponent(); 42 | void addPoint(Int pt_idx); 43 | }; 44 | 45 | using ConnectedComponents = std::vector; 46 | 47 | #endif //DATATYPE_H -------------------------------------------------------------------------------- /libs/pointgroup_ops/src/get_iou/get_iou.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Get the IoU between predictions and gt masks 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 5 | */ 6 | 7 | #include "get_iou.h" 8 | 9 | void get_iou(at::Tensor proposals_idx_tensor, at::Tensor proposals_offset_tensor, at::Tensor instance_labels_tensor, at::Tensor instance_pointnum_tensor, at::Tensor proposals_iou_tensor, int nInstance, int nProposal){ 10 | int *proposals_idx = proposals_idx_tensor.data(); 11 | int *proposals_offset = proposals_offset_tensor.data(); 12 | long *instance_labels = instance_labels_tensor.data(); 13 | int *instance_pointnum = instance_pointnum_tensor.data(); 14 | 15 | float *proposals_iou = proposals_iou_tensor.data(); 16 | 17 | get_iou_cuda(nInstance, nProposal, proposals_idx, proposals_offset, instance_labels, instance_pointnum, proposals_iou); 18 | } -------------------------------------------------------------------------------- /libs/pointgroup_ops/src/get_iou/get_iou.cu: -------------------------------------------------------------------------------- 1 | /* 2 | Get the IoU between predictions and gt masks 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 
5 | */ 6 | 7 | #include 8 | #include 9 | #include "get_iou.h" 10 | 11 | 12 | __global__ void get_iou_cuda_(int nInstance, int nProposal, int *proposals_idx, int *proposals_offset, long *instance_labels, int *instance_pointnum, float *proposals_iou){ 13 | for(int proposal_id = blockIdx.x; proposal_id < nProposal; proposal_id += gridDim.x){ 14 | int start = proposals_offset[proposal_id]; 15 | int end = proposals_offset[proposal_id + 1]; 16 | int proposal_total = end - start; 17 | for(int instance_id = threadIdx.x; instance_id < nInstance; instance_id += blockDim.x){ 18 | int instance_total = instance_pointnum[instance_id]; 19 | int intersection = 0; 20 | for(int i = start; i < end; i++){ 21 | int idx = proposals_idx[i]; 22 | if((int)instance_labels[idx] == instance_id){ 23 | intersection += 1; 24 | } 25 | } 26 | proposals_iou[proposal_id * nInstance + instance_id] = (float)intersection / ((float)(proposal_total + instance_total - intersection) + 1e-5); 27 | } 28 | } 29 | } 30 | 31 | //input: proposals_idx (sumNPoint), int 32 | //input: proposals_offset (nProposal + 1), int 33 | //input: instance_labels (N), long, 0~total_nInst-1, -100 34 | //input: instance_pointnum (total_nInst), int 35 | //output: proposals_iou (nProposal, total_nInst), float 36 | void get_iou_cuda(int nInstance, int nProposal, int *proposals_idx, int *proposals_offset, long *instance_labels, int *instance_pointnum, float *proposals_iou){ 37 | get_iou_cuda_<<>>(nInstance, nProposal, proposals_idx, proposals_offset, instance_labels, instance_pointnum, proposals_iou); 38 | } -------------------------------------------------------------------------------- /libs/pointgroup_ops/src/get_iou/get_iou.h: -------------------------------------------------------------------------------- 1 | /* 2 | Get the IoU between predictions and gt masks 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 
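As the in/out comments above spell out, get_iou_cuda receives the flattened point indices of all proposals (proposals_idx, delimited per proposal by proposals_offset), the per-point ground-truth instance labels, and the per-instance point counts, and fills an (nProposal, nInstance) IoU matrix. A small NumPy sketch of the same computation (hypothetical helper name, illustration only):

import numpy as np

def proposal_instance_iou(proposals_idx, proposals_offset, instance_labels, instance_pointnum):
    n_proposal = len(proposals_offset) - 1
    n_instance = len(instance_pointnum)
    iou = np.zeros((n_proposal, n_instance), dtype=np.float32)
    for p in range(n_proposal):
        pts = proposals_idx[proposals_offset[p]:proposals_offset[p + 1]]
        labels = instance_labels[pts]                   # gt instance id of every proposal point
        for k in range(n_instance):
            inter = np.sum(labels == k)
            union = len(pts) + instance_pointnum[k] - inter
            iou[p, k] = inter / (union + 1e-5)
    return iou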
5 | */ 6 | 7 | #ifndef GET_IOU_H 8 | #define GET_IOU_H 9 | #include 10 | #include 11 | 12 | #include "../datatype/datatype.h" 13 | 14 | // 15 | void get_iou_cuda(int nInstance, int nProposal, int *proposals_idx, int *proposals_offset, long *instance_labels, int *instance_pointnum, float *proposals_iou); 16 | void get_iou(at::Tensor proposals_idx_tensor, at::Tensor proposals_offset_tensor, at::Tensor instance_labels_tensor, at::Tensor instance_pointnum_tensor, at::Tensor proposals_iou_tensor, int nInstance, int nProposal); 17 | 18 | #endif //GET_IOU_H -------------------------------------------------------------------------------- /libs/pointgroup_ops/src/pointgroup_ops.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "datatype/datatype.cpp" 6 | 7 | #include "voxelize/voxelize.cpp" 8 | #include "bfs_cluster/bfs_cluster.cpp" 9 | #include "roipool/roipool.cpp" 10 | #include "get_iou/get_iou.cpp" 11 | #include "sec_mean/sec_mean.cpp" 12 | 13 | void voxelize_idx_3d(/* long N*4 */ at::Tensor coords, /* long M*4 */ at::Tensor output_coords, 14 | /* Int N */ at::Tensor input_map, /* Int M*(maxActive+1) */ at::Tensor output_map, Int batchSize, Int mode){ 15 | voxelize_idx<3>(coords, output_coords, input_map, output_map, batchSize, mode); 16 | } 17 | 18 | void voxelize_fp_feat(/* cuda float N*C */ at::Tensor feats, // N * 3 -> M * 3 (N >= M) 19 | /* cuda float M*C */ at::Tensor output_feats, 20 | /* cuda Int M*(maxActive+1) */ at::Tensor output_map, Int mode, Int nActive, Int maxActive, Int nPlane){ 21 | voxelize_fp(feats, output_feats, output_map, mode, nActive, maxActive, nPlane); 22 | } 23 | 24 | 25 | void voxelize_bp_feat(/* cuda float M*C */ at::Tensor d_output_feats, /* cuda float N*C */ at::Tensor d_feats, /* cuda Int M*(maxActive+1) */ at::Tensor output_map, 26 | Int mode, Int nActive, Int maxActive, Int nPlane){ 27 | voxelize_bp(d_output_feats, d_feats, output_map, mode, nActive, maxActive, nPlane); 28 | } 29 | 30 | void point_recover_fp_feat(/* cuda float M*C */ at::Tensor feats, /* cuda float N*C */ at::Tensor output_feats, /* cuda Int M*(maxActive+1) */ at::Tensor idx_map, 31 | Int nActive, Int maxActive, Int nPlane){ 32 | point_recover_fp(feats, output_feats, idx_map, nActive, maxActive, nPlane); 33 | } 34 | 35 | void point_recover_bp_feat(/* cuda float N*C */ at::Tensor d_output_feats, /* cuda float M*C */ at::Tensor d_feats, /* cuda Int M*(maxActive+1) */ at::Tensor idx_map, 36 | Int nActive, Int maxActive, Int nPlane){ 37 | point_recover_bp(d_output_feats, d_feats, idx_map, nActive, maxActive, nPlane); 38 | } 39 | -------------------------------------------------------------------------------- /libs/pointgroup_ops/src/pointgroup_ops.h: -------------------------------------------------------------------------------- 1 | #ifndef POINTGROUP_H 2 | #define POINTGROUP_H 3 | #include "datatype/datatype.h" 4 | 5 | #include "bfs_cluster/bfs_cluster.h" 6 | #include "roipool/roipool.h" 7 | #include "get_iou/get_iou.h" 8 | #include "sec_mean/sec_mean.h" 9 | 10 | void voxelize_idx_3d(/* long N*4 */ at::Tensor coords, /* long M*4 */ at::Tensor output_coords, 11 | /* Int N */ at::Tensor input_map, /* Int M*(maxActive+1) */ at::Tensor output_map, Int batchSize, Int mode); 12 | 13 | void voxelize_fp_feat(/* cuda float N*C */ at::Tensor feats, // N * 3 -> M * 3 (N >= M) 14 | /* cuda float M*C */ at::Tensor output_feats, 15 | /* cuda Int M*(maxActive+1) */ at::Tensor output_map, Int mode, Int nActive, Int 
maxActive, Int nPlane); 16 | 17 | void voxelize_bp_feat(/* cuda float M*C */ at::Tensor d_output_feats, /* cuda float N*C */ at::Tensor d_feats, /* cuda Int M*(maxActive+1) */ at::Tensor output_map, 18 | Int mode, Int nActive, Int maxActive, Int nPlane); 19 | 20 | void point_recover_fp_feat(/* cuda float M*C */ at::Tensor feats, /* cuda float N*C */ at::Tensor output_feats, /* cuda Int M*(maxActive+1) */ at::Tensor idx_map, 21 | Int nActive, Int maxActive, Int nPlane); 22 | 23 | void point_recover_bp_feat(/* cuda float N*C */ at::Tensor d_output_feats, /* cuda float M*C */ at::Tensor d_feats, /* cuda Int M*(maxActive+1) */ at::Tensor idx_map, 24 | Int nActive, Int maxActive, Int nPlane); 25 | 26 | 27 | #endif // POINTGROUP_H -------------------------------------------------------------------------------- /libs/pointgroup_ops/src/pointgroup_ops_api.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "pointgroup_ops.h" 5 | 6 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m){ 7 | m.def("voxelize_idx", &voxelize_idx_3d, "voxelize_idx"); 8 | m.def("voxelize_fp", &voxelize_fp_feat, "voxelize_fp"); 9 | m.def("voxelize_bp", &voxelize_bp_feat, "voxelize_bp"); 10 | m.def("point_recover_fp", &point_recover_fp_feat, "point_recover_fp"); 11 | m.def("point_recover_bp", &point_recover_bp_feat, "point_recover_bp"); 12 | 13 | m.def("ballquery_batch_p", &ballquery_batch_p, "ballquery_batch_p"); 14 | m.def("bfs_cluster", &bfs_cluster, "bfs_cluster"); 15 | 16 | m.def("roipool_fp", &roipool_fp, "roipool_fp"); 17 | m.def("roipool_bp", &roipool_bp, "roipool_bp"); 18 | 19 | m.def("get_iou", &get_iou, "get_iou"); 20 | 21 | m.def("sec_mean", &sec_mean, "sec_mean"); 22 | m.def("sec_min", &sec_min, "sec_min"); 23 | m.def("sec_max", &sec_max, "sec_max"); 24 | } -------------------------------------------------------------------------------- /libs/pointgroup_ops/src/roipool/roipool.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ROI Max Pool 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 5 | */ 6 | 7 | #include "roipool.h" 8 | 9 | void roipool_fp(at::Tensor feats_tensor, at::Tensor proposals_offset_tensor, at::Tensor output_feats_tensor, at::Tensor output_maxidx_tensor, int nProposal, int C){ 10 | float *feats = feats_tensor.data(); 11 | int *proposals_offset = proposals_offset_tensor.data(); 12 | float *output_feats = output_feats_tensor.data(); 13 | int *output_maxidx = output_maxidx_tensor.data(); 14 | 15 | roipool_fp_cuda(nProposal, C, feats, proposals_offset, output_feats, output_maxidx); 16 | } 17 | 18 | 19 | void roipool_bp(at::Tensor d_feats_tensor, at::Tensor proposals_offset_tensor, at::Tensor output_maxidx_tensor, at::Tensor d_output_feats_tensor, int nProposal, int C){ 20 | float *d_feats = d_feats_tensor.data(); 21 | int *proposals_offset = proposals_offset_tensor.data(); 22 | int *output_maxidx = output_maxidx_tensor.data(); 23 | float *d_output_feats = d_output_feats_tensor.data(); 24 | 25 | roipool_bp_cuda(nProposal, C, d_feats, proposals_offset, output_maxidx, d_output_feats); 26 | } -------------------------------------------------------------------------------- /libs/pointgroup_ops/src/roipool/roipool.h: -------------------------------------------------------------------------------- 1 | /* 2 | ROI Max Pool 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 
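roipool_fp above performs channel-wise max pooling over the points of each proposal (again delimited by proposals_offset) and records the argmax index per channel so that roipool_bp can route gradients back to the winning points. A forward-pass sketch in NumPy (hypothetical name, not the extension's API):

import numpy as np

def roipool_fp_ref(feats, proposals_offset):
    # feats: (sumNPoint, C) -> pooled: (nProposal, C), maxidx: (nProposal, C)
    n_proposal = len(proposals_offset) - 1
    C = feats.shape[1]
    output_feats = np.zeros((n_proposal, C), dtype=feats.dtype)
    output_maxidx = np.zeros((n_proposal, C), dtype=np.int32)
    for p in range(n_proposal):
        s, e = proposals_offset[p], proposals_offset[p + 1]
        output_maxidx[p] = s + feats[s:e].argmax(axis=0)   # channel-wise winner inside the proposal
        output_feats[p] = feats[s:e].max(axis=0)
    return output_feats, output_maxidx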
5 | */ 6 | 7 | #ifndef ROIPOOL_H 8 | #define ROIPOOL_H 9 | #include 10 | #include 11 | 12 | #include "../datatype/datatype.h" 13 | 14 | // 15 | void roipool_fp(at::Tensor feats_tensor, at::Tensor proposals_offset_tensor, at::Tensor output_feats_tensor, at::Tensor output_maxidx_tensor, int nProposal, int C); 16 | 17 | void roipool_fp_cuda(int nProposal, int C, float *feats, int *proposals_offset, float *output_feats, int *output_maxidx); 18 | 19 | 20 | // 21 | void roipool_bp(at::Tensor d_feats_tensor, at::Tensor proposals_offset_tensor, at::Tensor output_maxidx_tensor, at::Tensor d_output_feats_tensor, int nProposal, int C); 22 | 23 | void roipool_bp_cuda(int nProposal, int C, float *d_feats, int *proposals_offset, int *output_maxidx, float *d_output_feats); 24 | 25 | #endif //ROIPOOL_H 26 | -------------------------------------------------------------------------------- /libs/pointgroup_ops/src/sec_mean/sec_mean.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Segment Operations (mean, max, min) 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 5 | */ 6 | 7 | #include "sec_mean.h" 8 | 9 | void sec_mean(at::Tensor inp_tensor, at::Tensor offsets_tensor, at::Tensor out_tensor, int nProposal, int C){ 10 | int *offsets = offsets_tensor.data(); 11 | float *inp = inp_tensor.data(); 12 | float *out = out_tensor.data(); 13 | 14 | sec_mean_cuda(nProposal, C, inp, offsets, out); 15 | } 16 | 17 | void sec_min(at::Tensor inp_tensor, at::Tensor offsets_tensor, at::Tensor out_tensor, int nProposal, int C){ 18 | int *offsets = offsets_tensor.data(); 19 | float *inp = inp_tensor.data(); 20 | float *out = out_tensor.data(); 21 | 22 | sec_min_cuda(nProposal, C, inp, offsets, out); 23 | } 24 | 25 | void sec_max(at::Tensor inp_tensor, at::Tensor offsets_tensor, at::Tensor out_tensor, int nProposal, int C){ 26 | int *offsets = offsets_tensor.data(); 27 | float *inp = inp_tensor.data(); 28 | float *out = out_tensor.data(); 29 | 30 | sec_max_cuda(nProposal, C, inp, offsets, out); 31 | } -------------------------------------------------------------------------------- /libs/pointgroup_ops/src/sec_mean/sec_mean.cu: -------------------------------------------------------------------------------- 1 | /* 2 | Segment Operations (mean, max, min) (no bp) 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 
5 | */ 6 | 7 | #include 8 | #include 9 | #include "sec_mean.h" 10 | 11 | /* ================================== sec_mean ================================== */ 12 | __global__ void sec_mean_cuda_(int nProposal, int C, float *inp, int *offsets, float *out){ 13 | for(int p_id = blockIdx.x; p_id < nProposal; p_id += gridDim.x){ 14 | int start = offsets[p_id]; 15 | int end = offsets[p_id + 1]; 16 | 17 | float count = (float)(end - start); 18 | 19 | for(int plane = threadIdx.x; plane < C; plane += blockDim.x){ 20 | float mean = 0; 21 | for(int i = start; i < end; i++){ 22 | mean += (inp[i * C + plane] / count); 23 | } 24 | out[p_id * C + plane] = mean; 25 | } 26 | } 27 | } 28 | 29 | //input: inp (N, C) float 30 | //input: offsets (nProposal + 1) int 31 | //output: out (nProposal, C) float 32 | void sec_mean_cuda(int nProposal, int C, float *inp, int *offsets, float *out){ 33 | sec_mean_cuda_<<>>(nProposal, C, inp, offsets, out); 34 | } 35 | 36 | 37 | /* ================================== sec_min ================================== */ 38 | __global__ void sec_min_cuda_(int nProposal, int C, float *inp, int *offsets, float *out){ 39 | for(int p_id = blockIdx.x; p_id < nProposal; p_id += gridDim.x){ 40 | int start = offsets[p_id]; 41 | int end = offsets[p_id + 1]; 42 | 43 | for(int plane = threadIdx.x; plane < C; plane += blockDim.x){ 44 | float min_val = 1e50; 45 | for(int i = start; i < end; i++){ 46 | if(inp[i * C + plane] < min_val){ 47 | min_val = inp[i * C + plane]; 48 | } 49 | } 50 | out[p_id * C + plane] = min_val; 51 | } 52 | } 53 | } 54 | 55 | //input: inp (N, C) float 56 | //input: offsets (nProposal + 1) int 57 | //output: out (nProposal, C) float 58 | void sec_min_cuda(int nProposal, int C, float *inp, int *offsets, float *out){ 59 | sec_min_cuda_<<>>(nProposal, C, inp, offsets, out); 60 | } 61 | 62 | 63 | /* ================================== sec_max ================================== */ 64 | __global__ void sec_max_cuda_(int nProposal, int C, float *inp, int *offsets, float *out){ 65 | for(int p_id = blockIdx.x; p_id < nProposal; p_id += gridDim.x){ 66 | int start = offsets[p_id]; 67 | int end = offsets[p_id + 1]; 68 | 69 | for(int plane = threadIdx.x; plane < C; plane += blockDim.x){ 70 | float max_val = -1e50; 71 | for(int i = start; i < end; i++){ 72 | if(inp[i * C + plane] > max_val){ 73 | max_val = inp[i * C + plane]; 74 | } 75 | } 76 | out[p_id * C + plane] = max_val; 77 | } 78 | } 79 | } 80 | 81 | //input: inp (N, C) float 82 | //input: offsets (nProposal + 1) int 83 | //output: out (nProposal, C) float 84 | void sec_max_cuda(int nProposal, int C, float *inp, int *offsets, float *out){ 85 | sec_max_cuda_<<>>(nProposal, C, inp, offsets, out); 86 | } -------------------------------------------------------------------------------- /libs/pointgroup_ops/src/sec_mean/sec_mean.h: -------------------------------------------------------------------------------- 1 | /* 2 | Segment Operations (mean, max, min) 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 
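sec_mean, sec_min, and sec_max above all reduce consecutive row segments of inp (N, C), with segment p covering rows offsets[p] to offsets[p+1], into one row per proposal. A compact NumPy sketch covering all three (hypothetical name; assumes every segment is non-empty):

import numpy as np

def sec_reduce_ref(inp, offsets, mode="mean"):
    # inp: (N, C), offsets: (nProposal + 1,) -> out: (nProposal, C)
    reduce_fn = {"mean": np.mean, "min": np.min, "max": np.max}[mode]
    return np.stack([
        reduce_fn(inp[offsets[p]:offsets[p + 1]], axis=0)   # one reduced row per segment
        for p in range(len(offsets) - 1)
    ]).astype(inp.dtype)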
5 | */ 6 | 7 | #ifndef SEC_MEAN_H 8 | #define SEC_MEAN_H 9 | #include 10 | #include 11 | 12 | #include "../datatype/datatype.h" 13 | 14 | void sec_mean(at::Tensor inp_tensor, at::Tensor offsets_tensor, at::Tensor out_tensor, int nProposal, int C); 15 | void sec_mean_cuda(int nProposal, int C, float *inp, int *offsets, float *out); 16 | 17 | void sec_min(at::Tensor inp_tensor, at::Tensor offsets_tensor, at::Tensor out_tensor, int nProposal, int C); 18 | void sec_min_cuda(int nProposal, int C, float *inp, int *offsets, float *out); 19 | 20 | void sec_max(at::Tensor inp_tensor, at::Tensor offsets_tensor, at::Tensor out_tensor, int nProposal, int C); 21 | void sec_max_cuda(int nProposal, int C, float *inp, int *offsets, float *out); 22 | 23 | 24 | #endif //SEC_MEAN_H 25 | -------------------------------------------------------------------------------- /libs/pointgroup_ops/src/voxelize/voxelize.cu: -------------------------------------------------------------------------------- 1 | /* 2 | Points to Voxels & Voxels to Points (Modified from SparseConv) 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 5 | */ 6 | 7 | #include "voxelize.h" 8 | 9 | template 10 | __global__ void voxelize_fp_cuda_(Int nOutputRows, Int maxActive, Int nPlanes, T *feats, T *output_feats, Int *rules, bool average){ 11 | for(int row = blockIdx.x; row < nOutputRows; row += gridDim.x){ 12 | T *out = output_feats + row * nPlanes; 13 | Int *r = rules + row * (maxActive + 1); 14 | Int nActive = r[0]; 15 | T multiplier = (average and nActive > 0) ? (T) 1 / nActive : (T) 1; 16 | for(int i = 1; i <= nActive; i++){ 17 | T *inp = feats + r[i] * nPlanes; 18 | for(int plane = threadIdx.x; plane < nPlanes; plane += blockDim.x){ 19 | atomicAdd(&out[plane], multiplier * inp[plane]); 20 | } 21 | } 22 | } 23 | } 24 | 25 | // input: feats N * C 26 | // input: rules M * (1 + maxActive) 27 | // output: output_feats M * C 28 | template 29 | void voxelize_fp_cuda(Int nOutputRows, Int maxActive, Int nPlanes, T *feats, T *output_feats, Int *rules, bool average){ 30 | voxelize_fp_cuda_<<>>(nOutputRows, maxActive, nPlanes, feats, output_feats, rules, average); 31 | } 32 | 33 | 34 | template 35 | __global__ void voxelize_bp_cuda_(Int nOutputRows, Int maxActive, Int nPlanes, T *d_output_feats, T *d_feats, Int *rules, bool average){ 36 | for(int row = blockIdx.x; row < nOutputRows; row += gridDim.x){ 37 | T *out = d_output_feats + row * nPlanes; 38 | Int *r = rules + row * (maxActive + 1); 39 | Int nActive = r[0]; 40 | T multiplier = (average and nActive > 0) ? (T) 1 / nActive : (T) 1; 41 | for(int i = 1; i <= nActive; i++){ 42 | T *inp = d_feats + r[i] * nPlanes; 43 | for(int plane = threadIdx.x; plane < nPlanes; plane += blockDim.x){ 44 | atomicAdd(&inp[plane], multiplier * out[plane]); 45 | } 46 | } 47 | } 48 | } 49 | 50 | template 51 | void voxelize_bp_cuda(Int nOutputRows, Int maxActive, Int nPlanes, T *d_output_feats, T *d_feats, Int *rules, bool average){ 52 | voxelize_bp_cuda_<<>>(nOutputRows, maxActive, nPlanes, d_output_feats, d_feats, rules, average); 53 | } 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /libs/pointgroup_ops/src/voxelize/voxelize.h: -------------------------------------------------------------------------------- 1 | /* 2 | Points to Voxels & Voxels to Points (Modified from SparseConv) 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 
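voxelize_fp_cuda above scatters point features into voxels according to a rulebook: row r of rules stores the number of contributing points in r[0] followed by their point indices, and with average=True the accumulated sum is divided by that count. The backward kernel routes voxel gradients back through the same rules. A forward-pass sketch in NumPy (hypothetical name, illustration only):

import numpy as np

def voxelize_fp_ref(feats, rules, average=True):
    # feats: (N, C), rules: (M, 1 + maxActive) int -> output_feats: (M, C)
    M, C = rules.shape[0], feats.shape[1]
    output_feats = np.zeros((M, C), dtype=feats.dtype)
    for row in range(M):
        n_active = rules[row, 0]                 # number of points mapped into this voxel
        if n_active == 0:
            continue
        pts = rules[row, 1:1 + n_active]
        summed = feats[pts].sum(axis=0)
        output_feats[row] = summed / n_active if average else summed
    return output_feats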
5 | */ 6 | 7 | #ifndef VOXELIZE_H 8 | #define VOXELIZE_H 9 | #include 10 | #include 11 | 12 | #include "../datatype/datatype.h" 13 | 14 | /* ================================== voxelize_idx ================================== */ 15 | template 16 | void voxelize_idx(/* long N*4 */ at::Tensor coords, /* long M*4 */ at::Tensor output_coords, 17 | /* Int N */ at::Tensor input_map, /* Int M*(maxActive+1) */ at::Tensor output_map, Int batchSize, Int mode); 18 | 19 | template 20 | void voxelize_outputmap(long *coords, long *output_coords, Int *output_map, Int *rule, Int nOutputRows, Int maxActive); 21 | 22 | template 23 | Int voxelize_inputmap(SparseGrids &SGs, Int *input_map, RuleBook &rules, Int &nActive, long *coords, Int nInputRows, Int nInputColumns, Int batchSize, Int mode); 24 | 25 | /* ================================== voxelize ================================== */ 26 | template 27 | void voxelize_fp(/* cuda float N*C */ at::Tensor feats, // N * 3 -> M * 3 (N >= M) 28 | /* cuda float M*C */ at::Tensor output_feats, 29 | /* cuda Int M*(maxActive+1) */ at::Tensor output_map, Int mode, Int nActive, Int maxActive, Int nPlane); 30 | 31 | template 32 | void voxelize_fp_cuda(Int nOutputRows, Int maxActive, Int nPlanes, T *feats, T *output_feats, Int *rules, bool average); 33 | 34 | 35 | // 36 | template 37 | void voxelize_bp(/* cuda float M*C */ at::Tensor d_output_feats, /* cuda float N*C */ at::Tensor d_feats, /* cuda Int M*(maxActive+1) */ at::Tensor output_map, 38 | Int mode, Int nActive, Int maxActive, Int nPlane); 39 | 40 | template 41 | void voxelize_bp_cuda(Int nOutputRows, Int maxActive, Int nPlanes, T *d_output_feats, T *d_feats, Int *rules, bool average); 42 | 43 | 44 | /* ================================== point_recover ================================== */ 45 | template 46 | void point_recover_fp(/* cuda float M*C */ at::Tensor feats, /* cuda float N*C */ at::Tensor output_feats, /* cuda Int M*(maxActive+1) */ at::Tensor idx_map, 47 | Int nActive, Int maxActive, Int nPlane); 48 | 49 | // 50 | template 51 | void point_recover_bp(/* cuda float N*C */ at::Tensor d_output_feats, /* cuda float M*C */ at::Tensor d_feats, /* cuda Int M*(maxActive+1) */ at::Tensor idx_map, 52 | Int nActive, Int maxActive, Int nPlane); 53 | 54 | 55 | #endif //VOXELIZE_H 56 | -------------------------------------------------------------------------------- /libs/pointnet_lib/install.sh: -------------------------------------------------------------------------------- 1 | python setup.py build_ext --inplace -------------------------------------------------------------------------------- /libs/pointnet_lib/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='pointnet2', 6 | ext_modules=[ 7 | CUDAExtension('pointnet2_cuda', [ 8 | 'src/pointnet2_api.cpp', 9 | 10 | 'src/ball_query.cpp', 11 | 'src/ball_query_gpu.cu', 12 | 'src/group_points.cpp', 13 | 'src/group_points_gpu.cu', 14 | 'src/interpolate.cpp', 15 | 'src/interpolate_gpu.cu', 16 | 'src/sampling.cpp', 17 | 'src/sampling_gpu.cu', 18 | ], 19 | extra_compile_args={'cxx': ['-g'], 20 | 'nvcc': ['-O2']}) 21 | ], 22 | cmdclass={'build_ext': BuildExtension} 23 | ) 24 | -------------------------------------------------------------------------------- /libs/pointnet_lib/src/ball_query.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 
#include 4 | #include 5 | #include 6 | #include "ball_query_gpu.h" 7 | 8 | extern THCState *state; 9 | 10 | #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 11 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") 12 | #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x) 13 | 14 | int ball_query_wrapper_fast(int b, int n, int m, float radius, int nsample, 15 | at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor idx_tensor) { 16 | CHECK_INPUT(new_xyz_tensor); 17 | CHECK_INPUT(xyz_tensor); 18 | const float *new_xyz = new_xyz_tensor.data(); 19 | const float *xyz = xyz_tensor.data(); 20 | int *idx = idx_tensor.data(); 21 | 22 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); //THCState_getCurrentStream(state); 23 | ball_query_kernel_launcher_fast(b, n, m, radius, nsample, new_xyz, xyz, idx, stream); 24 | return 1; 25 | } -------------------------------------------------------------------------------- /libs/pointnet_lib/src/ball_query_gpu.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "ball_query_gpu.h" 6 | #include "cuda_utils.h" 7 | 8 | 9 | __global__ void ball_query_kernel_fast(int b, int n, int m, float radius, int nsample, 10 | const float *__restrict__ new_xyz, const float *__restrict__ xyz, int *__restrict__ idx) { 11 | // new_xyz: (B, M, 3) 12 | // xyz: (B, N, 3) 13 | // output: 14 | // idx: (B, M, nsample) 15 | int bs_idx = blockIdx.y; 16 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 17 | if (bs_idx >= b || pt_idx >= m) return; 18 | 19 | new_xyz += bs_idx * m * 3 + pt_idx * 3; 20 | xyz += bs_idx * n * 3; 21 | idx += bs_idx * m * nsample + pt_idx * nsample; 22 | 23 | float radius2 = radius * radius; 24 | float new_x = new_xyz[0]; 25 | float new_y = new_xyz[1]; 26 | float new_z = new_xyz[2]; 27 | 28 | int cnt = 0; 29 | for (int k = 0; k < n; ++k) { 30 | float x = xyz[k * 3 + 0]; 31 | float y = xyz[k * 3 + 1]; 32 | float z = xyz[k * 3 + 2]; 33 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z); 34 | if (d2 < radius2){ 35 | if (cnt == 0){ 36 | for (int l = 0; l < nsample; ++l) { 37 | idx[l] = k; 38 | } 39 | } 40 | idx[cnt] = k; 41 | ++cnt; 42 | if (cnt >= nsample) break; 43 | } 44 | } 45 | } 46 | 47 | 48 | void ball_query_kernel_launcher_fast(int b, int n, int m, float radius, int nsample, \ 49 | const float *new_xyz, const float *xyz, int *idx, cudaStream_t stream) { 50 | // new_xyz: (B, M, 3) 51 | // xyz: (B, N, 3) 52 | // output: 53 | // idx: (B, M, nsample) 54 | 55 | cudaError_t err; 56 | 57 | dim3 blocks(DIVUP(m, THREADS_PER_BLOCK), b); // blockIdx.x(col), blockIdx.y(row) 58 | dim3 threads(THREADS_PER_BLOCK); 59 | 60 | ball_query_kernel_fast<<>>(b, n, m, radius, nsample, new_xyz, xyz, idx); 61 | // cudaDeviceSynchronize(); // for using printf in kernel function 62 | err = cudaGetLastError(); 63 | if (cudaSuccess != err) { 64 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 65 | exit(-1); 66 | } 67 | } -------------------------------------------------------------------------------- /libs/pointnet_lib/src/ball_query_gpu.h: -------------------------------------------------------------------------------- 1 | #ifndef _BALL_QUERY_GPU_H 2 | #define _BALL_QUERY_GPU_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | int ball_query_wrapper_fast(int b, int n, int m, float radius, int nsample, 10 | at::Tensor new_xyz_tensor, 
at::Tensor xyz_tensor, at::Tensor idx_tensor); 11 | 12 | void ball_query_kernel_launcher_fast(int b, int n, int m, float radius, int nsample, 13 | const float *xyz, const float *new_xyz, int *idx, cudaStream_t stream); 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /libs/pointnet_lib/src/cuda_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef _CUDA_UTILS_H 2 | #define _CUDA_UTILS_H 3 | 4 | #include 5 | 6 | #define TOTAL_THREADS 1024 7 | #define THREADS_PER_BLOCK 256 8 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 9 | 10 | inline int opt_n_threads(int work_size) { 11 | const int pow_2 = std::log(static_cast(work_size)) / std::log(2.0); 12 | 13 | return max(min(1 << pow_2, TOTAL_THREADS), 1); 14 | } 15 | #endif 16 | -------------------------------------------------------------------------------- /libs/pointnet_lib/src/group_points.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "group_points_gpu.h" 7 | 8 | extern THCState *state; 9 | 10 | 11 | int group_points_grad_wrapper_fast(int b, int c, int n, int npoints, int nsample, 12 | at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor) { 13 | 14 | float *grad_points = grad_points_tensor.data(); 15 | const int *idx = idx_tensor.data(); 16 | const float *grad_out = grad_out_tensor.data(); 17 | 18 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); //THCState_getCurrentStream(state); 19 | 20 | group_points_grad_kernel_launcher_fast(b, c, n, npoints, nsample, grad_out, idx, grad_points, stream); 21 | return 1; 22 | } 23 | 24 | 25 | int group_points_wrapper_fast(int b, int c, int n, int npoints, int nsample, 26 | at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor) { 27 | 28 | const float *points = points_tensor.data(); 29 | const int *idx = idx_tensor.data(); 30 | float *out = out_tensor.data(); 31 | 32 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); //THCState_getCurrentStream(state); 33 | 34 | group_points_kernel_launcher_fast(b, c, n, npoints, nsample, points, idx, out, stream); 35 | return 1; 36 | } -------------------------------------------------------------------------------- /libs/pointnet_lib/src/group_points_gpu.h: -------------------------------------------------------------------------------- 1 | #ifndef _GROUP_POINTS_GPU_H 2 | #define _GROUP_POINTS_GPU_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | int group_points_wrapper_fast(int b, int c, int n, int npoints, int nsample, 11 | at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor); 12 | 13 | void group_points_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample, 14 | const float *points, const int *idx, float *out, cudaStream_t stream); 15 | 16 | int group_points_grad_wrapper_fast(int b, int c, int n, int npoints, int nsample, 17 | at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor); 18 | 19 | void group_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample, 20 | const float *grad_out, const int *idx, float *grad_points, cudaStream_t stream); 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /libs/pointnet_lib/src/interpolate.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 
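group_points above gathers, for every query centre produced by ball query, the features of its nsample selected neighbours, i.e. out[b, c, m, s] = points[b, c, idx[b, m, s]]; the grad wrapper scatter-adds gradients back through the same indices. A gather-only NumPy sketch (hypothetical name):

import numpy as np

def group_points_ref(points, idx):
    # points: (B, C, N), idx: (B, npoints, nsample) -> out: (B, C, npoints, nsample)
    B, C, N = points.shape
    out = np.empty((B, C) + idx.shape[1:], dtype=points.dtype)
    for b in range(B):
        out[b] = points[b][:, idx[b]]   # fancy-index the point axis with (npoints, nsample) indices
    return out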
-------------------------------------------------------------------------------- /libs/pointnet_lib/src/interpolate.cpp: --------------------------------------------------------------------------------
#include <torch/serialize/tensor.h>
#include <vector>
#include <THC/THC.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <ATen/cuda/CUDAContext.h>
#include "interpolate_gpu.h"

extern THCState *state;


void three_nn_wrapper_fast(int b, int n, int m, at::Tensor unknown_tensor,
    at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor) {
    const float *unknown = unknown_tensor.data<float>();
    const float *known = known_tensor.data<float>();
    float *dist2 = dist2_tensor.data<float>();
    int *idx = idx_tensor.data<int>();

    cudaStream_t stream = at::cuda::getCurrentCUDAStream();  // THCState_getCurrentStream(state);
    three_nn_kernel_launcher_fast(b, n, m, unknown, known, dist2, idx, stream);
}

void knn_wrapper_fast(int b, int n, int m, int k, at::Tensor unknown_tensor,
    at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor) {
    const float *unknown = unknown_tensor.data<float>();
    const float *known = known_tensor.data<float>();
    float *dist2 = dist2_tensor.data<float>();
    int *idx = idx_tensor.data<int>();

    cudaStream_t stream = at::cuda::getCurrentCUDAStream();  // THCState_getCurrentStream(state);
    knn_kernel_launcher_fast(b, n, m, k, unknown, known, dist2, idx, stream);
}


void three_interpolate_wrapper_fast(int b, int c, int m, int n,
                                    at::Tensor points_tensor,
                                    at::Tensor idx_tensor,
                                    at::Tensor weight_tensor,
                                    at::Tensor out_tensor) {

    const float *points = points_tensor.data<float>();
    const float *weight = weight_tensor.data<float>();
    float *out = out_tensor.data<float>();
    const int *idx = idx_tensor.data<int>();

    cudaStream_t stream = at::cuda::getCurrentCUDAStream();  // THCState_getCurrentStream(state);
    three_interpolate_kernel_launcher_fast(b, c, m, n, points, idx, weight, out, stream);
}

void three_interpolate_grad_wrapper_fast(int b, int c, int n, int m,
                                         at::Tensor grad_out_tensor,
                                         at::Tensor idx_tensor,
                                         at::Tensor weight_tensor,
                                         at::Tensor grad_points_tensor) {

    const float *grad_out = grad_out_tensor.data<float>();
    const float *weight = weight_tensor.data<float>();
    float *grad_points = grad_points_tensor.data<float>();
    const int *idx = idx_tensor.data<int>();

    cudaStream_t stream = at::cuda::getCurrentCUDAStream();  // THCState_getCurrentStream(state);
    three_interpolate_grad_kernel_launcher_fast(b, c, n, m, grad_out, idx, weight, grad_points, stream);
}
-------------------------------------------------------------------------------- /libs/pointnet_lib/src/interpolate_gpu.h: --------------------------------------------------------------------------------
#ifndef _INTERPOLATE_GPU_H
#define _INTERPOLATE_GPU_H

#include <torch/serialize/tensor.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <vector>


void three_nn_wrapper_fast(int b, int n, int m, at::Tensor unknown_tensor,
    at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor);

void three_nn_kernel_launcher_fast(int b, int n, int m, const float *unknown,
    const float *known, float *dist2, int *idx, cudaStream_t stream);

void knn_wrapper_fast(int b, int n, int m, int k, at::Tensor unknown_tensor,
    at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor);

void knn_kernel_launcher_fast(int b, int n, int m, int k, const float *unknown,
    const float *known, float *dist2, int *idx, cudaStream_t stream);


void three_interpolate_wrapper_fast(int b, int c, int m, int n, at::Tensor points_tensor,
    at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor out_tensor);

void three_interpolate_kernel_launcher_fast(int b, int c, int m, int n,
    const float *points, const int *idx, const float *weight, float *out, cudaStream_t stream);


void three_interpolate_grad_wrapper_fast(int b, int c, int n, int m, at::Tensor grad_out_tensor,
    at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_points_tensor);

void three_interpolate_grad_kernel_launcher_fast(int b, int c, int n, int m, const float *grad_out,
    const int *idx, const float *weight, float *grad_points, cudaStream_t stream);

#endif
-------------------------------------------------------------------------------- /libs/pointnet_lib/src/pointnet2_api.cpp: --------------------------------------------------------------------------------
#include <torch/serialize/tensor.h>
#include <torch/extension.h>

#include "ball_query_gpu.h"
#include "group_points_gpu.h"
#include "sampling_gpu.h"
#include "interpolate_gpu.h"


PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
    m.def("ball_query_wrapper", &ball_query_wrapper_fast, "ball_query_wrapper_fast");

    m.def("group_points_wrapper", &group_points_wrapper_fast, "group_points_wrapper_fast");
    m.def("group_points_grad_wrapper", &group_points_grad_wrapper_fast, "group_points_grad_wrapper_fast");

    m.def("gather_points_wrapper", &gather_points_wrapper_fast, "gather_points_wrapper_fast");
    m.def("gather_points_grad_wrapper", &gather_points_grad_wrapper_fast, "gather_points_grad_wrapper_fast");

    m.def("furthest_point_sampling_wrapper", &furthest_point_sampling_wrapper, "furthest_point_sampling_wrapper");

    m.def("knn_wrapper", &knn_wrapper_fast, "knn_wrapper_fast");
    m.def("three_nn_wrapper", &three_nn_wrapper_fast, "three_nn_wrapper_fast");
    m.def("three_interpolate_wrapper", &three_interpolate_wrapper_fast, "three_interpolate_wrapper_fast");
    m.def("three_interpolate_grad_wrapper", &three_interpolate_grad_wrapper_fast, "three_interpolate_grad_wrapper_fast");
}
-------------------------------------------------------------------------------- /libs/pointnet_lib/src/sampling.cpp: --------------------------------------------------------------------------------
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>
#include <vector>
#include <THC/THC.h>

#include "sampling_gpu.h"

extern THCState *state;


int gather_points_wrapper_fast(int b, int c, int n, int npoints,
    at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor){
    const float *points = points_tensor.data<float>();
    const int *idx = idx_tensor.data<int>();
    float *out = out_tensor.data<float>();

    cudaStream_t stream = at::cuda::getCurrentCUDAStream();  // THCState_getCurrentStream(state);
    gather_points_kernel_launcher_fast(b, c, n, npoints, points, idx, out, stream);
    return 1;
}


int gather_points_grad_wrapper_fast(int b, int c, int n, int npoints,
    at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor) {

    const float *grad_out = grad_out_tensor.data<float>();
    const int *idx = idx_tensor.data<int>();
    float *grad_points = grad_points_tensor.data<float>();

    cudaStream_t stream = at::cuda::getCurrentCUDAStream();  // THCState_getCurrentStream(state);
    gather_points_grad_kernel_launcher_fast(b, c, n, npoints, grad_out, idx, grad_points, stream);
    return 1;
}


int furthest_point_sampling_wrapper(int b, int n, int m,
    at::Tensor points_tensor, at::Tensor temp_tensor, at::Tensor idx_tensor) {

    const float *points = points_tensor.data<float>();
    float *temp = temp_tensor.data<float>();
    int *idx = idx_tensor.data<int>();

    cudaStream_t stream = at::cuda::getCurrentCUDAStream();  // THCState_getCurrentStream(state);
    furthest_point_sampling_kernel_launcher(b, n, m, points, temp, idx, stream);
    return 1;
}
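The bindings and sampling wrappers above follow the same convention: the caller allocates the index and output tensors and the wrapper fills them on the current CUDA stream. A minimal sketch of furthest point sampling followed by gathering is given below; the module name pointnet2_cuda and the large initial value of the temp buffer follow the standard PointNet++ implementation and are assumptions, not code from this repository.

# Hypothetical usage sketch; module name and temp initialisation are assumed.
import torch
import pointnet2_cuda

def sample_and_gather(xyz: torch.Tensor, features: torch.Tensor, m: int):
    # xyz: (B, N, 3) point coordinates, features: (B, C, N) per-point features
    B, N, _ = xyz.size()
    idx = torch.cuda.IntTensor(B, m)
    temp = torch.cuda.FloatTensor(B, N).fill_(1e10)  # running nearest-distance buffer
    pointnet2_cuda.furthest_point_sampling_wrapper(B, N, m, xyz, temp, idx)

    C = features.size(1)
    out = torch.cuda.FloatTensor(B, C, m)
    pointnet2_cuda.gather_points_wrapper(B, C, N, m, features, idx, out)
    return idx, out  # sampled indices and the gathered (B, C, m) features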
-------------------------------------------------------------------------------- /libs/pointnet_lib/src/sampling_gpu.h: --------------------------------------------------------------------------------
#ifndef _SAMPLING_GPU_H
#define _SAMPLING_GPU_H

#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>
#include <vector>


int gather_points_wrapper_fast(int b, int c, int n, int npoints,
    at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor);

void gather_points_kernel_launcher_fast(int b, int c, int n, int npoints,
    const float *points, const int *idx, float *out, cudaStream_t stream);


int gather_points_grad_wrapper_fast(int b, int c, int n, int npoints,
    at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor);

void gather_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints,
    const float *grad_out, const int *idx, float *grad_points, cudaStream_t stream);


int furthest_point_sampling_wrapper(int b, int n, int m,
    at::Tensor points_tensor, at::Tensor temp_tensor, at::Tensor idx_tensor);

void furthest_point_sampling_kernel_launcher(int b, int n, int m,
    const float *dataset, float *temp, int *idxs, cudaStream_t stream);

#endif
-------------------------------------------------------------------------------- /libs/pointnet_sp/install.sh: --------------------------------------------------------------------------------
python setup.py build_ext --inplace
-------------------------------------------------------------------------------- /libs/pointnet_sp/pointnet2_utils.py: --------------------------------------------------------------------------------
import torch
from torch.autograd import Variable
from torch.autograd import Function
import torch.nn as nn
from typing import Tuple

from . import pointnet2_cuda as pointnet2


class ThreeNN(Function):

    @staticmethod
    def forward(ctx, unknown: torch.Tensor, known: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Find the three nearest neighbors of unknown in known
        :param ctx:
        :param unknown: (N, 3)
        :param known: (M, 3)
        :return:
            dist: (N, 3) l2 distance to the three nearest neighbors
            idx: (N, 3) index of the 3 nearest neighbors
        """
        assert unknown.is_contiguous()
        assert known.is_contiguous()

        N, _ = unknown.size()
        m = known.size(0)
        dist2 = torch.cuda.FloatTensor(N, 3)
        idx = torch.cuda.IntTensor(N, 3)

        pointnet2.three_nn_wrapper(N, m, unknown, known, dist2, idx)
        return torch.sqrt(dist2), idx

    @staticmethod
    def backward(ctx, a=None, b=None):
        return None, None


three_nn = ThreeNN.apply


class ThreeInterpolate(Function):

    @staticmethod
    def forward(ctx, features: torch.Tensor, idx: torch.Tensor, weight: torch.Tensor) -> torch.Tensor:
        """
        Performs weighted linear interpolation of 3 features
        :param ctx:
        :param features: (M, C) feature descriptors to be interpolated from
        :param idx: (N, 3) three nearest neighbors of the target features in features
        :param weight: (N, 3) weights
        :return:
            output: (N, C) tensor of the interpolated features
        """
        assert features.is_contiguous()
        assert idx.is_contiguous()
        assert weight.is_contiguous()

        m, c = features.size()
        n = idx.size(0)
        ctx.three_interpolate_for_backward = (idx, weight, m)
        output = torch.cuda.FloatTensor(n, c)

        pointnet2.three_interpolate_wrapper(c, m, n, features, idx, weight, output)
        return output

    @staticmethod
    def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """
        :param ctx:
        :param grad_out: (N, C) tensor with gradients of outputs
        :return:
            grad_features: (M, C) tensor with gradients of features
            None:
            None:
        """
        idx, weight, m = ctx.three_interpolate_for_backward
        n, c = grad_out.size()

        grad_features = Variable(torch.cuda.FloatTensor(m, c).zero_())
        grad_out_data = grad_out.data.contiguous()

        pointnet2.three_interpolate_grad_wrapper(c, n, m, grad_out_data, idx, weight, grad_features.data)
        return grad_features, None, None


three_interpolate = ThreeInterpolate.apply
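A minimal usage sketch of the two autograd Functions defined above, unbatched as in pointnet_sp. The inverse-distance weighting is the usual PointNet++ feature-propagation recipe, and the import path is assumed, not taken from this repository.

# Sketch only: import path and the inverse-distance weighting scheme are assumptions.
import torch
from libs.pointnet_sp.pointnet2_utils import three_nn, three_interpolate  # path assumed

dense_xyz = torch.rand(4096, 3).cuda().contiguous()       # N query points
sparse_xyz = torch.rand(1024, 3).cuda().contiguous()      # M support points
sparse_feats = torch.rand(1024, 64).cuda().contiguous()   # (M, C) features to spread

dist, idx = three_nn(dense_xyz, sparse_xyz)               # (N, 3) distances, (N, 3) indices
weight = 1.0 / (dist + 1e-8)
weight = weight / weight.sum(dim=1, keepdim=True)         # normalise the 3 weights per point
dense_feats = three_interpolate(sparse_feats, idx, weight.contiguous())  # (N, C)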
-------------------------------------------------------------------------------- /libs/pointnet_sp/setup.py: --------------------------------------------------------------------------------
from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension

setup(
    name='pointnet2',
    ext_modules=[
        CUDAExtension('pointnet2_cuda', [
            'src/pointnet2_api.cpp',
            'src/interpolate.cpp',
            'src/interpolate_gpu.cu',
        ],
        extra_compile_args={'cxx': ['-g'],
                            'nvcc': ['-O2']})
    ],
    cmdclass={'build_ext': BuildExtension}
)
-------------------------------------------------------------------------------- /libs/pointnet_sp/src/cuda_utils.h: --------------------------------------------------------------------------------
#ifndef _CUDA_UTILS_H
#define _CUDA_UTILS_H

#include <cmath>

#define TOTAL_THREADS 1024
#define THREADS_PER_BLOCK 256
#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))

inline int opt_n_threads(int work_size) {
    const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0);

    return max(min(1 << pow_2, TOTAL_THREADS), 1);
}
#endif
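For reference, the launch-size arithmetic in cuda_utils.h can be mirrored in plain Python. The constants are copied from the header; the helper functions themselves are illustrative and not part of the repository.

# Python mirror of the DIVUP macro and opt_n_threads helper, for illustration only.
import math

TOTAL_THREADS = 1024
THREADS_PER_BLOCK = 256

def divup(m: int, n: int) -> int:
    # same as the DIVUP macro: ceiling division used to size the CUDA grid
    return m // n + (1 if m % n > 0 else 0)

def opt_n_threads(work_size: int) -> int:
    # largest power of two <= work_size, clamped to [1, TOTAL_THREADS]
    pow_2 = int(math.log(work_size) / math.log(2.0))
    return max(min(1 << pow_2, TOTAL_THREADS), 1)

print(divup(4096, THREADS_PER_BLOCK))  # 16 blocks of 256 threads
print(opt_n_threads(700))              # 512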
-------------------------------------------------------------------------------- /libs/pointnet_sp/src/interpolate.cpp: --------------------------------------------------------------------------------
#include <torch/serialize/tensor.h>
#include <vector>
#include <THC/THC.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include "interpolate_gpu.h"

extern THCState *state;

void three_nn_wrapper_fast(int n, int m, at::Tensor unknown_tensor,
    at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor) {
    const float *unknown = unknown_tensor.data<float>();
    const float *known = known_tensor.data<float>();
    float *dist2 = dist2_tensor.data<float>();
    int *idx = idx_tensor.data<int>();

    cudaStream_t stream = THCState_getCurrentStream(state);
    three_nn_kernel_launcher_fast(n, m, unknown, known, dist2, idx, stream);
}


void three_interpolate_wrapper_fast(int c, int m, int n,
                                    at::Tensor points_tensor,
                                    at::Tensor idx_tensor,
                                    at::Tensor weight_tensor,
                                    at::Tensor out_tensor) {

    const float *points = points_tensor.data<float>();
    const float *weight = weight_tensor.data<float>();
    float *out = out_tensor.data<float>();
    const int *idx = idx_tensor.data<int>();

    cudaStream_t stream = THCState_getCurrentStream(state);
    three_interpolate_kernel_launcher_fast(c, m, n, points, idx, weight, out, stream);
}

void three_interpolate_grad_wrapper_fast(int c, int n, int m,
                                         at::Tensor grad_out_tensor,
                                         at::Tensor idx_tensor,
                                         at::Tensor weight_tensor,
                                         at::Tensor grad_points_tensor) {

    const float *grad_out = grad_out_tensor.data<float>();
    const float *weight = weight_tensor.data<float>();
    float *grad_points = grad_points_tensor.data<float>();
    const int *idx = idx_tensor.data<int>();

    cudaStream_t stream = THCState_getCurrentStream(state);
    three_interpolate_grad_kernel_launcher_fast(c, n, m, grad_out, idx, weight, grad_points, stream);
}
-------------------------------------------------------------------------------- /libs/pointnet_sp/src/interpolate_gpu.h: --------------------------------------------------------------------------------
#ifndef _INTERPOLATE_GPU_H
#define _INTERPOLATE_GPU_H

#include <torch/serialize/tensor.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <vector>


void three_nn_wrapper_fast(int n, int m, at::Tensor unknown_tensor,
    at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor);

void three_nn_kernel_launcher_fast(int n, int m, const float *unknown,
    const float *known, float *dist2, int *idx, cudaStream_t stream);


void three_interpolate_wrapper_fast(int c, int m, int n, at::Tensor points_tensor,
    at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor out_tensor);

void three_interpolate_kernel_launcher_fast(int c, int m, int n,
    const float *points, const int *idx, const float *weight, float *out, cudaStream_t stream);


void three_interpolate_grad_wrapper_fast(int c, int n, int m, at::Tensor grad_out_tensor,
    at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_points_tensor);

void three_interpolate_grad_kernel_launcher_fast(int c, int n, int m, const float *grad_out,
    const int *idx, const float *weight, float *grad_points, cudaStream_t stream);

#endif
-------------------------------------------------------------------------------- /libs/pointnet_sp/src/pointnet2_api.cpp: --------------------------------------------------------------------------------
#include <torch/serialize/tensor.h>
#include <torch/extension.h>

#include "interpolate_gpu.h"

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
    m.def("three_nn_wrapper", &three_nn_wrapper_fast, "three_nn_wrapper_fast");
    m.def("three_interpolate_wrapper", &three_interpolate_wrapper_fast, "three_interpolate_wrapper_fast");
    m.def("three_interpolate_grad_wrapper", &three_interpolate_grad_wrapper_fast, "three_interpolate_grad_wrapper_fast");
}
-------------------------------------------------------------------------------- /libs/spconv/CMakeLists.txt: --------------------------------------------------------------------------------
# include_directories($INCLUDE_PATH$)
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
project(SparseConv LANGUAGES CXX CUDA VERSION 1.0)

option(SPCONV_BuildTests "Build the unit tests when BUILD_TESTING is enabled." ON)
set(CMAKE_CXX_EXTENSIONS OFF) # avoid gnu++11 being added to CXX flags

set(CUDA_TOOLKIT_ROOT_DIR "${CMAKE_CUDA_COMPILER}")
get_filename_component(CUDA_TOOLKIT_ROOT_DIR "${CUDA_TOOLKIT_ROOT_DIR}" DIRECTORY)
get_filename_component(CUDA_TOOLKIT_ROOT_DIR "${CUDA_TOOLKIT_ROOT_DIR}" DIRECTORY)
if(WIN32) # true if windows (32 and 64 bit)
    set(CUDA_LIB_PATH_HINTS "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64")
    add_compile_definitions(TV_WINDOWS)
else()
    set(CUDA_LIB_PATH_HINTS "${CUDA_TOOLKIT_ROOT_DIR}/lib64")
endif()

find_library(CUDA_CUDART NAMES cudart HINTS ${CUDA_LIB_PATH_HINTS})
find_library(CUDA_CUBLAS NAMES cublas HINTS ${CUDA_LIB_PATH_HINTS})
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
    add_compile_definitions(TV_DEBUG)
endif()

find_package(Torch REQUIRED)

# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)

add_compile_definitions(SPCONV_CUDA)
add_subdirectory(third_party/pybind11)

set(ALL_LIBS ${CUDA_CUDART} ${CUDA_CUBLAS} ${TORCH_LIBRARIES})

set(ALL_INCLUDE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
                ${PROJECT_SOURCE_DIR}/include)


add_subdirectory(src/spconv)
add_subdirectory(src/utils)

if (SPCONV_BuildTests)
    include(CTest) # adds option BUILD_TESTING (default ON)
    if(BUILD_TESTING)
        enable_testing()
        add_subdirectory(test)
    endif()
endif()
-------------------------------------------------------------------------------- /libs/spconv/include/paramsgrid.h: --------------------------------------------------------------------------------
1 | // Copyright 2019 Yan Yan
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 15 | #ifndef PARAMS_GRID_H_ 16 | #define PARAMS_GRID_H_ 17 | #include 18 | #include 19 | 20 | namespace detail { 21 | template int getTotalSize(std::vector arg) { return arg.size(); } 22 | 23 | template 24 | int getTotalSize(std::vector arg, std::vector... args) { 25 | return arg.size() * getTotalSize(args...); 26 | } 27 | template int getSize(std::vector arg) { return arg.size(); } 28 | 29 | template 30 | void assigner(TT &src, std::vector counter, std::vector &arg) { 31 | std::get(src) = arg[counter[Idx]]; 32 | } 33 | 34 | template 35 | void assigner(TT &src, std::vector counter, std::vector &arg, 36 | std::vector &... args) { 37 | std::get(src) = arg[counter[Idx]]; 38 | assigner(src, counter, args...); 39 | } 40 | } // namespace detail 41 | template 42 | std::vector> paramsGrid(std::vector... args) { 43 | int length = detail::getTotalSize(args...); 44 | std::vector sizes = {detail::getSize(args)...}; 45 | int size = sizes.size(); 46 | 47 | std::vector> params(length); 48 | std::vector counter(size); 49 | for (int i = 0; i < length; ++i) { 50 | detail::assigner<0>(params[i], counter, args...); 51 | counter[size - 1] += 1; 52 | for (int c = size - 1; c >= 0; --c) { 53 | if (counter[c] == sizes[c] && c > 0) { 54 | counter[c - 1] += 1; 55 | counter[c] = 0; 56 | } 57 | } 58 | } 59 | return params; 60 | } 61 | 62 | #endif -------------------------------------------------------------------------------- /libs/spconv/include/pybind11_utils.h: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Yan Yan 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #pragma once 16 | #include 17 | #include 18 | #include // everything needed for embedding 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include 25 | 26 | namespace py = pybind11; 27 | 28 | template 29 | std::vector array2Vector(TPyObject arr){ 30 | py::array arr_np = arr; 31 | size_t size = arr.attr("size").template cast(); 32 | py::array_t arr_cc = arr_np; 33 | std::vector data(arr_cc.data(), arr_cc.data() + size); 34 | return data; 35 | } 36 | 37 | template 38 | std::vector arrayT2Vector(py::array_t arr) 39 | { 40 | std::vector data(arr.data(), arr.data() + arr.size()); 41 | return data; 42 | } 43 | 44 | template 45 | tv::TensorView array2TensorView(TPyObject arr){ 46 | py::array arr_np = arr; 47 | py::array_t arr_cc = arr_np; 48 | tv::Shape shape; 49 | for (int i = 0; i < arr_cc.ndim(); ++i){ 50 | shape.push_back(arr_cc.shape(i)); 51 | } 52 | return tv::TensorView(arr_cc.mutable_data(), shape); 53 | } 54 | template 55 | tv::TensorView arrayT2TensorView(py::array_t arr){ 56 | tv::Shape shape; 57 | for (int i = 0; i < arr.ndim(); ++i){ 58 | shape.push_back(arr.shape(i)); 59 | } 60 | return tv::TensorView(arr.mutable_data(), shape); 61 | } -------------------------------------------------------------------------------- /libs/spconv/include/spconv/avgpool.h: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Yan Yan 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef SPARSE_AVGPOOL_FUNCTOR_H_ 16 | #define SPARSE_AVGPOOL_FUNCTOR_H_ 17 | #include 18 | 19 | namespace spconv 20 | { 21 | namespace functor 22 | { 23 | template 24 | struct SparseAvgPoolForwardFunctor 25 | { 26 | void operator()(const Device& d, tv::TensorView outFeatures, 27 | tv::TensorView inFeatures, 28 | tv::TensorView indices, int size, tv::TensorView summaryrf); 29 | }; 30 | 31 | template 32 | struct SparseAvgPoolBackwardFunctor 33 | { 34 | void operator()(const Device& d, tv::TensorView outFeatures, 35 | tv::TensorView inFeatures, 36 | tv::TensorView dout, 37 | tv::TensorView din, 38 | tv::TensorView indices, int size, 39 | tv::TensorView summaryrf); 40 | }; 41 | 42 | } // namespace functor 43 | } // namespace spconv 44 | 45 | #endif -------------------------------------------------------------------------------- /libs/spconv/include/spconv/maxpool.h: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Yan Yan 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef SPARSE_MAXPOOL_FUNCTOR_H_ 16 | #define SPARSE_MAXPOOL_FUNCTOR_H_ 17 | #include 18 | 19 | namespace spconv 20 | { 21 | namespace functor 22 | { 23 | template 24 | struct SparseMaxPoolForwardFunctor 25 | { 26 | void operator()(const Device& d, tv::TensorView outFeatures, 27 | tv::TensorView inFeatures, 28 | tv::TensorView indices, int size); 29 | }; 30 | 31 | template 32 | struct SparseFieldMaxPoolForwardFunctor 33 | { 34 | void operator()(const Device& d, tv::TensorView outFeatures, 35 | tv::TensorView inFeatures, 36 | tv::TensorView indices, int size, 37 | tv::TensorView inFeature_norms, tv::TensorView outFeature_norms); 38 | }; 39 | 40 | template 41 | struct SparseMaxPoolBackwardFunctor 42 | { 43 | void operator()(const Device& d, tv::TensorView outFeatures, 44 | tv::TensorView inFeatures, 45 | tv::TensorView dout, 46 | tv::TensorView din, 47 | tv::TensorView indices, int size); 48 | }; 49 | 50 | } // namespace functor 51 | } // namespace spconv 52 | 53 | #endif -------------------------------------------------------------------------------- /libs/spconv/include/spconv/mp_helper.h: -------------------------------------------------------------------------------- 1 | #ifndef MP_HELPER_H_ 2 | #define MP_HELPER_H_ 3 | #include 4 | #include 5 | 6 | namespace spconv { 7 | template struct mp_list {}; 8 | 9 | template 10 | using mp_list_c = mp_list...>; 11 | 12 | namespace detail { 13 | 14 | template 15 | constexpr F mp_for_each_impl(mp_list, F &&f) { 16 | return std::initializer_list{(f(T()), 0)...}, std::forward(f); 17 | } 18 | 19 | template constexpr F mp_for_each_impl(mp_list<>, F &&f) { 20 | return std::forward(f); 21 | } 22 | 23 | } // namespace detail 24 | 25 | namespace detail { 26 | 27 | template class B> struct mp_rename_impl { 28 | // An error "no type named 'type'" here means that the first argument to 29 | // mp_rename is not a list 30 | }; 31 | 32 | template