├── CMakeLists.txt
├── LICENSE
├── README.md
├── cfgs
    └── config_cls.yaml
├── cls
    └── model_cls_L6_iter_36567_acc_0.923825.pth
├── data
    ├── ModelNet40Loader.py
    ├── __init__.py
    └── data_utils.py
├── models
    ├── __init__.py
    └── densepoint_cls_L6_k24_g2.py
├── train_cls.py
├── train_cls.sh
├── utils
    ├── __init__.py
    ├── _ext
    │   ├── __init__.py
    │   └── pointnet2
    │   │   └── __init__.py
    ├── build_ffi.py
    ├── cinclude
    │   ├── ball_query_gpu.h
    │   ├── ball_query_wrapper.h
    │   ├── cuda_utils.h
    │   ├── group_points_gpu.h
    │   ├── group_points_wrapper.h
    │   ├── interpolate_gpu.h
    │   ├── interpolate_wrapper.h
    │   ├── sampling_gpu.h
    │   └── sampling_wrapper.h
    ├── csrc
    │   ├── ball_query.c
    │   ├── ball_query_gpu.cu
    │   ├── group_points.c
    │   ├── group_points_gpu.cu
    │   ├── interpolate.c
    │   ├── interpolate_gpu.cu
    │   ├── sampling.c
    │   └── sampling_gpu.cu
    ├── linalg_utils.py
    ├── pointnet2_modules.py
    ├── pointnet2_utils.py
    └── pytorch_utils
    │   ├── __init__.py
    │   └── pytorch_utils.py
└── voting_evaluate_cls.py


/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | project(PointNet2)
 2 | cmake_minimum_required(VERSION 2.8)
 3 | 
 4 | find_package(CUDA REQUIRED)
 5 | 
 6 | include_directories("${CMAKE_CURRENT_SOURCE_DIR}/utils/cinclude")
 7 | cuda_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/utils/cinclude")
 8 | file(GLOB cuda_kernels_src "${CMAKE_CURRENT_SOURCE_DIR}/utils/csrc/*.cu")
 9 | cuda_compile(cuda_kernels SHARED ${cuda_kernels_src} OPTIONS -O3)
10 | 
11 | set(BUILD_CMD python "${CMAKE_CURRENT_SOURCE_DIR}/utils/build_ffi.py")
12 | file(GLOB wrapper_headers "${CMAKE_CURRENT_SOURCE_DIR}/utils/cinclude/*wrapper.h")
13 | file(GLOB wrapper_sources "${CMAKE_CURRENT_SOURCE_DIR}/utils/csrs/*.c")
14 | add_custom_command(OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/utils/_ext/pointnet2/_pointnet2.so"
15 | 		   WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/utils
16 | 		   COMMAND ${BUILD_CMD} --build --objs ${cuda_kernels}
17 | 		   DEPENDS ${cuda_kernels}
18 | 		   DEPENDS ${wrapper_headers}
19 | 		   DEPENDS ${wrapper_sources}
20 | 		   VERBATIM)
21 | 
22 | add_custom_target(pointnet2_ext ALL
23 | 		  DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/utils/_ext/pointnet2/_pointnet2.so")
24 | 
25 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Yongcheng Liu
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | DensePoint
 2 | ===
 3 | This repository contains the code in Pytorch for the paper:
 4 | 
 5 | __DensePoint: Learning Densely Contextual Representation for Efficient Point Cloud Processing__ [[arXiv](https://arxiv.org/abs/1909.03669)] [[CVF](http://openaccess.thecvf.com/content_ICCV_2019/papers/Liu_DensePoint_Learning_Densely_Contextual_Representation_for_Efficient_Point_Cloud_Processing_ICCV_2019_paper.pdf)]
 6 | <br>
 7 | [Yongcheng Liu](https://yochengliu.github.io/), [Bin Fan](http://www.nlpr.ia.ac.cn/fanbin/), [Gaofeng Meng](http://www.escience.cn/people/menggaofeng/index.html;jsessionid=EE2E193290F516D1BA8E2E35A09A9A08-n1), [Jiwen Lu](http://ivg.au.tsinghua.edu.cn/Jiwen_Lu/), [Shiming Xiang](https://scholar.google.com/citations?user=0ggsACEAAAAJ&hl=zh-CN) and [Chunhong Pan](http://people.ucas.ac.cn/~0005314)
 8 | <br>
 9 | [__ICCV 2019__](http://iccv2019.thecvf.com/)
10 | 
11 | ## Citation
12 | 
13 | If our paper is helpful for your research, please consider citing:   
14 | 
15 |         @inproceedings{liu2019densepoint,   
16 |             author = {Yongcheng Liu and    
17 |                             Bin Fan and  
18 |                        Gaofeng Meng and
19 |                            Jiwen Lu and
20 |                       Shiming Xiang and   
21 |                            Chunhong Pan},   
22 |             title = {DensePoint: Learning Densely Contextual Representation for Efficient Point Cloud Processing},   
23 |             booktitle = {IEEE International Conference on Computer Vision (ICCV)},    
24 |             pages = {5239--5248},  
25 |             year = {2019}   
26 |         }   
27 | 
28 | ## Usage: Preparation
29 | 
30 | - Requirement
31 | 
32 |   - Ubuntu 14.04
33 |   - Python 3 (recommend Anaconda3)
34 |   - Pytorch 0.3.\*
35 |   - CMake > 2.8
36 |   - CUDA 8.0 + cuDNN 5.1
37 | 
38 | - Building Kernel
39 | 
40 |       git clone https://github.com/Yochengliu/DensePoint.git 
41 |       cd DensePoint
42 |       mkdir build && cd build
43 |       cmake .. && make
44 | 
45 | - Dataset
46 |   - Shape Classification: download and unzip [ModelNet40](https://shapenet.cs.stanford.edu/media/modelnet40_ply_hdf5_2048.zip) (415M). Replace `$data_root$` in `cfgs/config_cls.yaml` with the dataset parent path.
47 | 
48 | ## Usage: Training
49 | - Shape Classification
50 | 
51 |       sh train_cls.sh
52 |         
53 | A trained 6-layer classification model is provided in the `cls` folder; its accuracy is 92.38%.
54 | 
55 | ## Usage: Evaluation
56 | - Shape Classification
57 | 
58 |       Voting script: voting_evaluate_cls.py
59 |         
60 | To evaluate, set our model `cls/model_cls_L6_iter_36567_acc_0.923825.pth` as the `checkpoint` in `config_cls.yaml` and run the voting script; if everything goes well, voting yields an accuracy of 92.5%.
61 | 
62 | ## License
63 | 
64 | The code is released under MIT License (see LICENSE file for details).
65 | 
66 | ## Acknowledgement
67 | 
68 | The code is heavily borrowed from [Pointnet2_PyTorch](https://github.com/erikwijmans/Pointnet2_PyTorch).
69 |         
70 | ## Contact
71 | 
72 | If you have some ideas or questions about our research to share with us, please contact <yongcheng.liu@nlpr.ia.ac.cn>
73 | 


--------------------------------------------------------------------------------
/cfgs/config_cls.yaml:
--------------------------------------------------------------------------------
 1 | common:
 2 |     workers: 4
 3 | 
 4 |     num_points: 1024
 5 |     num_classes: 40
 6 |     batch_size: 32
 7 |     
 8 |     base_lr: 0.001
 9 |     lr_clip: 0.00001
10 |     lr_decay: 0.7
11 |     decay_step: 21
12 |     epochs: 200
13 | 
14 |     weight_decay: 0
15 |     bn_momentum: 0.9
16 |     bnm_clip: 0.01
17 |     bn_decay: 0.5
18 |     
19 |     evaluate: 1
20 |     val_freq_epoch: 0.5   # frequency in epoch for validation, can be decimal
21 |     print_freq_iter: 20   # frequency in iteration for printing information
22 |     
23 |     input_channels: 0     # feature channels except (x, y, z)
24 |     
25 |     checkpoint: ''        # the model to start from
26 |     save_path: cls
27 |     data_root: $data_root$


--------------------------------------------------------------------------------
/cls/model_cls_L6_iter_36567_acc_0.923825.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Yochengliu/DensePoint/2a9393402f9f60d05a1735e78c4eced9f10015d9/cls/model_cls_L6_iter_36567_acc_0.923825.pth


--------------------------------------------------------------------------------
/data/ModelNet40Loader.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.utils.data as data
 3 | import numpy as np
 4 | import os, sys, h5py
 5 | 
 6 | BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 7 | sys.path.append(BASE_DIR)
 8 | 
 9 | def _get_data_files(list_filename):
10 |     with open(list_filename) as f:
11 |         return [line.rstrip()[5:] for line in f]
12 | 
def _load_data_file(name):
    """Load the ``data`` and ``label`` datasets of an HDF5 file into memory.

    The file is opened read-only and closed again before returning; the
    ``[:]`` slices copy both datasets into in-memory arrays first, so the
    returned values stay valid after the file is closed.

    Parameters
    ----------
    name : str
        Path of the HDF5 file to read.

    Returns
    -------
    tuple
        ``(data, label)`` as read from the file.
    """
    with h5py.File(name, 'r') as f:
        data = f['data'][:]
        label = f['label'][:]
    return data, label
18 |     
class ModelNet40Cls(data.Dataset):
    """Classification dataset backed by the ``modelnet40_ply_hdf5_2048`` files.

    All files named in ``train_files.txt`` (or ``test_files.txt`` when
    ``train`` is false) are loaded and concatenated in the constructor, so
    item access only indexes in-memory arrays.
    """

    def __init__(
            self, num_points, root, transforms=None, train=True
    ):
        super().__init__()

        self.transforms = transforms

        root = os.path.abspath(root)
        self.folder = "modelnet40_ply_hdf5_2048"
        self.data_dir = os.path.join(root, self.folder)

        self.train = train
        self.num_points = num_points

        # Pick the split's list file, then resolve its entries against root.
        list_name = 'train_files.txt' if self.train else 'test_files.txt'
        self.files = _get_data_files(os.path.join(self.data_dir, list_name))

        loaded = [_load_data_file(os.path.join(root, f)) for f in self.files]
        self.points = np.concatenate([pts for pts, _ in loaded], 0)
        self.labels = np.concatenate([lbl for _, lbl in loaded], 0)

    def __getitem__(self, idx):
        """Return ``(points, label)`` for sample ``idx``.

        The point order is shuffled for the training split only; the optional
        ``transforms`` callable is applied to the points afterwards.
        """
        order = np.arange(0, self.points.shape[1])
        if self.train:
            np.random.shuffle(order)

        current_points = self.points[idx, order].copy()
        label = torch.from_numpy(self.labels[idx]).type(torch.LongTensor)

        if self.transforms is None:
            return current_points, label
        return self.transforms(current_points), label

    def __len__(self):
        """Number of loaded samples."""
        return self.points.shape[0]
64 | 
if __name__ == "__main__":
    # Manual smoke test: load the training split from the current directory
    # and print one sample, its label and the dataset size.
    from torchvision import transforms
    import data_utils as d_utils

    # Only PointcloudToTensor is applied per sample. The other transforms in
    # data_utils index their input as a batch (pc.size(0) is the batch size)
    # and data_utils defines no PointcloudRotate, so they cannot be composed
    # here.
    demo_transforms = transforms.Compose([
        d_utils.PointcloudToTensor(),
    ])
    dset = ModelNet40Cls(16, "./", train=True, transforms=demo_transforms)
    print(dset[0][0])
    print(dset[0][1])
    print(len(dset))
    dloader = torch.utils.data.DataLoader(dset, batch_size=32, shuffle=True)
81 | 


--------------------------------------------------------------------------------
/data/__init__.py:
--------------------------------------------------------------------------------
1 | from .ModelNet40Loader import ModelNet40Cls


--------------------------------------------------------------------------------
/data/data_utils.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import numpy as np
  3 | 
  4 | class PointcloudToTensor(object):
  5 |     def __call__(self, points):
  6 |         return torch.from_numpy(points).float()
  7 | 
  8 | def angle_axis(angle: float, axis: np.ndarray):
  9 |     r"""Returns a 4x4 rotation matrix that performs a rotation around axis by angle
 10 | 
 11 |     Parameters
 12 |     ----------
 13 |     angle : float
 14 |         Angle to rotate by
 15 |     axis: np.ndarray
 16 |         Axis to rotate about
 17 | 
 18 |     Returns
 19 |     -------
 20 |     torch.Tensor
 21 |         3x3 rotation matrix
 22 |     """
 23 |     u = axis / np.linalg.norm(axis)
 24 |     cosval, sinval = np.cos(angle), np.sin(angle)
 25 | 
 26 |     # yapf: disable
 27 |     cross_prod_mat = np.array([[0.0, -u[2], u[1]],
 28 |                                 [u[2], 0.0, -u[0]],
 29 |                                 [-u[1], u[0], 0.0]])
 30 | 
 31 |     R = torch.from_numpy(
 32 |         cosval * np.eye(3)
 33 |         + sinval * cross_prod_mat
 34 |         + (1.0 - cosval) * np.outer(u, u)
 35 |     )
 36 |     # yapf: enable
 37 |     return R.float()    
 38 | 
 39 | class PointcloudRotatebyAngle(object):
 40 |     def __init__(self, rotation_angle = 0.0):
 41 |         self.rotation_angle = rotation_angle
 42 | 
 43 |     def __call__(self, pc):
 44 |         normals = pc.size(2) > 3
 45 |         bsize = pc.size(0)
 46 |         for i in range(bsize):
 47 |             cosval = np.cos(self.rotation_angle)
 48 |             sinval = np.sin(self.rotation_angle)
 49 |             rotation_matrix = np.array([[cosval, 0, sinval],
 50 |                                         [0, 1, 0],
 51 |                                         [-sinval, 0, cosval]])
 52 |             rotation_matrix = torch.from_numpy(rotation_matrix).float().cuda()
 53 |             
 54 |             cur_pc = pc[i, :, :]
 55 |             if not normals:
 56 |                 cur_pc = cur_pc @ rotation_matrix
 57 |             else:
 58 |                 pc_xyz = cur_pc[:, 0:3]
 59 |                 pc_normals = cur_pc[:, 3:]
 60 |                 cur_pc[:, 0:3] = pc_xyz @ rotation_matrix
 61 |                 cur_pc[:, 3:] = pc_normals @ rotation_matrix
 62 |                 
 63 |             pc[i, :, :] = cur_pc
 64 |             
 65 |         return pc
 66 | 
 67 | class PointcloudJitter(object):
 68 |     def __init__(self, std=0.01, clip=0.05):
 69 |         self.std, self.clip = std, clip
 70 | 
 71 |     def __call__(self, pc):
 72 |         bsize = pc.size(0)
 73 |         for i in range(bsize):
 74 |             jittered_data = pc.new(pc.size(1), 3).normal_(
 75 |                 mean=0.0, std=self.std
 76 |             ).clamp_(-self.clip, self.clip)
 77 |             pc[i, :, 0:3] += jittered_data
 78 |             
 79 |         return pc
 80 | 
 81 | class PointcloudScaleAndTranslate(object):
 82 |     def __init__(self, scale_low=2. / 3., scale_high=3. / 2., translate_range=0.2):
 83 |         self.scale_low = scale_low
 84 |         self.scale_high = scale_high
 85 |         self.translate_range = translate_range
 86 | 
 87 |     def __call__(self, pc):
 88 |         bsize = pc.size(0)
 89 |         for i in range(bsize):
 90 |             xyz1 = np.random.uniform(low=self.scale_low, high=self.scale_high, size=[3])
 91 |             xyz2 = np.random.uniform(low=-self.translate_range, high=self.translate_range, size=[3])
 92 |             
 93 |             pc[i, :, 0:3] = torch.mul(pc[i, :, 0:3], torch.from_numpy(xyz1).float().cuda()) + torch.from_numpy(xyz2).float().cuda()
 94 |             
 95 |         return pc
 96 |         
 97 | class PointcloudScale(object):
 98 |     def __init__(self, scale_low=2. / 3., scale_high=3. / 2.):
 99 |         self.scale_low = scale_low
100 |         self.scale_high = scale_high
101 | 
102 |     def __call__(self, pc):
103 |         bsize = pc.size(0)
104 |         for i in range(bsize):
105 |             xyz1 = np.random.uniform(low=self.scale_low, high=self.scale_high, size=[3])
106 |             
107 |             pc[i, :, 0:3] = torch.mul(pc[i, :, 0:3], torch.from_numpy(xyz1).float().cuda())
108 |             
109 |         return pc
110 |         
class PointcloudTranslate(object):
    """Randomly shift the first three channels of each cloud, in place.

    For every cloud in the batch a per-axis offset is drawn uniformly from
    ``[-translate_range, translate_range]``.
    """

    def __init__(self, translate_range=0.2):
        self.translate_range = translate_range

    def __call__(self, pc):
        """Translate ``pc`` (indexed as batch, point, channel) and return it."""
        for i in range(pc.size(0)):
            xyz2 = np.random.uniform(low=-self.translate_range, high=self.translate_range, size=[3])

            # type_as() follows the input's tensor type, so CPU batches work
            # too instead of the offsets always being moved to CUDA.
            shift = torch.from_numpy(xyz2).type_as(pc)
            pc[i, :, 0:3] = pc[i, :, 0:3] + shift

        return pc
123 | 
class PointcloudRandomInputDropout(object):
    """Randomly overwrite points of each cloud with that cloud's first point.

    Per cloud, a dropout ratio is drawn uniformly from
    ``[0, max_dropout_ratio)`` and every point is selected with that
    probability. The first three channels of the selected points are replaced
    by those of point 0, so the number of points stays the same.
    """

    def __init__(self, max_dropout_ratio=0.875):
        assert max_dropout_ratio >= 0 and max_dropout_ratio < 1
        self.max_dropout_ratio = max_dropout_ratio

    def __call__(self, pc):
        """Apply the dropout to ``pc`` (batch, point, channel) in place."""
        for b in range(pc.size(0)):
            dropout_ratio = np.random.random() * self.max_dropout_ratio  # 0~0.875
            selected = np.random.random((pc.size()[1])) <= dropout_ratio
            drop_idx = np.where(selected)[0]
            if len(drop_idx) == 0:
                continue

            cur_pc = pc[b, :, :]
            first_point = cur_pc[0, 0:3].repeat(len(drop_idx), 1)
            cur_pc[drop_idx.tolist(), 0:3] = first_point  # set to the first point
            pc[b, :, :] = cur_pc

        return pc
140 | 


--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .densepoint_cls_L6_k24_g2 import DensePoint as DensePointCls_L6
2 | 
3 | 


--------------------------------------------------------------------------------
/models/densepoint_cls_L6_k24_g2.py:
--------------------------------------------------------------------------------
  1 | import os, sys
  2 | BASE_DIR = os.path.dirname(os.path.abspath(__file__))
  3 | sys.path.append(BASE_DIR)
  4 | sys.path.append(os.path.join(BASE_DIR, "../utils"))
  5 | import torch
  6 | import torch.nn as nn
  7 | from torch.autograd import Variable
  8 | import pytorch_utils as pt_utils
  9 | from pointnet2_modules import PointnetSAModule, PointnetSAModuleMSG
 10 | import numpy as np
 11 | 
 12 | # DensePoint: 2 PPools + 3 PConvs + 1 global pool; narrowness k = 24; group number g = 2
 13 | class DensePoint(nn.Module):
 14 |     r"""
 15 |         PointNet2 with multi-scale grouping
 16 |         Semantic segmentation network that uses feature propogation layers
 17 | 
 18 |         Parameters
 19 |         ----------
 20 |         num_classes: int
 21 |             Number of semantics classes to predict over -- size of softmax classifier that run for each point
 22 |         input_channels: int = 6
 23 |             Number of input channels in the feature descriptor for each point.  If the point cloud is Nx9, this
 24 |             value should be 6 as in an Nx9 point cloud, 3 of the channels are xyz, and 6 are feature descriptors
 25 |         use_xyz: bool = True
 26 |             Whether or not to use the xyz position of a point as a feature
 27 |     """
 28 | 
 29 |     def __init__(self, num_classes, input_channels=0, use_xyz=True):
 30 |         super().__init__()
 31 | 
 32 |         self.SA_modules = nn.ModuleList()
 33 |         
 34 |         # stage 1 begin
 35 |         self.SA_modules.append(
 36 |             PointnetSAModuleMSG(
 37 |                 npoint=512,
 38 |                 radii=[0.25],
 39 |                 nsamples=[64],
 40 |                 mlps=[[input_channels, 96]],
 41 |                 use_xyz=use_xyz,
 42 |                 pool=True
 43 |             )
 44 |         )
 45 |         # stage 1 end
 46 |         
 47 |         # stage 2 begin
 48 |         input_channels = 96
 49 |         self.SA_modules.append(
 50 |             PointnetSAModuleMSG(
 51 |                 npoint=128,
 52 |                 radii=[0.32],
 53 |                 nsamples=[64],
 54 |                 mlps=[[input_channels, 93]],
 55 |                 use_xyz=use_xyz,
 56 |                 pool=True
 57 |             )
 58 |         )
 59 |         
 60 |         input_channels = 93
 61 |         self.SA_modules.append(
 62 |             PointnetSAModuleMSG(
 63 |                 npoint=128,
 64 |                 radii=[0.39],
 65 |                 nsamples=[16],
 66 |                 mlps=[[input_channels, 96]],
 67 |                 group_number=2,
 68 |                 use_xyz=use_xyz,
 69 |                 after_pool=True
 70 |             )
 71 |         )
 72 |         
 73 |         input_channels = 117
 74 |         self.SA_modules.append(
 75 |             PointnetSAModuleMSG(
 76 |                 npoint=128,
 77 |                 radii=[0.39],
 78 |                 nsamples=[16],
 79 |                 mlps=[[input_channels, 96]],
 80 |                 group_number=2,
 81 |                 use_xyz=use_xyz
 82 |             )
 83 |         )
 84 |         
 85 |         input_channels = 141
 86 |         self.SA_modules.append(
 87 |             PointnetSAModuleMSG(
 88 |                 npoint=128,
 89 |                 radii=[0.39],
 90 |                 nsamples=[16],
 91 |                 mlps=[[input_channels, 96]],
 92 |                 group_number=2,
 93 |                 use_xyz=use_xyz,
 94 |                 before_pool=True
 95 |             )
 96 |         )
 97 |         # stage 2 end
 98 |        
 99 |         # global pooling
100 |         input_channels = 165
101 |         self.SA_modules.append(
102 |             PointnetSAModule(
103 |                 mlp=[input_channels, 512], use_xyz=use_xyz
104 |             )
105 |         )
106 | 
107 |         self.FC_layer = nn.Sequential(
108 |             pt_utils.FC(512, 512, activation=nn.ReLU(inplace=True), bn=True),
109 |             nn.Dropout(p=0.5),
110 |             pt_utils.FC(512, 256, activation=nn.ReLU(inplace=True), bn=True),
111 |             nn.Dropout(p=0.5),
112 |             pt_utils.FC(256, num_classes, activation=None)
113 |         )
114 | 
115 |     def _break_up_pc(self, pc):
116 |         xyz = pc[..., 0:3].contiguous()
117 |         features = (
118 |             pc[..., 3:].transpose(1, 2).contiguous()
119 |             if pc.size(-1) > 3 else None
120 |         )
121 |         return xyz, features
122 | 
123 |     def forward(self, pointcloud: torch.cuda.FloatTensor):
124 |         r"""
125 |             Forward pass of the network
126 | 
127 |             Parameters
128 |             ----------
129 |             pointcloud: Variable(torch.cuda.FloatTensor)
130 |                 (B, N, 3 + input_channels) tensor
131 |                 Point cloud to run predicts on
132 |                 Each point in the point-cloud MUST
133 |                 be formated as (x, y, z, features...)
134 |         """
135 |         xyz, features = self._break_up_pc(pointcloud)
136 |         for module in self.SA_modules:
137 |             xyz, features = module(xyz, features)
138 |         
139 |         return self.FC_layer(features.squeeze(-1))
140 | 
if __name__ == "__main__":
    # Manual smoke test; it moves data and model to CUDA, so a GPU is needed.
    # The input has only (x, y, z) channels, matching input_channels=0.
    sim_data = Variable(torch.rand(32, 1024, 3))
    sim_data = sim_data.cuda()

    # DensePoint is the model defined in this file, and forward() takes the
    # point cloud as its only argument.
    cls_net = DensePoint(num_classes=40, input_channels=0, use_xyz=True)
    cls_net = cls_net.cuda()
    out = cls_net(sim_data)
    print('cls', out.size())
151 | 


--------------------------------------------------------------------------------
/train_cls.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.optim as optim
  3 | import torch.optim.lr_scheduler as lr_sched
  4 | import torch.nn as nn
  5 | from torch.utils.data import DataLoader
  6 | from torch.autograd import Variable
  7 | import numpy as np
  8 | import os
  9 | from torchvision import transforms
 10 | from models import DensePointCls_L6 as DensePoint
 11 | from data import ModelNet40Cls
 12 | import utils.pytorch_utils as pt_utils
 13 | import utils.pointnet2_utils as pointnet2_utils
 14 | import data.data_utils as d_utils
 15 | import argparse
 16 | import random
 17 | import yaml
 18 | 
 19 | torch.backends.cudnn.enabled = True
 20 | torch.backends.cudnn.benchmark = True
 21 | torch.backends.cudnn.deterministic = True
 22 | 
 23 | seed = 123
 24 | random.seed(seed)
 25 | np.random.seed(seed)
 26 | torch.manual_seed(seed)            
 27 | torch.cuda.manual_seed(seed)       
 28 | torch.cuda.manual_seed_all(seed)   
 29 | 
 30 | parser = argparse.ArgumentParser(description='DensePoint Shape Classification Training')
 31 | parser.add_argument('--config', default='cfgs/config_cls.yaml', type=str)
 32 | 
 33 | def main():
 34 |     args = parser.parse_args()
 35 |     with open(args.config) as f:
 36 |         config = yaml.load(f)
 37 |     print("\n**************************")
 38 |     for k, v in config['common'].items():
 39 |         setattr(args, k, v)
 40 |         print('\n[%s]:'%(k), v)
 41 |     print("\n**************************\n")
 42 |     
 43 |     try:
 44 |         os.makedirs(args.save_path)
 45 |     except OSError:
 46 |         pass
 47 |     
 48 |     train_transforms = transforms.Compose([
 49 |         d_utils.PointcloudToTensor()
 50 |     ])
 51 |     test_transforms = transforms.Compose([
 52 |         d_utils.PointcloudToTensor()
 53 |     ])
 54 |     
 55 |     train_dataset = ModelNet40Cls(num_points = args.num_points, root = args.data_root, transforms=train_transforms)
 56 |     train_dataloader = DataLoader(
 57 |         train_dataset, 
 58 |         batch_size=args.batch_size,
 59 |         shuffle=True, 
 60 |         num_workers=int(args.workers), 
 61 |         pin_memory=True
 62 |     )
 63 | 
 64 |     test_dataset = ModelNet40Cls(num_points = args.num_points, root = args.data_root, transforms=test_transforms, train=False)
 65 |     test_dataloader = DataLoader(
 66 |         test_dataset, 
 67 |         batch_size=args.batch_size,
 68 |         shuffle=False, 
 69 |         num_workers=int(args.workers), 
 70 |         pin_memory=True
 71 |     )
 72 |     
 73 |     model = DensePoint(num_classes = args.num_classes, input_channels = args.input_channels, use_xyz = True)
 74 |     model.cuda()
 75 |     optimizer = optim.Adam(
 76 |         model.parameters(), lr=args.base_lr, weight_decay=args.weight_decay)
 77 | 
 78 |     lr_lbmd = lambda e: max(args.lr_decay**(e // args.decay_step), args.lr_clip / args.base_lr)
 79 |     bnm_lmbd = lambda e: max(args.bn_momentum * args.bn_decay**(e // args.decay_step), args.bnm_clip)
 80 |     lr_scheduler = lr_sched.LambdaLR(optimizer, lr_lbmd)
 81 |     bnm_scheduler = pt_utils.BNMomentumScheduler(model, bnm_lmbd)
 82 |     
 83 |     if args.checkpoint is not '':
 84 |         model.load_state_dict(torch.load(args.checkpoint))
 85 |         print('Load model successfully: %s' % (args.checkpoint))
 86 | 
 87 |     criterion = nn.CrossEntropyLoss()
 88 |     num_batch = len(train_dataset)/args.batch_size
 89 |     
 90 |     # training
 91 |     train(train_dataloader, test_dataloader, model, criterion, optimizer, lr_scheduler, bnm_scheduler, args, num_batch)
 92 |     
 93 | 
 94 | def train(train_dataloader, test_dataloader, model, criterion, optimizer, lr_scheduler, bnm_scheduler, args, num_batch):
 95 |     PointcloudScaleAndTranslate = d_utils.PointcloudScaleAndTranslate()   # initialize augmentation
 96 |     global g_acc 
 97 |     g_acc = 0.91    # only save the model whose acc > 0.91
 98 |     batch_count = 0
 99 |     model.train()
100 |     for epoch in range(args.epochs):
101 |         for i, data in enumerate(train_dataloader, 0):
102 |             if lr_scheduler is not None:
103 |                 lr_scheduler.step(epoch)
104 |             if bnm_scheduler is not None:
105 |                 bnm_scheduler.step(epoch-1)
106 |             points, target = data
107 |             points, target = points.cuda(), target.cuda()
108 |             points, target = Variable(points), Variable(target)
109 |             
110 |             # farthest point sampling
111 |             fps_idx = pointnet2_utils.furthest_point_sample(points, 1200)  # (B, npoint)
112 |             fps_idx = fps_idx[:, np.random.choice(1200, args.num_points, False)]
113 |             points = pointnet2_utils.gather_operation(points.transpose(1, 2).contiguous(), fps_idx).transpose(1, 2).contiguous()  # (B, N, 3)
114 |             
115 |             # augmentation
116 |             points.data = PointcloudScaleAndTranslate(points.data)
117 |             
118 |             optimizer.zero_grad()
119 |             
120 |             pred = model(points)
121 |             target = target.view(-1)
122 |             loss = criterion(pred, target)
123 |             loss.backward()
124 |             optimizer.step()
125 |             if i % args.print_freq_iter == 0:
126 |                 print('[epoch %3d: %3d/%3d] \t train loss: %0.6f \t lr: %0.5f' %(epoch+1, i, num_batch, loss.data.clone(), lr_scheduler.get_lr()[0]))
127 |             batch_count += 1
128 |             
129 |             # validation in between an epoch
130 |             if args.evaluate and batch_count % int(args.val_freq_epoch * num_batch) == 0:
131 |                 validate(test_dataloader, model, criterion, args, batch_count)
132 | 
133 | 
def validate(test_dataloader, model, criterion, args, iter):
    """Evaluate ``model`` on the test loader and save it if accuracy improved.

    Every test batch is reduced to ``args.num_points`` points by furthest
    point sampling. The mean loss and overall accuracy are printed. When the
    accuracy exceeds the module-level ``g_acc``, ``g_acc`` is updated and the
    state dict is written to ``args.save_path`` with ``iter`` and the accuracy
    in the file name. The model is switched back to training mode at the end.
    """
    global g_acc
    model.eval()
    losses, preds, labels = [], [], []
    for j, data in enumerate(test_dataloader, 0):
        points, target = data
        points, target = points.cuda(), target.cuda()
        points, target = Variable(points, volatile=True), Variable(target, volatile=True)

        # farthest point sampling
        fps_idx = pointnet2_utils.furthest_point_sample(points, args.num_points)  # (B, npoint)
        # fps_idx = fps_idx[:, np.random.choice(1200, args.num_points, False)]
        points = pointnet2_utils.gather_operation(points.transpose(1, 2).contiguous(), fps_idx).transpose(1, 2).contiguous()

        pred = model(points)
        target = target.view(-1)
        loss = criterion(pred, target)
        losses.append(loss.data.clone())
        _, pred_choice = torch.max(pred.data, -1)

        preds.append(pred_choice)
        labels.append(target.data)

    preds = torch.cat(preds, 0)
    labels = torch.cat(labels, 0)
    # float() makes this a true division whether sum() returns a Python
    # number or a tensor, so the ratio cannot be truncated to an integer.
    acc = float((preds == labels).sum()) / labels.numel()
    print('\nval loss: %0.6f \t acc: %0.6f\n' %(np.array(losses).mean(), acc))
    if acc > g_acc:
        g_acc = acc
        torch.save(model.state_dict(), '%s/cls_iter_%d_acc_%0.6f.pth' % (args.save_path, iter, acc))
    model.train()
165 |     
166 | if __name__ == "__main__":
167 |     main()
168 | 


--------------------------------------------------------------------------------
/train_cls.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env sh
# Start classification training in the background on GPU 0. stdout and stderr
# are shown on the terminal and also written to log/Cls_LOG_<timestamp>.log.
mkdir -p log
now=$(date +"%Y%m%d_%H%M%S")
log_name="Cls_LOG_${now}"
export CUDA_VISIBLE_DEVICES=0
python -u train_cls.py --config cfgs/config_cls.yaml 2>&1 | tee "log/${log_name}.log" &
9 | 


--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Yochengliu/DensePoint/2a9393402f9f60d05a1735e78c4eced9f10015d9/utils/__init__.py


--------------------------------------------------------------------------------
/utils/_ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Yochengliu/DensePoint/2a9393402f9f60d05a1735e78c4eced9f10015d9/utils/_ext/__init__.py


--------------------------------------------------------------------------------
/utils/_ext/pointnet2/__init__.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from torch.utils.ffi import _wrap_function
 3 | from ._pointnet2 import lib as _lib, ffi as _ffi
 4 | 
 5 | __all__ = []
 6 | def _import_symbols(locals):
 7 |     for symbol in dir(_lib):
 8 |         fn = getattr(_lib, symbol)
 9 |         if callable(fn):
10 |             locals[symbol] = _wrap_function(fn, _ffi)
11 |         else:
12 |             locals[symbol] = fn
13 |         __all__.append(symbol)
14 | 
15 | _import_symbols(locals())
16 | 


--------------------------------------------------------------------------------
/utils/build_ffi.py:
--------------------------------------------------------------------------------
 1 | import glob
 2 | import torch
 3 | import os.path as osp
 4 | from torch.utils.ffi import create_extension
 5 | import sys, argparse, shutil
 6 | 
 7 | base_dir = osp.dirname(osp.abspath(__file__))
 8 | 
 9 | 
def parse_args():
    """Parse the command line of the pointnet2 FFI build script.

    Exactly one of ``--build`` / ``--clean`` must be given. ``--objs`` takes
    zero or more pre-built object files to link into the extension.

    Returns:
        argparse.Namespace with ``build`` (bool), ``clean`` (bool) and
        ``objs`` (list of str; empty when ``--objs`` is omitted).

    Raises:
        SystemExit: via argparse, when neither or both actions are given.
    """
    parser = argparse.ArgumentParser(
        description="Arguments for building pointnet2 ffi extension"
    )
    # default=[] so consumers can always treat objs as a list, even when the
    # option is not passed at all.
    parser.add_argument("--objs", nargs="*", default=[])
    # required=True lets argparse report a missing action with a usage
    # message; an `assert` would be stripped when running under `python -O`.
    action = parser.add_mutually_exclusive_group(required=True)
    action.add_argument("--build", dest='build', action="store_true")
    action.add_argument("--clean", dest='clean', action="store_true")
    parser.set_defaults(build=False, clean=False)

    return parser.parse_args()
24 | 
25 | 
def build(args):
    """Build the ``_ext.pointnet2`` FFI extension next to this script.

    Args:
        args: parsed command line; ``args.objs`` is an optional list of extra
            object files (e.g. the compiled CUDA kernels) to link in.

    The wrapper headers (``cinclude/*_wrapper.h``) and C sources
    (``csrc/*.c``) are discovered relative to this file's directory, which is
    also what ``relative_to=__file__`` tells ``create_extension`` to resolve
    them against.
    """
    # args.objs is None when --objs is omitted; copy so the Namespace's own
    # list is not mutated by the += below.
    extra_objects = list(args.objs or [])
    extra_objects += glob.glob('/usr/local/cuda/lib64/*.a')

    def _glob_relative(*pattern):
        # Glob inside base_dir (not the current working directory) and hand
        # back base_dir-relative paths in a stable order.
        matches = glob.glob(osp.join(base_dir, *pattern))
        return sorted(osp.relpath(path, base_dir) for path in matches)

    ffi = create_extension(
        '_ext.pointnet2',
        headers=_glob_relative("cinclude", "*_wrapper.h"),
        sources=_glob_relative("csrc", "*.c"),
        define_macros=[('WITH_CUDA', None)],
        relative_to=__file__,
        with_cuda=True,
        extra_objects=extra_objects,
        include_dirs=[osp.join(base_dir, 'cinclude')],
        verbose=False,
        package=False
    )
    ffi.build()
43 | 
44 | 
def clean(args):
    """Delete the generated ``_ext`` directory that sits next to this script.

    ``args`` is accepted for symmetry with ``build`` and is not used.
    Cleaning a tree that has no ``_ext`` directory is a no-op rather than an
    error, so ``--clean`` can be run repeatedly.
    """
    ext_dir = osp.join(base_dir, "_ext")
    if osp.isdir(ext_dir):
        shutil.rmtree(ext_dir)
47 | 
48 | 
# Script entry point: parse the command line, then either remove the
# generated extension directory (--clean) or build the extension.
if __name__ == "__main__":
    args = parse_args()
    if args.clean:
        clean(args)
    else:
        build(args)
55 | 


--------------------------------------------------------------------------------
/utils/cinclude/ball_query_gpu.h:
--------------------------------------------------------------------------------
 1 | #ifndef _BALL_QUERY_GPU
 2 | #define _BALL_QUERY_GPU
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | void query_ball_point_kernel_wrapper(int b, int n, int m, float radius,
 9 | 				     int nsample, const float *xyz,
10 | 				     const float *new_xyz, int *idx,
11 | 				     cudaStream_t stream);
12 | 
13 | #ifdef __cplusplus
14 | }
15 | #endif
16 | #endif
17 | 


--------------------------------------------------------------------------------
/utils/cinclude/ball_query_wrapper.h:
--------------------------------------------------------------------------------
1 | 
/*
 * Ball query on the current THC stream.
 * new_xyz_tensor: (b, m, 3) centers, xyz_tensor: (b, n, 3) points,
 * idx_tensor: (b, m, nsample) output indices. Returns 1.
 */
int ball_query_wrapper(int b, int n, int m, float radius, int nsample,
		       THCudaTensor *new_xyz_tensor, THCudaTensor *xyz_tensor,
		       THCudaIntTensor *idx_tensor);
5 | 


--------------------------------------------------------------------------------
/utils/cinclude/cuda_utils.h:
--------------------------------------------------------------------------------
 1 | #ifndef _CUDA_UTILS_H
 2 | #define _CUDA_UTILS_H
 3 | 
 4 | #include <cmath>
 5 | 
 6 | #define TOTAL_THREADS 512
 7 | 
 8 | inline int opt_n_threads(int work_size) {
 9 |     const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0);
10 | 
11 |     return max(min(1 << pow_2, TOTAL_THREADS), 1);
12 | }
13 | 
14 | inline dim3 opt_block_config(int x, int y) {
15 |     const int x_threads = opt_n_threads(x);
16 |     const int y_threads =
17 |         max(min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1);
18 |     dim3 block_config(x_threads, y_threads, 1);
19 | 
20 |     return block_config;
21 | }
22 | 
23 | #endif
24 | 


--------------------------------------------------------------------------------
/utils/cinclude/group_points_gpu.h:
--------------------------------------------------------------------------------
 1 | #ifndef _BALL_QUERY_GPU
 2 | #define _BALL_QUERY_GPU
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample,
 9 | 				 const float *points, const int *idx,
10 | 				 float *out, cudaStream_t stream);
11 | 
12 | void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints,
13 | 				      int nsample, const float *grad_out,
14 | 				      const int *idx, float *grad_points,
15 | 				      cudaStream_t stream);
16 | #ifdef __cplusplus
17 | }
18 | #endif
19 | #endif
20 | 


--------------------------------------------------------------------------------
/utils/cinclude/group_points_wrapper.h:
--------------------------------------------------------------------------------
/*
 * Gather grouped features on the current THC stream.
 * points_tensor: (b, c, n), idx_tensor: (b, npoints, nsample),
 * out: (b, c, npoints, nsample). Returns 1.
 */
int group_points_wrapper(int b, int c, int n, int npoints, int nsample,
			 THCudaTensor *points_tensor,
			 THCudaIntTensor *idx_tensor, THCudaTensor *out);
/*
 * Gradient of group_points_wrapper.
 * grad_out_tensor: (b, c, npoints, nsample), idx_tensor: (b, npoints, nsample),
 * grad_points_tensor: (b, c, n); values are added into it, so the caller
 * presumably zero-initialises it first (verify against caller). Returns 1.
 */
int group_points_grad_wrapper(int b, int c, int n, int npoints, int nsample,
			      THCudaTensor *grad_out_tensor,
			      THCudaIntTensor *idx_tensor,
			      THCudaTensor *grad_points_tensor);
8 | 


--------------------------------------------------------------------------------
/utils/cinclude/interpolate_gpu.h:
--------------------------------------------------------------------------------
#ifndef _INTERPOLATE_GPU_H
#define _INTERPOLATE_GPU_H

#ifdef __cplusplus
extern "C" {
#endif

/*
 * For every point in unknown (b, n, 3), find the three closest points in
 * known (b, m, 3). Writes squared distances to dist2 (b, n, 3) and the
 * matching indices into known to idx (b, n, 3), nearest first.
 */
void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown,
			     const float *known, float *dist2, int *idx,
			     cudaStream_t stream);

/*
 * Weighted sum of three source features per output point.
 * points: (b, c, m), idx: (b, n, 3), weight: (b, n, 3) -> out: (b, c, n)
 */
void three_interpolate_kernel_wrapper(int b, int c, int m, int n,
				      const float *points, const int *idx,
				      const float *weight, float *out,
				      cudaStream_t stream);

/*
 * Gradient of three_interpolate with respect to points.
 * grad_out: (b, c, n), idx: (b, n, 3), weight: (b, n, 3)
 * -> grad_points: (b, c, m), accumulated with atomicAdd.
 */
void three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m,
					   const float *grad_out,
					   const int *idx, const float *weight,
					   float *grad_points,
					   cudaStream_t stream);

#ifdef __cplusplus
}
#endif

#endif
28 | 


--------------------------------------------------------------------------------
/utils/cinclude/interpolate_wrapper.h:
--------------------------------------------------------------------------------
 1 | 
 2 | 
/*
 * Three-nearest-neighbor search on the current THC stream.
 * unknown_tensor: (b, n, 3), known_tensor: (b, m, 3),
 * dist2_tensor: (b, n, 3) squared distances, idx_tensor: (b, n, 3) indices.
 */
void three_nn_wrapper(int b, int n, int m, THCudaTensor *unknown_tensor,
		      THCudaTensor *known_tensor, THCudaTensor *dist2_tensor,
		      THCudaIntTensor *idx_tensor);
/*
 * Three-point weighted interpolation.
 * points_tensor: (b, c, m), idx_tensor: (b, n, 3), weight_tensor: (b, n, 3),
 * out_tensor: (b, c, n).
 */
void three_interpolate_wrapper(int b, int c, int m, int n,
			       THCudaTensor *points_tensor,
			       THCudaIntTensor *idx_tensor,
			       THCudaTensor *weight_tensor,
			       THCudaTensor *out_tensor);

/*
 * Gradient of three_interpolate_wrapper with respect to the source features.
 * grad_out_tensor: (b, c, n), idx_tensor / weight_tensor: (b, n, 3),
 * grad_points_tensor: (b, c, m); values are added into it.
 */
void three_interpolate_grad_wrapper(int b, int c, int n, int m,
				    THCudaTensor *grad_out_tensor,
				    THCudaIntTensor *idx_tensor,
				    THCudaTensor *weight_tensor,
				    THCudaTensor *grad_points_tensor);
17 | 


--------------------------------------------------------------------------------
/utils/cinclude/sampling_gpu.h:
--------------------------------------------------------------------------------
#ifndef _SAMPLING_GPU_H
#define _SAMPLING_GPU_H

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Gather selected columns per channel.
 * points: (b, c, n), idx: (b, npoints) -> out: (b, c, npoints)
 */
void gather_points_kernel_wrapper(int b, int c, int n, int npoints,
				  const float *points, const int *idx,
				  float *out, cudaStream_t stream);

/*
 * Gradient of the gather.
 * grad_out: (b, c, npoints), idx: (b, npoints)
 * -> grad_points: (b, c, n), accumulated with atomicAdd.
 */
void gather_points_grad_kernel_wrapper(int b, int c, int n, int npoints,
				       const float *grad_out, const int *idx,
				       float *grad_points, cudaStream_t stream);

/*
 * Furthest point sampling.
 * dataset: (b, n, 3), temp: (b, n) running minimum distances, idxs: (b, m).
 * NOTE(review): the kernel takes min(d, temp[k]), so temp presumably must be
 * pre-filled with a large value by the caller - confirm.
 */
void furthest_point_sampling_kernel_wrapper(int b, int n, int m,
					    const float *dataset, float *temp,
					    int *idxs, cudaStream_t stream);

#ifdef __cplusplus
}
#endif
#endif
24 | 


--------------------------------------------------------------------------------
/utils/cinclude/sampling_wrapper.h:
--------------------------------------------------------------------------------
 1 | 
/*
 * Gather points on the current THC stream.
 * points_tensor: (b, c, n), idx_tensor: (b, npoints),
 * out_tensor: (b, c, npoints). Returns 1.
 */
int gather_points_wrapper(int b, int c, int n, int npoints,
			  THCudaTensor *points_tensor,
			  THCudaIntTensor *idx_tensor,
			  THCudaTensor *out_tensor);
/*
 * Gradient of gather_points_wrapper.
 * grad_out_tensor: (b, c, npoints), idx_tensor: (b, npoints),
 * grad_points_tensor: (b, c, n); values are added into it. Returns 1.
 */
int gather_points_grad_wrapper(int b, int c, int n, int npoints,
			       THCudaTensor *grad_out_tensor,
			       THCudaIntTensor *idx_tensor,
			       THCudaTensor *grad_points_tensor);

/*
 * Furthest point sampling of m indices out of n points per batch element.
 * points_tensor: (b, n, 3), temp_tensor: (b, n) scratch distances,
 * idx_tensor: (b, m) output indices. Returns 1.
 */
int furthest_point_sampling_wrapper(int b, int n, int m,
				    THCudaTensor *points_tensor,
				    THCudaTensor *temp_tensor,
				    THCudaIntTensor *idx_tensor);
15 | 


--------------------------------------------------------------------------------
/utils/csrc/ball_query.c:
--------------------------------------------------------------------------------
 1 | #include <THC/THC.h>
 2 | 
 3 | #include "ball_query_gpu.h"
 4 | 
 5 | extern THCState *state;
 6 | 
 7 | int ball_query_wrapper(int b, int n, int m, float radius, int nsample,
 8 | 		       THCudaTensor *new_xyz_tensor, THCudaTensor *xyz_tensor,
 9 | 		       THCudaIntTensor *idx_tensor) {
10 | 
11 |     const float *new_xyz = THCudaTensor_data(state, new_xyz_tensor);
12 |     const float *xyz = THCudaTensor_data(state, xyz_tensor);
13 |     int *idx = THCudaIntTensor_data(state, idx_tensor);
14 | 
15 |     cudaStream_t stream = THCState_getCurrentStream(state);
16 | 
17 |     query_ball_point_kernel_wrapper(b, n, m, radius, nsample, new_xyz, xyz, idx,
18 | 				    stream);
19 |     return 1;
20 | }
21 | 


--------------------------------------------------------------------------------
/utils/csrc/ball_query_gpu.cu:
--------------------------------------------------------------------------------
 1 | #include <math.h>
 2 | #include <stdio.h>
 3 | #include <stdlib.h>
 4 | 
 5 | #include "ball_query_gpu.h"
 6 | #include "cuda_utils.h"
 7 | 
 8 | // input: new_xyz(b, m, 3) xyz(b, n, 3)
 9 | // output: idx(b, m, nsample)
10 | __global__ void query_ball_point_kernel(int b, int n, int m, float radius,
11 | 					int nsample,
12 | 					const float *__restrict__ new_xyz,
13 | 					const float *__restrict__ xyz,
14 | 					int *__restrict__ idx) {
15 |     int batch_index = blockIdx.x;
16 |     xyz += batch_index * n * 3;
17 |     new_xyz += batch_index * m * 3;
18 |     idx += m * nsample * batch_index;
19 | 
20 |     int index = threadIdx.x;
21 |     int stride = blockDim.x;
22 | 
23 |     float radius2 = radius * radius;
24 |     for (int j = index; j < m; j += stride) {
25 | 	float new_x = new_xyz[j * 3 + 0];
26 | 	float new_y = new_xyz[j * 3 + 1];
27 | 	float new_z = new_xyz[j * 3 + 2];
28 | 	for (int k = 0, cnt = 0; k < n && cnt < nsample; ++k) {
29 | 	    float x = xyz[k * 3 + 0];
30 | 	    float y = xyz[k * 3 + 1];
31 | 	    float z = xyz[k * 3 + 2];
32 | 	    float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) +
33 | 		       (new_z - z) * (new_z - z);
34 | 	    if (d2 < radius2) {
35 | 		if (cnt == 0) {
36 | 		    for (int l = 0; l < nsample; ++l) {
37 | 			idx[j * nsample + l] = k;
38 | 		    }
39 | 		}
40 | 		idx[j * nsample + cnt] = k;
41 | 		++cnt;
42 | 	    }
43 | 	}
44 |     }
45 | }
46 | 
// Launches query_ball_point_kernel with b blocks of opt_n_threads(m) threads
// on `stream`. If cudaGetLastError() reports a failure, prints it to stderr
// and terminates the process with exit(-1).
void query_ball_point_kernel_wrapper(int b, int n, int m, float radius,
                                     int nsample, const float *new_xyz,
                                     const float *xyz, int *idx,
                                     cudaStream_t stream) {
    query_ball_point_kernel<<<b, opt_n_threads(m), 0, stream>>>(
        b, n, m, radius, nsample, new_xyz, xyz, idx);

    const cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess) {
        return;
    }

    fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(status));
    exit(-1);
}
62 | 


--------------------------------------------------------------------------------
/utils/csrc/group_points.c:
--------------------------------------------------------------------------------
 1 | #include <THC/THC.h>
 2 | 
 3 | #include "group_points_gpu.h"
 4 | 
 5 | extern THCState *state;
 6 | 
 7 | int group_points_wrapper(int b, int c, int n, int npoints, int nsample,
 8 | 			 THCudaTensor *points_tensor,
 9 | 			 THCudaIntTensor *idx_tensor,
10 | 			 THCudaTensor *out_tensor) {
11 | 
12 |     const float *points = THCudaTensor_data(state, points_tensor);
13 |     const int *idx = THCudaIntTensor_data(state, idx_tensor);
14 |     float *out = THCudaTensor_data(state, out_tensor);
15 | 
16 |     cudaStream_t stream = THCState_getCurrentStream(state);
17 | 
18 |     group_points_kernel_wrapper(b, c, n, npoints, nsample, points, idx, out,
19 | 				stream);
20 |     return 1;
21 | }
22 | 
23 | int group_points_grad_wrapper(int b, int c, int n, int npoints, int nsample,
24 | 			      THCudaTensor *grad_out_tensor,
25 | 			      THCudaIntTensor *idx_tensor,
26 | 			      THCudaTensor *grad_points_tensor) {
27 | 
28 |     float *grad_points = THCudaTensor_data(state, grad_points_tensor);
29 |     const int *idx = THCudaIntTensor_data(state, idx_tensor);
30 |     const float *grad_out = THCudaTensor_data(state, grad_out_tensor);
31 | 
32 |     cudaStream_t stream = THCState_getCurrentStream(state);
33 | 
34 |     group_points_grad_kernel_wrapper(b, c, n, npoints, nsample, grad_out, idx,
35 | 				     grad_points, stream);
36 |     return 1;
37 | }
38 | 


--------------------------------------------------------------------------------
/utils/csrc/group_points_gpu.cu:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | #include "cuda_utils.h"
 5 | #include "group_points_gpu.h"
 6 | 
 7 | // input: points(b, c, n) idx(b, npoints, nsample)
 8 | // output: out(b, c, npoints, nsample)
 9 | __global__ void group_points_kernel(int b, int c, int n, int npoints,
10 | 				    int nsample,
11 | 				    const float *__restrict__ points,
12 | 				    const int *__restrict__ idx,
13 | 				    float *__restrict__ out) {
14 |     int batch_index = blockIdx.x;
15 |     points += batch_index * n * c;
16 |     idx += batch_index * npoints * nsample;
17 |     out += batch_index * npoints * nsample * c;
18 | 
19 |     const int index = threadIdx.y * blockDim.x + threadIdx.x;
20 |     const int stride = blockDim.y * blockDim.x;
21 |     for (int i = index; i < c * npoints; i += stride) {
22 | 	const int l = i / npoints;
23 | 	const int j = i % npoints;
24 | 	for (int k = 0; k < nsample; ++k) {
25 | 	    int ii = idx[j * nsample + k];
26 | 	    out[(l * npoints + j) * nsample + k] = points[l * n + ii];
27 | 	}
28 |     }
29 | }
30 | 
// Launches group_points_kernel with b blocks shaped by
// opt_block_config(npoints, c) on `stream`. If cudaGetLastError() reports a
// failure, prints it to stderr and terminates the process with exit(-1).
void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample,
                                 const float *points, const int *idx,
                                 float *out, cudaStream_t stream) {
    group_points_kernel<<<b, opt_block_config(npoints, c), 0, stream>>>(
        b, c, n, npoints, nsample, points, idx, out);

    const cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess) {
        return;
    }

    fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(status));
    exit(-1);
}
45 | 
// Grouping backward pass: scatters each grouped gradient back onto the source
// point it was gathered from, using atomicAdd because several groups may
// reference the same point.
// input:  grad_out (b, c, npoints, nsample), idx (b, npoints, nsample)
// output: grad_points (b, c, n)
__global__ void group_points_grad_kernel(int b, int c, int n, int npoints,
                                         int nsample,
                                         const float *__restrict__ grad_out,
                                         const int *__restrict__ idx,
                                         float *__restrict__ grad_points) {
    const int batch = blockIdx.x;
    const float *g_out = grad_out + batch * npoints * nsample * c;
    const int *nbr = idx + batch * npoints * nsample;
    float *g_pts = grad_points + batch * n * c;

    const int tid = threadIdx.y * blockDim.x + threadIdx.x;
    const int nthreads = blockDim.y * blockDim.x;
    for (int flat = tid; flat < c * npoints; flat += nthreads) {
        const int ch = flat / npoints;
        const int pt = flat % npoints;

        float *g_row = g_pts + ch * n;
        const int *group = nbr + pt * nsample;
        const float *g_group = g_out + (ch * npoints + pt) * nsample;
        for (int s = 0; s < nsample; ++s) {
            atomicAdd(g_row + group[s], g_group[s]);
        }
    }
}
70 | 
// Launches group_points_grad_kernel with b blocks shaped by
// opt_block_config(npoints, c) on `stream`. If cudaGetLastError() reports a
// failure, prints it to stderr and terminates the process with exit(-1).
void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints,
                                      int nsample, const float *grad_out,
                                      const int *idx, float *grad_points,
                                      cudaStream_t stream) {
    group_points_grad_kernel<<<b, opt_block_config(npoints, c), 0, stream>>>(
        b, c, n, npoints, nsample, grad_out, idx, grad_points);

    const cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess) {
        return;
    }

    fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(status));
    exit(-1);
}
85 | 


--------------------------------------------------------------------------------
/utils/csrc/interpolate.c:
--------------------------------------------------------------------------------
 1 | #include <THC/THC.h>
 2 | #include <math.h>
 3 | #include <stdio.h>
 4 | #include <stdlib.h>
 5 | 
 6 | #include "interpolate_gpu.h"
 7 | 
 8 | extern THCState *state;
 9 | 
10 | void three_nn_wrapper(int b, int n, int m, THCudaTensor *unknown_tensor,
11 | 		      THCudaTensor *known_tensor, THCudaTensor *dist2_tensor,
12 | 		      THCudaIntTensor *idx_tensor) {
13 |     const float *unknown = THCudaTensor_data(state, unknown_tensor);
14 |     const float *known = THCudaTensor_data(state, known_tensor);
15 |     float *dist2 = THCudaTensor_data(state, dist2_tensor);
16 |     int *idx = THCudaIntTensor_data(state, idx_tensor);
17 | 
18 |     cudaStream_t stream = THCState_getCurrentStream(state);
19 |     three_nn_kernel_wrapper(b, n, m, unknown, known, dist2, idx, stream);
20 | }
21 | 
22 | void three_interpolate_wrapper(int b, int c, int m, int n,
23 | 			       THCudaTensor *points_tensor,
24 | 			       THCudaIntTensor *idx_tensor,
25 | 			       THCudaTensor *weight_tensor,
26 | 			       THCudaTensor *out_tensor) {
27 | 
28 |     const float *points = THCudaTensor_data(state, points_tensor);
29 |     const float *weight = THCudaTensor_data(state, weight_tensor);
30 |     float *out = THCudaTensor_data(state, out_tensor);
31 |     const int *idx = THCudaIntTensor_data(state, idx_tensor);
32 | 
33 |     cudaStream_t stream = THCState_getCurrentStream(state);
34 |     three_interpolate_kernel_wrapper(b, c, m, n, points, idx, weight, out,
35 | 				     stream);
36 | }
37 | 
38 | void three_interpolate_grad_wrapper(int b, int c, int n, int m,
39 | 				    THCudaTensor *grad_out_tensor,
40 | 				    THCudaIntTensor *idx_tensor,
41 | 				    THCudaTensor *weight_tensor,
42 | 				    THCudaTensor *grad_points_tensor) {
43 | 
44 |     const float *grad_out = THCudaTensor_data(state, grad_out_tensor);
45 |     const float *weight = THCudaTensor_data(state, weight_tensor);
46 |     float *grad_points = THCudaTensor_data(state, grad_points_tensor);
47 |     const int *idx = THCudaIntTensor_data(state, idx_tensor);
48 | 
49 |     cudaStream_t stream = THCState_getCurrentStream(state);
50 |     three_interpolate_grad_kernel_wrapper(b, c, n, m, grad_out, idx, weight,
51 | 					  grad_points, stream);
52 | }
53 | 


--------------------------------------------------------------------------------
/utils/csrc/interpolate_gpu.cu:
--------------------------------------------------------------------------------
  1 | #include <math.h>
  2 | #include <stdio.h>
  3 | #include <stdlib.h>
  4 | 
  5 | #include "cuda_utils.h"
  6 | #include "interpolate_gpu.h"
  7 | 
// Three-nearest-neighbor search: for every point of `unknown`, find the three
// closest points of `known` within the same batch element.
// input: unknown(b, n, 3) known(b, m, 3)
// output: dist2(b, n, 3), idx(b, n, 3)
// dist2 holds squared distances, ordered nearest first; idx holds the
// matching indices into `known`.
__global__ void three_nn_kernel(int b, int n, int m,
				const float *__restrict__ unknown,
				const float *__restrict__ known,
				float *__restrict__ dist2,
				int *__restrict__ idx) {
    // One block per batch element: shift every pointer to this batch's slice.
    int batch_index = blockIdx.x;
    unknown += batch_index * n * 3;
    known += batch_index * m * 3;
    dist2 += batch_index * n * 3;
    idx += batch_index * n * 3;

    // Threads of the block stride over the n query points.
    int index = threadIdx.x;
    int stride = blockDim.x;
    for (int j = index; j < n; j += stride) {
	float ux = unknown[j * 3 + 0];
	float uy = unknown[j * 3 + 1];
	float uz = unknown[j * 3 + 2];

	// Running top-3 (best1 <= best2 <= best3). 1e40 is a sentinel and 0 a
	// placeholder index; both survive for slots never filled (m < 3).
	double best1 = 1e40, best2 = 1e40, best3 = 1e40;
	int besti1 = 0, besti2 = 0, besti3 = 0;
	for (int k = 0; k < m; ++k) {
	    float x = known[k * 3 + 0];
	    float y = known[k * 3 + 1];
	    float z = known[k * 3 + 2];
	    // Squared Euclidean distance; no square root is taken.
	    float d =
		(ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z);
	    // Insert d into the sorted triple, shifting worse entries down.
	    // Strict '<' means that on ties the earlier index keeps its rank.
	    if (d < best1) {
		best3 = best2;
		besti3 = besti2;
		best2 = best1;
		besti2 = besti1;
		best1 = d;
		besti1 = k;
	    } else if (d < best2) {
		best3 = best2;
		besti3 = besti2;
		best2 = d;
		besti2 = k;
	    } else if (d < best3) {
		best3 = d;
		besti3 = k;
	    }
	}
	// The double accumulators are narrowed to float on store.
	dist2[j * 3 + 0] = best1;
	dist2[j * 3 + 1] = best2;
	dist2[j * 3 + 2] = best3;

	idx[j * 3 + 0] = besti1;
	idx[j * 3 + 1] = besti2;
	idx[j * 3 + 2] = besti3;
    }
}
 62 | 
 63 | void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown,
 64 | 			     const float *known, float *dist2, int *idx,
 65 | 			     cudaStream_t stream) {
 66 | 
 67 |     cudaError_t err;
 68 |     three_nn_kernel<<<b, opt_n_threads(n), 0, stream>>>(b, n, m, unknown, known,
 69 | 							dist2, idx);
 70 | 
 71 |     err = cudaGetLastError();
 72 |     if (cudaSuccess != err) {
 73 | 	fprintf(stderr, "CUDA kernel "
 74 | 			"failed : %s\n",
 75 | 		cudaGetErrorString(err));
 76 | 	exit(-1);
 77 |     }
 78 | }
 79 | 
 80 | // input: points(b, c, m), idx(b, n, 3), weight(b, n, 3)
 81 | // output: out(b, c, n)
 82 | __global__ void three_interpolate_kernel(int b, int c, int m, int n,
 83 | 					 const float *__restrict__ points,
 84 | 					 const int *__restrict__ idx,
 85 | 					 const float *__restrict__ weight,
 86 | 					 float *__restrict__ out) {
 87 |     int batch_index = blockIdx.x;
 88 |     points += batch_index * m * c;
 89 | 
 90 |     idx += batch_index * n * 3;
 91 |     weight += batch_index * n * 3;
 92 | 
 93 |     out += batch_index * n * c;
 94 | 
 95 |     const int index = threadIdx.y * blockDim.x + threadIdx.x;
 96 |     const int stride = blockDim.y * blockDim.x;
 97 |     for (int i = index; i < c * n; i += stride) {
 98 | 	const int l = i / n;
 99 | 	const int j = i % n;
100 | 	float w1 = weight[j * 3 + 0];
101 | 	float w2 = weight[j * 3 + 1];
102 | 	float w3 = weight[j * 3 + 2];
103 | 
104 | 	int i1 = idx[j * 3 + 0];
105 | 	int i2 = idx[j * 3 + 1];
106 | 	int i3 = idx[j * 3 + 2];
107 | 
108 | 	out[i] = points[l * m + i1] * w1 + points[l * m + i2] * w2 +
109 | 		 points[l * m + i3] * w3;
110 |     }
111 | }
112 | 
// Launches three_interpolate_kernel with b blocks shaped by
// opt_block_config(n, c) on `stream`. If cudaGetLastError() reports a
// failure, prints it to stderr and terminates the process with exit(-1).
void three_interpolate_kernel_wrapper(int b, int c, int m, int n,
                                      const float *points, const int *idx,
                                      const float *weight, float *out,
                                      cudaStream_t stream) {
    three_interpolate_kernel<<<b, opt_block_config(n, c), 0, stream>>>(
        b, c, m, n, points, idx, weight, out);

    const cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess) {
        return;
    }

    fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(status));
    exit(-1);
}
130 | 
// Interpolation backward pass: distributes each output gradient to the three
// source features it was interpolated from, scaled by the matching weight.
// atomicAdd is used because several outputs may share a source feature.
// input:  grad_out (b, c, n), idx (b, n, 3), weight (b, n, 3)
// output: grad_points (b, c, m)
__global__ void three_interpolate_grad_kernel(
    int b, int c, int n, int m, const float *__restrict__ grad_out,
    const int *__restrict__ idx, const float *__restrict__ weight,
    float *__restrict__ grad_points) {
    const int batch = blockIdx.x;
    const float *g_out = grad_out + batch * n * c;
    const int *nbr = idx + batch * n * 3;
    const float *wgt = weight + batch * n * 3;
    float *g_pts = grad_points + batch * m * c;

    const int tid = threadIdx.y * blockDim.x + threadIdx.x;
    const int nthreads = blockDim.y * blockDim.x;
    for (int flat = tid; flat < c * n; flat += nthreads) {
        const int ch = flat / n;
        const int pt = flat % n;

        const float *w = wgt + pt * 3;
        const int *tri = nbr + pt * 3;
        float *g_row = g_pts + ch * m;

        const float w_a = w[0];
        const float w_b = w[1];
        const float w_c = w[2];

        atomicAdd(g_row + tri[0], g_out[flat] * w_a);
        atomicAdd(g_row + tri[1], g_out[flat] * w_b);
        atomicAdd(g_row + tri[2], g_out[flat] * w_c);
    }
}
162 | 
// Launches three_interpolate_grad_kernel on `stream`.
//   grad_out (b, c, n), idx (b, n, 3), weight (b, n, 3) -> grad_points (b, c, m)
// The parameter order (b, c, n, m) matches the declaration in
// interpolate_gpu.h and the caller in interpolate.c. The definition used to
// name them (b, n, c, m), so `n` and `c` were mislabelled here and the block
// shape was derived as (c, n) instead of (n, c) like the forward pass.
// If cudaGetLastError() reports a failure, prints it to stderr and terminates
// the process with exit(-1).
void three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m,
					   const float *grad_out,
					   const int *idx, const float *weight,
					   float *grad_points,
					   cudaStream_t stream) {

    cudaError_t err;
    three_interpolate_grad_kernel<<<b, opt_block_config(n, c), 0, stream>>>(
	b, c, n, m, grad_out, idx, weight, grad_points);

    err = cudaGetLastError();
    if (cudaSuccess != err) {
	fprintf(stderr, "CUDA kernel "
			"failed : %s\n",
		cudaGetErrorString(err));
	exit(-1);
    }
}
181 | 


--------------------------------------------------------------------------------
/utils/csrc/sampling.c:
--------------------------------------------------------------------------------
 1 | #include <THC/THC.h>
 2 | 
 3 | #include "sampling_gpu.h"
 4 | 
 5 | extern THCState *state;
 6 | 
 7 | int gather_points_wrapper(int b, int c, int n, int npoints,
 8 | 			  THCudaTensor *points_tensor,
 9 | 			  THCudaIntTensor *idx_tensor,
10 | 			  THCudaTensor *out_tensor) {
11 | 
12 |     const float *points = THCudaTensor_data(state, points_tensor);
13 |     const int *idx = THCudaIntTensor_data(state, idx_tensor);
14 |     float *out = THCudaTensor_data(state, out_tensor);
15 | 
16 |     cudaStream_t stream = THCState_getCurrentStream(state);
17 | 
18 |     gather_points_kernel_wrapper(b, c, n, npoints, points, idx, out, stream);
19 |     return 1;
20 | }
21 | 
22 | int gather_points_grad_wrapper(int b, int c, int n, int npoints,
23 | 			       THCudaTensor *grad_out_tensor,
24 | 			       THCudaIntTensor *idx_tensor,
25 | 			       THCudaTensor *grad_points_tensor) {
26 | 
27 |     const float *grad_out = THCudaTensor_data(state, grad_out_tensor);
28 |     const int *idx = THCudaIntTensor_data(state, idx_tensor);
29 |     float *grad_points = THCudaTensor_data(state, grad_points_tensor);
30 | 
31 |     cudaStream_t stream = THCState_getCurrentStream(state);
32 | 
33 |     gather_points_grad_kernel_wrapper(b, c, n, npoints, grad_out, idx,
34 | 				      grad_points, stream);
35 |     return 1;
36 | }
37 | 
38 | int furthest_point_sampling_wrapper(int b, int n, int m,
39 | 				    THCudaTensor *points_tensor,
40 | 				    THCudaTensor *temp_tensor,
41 | 				    THCudaIntTensor *idx_tensor) {
42 | 
43 |     const float *points = THCudaTensor_data(state, points_tensor);
44 |     float *temp = THCudaTensor_data(state, temp_tensor);
45 |     int *idx = THCudaIntTensor_data(state, idx_tensor);
46 | 
47 |     cudaStream_t stream = THCState_getCurrentStream(state);
48 | 
49 |     furthest_point_sampling_kernel_wrapper(b, n, m, points, temp, idx, stream);
50 |     return 1;
51 | }
52 | 


--------------------------------------------------------------------------------
/utils/csrc/sampling_gpu.cu:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdlib.h>
  3 | 
  4 | #include "cuda_utils.h"
  5 | #include "sampling_gpu.h"
  6 | 
  7 | // input: points(b, c, n) idx(b, m)
  8 | // output: out(b, c, m)
  9 | __global__ void gather_points_kernel(int b, int c, int n, int m,
 10 | 				     const float *__restrict__ points,
 11 | 				     const int *__restrict__ idx,
 12 | 				     float *__restrict__ out) {
 13 |     for (int i = blockIdx.x; i < b; i += gridDim.x) {
 14 | 	for (int l = blockIdx.y; l < c; l += gridDim.y) {
 15 | 	    for (int j = threadIdx.x; j < m; j += blockDim.x) {
 16 | 		int a = idx[i * m + j];
 17 | 		out[(i * c + l) * m + j] = points[(i * c + l) * n + a];
 18 | 	    }
 19 | 	}
 20 |     }
 21 | }
 22 | 
 23 | void gather_points_kernel_wrapper(int b, int c, int n, int npoints,
 24 | 				  const float *points, const int *idx,
 25 | 				  float *out, cudaStream_t stream) {
 26 | 
 27 |     cudaError_t err;
 28 |     gather_points_kernel<<<dim3(b, c, 1), opt_n_threads(npoints), 0, stream>>>(
 29 | 	b, c, n, npoints, points, idx, out);
 30 | 
 31 |     err = cudaGetLastError();
 32 |     if (cudaSuccess != err) {
 33 | 	fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
 34 | 	exit(-1);
 35 |     }
 36 | }
 37 | 
 38 | // input: grad_out(b, c, m) idx(b, m)
 39 | // output: grad_points(b, c, n)
 40 | __global__ void gather_points_grad_kernel(int b, int c, int n, int m,
 41 | 					  const float *__restrict__ grad_out,
 42 | 					  const int *__restrict__ idx,
 43 | 					  float *__restrict__ grad_points) {
 44 |     for (int i = blockIdx.x; i < b; i += gridDim.x) {
 45 | 	for (int l = blockIdx.y; l < c; l += gridDim.y) {
 46 | 	    for (int j = threadIdx.x; j < m; j += blockDim.x) {
 47 | 		int a = idx[i * m + j];
 48 | 		atomicAdd(grad_points + (i * c + l) * n + a,
 49 | 			  grad_out[(i * c + l) * m + j]);
 50 | 	    }
 51 | 	}
 52 |     }
 53 | }
 54 | 
 55 | void gather_points_grad_kernel_wrapper(int b, int c, int n, int npoints,
 56 | 				       const float *grad_out, const int *idx,
 57 | 				       float *grad_points,
 58 | 				       cudaStream_t stream) {
 59 | 
 60 |     cudaError_t err;
 61 |     gather_points_grad_kernel<<<dim3(b, c, 1), opt_n_threads(npoints), 0,
 62 | 				stream>>>(b, c, n, npoints, grad_out, idx,
 63 | 					  grad_points);
 64 | 
 65 |     err = cudaGetLastError();
 66 |     if (cudaSuccess != err) {
 67 | 	fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
 68 | 	exit(-1);
 69 |     }
 70 | }
 71 | 
 72 | __device__ void __update(float *__restrict__ dists, int *__restrict__ dists_i,
 73 | 			 int idx1, int idx2) {
 74 |     const float v1 = dists[idx1], v2 = dists[idx2];
 75 |     const int i1 = dists_i[idx1], i2 = dists_i[idx2];
 76 |     dists[idx1] = max(v1, v2);
 77 |     dists_i[idx1] = v2 > v1 ? i2 : i1;
 78 | }
 79 | 
 80 | // Input dataset: (b, n, 3), tmp: (b, n)
 81 | // Ouput idxs (b, m)
 82 | template <unsigned int block_size>
 83 | __global__ void furthest_point_sampling_kernel(
 84 |     int b, int n, int m, const float *__restrict__ dataset,
 85 |     float *__restrict__ temp, int *__restrict__ idxs) {
 86 |     if (m <= 0)
 87 | 	return;
 88 |     __shared__ float dists[block_size];
 89 |     __shared__ int dists_i[block_size];
 90 | 
 91 |     int batch_index = blockIdx.x;
 92 |     dataset += batch_index * n * 3;
 93 |     temp += batch_index * n;
 94 |     idxs += batch_index * m;
 95 | 
 96 |     int tid = threadIdx.x;
 97 |     const int stride = block_size;
 98 | 
 99 |     int old = 0;
100 |     if (threadIdx.x == 0)
101 | 	idxs[0] = old;
102 | 
103 |     __syncthreads();
104 |     for (int j = 1; j < m; j++) {
105 | 	int besti = 0;
106 | 	float best = -1;
107 | 	float x1 = dataset[old * 3 + 0];
108 | 	float y1 = dataset[old * 3 + 1];
109 | 	float z1 = dataset[old * 3 + 2];
110 | 	for (int k = tid; k < n; k += stride) {
111 | 	    float x2, y2, z2;
112 | 	    x2 = dataset[k * 3 + 0];
113 | 	    y2 = dataset[k * 3 + 1];
114 | 	    z2 = dataset[k * 3 + 2];
115 | 	    float mag = (x2 * x2) + (y2 * y2) + (z2 * z2);
116 | 	    if (mag <= 1e-3)
117 | 		continue;
118 | 
119 | 	    float d = (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) +
120 | 		      (z2 - z1) * (z2 - z1);
121 | 
122 | 	    float d2 = min(d, temp[k]);
123 | 	    temp[k] = d2;
124 | 	    besti = d2 > best ? k : besti;
125 | 	    best = d2 > best ? d2 : best;
126 | 	}
127 | 	dists[tid] = best;
128 | 	dists_i[tid] = besti;
129 | 	__syncthreads();
130 | 
131 | 	if (block_size >= 512) {
132 | 	    if (tid < 256) {
133 | 		__update(dists, dists_i, tid, tid + 256);
134 | 	    }
135 | 	    __syncthreads();
136 | 	}
137 | 	if (block_size >= 256) {
138 | 	    if (tid < 128) {
139 | 		__update(dists, dists_i, tid, tid + 128);
140 | 	    }
141 | 	    __syncthreads();
142 | 	}
143 | 	if (block_size >= 128) {
144 | 	    if (tid < 64) {
145 | 		__update(dists, dists_i, tid, tid + 64);
146 | 	    }
147 | 	    __syncthreads();
148 | 	}
149 | 	if (block_size >= 64) {
150 | 	    if (tid < 32) {
151 | 		__update(dists, dists_i, tid, tid + 32);
152 | 	    }
153 | 	    __syncthreads();
154 | 	}
155 | 	if (block_size >= 32) {
156 | 	    if (tid < 16) {
157 | 		__update(dists, dists_i, tid, tid + 16);
158 | 	    }
159 | 	    __syncthreads();
160 | 	}
161 | 	if (block_size >= 16) {
162 | 	    if (tid < 8) {
163 | 		__update(dists, dists_i, tid, tid + 8);
164 | 	    }
165 | 	    __syncthreads();
166 | 	}
167 | 	if (block_size >= 8) {
168 | 	    if (tid < 4) {
169 | 		__update(dists, dists_i, tid, tid + 4);
170 | 	    }
171 | 	    __syncthreads();
172 | 	}
173 | 	if (block_size >= 4) {
174 | 	    if (tid < 2) {
175 | 		__update(dists, dists_i, tid, tid + 2);
176 | 	    }
177 | 	    __syncthreads();
178 | 	}
179 | 	if (block_size >= 2) {
180 | 	    if (tid < 1) {
181 | 		__update(dists, dists_i, tid, tid + 1);
182 | 	    }
183 | 	    __syncthreads();
184 | 	}
185 | 
186 | 	old = dists_i[0];
187 | 	if (tid == 0)
188 | 	    idxs[j] = old;
189 |     }
190 | }
191 | 
// Host-side launcher for furthest_point_sampling_kernel.
//
// Launches one block per batch element on `stream`.  The block width comes
// from opt_n_threads(n) and selects the kernel instantiation whose
// block_size template argument equals that width, because the kernel sizes
// its shared buffers and its thread stride from the template argument.
// A launch error reported by cudaGetLastError() is printed to stderr and
// terminates the process.
void furthest_point_sampling_kernel_wrapper(int b, int n, int m,
                                            const float *dataset, float *temp,
                                            int *idxs, cudaStream_t stream) {

    cudaError_t err;
    unsigned int n_threads = opt_n_threads(n);

    switch (n_threads) {
    case 512:
        furthest_point_sampling_kernel<512><<<b, n_threads, 0, stream>>>(
            b, n, m, dataset, temp, idxs);
        break;
    case 256:
        furthest_point_sampling_kernel<256><<<b, n_threads, 0, stream>>>(
            b, n, m, dataset, temp, idxs);
        break;
    case 128:
        furthest_point_sampling_kernel<128><<<b, n_threads, 0, stream>>>(
            b, n, m, dataset, temp, idxs);
        break;
    case 64:
        furthest_point_sampling_kernel<64><<<b, n_threads, 0, stream>>>(
            b, n, m, dataset, temp, idxs);
        break;
    case 32:
        furthest_point_sampling_kernel<32><<<b, n_threads, 0, stream>>>(
            b, n, m, dataset, temp, idxs);
        break;
    case 16:
        furthest_point_sampling_kernel<16><<<b, n_threads, 0, stream>>>(
            b, n, m, dataset, temp, idxs);
        break;
    case 8:
        furthest_point_sampling_kernel<8><<<b, n_threads, 0, stream>>>(
            b, n, m, dataset, temp, idxs);
        break;
    case 4:
        furthest_point_sampling_kernel<4><<<b, n_threads, 0, stream>>>(
            b, n, m, dataset, temp, idxs);
        break;
    case 2:
        furthest_point_sampling_kernel<2><<<b, n_threads, 0, stream>>>(
            b, n, m, dataset, temp, idxs);
        break;
    case 1:
        furthest_point_sampling_kernel<1><<<b, n_threads, 0, stream>>>(
            b, n, m, dataset, temp, idxs);
        break;
    default:
        // No instantiation matches n_threads.  Run the 512-wide kernel with
        // a 512-thread block so that blockDim.x agrees with block_size;
        // launching it with any other width would leave points unvisited
        // or index the shared buffers out of range.
        furthest_point_sampling_kernel<512><<<b, 512, 0, stream>>>(
            b, n, m, dataset, temp, idxs);
        break;
    }

    err = cudaGetLastError();
    if (cudaSuccess != err) {
        fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
        exit(-1);
    }
}
251 | 


--------------------------------------------------------------------------------
/utils/linalg_utils.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from enum import Enum
 3 | 
 4 | PDist2Order = Enum('PDist2Order', 'd_first d_second')
 5 | 
 6 | 
 7 | def pdist2(
 8 |         X: torch.Tensor,
 9 |         Z: torch.Tensor = None,
10 |         order: PDist2Order = PDist2Order.d_second
11 | ) -> torch.Tensor:
12 |     r""" Calculates the pairwise distance between X and Z
13 | 
14 |     D[b, i, j] = l2 distance X[b, i] and Z[b, j]
15 | 
16 |     Parameters
17 |     ---------
18 |     X : torch.Tensor
19 |         X is a (B, N, d) tensor.  There are B batches, and N vectors of dimension d
20 |     Z: torch.Tensor
21 |         Z is a (B, M, d) tensor.  If Z is None, then Z = X
22 | 
23 |     Returns
24 |     -------
25 |     torch.Tensor
26 |         Distance matrix is size (B, N, M)
27 |     """
28 | 
29 |     if order == PDist2Order.d_second:
30 |         if X.dim() == 2:
31 |             X = X.unsqueeze(0)
32 |         if Z is None:
33 |             Z = X
34 |             G = X @ Z.transpose(-2, -1)
35 |             S = (X * X).sum(-1, keepdim=True)
36 |             R = S.transpose(-2, -1)
37 |         else:
38 |             if Z.dim() == 2:
39 |                 Z = Z.unsqueeze(0)
40 |             G = X @ Z.transpose(-2, -1)
41 |             S = (X * X).sum(-1, keepdim=True)
42 |             R = (Z * Z).sum(-1, keepdim=True).transpose(-2, -1)
43 |     else:
44 |         if X.dim() == 2:
45 |             X = X.unsqueeze(0)
46 |         if Z is None:
47 |             Z = X
48 |             G = X.transpose(-2, -1) @ Z
49 |             R = (X * X).sum(-2, keepdim=True)
50 |             S = R.transpose(-2, -1)
51 |         else:
52 |             if Z.dim() == 2:
53 |                 Z = Z.unsqueeze(0)
54 |             G = X.transpose(-2, -1) @ Z
55 |             S = (X * X).sum(-2, keepdim=True).transpose(-2, -1)
56 |             R = (Z * Z).sum(-2, keepdim=True)
57 | 
58 |     return torch.abs(R + S - 2 * G).squeeze(0)
59 | 
60 | 
def pdist2_slow(X, Z=None):
    r""" Loop-based pairwise distance, one ``torch.dist`` call per entry

    X is indexed as ``X[b, :, i]`` and Z as ``Z[b, :, j]``, i.e. the point
    index is the last axis.  When Z is omitted, X is compared with itself.

    Returns
    -------
    torch.Tensor
        Tensor created with ``torch.zeros`` of size
        (X.size(0), X.size(2), Z.size(2)) where entry [b, i, j] is
        ``torch.dist(X[b, :, i], Z[b, :, j])``
    """
    other = X if Z is None else Z
    out = torch.zeros(X.size(0), X.size(2), other.size(2))
    n_batch, n_rows, n_cols = out.size(0), out.size(1), out.size(2)

    for b in range(n_batch):
        for i in range(n_rows):
            for j in range(n_cols):
                out[b, i, j] = torch.dist(X[b, :, i], other[b, :, j])
    return out
70 | 
71 | 
if __name__ == "__main__":
    # Small demo: run the vectorised and the loop-based routine on the same
    # random (B, d, N) input, print both results and the gap between them.
    X = torch.randn(2, 3, 5)
    Z = torch.randn(2, 3, 3)

    fast = pdist2(X, order=PDist2Order.d_first)
    slow = pdist2_slow(X)

    print(fast)
    print(slow)
    print(torch.dist(fast, slow))
79 | 


--------------------------------------------------------------------------------
/utils/pointnet2_modules.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | 
  5 | import pointnet2_utils
  6 | import pytorch_utils as pt_utils
  7 | from typing import List
  8 | import numpy as np
  9 | 
 10 | class _PointnetSAModuleBase(nn.Module):
 11 | 
 12 |     def __init__(self):
 13 |         super().__init__()
 14 |         self.npoint = None
 15 |         self.groupers = None
 16 |         self.mlps = None
 17 |         self.pool = False
 18 | 
 19 |     def forward(self, xyz: torch.Tensor,
 20 |                 features: torch.Tensor = None) -> (torch.Tensor, torch.Tensor):
 21 |         r"""
 22 |         Parameters
 23 |         ----------
 24 |         xyz : torch.Tensor
 25 |             (B, N, 3) tensor of the xyz coordinates of the points
 26 |         features : torch.Tensor
 27 |             (B, N, C) tensor of the descriptors of the the points
 28 | 
 29 |         Returns
 30 |         -------
 31 |         new_xyz : torch.Tensor
 32 |             (B, npoint, 3) tensor of the new points' xyz
 33 |         new_features : torch.Tensor
 34 |             (B, npoint, \sum_k(mlps[k][-1])) tensor of the new_points descriptors
 35 |         """
 36 | 
 37 |         all_features = 0
 38 |         xyz_flipped = xyz.transpose(1, 2).contiguous()
 39 |         
 40 |         if self.npoint is not None:
 41 |             fps_idx = pointnet2_utils.furthest_point_sample(xyz, self.npoint) \
 42 |                       if self.pool else torch.from_numpy(np.arange(xyz.size(1))).int().cuda().repeat(xyz.size(0), 1)
 43 |             new_xyz = pointnet2_utils.gather_operation(xyz_flipped, fps_idx).transpose(1, 2).contiguous()
 44 |         else:
 45 |             new_xyz = None
 46 |         
 47 |         for i in range(len(self.groupers)):
 48 |             new_features = self.groupers[i](xyz, new_xyz, features)  # (B, C, npoint, nsample)
 49 |             if not self.pool and self.npoint is not None:
 50 |                 new_features = [new_features, features]
 51 |             new_features = self.mlps[i](new_features)   # (B, mlp[-1], npoint)
 52 |             all_features += new_features
 53 |         
 54 |         return new_xyz, all_features
 55 | 
 56 | 
 57 | class PointnetSAModuleMSG(_PointnetSAModuleBase):
 58 |     r"""Pointnet set abstrction layer with multiscale grouping
 59 | 
 60 |     Parameters
 61 |     ----------
 62 |     npoint : int
 63 |         Number of points
 64 |     radii : list of float32
 65 |         list of radii to group with
 66 |     nsamples : list of int32
 67 |         Number of samples in each ball query
 68 |     mlps : list of list of int32
 69 |         Spec of the pointnet before the global max_pool for each scale
 70 |     bn : bool
 71 |         Use batchnorm
 72 |     """
 73 | 
 74 |     def __init__(
 75 |             self,
 76 |             *,
 77 |             npoint: int,
 78 |             radii: List[float],
 79 |             nsamples: List[int],
 80 |             mlps: List[List[int]],
 81 |             group_number = 1,
 82 |             use_xyz: bool = True,
 83 |             pool: bool = False,
 84 |             before_pool: bool = False,
 85 |             after_pool: bool = False,
 86 |             bias = True,
 87 |             init = nn.init.kaiming_normal
 88 |     ):
 89 |         super().__init__()
 90 | 
 91 |         assert len(radii) == len(nsamples) == len(mlps)
 92 |         self.pool = pool
 93 |         self.npoint = npoint
 94 |         self.groupers = nn.ModuleList()
 95 |         self.mlps = nn.ModuleList()
 96 |         
 97 |         if pool:
 98 |             C_in = (mlps[0][0] + 3) if use_xyz else mlps[0][0]
 99 |             C_out = mlps[0][1]
100 |             pconv = nn.Conv2d(in_channels = C_in, out_channels = C_out, kernel_size = (1, 1), 
101 |                                        stride = (1, 1), bias = bias)
102 |             init(pconv.weight)
103 |             if bias:
104 |                 nn.init.constant(pconv.bias, 0)
105 |             convs = [pconv]
106 |         
107 |         for i in range(len(radii)):
108 |             radius = radii[i]
109 |             nsample = nsamples[i]
110 |             self.groupers.append(
111 |                 pointnet2_utils.QueryAndGroup(radius, nsample, use_xyz=use_xyz)
112 |                 if npoint is not None else pointnet2_utils.GroupAll(use_xyz)
113 |             )
114 |             mlp_spec = mlps[i]
115 |             if use_xyz:
116 |                 mlp_spec[0] += 3
117 |             if npoint is None:
118 |                 self.mlps.append(pt_utils.GloAvgConv(C_in = mlp_spec[0], C_out = mlp_spec[1]))
119 |             elif pool:
120 |                 self.mlps.append(pt_utils.PointConv(C_in = mlp_spec[0], C_out = mlp_spec[1], convs = convs))
121 |             else:
122 |                 self.mlps.append(pt_utils.EnhancedPointConv(C_in = mlp_spec[0], C_out = mlp_spec[1], group_number = group_number, before_pool = before_pool, after_pool = after_pool))
123 | 
124 | 
class PointnetSAModule(PointnetSAModuleMSG):
    r"""Pointnet set abstraction layer with a single grouping scale

    Wraps each per-scale argument in a one-element list and hands it to
    ``PointnetSAModuleMSG``; every other option of the parent keeps its
    default value.

    Parameters
    ----------
    mlp : list
        Channel spec of the single scale
    npoint : int
        Number of points
    radius : float
        Radius of ball
    nsample : int
        Number of samples in the ball query
    use_xyz : bool
        Forwarded to the parent unchanged
    """

    def __init__(
            self,
            *,
            mlp: List[int],
            npoint: int = None,
            radius: float = None,
            nsample: int = None,
            use_xyz: bool = True
    ):
        single_scale = dict(
            mlps=[mlp],
            npoint=npoint,
            radii=[radius],
            nsamples=[nsample],
            use_xyz=use_xyz,
        )
        super().__init__(**single_scale)
158 | 
159 | 
class PointnetFPModule(nn.Module):
    r"""Propagates the features of one point set to another

    Parameters
    ----------
    mlp : list
        Spec passed to ``pt_utils.SharedMLP``
    bn : bool
        Passed to ``pt_utils.SharedMLP`` as its ``bn`` argument
    """

    def __init__(self, *, mlp: List[int], bn: bool = True):
        super().__init__()
        self.mlp = pt_utils.SharedMLP(mlp, bn=bn)

    def forward(
            self, unknown: torch.Tensor, known: torch.Tensor,
            unknow_feats: torch.Tensor, known_feats: torch.Tensor
    ) -> torch.Tensor:
        r"""
        Parameters
        ----------
        unknown : torch.Tensor
            (B, n, 3) tensor of the xyz positions of the unknown features
        known : torch.Tensor
            (B, m, 3) tensor of the xyz positions of the known features
        unknow_feats : torch.Tensor
            (B, C1, n) tensor of the features to be propagated to; may be
            None, in which case only the interpolated features are used
        known_feats : torch.Tensor
            (B, C2, m) tensor of features to be propagated

        Returns
        -------
        new_features : torch.Tensor
            (B, mlp[-1], n) tensor of the features of the unknown features
        """

        # Inverse-distance weights over the three nearest known points,
        # normalised to sum to one; 1e-8 guards against division by zero.
        dist, idx = pointnet2_utils.three_nn(unknown, known)
        inv_dist = 1.0 / (dist + 1e-8)
        weight = inv_dist / torch.sum(inv_dist, dim=2, keepdim=True)

        interpolated_feats = pointnet2_utils.three_interpolate(
            known_feats, idx, weight
        )

        if unknow_feats is None:
            stacked = interpolated_feats
        else:
            stacked = torch.cat([interpolated_feats, unknow_feats],
                                dim=1)  #(B, C2 + C1, n)

        # The mlp is applied with a trailing singleton axis that is removed
        # again afterwards.
        return self.mlp(stacked.unsqueeze(-1)).squeeze(-1)


--------------------------------------------------------------------------------
/utils/pointnet2_utils.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | from torch.autograd import Variable
  3 | from torch.autograd import Function
  4 | import torch.nn.functional as F
  5 | import torch.nn as nn
  6 | from typing import List, Tuple
  7 | 
  8 | from _ext import pointnet2
  9 | 
 10 | class FurthestPointSampling(Function):
 11 | 
 12 |     @staticmethod
 13 |     def forward(ctx, xyz: torch.Tensor, npoint: int) -> torch.Tensor:
 14 |         r"""
 15 |         Uses iterative furthest point sampling to select a set of npoint features that have the largest
 16 |         minimum distance
 17 | 
 18 |         Parameters
 19 |         ----------
 20 |         xyz : torch.Tensor
 21 |             (B, N, 3) tensor where N > npoint
 22 |         npoint : int32
 23 |             number of features in the sampled set
 24 | 
 25 |         Returns
 26 |         -------
 27 |         torch.Tensor
 28 |             (B, npoint) tensor containing the set
 29 |         """
 30 |         assert xyz.is_contiguous()
 31 | 
 32 |         B, N, _ = xyz.size()
 33 | 
 34 |         output = torch.cuda.IntTensor(B, npoint)
 35 |         temp = torch.cuda.FloatTensor(B, N).fill_(1e10)
 36 |         pointnet2.furthest_point_sampling_wrapper(
 37 |             B, N, npoint, xyz, temp, output
 38 |         )
 39 |         return output
 40 | 
 41 |     @staticmethod
 42 |     def backward(xyz, a=None):
 43 |         return None, None
 44 | 
 45 | 
 46 | furthest_point_sample = FurthestPointSampling.apply
 47 | 
 48 | 
 49 | class GatherOperation(Function):
 50 | 
 51 |     @staticmethod
 52 |     def forward(ctx, features: torch.Tensor, idx: torch.Tensor) -> torch.Tensor:
 53 |         r"""
 54 | 
 55 |         Parameters
 56 |         ----------
 57 |         features : torch.Tensor
 58 |             (B, C, N) tensor
 59 | 
 60 |         idx : torch.Tensor
 61 |             (B, npoint) tensor of the features to gather
 62 | 
 63 |         Returns
 64 |         -------
 65 |         torch.Tensor
 66 |             (B, C, npoint) tensor
 67 |         """
 68 |         assert features.is_contiguous()
 69 |         assert idx.is_contiguous()
 70 | 
 71 |         B, npoint = idx.size()
 72 |         _, C, N = features.size()
 73 | 
 74 |         output = torch.cuda.FloatTensor(B, C, npoint)
 75 | 
 76 |         pointnet2.gather_points_wrapper(
 77 |             B, C, N, npoint, features, idx, output
 78 |         )
 79 | 
 80 |         ctx.for_backwards = (idx, C, N)
 81 | 
 82 |         return output
 83 | 
 84 |     @staticmethod
 85 |     def backward(ctx, grad_out):
 86 |         idx, C, N = ctx.for_backwards
 87 |         B, npoint = idx.size()
 88 | 
 89 |         grad_features = Variable(torch.cuda.FloatTensor(B, C, N).zero_())
 90 |         grad_out_data = grad_out.data.contiguous()
 91 |         pointnet2.gather_points_grad_wrapper(
 92 |             B, C, N, npoint, grad_out_data, idx, grad_features.data
 93 |         )
 94 | 
 95 |         return grad_features, None
 96 | 
 97 | 
 98 | gather_operation = GatherOperation.apply
 99 | 
100 | 
class ThreeNN(Function):

    @staticmethod
    def forward(ctx, unknown: torch.Tensor,
                known: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        r"""
            Find the three nearest neighbors of unknown in known
        Parameters
        ----------
        unknown : torch.Tensor
            (B, n, 3) contiguous tensor of the query (unknown) positions
        known : torch.Tensor
            (B, m, 3) contiguous tensor of the reference (known) positions

        Returns
        -------
        dist : torch.Tensor
            (B, n, 3) l2 distance to the three nearest neighbors
        idx : torch.Tensor
            (B, n, 3) index of 3 nearest neighbors
        """
        assert unknown.is_contiguous()
        assert known.is_contiguous()

        batch_size, n_query, _ = unknown.size()
        n_ref = known.size(1)
        sq_dist = torch.cuda.FloatTensor(batch_size, n_query, 3)
        nn_idx = torch.cuda.IntTensor(batch_size, n_query, 3)

        pointnet2.three_nn_wrapper(
            batch_size, n_query, n_ref, unknown, known, sq_dist, nn_idx
        )

        # The square root of the filled buffer is what gets returned.
        return torch.sqrt(sq_dist), nn_idx

    @staticmethod
    def backward(ctx, a=None, b=None):
        # No gradient is propagated to either input.
        return None, None


three_nn = ThreeNN.apply
140 | 
141 | 
class ThreeInterpolate(Function):

    @staticmethod
    def forward(
            ctx, features: torch.Tensor, idx: torch.Tensor, weight: torch.Tensor
    ) -> torch.Tensor:
        r"""
            Performs weighted linear interpolation on 3 features
        Parameters
        ----------
        features : torch.Tensor
            (B, c, m) contiguous feature descriptors to be interpolated from
        idx : torch.Tensor
            (B, n, 3) contiguous tensor with the three nearest neighbors of
            the target features in features
        weight : torch.Tensor
            (B, n, 3) contiguous tensor of weights

        Returns
        -------
        torch.Tensor
            (B, c, n) tensor of the interpolated features
        """
        assert features.is_contiguous()
        assert idx.is_contiguous()
        assert weight.is_contiguous()

        batch_size, channels, n_src = features.size()
        n_dst = idx.size(1)

        # Kept for backward, which needs the neighbor table and weights.
        ctx.three_interpolate_for_backward = (idx, weight, n_src)

        interpolated = torch.cuda.FloatTensor(batch_size, channels, n_dst)

        pointnet2.three_interpolate_wrapper(
            batch_size, channels, n_src, n_dst, features, idx, weight,
            interpolated
        )

        return interpolated

    @staticmethod
    def backward(ctx, grad_out: torch.Tensor
                ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        r"""
        Parameters
        ----------
        grad_out : torch.Tensor
            (B, c, n) tensor with gradients of outputs

        Returns
        -------
        grad_features : torch.Tensor
            (B, c, m) tensor with gradients of features

        None

        None
        """
        idx, weight, n_src = ctx.three_interpolate_for_backward
        batch_size, channels, n_dst = grad_out.size()

        grad_features = Variable(
            torch.cuda.FloatTensor(batch_size, channels, n_src).zero_()
        )

        grad_out_data = grad_out.data.contiguous()
        pointnet2.three_interpolate_grad_wrapper(
            batch_size, channels, n_dst, n_src, grad_out_data, idx, weight,
            grad_features.data
        )

        return grad_features, None, None


three_interpolate = ThreeInterpolate.apply
213 | 
214 | 
class GroupingOperation(Function):

    @staticmethod
    def forward(ctx, features: torch.Tensor, idx: torch.Tensor) -> torch.Tensor:
        r"""
        Collects, for every group, the features named by idx

        Parameters
        ----------
        features : torch.Tensor
            (B, C, N) contiguous tensor of features to group
        idx : torch.Tensor
            (B, npoint, nsample) contiguous tensor containing the indices of
            features to group with

        Returns
        -------
        torch.Tensor
            (B, C, npoint, nsample) tensor
        """
        assert features.is_contiguous()
        assert idx.is_contiguous()

        batch_size, n_groups, nsample = idx.size()
        _, channels, num_points = features.size()

        grouped = torch.cuda.FloatTensor(batch_size, channels, n_groups, nsample)

        pointnet2.group_points_wrapper(
            batch_size, channels, num_points, n_groups, nsample,
            features, idx, grouped
        )

        # backward needs the index table and N to size the gradient.
        ctx.for_backwards = (idx, num_points)
        return grouped

    @staticmethod
    def backward(ctx,
                 grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        r"""

        Parameters
        ----------
        grad_out : torch.Tensor
            (B, C, npoint, nsample) tensor of the gradients of the output from forward

        Returns
        -------
        torch.Tensor
            (B, C, N) gradient of the features
        None
        """
        idx, num_points = ctx.for_backwards

        batch_size, channels, npoint, nsample = grad_out.size()
        grad_features = Variable(
            torch.cuda.FloatTensor(batch_size, channels, num_points).zero_()
        )

        grad_out_data = grad_out.data.contiguous()
        pointnet2.group_points_grad_wrapper(
            batch_size, channels, num_points, npoint, nsample,
            grad_out_data, idx, grad_features.data
        )

        return grad_features, None


grouping_operation = GroupingOperation.apply
278 | 
279 | 
class BallQuery(Function):

    @staticmethod
    def forward(
            ctx, radius: float, nsample: int, xyz: torch.Tensor,
            new_xyz: torch.Tensor
    ) -> torch.Tensor:
        r"""
        Ball query around every center in new_xyz

        Parameters
        ----------
        radius : float
            radius of the balls
        nsample : int
            maximum number of features in the balls
        xyz : torch.Tensor
            (B, N, 3) contiguous xyz coordinates of the features
        new_xyz : torch.Tensor
            (B, npoint, 3) contiguous centers of the ball query

        Returns
        -------
        torch.Tensor
            (B, npoint, nsample) int tensor with the indices of the features
            that form the query balls; it is zero-filled before the
            extension call
        """
        assert new_xyz.is_contiguous()
        assert xyz.is_contiguous()

        batch_size, num_points, _ = xyz.size()
        n_centers = new_xyz.size(1)
        ball_idx = torch.cuda.IntTensor(batch_size, n_centers, nsample).zero_()

        pointnet2.ball_query_wrapper(
            batch_size, num_points, n_centers, radius, nsample,
            new_xyz, xyz, ball_idx
        )

        return ball_idx

    @staticmethod
    def backward(ctx, a=None):
        # One None per forward argument: nothing here is differentiable.
        return None, None, None, None


ball_query = BallQuery.apply
324 | 
325 | 
class QueryAndGroup(nn.Module):
    r"""
    Groups with a ball query of radius

    Parameters
    ---------
    radius : float32
        Radius of ball
    nsample : int32
        Maximum number of features to gather in the ball
    use_xyz : bool
        Whether the centered coordinates are part of the returned features
    """

    def __init__(self, radius: float, nsample: int, use_xyz: bool = True):
        super().__init__()
        self.radius = radius
        self.nsample = nsample
        self.use_xyz = use_xyz

    def forward(
            self,
            xyz: torch.Tensor,
            new_xyz: torch.Tensor,
            features: torch.Tensor = None
    ) -> Tuple[torch.Tensor]:
        r"""
        Parameters
        ----------
        xyz : torch.Tensor
            xyz coordinates of the features (B, N, 3)
        new_xyz : torch.Tensor
            centroids (B, npoint, 3)
        features : torch.Tensor
            Descriptors of the features (B, C, N)

        Returns
        -------
        new_features : torch.Tensor
            (B, 3 + C, npoint, nsample) tensor when both coordinates and
            features are used; only the coordinate part or only the feature
            part otherwise
        """

        idx = ball_query(self.radius, self.nsample, xyz, new_xyz)

        grouped_xyz = grouping_operation(
            xyz.transpose(1, 2).contiguous(), idx
        )  # (B, 3, npoint, nsample)
        # Express every grouped coordinate relative to its ball center.
        grouped_xyz -= new_xyz.transpose(1, 2).unsqueeze(-1)

        if features is None:
            assert self.use_xyz, "Cannot have not features and not use xyz as a feature!"
            return grouped_xyz

        grouped_features = grouping_operation(features, idx)
        if not self.use_xyz:
            return grouped_features

        return torch.cat([grouped_xyz, grouped_features],
                         dim=1)  # (B, C + 3, npoint, nsample)
383 | 
384 | 
class GroupAll(nn.Module):
    r"""
    Groups all features into a single group

    Parameters
    ---------
    use_xyz : bool
        Whether the coordinates are prepended to the features
    """

    def __init__(self, use_xyz: bool = True):
        super().__init__()
        self.use_xyz = use_xyz

    def forward(
            self,
            xyz: torch.Tensor,
            new_xyz: torch.Tensor,
            features: torch.Tensor = None
    ) -> Tuple[torch.Tensor]:
        r"""
        Parameters
        ----------
        xyz : torch.Tensor
            xyz coordinates of the features (B, N, 3)
        new_xyz : torch.Tensor
            Ignored
        features : torch.Tensor
            Descriptors of the features (B, C, N)

        Returns
        -------
        new_features : torch.Tensor
            (B, C + 3, 1, N) tensor; (B, C, 1, N) when ``use_xyz`` is False,
            and (B, 3, 1, N) when no features are given
        """

        # (B, N, 3) -> (B, 3, 1, N): one group that holds every point.
        coords = xyz.transpose(1, 2).unsqueeze(2)
        if features is None:
            return coords

        descriptors = features.unsqueeze(2)
        if not self.use_xyz:
            return descriptors

        return torch.cat([coords, descriptors],
                         dim=1)  # (B, 3 + C, 1, N)
431 | 


--------------------------------------------------------------------------------
/utils/pytorch_utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .pytorch_utils import *
2 | 


--------------------------------------------------------------------------------
/utils/pytorch_utils/pytorch_utils.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | from torch.autograd import Variable
  5 | from torch.autograd.function import InplaceFunction
  6 | from itertools import repeat
  7 | import numpy as np
  8 | import os
  9 | from typing import List, Tuple
 10 | from scipy.stats import t as student_t
 11 | import statistics as stats
 12 | import math
 13 | 
 14 | ########## PointConv begin ############
 15 | class PointConv(nn.Module):
 16 |     '''
 17 |     Input shape: (B, C_in, npoint, nsample)
 18 |     Output shape: (B, C_out, npoint)
 19 |     '''
 20 |     def __init__(self, C_in, C_out, convs=None):
 21 |         super(PointConv, self).__init__()
 22 |         self.bn = nn.BatchNorm2d(C_out)
 23 |         self.activation = nn.ReLU(inplace=True)
 24 |         self.pconv = convs[0]
 25 |         
 26 |     def forward(self, x): # x: (B, C_in, npoint, nsample)
 27 |         nsample = x.size(3)
 28 |         x = self.activation(self.bn(self.pconv(x)))
 29 |         return F.max_pool2d(x, kernel_size = (1, nsample)).squeeze(3)
 30 | ########## PointConv   end ############
 31 | 
 32 | ########## EnhancedPointConv begin ############
 33 | class EnhancedPointConv(nn.Module):
 34 |     '''
 35 |     Input shape: (B, C_in, npoint, nsample)
 36 |     Output shape: (B, C_out, npoint)
 37 |     '''
 38 |     def __init__(self, C_in, C_out, group_number=1, before_pool=False, after_pool=False, init=nn.init.kaiming_normal, bias=True):
 39 |         super(EnhancedPointConv, self).__init__()
 40 |         self.before_pool, self.after_pool = before_pool, after_pool
 41 |         C_small = math.floor(C_out/4)
 42 |         self.conv_phi = nn.Conv2d(in_channels = C_in, out_channels = C_out, groups = group_number, kernel_size = (1, 1),
 43 |                                   stride = (1, 1), bias = bias)    # ~\phi function: grouped version
 44 |         self.conv_psi = nn.Conv1d(in_channels = C_out, out_channels = C_small, kernel_size = 1,
 45 |                               stride = 1, bias = bias)             # \psi function
 46 |         if not after_pool:
 47 |             self.bn_cin = nn.BatchNorm2d(C_in)
 48 |         self.bn_phi = nn.BatchNorm2d(C_out)
 49 |         if before_pool:
 50 |             self.bn_concat = nn.BatchNorm1d(C_in-3+C_small)
 51 |         self.activation = nn.ReLU(inplace=True)
 52 |         self.dropout = nn.Dropout(p=0.2)
 53 | 
 54 |         init(self.conv_phi.weight)
 55 |         init(self.conv_psi.weight)
 56 |         if bias:
 57 |             nn.init.constant(self.conv_phi.bias, 0)
 58 |             nn.init.constant(self.conv_psi.bias, 0)
 59 | 
 60 |     def forward(self, input): # x: (B, C_in, npoint, nsample)
 61 |         x, last_feat = input[0], input[1]
 62 |         nsample = x.size(3)
 63 |         if not self.after_pool:
 64 |             x = self.activation(self.bn_cin(x))
 65 |         x = self.activation(self.bn_phi(self.conv_phi(x)))
 66 |         x = F.max_pool2d(x, kernel_size=(1, nsample)).squeeze(3)
 67 |         x = torch.cat((last_feat, self.dropout(self.conv_psi(x))), dim=1)
 68 |         
 69 |         if self.before_pool:
 70 |             x = self.activation(self.bn_concat(x))
 71 |         return x
 72 | 
 73 | ########## EnhancedPointConv end ############
 74 | 
 75 | 
 76 | ########## global convolutional pooling begin ############
 77 | class GloAvgConv(nn.Module):
 78 |     '''
 79 |     Input shape: (B, C_in, 1, nsample)
 80 |     Output shape: (B, C_out, npoint)
 81 |     '''
 82 |     def __init__(
 83 |             self, 
 84 |             C_in, 
 85 |             C_out, 
 86 |             init=nn.init.kaiming_normal, 
 87 |             bias = True,
 88 |             activation = nn.ReLU(inplace=True)
 89 |     ):
 90 |         super(GloAvgConv, self).__init__()
 91 | 
 92 |         self.conv_avg = nn.Conv2d(in_channels = C_in, out_channels = C_out, kernel_size = (1, 1), 
 93 |                                   stride = (1, 1), bias = bias) 
 94 |         self.bn_avg = nn.BatchNorm2d(C_out)
 95 |         self.activation = activation
 96 |         
 97 |         init(self.conv_avg.weight)
 98 |         if bias:
 99 |             nn.init.constant(self.conv_avg.bias, 0)
100 |         
101 |     def forward(self, x):
102 |         nsample = x.size(3)
103 |         x = self.activation(self.bn_avg(self.conv_avg(x)))
104 |         x = F.max_pool2d(x, kernel_size = (1, nsample)).squeeze(3)
105 |         return x
106 | ########## global convolutional pooling end ############
107 | 
class SharedMLP(nn.Sequential):
    """Chain of this module's ``Conv2d`` blocks, one per consecutive pair in ``args``.

    Layer ``i`` maps ``args[i]`` channels to ``args[i + 1]`` channels and is
    registered as ``name + 'layer{i}'``.

    When ``first`` and ``preact`` are both true, layer 0 is built without
    batch norm and without an activation; every other layer uses ``bn`` and
    ``activation`` as given.
    """

    def __init__(
            self,
            args: List[int],
            *,
            bn: bool = False,
            activation=nn.ReLU(inplace=True),
            preact: bool = False,
            first: bool = False,
            name: str = ""
    ):
        super().__init__()

        for idx, (width_in, width_out) in enumerate(zip(args[:-1], args[1:])):
            # Only the very first layer of a pre-activation "first" stack is
            # stripped of its norm and activation.
            keeps_norm_act = not (first and preact and idx == 0)
            layer = Conv2d(
                width_in,
                width_out,
                bn=keeps_norm_act and bn,
                activation=activation if keeps_norm_act else None,
                preact=preact
            )
            self.add_module(name + 'layer{}'.format(idx), layer)
134 |             
135 | 
136 | class _BNBase(nn.Sequential):
137 | 
138 |     def __init__(self, in_size, batch_norm=None, name=""):
139 |         super().__init__()
140 |         self.add_module(name + "bn", batch_norm(in_size))
141 | 
142 |         nn.init.constant(self[0].weight, 1.0)
143 |         nn.init.constant(self[0].bias, 0)
144 | 
145 | 
class BatchNorm1d(_BNBase):
    """``_BNBase`` specialised to ``nn.BatchNorm1d``; ``name`` is keyword-only."""

    def __init__(self, in_size: int, *, name: str = ""):
        super(BatchNorm1d, self).__init__(in_size, nn.BatchNorm1d, name)
150 | 
151 | 
class BatchNorm2d(_BNBase):
    """``_BNBase`` specialised to ``nn.BatchNorm2d``."""

    def __init__(self, in_size: int, name: str = ""):
        super(BatchNorm2d, self).__init__(in_size, nn.BatchNorm2d, name)
156 | 
157 | 
class BatchNorm3d(_BNBase):
    """``_BNBase`` specialised to ``nn.BatchNorm3d``."""

    def __init__(self, in_size: int, name: str = ""):
        super(BatchNorm3d, self).__init__(in_size, nn.BatchNorm3d, name)
162 | 
163 | 
164 | class _ConvBase(nn.Sequential):
165 | 
166 |     def __init__(
167 |             self,
168 |             in_size,
169 |             out_size,
170 |             kernel_size,
171 |             stride,
172 |             padding,
173 |             activation,
174 |             bn,
175 |             init,
176 |             conv=None,
177 |             batch_norm=None,
178 |             bias=True,
179 |             preact=False,
180 |             name=""
181 |     ):
182 |         super().__init__()
183 | 
184 |         bias = bias and (not bn)
185 |         conv_unit = conv(
186 |             in_size,
187 |             out_size,
188 |             kernel_size=kernel_size,
189 |             stride=stride,
190 |             padding=padding,
191 |             bias=bias
192 |         )
193 |         init(conv_unit.weight)
194 |         if bias:
195 |             nn.init.constant(conv_unit.bias, 0)
196 | 
197 |         if bn:
198 |             if not preact:
199 |                 bn_unit = batch_norm(out_size)
200 |             else:
201 |                 bn_unit = batch_norm(in_size)
202 | 
203 |         if preact:
204 |             if bn:
205 |                 self.add_module(name + 'bn', bn_unit)
206 | 
207 |             if activation is not None:
208 |                 self.add_module(name + 'activation', activation)
209 | 
210 |         self.add_module(name + 'conv', conv_unit)
211 | 
212 |         if not preact:
213 |             if bn:
214 |                 self.add_module(name + 'bn', bn_unit)
215 | 
216 |             if activation is not None:
217 |                 self.add_module(name + 'activation', activation)
218 | 
219 | 
class Conv1d(_ConvBase):
    """``_ConvBase`` built on ``nn.Conv1d`` and this module's ``BatchNorm1d``.

    All options after ``out_size`` are keyword-only; the defaults give a
    kernel-size-1, stride-1, unpadded convolution followed by ReLU.
    """

    def __init__(
            self,
            in_size: int,
            out_size: int,
            *,
            kernel_size: int = 1,
            stride: int = 1,
            padding: int = 0,
            activation=nn.ReLU(inplace=True),
            bn: bool = False,
            init=nn.init.kaiming_normal,
            bias: bool = True,
            preact: bool = False,
            name: str = ""
    ):
        layer_types = dict(conv=nn.Conv1d, batch_norm=BatchNorm1d)
        super().__init__(
            in_size, out_size, kernel_size, stride, padding,
            activation, bn, init,
            bias=bias, preact=preact, name=name,
            **layer_types
        )
252 | 
253 | 
class Conv2d(_ConvBase):
    """``_ConvBase`` built on ``nn.Conv2d`` and this module's ``BatchNorm2d``.

    All options after ``out_size`` are keyword-only; the defaults give a
    1x1, stride-1, unpadded convolution followed by ReLU.
    """

    def __init__(
            self,
            in_size: int,
            out_size: int,
            *,
            kernel_size: Tuple[int, int] = (1, 1),
            stride: Tuple[int, int] = (1, 1),
            padding: Tuple[int, int] = (0, 0),
            activation=nn.ReLU(inplace=True),
            bn: bool = False,
            init=nn.init.kaiming_normal,
            bias: bool = True,
            preact: bool = False,
            name: str = ""
    ):
        layer_types = dict(conv=nn.Conv2d, batch_norm=BatchNorm2d)
        super().__init__(
            in_size, out_size, kernel_size, stride, padding,
            activation, bn, init,
            bias=bias, preact=preact, name=name,
            **layer_types
        )
286 | 
287 | 
class Conv3d(_ConvBase):
    """``_ConvBase`` built on ``nn.Conv3d`` and this module's ``BatchNorm3d``.

    All options after ``out_size`` are keyword-only; the defaults give a
    1x1x1, stride-1, unpadded convolution followed by ReLU.
    """

    def __init__(
            self,
            in_size: int,
            out_size: int,
            *,
            kernel_size: Tuple[int, int, int] = (1, 1, 1),
            stride: Tuple[int, int, int] = (1, 1, 1),
            padding: Tuple[int, int, int] = (0, 0, 0),
            activation=nn.ReLU(inplace=True),
            bn: bool = False,
            init=nn.init.kaiming_normal,
            bias: bool = True,
            preact: bool = False,
            name: str = ""
    ):
        layer_types = dict(conv=nn.Conv3d, batch_norm=BatchNorm3d)
        super().__init__(
            in_size, out_size, kernel_size, stride, padding,
            activation, bn, init,
            bias=bias, preact=preact, name=name,
            **layer_types
        )
320 | 
321 | 
class FC(nn.Sequential):
    """Fully connected layer with optional batch norm and activation.

    Layout, with each key prefixed by ``name``:

    * ``preact=False``: fc -> bn -> activation
    * ``preact=True``:  bn -> activation -> fc

    The linear layer carries a zero-initialised bias only when ``bn`` is
    false.  ``init`` (if given) is applied to the linear weight.  The norm is
    this module's ``BatchNorm1d``, sized for ``in_size`` when it precedes the
    linear layer and ``out_size`` otherwise.
    """

    def __init__(
            self,
            in_size: int,
            out_size: int,
            *,
            activation=nn.ReLU(inplace=True),
            bn: bool = False,
            init=None,
            preact: bool = False,
            name: str = ""
    ):
        super().__init__()

        use_bias = not bn
        fc = nn.Linear(in_size, out_size, bias=use_bias)
        if init is not None:
            init(fc.weight)
        if use_bias:
            nn.init.constant(fc.bias, 0)

        companions = []
        if bn:
            norm_width = in_size if preact else out_size
            companions.append((name + 'bn', BatchNorm1d(norm_width)))
        if activation is not None:
            companions.append((name + 'activation', activation))

        linear_entry = [(name + 'fc', fc)]
        layout = companions + linear_entry if preact else linear_entry + companions
        for key, module in layout:
            self.add_module(key, module)
358 | 
359 | 
class _DropoutNoScaling(InplaceFunction):
    """Dropout that zeroes entries without rescaling the ones that survive.

    The forward pass multiplies the input by a 0/1 mask drawn with keep
    probability ``1 - p``; there is no division by ``1 - p``.  The same mask
    is applied to the incoming gradient in ``backward``.  When ``train`` is
    false or ``p`` is 0 the input passes through unchanged (as a clone unless
    ``inplace`` is set).
    """

    @staticmethod
    def _make_noise(input):
        # Uninitialised buffer of the same type and shape as ``input``;
        # ``forward`` fills it.
        return input.new().resize_as_(input)

    @staticmethod
    def symbolic(g, input, p=0.5, train=False, inplace=False):
        if inplace:
            return None
        dropout_node = g.create("Dropout", [input])
        dropout_node = dropout_node.f_("ratio", p).i_("is_test", not train)
        n = g.appendNode(dropout_node)
        real = g.appendNode(g.createSelect(n, 0))
        g.appendNode(g.createSelect(n, 1))
        return real

    @classmethod
    def forward(cls, ctx, input, p=0.5, train=False, inplace=False):
        if p < 0 or p > 1:
            raise ValueError(
                "dropout probability has to be between 0 and 1, "
                "but got {}".format(p)
            )
        ctx.p, ctx.train, ctx.inplace = p, train, inplace

        if inplace:
            ctx.mark_dirty(input)
            output = input
        else:
            output = input.clone()

        # Nothing to drop: evaluation mode or zero probability.
        if not (ctx.p > 0 and ctx.train):
            return output

        mask = cls._make_noise(input)
        if ctx.p == 1:
            mask.fill_(0)
        else:
            mask.bernoulli_(1 - ctx.p)
        # Subclasses may return a smaller mask that broadcasts over input.
        ctx.noise = mask.expand_as(input)
        output.mul_(ctx.noise)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        grad_input = grad_output
        if ctx.p > 0 and ctx.train:
            grad_input = grad_output.mul(Variable(ctx.noise))
        # One gradient slot per forward argument: input, p, train, inplace.
        return grad_input, None, None, None


dropout_no_scaling = _DropoutNoScaling.apply
415 | 
416 | 
class _FeatureDropoutNoScaling(_DropoutNoScaling):
    """Variant of ``_DropoutNoScaling`` that draws one mask value per (dim 0, dim 1) pair.

    The mask has shape ``(input.size(0), input.size(1), 1, ..., 1)`` and is
    expanded over the remaining axes by the parent's ``forward``.
    """

    @staticmethod
    def symbolic(input, p=0.5, train=False, inplace=False):
        return None

    @staticmethod
    def _make_noise(input):
        # Singleton extents for every axis beyond the first two.
        mask_shape = (input.size(0), input.size(1)) + (1,) * (input.dim() - 2)
        return input.new().resize_(*mask_shape)


feature_dropout_no_scaling = _FeatureDropoutNoScaling.apply
432 | 
433 | 
def group_model_params(model: nn.Module):
    """Split parameters into optimizer groups with and without weight decay.

    A parameter goes into the no-decay group when its qualified name contains
    the substring ``"bn"`` or ``"bias"``; everything else keeps the
    optimizer's default weight decay.

    Returns:
        A two-element list of parameter-group dicts: the decay group first,
        then the no-decay group carrying ``weight_decay=0.0``.
    """
    decay_group, no_decay_group = [], []

    for name, param in model.named_parameters():
        exempt = ("bn" in name) or ("bias" in name)
        target = no_decay_group if exempt else decay_group
        target.append(param)

    # Every parameter must have landed in exactly one group.
    total = len(list(model.parameters()))
    assert total == len(decay_group) + len(no_decay_group)

    return [
        dict(params=decay_group),
        dict(params=no_decay_group, weight_decay=0.0),
    ]
451 | 
452 | 
def checkpoint_state(model=None, optimizer=None, best_prec=None, epoch=None):
    """Assemble a checkpoint dictionary.

    A model wrapped in ``torch.nn.DataParallel`` is unwrapped first, so the
    stored state dict comes from the inner ``module``.  Missing ``model`` or
    ``optimizer`` result in ``None`` entries.

    Returns:
        dict with keys ``'epoch'``, ``'best_prec'``, ``'model_state'`` and
        ``'optimizer_state'``.
    """
    model_state = None
    if model is not None:
        source = model.module if isinstance(model, torch.nn.DataParallel) else model
        model_state = source.state_dict()

    optim_state = None if optimizer is None else optimizer.state_dict()

    return {
        'epoch': epoch,
        'best_prec': best_prec,
        'model_state': model_state,
        'optimizer_state': optim_state
    }
469 | 
470 | 
def save_checkpoint(
        state, is_best, filename='checkpoint', bestname='model_best'
):
    """Serialize ``state`` to ``<filename>.pth.tar``.

    Args:
        state: object handed to ``torch.save``.
        is_best: when true, the file just written is also copied to
            ``<bestname>.pth.tar``.
        filename: path stem of the checkpoint (``.pth.tar`` is appended).
        bestname: path stem of the "best so far" copy.
    """
    # Imported here because the module's import block does not provide
    # shutil; without it the is_best branch raised NameError.
    import shutil

    filename = '{}.pth.tar'.format(filename)
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, '{}.pth.tar'.format(bestname))
478 | 
479 | 
def load_checkpoint(model=None, optimizer=None, filename='checkpoint'):
    """Restore model/optimizer state from ``<filename>.pth.tar``.

    Args:
        model: module to receive ``checkpoint['model_state']``; skipped when
            ``None`` or when the stored state is ``None``.
        optimizer: optimizer to receive ``checkpoint['optimizer_state']``;
            skipped under the same conditions.
        filename: path stem of the checkpoint (``.pth.tar`` is appended).

    Returns:
        ``(epoch, best_prec)`` as stored in the checkpoint, or
        ``(None, None)`` when the file does not exist.
    """
    # Defaults for the "not found" path; previously these names were unbound
    # there and the final return raised UnboundLocalError.
    epoch, best_prec = None, None

    filename = "{}.pth.tar".format(filename)
    if os.path.isfile(filename):
        print("==> Loading from checkpoint '{}'".format(filename))
        checkpoint = torch.load(filename)
        epoch = checkpoint['epoch']
        best_prec = checkpoint['best_prec']
        if model is not None and checkpoint['model_state'] is not None:
            model.load_state_dict(checkpoint['model_state'])
        if optimizer is not None and checkpoint['optimizer_state'] is not None:
            optimizer.load_state_dict(checkpoint['optimizer_state'])
        print("==> Done")
    else:
        print("==> Checkpoint '{}' not found".format(filename))

    return epoch, best_prec
496 | 
497 | 
def variable_size_collate(pad_val=0, use_shared_memory=True):
    """Build a collate function that pads samples along their first axis.

    Args:
        pad_val: value written into the padded region of tensor batches.
        use_shared_memory: when true, the output tensor is allocated in
            shared memory (via the storage's ``_new_shared``); otherwise a
            regular tensor is allocated.

    Returns:
        ``wrapped(batch)``, which handles, by type of ``batch[0]``:

        * tensors: padded to the longest first-axis length and stacked into
          ``(len(batch), max_len, *batch[0].shape[1:])``;
        * numpy arrays: converted with ``torch.from_numpy`` and treated as
          tensors (string/object dtypes raise ``TypeError``);
        * numpy scalars, ints, floats: packed into a 1-D tensor;
        * mappings: collated per key;
        * sequences: transposed and collated per position.

        Anything else, including ``str``, raises ``TypeError``.
    """
    # Local imports: the module's import block provides neither ``re`` nor
    # ``collections``.  The abstract base classes live in ``collections.abc``;
    # the ``collections.Mapping`` / ``collections.Sequence`` aliases were
    # removed in Python 3.10.
    import collections.abc as container_abcs
    import re

    _numpy_type_map = {
        'float64': torch.DoubleTensor,
        'float32': torch.FloatTensor,
        'float16': torch.HalfTensor,
        'int64': torch.LongTensor,
        'int32': torch.IntTensor,
        'int16': torch.ShortTensor,
        'int8': torch.CharTensor,
        'uint8': torch.ByteTensor,
    }

    def wrapped(batch):
        "Puts each data field into a tensor with outer dimension batch size"

        error_msg = "batch must contain tensors, numbers, dicts or lists; found {}"
        first = batch[0]
        elem_type = type(first)
        if torch.is_tensor(first):
            max_len = max(b.size(0) for b in batch)
            out_shape = [len(batch), max_len] + list(first.size())[1:]

            # Element count from the target shape directly; dividing by each
            # sample's own length fails on zero-length samples.
            numel = 1
            for extent in out_shape:
                numel *= extent

            if use_shared_memory:
                # If we're in a background process, concatenate directly into a
                # shared memory tensor to avoid an extra copy
                storage = first.storage()._new_shared(numel)
                out = first.new(storage)
            else:
                out = first.new(numel)

            out = out.view(*out_shape)
            out.fill_(pad_val)
            for i, sample in enumerate(batch):
                out[i, 0:sample.size(0)] = sample

            return out
        elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \
                and elem_type.__name__ != 'string_':
            if elem_type.__name__ == 'ndarray':
                # array of string classes and object
                if re.search('[SaUO]', first.dtype.str) is not None:
                    raise TypeError(error_msg.format(first.dtype))

                return wrapped([torch.from_numpy(b) for b in batch])
            if first.shape == ():  # scalars
                py_type = float if first.dtype.name.startswith('float') else int
                return _numpy_type_map[first.dtype.name](
                    list(map(py_type, batch))
                )
        elif isinstance(first, int):
            return torch.LongTensor(batch)
        elif isinstance(first, float):
            return torch.DoubleTensor(batch)
        elif isinstance(first, str):
            # A str is itself a Sequence of one-character strs, so letting it
            # reach the Sequence branch would recurse without end.
            raise TypeError(error_msg.format(elem_type))
        elif isinstance(first, container_abcs.Mapping):
            return {key: wrapped([d[key] for d in batch]) for key in first}
        elif isinstance(first, container_abcs.Sequence):
            transposed = zip(*batch)
            return [wrapped(samples) for samples in transposed]

        raise TypeError((error_msg.format(type(first))))

    return wrapped
566 | 
567 | 
class TrainValSplitter():
    r"""
        Creates a training and validation split to be used as the sampler in a pytorch DataLoader
    Parameters
    ---------
        numel : int
            Number of elements in the entire training dataset
        percent_train : float
            Fraction (0..1) of the data that goes to the training split
        shuffled : bool
            Whether or not shuffle which data goes to which split

    Attributes
    ---------
        train, val : torch.utils.data.sampler.SubsetRandomSampler
            Samplers over the first ``int(percent_train * numel)`` indices and
            over all remaining indices respectively.
    """

    def __init__(
            self, *, numel: int, percent_train: float, shuffled: bool = False
    ):
        indices = np.arange(numel)
        if shuffled:
            np.random.shuffle(indices)

        split = int(percent_train * numel)
        self.train = torch.utils.data.sampler.SubsetRandomSampler(
            indices[:split]
        )
        # Everything after the split point belongs to validation.  The former
        # ``[split:-1]`` slice silently discarded the final sample.
        self.val = torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:]
        )
594 | 
595 | 
class CrossValSplitter():
    r"""
        Class that creates cross validation splits.  The train and val splits can be used in pytorch DataLoaders.  The splits can be updated
        by calling next(self) or using a loop:
            for _ in self:
                ....
        Indexing with ``self[i]`` selects fold ``i`` as the validation fold.
    Parameters
    ---------
        numel : int
            Number of elements in the training set
        k_folds : int
            Number of folds (at least 2)
        shuffled : bool
            Whether or not to shuffle which data goes in which fold

    Attributes
    ---------
        folds : 1-D object array holding one index array per fold
        train, val : torch.utils.data.sampler.SubsetRandomSampler
            Samplers whose ``indices`` are replaced whenever the fold changes
        metrics : dict mapping a metric name to the list of posted values
    """

    def __init__(self, *, numel: int, k_folds: int, shuffled: bool = False):
        if k_folds < 2:
            raise ValueError(
                "k_folds must be at least 2, but got {}".format(k_folds)
            )

        indices = np.arange(numel)
        if shuffled:
            np.random.shuffle(indices)

        # Store the folds in an explicitly 1-D object array.  Letting numpy
        # infer the shape turns equally sized folds into a 2-D object array,
        # which changes what indexing and concatenation return.
        splits = np.array_split(indices, k_folds)
        self.folds = np.empty(len(splits), dtype=object)
        for i, fold in enumerate(splits):
            self.folds[i] = fold
        self.current_v_ind = -1

        self.val = torch.utils.data.sampler.SubsetRandomSampler(self.folds[0])
        self.train = torch.utils.data.sampler.SubsetRandomSampler(
            np.concatenate(list(self.folds[1:]), axis=0)
        )

        self.metrics = {}

    def __iter__(self):
        self.current_v_ind = -1
        return self

    def __len__(self):
        return len(self.folds)

    def __getitem__(self, idx):
        """Make fold ``idx`` the validation fold and the rest the training set."""
        assert idx >= 0 and idx < len(self)
        # SubsetRandomSampler reads its ``indices`` attribute; the previous
        # code assigned to a misspelled attribute, so the samplers never
        # moved off the initial fold.
        self.val.indices = self.folds[idx]
        self.train.indices = np.concatenate(
            [fold for j, fold in enumerate(self.folds) if j != idx], axis=0
        )

    def __next__(self):
        self.current_v_ind += 1
        if self.current_v_ind >= len(self):
            raise StopIteration

        self[self.current_v_ind]

    def update_metrics(self, to_post: dict):
        """Append each value in ``to_post`` to the history of its metric."""
        for k, v in to_post.items():
            if k in self.metrics:
                self.metrics[k].append(v)
            else:
                self.metrics[k] = [v]

    def print_metrics(self):
        """Print mean and 95% two-sided t-interval half-width per metric.

        A metric with fewer than two samples has no sample standard
        deviation; its margin is printed as ``nan``.
        """
        for name, samples in self.metrics.items():
            xbar = stats.mean(samples)
            if len(samples) < 2:
                margin_of_error = float('nan')
            else:
                sx = stats.stdev(samples, xbar)
                tstar = student_t.ppf(1.0 - 0.025, len(samples) - 1)
                margin_of_error = tstar * sx / math.sqrt(len(samples))
            print("{}: {} +/- {}".format(name, xbar, margin_of_error))
662 | 
663 | 
def set_bn_momentum_default(bn_momentum):
    """Return a callback that sets ``momentum`` on batch-norm modules.

    The callback takes a single module and assigns ``bn_momentum`` to it when
    it is an ``nn.BatchNorm1d``, ``nn.BatchNorm2d`` or ``nn.BatchNorm3d``;
    other modules are left untouched.
    """
    bn_types = (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)

    def fn(m):
        if not isinstance(m, bn_types):
            return
        m.momentum = bn_momentum

    return fn
671 | 
672 | 
class BNMomentumScheduler(object):
    """Apply an epoch-dependent batch-norm momentum to a model.

    Args:
        model: the ``nn.Module`` whose sub-modules are updated.
        bn_lambda: callable mapping an epoch number to a momentum value.
        last_epoch: epoch counter to start from.
        setter: factory taking a momentum and returning a per-module callback
            that is passed to ``model.apply``.

    Construction immediately applies the momentum for ``last_epoch + 1`` and
    then resets the counter to ``last_epoch``.

    Raises:
        RuntimeError: if ``model`` is not an ``nn.Module``.
    """

    def __init__(
            self, model, bn_lambda, last_epoch=-1,
            setter=set_bn_momentum_default
    ):
        if not isinstance(model, nn.Module):
            message = "Class '{}' is not a PyTorch nn Module".format(
                type(model).__name__
            )
            raise RuntimeError(message)

        self.model = model
        self.setter = setter
        self.lmbd = bn_lambda

        # step() overwrites last_epoch, so restore the requested value after.
        self.step(last_epoch + 1)
        self.last_epoch = last_epoch

    def step(self, epoch=None):
        """Advance to ``epoch`` (default: next epoch) and apply its momentum."""
        target_epoch = self.last_epoch + 1 if epoch is None else epoch
        self.last_epoch = target_epoch
        momentum = self.lmbd(target_epoch)
        self.model.apply(self.setter(momentum))

    def get_momentum(self, epoch=None):
        """Return the momentum for ``epoch`` (default: ``last_epoch + 1``)."""
        target_epoch = self.last_epoch + 1 if epoch is None else epoch
        return self.lmbd(target_epoch)


--------------------------------------------------------------------------------
/voting_evaluate_cls.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.optim as optim
  3 | import torch.nn as nn
  4 | from torch.utils.data import DataLoader
  5 | from torch.autograd import Variable
  6 | import torch.nn.functional as F
  7 | import numpy as np
  8 | import os
  9 | from torchvision import transforms
 10 | from models import DensePointCls_L6 as DensePoint
 11 | from data import ModelNet40Cls
 12 | import utils.pytorch_utils as pt_utils
 13 | import utils.pointnet2_utils as pointnet2_utils
 14 | import data.data_utils as d_utils
 15 | import argparse
 16 | import random
 17 | import yaml
 18 | 
 19 | torch.backends.cudnn.enabled = True
 20 | torch.backends.cudnn.benchmark = True
 21 | torch.backends.cudnn.deterministic = True
 22 | 
 23 | seed = 123
 24 | random.seed(seed)
 25 | np.random.seed(seed)
 26 | torch.manual_seed(seed)            
 27 | torch.cuda.manual_seed(seed)       
 28 | torch.cuda.manual_seed_all(seed)   
 29 | 
 30 | parser = argparse.ArgumentParser(description='DensePoint Shape Classification Voting Evaluate')
 31 | parser.add_argument('--config', default='cfgs/config_cls.yaml', type=str)
 32 | 
 33 | NUM_REPEAT = 300
 34 | NUM_VOTE = 10
 35 | 
 36 | def main():
 37 |     args = parser.parse_args()
 38 |     with open(args.config) as f:
 39 |         config = yaml.load(f)
 40 |     for k, v in config['common'].items():
 41 |         setattr(args, k, v)
 42 |     
 43 |     test_transforms = transforms.Compose([
 44 |         d_utils.PointcloudToTensor()
 45 |     ])
 46 | 
 47 |     test_dataset = ModelNet40Cls(num_points = args.num_points, root = args.data_root, transforms=test_transforms, train=False)
 48 |     test_dataloader = DataLoader(
 49 |         test_dataset, 
 50 |         batch_size=args.batch_size,
 51 |         shuffle=False, 
 52 |         num_workers=int(args.workers), 
 53 |         pin_memory=True
 54 |     )
 55 |     
 56 |     model = DensePoint(num_classes = args.num_classes, input_channels = args.input_channels, use_xyz = True)
 57 |     model.cuda()
 58 |     
 59 |     if args.checkpoint is not '':
 60 |         model.load_state_dict(torch.load(args.checkpoint))
 61 |         print('Load model successfully: %s' % (args.checkpoint))
 62 |     
 63 |     # evaluate
 64 |     PointcloudScale = d_utils.PointcloudScale()   # initialize random scaling
 65 |     model.eval()
 66 |     global_acc = 0
 67 |     for i in range(NUM_REPEAT):
 68 |         preds = []
 69 |         labels = []
 70 |         for j, data in enumerate(test_dataloader, 0):
 71 |             points, target = data
 72 |             points, target = points.cuda(), target.cuda()
 73 |             points, target = Variable(points, volatile=True), Variable(target, volatile=True)
 74 |             
 75 |             # fastest point sampling
 76 |             fps_idx = pointnet2_utils.furthest_point_sample(points, 1200)  # (B, npoint)
 77 |             pred = 0
 78 |             for v in range(NUM_VOTE):
 79 |                 new_fps_idx = fps_idx[:, np.random.choice(1200, args.num_points, False)]
 80 |                 new_points = pointnet2_utils.gather_operation(points.transpose(1, 2).contiguous(), new_fps_idx).transpose(1, 2).contiguous()
 81 |                 if v > 0:
 82 |                     new_points.data = PointcloudScale(new_points.data)
 83 |                 pred += F.softmax(model(new_points), dim = 1)
 84 |             pred /= NUM_VOTE
 85 |             target = target.view(-1)
 86 |             _, pred_choice = torch.max(pred.data, -1)
 87 |             
 88 |             preds.append(pred_choice)
 89 |             labels.append(target.data)
 90 |     
 91 |         preds = torch.cat(preds, 0)
 92 |         labels = torch.cat(labels, 0)
 93 |         acc = (preds == labels).sum() / labels.numel()
 94 |         if acc > global_acc:
 95 |             global_acc = acc
 96 |         print('Repeat %3d \t Acc: %0.6f' % (i + 1, acc))
 97 |     print('\nBest voting acc: %0.6f' % (global_acc))
 98 |         
 99 | if __name__ == '__main__':
100 |     main()


--------------------------------------------------------------------------------