├── README.md
└── RSCNNEQ
    ├── CMakeLists.txt
    ├── cfgs
    │   └── config_ssn_cls.yaml
    ├── cls
    │   └── model_cls_ssn_iter_70917_acc_0.925446.pth
    ├── data
    │   ├── ModelNet40Loader.py
    │   ├── ShapeNetPartLoader.py
    │   ├── __init__.py
    │   └── data_utils.py
    ├── models
    │   ├── __init__.py
    │   └── rscnn_ssn_cls.py
    ├── train_cls.py
    ├── train_cls.sh
    ├── utils
    │   ├── build_ffi.py
    │   ├── cinclude
    │   │   ├── cuda_utils.h
    │   │   ├── ellipsoid_query_gpu.h
    │   │   ├── ellipsoid_query_wrapper.h
    │   │   ├── group_points_gpu.h
    │   │   ├── group_points_wrapper.h
    │   │   ├── helper_cuda.h
    │   │   ├── helper_string.h
    │   │   ├── interpolate_gpu.h
    │   │   ├── interpolate_wrapper.h
    │   │   ├── sampling_gpu.h
    │   │   └── sampling_wrapper.h
    │   ├── csrc
    │   │   ├── ellipsoid_query.c
    │   │   ├── ellipsoid_query_gpu.cu
    │   │   ├── group_points.c
    │   │   ├── group_points_gpu.cu
    │   │   ├── interpolate.c
    │   │   ├── interpolate_gpu.cu
    │   │   ├── sampling.c
    │   │   └── sampling_gpu.cu
    │   ├── linalg_utils.py
    │   ├── pointnet2_modules.py
    │   ├── pointnet2_utils.py
    │   └── pytorch_utils
    │       ├── __init__.py
    │       └── pytorch_utils.py
    └── voting_evaluate_cls.py
/README.md: -------------------------------------------------------------------------------- 1 | # EllipsoidQuery 2 | 3 | This code submission reproduces the impact of re-oriented ellipsoid querying on RS-CNN Shape Classification.
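For context, ellipsoid querying replaces the spherical neighborhood (ball query) used for grouping in PointNet++/RS-CNN with an ellipsoidal one of radii (e1, e2, e3), which the CUDA kernel in utils/csrc/ellipsoid_query_gpu.cu additionally re-orients along each neighborhood's principal directions. Below is a minimal NumPy sketch of just the axis-aligned membership test (illustrative only; the function name is hypothetical and the re-orientation step is omitted):

```python
import numpy as np

def ellipsoid_query_cpu(center, points, e1, e2, e3, nsample):
    # A point p lies inside the ellipsoid centered at `center` with radii
    # (e1, e2, e3) iff sum(((p - center) / radii)**2) <= 1.
    scaled = (points - center) / np.array([e1, e2, e3])
    inside = np.where((scaled ** 2).sum(axis=1) <= 1.0)[0]
    return inside[:nsample]  # cap the group size, as the CUDA kernel does

# Radii and group size taken from the first SA layer in models/rscnn_ssn_cls.py
pts = np.random.rand(2048, 3).astype(np.float32)
print(ellipsoid_query_cpu(pts[0], pts, 0.25, 0.15, 0.15, 48))
```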
4 | 5 | Contact email: ssheshap@udel.edu 6 | 7 | Software requirements 8 | ---------------------- 9 | Ubuntu 18.04
10 | Python 3.5 (recommend Anaconda3)
11 | Pytorch 0.4.1
12 | CMake 3.10.2
13 | CUDA 10.0 + cuDNN 7
14 | Cudatoolkit V10.0.130
15 | 16 | Note: This code also works in the environment suggested by the authors of RS-CNN (https://github.com/Yochengliu/Relation-Shape-CNN/).
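A quick sanity check (not part of the original repository) that the installed PyTorch matches the requirements above and can see the GPU, which the custom kernels need:

```python
import torch

print(torch.__version__)          # should report 0.4.1 per the requirements above
print(torch.cuda.is_available())  # must be True; the ellipsoid-query kernels are CUDA-only
```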
17 | 18 | Download 19 | -------- 20 | git clone https://github.com/VimsLab/EllipsoidQuery.git
21 | cd EllipsoidQuery/RSCNNEQ
22 | 23 | Building Kernel 24 | --------------- 25 | mkdir build && cd build
26 | cmake .. && make
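If the build succeeds, the custom target in RSCNNEQ/CMakeLists.txt (shown further below) emits the compiled FFI extension; a quick check from the RSCNNEQ directory:

```python
import os

# Output path declared by add_custom_command in CMakeLists.txt
so_path = "utils/_ext/pointnet2/_pointnet2.so"
assert os.path.isfile(so_path), "kernel build failed: %s not found" % so_path
print("pointnet2 extension built at", so_path)
```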
27 | 28 | Dataset 29 | ------- 30 | Download and unzip ModelNet40 (415 MB) into the data directory.
31 | https://shapenet.cs.stanford.edu/media/modelnet40_ply_hdf5_2048.zip
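Equivalently, the download and extraction can be scripted; a convenience sketch (the loaders expect the data under data/modelnet40_ply_hdf5_2048, see data/ModelNet40Loader.py below):

```python
import os
import urllib.request
import zipfile

URL = "https://shapenet.cs.stanford.edu/media/modelnet40_ply_hdf5_2048.zip"
os.makedirs("data", exist_ok=True)
zip_path = os.path.join("data", "modelnet40_ply_hdf5_2048.zip")
urllib.request.urlretrieve(URL, zip_path)  # ~415 MB download
with zipfile.ZipFile(zip_path) as zf:
    zf.extractall("data")                  # creates data/modelnet40_ply_hdf5_2048/
```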
32 | 33 | Usage: Train 34 | ------------ 35 | sh train_cls.sh
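Note: train_cls.sh just sets CUDA_VISIBLE_DEVICES=0 and launches the trainer in the background with output teed to log/ (see the script below); the equivalent foreground command is python -u train_cls.py --config cfgs/config_ssn_cls.yaml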
36 | 37 | Note: We provide a trained Single-Scale-Neighborhood classification model in the cls folder, whose accuracy is 92.55% ('cls/model_cls_ssn_iter_70917_acc_0.925446.pth').
38 | 39 | Usage: Evaluation 40 | ----------------- 41 | Edit cfgs/config_ssn_cls.yaml so that the checkpoint field points to the *.pth file in the cls/ folder with the highest accuracy, e.g. checkpoint: 'cls/model_cls_ssn_iter_70917_acc_0.925446.pth'.
42 | python voting_evaluate_cls.py
43 | 44 | Note: You can use our model cls/model_cls_ssn_iter_70917_acc_0.925446.pth as the checkpoint in config_ssn_cls.yaml; with majority voting you should get an accuracy of 93.51%. Due to randomness, the accuracy might vary slightly.
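For reference, a schematic of the majority-voting step (the actual procedure is in voting_evaluate_cls.py, whose source is not reproduced in this excerpt; the function name and augmentation hook below are illustrative):

```python
import torch

def vote_predict(model, points, n_votes=10, augment=lambda p: p):
    # Classify n_votes (randomly augmented) copies of each shape and
    # return the class that collects the most votes.
    votes = torch.zeros(points.size(0), 40, device=points.device)  # 40 ModelNet40 classes
    for _ in range(n_votes):
        pred = model(augment(points))            # (B, 40) logits
        idx = pred.argmax(dim=1, keepdim=True)   # (B, 1) predicted class
        votes.scatter_add_(1, idx, torch.ones_like(votes[:, :1]))
    return votes.argmax(dim=1)                   # most-voted class per shape
```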
46 | This code has been heavily borrowed from https://github.com/Yochengliu/Relation-Shape-CNN/ and https://github.com/erikwijmans/Pointnet2_PyTorch
47 | 48 | 49 | To cite our paper, please use the BibTeX entry below. 50 | 51 | ```BibTex 52 | @InProceedings{Sheshappanavar_2020_CVPR_Workshops, 53 | author = {Venkanna Sheshappanavar, Shivanand and Kambhamettu, Chandra}, 54 | title = {A Novel Local Geometry Capture in PointNet++ for 3D Classification}, 55 | booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops}, 56 | month = {June}, 57 | year = {2020} 58 | } 59 | ``` 60 | -------------------------------------------------------------------------------- /RSCNNEQ/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(PointNet2) 2 | cmake_minimum_required(VERSION 2.8) 3 | 4 | find_package(CUDA REQUIRED) 5 | 6 | include_directories("${CMAKE_CURRENT_SOURCE_DIR}/utils/cinclude") 7 | cuda_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/utils/cinclude") 8 | file(GLOB cuda_kernels_src "${CMAKE_CURRENT_SOURCE_DIR}/utils/csrc/*.cu") 9 | cuda_compile(cuda_kernels SHARED ${cuda_kernels_src} OPTIONS -O3) 10 | 11 | set(BUILD_CMD python "${CMAKE_CURRENT_SOURCE_DIR}/utils/build_ffi.py") 12 | file(GLOB wrapper_headers "${CMAKE_CURRENT_SOURCE_DIR}/utils/cinclude/*wrapper.h") 13 | file(GLOB wrapper_sources "${CMAKE_CURRENT_SOURCE_DIR}/utils/csrc/*.c") 14 | add_custom_command(OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/utils/_ext/pointnet2/_pointnet2.so" 15 | WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/utils 16 | COMMAND ${BUILD_CMD} --build --objs ${cuda_kernels} 17 | DEPENDS ${cuda_kernels} 18 | DEPENDS ${wrapper_headers} 19 | DEPENDS ${wrapper_sources} 20 | VERBATIM) 21 | 22 | add_custom_target(pointnet2_ext ALL 23 | DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/utils/_ext/pointnet2/_pointnet2.so") 24 | 25 | -------------------------------------------------------------------------------- /RSCNNEQ/cfgs/config_ssn_cls.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | workers: 4 3 | 4 | num_points: 1024 5 | num_classes: 40 6 | batch_size: 16 7 | 8 | base_lr: 0.002 9 | lr_clip: 0.00001 10 | lr_decay: 0.7 11 | decay_step: 25 12 | epochs: 200 13 | 14 | weight_decay: 0 15 | bn_momentum: 0.9 16 | bnm_clip: 0.01 17 | bn_decay: 0.5 18 | 19 | evaluate: 1 20 | val_freq_epoch: 0.5 # frequency in epoch for validation, can be decimal 21 | print_freq_iter: 20 # frequency in iteration for printing information 22 | 23 | input_channels: 0 # feature channels except (x, y, z) 24 | 25 | # h_ij: 0 for 3D Euclidean distance (3D Ed), channels = 1 26 | # 1 for (3D Ed, x_i, x_j, x_j - x_i), channels = 10 27 | # 2 for (2D Ed, x'_i, x'_j, x'_j - x'_i), channels = 10, x' indicates 2D coordinates 28 | relation_prior: 1 29 | 30 | checkpoint: '' # the model to start from 31 | save_path: cls 32 | data_root: data 33 | -------------------------------------------------------------------------------- /RSCNNEQ/cls/model_cls_ssn_iter_70917_acc_0.925446.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VimsLab/EllipsoidQuery/7279e2920991c60a5be650ac37832af0b4c8cd76/RSCNNEQ/cls/model_cls_ssn_iter_70917_acc_0.925446.pth -------------------------------------------------------------------------------- /RSCNNEQ/data/ModelNet40Loader.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.utils.data as data 3 | import numpy as np 4 | import os, sys, h5py 5 | 6 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 7 | sys.path.append(BASE_DIR) 8 | 9
| def _get_data_files(list_filename): 10 | with open(list_filename) as f: 11 | return [line.rstrip()[5:] for line in f] 12 | 13 | def _load_data_file(name): 14 | f = h5py.File(name) 15 | data = f['data'][:] 16 | label = f['label'][:] 17 | return data, label 18 | 19 | class ModelNet40Cls(data.Dataset): 20 | 21 | def __init__( 22 | self, num_points, root, transforms=None, train=True 23 | ): 24 | super().__init__() 25 | 26 | self.transforms = transforms 27 | 28 | root = os.path.abspath(root) 29 | self.folder = "modelnet40_ply_hdf5_2048" 30 | self.data_dir = os.path.join(root, self.folder) 31 | 32 | self.train, self.num_points = train, num_points 33 | if self.train: 34 | self.files = _get_data_files( \ 35 | os.path.join(self.data_dir, 'train_files.txt')) 36 | else: 37 | self.files = _get_data_files( \ 38 | os.path.join(self.data_dir, 'test_files.txt')) 39 | 40 | point_list, label_list = [], [] 41 | for f in self.files: 42 | points, labels = _load_data_file(os.path.join(root, f)) 43 | point_list.append(points) 44 | label_list.append(labels) 45 | 46 | self.points = np.concatenate(point_list, 0) 47 | self.labels = np.concatenate(label_list, 0) 48 | 49 | def __getitem__(self, idx): 50 | pt_idxs = np.arange(0, self.points.shape[1]) # 2048 51 | if self.train: 52 | np.random.shuffle(pt_idxs) 53 | 54 | current_points = self.points[idx, pt_idxs].copy() 55 | label = torch.from_numpy(self.labels[idx]).type(torch.LongTensor) 56 | 57 | if self.transforms is not None: 58 | current_points = self.transforms(current_points) 59 | 60 | return current_points, label 61 | 62 | def __len__(self): 63 | return self.points.shape[0] 64 | 65 | if __name__ == "__main__": 66 | from torchvision import transforms 67 | import data_utils as d_utils 68 | 69 | transforms = transforms.Compose([ 70 | d_utils.PointcloudToTensor(), 71 | d_utils.PointcloudRotate(axis=np.array([1,0,0])), 72 | d_utils.PointcloudScale(), 73 | d_utils.PointcloudTranslate(), 74 | d_utils.PointcloudJitter() 75 | ]) 76 | dset = ModelNet40Cls(16, "./", train=True, transforms=transforms) 77 | print(dset[0][0]) 78 | print(dset[0][1]) 79 | print(len(dset)) 80 | dloader = torch.utils.data.DataLoader(dset, batch_size=32, shuffle=True) 81 | -------------------------------------------------------------------------------- /RSCNNEQ/data/ShapeNetPartLoader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import torch 4 | import json 5 | import numpy as np 6 | import sys 7 | import torchvision.transforms as transforms 8 | 9 | def pc_normalize(pc): 10 | l = pc.shape[0] 11 | centroid = np.mean(pc, axis=0) 12 | pc = pc - centroid 13 | m = np.max(np.sqrt(np.sum(pc**2, axis=1))) 14 | pc = pc / m 15 | return pc 16 | 17 | class ShapeNetPart(): 18 | def __init__(self, root, num_points = 2048, split='train', normalize=True, transforms = None): 19 | self.transforms = transforms 20 | self.num_points = num_points 21 | self.root = root 22 | self.catfile = os.path.join(self.root, 'synsetoffset2category.txt') 23 | self.normalize = normalize 24 | 25 | self.cat = {} 26 | with open(self.catfile, 'r') as f: 27 | for line in f: 28 | ls = line.strip().split() 29 | self.cat[ls[0]] = ls[1] 30 | self.cat = {k:v for k,v in self.cat.items()} 31 | 32 | self.meta = {} 33 | with open(os.path.join(self.root, 'train_test_split', 'shuffled_train_file_list.json'), 'r') as f: 34 | train_ids = set([str(d.split('/')[2]) for d in json.load(f)]) 35 | with open(os.path.join(self.root, 'train_test_split', 'shuffled_val_file_list.json'), 
'r') as f: 36 | val_ids = set([str(d.split('/')[2]) for d in json.load(f)]) 37 | with open(os.path.join(self.root, 'train_test_split', 'shuffled_test_file_list.json'), 'r') as f: 38 | test_ids = set([str(d.split('/')[2]) for d in json.load(f)]) 39 | for item in self.cat: 40 | self.meta[item] = [] 41 | dir_point = os.path.join(self.root, self.cat[item]) 42 | fns = sorted(os.listdir(dir_point)) 43 | if split=='trainval': 44 | fns = [fn for fn in fns if ((fn[0:-4] in train_ids) or (fn[0:-4] in val_ids))] 45 | elif split=='train': 46 | fns = [fn for fn in fns if fn[0:-4] in train_ids] 47 | elif split=='val': 48 | fns = [fn for fn in fns if fn[0:-4] in val_ids] 49 | elif split=='test': 50 | fns = [fn for fn in fns if fn[0:-4] in test_ids] 51 | else: 52 | print('Unknown split: %s. Exiting..'%(split)) 53 | exit(-1) 54 | 55 | for fn in fns: 56 | token = (os.path.splitext(os.path.basename(fn))[0]) 57 | self.meta[item].append(os.path.join(dir_point, token + '.txt')) 58 | 59 | self.datapath = [] 60 | for item in self.cat: 61 | for fn in self.meta[item]: 62 | self.datapath.append((item, fn)) 63 | 64 | self.classes = dict(zip(self.cat, range(len(self.cat)))) 65 | # Mapping from category ('Chair') to a list of int [10,11,12,13] as segmentation labels 66 | self.seg_classes = {'Earphone': [16, 17, 18], 'Motorbike': [30, 31, 32, 33, 34, 35], 'Rocket': [41, 42, 43], 'Car': [8, 9, 10, 11], 'Laptop': [28, 29], 'Cap': [6, 7], 'Skateboard': [44, 45, 46], 'Mug': [36, 37], 'Guitar': [19, 20, 21], 'Bag': [4, 5], 'Lamp': [24, 25, 26, 27], 'Table': [47, 48, 49], 'Airplane': [0, 1, 2, 3], 'Pistol': [38, 39, 40], 'Chair': [12, 13, 14, 15], 'Knife': [22, 23]} 67 | 68 | self.cache = {} 69 | self.cache_size = 20000 70 | 71 | def __getitem__(self, index): 72 | if index in self.cache: 73 | point_set, seg, cls = self.cache[index] 74 | else: 75 | fn = self.datapath[index] 76 | cat = self.datapath[index][0] 77 | cls = self.classes[cat] 78 | cls = np.array([cls]).astype(np.int64) 79 | data = np.loadtxt(fn[1]).astype(np.float32) 80 | point_set = data[:,0:3] 81 | if self.normalize: 82 | point_set = pc_normalize(point_set) 83 | seg = data[:,-1].astype(np.int64) 84 | if len(self.cache) < self.cache_size: 85 | self.cache[index] = (point_set, seg, cls) 86 | 87 | choice = np.random.choice(len(seg), self.num_points, replace=True) 88 | #resample 89 | point_set = point_set[choice, :] 90 | seg = seg[choice] 91 | if self.transforms is not None: 92 | point_set = self.transforms(point_set) 93 | 94 | return point_set, torch.from_numpy(seg), torch.from_numpy(cls) 95 | 96 | def __len__(self): 97 | return len(self.datapath) 98 | 99 | -------------------------------------------------------------------------------- /RSCNNEQ/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .ModelNet40Loader import ModelNet40Cls 2 | from .ShapeNetPartLoader import ShapeNetPart -------------------------------------------------------------------------------- /RSCNNEQ/data/data_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | class PointcloudToTensor(object): 5 | def __call__(self, points): 6 | return torch.from_numpy(points).float() 7 | 8 | def angle_axis(angle: float, axis: np.ndarray): 9 | r"""Returns a 4x4 rotation matrix that performs a rotation around axis by angle 10 | 11 | Parameters 12 | ---------- 13 | angle : float 14 | Angle to rotate by 15 | axis: np.ndarray 16 | Axis to rotate about 17 | 18 | Returns 19 | 
------- 20 | torch.Tensor 21 | 3x3 rotation matrix 22 | """ 23 | u = axis / np.linalg.norm(axis) 24 | cosval, sinval = np.cos(angle), np.sin(angle) 25 | 26 | # yapf: disable 27 | cross_prod_mat = np.array([[0.0, -u[2], u[1]], 28 | [u[2], 0.0, -u[0]], 29 | [-u[1], u[0], 0.0]]) 30 | 31 | R = torch.from_numpy( 32 | cosval * np.eye(3) 33 | + sinval * cross_prod_mat 34 | + (1.0 - cosval) * np.outer(u, u) 35 | ) 36 | # yapf: enable 37 | return R.float() 38 | 39 | class PointcloudRotatebyAngle(object): 40 | def __init__(self, rotation_angle = 0.0): 41 | self.rotation_angle = rotation_angle 42 | 43 | def __call__(self, pc): 44 | normals = pc.size(2) > 3 45 | bsize = pc.size()[0] 46 | for i in range(bsize): 47 | cosval = np.cos(self.rotation_angle) 48 | sinval = np.sin(self.rotation_angle) 49 | rotation_matrix = np.array([[cosval, 0, sinval], 50 | [0, 1, 0], 51 | [-sinval, 0, cosval]]) 52 | rotation_matrix = torch.from_numpy(rotation_matrix).float().cuda() 53 | 54 | cur_pc = pc[i, :, :] 55 | if not normals: 56 | cur_pc = cur_pc @ rotation_matrix 57 | else: 58 | pc_xyz = cur_pc[:, 0:3] 59 | pc_normals = cur_pc[:, 3:] 60 | cur_pc[:, 0:3] = pc_xyz @ rotation_matrix 61 | cur_pc[:, 3:] = pc_normals @ rotation_matrix 62 | 63 | pc[i, :, :] = cur_pc 64 | 65 | return pc 66 | 67 | class PointcloudJitter(object): 68 | def __init__(self, std=0.01, clip=0.05): 69 | self.std, self.clip = std, clip 70 | 71 | def __call__(self, pc): 72 | bsize = pc.size()[0] 73 | for i in range(bsize): 74 | jittered_data = pc.new(pc.size(1), 3).normal_( 75 | mean=0.0, std=self.std 76 | ).clamp_(-self.clip, self.clip) 77 | pc[i, :, 0:3] += jittered_data 78 | 79 | return pc 80 | 81 | class PointcloudScaleAndTranslate(object): 82 | def __init__(self, scale_low=2. / 3., scale_high=3. / 2., translate_range=0.2): 83 | self.scale_low = scale_low 84 | self.scale_high = scale_high 85 | self.translate_range = translate_range 86 | 87 | def __call__(self, pc): 88 | bsize = pc.size()[0] 89 | for i in range(bsize): 90 | xyz1 = np.random.uniform(low=self.scale_low, high=self.scale_high, size=[3]) 91 | xyz2 = np.random.uniform(low=-self.translate_range, high=self.translate_range, size=[3]) 92 | 93 | pc[i, :, 0:3] = torch.mul(pc[i, :, 0:3], torch.from_numpy(xyz1).float().cuda()) + torch.from_numpy(xyz2).float().cuda() 94 | 95 | return pc 96 | 97 | class PointcloudScale(object): 98 | def __init__(self, scale_low=2. / 3., scale_high=3. 
/ 2.): 99 | self.scale_low = scale_low 100 | self.scale_high = scale_high 101 | 102 | def __call__(self, pc): 103 | bsize = pc.size()[0] 104 | for i in range(bsize): 105 | xyz1 = np.random.uniform(low=self.scale_low, high=self.scale_high, size=[3]) 106 | 107 | pc[i, :, 0:3] = torch.mul(pc[i, :, 0:3], torch.from_numpy(xyz1).float().cuda()) 108 | 109 | return pc 110 | 111 | class PointcloudTranslate(object): 112 | def __init__(self, translate_range=0.2): 113 | self.translate_range = translate_range 114 | 115 | def __call__(self, pc): 116 | bsize = pc.size()[0] 117 | for i in range(bsize): 118 | xyz2 = np.random.uniform(low=-self.translate_range, high=self.translate_range, size=[3]) 119 | 120 | pc[i, :, 0:3] = pc[i, :, 0:3] + torch.from_numpy(xyz2).float().cuda() 121 | 122 | return pc 123 | 124 | class PointcloudRandomInputDropout(object): 125 | def __init__(self, max_dropout_ratio=0.875): 126 | assert max_dropout_ratio >= 0 and max_dropout_ratio < 1 127 | self.max_dropout_ratio = max_dropout_ratio 128 | 129 | def __call__(self, pc): 130 | bsize = pc.size()[0] 131 | for i in range(bsize): 132 | dropout_ratio = np.random.random() * self.max_dropout_ratio # 0~0.875 133 | drop_idx = np.where(np.random.random((pc.size()[1])) <= dropout_ratio)[0] 134 | if len(drop_idx) > 0: 135 | cur_pc = pc[i, :, :] 136 | cur_pc[drop_idx.tolist(), 0:3] = cur_pc[0, 0:3].repeat(len(drop_idx), 1) # set to the first point 137 | pc[i, :, :] = cur_pc 138 | 139 | return pc 140 | -------------------------------------------------------------------------------- /RSCNNEQ/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .rscnn_ssn_cls import RSCNN_SSN as RSCNN_SSN_Cls 2 | -------------------------------------------------------------------------------- /RSCNNEQ/models/rscnn_ssn_cls.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 3 | sys.path.append(BASE_DIR) 4 | sys.path.append(os.path.join(BASE_DIR, "../utils")) 5 | import torch 6 | import torch.nn as nn 7 | from torch.autograd import Variable 8 | import pytorch_utils as pt_utils 9 | from pointnet2_modules import PointnetSAModule, PointnetSAModuleMSG 10 | import numpy as np 11 | 12 | # Relation-Shape CNN: Single-Scale Neighborhood 13 | class RSCNN_SSN(nn.Module): 14 | r""" 15 | PointNet2 with multi-scale grouping 16 | Semantic segmentation network that uses feature propogation layers 17 | 18 | Parameters 19 | ---------- 20 | num_classes: int 21 | Number of semantics classes to predict over -- size of softmax classifier that run for each point 22 | input_channels: int = 6 23 | Number of input channels in the feature descriptor for each point. 
If the point cloud is Nx9, this 24 | value should be 6 as in an Nx9 point cloud, 3 of the channels are xyz, and 6 are feature descriptors 25 | use_xyz: bool = True 26 | Whether or not to use the xyz position of a point as a feature 27 | """ 28 | 29 | def __init__(self, num_classes, input_channels=0, relation_prior=1, use_xyz=True): 30 | super().__init__() 31 | 32 | self.SA_modules = nn.ModuleList() 33 | 34 | self.SA_modules.append( 35 | PointnetSAModuleMSG( 36 | npoint=512, 37 | e1s=[0.25], 38 | e2s=[0.15], 39 | e3s=[0.15], 40 | nsamples=[48], 41 | mlps=[[input_channels, 128]], 42 | first_layer=True, 43 | use_xyz=use_xyz, 44 | relation_prior=relation_prior 45 | ) 46 | ) 47 | 48 | self.SA_modules.append( 49 | PointnetSAModuleMSG( 50 | npoint=128, 51 | e1s=[0.50], 52 | e2s=[0.30], 53 | e3s=[0.30], 54 | nsamples=[64], 55 | mlps=[[128, 512]], 56 | use_xyz=use_xyz, 57 | relation_prior=relation_prior 58 | ) 59 | ) 60 | 61 | self.SA_modules.append( 62 | # global convolutional pooling 63 | PointnetSAModule( 64 | nsample = 128, 65 | mlp=[512, 1024], 66 | use_xyz=use_xyz 67 | ) 68 | ) 69 | 70 | self.FC_layer = nn.Sequential( 71 | pt_utils.FC(1024, 512, activation=nn.ReLU(inplace=True), bn=True), 72 | nn.Dropout(p=0.5), 73 | pt_utils.FC(512, 256, activation=nn.ReLU(inplace=True), bn=True), 74 | nn.Dropout(p=0.5), 75 | pt_utils.FC(256, num_classes, activation=None) 76 | ) 77 | 78 | def _break_up_pc(self, pc): 79 | xyz = pc[..., 0:3].contiguous() 80 | features = ( 81 | pc[..., 3:].transpose(1, 2).contiguous() 82 | if pc.size(-1) > 3 else None 83 | ) 84 | return xyz, features 85 | 86 | def forward(self, pointcloud: torch.cuda.FloatTensor): 87 | r""" 88 | Forward pass of the network 89 | 90 | Parameters 91 | ---------- 92 | pointcloud: Variable(torch.cuda.FloatTensor) 93 | (B, N, 3 + input_channels) tensor 94 | Point cloud to run predicts on 95 | Each point in the point-cloud MUST 96 | be formated as (x, y, z, features...) 
97 | """ 98 | xyz, features = self._break_up_pc(pointcloud) 99 | for module in self.SA_modules: 100 | xyz, features = module(xyz, features) 101 | return self.FC_layer(features.squeeze(-1)) 102 | 103 | 104 | if __name__ == "__main__": 105 | sim_data = Variable(torch.rand(32, 2048, 6)) 106 | sim_data = sim_data.cuda() 107 | sim_cls = Variable(torch.ones(32, 16)) 108 | sim_cls = sim_cls.cuda() 109 | 110 | seg = RSCNN_SSN(num_classes=50, input_channels=3, use_xyz=True) 111 | seg = seg.cuda() 112 | out = seg(sim_data, sim_cls) 113 | print('seg', out.size()) -------------------------------------------------------------------------------- /RSCNNEQ/train_cls.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.optim as optim 3 | import torch.optim.lr_scheduler as lr_sched 4 | import torch.nn as nn 5 | from torch.utils.data import DataLoader 6 | from torch.autograd import Variable 7 | import numpy as np 8 | import os 9 | from torchvision import transforms 10 | from models import RSCNN_SSN_Cls as RSCNN_SSN 11 | from data import ModelNet40Cls 12 | import utils.pytorch_utils as pt_utils 13 | import utils.pointnet2_utils as pointnet2_utils 14 | import data.data_utils as d_utils 15 | import argparse 16 | import random 17 | import yaml 18 | import gc 19 | 20 | torch.backends.cudnn.enabled = True 21 | torch.backends.cudnn.benchmark = True 22 | torch.backends.cudnn.deterministic = True 23 | 24 | seed = 123 25 | random.seed(seed) 26 | np.random.seed(seed) 27 | torch.manual_seed(seed) 28 | torch.cuda.manual_seed(seed) 29 | torch.cuda.manual_seed_all(seed) 30 | 31 | parser = argparse.ArgumentParser(description='Relation-Shape CNN Shape Classification Training') 32 | parser.add_argument('--config', default='cfgs/config_ssn_cls.yaml', type=str) 33 | 34 | def main(): 35 | # os.system('cp models/rscnn_ssn_cls.py cls/') # bkp of train procedure 36 | # os.system('cp utils/pointnet2_utils.py cls/') 37 | # os.system('cp utils/pointnet2_modules.py cls/') 38 | # os.system('cp utils/csrc/ellipsoid_query_gpu.cu cls/') 39 | # os.system('cp utils/csrc/ellipsoid_query.c cls/') 40 | # os.system('cp cfgs/config_ssn_cls.yaml cls/') 41 | args = parser.parse_args() 42 | with open(args.config) as f: 43 | config = yaml.load(f) 44 | print("\n**************************") 45 | for k, v in config['common'].items(): 46 | setattr(args, k, v) 47 | print('\n[%s]:'%(k), v) 48 | print("\n**************************\n") 49 | 50 | try: 51 | os.makedirs(args.save_path) 52 | except OSError: 53 | pass 54 | 55 | train_transforms = transforms.Compose([ 56 | d_utils.PointcloudToTensor() 57 | ]) 58 | test_transforms = transforms.Compose([ 59 | d_utils.PointcloudToTensor() 60 | ]) 61 | 62 | train_dataset = ModelNet40Cls(num_points = args.num_points, root = args.data_root, transforms=train_transforms) 63 | train_dataloader = DataLoader( 64 | train_dataset, 65 | batch_size=args.batch_size, 66 | shuffle=True, 67 | num_workers=int(args.workers), 68 | pin_memory=True 69 | ) 70 | 71 | test_dataset = ModelNet40Cls(num_points = args.num_points, root = args.data_root, transforms=test_transforms, train=False) 72 | test_dataloader = DataLoader( 73 | test_dataset, 74 | batch_size=args.batch_size, 75 | shuffle=False, 76 | num_workers=int(args.workers), 77 | pin_memory=True 78 | ) 79 | 80 | model = RSCNN_SSN(num_classes = args.num_classes, input_channels = args.input_channels, relation_prior = args.relation_prior, use_xyz = True) 81 | model.cuda() 82 | optimizer = optim.Adam( 83 | model.parameters(), 
lr=args.base_lr, weight_decay=args.weight_decay) 84 | 85 | lr_lbmd = lambda e: max(args.lr_decay**(e // args.decay_step), args.lr_clip / args.base_lr) 86 | bnm_lmbd = lambda e: max(args.bn_momentum * args.bn_decay**(e // args.decay_step), args.bnm_clip) 87 | lr_scheduler = lr_sched.LambdaLR(optimizer, lr_lbmd) 88 | bnm_scheduler = pt_utils.BNMomentumScheduler(model, bnm_lmbd) 89 | 90 | if args.checkpoint != '': 91 | model.load_state_dict(torch.load(args.checkpoint)) 92 | print('Loaded model successfully: %s' % (args.checkpoint)) 93 | 94 | criterion = nn.CrossEntropyLoss() 95 | num_batch = len(train_dataset)/args.batch_size 96 | 97 | # training 98 | train(train_dataloader, test_dataloader, model, criterion, optimizer, lr_scheduler, bnm_scheduler, args, num_batch) 99 | 100 | 101 | def train(train_dataloader, test_dataloader, model, criterion, optimizer, lr_scheduler, bnm_scheduler, args, num_batch): 102 | PointcloudScaleAndTranslate = d_utils.PointcloudScaleAndTranslate() # initialize augmentation 103 | global g_acc 104 | g_acc = 0.91 # only save the model whose acc > 0.91 105 | batch_count = 0 106 | model.train() 107 | for epoch in range(args.epochs): 108 | for i, data in enumerate(train_dataloader, 0): 109 | if lr_scheduler is not None: 110 | lr_scheduler.step(epoch) 111 | if bnm_scheduler is not None: 112 | bnm_scheduler.step(epoch-1) 113 | points, target = data 114 | points, target = points.cuda(), target.cuda() 115 | points, target = Variable(points), Variable(target) 116 | 117 | # farthest point sampling 118 | fps_idx = pointnet2_utils.furthest_point_sample(points, 1200) # (B, npoint) 119 | fps_idx = fps_idx[:, np.random.choice(1200, args.num_points, False)] 120 | points = pointnet2_utils.gather_operation(points.transpose(1, 2).contiguous(), fps_idx).transpose(1, 2).contiguous() # (B, N, 3) 121 | 122 | # augmentation 123 | points.data = PointcloudScaleAndTranslate(points.data) 124 | 125 | optimizer.zero_grad() 126 | 127 | pred = model(points) 128 | target = target.view(-1) 129 | loss = criterion(pred, target) 130 | loss.backward() 131 | optimizer.step() 132 | if i % args.print_freq_iter == 0: 133 | print('[epoch %3d: %3d/%3d] \t train loss: %0.6f \t lr: %0.5f' %(epoch+1, i, num_batch, loss.data.clone(), lr_scheduler.get_lr()[0])) 134 | batch_count += 1 135 | 136 | # validation within an epoch 137 | if args.evaluate and batch_count % int(args.val_freq_epoch * num_batch) == 0: 138 | validate(test_dataloader, model, criterion, args, batch_count) 139 | 140 | 141 | def validate(test_dataloader, model, criterion, args, iter): 142 | global g_acc 143 | model.eval() 144 | losses, preds, labels = [], [], [] 145 | gc.collect() 146 | with torch.no_grad(): 147 | for j, data in enumerate(test_dataloader, 0): 148 | points, target = data 149 | points, target = points.cuda(), target.cuda() 150 | # points, target = Variable(points, volatile=True), Variable(target, volatile=True) 151 | 152 | # farthest point sampling 153 | fps_idx = pointnet2_utils.furthest_point_sample(points, args.num_points) # (B, npoint) 154 | # fps_idx = fps_idx[:, np.random.choice(1200, args.num_points, False)] 155 | points = pointnet2_utils.gather_operation(points.transpose(1, 2).contiguous(), fps_idx).transpose(1, 2).contiguous() 156 | 157 | pred = model(points) 158 | target = target.view(-1) 159 | loss = criterion(pred, target) 160 | losses.append(loss.data.clone()) 161 | _, pred_choice = torch.max(pred.data, -1) 162 | 163 | preds.append(pred_choice) 164 | labels.append(target.data) 165 | 166 | preds = torch.cat(preds, 0)
167 | labels = torch.cat(labels, 0) 168 | acc = (preds == labels).sum().item() / labels.numel() 169 | print('\nval loss: %0.6f \t acc: %0.6f\n' %(np.array(losses).mean(), acc)) 170 | if acc >= g_acc or acc > 0.9250: 171 | g_acc = acc 172 | torch.save(model.state_dict(), '%s/cls_ssn_iter_%d_acc_%0.6f.pth' % (args.save_path, iter, acc)) 173 | model.train() 174 | 175 | if __name__ == "__main__": 176 | main() 177 | -------------------------------------------------------------------------------- /RSCNNEQ/train_cls.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | mkdir -p log 3 | now=$(date +"%Y%m%d_%H%M%S") 4 | log_name="Cls_LOG_"$now"" 5 | export CUDA_VISIBLE_DEVICES=0 6 | python -u train_cls.py \ 7 | --config cfgs/config_ssn_cls.yaml \ 8 | 2>&1|tee log/$log_name.log & 9 | -------------------------------------------------------------------------------- /RSCNNEQ/utils/build_ffi.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import torch 3 | import os.path as osp 4 | from torch.utils.ffi import create_extension 5 | import sys, argparse, shutil 6 | 7 | base_dir = osp.dirname(osp.abspath(__file__)) 8 | 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser( 12 | description="Arguments for building pointnet2 ffi extension" 13 | ) 14 | parser.add_argument("--objs", nargs="*") 15 | clean_arg = parser.add_mutually_exclusive_group() 16 | clean_arg.add_argument("--build", dest='build', action="store_true") 17 | clean_arg.add_argument("--clean", dest='clean', action="store_true") 18 | parser.set_defaults(build=False, clean=False) 19 | 20 | args = parser.parse_args() 21 | assert args.build or args.clean 22 | 23 | return args 24 | 25 | 26 | def build(args): 27 | extra_objects = args.objs 28 | extra_objects += [a for a in glob.glob('/usr/local/cuda/lib64/*.a')] 29 | 30 | ffi = create_extension( 31 | '_ext.pointnet2', 32 | headers=[a for a in glob.glob("cinclude/*_wrapper.h")], 33 | sources=[a for a in glob.glob("csrc/*.c")], 34 | define_macros=[('WITH_CUDA', None)], 35 | relative_to=__file__, 36 | with_cuda=True, 37 | extra_objects=extra_objects, 38 | include_dirs=[osp.join(base_dir, 'cinclude')], 39 | verbose=False, 40 | package=False 41 | ) 42 | ffi.build() 43 | 44 | 45 | def clean(args): 46 | shutil.rmtree(osp.join(base_dir, "_ext")) 47 | 48 | 49 | if __name__ == "__main__": 50 | args = parse_args() 51 | if args.clean: 52 | clean(args) 53 | else: 54 | build(args) 55 | -------------------------------------------------------------------------------- /RSCNNEQ/utils/cinclude/cuda_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef _CUDA_UTILS_H 2 | #define _CUDA_UTILS_H 3 | 4 | #include 5 | 6 | #define TOTAL_THREADS 512 7 | 8 | inline int opt_n_threads(int work_size) { 9 | const int pow_2 = std::log(static_cast(work_size)) / std::log(2.0); 10 | 11 | return max(min(1 << pow_2, TOTAL_THREADS), 1); 12 | } 13 | 14 | inline dim3 opt_block_config(int x, int y) { 15 | const int x_threads = opt_n_threads(x); 16 | const int y_threads = 17 | max(min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1); 18 | dim3 block_config(x_threads, y_threads, 1); 19 | 20 | return block_config; 21 | } 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /RSCNNEQ/utils/cinclude/ellipsoid_query_gpu.h: -------------------------------------------------------------------------------- 1 | #ifndef 
_ELLIPSOID_QUERY_GPU 2 | #define _ELLIPSOID_QUERY_GPU 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | void query_ellipsoid_point_kernel_wrapper(int b, int n, int m, float e1, float e2, float e3, 9 | int nsample, const float *xyz, 10 | const float *new_xyz, const int *fps_idx, int *idx, int *ingroup_pts_cnt, 11 | float *ingroup_out, float *ingroup_cva, float *v, float *d, 12 | cudaStream_t stream); 13 | 14 | #ifdef __cplusplus 15 | } 16 | #endif 17 | #endif 18 | -------------------------------------------------------------------------------- /RSCNNEQ/utils/cinclude/ellipsoid_query_wrapper.h: -------------------------------------------------------------------------------- 1 | 2 | int ellipsoid_query_wrapper(int b, int n, int m, float e1, float e2, float e3, int nsample, 3 | THCudaTensor *new_xyz_tensor, THCudaTensor *xyz_tensor, THCudaIntTensor *fps_idx_tensor, 4 | THCudaIntTensor *idx_tensor, THCudaIntTensor *ingroup_pts_cnt_tensor, 5 | THCudaTensor *ingroup_out_tensor, THCudaTensor *ingroup_cva_tensor, 6 | THCudaTensor *v_tensor, THCudaTensor *d_tensor); -------------------------------------------------------------------------------- /RSCNNEQ/utils/cinclude/group_points_gpu.h: -------------------------------------------------------------------------------- 1 | #ifndef _BALL_QUERY_GPU 2 | #define _BALL_QUERY_GPU 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample, 9 | const float *points, const int *idx, 10 | float *out, cudaStream_t stream); 11 | 12 | void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 13 | int nsample, const float *grad_out, 14 | const int *idx, float *grad_points, 15 | cudaStream_t stream); 16 | #ifdef __cplusplus 17 | } 18 | #endif 19 | #endif 20 | -------------------------------------------------------------------------------- /RSCNNEQ/utils/cinclude/group_points_wrapper.h: -------------------------------------------------------------------------------- 1 | int group_points_wrapper(int b, int c, int n, int npoints, int nsample, 2 | THCudaTensor *points_tensor, 3 | THCudaIntTensor *idx_tensor, THCudaTensor *out); 4 | int group_points_grad_wrapper(int b, int c, int n, int npoints, int nsample, 5 | THCudaTensor *grad_out_tensor, 6 | THCudaIntTensor *idx_tensor, 7 | THCudaTensor *grad_points_tensor); 8 | -------------------------------------------------------------------------------- /RSCNNEQ/utils/cinclude/helper_cuda.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 1993-2013 NVIDIA Corporation. All rights reserved. 3 | * 4 | * Please refer to the NVIDIA end user license agreement (EULA) associated 5 | * with this source code for terms and conditions that govern your use of 6 | * this software. Any use, reproduction, disclosure, or distribution of 7 | * this software and related documentation outside the terms of the EULA 8 | * is strictly prohibited. 
9 | * 10 | */ 11 | 12 | //////////////////////////////////////////////////////////////////////////////// 13 | // These are CUDA Helper functions for initialization and error checking 14 | 15 | #ifndef HELPER_CUDA_H 16 | #define HELPER_CUDA_H 17 | 18 | #pragma once 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | #include "helper_string.h" 25 | 26 | #ifndef EXIT_WAIVED 27 | #define EXIT_WAIVED 2 28 | #endif 29 | 30 | // Note, it is required that your SDK sample to include the proper header files, please 31 | // refer the CUDA examples for examples of the needed CUDA headers, which may change depending 32 | // on which CUDA functions are used. 33 | 34 | // CUDA Runtime error messages 35 | #ifdef __DRIVER_TYPES_H__ 36 | static const char *_cudaGetErrorEnum(cudaError_t error) 37 | { 38 | switch (error) 39 | { 40 | case cudaSuccess: 41 | return "cudaSuccess"; 42 | 43 | case cudaErrorMissingConfiguration: 44 | return "cudaErrorMissingConfiguration"; 45 | 46 | case cudaErrorMemoryAllocation: 47 | return "cudaErrorMemoryAllocation"; 48 | 49 | case cudaErrorInitializationError: 50 | return "cudaErrorInitializationError"; 51 | 52 | case cudaErrorLaunchFailure: 53 | return "cudaErrorLaunchFailure"; 54 | 55 | case cudaErrorPriorLaunchFailure: 56 | return "cudaErrorPriorLaunchFailure"; 57 | 58 | case cudaErrorLaunchTimeout: 59 | return "cudaErrorLaunchTimeout"; 60 | 61 | case cudaErrorLaunchOutOfResources: 62 | return "cudaErrorLaunchOutOfResources"; 63 | 64 | case cudaErrorInvalidDeviceFunction: 65 | return "cudaErrorInvalidDeviceFunction"; 66 | 67 | case cudaErrorInvalidConfiguration: 68 | return "cudaErrorInvalidConfiguration"; 69 | 70 | case cudaErrorInvalidDevice: 71 | return "cudaErrorInvalidDevice"; 72 | 73 | case cudaErrorInvalidValue: 74 | return "cudaErrorInvalidValue"; 75 | 76 | case cudaErrorInvalidPitchValue: 77 | return "cudaErrorInvalidPitchValue"; 78 | 79 | case cudaErrorInvalidSymbol: 80 | return "cudaErrorInvalidSymbol"; 81 | 82 | case cudaErrorMapBufferObjectFailed: 83 | return "cudaErrorMapBufferObjectFailed"; 84 | 85 | case cudaErrorUnmapBufferObjectFailed: 86 | return "cudaErrorUnmapBufferObjectFailed"; 87 | 88 | case cudaErrorInvalidHostPointer: 89 | return "cudaErrorInvalidHostPointer"; 90 | 91 | case cudaErrorInvalidDevicePointer: 92 | return "cudaErrorInvalidDevicePointer"; 93 | 94 | case cudaErrorInvalidTexture: 95 | return "cudaErrorInvalidTexture"; 96 | 97 | case cudaErrorInvalidTextureBinding: 98 | return "cudaErrorInvalidTextureBinding"; 99 | 100 | case cudaErrorInvalidChannelDescriptor: 101 | return "cudaErrorInvalidChannelDescriptor"; 102 | 103 | case cudaErrorInvalidMemcpyDirection: 104 | return "cudaErrorInvalidMemcpyDirection"; 105 | 106 | case cudaErrorAddressOfConstant: 107 | return "cudaErrorAddressOfConstant"; 108 | 109 | case cudaErrorTextureFetchFailed: 110 | return "cudaErrorTextureFetchFailed"; 111 | 112 | case cudaErrorTextureNotBound: 113 | return "cudaErrorTextureNotBound"; 114 | 115 | case cudaErrorSynchronizationError: 116 | return "cudaErrorSynchronizationError"; 117 | 118 | case cudaErrorInvalidFilterSetting: 119 | return "cudaErrorInvalidFilterSetting"; 120 | 121 | case cudaErrorInvalidNormSetting: 122 | return "cudaErrorInvalidNormSetting"; 123 | 124 | case cudaErrorMixedDeviceExecution: 125 | return "cudaErrorMixedDeviceExecution"; 126 | 127 | case cudaErrorCudartUnloading: 128 | return "cudaErrorCudartUnloading"; 129 | 130 | case cudaErrorUnknown: 131 | return "cudaErrorUnknown"; 132 | 133 | case cudaErrorNotYetImplemented: 134 | return 
"cudaErrorNotYetImplemented"; 135 | 136 | case cudaErrorMemoryValueTooLarge: 137 | return "cudaErrorMemoryValueTooLarge"; 138 | 139 | case cudaErrorInvalidResourceHandle: 140 | return "cudaErrorInvalidResourceHandle"; 141 | 142 | case cudaErrorNotReady: 143 | return "cudaErrorNotReady"; 144 | 145 | case cudaErrorInsufficientDriver: 146 | return "cudaErrorInsufficientDriver"; 147 | 148 | case cudaErrorSetOnActiveProcess: 149 | return "cudaErrorSetOnActiveProcess"; 150 | 151 | case cudaErrorInvalidSurface: 152 | return "cudaErrorInvalidSurface"; 153 | 154 | case cudaErrorNoDevice: 155 | return "cudaErrorNoDevice"; 156 | 157 | case cudaErrorECCUncorrectable: 158 | return "cudaErrorECCUncorrectable"; 159 | 160 | case cudaErrorSharedObjectSymbolNotFound: 161 | return "cudaErrorSharedObjectSymbolNotFound"; 162 | 163 | case cudaErrorSharedObjectInitFailed: 164 | return "cudaErrorSharedObjectInitFailed"; 165 | 166 | case cudaErrorUnsupportedLimit: 167 | return "cudaErrorUnsupportedLimit"; 168 | 169 | case cudaErrorDuplicateVariableName: 170 | return "cudaErrorDuplicateVariableName"; 171 | 172 | case cudaErrorDuplicateTextureName: 173 | return "cudaErrorDuplicateTextureName"; 174 | 175 | case cudaErrorDuplicateSurfaceName: 176 | return "cudaErrorDuplicateSurfaceName"; 177 | 178 | case cudaErrorDevicesUnavailable: 179 | return "cudaErrorDevicesUnavailable"; 180 | 181 | case cudaErrorInvalidKernelImage: 182 | return "cudaErrorInvalidKernelImage"; 183 | 184 | case cudaErrorNoKernelImageForDevice: 185 | return "cudaErrorNoKernelImageForDevice"; 186 | 187 | case cudaErrorIncompatibleDriverContext: 188 | return "cudaErrorIncompatibleDriverContext"; 189 | 190 | case cudaErrorPeerAccessAlreadyEnabled: 191 | return "cudaErrorPeerAccessAlreadyEnabled"; 192 | 193 | case cudaErrorPeerAccessNotEnabled: 194 | return "cudaErrorPeerAccessNotEnabled"; 195 | 196 | case cudaErrorDeviceAlreadyInUse: 197 | return "cudaErrorDeviceAlreadyInUse"; 198 | 199 | case cudaErrorProfilerDisabled: 200 | return "cudaErrorProfilerDisabled"; 201 | 202 | case cudaErrorProfilerNotInitialized: 203 | return "cudaErrorProfilerNotInitialized"; 204 | 205 | case cudaErrorProfilerAlreadyStarted: 206 | return "cudaErrorProfilerAlreadyStarted"; 207 | 208 | case cudaErrorProfilerAlreadyStopped: 209 | return "cudaErrorProfilerAlreadyStopped"; 210 | 211 | /* Since CUDA 4.0*/ 212 | case cudaErrorAssert: 213 | return "cudaErrorAssert"; 214 | 215 | case cudaErrorTooManyPeers: 216 | return "cudaErrorTooManyPeers"; 217 | 218 | case cudaErrorHostMemoryAlreadyRegistered: 219 | return "cudaErrorHostMemoryAlreadyRegistered"; 220 | 221 | case cudaErrorHostMemoryNotRegistered: 222 | return "cudaErrorHostMemoryNotRegistered"; 223 | 224 | /* Since CUDA 5.0 */ 225 | case cudaErrorOperatingSystem: 226 | return "cudaErrorOperatingSystem"; 227 | 228 | case cudaErrorPeerAccessUnsupported: 229 | return "cudaErrorPeerAccessUnsupported"; 230 | 231 | case cudaErrorLaunchMaxDepthExceeded: 232 | return "cudaErrorLaunchMaxDepthExceeded"; 233 | 234 | case cudaErrorLaunchFileScopedTex: 235 | return "cudaErrorLaunchFileScopedTex"; 236 | 237 | case cudaErrorLaunchFileScopedSurf: 238 | return "cudaErrorLaunchFileScopedSurf"; 239 | 240 | case cudaErrorSyncDepthExceeded: 241 | return "cudaErrorSyncDepthExceeded"; 242 | 243 | case cudaErrorLaunchPendingCountExceeded: 244 | return "cudaErrorLaunchPendingCountExceeded"; 245 | 246 | case cudaErrorNotPermitted: 247 | return "cudaErrorNotPermitted"; 248 | 249 | case cudaErrorNotSupported: 250 | return "cudaErrorNotSupported"; 251 | 
252 | /* Since CUDA 6.0 */ 253 | case cudaErrorHardwareStackError: 254 | return "cudaErrorHardwareStackError"; 255 | 256 | case cudaErrorIllegalInstruction: 257 | return "cudaErrorIllegalInstruction"; 258 | 259 | case cudaErrorMisalignedAddress: 260 | return "cudaErrorMisalignedAddress"; 261 | 262 | case cudaErrorInvalidAddressSpace: 263 | return "cudaErrorInvalidAddressSpace"; 264 | 265 | case cudaErrorInvalidPc: 266 | return "cudaErrorInvalidPc"; 267 | 268 | case cudaErrorIllegalAddress: 269 | return "cudaErrorIllegalAddress"; 270 | 271 | /* Since CUDA 6.5*/ 272 | case cudaErrorInvalidPtx: 273 | return "cudaErrorInvalidPtx"; 274 | 275 | case cudaErrorInvalidGraphicsContext: 276 | return "cudaErrorInvalidGraphicsContext"; 277 | 278 | case cudaErrorStartupFailure: 279 | return "cudaErrorStartupFailure"; 280 | 281 | case cudaErrorApiFailureBase: 282 | return "cudaErrorApiFailureBase"; 283 | 284 | /* Since CUDA 8.0*/ 285 | case cudaErrorNvlinkUncorrectable : 286 | return "cudaErrorNvlinkUncorrectable"; 287 | } 288 | 289 | return ""; 290 | } 291 | #endif 292 | 293 | #ifdef __cuda_cuda_h__ 294 | // CUDA Driver API errors 295 | static const char *_cudaGetErrorEnum(CUresult error) 296 | { 297 | switch (error) 298 | { 299 | case CUDA_SUCCESS: 300 | return "CUDA_SUCCESS"; 301 | 302 | case CUDA_ERROR_INVALID_VALUE: 303 | return "CUDA_ERROR_INVALID_VALUE"; 304 | 305 | case CUDA_ERROR_OUT_OF_MEMORY: 306 | return "CUDA_ERROR_OUT_OF_MEMORY"; 307 | 308 | case CUDA_ERROR_NOT_INITIALIZED: 309 | return "CUDA_ERROR_NOT_INITIALIZED"; 310 | 311 | case CUDA_ERROR_DEINITIALIZED: 312 | return "CUDA_ERROR_DEINITIALIZED"; 313 | 314 | case CUDA_ERROR_PROFILER_DISABLED: 315 | return "CUDA_ERROR_PROFILER_DISABLED"; 316 | 317 | case CUDA_ERROR_PROFILER_NOT_INITIALIZED: 318 | return "CUDA_ERROR_PROFILER_NOT_INITIALIZED"; 319 | 320 | case CUDA_ERROR_PROFILER_ALREADY_STARTED: 321 | return "CUDA_ERROR_PROFILER_ALREADY_STARTED"; 322 | 323 | case CUDA_ERROR_PROFILER_ALREADY_STOPPED: 324 | return "CUDA_ERROR_PROFILER_ALREADY_STOPPED"; 325 | 326 | case CUDA_ERROR_NO_DEVICE: 327 | return "CUDA_ERROR_NO_DEVICE"; 328 | 329 | case CUDA_ERROR_INVALID_DEVICE: 330 | return "CUDA_ERROR_INVALID_DEVICE"; 331 | 332 | case CUDA_ERROR_INVALID_IMAGE: 333 | return "CUDA_ERROR_INVALID_IMAGE"; 334 | 335 | case CUDA_ERROR_INVALID_CONTEXT: 336 | return "CUDA_ERROR_INVALID_CONTEXT"; 337 | 338 | case CUDA_ERROR_CONTEXT_ALREADY_CURRENT: 339 | return "CUDA_ERROR_CONTEXT_ALREADY_CURRENT"; 340 | 341 | case CUDA_ERROR_MAP_FAILED: 342 | return "CUDA_ERROR_MAP_FAILED"; 343 | 344 | case CUDA_ERROR_UNMAP_FAILED: 345 | return "CUDA_ERROR_UNMAP_FAILED"; 346 | 347 | case CUDA_ERROR_ARRAY_IS_MAPPED: 348 | return "CUDA_ERROR_ARRAY_IS_MAPPED"; 349 | 350 | case CUDA_ERROR_ALREADY_MAPPED: 351 | return "CUDA_ERROR_ALREADY_MAPPED"; 352 | 353 | case CUDA_ERROR_NO_BINARY_FOR_GPU: 354 | return "CUDA_ERROR_NO_BINARY_FOR_GPU"; 355 | 356 | case CUDA_ERROR_ALREADY_ACQUIRED: 357 | return "CUDA_ERROR_ALREADY_ACQUIRED"; 358 | 359 | case CUDA_ERROR_NOT_MAPPED: 360 | return "CUDA_ERROR_NOT_MAPPED"; 361 | 362 | case CUDA_ERROR_NOT_MAPPED_AS_ARRAY: 363 | return "CUDA_ERROR_NOT_MAPPED_AS_ARRAY"; 364 | 365 | case CUDA_ERROR_NOT_MAPPED_AS_POINTER: 366 | return "CUDA_ERROR_NOT_MAPPED_AS_POINTER"; 367 | 368 | case CUDA_ERROR_ECC_UNCORRECTABLE: 369 | return "CUDA_ERROR_ECC_UNCORRECTABLE"; 370 | 371 | case CUDA_ERROR_UNSUPPORTED_LIMIT: 372 | return "CUDA_ERROR_UNSUPPORTED_LIMIT"; 373 | 374 | case CUDA_ERROR_CONTEXT_ALREADY_IN_USE: 375 | return "CUDA_ERROR_CONTEXT_ALREADY_IN_USE"; 376 | 
377 | case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED: 378 | return "CUDA_ERROR_PEER_ACCESS_UNSUPPORTED"; 379 | 380 | case CUDA_ERROR_INVALID_PTX: 381 | return "CUDA_ERROR_INVALID_PTX"; 382 | 383 | case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT: 384 | return "CUDA_ERROR_INVALID_GRAPHICS_CONTEXT"; 385 | 386 | case CUDA_ERROR_NVLINK_UNCORRECTABLE: 387 | return "CUDA_ERROR_NVLINK_UNCORRECTABLE"; 388 | 389 | case CUDA_ERROR_INVALID_SOURCE: 390 | return "CUDA_ERROR_INVALID_SOURCE"; 391 | 392 | case CUDA_ERROR_FILE_NOT_FOUND: 393 | return "CUDA_ERROR_FILE_NOT_FOUND"; 394 | 395 | case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: 396 | return "CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND"; 397 | 398 | case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED: 399 | return "CUDA_ERROR_SHARED_OBJECT_INIT_FAILED"; 400 | 401 | case CUDA_ERROR_OPERATING_SYSTEM: 402 | return "CUDA_ERROR_OPERATING_SYSTEM"; 403 | 404 | case CUDA_ERROR_INVALID_HANDLE: 405 | return "CUDA_ERROR_INVALID_HANDLE"; 406 | 407 | case CUDA_ERROR_NOT_FOUND: 408 | return "CUDA_ERROR_NOT_FOUND"; 409 | 410 | case CUDA_ERROR_NOT_READY: 411 | return "CUDA_ERROR_NOT_READY"; 412 | 413 | case CUDA_ERROR_ILLEGAL_ADDRESS: 414 | return "CUDA_ERROR_ILLEGAL_ADDRESS"; 415 | 416 | case CUDA_ERROR_LAUNCH_FAILED: 417 | return "CUDA_ERROR_LAUNCH_FAILED"; 418 | 419 | case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: 420 | return "CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES"; 421 | 422 | case CUDA_ERROR_LAUNCH_TIMEOUT: 423 | return "CUDA_ERROR_LAUNCH_TIMEOUT"; 424 | 425 | case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING: 426 | return "CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING"; 427 | 428 | case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED: 429 | return "CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED"; 430 | 431 | case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED: 432 | return "CUDA_ERROR_PEER_ACCESS_NOT_ENABLED"; 433 | 434 | case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE: 435 | return "CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE"; 436 | 437 | case CUDA_ERROR_CONTEXT_IS_DESTROYED: 438 | return "CUDA_ERROR_CONTEXT_IS_DESTROYED"; 439 | 440 | case CUDA_ERROR_ASSERT: 441 | return "CUDA_ERROR_ASSERT"; 442 | 443 | case CUDA_ERROR_TOO_MANY_PEERS: 444 | return "CUDA_ERROR_TOO_MANY_PEERS"; 445 | 446 | case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED: 447 | return "CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED"; 448 | 449 | case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED: 450 | return "CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED"; 451 | 452 | case CUDA_ERROR_HARDWARE_STACK_ERROR: 453 | return "CUDA_ERROR_HARDWARE_STACK_ERROR"; 454 | 455 | case CUDA_ERROR_ILLEGAL_INSTRUCTION: 456 | return "CUDA_ERROR_ILLEGAL_INSTRUCTION"; 457 | 458 | case CUDA_ERROR_MISALIGNED_ADDRESS: 459 | return "CUDA_ERROR_MISALIGNED_ADDRESS"; 460 | 461 | case CUDA_ERROR_INVALID_ADDRESS_SPACE: 462 | return "CUDA_ERROR_INVALID_ADDRESS_SPACE"; 463 | 464 | case CUDA_ERROR_INVALID_PC: 465 | return "CUDA_ERROR_INVALID_PC"; 466 | 467 | case CUDA_ERROR_NOT_PERMITTED: 468 | return "CUDA_ERROR_NOT_PERMITTED"; 469 | 470 | case CUDA_ERROR_NOT_SUPPORTED: 471 | return "CUDA_ERROR_NOT_SUPPORTED"; 472 | 473 | case CUDA_ERROR_UNKNOWN: 474 | return "CUDA_ERROR_UNKNOWN"; 475 | } 476 | 477 | return ""; 478 | } 479 | #endif 480 | 481 | #ifdef CUBLAS_API_H_ 482 | // cuBLAS API errors 483 | static const char *_cudaGetErrorEnum(cublasStatus_t error) 484 | { 485 | switch (error) 486 | { 487 | case CUBLAS_STATUS_SUCCESS: 488 | return "CUBLAS_STATUS_SUCCESS"; 489 | 490 | case CUBLAS_STATUS_NOT_INITIALIZED: 491 | return "CUBLAS_STATUS_NOT_INITIALIZED"; 492 | 493 | case CUBLAS_STATUS_ALLOC_FAILED: 494 | return 
"CUBLAS_STATUS_ALLOC_FAILED"; 495 | 496 | case CUBLAS_STATUS_INVALID_VALUE: 497 | return "CUBLAS_STATUS_INVALID_VALUE"; 498 | 499 | case CUBLAS_STATUS_ARCH_MISMATCH: 500 | return "CUBLAS_STATUS_ARCH_MISMATCH"; 501 | 502 | case CUBLAS_STATUS_MAPPING_ERROR: 503 | return "CUBLAS_STATUS_MAPPING_ERROR"; 504 | 505 | case CUBLAS_STATUS_EXECUTION_FAILED: 506 | return "CUBLAS_STATUS_EXECUTION_FAILED"; 507 | 508 | case CUBLAS_STATUS_INTERNAL_ERROR: 509 | return "CUBLAS_STATUS_INTERNAL_ERROR"; 510 | 511 | case CUBLAS_STATUS_NOT_SUPPORTED: 512 | return "CUBLAS_STATUS_NOT_SUPPORTED"; 513 | 514 | case CUBLAS_STATUS_LICENSE_ERROR: 515 | return "CUBLAS_STATUS_LICENSE_ERROR"; 516 | } 517 | 518 | return ""; 519 | } 520 | #endif 521 | 522 | #ifdef _CUFFT_H_ 523 | // cuFFT API errors 524 | static const char *_cudaGetErrorEnum(cufftResult error) 525 | { 526 | switch (error) 527 | { 528 | case CUFFT_SUCCESS: 529 | return "CUFFT_SUCCESS"; 530 | 531 | case CUFFT_INVALID_PLAN: 532 | return "CUFFT_INVALID_PLAN"; 533 | 534 | case CUFFT_ALLOC_FAILED: 535 | return "CUFFT_ALLOC_FAILED"; 536 | 537 | case CUFFT_INVALID_TYPE: 538 | return "CUFFT_INVALID_TYPE"; 539 | 540 | case CUFFT_INVALID_VALUE: 541 | return "CUFFT_INVALID_VALUE"; 542 | 543 | case CUFFT_INTERNAL_ERROR: 544 | return "CUFFT_INTERNAL_ERROR"; 545 | 546 | case CUFFT_EXEC_FAILED: 547 | return "CUFFT_EXEC_FAILED"; 548 | 549 | case CUFFT_SETUP_FAILED: 550 | return "CUFFT_SETUP_FAILED"; 551 | 552 | case CUFFT_INVALID_SIZE: 553 | return "CUFFT_INVALID_SIZE"; 554 | 555 | case CUFFT_UNALIGNED_DATA: 556 | return "CUFFT_UNALIGNED_DATA"; 557 | 558 | case CUFFT_INCOMPLETE_PARAMETER_LIST: 559 | return "CUFFT_INCOMPLETE_PARAMETER_LIST"; 560 | 561 | case CUFFT_INVALID_DEVICE: 562 | return "CUFFT_INVALID_DEVICE"; 563 | 564 | case CUFFT_PARSE_ERROR: 565 | return "CUFFT_PARSE_ERROR"; 566 | 567 | case CUFFT_NO_WORKSPACE: 568 | return "CUFFT_NO_WORKSPACE"; 569 | 570 | case CUFFT_NOT_IMPLEMENTED: 571 | return "CUFFT_NOT_IMPLEMENTED"; 572 | 573 | case CUFFT_LICENSE_ERROR: 574 | return "CUFFT_LICENSE_ERROR"; 575 | 576 | case CUFFT_NOT_SUPPORTED: 577 | return "CUFFT_NOT_SUPPORTED"; 578 | } 579 | 580 | return ""; 581 | } 582 | #endif 583 | 584 | 585 | #ifdef CUSPARSEAPI 586 | // cuSPARSE API errors 587 | static const char *_cudaGetErrorEnum(cusparseStatus_t error) 588 | { 589 | switch (error) 590 | { 591 | case CUSPARSE_STATUS_SUCCESS: 592 | return "CUSPARSE_STATUS_SUCCESS"; 593 | 594 | case CUSPARSE_STATUS_NOT_INITIALIZED: 595 | return "CUSPARSE_STATUS_NOT_INITIALIZED"; 596 | 597 | case CUSPARSE_STATUS_ALLOC_FAILED: 598 | return "CUSPARSE_STATUS_ALLOC_FAILED"; 599 | 600 | case CUSPARSE_STATUS_INVALID_VALUE: 601 | return "CUSPARSE_STATUS_INVALID_VALUE"; 602 | 603 | case CUSPARSE_STATUS_ARCH_MISMATCH: 604 | return "CUSPARSE_STATUS_ARCH_MISMATCH"; 605 | 606 | case CUSPARSE_STATUS_MAPPING_ERROR: 607 | return "CUSPARSE_STATUS_MAPPING_ERROR"; 608 | 609 | case CUSPARSE_STATUS_EXECUTION_FAILED: 610 | return "CUSPARSE_STATUS_EXECUTION_FAILED"; 611 | 612 | case CUSPARSE_STATUS_INTERNAL_ERROR: 613 | return "CUSPARSE_STATUS_INTERNAL_ERROR"; 614 | 615 | case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED: 616 | return "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED"; 617 | } 618 | 619 | return ""; 620 | } 621 | #endif 622 | 623 | #ifdef CUSOLVER_COMMON_H_ 624 | //cuSOLVER API errors 625 | static const char *_cudaGetErrorEnum(cusolverStatus_t error) 626 | { 627 | switch(error) 628 | { 629 | case CUSOLVER_STATUS_SUCCESS: 630 | return "CUSOLVER_STATUS_SUCCESS"; 631 | case CUSOLVER_STATUS_NOT_INITIALIZED: 
632 | return "CUSOLVER_STATUS_NOT_INITIALIZED"; 633 | case CUSOLVER_STATUS_ALLOC_FAILED: 634 | return "CUSOLVER_STATUS_ALLOC_FAILED"; 635 | case CUSOLVER_STATUS_INVALID_VALUE: 636 | return "CUSOLVER_STATUS_INVALID_VALUE"; 637 | case CUSOLVER_STATUS_ARCH_MISMATCH: 638 | return "CUSOLVER_STATUS_ARCH_MISMATCH"; 639 | case CUSOLVER_STATUS_MAPPING_ERROR: 640 | return "CUSOLVER_STATUS_MAPPING_ERROR"; 641 | case CUSOLVER_STATUS_EXECUTION_FAILED: 642 | return "CUSOLVER_STATUS_EXECUTION_FAILED"; 643 | case CUSOLVER_STATUS_INTERNAL_ERROR: 644 | return "CUSOLVER_STATUS_INTERNAL_ERROR"; 645 | case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED: 646 | return "CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED"; 647 | case CUSOLVER_STATUS_NOT_SUPPORTED : 648 | return "CUSOLVER_STATUS_NOT_SUPPORTED "; 649 | case CUSOLVER_STATUS_ZERO_PIVOT: 650 | return "CUSOLVER_STATUS_ZERO_PIVOT"; 651 | case CUSOLVER_STATUS_INVALID_LICENSE: 652 | return "CUSOLVER_STATUS_INVALID_LICENSE"; 653 | } 654 | 655 | return ""; 656 | 657 | } 658 | #endif 659 | 660 | #ifdef CURAND_H_ 661 | // cuRAND API errors 662 | static const char *_cudaGetErrorEnum(curandStatus_t error) 663 | { 664 | switch (error) 665 | { 666 | case CURAND_STATUS_SUCCESS: 667 | return "CURAND_STATUS_SUCCESS"; 668 | 669 | case CURAND_STATUS_VERSION_MISMATCH: 670 | return "CURAND_STATUS_VERSION_MISMATCH"; 671 | 672 | case CURAND_STATUS_NOT_INITIALIZED: 673 | return "CURAND_STATUS_NOT_INITIALIZED"; 674 | 675 | case CURAND_STATUS_ALLOCATION_FAILED: 676 | return "CURAND_STATUS_ALLOCATION_FAILED"; 677 | 678 | case CURAND_STATUS_TYPE_ERROR: 679 | return "CURAND_STATUS_TYPE_ERROR"; 680 | 681 | case CURAND_STATUS_OUT_OF_RANGE: 682 | return "CURAND_STATUS_OUT_OF_RANGE"; 683 | 684 | case CURAND_STATUS_LENGTH_NOT_MULTIPLE: 685 | return "CURAND_STATUS_LENGTH_NOT_MULTIPLE"; 686 | 687 | case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED: 688 | return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED"; 689 | 690 | case CURAND_STATUS_LAUNCH_FAILURE: 691 | return "CURAND_STATUS_LAUNCH_FAILURE"; 692 | 693 | case CURAND_STATUS_PREEXISTING_FAILURE: 694 | return "CURAND_STATUS_PREEXISTING_FAILURE"; 695 | 696 | case CURAND_STATUS_INITIALIZATION_FAILED: 697 | return "CURAND_STATUS_INITIALIZATION_FAILED"; 698 | 699 | case CURAND_STATUS_ARCH_MISMATCH: 700 | return "CURAND_STATUS_ARCH_MISMATCH"; 701 | 702 | case CURAND_STATUS_INTERNAL_ERROR: 703 | return "CURAND_STATUS_INTERNAL_ERROR"; 704 | } 705 | 706 | return ""; 707 | } 708 | #endif 709 | 710 | #ifdef NV_NPPIDEFS_H 711 | // NPP API errors 712 | static const char *_cudaGetErrorEnum(NppStatus error) 713 | { 714 | switch (error) 715 | { 716 | case NPP_NOT_SUPPORTED_MODE_ERROR: 717 | return "NPP_NOT_SUPPORTED_MODE_ERROR"; 718 | 719 | case NPP_ROUND_MODE_NOT_SUPPORTED_ERROR: 720 | return "NPP_ROUND_MODE_NOT_SUPPORTED_ERROR"; 721 | 722 | case NPP_RESIZE_NO_OPERATION_ERROR: 723 | return "NPP_RESIZE_NO_OPERATION_ERROR"; 724 | 725 | case NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY: 726 | return "NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY"; 727 | 728 | #if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000 729 | 730 | case NPP_BAD_ARG_ERROR: 731 | return "NPP_BAD_ARGUMENT_ERROR"; 732 | 733 | case NPP_COEFF_ERROR: 734 | return "NPP_COEFFICIENT_ERROR"; 735 | 736 | case NPP_RECT_ERROR: 737 | return "NPP_RECTANGLE_ERROR"; 738 | 739 | case NPP_QUAD_ERROR: 740 | return "NPP_QUADRANGLE_ERROR"; 741 | 742 | case NPP_MEM_ALLOC_ERR: 743 | return "NPP_MEMORY_ALLOCATION_ERROR"; 744 | 745 | case NPP_HISTO_NUMBER_OF_LEVELS_ERROR: 746 | return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR"; 
747 | 748 | case NPP_INVALID_INPUT: 749 | return "NPP_INVALID_INPUT"; 750 | 751 | case NPP_POINTER_ERROR: 752 | return "NPP_POINTER_ERROR"; 753 | 754 | case NPP_WARNING: 755 | return "NPP_WARNING"; 756 | 757 | case NPP_ODD_ROI_WARNING: 758 | return "NPP_ODD_ROI_WARNING"; 759 | #else 760 | 761 | // These are for CUDA 5.5 or higher 762 | case NPP_BAD_ARGUMENT_ERROR: 763 | return "NPP_BAD_ARGUMENT_ERROR"; 764 | 765 | case NPP_COEFFICIENT_ERROR: 766 | return "NPP_COEFFICIENT_ERROR"; 767 | 768 | case NPP_RECTANGLE_ERROR: 769 | return "NPP_RECTANGLE_ERROR"; 770 | 771 | case NPP_QUADRANGLE_ERROR: 772 | return "NPP_QUADRANGLE_ERROR"; 773 | 774 | case NPP_MEMORY_ALLOCATION_ERR: 775 | return "NPP_MEMORY_ALLOCATION_ERROR"; 776 | 777 | case NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR: 778 | return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR"; 779 | 780 | case NPP_INVALID_HOST_POINTER_ERROR: 781 | return "NPP_INVALID_HOST_POINTER_ERROR"; 782 | 783 | case NPP_INVALID_DEVICE_POINTER_ERROR: 784 | return "NPP_INVALID_DEVICE_POINTER_ERROR"; 785 | #endif 786 | 787 | case NPP_LUT_NUMBER_OF_LEVELS_ERROR: 788 | return "NPP_LUT_NUMBER_OF_LEVELS_ERROR"; 789 | 790 | case NPP_TEXTURE_BIND_ERROR: 791 | return "NPP_TEXTURE_BIND_ERROR"; 792 | 793 | case NPP_WRONG_INTERSECTION_ROI_ERROR: 794 | return "NPP_WRONG_INTERSECTION_ROI_ERROR"; 795 | 796 | case NPP_NOT_EVEN_STEP_ERROR: 797 | return "NPP_NOT_EVEN_STEP_ERROR"; 798 | 799 | case NPP_INTERPOLATION_ERROR: 800 | return "NPP_INTERPOLATION_ERROR"; 801 | 802 | case NPP_RESIZE_FACTOR_ERROR: 803 | return "NPP_RESIZE_FACTOR_ERROR"; 804 | 805 | case NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR: 806 | return "NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR"; 807 | 808 | 809 | #if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000 810 | 811 | case NPP_MEMFREE_ERR: 812 | return "NPP_MEMFREE_ERR"; 813 | 814 | case NPP_MEMSET_ERR: 815 | return "NPP_MEMSET_ERR"; 816 | 817 | case NPP_MEMCPY_ERR: 818 | return "NPP_MEMCPY_ERROR"; 819 | 820 | case NPP_MIRROR_FLIP_ERR: 821 | return "NPP_MIRROR_FLIP_ERR"; 822 | #else 823 | 824 | case NPP_MEMFREE_ERROR: 825 | return "NPP_MEMFREE_ERROR"; 826 | 827 | case NPP_MEMSET_ERROR: 828 | return "NPP_MEMSET_ERROR"; 829 | 830 | case NPP_MEMCPY_ERROR: 831 | return "NPP_MEMCPY_ERROR"; 832 | 833 | case NPP_MIRROR_FLIP_ERROR: 834 | return "NPP_MIRROR_FLIP_ERROR"; 835 | #endif 836 | 837 | case NPP_ALIGNMENT_ERROR: 838 | return "NPP_ALIGNMENT_ERROR"; 839 | 840 | case NPP_STEP_ERROR: 841 | return "NPP_STEP_ERROR"; 842 | 843 | case NPP_SIZE_ERROR: 844 | return "NPP_SIZE_ERROR"; 845 | 846 | case NPP_NULL_POINTER_ERROR: 847 | return "NPP_NULL_POINTER_ERROR"; 848 | 849 | case NPP_CUDA_KERNEL_EXECUTION_ERROR: 850 | return "NPP_CUDA_KERNEL_EXECUTION_ERROR"; 851 | 852 | case NPP_NOT_IMPLEMENTED_ERROR: 853 | return "NPP_NOT_IMPLEMENTED_ERROR"; 854 | 855 | case NPP_ERROR: 856 | return "NPP_ERROR"; 857 | 858 | case NPP_SUCCESS: 859 | return "NPP_SUCCESS"; 860 | 861 | case NPP_WRONG_INTERSECTION_QUAD_WARNING: 862 | return "NPP_WRONG_INTERSECTION_QUAD_WARNING"; 863 | 864 | case NPP_MISALIGNED_DST_ROI_WARNING: 865 | return "NPP_MISALIGNED_DST_ROI_WARNING"; 866 | 867 | case NPP_AFFINE_QUAD_INCORRECT_WARNING: 868 | return "NPP_AFFINE_QUAD_INCORRECT_WARNING"; 869 | 870 | case NPP_DOUBLE_SIZE_WARNING: 871 | return "NPP_DOUBLE_SIZE_WARNING"; 872 | 873 | case NPP_WRONG_INTERSECTION_ROI_WARNING: 874 | return "NPP_WRONG_INTERSECTION_ROI_WARNING"; 875 | 876 | #if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x6000 877 | /* These are 6.0 or higher */ 878 | case 
NPP_LUT_PALETTE_BITSIZE_ERROR: 879 | return "NPP_LUT_PALETTE_BITSIZE_ERROR"; 880 | 881 | case NPP_ZC_MODE_NOT_SUPPORTED_ERROR: 882 | return "NPP_ZC_MODE_NOT_SUPPORTED_ERROR"; 883 | 884 | case NPP_QUALITY_INDEX_ERROR: 885 | return "NPP_QUALITY_INDEX_ERROR"; 886 | 887 | case NPP_CHANNEL_ORDER_ERROR: 888 | return "NPP_CHANNEL_ORDER_ERROR"; 889 | 890 | case NPP_ZERO_MASK_VALUE_ERROR: 891 | return "NPP_ZERO_MASK_VALUE_ERROR"; 892 | 893 | case NPP_NUMBER_OF_CHANNELS_ERROR: 894 | return "NPP_NUMBER_OF_CHANNELS_ERROR"; 895 | 896 | case NPP_COI_ERROR: 897 | return "NPP_COI_ERROR"; 898 | 899 | case NPP_DIVISOR_ERROR: 900 | return "NPP_DIVISOR_ERROR"; 901 | 902 | case NPP_CHANNEL_ERROR: 903 | return "NPP_CHANNEL_ERROR"; 904 | 905 | case NPP_STRIDE_ERROR: 906 | return "NPP_STRIDE_ERROR"; 907 | 908 | case NPP_ANCHOR_ERROR: 909 | return "NPP_ANCHOR_ERROR"; 910 | 911 | case NPP_MASK_SIZE_ERROR: 912 | return "NPP_MASK_SIZE_ERROR"; 913 | 914 | case NPP_MOMENT_00_ZERO_ERROR: 915 | return "NPP_MOMENT_00_ZERO_ERROR"; 916 | 917 | case NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR: 918 | return "NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR"; 919 | 920 | case NPP_THRESHOLD_ERROR: 921 | return "NPP_THRESHOLD_ERROR"; 922 | 923 | case NPP_CONTEXT_MATCH_ERROR: 924 | return "NPP_CONTEXT_MATCH_ERROR"; 925 | 926 | case NPP_FFT_FLAG_ERROR: 927 | return "NPP_FFT_FLAG_ERROR"; 928 | 929 | case NPP_FFT_ORDER_ERROR: 930 | return "NPP_FFT_ORDER_ERROR"; 931 | 932 | case NPP_SCALE_RANGE_ERROR: 933 | return "NPP_SCALE_RANGE_ERROR"; 934 | 935 | case NPP_DATA_TYPE_ERROR: 936 | return "NPP_DATA_TYPE_ERROR"; 937 | 938 | case NPP_OUT_OFF_RANGE_ERROR: 939 | return "NPP_OUT_OFF_RANGE_ERROR"; 940 | 941 | case NPP_DIVIDE_BY_ZERO_ERROR: 942 | return "NPP_DIVIDE_BY_ZERO_ERROR"; 943 | 944 | case NPP_RANGE_ERROR: 945 | return "NPP_RANGE_ERROR"; 946 | 947 | case NPP_NO_MEMORY_ERROR: 948 | return "NPP_NO_MEMORY_ERROR"; 949 | 950 | case NPP_ERROR_RESERVED: 951 | return "NPP_ERROR_RESERVED"; 952 | 953 | case NPP_NO_OPERATION_WARNING: 954 | return "NPP_NO_OPERATION_WARNING"; 955 | 956 | case NPP_DIVIDE_BY_ZERO_WARNING: 957 | return "NPP_DIVIDE_BY_ZERO_WARNING"; 958 | #endif 959 | 960 | #if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x7000 961 | /* These are 7.0 or higher */ 962 | case NPP_OVERFLOW_ERROR: 963 | return "NPP_OVERFLOW_ERROR"; 964 | 965 | case NPP_CORRUPTED_DATA_ERROR: 966 | return "NPP_CORRUPTED_DATA_ERROR"; 967 | #endif 968 | } 969 | 970 | return "<unknown>"; 971 | } 972 | #endif 973 | 974 | #ifdef __DRIVER_TYPES_H__ 975 | #ifndef DEVICE_RESET 976 | #define DEVICE_RESET cudaDeviceReset(); 977 | #endif 978 | #else 979 | #ifndef DEVICE_RESET 980 | #define DEVICE_RESET 981 | #endif 982 | #endif 983 | 984 | template< typename T > 985 | void check(T result, char const *const func, const char *const file, int const line) 986 | { 987 | if (result) 988 | { 989 | fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n", 990 | file, line, static_cast<unsigned int>(result), _cudaGetErrorEnum(result), func); 991 | DEVICE_RESET 992 | // Make sure we call CUDA Device Reset before exiting 993 | exit(EXIT_FAILURE); 994 | } 995 | } 996 | 997 | #ifdef __DRIVER_TYPES_H__ 998 | // This will output the proper CUDA error strings in the event that a CUDA host call returns an error 999 | #define checkCudaErrors(val) check ( (val), #val, __FILE__, __LINE__ ) 1000 | 1001 | // This will output the proper error string when calling cudaGetLastError 1002 | #define getLastCudaError(msg) __getLastCudaError (msg, __FILE__, __LINE__) 1003 | 1004 | inline void __getLastCudaError(const char
*errorMessage, const char *file, const int line) 1005 | { 1006 | cudaError_t err = cudaGetLastError(); 1007 | 1008 | if (cudaSuccess != err) 1009 | { 1010 | fprintf(stderr, "%s(%i) : getLastCudaError() CUDA error : %s : (%d) %s.\n", 1011 | file, line, errorMessage, (int)err, cudaGetErrorString(err)); 1012 | DEVICE_RESET 1013 | exit(EXIT_FAILURE); 1014 | } 1015 | } 1016 | #endif 1017 | 1018 | #ifndef MAX 1019 | #define MAX(a,b) (a > b ? a : b) 1020 | #endif 1021 | 1022 | // Float To Int conversion 1023 | inline int ftoi(float value) 1024 | { 1025 | return (value >= 0 ? (int)(value + 0.5) : (int)(value - 0.5)); 1026 | } 1027 | 1028 | // Beginning of GPU Architecture definitions 1029 | inline int _ConvertSMVer2Cores(int major, int minor) 1030 | { 1031 | // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM 1032 | typedef struct 1033 | { 1034 | int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version 1035 | int Cores; 1036 | } sSMtoCores; 1037 | 1038 | sSMtoCores nGpuArchCoresPerSM[] = 1039 | { 1040 | { 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class 1041 | { 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class 1042 | { 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class 1043 | { 0x32, 192}, // Kepler Generation (SM 3.2) GK10x class 1044 | { 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class 1045 | { 0x37, 192}, // Kepler Generation (SM 3.7) GK21x class 1046 | { 0x50, 128}, // Maxwell Generation (SM 5.0) GM10x class 1047 | { 0x52, 128}, // Maxwell Generation (SM 5.2) GM20x class 1048 | { 0x53, 128}, // Maxwell Generation (SM 5.3) GM20x class 1049 | { 0x60, 64 }, // Pascal Generation (SM 6.0) GP100 class 1050 | { 0x61, 128}, // Pascal Generation (SM 6.1) GP10x class 1051 | { 0x62, 128}, // Pascal Generation (SM 6.2) GP10x class 1052 | { -1, -1 } 1053 | }; 1054 | 1055 | int index = 0; 1056 | 1057 | while (nGpuArchCoresPerSM[index].SM != -1) 1058 | { 1059 | if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) 1060 | { 1061 | return nGpuArchCoresPerSM[index].Cores; 1062 | } 1063 | 1064 | index++; 1065 | } 1066 | 1067 | // If we don't find the values, we default use the previous one to run properly 1068 | printf("MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM\n", major, minor, nGpuArchCoresPerSM[index-1].Cores); 1069 | return nGpuArchCoresPerSM[index-1].Cores; 1070 | } 1071 | // end of GPU Architecture definitions 1072 | 1073 | #ifdef __CUDA_RUNTIME_H__ 1074 | // General GPU Device CUDA Initialization 1075 | inline int gpuDeviceInit(int devID) 1076 | { 1077 | int device_count; 1078 | checkCudaErrors(cudaGetDeviceCount(&device_count)); 1079 | 1080 | if (device_count == 0) 1081 | { 1082 | fprintf(stderr, "gpuDeviceInit() CUDA error: no devices supporting CUDA.\n"); 1083 | exit(EXIT_FAILURE); 1084 | } 1085 | 1086 | if (devID < 0) 1087 | { 1088 | devID = 0; 1089 | } 1090 | 1091 | if (devID > device_count-1) 1092 | { 1093 | fprintf(stderr, "\n"); 1094 | fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n", device_count); 1095 | fprintf(stderr, ">> gpuDeviceInit (-device=%d) is not a valid GPU device. 
<<\n", devID); 1096 | fprintf(stderr, "\n"); 1097 | return -devID; 1098 | } 1099 | 1100 | cudaDeviceProp deviceProp; 1101 | checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID)); 1102 | 1103 | if (deviceProp.computeMode == cudaComputeModeProhibited) 1104 | { 1105 | fprintf(stderr, "Error: device is running in , no threads can use ::cudaSetDevice().\n"); 1106 | return -1; 1107 | } 1108 | 1109 | if (deviceProp.major < 1) 1110 | { 1111 | fprintf(stderr, "gpuDeviceInit(): GPU device does not support CUDA.\n"); 1112 | exit(EXIT_FAILURE); 1113 | } 1114 | 1115 | checkCudaErrors(cudaSetDevice(devID)); 1116 | printf("gpuDeviceInit() CUDA Device [%d]: \"%s\n", devID, deviceProp.name); 1117 | 1118 | return devID; 1119 | } 1120 | 1121 | // This function returns the best GPU (with maximum GFLOPS) 1122 | inline int gpuGetMaxGflopsDeviceId() 1123 | { 1124 | int current_device = 0, sm_per_multiproc = 0; 1125 | int max_perf_device = 0; 1126 | int device_count = 0, best_SM_arch = 0; 1127 | int devices_prohibited = 0; 1128 | 1129 | unsigned long long max_compute_perf = 0; 1130 | cudaDeviceProp deviceProp; 1131 | cudaGetDeviceCount(&device_count); 1132 | 1133 | checkCudaErrors(cudaGetDeviceCount(&device_count)); 1134 | 1135 | if (device_count == 0) 1136 | { 1137 | fprintf(stderr, "gpuGetMaxGflopsDeviceId() CUDA error: no devices supporting CUDA.\n"); 1138 | exit(EXIT_FAILURE); 1139 | } 1140 | 1141 | // Find the best major SM Architecture GPU device 1142 | while (current_device < device_count) 1143 | { 1144 | cudaGetDeviceProperties(&deviceProp, current_device); 1145 | 1146 | // If this GPU is not running on Compute Mode prohibited, then we can add it to the list 1147 | if (deviceProp.computeMode != cudaComputeModeProhibited) 1148 | { 1149 | if (deviceProp.major > 0 && deviceProp.major < 9999) 1150 | { 1151 | best_SM_arch = MAX(best_SM_arch, deviceProp.major); 1152 | } 1153 | } 1154 | else 1155 | { 1156 | devices_prohibited++; 1157 | } 1158 | 1159 | current_device++; 1160 | } 1161 | 1162 | if (devices_prohibited == device_count) 1163 | { 1164 | fprintf(stderr, "gpuGetMaxGflopsDeviceId() CUDA error: all devices have compute mode prohibited.\n"); 1165 | exit(EXIT_FAILURE); 1166 | } 1167 | 1168 | // Find the best CUDA capable GPU device 1169 | current_device = 0; 1170 | 1171 | while (current_device < device_count) 1172 | { 1173 | cudaGetDeviceProperties(&deviceProp, current_device); 1174 | 1175 | // If this GPU is not running on Compute Mode prohibited, then we can add it to the list 1176 | if (deviceProp.computeMode != cudaComputeModeProhibited) 1177 | { 1178 | if (deviceProp.major == 9999 && deviceProp.minor == 9999) 1179 | { 1180 | sm_per_multiproc = 1; 1181 | } 1182 | else 1183 | { 1184 | sm_per_multiproc = _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor); 1185 | } 1186 | 1187 | unsigned long long compute_perf = (unsigned long long) deviceProp.multiProcessorCount * sm_per_multiproc * deviceProp.clockRate; 1188 | 1189 | if (compute_perf > max_compute_perf) 1190 | { 1191 | // If we find GPU with SM major > 2, search only these 1192 | if (best_SM_arch > 2) 1193 | { 1194 | // If our device==dest_SM_arch, choose this, or else pass 1195 | if (deviceProp.major == best_SM_arch) 1196 | { 1197 | max_compute_perf = compute_perf; 1198 | max_perf_device = current_device; 1199 | } 1200 | } 1201 | else 1202 | { 1203 | max_compute_perf = compute_perf; 1204 | max_perf_device = current_device; 1205 | } 1206 | } 1207 | } 1208 | 1209 | ++current_device; 1210 | } 1211 | 1212 | return max_perf_device; 1213 | } 1214 | 
1215 | 1216 | // Initialization code to find the best CUDA Device 1217 | inline int findCudaDevice(int argc, const char **argv) 1218 | { 1219 | cudaDeviceProp deviceProp; 1220 | int devID = 0; 1221 | 1222 | // If the command-line has a device number specified, use it 1223 | if (checkCmdLineFlag(argc, argv, "device")) 1224 | { 1225 | devID = getCmdLineArgumentInt(argc, argv, "device="); 1226 | 1227 | if (devID < 0) 1228 | { 1229 | printf("Invalid command line parameter\n "); 1230 | exit(EXIT_FAILURE); 1231 | } 1232 | else 1233 | { 1234 | devID = gpuDeviceInit(devID); 1235 | 1236 | if (devID < 0) 1237 | { 1238 | printf("exiting...\n"); 1239 | exit(EXIT_FAILURE); 1240 | } 1241 | } 1242 | } 1243 | else 1244 | { 1245 | // Otherwise pick the device with highest Gflops/s 1246 | devID = gpuGetMaxGflopsDeviceId(); 1247 | checkCudaErrors(cudaSetDevice(devID)); 1248 | checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID)); 1249 | printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n", devID, deviceProp.name, deviceProp.major, deviceProp.minor); 1250 | } 1251 | 1252 | return devID; 1253 | } 1254 | 1255 | // General check for CUDA GPU SM Capabilities 1256 | inline bool checkCudaCapabilities(int major_version, int minor_version) 1257 | { 1258 | cudaDeviceProp deviceProp; 1259 | deviceProp.major = 0; 1260 | deviceProp.minor = 0; 1261 | int dev; 1262 | 1263 | checkCudaErrors(cudaGetDevice(&dev)); 1264 | checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev)); 1265 | 1266 | if ((deviceProp.major > major_version) || 1267 | (deviceProp.major == major_version && deviceProp.minor >= minor_version)) 1268 | { 1269 | printf(" Device %d: <%16s >, Compute SM %d.%d detected\n", dev, deviceProp.name, deviceProp.major, deviceProp.minor); 1270 | return true; 1271 | } 1272 | else 1273 | { 1274 | printf(" No GPU device was found that can support CUDA compute capability %d.%d.\n", major_version, minor_version); 1275 | return false; 1276 | } 1277 | } 1278 | #endif 1279 | 1280 | // end of CUDA Helper Functions 1281 | 1282 | 1283 | #endif 1284 | -------------------------------------------------------------------------------- /RSCNNEQ/utils/cinclude/helper_string.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 1993-2013 NVIDIA Corporation. All rights reserved. 3 | * 4 | * Please refer to the NVIDIA end user license agreement (EULA) associated 5 | * with this source code for terms and conditions that govern your use of 6 | * this software. Any use, reproduction, disclosure, or distribution of 7 | * this software and related documentation outside the terms of the EULA 8 | * is strictly prohibited. 
9 | * 10 | */ 11 | 12 | // These are helper functions for the SDK samples (string parsing, timers, etc) 13 | #ifndef STRING_HELPER_H 14 | #define STRING_HELPER_H 15 | 16 | #include <stdio.h> 17 | #include <stdlib.h> 18 | #include <fstream> 19 | #include <string> 20 | 21 | #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) 22 | #ifndef _CRT_SECURE_NO_DEPRECATE 23 | #define _CRT_SECURE_NO_DEPRECATE 24 | #endif 25 | #ifndef STRCASECMP 26 | #define STRCASECMP _stricmp 27 | #endif 28 | #ifndef STRNCASECMP 29 | #define STRNCASECMP _strnicmp 30 | #endif 31 | #ifndef STRCPY 32 | #define STRCPY(sFilePath, nLength, sPath) strcpy_s(sFilePath, nLength, sPath) 33 | #endif 34 | 35 | #ifndef FOPEN 36 | #define FOPEN(fHandle,filename,mode) fopen_s(&fHandle, filename, mode) 37 | #endif 38 | #ifndef FOPEN_FAIL 39 | #define FOPEN_FAIL(result) (result != 0) 40 | #endif 41 | #ifndef SSCANF 42 | #define SSCANF sscanf_s 43 | #endif 44 | #ifndef SPRINTF 45 | #define SPRINTF sprintf_s 46 | #endif 47 | #else // Linux Includes 48 | #include <string.h> 49 | #include <strings.h> 50 | 51 | #ifndef STRCASECMP 52 | #define STRCASECMP strcasecmp 53 | #endif 54 | #ifndef STRNCASECMP 55 | #define STRNCASECMP strncasecmp 56 | #endif 57 | #ifndef STRCPY 58 | #define STRCPY(sFilePath, nLength, sPath) strcpy(sFilePath, sPath) 59 | #endif 60 | 61 | #ifndef FOPEN 62 | #define FOPEN(fHandle,filename,mode) (fHandle = fopen(filename, mode)) 63 | #endif 64 | #ifndef FOPEN_FAIL 65 | #define FOPEN_FAIL(result) (result == NULL) 66 | #endif 67 | #ifndef SSCANF 68 | #define SSCANF sscanf 69 | #endif 70 | #ifndef SPRINTF 71 | #define SPRINTF sprintf 72 | #endif 73 | #endif 74 | 75 | #ifndef EXIT_WAIVED 76 | #define EXIT_WAIVED 2 77 | #endif 78 | 79 | // CUDA Utility Helper Functions 80 | inline int stringRemoveDelimiter(char delimiter, const char *string) 81 | { 82 | int string_start = 0; 83 | 84 | while (string[string_start] == delimiter) 85 | { 86 | string_start++; 87 | } 88 | 89 | if (string_start >= (int)strlen(string)-1) 90 | { 91 | return 0; 92 | } 93 | 94 | return string_start; 95 | } 96 | 97 | inline int getFileExtension(char *filename, char **extension) 98 | { 99 | int string_length = (int)strlen(filename); 100 | 101 | while (filename[string_length--] != '.') 102 | { 103 | if (string_length == 0) 104 | break; 105 | } 106 | 107 | if (string_length > 0) string_length += 2; 108 | 109 | if (string_length == 0) 110 | *extension = NULL; 111 | else 112 | *extension = &filename[string_length]; 113 | 114 | return string_length; 115 | } 116 | 117 | 118 | inline bool checkCmdLineFlag(const int argc, const char **argv, const char *string_ref) 119 | { 120 | bool bFound = false; 121 | 122 | if (argc >= 1) 123 | { 124 | for (int i=1; i < argc; i++) 125 | { 126 | int string_start = stringRemoveDelimiter('-', argv[i]); 127 | const char *string_argv = &argv[i][string_start]; 128 | 129 | const char *equal_pos = strchr(string_argv, '='); 130 | int argv_length = (int)(equal_pos == 0 ? 
strlen(string_argv) : equal_pos - string_argv); 131 | 132 | int length = (int)strlen(string_ref); 133 | 134 | if (length == argv_length && !STRNCASECMP(string_argv, string_ref, length)) 135 | { 136 | bFound = true; 137 | continue; 138 | } 139 | } 140 | } 141 | 142 | return bFound; 143 | } 144 | 145 | // This function wraps the CUDA Driver API into a template function 146 | template <class T> 147 | inline bool getCmdLineArgumentValue(const int argc, const char **argv, const char *string_ref, T *value) 148 | { 149 | bool bFound = false; 150 | 151 | if (argc >= 1) 152 | { 153 | for (int i=1; i < argc; i++) 154 | { 155 | int string_start = stringRemoveDelimiter('-', argv[i]); 156 | const char *string_argv = &argv[i][string_start]; 157 | int length = (int)strlen(string_ref); 158 | 159 | if (!STRNCASECMP(string_argv, string_ref, length)) 160 | { 161 | if (length+1 <= (int)strlen(string_argv)) 162 | { 163 | int auto_inc = (string_argv[length] == '=') ? 1 : 0; 164 | *value = (T)atoi(&string_argv[length + auto_inc]); 165 | } 166 | 167 | bFound = true; 168 | i=argc; 169 | } 170 | } 171 | } 172 | 173 | return bFound; 174 | } 175 | 176 | inline int getCmdLineArgumentInt(const int argc, const char **argv, const char *string_ref) 177 | { 178 | bool bFound = false; 179 | int value = -1; 180 | 181 | if (argc >= 1) 182 | { 183 | for (int i=1; i < argc; i++) 184 | { 185 | int string_start = stringRemoveDelimiter('-', argv[i]); 186 | const char *string_argv = &argv[i][string_start]; 187 | int length = (int)strlen(string_ref); 188 | 189 | if (!STRNCASECMP(string_argv, string_ref, length)) 190 | { 191 | if (length+1 <= (int)strlen(string_argv)) 192 | { 193 | int auto_inc = (string_argv[length] == '=') ? 1 : 0; 194 | value = atoi(&string_argv[length + auto_inc]); 195 | } 196 | else 197 | { 198 | value = 0; 199 | } 200 | 201 | bFound = true; 202 | continue; 203 | } 204 | } 205 | } 206 | 207 | if (bFound) 208 | { 209 | return value; 210 | } 211 | else 212 | { 213 | return 0; 214 | } 215 | } 216 | 217 | inline float getCmdLineArgumentFloat(const int argc, const char **argv, const char *string_ref) 218 | { 219 | bool bFound = false; 220 | float value = -1; 221 | 222 | if (argc >= 1) 223 | { 224 | for (int i=1; i < argc; i++) 225 | { 226 | int string_start = stringRemoveDelimiter('-', argv[i]); 227 | const char *string_argv = &argv[i][string_start]; 228 | int length = (int)strlen(string_ref); 229 | 230 | if (!STRNCASECMP(string_argv, string_ref, length)) 231 | { 232 | if (length+1 <= (int)strlen(string_argv)) 233 | { 234 | int auto_inc = (string_argv[length] == '=') ? 
1 : 0; 235 | value = (float)atof(&string_argv[length + auto_inc]); 236 | } 237 | else 238 | { 239 | value = 0.f; 240 | } 241 | 242 | bFound = true; 243 | continue; 244 | } 245 | } 246 | } 247 | 248 | if (bFound) 249 | { 250 | return value; 251 | } 252 | else 253 | { 254 | return 0; 255 | } 256 | } 257 | 258 | inline bool getCmdLineArgumentString(const int argc, const char **argv, 259 | const char *string_ref, char **string_retval) 260 | { 261 | bool bFound = false; 262 | 263 | if (argc >= 1) 264 | { 265 | for (int i=1; i < argc; i++) 266 | { 267 | int string_start = stringRemoveDelimiter('-', argv[i]); 268 | char *string_argv = (char *)&argv[i][string_start]; 269 | int length = (int)strlen(string_ref); 270 | 271 | if (!STRNCASECMP(string_argv, string_ref, length)) 272 | { 273 | *string_retval = &string_argv[length+1]; 274 | bFound = true; 275 | continue; 276 | } 277 | } 278 | } 279 | 280 | if (!bFound) 281 | { 282 | *string_retval = NULL; 283 | } 284 | 285 | return bFound; 286 | } 287 | 288 | ////////////////////////////////////////////////////////////////////////////// 289 | //! Find the path for a file assuming that 290 | //! files are found in the searchPath. 291 | //! 292 | //! @return the path if succeeded, otherwise 0 293 | //! @param filename name of the file 294 | //! @param executable_path optional absolute path of the executable 295 | ////////////////////////////////////////////////////////////////////////////// 296 | inline char *sdkFindFilePath(const char *filename, const char *executable_path) 297 | { 298 | // defines a variable that is replaced with the name of the executable 299 | 300 | // Typical relative search paths to locate needed companion files (e.g. sample input data, or JIT source files) 301 | // The origin for the relative search may be the .exe file, a .bat file launching an .exe, a browser .exe launching the .exe or .bat, etc 302 | const char *searchPath[] = 303 | { 304 | "./", // same dir 305 | "./_data_files/", 306 | "./common/", // "/common/" subdir 307 | "./common/data/", // "/common/data/" subdir 308 | "./data/", // "/data/" subdir 309 | "./src/", // "/src/" subdir 310 | "./src//data/", // "/src//data/" subdir 311 | "./inc/", // "/inc/" subdir 312 | "./0_Simple/", // "/0_Simple/" subdir 313 | "./1_Utilities/", // "/1_Utilities/" subdir 314 | "./2_Graphics/", // "/2_Graphics/" subdir 315 | "./3_Imaging/", // "/3_Imaging/" subdir 316 | "./4_Finance/", // "/4_Finance/" subdir 317 | "./5_Simulations/", // "/5_Simulations/" subdir 318 | "./6_Advanced/", // "/6_Advanced/" subdir 319 | "./7_CUDALibraries/", // "/7_CUDALibraries/" subdir 320 | "./8_Android/", // "/8_Android/" subdir 321 | "./samples/", // "/samples/" subdir 322 | 323 | "./0_Simple//data/", // "/0_Simple//data/" subdir 324 | "./1_Utilities//data/", // "/1_Utilities//data/" subdir 325 | "./2_Graphics//data/", // "/2_Graphics//data/" subdir 326 | "./3_Imaging//data/", // "/3_Imaging//data/" subdir 327 | "./4_Finance//data/", // "/4_Finance//data/" subdir 328 | "./5_Simulations//data/", // "/5_Simulations//data/" subdir 329 | "./6_Advanced//data/", // "/6_Advanced//data/" subdir 330 | "./7_CUDALibraries//", // "/7_CUDALibraries//" subdir 331 | "./7_CUDALibraries//data/", // "/7_CUDALibraries//data/" subdir 332 | 333 | "../", // up 1 in tree 334 | "../common/", // up 1 in tree, "/common/" subdir 335 | "../common/data/", // up 1 in tree, "/common/data/" subdir 336 | "../data/", // up 1 in tree, "/data/" subdir 337 | "../src/", // up 1 in tree, "/src/" subdir 338 | "../inc/", // up 1 in tree, "/inc/" 
subdir 339 | 340 | "../0_Simple//data/", // up 1 in tree, "/0_Simple//" subdir 341 | "../1_Utilities//data/", // up 1 in tree, "/1_Utilities//" subdir 342 | "../2_Graphics//data/", // up 1 in tree, "/2_Graphics//" subdir 343 | "../3_Imaging//data/", // up 1 in tree, "/3_Imaging//" subdir 344 | "../4_Finance//data/", // up 1 in tree, "/4_Finance//" subdir 345 | "../5_Simulations//data/", // up 1 in tree, "/5_Simulations//" subdir 346 | "../6_Advanced//data/", // up 1 in tree, "/6_Advanced//" subdir 347 | "../7_CUDALibraries//data/",// up 1 in tree, "/7_CUDALibraries//" subdir 348 | "../8_Android//data/", // up 1 in tree, "/8_Android//" subdir 349 | "../samples//data/", // up 1 in tree, "/samples//" subdir 350 | "../../", // up 2 in tree 351 | "../../common/", // up 2 in tree, "/common/" subdir 352 | "../../common/data/", // up 2 in tree, "/common/data/" subdir 353 | "../../data/", // up 2 in tree, "/data/" subdir 354 | "../../src/", // up 2 in tree, "/src/" subdir 355 | "../../inc/", // up 2 in tree, "/inc/" subdir 356 | "../../sandbox//data/", // up 2 in tree, "/sandbox//" subdir 357 | "../../0_Simple//data/", // up 2 in tree, "/0_Simple//" subdir 358 | "../../1_Utilities//data/", // up 2 in tree, "/1_Utilities//" subdir 359 | "../../2_Graphics//data/", // up 2 in tree, "/2_Graphics//" subdir 360 | "../../3_Imaging//data/", // up 2 in tree, "/3_Imaging//" subdir 361 | "../../4_Finance//data/", // up 2 in tree, "/4_Finance//" subdir 362 | "../../5_Simulations//data/", // up 2 in tree, "/5_Simulations//" subdir 363 | "../../6_Advanced//data/", // up 2 in tree, "/6_Advanced//" subdir 364 | "../../7_CUDALibraries//data/", // up 2 in tree, "/7_CUDALibraries//" subdir 365 | "../../8_Android//data/", // up 2 in tree, "/8_Android//" subdir 366 | "../../samples//data/", // up 2 in tree, "/samples//" subdir 367 | "../../../", // up 3 in tree 368 | "../../../src//", // up 3 in tree, "/src//" subdir 369 | "../../../src//data/", // up 3 in tree, "/src//data/" subdir 370 | "../../../src//src/", // up 3 in tree, "/src//src/" subdir 371 | "../../../src//inc/", // up 3 in tree, "/src//inc/" subdir 372 | "../../../sandbox//", // up 3 in tree, "/sandbox//" subdir 373 | "../../../sandbox//data/", // up 3 in tree, "/sandbox//data/" subdir 374 | "../../../sandbox//src/", // up 3 in tree, "/sandbox//src/" subdir 375 | "../../../sandbox//inc/", // up 3 in tree, "/sandbox//inc/" subdir 376 | "../../../0_Simple//data/", // up 3 in tree, "/0_Simple//" subdir 377 | "../../../1_Utilities//data/", // up 3 in tree, "/1_Utilities//" subdir 378 | "../../../2_Graphics//data/", // up 3 in tree, "/2_Graphics//" subdir 379 | "../../../3_Imaging//data/", // up 3 in tree, "/3_Imaging//" subdir 380 | "../../../4_Finance//data/", // up 3 in tree, "/4_Finance//" subdir 381 | "../../../5_Simulations//data/", // up 3 in tree, "/5_Simulations//" subdir 382 | "../../../6_Advanced//data/", // up 3 in tree, "/6_Advanced//" subdir 383 | "../../../7_CUDALibraries//data/", // up 3 in tree, "/7_CUDALibraries//" subdir 384 | "../../../8_Android//data/", // up 3 in tree, "/8_Android//" subdir 385 | "../../../0_Simple//", // up 3 in tree, "/0_Simple//" subdir 386 | "../../../1_Utilities//", // up 3 in tree, "/1_Utilities//" subdir 387 | "../../../2_Graphics//", // up 3 in tree, "/2_Graphics//" subdir 388 | "../../../3_Imaging//", // up 3 in tree, "/3_Imaging//" subdir 389 | "../../../4_Finance//", // up 3 in tree, "/4_Finance//" subdir 390 | "../../../5_Simulations//", // up 3 in tree, "/5_Simulations//" subdir 391 | "../../../6_Advanced//", 
// up 3 in tree, "/6_Advanced//" subdir 392 | "../../../7_CUDALibraries//", // up 3 in tree, "/7_CUDALibraries//" subdir 393 | "../../../8_Android//", // up 3 in tree, "/8_Android//" subdir 394 | "../../../samples//data/", // up 3 in tree, "/samples//" subdir 395 | "../../../common/", // up 3 in tree, "../../../common/" subdir 396 | "../../../common/data/", // up 3 in tree, "../../../common/data/" subdir 397 | "../../../data/", // up 3 in tree, "../../../data/" subdir 398 | "../../../../", // up 4 in tree 399 | "../../../../src//", // up 4 in tree, "/src//" subdir 400 | "../../../../src//data/", // up 4 in tree, "/src//data/" subdir 401 | "../../../../src//src/", // up 4 in tree, "/src//src/" subdir 402 | "../../../../src//inc/", // up 4 in tree, "/src//inc/" subdir 403 | "../../../../sandbox//", // up 4 in tree, "/sandbox//" subdir 404 | "../../../../sandbox//data/", // up 4 in tree, "/sandbox//data/" subdir 405 | "../../../../sandbox//src/", // up 4 in tree, "/sandbox//src/" subdir 406 | "../../../../sandbox//inc/", // up 4 in tree, "/sandbox//inc/" subdir 407 | "../../../../0_Simple//data/", // up 4 in tree, "/0_Simple//" subdir 408 | "../../../../1_Utilities//data/", // up 4 in tree, "/1_Utilities//" subdir 409 | "../../../../2_Graphics//data/", // up 4 in tree, "/2_Graphics//" subdir 410 | "../../../../3_Imaging//data/", // up 4 in tree, "/3_Imaging//" subdir 411 | "../../../../4_Finance//data/", // up 4 in tree, "/4_Finance//" subdir 412 | "../../../../5_Simulations//data/",// up 4 in tree, "/5_Simulations//" subdir 413 | "../../../../6_Advanced//data/", // up 4 in tree, "/6_Advanced//" subdir 414 | "../../../../7_CUDALibraries//data/", // up 4 in tree, "/7_CUDALibraries//" subdir 415 | "../../../../8_Android//data/", // up 4 in tree, "/8_Android//" subdir 416 | "../../../../0_Simple//", // up 4 in tree, "/0_Simple//" subdir 417 | "../../../../1_Utilities//", // up 4 in tree, "/1_Utilities//" subdir 418 | "../../../../2_Graphics//", // up 4 in tree, "/2_Graphics//" subdir 419 | "../../../../3_Imaging//", // up 4 in tree, "/3_Imaging//" subdir 420 | "../../../../4_Finance//", // up 4 in tree, "/4_Finance//" subdir 421 | "../../../../5_Simulations//",// up 4 in tree, "/5_Simulations//" subdir 422 | "../../../../6_Advanced//", // up 4 in tree, "/6_Advanced//" subdir 423 | "../../../../7_CUDALibraries//", // up 4 in tree, "/7_CUDALibraries//" subdir 424 | "../../../../8_Android//", // up 4 in tree, "/8_Android//" subdir 425 | "../../../../samples//data/", // up 4 in tree, "/samples//" subdir 426 | "../../../../common/", // up 4 in tree, "../../../common/" subdir 427 | "../../../../common/data/", // up 4 in tree, "../../../common/data/" subdir 428 | "../../../../data/", // up 4 in tree, "../../../data/" subdir 429 | "../../../../../", // up 5 in tree 430 | "../../../../../src//", // up 5 in tree, "/src//" subdir 431 | "../../../../../src//data/", // up 5 in tree, "/src//data/" subdir 432 | "../../../../../src//src/", // up 5 in tree, "/src//src/" subdir 433 | "../../../../../src//inc/", // up 5 in tree, "/src//inc/" subdir 434 | "../../../../../sandbox//", // up 5 in tree, "/sandbox//" subdir 435 | "../../../../../sandbox//data/", // up 5 in tree, "/sandbox//data/" subdir 436 | "../../../../../sandbox//src/", // up 5 in tree, "/sandbox//src/" subdir 437 | "../../../../../sandbox//inc/", // up 5 in tree, "/sandbox//inc/" subdir 438 | "../../../../../0_Simple//data/", // up 5 in tree, "/0_Simple//" subdir 439 | "../../../../../1_Utilities//data/", // up 5 in tree, "/1_Utilities//" subdir 
440 | "../../../../../2_Graphics//data/", // up 5 in tree, "/2_Graphics//" subdir 441 | "../../../../../3_Imaging//data/", // up 5 in tree, "/3_Imaging//" subdir 442 | "../../../../../4_Finance//data/", // up 5 in tree, "/4_Finance//" subdir 443 | "../../../../../5_Simulations//data/",// up 5 in tree, "/5_Simulations//" subdir 444 | "../../../../../6_Advanced//data/", // up 5 in tree, "/6_Advanced//" subdir 445 | "../../../../../7_CUDALibraries//data/", // up 5 in tree, "/7_CUDALibraries//" subdir 446 | "../../../../../8_Android//data/", // up 5 in tree, "/8_Android//" subdir 447 | "../../../../../samples//data/", // up 5 in tree, "/samples//" subdir 448 | "../../../../../common/", // up 5 in tree, "../../../common/" subdir 449 | "../../../../../common/data/", // up 5 in tree, "../../../common/data/" subdir 450 | }; 451 | 452 | // Extract the executable name 453 | std::string executable_name; 454 | 455 | if (executable_path != 0) 456 | { 457 | executable_name = std::string(executable_path); 458 | 459 | #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) 460 | // Windows path delimiter 461 | size_t delimiter_pos = executable_name.find_last_of('\\'); 462 | executable_name.erase(0, delimiter_pos + 1); 463 | 464 | if (executable_name.rfind(".exe") != std::string::npos) 465 | { 466 | // we strip .exe, only if the .exe is found 467 | executable_name.resize(executable_name.size() - 4); 468 | } 469 | 470 | #else 471 | // Linux & OSX path delimiter 472 | size_t delimiter_pos = executable_name.find_last_of('/'); 473 | executable_name.erase(0,delimiter_pos+1); 474 | #endif 475 | } 476 | 477 | // Loop over all search paths and return the first hit 478 | for (unsigned int i = 0; i < sizeof(searchPath)/sizeof(char *); ++i) 479 | { 480 | std::string path(searchPath[i]); 481 | size_t executable_name_pos = path.find(""); 482 | 483 | // If there is executable_name variable in the searchPath 484 | // replace it with the value 485 | if (executable_name_pos != std::string::npos) 486 | { 487 | if (executable_path != 0) 488 | { 489 | path.replace(executable_name_pos, strlen(""), executable_name); 490 | } 491 | else 492 | { 493 | // Skip this path entry if no executable argument is given 494 | continue; 495 | } 496 | } 497 | 498 | #ifdef _DEBUG 499 | printf("sdkFindFilePath <%s> in %s\n", filename, path.c_str()); 500 | #endif 501 | 502 | // Test if the file exists 503 | path.append(filename); 504 | FILE *fp; 505 | FOPEN(fp, path.c_str(), "rb"); 506 | 507 | if (fp != NULL) 508 | { 509 | fclose(fp); 510 | // File found 511 | // returning an allocated array here for backwards compatibility reasons 512 | char *file_path = (char *) malloc(path.length() + 1); 513 | STRCPY(file_path, path.length() + 1, path.c_str()); 514 | return file_path; 515 | } 516 | 517 | if (fp) 518 | { 519 | fclose(fp); 520 | } 521 | } 522 | 523 | // File not found 524 | return 0; 525 | } 526 | 527 | #endif 528 | -------------------------------------------------------------------------------- /RSCNNEQ/utils/cinclude/interpolate_gpu.h: -------------------------------------------------------------------------------- 1 | #ifndef _INTERPOLATE_GPU_H 2 | #define _INTERPOLATE_GPU_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown, 9 | const float *known, float *dist2, int *idx, 10 | cudaStream_t stream); 11 | 12 | void three_interpolate_kernel_wrapper(int b, int c, int m, int n, 13 | const float *points, const int *idx, 14 | const float *weight, 
float *out, 15 | cudaStream_t stream); 16 | 17 | void three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m, 18 | const float *grad_out, 19 | const int *idx, const float *weight, 20 | float *grad_points, 21 | cudaStream_t stream); 22 | 23 | #ifdef __cplusplus 24 | } 25 | #endif 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /RSCNNEQ/utils/cinclude/interpolate_wrapper.h: -------------------------------------------------------------------------------- 1 | 2 | 3 | void three_nn_wrapper(int b, int n, int m, THCudaTensor *unknown_tensor, 4 | THCudaTensor *known_tensor, THCudaTensor *dist2_tensor, 5 | THCudaIntTensor *idx_tensor); 6 | void three_interpolate_wrapper(int b, int c, int m, int n, 7 | THCudaTensor *points_tensor, 8 | THCudaIntTensor *idx_tensor, 9 | THCudaTensor *weight_tensor, 10 | THCudaTensor *out_tensor); 11 | 12 | void three_interpolate_grad_wrapper(int b, int c, int n, int m, 13 | THCudaTensor *grad_out_tensor, 14 | THCudaIntTensor *idx_tensor, 15 | THCudaTensor *weight_tensor, 16 | THCudaTensor *grad_points_tensor); 17 | -------------------------------------------------------------------------------- /RSCNNEQ/utils/cinclude/sampling_gpu.h: -------------------------------------------------------------------------------- 1 | #ifndef _SAMPLING_GPU_H 2 | #define _SAMPLING_GPU_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | void gather_points_kernel_wrapper(int b, int c, int n, int npoints, 9 | const float *points, const int *idx, 10 | float *out, cudaStream_t stream); 11 | 12 | void gather_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 13 | const float *grad_out, const int *idx, 14 | float *grad_points, cudaStream_t stream); 15 | 16 | void furthest_point_sampling_kernel_wrapper(int b, int n, int m, 17 | const float *dataset, float *temp, 18 | int *idxs, cudaStream_t stream); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | #endif 24 | -------------------------------------------------------------------------------- /RSCNNEQ/utils/cinclude/sampling_wrapper.h: -------------------------------------------------------------------------------- 1 | 2 | int gather_points_wrapper(int b, int c, int n, int npoints, 3 | THCudaTensor *points_tensor, 4 | THCudaIntTensor *idx_tensor, 5 | THCudaTensor *out_tensor); 6 | int gather_points_grad_wrapper(int b, int c, int n, int npoints, 7 | THCudaTensor *grad_out_tensor, 8 | THCudaIntTensor *idx_tensor, 9 | THCudaTensor *grad_points_tensor); 10 | 11 | int furthest_point_sampling_wrapper(int b, int n, int m, 12 | THCudaTensor *points_tensor, 13 | THCudaTensor *temp_tensor, 14 | THCudaIntTensor *idx_tensor); 15 | -------------------------------------------------------------------------------- /RSCNNEQ/utils/csrc/ellipsoid_query.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "ellipsoid_query_gpu.h" 4 | 5 | extern THCState *state; 6 | 7 | int ellipsoid_query_wrapper(int b, int n, int m, float e1, float e2, float e3, int nsample, 8 | THCudaTensor *new_xyz_tensor, THCudaTensor *xyz_tensor, THCudaIntTensor *fps_idx_tensor, 9 | THCudaIntTensor *idx_tensor,THCudaIntTensor *ingroup_pts_cnt_tensor, THCudaTensor *ingroup_out_tensor, THCudaTensor *ingroup_cva_tensor, THCudaTensor *v_tensor,THCudaTensor *d_tensor) { 10 | 11 | const float *new_xyz = THCudaTensor_data(state, new_xyz_tensor); 12 | const float *xyz = THCudaTensor_data(state, xyz_tensor); 13 | const int *fps_idx = 
THCudaIntTensor_data(state, fps_idx_tensor); 14 | int *idx = THCudaIntTensor_data(state, idx_tensor); 15 | //below tensors added by me 16 | int *ingroup_pts_cnt = THCudaIntTensor_data(state, ingroup_pts_cnt_tensor); 17 | float *ingroup_out = THCudaTensor_data(state, ingroup_out_tensor); 18 | float *ingroup_cva = THCudaTensor_data(state, ingroup_cva_tensor); 19 | float *v = THCudaTensor_data(state, v_tensor); 20 | float *d = THCudaTensor_data(state, d_tensor); 21 | 22 | cudaStream_t stream = THCState_getCurrentStream(state); 23 | 24 | query_ellipsoid_point_kernel_wrapper(b, n, m, e1, e2, e3, nsample, new_xyz, xyz, fps_idx, idx, ingroup_pts_cnt, ingroup_out, ingroup_cva, v, d, 25 | stream); 26 | return 1; 27 | } 28 | -------------------------------------------------------------------------------- /RSCNNEQ/utils/csrc/ellipsoid_query_gpu.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include // memset 8 | #include // rand, RAND_MAX 9 | #include // sqrtf 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include "helper_cuda.h" 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include "ellipsoid_query_gpu.h" 32 | #include "cuda_utils.h" 33 | int row = 0; 34 | int col = 0; 35 | using namespace std; 36 | const int max_iter = 1000; 37 | 38 | /* ---------------------------------------------------------------- */ 39 | // 40 | // the following functions come from here: 41 | // 42 | // https://people.sc.fsu.edu/~jburkardt/cpp_src/jacobi_eigenvalue/jacobi_eigenvalue.cpp 43 | // 44 | // attributed to j. 
burkardt, FSU 45 | // they are unmodified except to add __host__ __device__ decorations 46 | // 47 | //****************************************************************************80 48 | __device__ void r8mat_diag_get_vector(int n, float a[], float v[]) 49 | { 50 | int i; 51 | 52 | for ( i = 0; i < n; i++ ) 53 | { 54 | v[i] = a[i+i*n]; 55 | } 56 | 57 | return; 58 | } 59 | //****************************************************************************80 60 | __device__ void r8mat_identity(int n, float a[]) 61 | { 62 | int i; 63 | int j; 64 | int k; 65 | 66 | k = 0; 67 | for ( j = 0; j < n; j++ ) 68 | { 69 | for ( i = 0; i < n; i++ ) 70 | { 71 | if ( i == j ) 72 | { 73 | a[k] = 1.0; 74 | } 75 | else 76 | { 77 | a[k] = 0.0; 78 | } 79 | k = k + 1; 80 | } 81 | } 82 | 83 | return; 84 | } 85 | //****************************************************************************80 86 | __device__ void jacobi_eigenvalue(int n, float a[], int it_max, float v[], float d[], int &it_num, int &rot_num) 87 | { 88 | float *bw; 89 | float c; 90 | float g; 91 | float gapq; 92 | float h; 93 | int i; 94 | int j; 95 | int k; 96 | int l; 97 | int m; 98 | int p; 99 | int q; 100 | float s; 101 | float t; 102 | float tau; 103 | float term; 104 | float termp; 105 | float termq; 106 | float theta; 107 | float thresh; 108 | float w; 109 | float *zw; 110 | 111 | r8mat_identity ( n, v ); 112 | 113 | r8mat_diag_get_vector ( n, a, d ); 114 | 115 | bw = new float[n]; 116 | zw = new float[n]; 117 | 118 | for ( i = 0; i < n; i++ ) 119 | { 120 | bw[i] = d[i]; 121 | zw[i] = 0.0; 122 | } 123 | it_num = 0; 124 | rot_num = 0; 125 | 126 | while ( it_num < it_max ) 127 | { 128 | it_num = it_num + 1; 129 | // 130 | // The convergence threshold is based on the size of the elements in 131 | // the strict upper triangle of the matrix. 132 | // 133 | thresh = 0.0; 134 | for ( j = 0; j < n; j++ ) 135 | { 136 | for ( i = 0; i < j; i++ ) 137 | { 138 | thresh = thresh + a[i+j*n] * a[i+j*n]; 139 | } 140 | } 141 | 142 | thresh = sqrt ( thresh ) / ( float ) ( 4 * n ); 143 | 144 | if ( thresh == 0.0 ) 145 | { 146 | break; 147 | } 148 | 149 | for ( p = 0; p < n; p++ ) 150 | { 151 | for ( q = p + 1; q < n; q++ ) 152 | { 153 | gapq = 10.0 * fabs ( a[p+q*n] ); 154 | termp = gapq + fabs ( d[p] ); 155 | termq = gapq + fabs ( d[q] ); 156 | // 157 | // Annihilate tiny offdiagonal elements. 158 | // 159 | if ( 4 < it_num && 160 | termp == fabs ( d[p] ) && 161 | termq == fabs ( d[q] ) ) 162 | { 163 | a[p+q*n] = 0.0; 164 | } 165 | // 166 | // Otherwise, apply a rotation. 167 | // 168 | else if ( thresh <= fabs ( a[p+q*n] ) ) 169 | { 170 | h = d[q] - d[p]; 171 | term = fabs ( h ) + gapq; 172 | 173 | if ( term == fabs ( h ) ) 174 | { 175 | t = a[p+q*n] / h; 176 | } 177 | else 178 | { 179 | theta = 0.5 * h / a[p+q*n]; 180 | t = 1.0 / ( fabs ( theta ) + sqrt ( 1.0 + theta * theta ) ); 181 | if ( theta < 0.0 ) 182 | { 183 | t = - t; 184 | } 185 | } 186 | c = 1.0 / sqrt ( 1.0 + t * t ); 187 | s = t * c; 188 | tau = s / ( 1.0 + c ); 189 | h = t * a[p+q*n]; 190 | // 191 | // Accumulate corrections to diagonal elements. 192 | // 193 | zw[p] = zw[p] - h; 194 | zw[q] = zw[q] + h; 195 | d[p] = d[p] - h; 196 | d[q] = d[q] + h; 197 | 198 | a[p+q*n] = 0.0; 199 | // 200 | // Rotate, using information from the upper triangle of A only. 
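// (Reference for the update below, added for clarity: this is the classical
// Jacobi rotation. With theta = (d[q] - d[p]) / (2 * a[p+q*n]), the tangent
// is computed stably as t = sign(theta) / (|theta| + sqrt(1 + theta^2)),
// giving c = 1 / sqrt(1 + t^2), s = t * c and tau = s / (1 + c). The
// row/column updates g - s*(h + g*tau) and h + s*(g - h*tau) then annihilate
// the off-diagonal entry a[p][q] while accumulating the eigenvectors in v.)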
201 | // 202 | for ( j = 0; j < p; j++ ) 203 | { 204 | g = a[j+p*n]; 205 | h = a[j+q*n]; 206 | a[j+p*n] = g - s * ( h + g * tau ); 207 | a[j+q*n] = h + s * ( g - h * tau ); 208 | } 209 | 210 | for ( j = p + 1; j < q; j++ ) 211 | { 212 | g = a[p+j*n]; 213 | h = a[j+q*n]; 214 | a[p+j*n] = g - s * ( h + g * tau ); 215 | a[j+q*n] = h + s * ( g - h * tau ); 216 | } 217 | 218 | for ( j = q + 1; j < n; j++ ) 219 | { 220 | g = a[p+j*n]; 221 | h = a[q+j*n]; 222 | a[p+j*n] = g - s * ( h + g * tau ); 223 | a[q+j*n] = h + s * ( g - h * tau ); 224 | } 225 | // 226 | // Accumulate information in the eigenvector matrix. 227 | // 228 | for ( j = 0; j < n; j++ ) 229 | { 230 | g = v[j+p*n]; 231 | h = v[j+q*n]; 232 | v[j+p*n] = g - s * ( h + g * tau ); 233 | v[j+q*n] = h + s * ( g - h * tau ); 234 | } 235 | rot_num = rot_num + 1; 236 | } 237 | } 238 | } 239 | 240 | for ( i = 0; i < n; i++ ) 241 | { 242 | bw[i] = bw[i] + zw[i]; 243 | d[i] = bw[i]; 244 | zw[i] = 0.0; 245 | } 246 | } 247 | // 248 | // Restore upper triangle of input matrix. 249 | // 250 | for ( j = 0; j < n; j++ ) 251 | { 252 | for ( i = 0; i < j; i++ ) 253 | { 254 | a[i+j*n] = a[j+i*n]; 255 | } 256 | } 257 | // 258 | // Ascending sort the eigenvalues and eigenvectors. 259 | // 260 | for ( k = 0; k < n - 1; k++ ) 261 | { 262 | m = k; 263 | for ( l = k + 1; l < n; l++ ) 264 | { 265 | if ( d[l] < d[m] ) 266 | { 267 | m = l; 268 | } 269 | } 270 | 271 | if ( m != k ) 272 | { 273 | t = d[m]; 274 | d[m] = d[k]; 275 | d[k] = t; 276 | for ( i = 0; i < n; i++ ) 277 | { 278 | w = v[i+m*n]; 279 | v[i+m*n] = v[i+k*n]; 280 | v[i+k*n] = w; 281 | } 282 | } 283 | } 284 | 285 | delete [] bw; 286 | delete [] zw; 287 | 288 | return; 289 | } 290 | 291 | void initialize_matrix(int mat_id, int n, float *mat, float *v){ 292 | 293 | for (int i = 0; i < n*n; i++) *(v+(mat_id*n*n)+i) = mat[i]; 294 | } 295 | 296 | // end of FSU code 297 | /* ---------------------------------------------------------------- */ 298 | 299 | //Ellipsoid querying 300 | // input: new_xyz(b, m, 3) xyz(b, n, 3) 301 | // output: idx(b, m, nsample) 302 | __global__ void query_ellipsoid_point_kernel(int b, int n, int m, float e1, float e2, float e3, 303 | int nsample, 304 | const float *__restrict__ new_xyz, 305 | const float *__restrict__ xyz, 306 | const int *__restrict__ fps_idx, 307 | int *__restrict__ idx, 308 | int *__restrict__ ingroup_pts_cnt, 309 | float *__restrict__ ingroup_out, 310 | float *__restrict__ ingroup_cva, 311 | float *__restrict__ v, 312 | float *__restrict__ d){ 313 | int batch_index = blockIdx.x; 314 | int c = 3; 315 | xyz += batch_index * n * 3; 316 | new_xyz += batch_index * m * 3; 317 | fps_idx += batch_index * m; 318 | idx += m * nsample * batch_index; 319 | ingroup_pts_cnt += m*batch_index; 320 | ingroup_out += m*nsample*3*batch_index; 321 | ingroup_cva += m*3*3*batch_index; 322 | v += m*3*3*batch_index; 323 | d += m*3*batch_index; 324 | 325 | int index = threadIdx.x; 326 | int stride = blockDim.x; 327 | //squares of axis-lengths 328 | float aa = e1 * e1; 329 | float bb = e2 * e2; 330 | float cc = e3 * e3; 331 | for (int j = index; j < m; j += stride) { 332 | float new_x = new_xyz[j * 3 + 0]; 333 | float new_y = new_xyz[j * 3 + 1]; 334 | float new_z = new_xyz[j * 3 + 2]; 335 | for (int l = 0; l < nsample; ++l) { 336 | idx[j * nsample + l] = fps_idx[j]; 337 | } 338 | int cnt = 0; 339 | for (int k = 0; k < n && cnt < nsample; ++k) { 340 | float x = xyz[k * 3 + 0]; 341 | float y = xyz[k * 3 + 1]; 342 | float z = xyz[k * 3 + 2]; 343 | //first round of ellipsoid querying 344 
| float d2 = max(sqrtf(((new_x - x) * (new_x - x)/aa) + ((new_y - y) * (new_y - y)/bb) + 345 | ((new_z - z) * (new_z - z)/cc)),1e-20f); 346 | if (d2 <= 1 && d2 > 0) { 347 | idx[j * nsample + cnt] = k; 348 | ++cnt; 349 | } 350 | } 351 | ingroup_pts_cnt[j] = cnt; 352 | 353 | //grouping of ellipsoid-queried points 354 | for (int k=0;k=3){ 365 | for(int k=0;k= e1/4.0){ 393 | //if more points are on one side of the centroid 394 | for(int up=0;up=3)){ 434 | //Eigendecomposition 435 | jacobi_eigenvalue(c, ingroup_cva+(j*c*c), max_iter, v+(j*c*c), d+(j*c), it_num, rot_num); 436 | cnt = ingroup_pts_cnt[j]; 437 | for (int k=0;k>>( 486 | b, n, m, e1, e2, e3, nsample, new_xyz, xyz, fps_idx, idx, ingroup_pts_cnt, ingroup_out, ingroup_cva, v, d); 487 | 488 | err = cudaGetLastError(); 489 | if (cudaSuccess != err) { 490 | fprintf(stderr, "CUDA kernel failed inside ellipsoid wrapper: %s\n", cudaGetErrorString(err)); 491 | exit(-1); 492 | } 493 | } 494 | -------------------------------------------------------------------------------- /RSCNNEQ/utils/csrc/group_points.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "group_points_gpu.h" 4 | 5 | extern THCState *state; 6 | 7 | int group_points_wrapper(int b, int c, int n, int npoints, int nsample, 8 | THCudaTensor *points_tensor, 9 | THCudaIntTensor *idx_tensor, 10 | THCudaTensor *out_tensor) { 11 | 12 | const float *points = THCudaTensor_data(state, points_tensor); 13 | const int *idx = THCudaIntTensor_data(state, idx_tensor); 14 | float *out = THCudaTensor_data(state, out_tensor); 15 | 16 | cudaStream_t stream = THCState_getCurrentStream(state); 17 | 18 | group_points_kernel_wrapper(b, c, n, npoints, nsample, points, idx, out, 19 | stream); 20 | return 1; 21 | } 22 | 23 | int group_points_grad_wrapper(int b, int c, int n, int npoints, int nsample, 24 | THCudaTensor *grad_out_tensor, 25 | THCudaIntTensor *idx_tensor, 26 | THCudaTensor *grad_points_tensor) { 27 | 28 | float *grad_points = THCudaTensor_data(state, grad_points_tensor); 29 | const int *idx = THCudaIntTensor_data(state, idx_tensor); 30 | const float *grad_out = THCudaTensor_data(state, grad_out_tensor); 31 | 32 | cudaStream_t stream = THCState_getCurrentStream(state); 33 | 34 | group_points_grad_kernel_wrapper(b, c, n, npoints, nsample, grad_out, idx, 35 | grad_points, stream); 36 | return 1; 37 | } 38 | -------------------------------------------------------------------------------- /RSCNNEQ/utils/csrc/group_points_gpu.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "cuda_utils.h" 5 | #include "group_points_gpu.h" 6 | 7 | // input: points(b, c, n) idx(b, npoints, nsample) 8 | // output: out(b, c, npoints, nsample) 9 | __global__ void group_points_kernel(int b, int c, int n, int npoints, 10 | int nsample, 11 | const float *__restrict__ points, 12 | const int *__restrict__ idx, 13 | float *__restrict__ out) { 14 | int batch_index = blockIdx.x; 15 | points += batch_index * n * c; 16 | idx += batch_index * npoints * nsample; 17 | out += batch_index * npoints * nsample * c; 18 | 19 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 20 | const int stride = blockDim.y * blockDim.x; 21 | for (int i = index; i < c * npoints; i += stride) { 22 | const int l = i / npoints; 23 | const int j = i % npoints; 24 | for (int k = 0; k < nsample; ++k) { 25 | int ii = idx[j * nsample + k]; 26 | out[(l * npoints + j) * nsample + k] = points[l * n + ii]; 27 | } 28 | 
} 29 | } 30 | 31 | void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample, 32 | const float *points, const int *idx, 33 | float *out, cudaStream_t stream) { 34 | 35 | cudaError_t err; 36 | group_points_kernel<<<b, opt_block_config(npoints, c), 0, stream>>>( 37 | b, c, n, npoints, nsample, points, idx, out); 38 | 39 | err = cudaGetLastError(); 40 | if (cudaSuccess != err) { 41 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 42 | exit(-1); 43 | } 44 | } 45 | 46 | // input: grad_out(b, c, npoints, nsample), idx(b, npoints, nsample) 47 | // output: grad_points(b, c, n) 48 | __global__ void group_points_grad_kernel(int b, int c, int n, int npoints, 49 | int nsample, 50 | const float *__restrict__ grad_out, 51 | const int *__restrict__ idx, 52 | float *__restrict__ grad_points) { 53 | int batch_index = blockIdx.x; 54 | grad_out += batch_index * npoints * nsample * c; 55 | idx += batch_index * npoints * nsample; 56 | grad_points += batch_index * n * c; 57 | 58 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 59 | const int stride = blockDim.y * blockDim.x; 60 | for (int i = index; i < c * npoints; i += stride) { 61 | const int l = i / npoints; 62 | const int j = i % npoints; 63 | for (int k = 0; k < nsample; ++k) { 64 | int ii = idx[j * nsample + k]; 65 | atomicAdd(grad_points + l * n + ii, 66 | grad_out[(l * npoints + j) * nsample + k]); 67 | } 68 | } 69 | } 70 | 71 | void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 72 | int nsample, const float *grad_out, 73 | const int *idx, float *grad_points, 74 | cudaStream_t stream) { 75 | cudaError_t err; 76 | group_points_grad_kernel<<<b, opt_block_config(npoints, c), 0, stream>>>( 77 | b, c, n, npoints, nsample, grad_out, idx, grad_points); 78 | 79 | err = cudaGetLastError(); 80 | if (cudaSuccess != err) { 81 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 82 | exit(-1); 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /RSCNNEQ/utils/csrc/interpolate.c: -------------------------------------------------------------------------------- 1 | #include <THC/THC.h> 2 | #include <math.h> 3 | #include <stdio.h> 4 | #include <stdlib.h> 5 | 6 | #include "interpolate_gpu.h" 7 | 8 | extern THCState *state; 9 | 10 | void three_nn_wrapper(int b, int n, int m, THCudaTensor *unknown_tensor, 11 | THCudaTensor *known_tensor, THCudaTensor *dist2_tensor, 12 | THCudaIntTensor *idx_tensor) { 13 | const float *unknown = THCudaTensor_data(state, unknown_tensor); 14 | const float *known = THCudaTensor_data(state, known_tensor); 15 | float *dist2 = THCudaTensor_data(state, dist2_tensor); 16 | int *idx = THCudaIntTensor_data(state, idx_tensor); 17 | 18 | cudaStream_t stream = THCState_getCurrentStream(state); 19 | three_nn_kernel_wrapper(b, n, m, unknown, known, dist2, idx, stream); 20 | } 21 | 22 | void three_interpolate_wrapper(int b, int c, int m, int n, 23 | THCudaTensor *points_tensor, 24 | THCudaIntTensor *idx_tensor, 25 | THCudaTensor *weight_tensor, 26 | THCudaTensor *out_tensor) { 27 | 28 | const float *points = THCudaTensor_data(state, points_tensor); 29 | const float *weight = THCudaTensor_data(state, weight_tensor); 30 | float *out = THCudaTensor_data(state, out_tensor); 31 | const int *idx = THCudaIntTensor_data(state, idx_tensor); 32 | 33 | cudaStream_t stream = THCState_getCurrentStream(state); 34 | three_interpolate_kernel_wrapper(b, c, m, n, points, idx, weight, out, 35 | stream); 36 | } 37 | 38 | void three_interpolate_grad_wrapper(int b, int c, int n, int m, 39 | THCudaTensor *grad_out_tensor, 40 | THCudaIntTensor *idx_tensor, 41 | 
THCudaTensor *weight_tensor, 42 | THCudaTensor *grad_points_tensor) { 43 | 44 | const float *grad_out = THCudaTensor_data(state, grad_out_tensor); 45 | const float *weight = THCudaTensor_data(state, weight_tensor); 46 | float *grad_points = THCudaTensor_data(state, grad_points_tensor); 47 | const int *idx = THCudaIntTensor_data(state, idx_tensor); 48 | 49 | cudaStream_t stream = THCState_getCurrentStream(state); 50 | three_interpolate_grad_kernel_wrapper(b, c, n, m, grad_out, idx, weight, 51 | grad_points, stream); 52 | } 53 | -------------------------------------------------------------------------------- /RSCNNEQ/utils/csrc/interpolate_gpu.cu: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <stdio.h> 3 | #include <stdlib.h> 4 | 5 | #include "cuda_utils.h" 6 | #include "interpolate_gpu.h" 7 | 8 | // input: unknown(b, n, 3) known(b, m, 3) 9 | // output: dist2(b, n, 3), idx(b, n, 3) 10 | __global__ void three_nn_kernel(int b, int n, int m, 11 | const float *__restrict__ unknown, 12 | const float *__restrict__ known, 13 | float *__restrict__ dist2, 14 | int *__restrict__ idx) { 15 | int batch_index = blockIdx.x; 16 | unknown += batch_index * n * 3; 17 | known += batch_index * m * 3; 18 | dist2 += batch_index * n * 3; 19 | idx += batch_index * n * 3; 20 | 21 | int index = threadIdx.x; 22 | int stride = blockDim.x; 23 | for (int j = index; j < n; j += stride) { 24 | float ux = unknown[j * 3 + 0]; 25 | float uy = unknown[j * 3 + 1]; 26 | float uz = unknown[j * 3 + 2]; 27 | 28 | double best1 = 1e40, best2 = 1e40, best3 = 1e40; 29 | int besti1 = 0, besti2 = 0, besti3 = 0; 30 | for (int k = 0; k < m; ++k) { 31 | float x = known[k * 3 + 0]; 32 | float y = known[k * 3 + 1]; 33 | float z = known[k * 3 + 2]; 34 | float d = 35 | (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z); 36 | if (d < best1) { 37 | best3 = best2; 38 | besti3 = besti2; 39 | best2 = best1; 40 | besti2 = besti1; 41 | best1 = d; 42 | besti1 = k; 43 | } else if (d < best2) { 44 | best3 = best2; 45 | besti3 = besti2; 46 | best2 = d; 47 | besti2 = k; 48 | } else if (d < best3) { 49 | best3 = d; 50 | besti3 = k; 51 | } 52 | } 53 | dist2[j * 3 + 0] = best1; 54 | dist2[j * 3 + 1] = best2; 55 | dist2[j * 3 + 2] = best3; 56 | 57 | idx[j * 3 + 0] = besti1; 58 | idx[j * 3 + 1] = besti2; 59 | idx[j * 3 + 2] = besti3; 60 | } 61 | } 62 | 63 | void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown, 64 | const float *known, float *dist2, int *idx, 65 | cudaStream_t stream) { 66 | 67 | cudaError_t err; 68 | three_nn_kernel<<<b, opt_n_threads(n), 0, stream>>>(b, n, m, unknown, known, 69 | dist2, idx); 70 | 71 | err = cudaGetLastError(); 72 | if (cudaSuccess != err) { 73 | fprintf(stderr, "CUDA kernel " 74 | "failed : %s\n", 75 | cudaGetErrorString(err)); 76 | exit(-1); 77 | } 78 | } 79 | 80 | // input: points(b, c, m), idx(b, n, 3), weight(b, n, 3) 81 | // output: out(b, c, n) 82 | __global__ void three_interpolate_kernel(int b, int c, int m, int n, 83 | const float *__restrict__ points, 84 | const int *__restrict__ idx, 85 | const float *__restrict__ weight, 86 | float *__restrict__ out) { 87 | int batch_index = blockIdx.x; 88 | points += batch_index * m * c; 89 | 90 | idx += batch_index * n * 3; 91 | weight += batch_index * n * 3; 92 | 93 | out += batch_index * n * c; 94 | 95 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 96 | const int stride = blockDim.y * blockDim.x; 97 | for (int i = index; i < c * n; i += stride) { 98 | const int l = i / n; 99 | const int j = i % n; 100 | float w1 = weight[j * 
3 + 0]; 101 | float w2 = weight[j * 3 + 1]; 102 | float w3 = weight[j * 3 + 2]; 103 | 104 | int i1 = idx[j * 3 + 0]; 105 | int i2 = idx[j * 3 + 1]; 106 | int i3 = idx[j * 3 + 2]; 107 | 108 | out[i] = points[l * m + i1] * w1 + points[l * m + i2] * w2 + 109 | points[l * m + i3] * w3; 110 | } 111 | } 112 | 113 | void three_interpolate_kernel_wrapper(int b, int c, int m, int n, 114 | const float *points, const int *idx, 115 | const float *weight, float *out, 116 | cudaStream_t stream) { 117 | 118 | cudaError_t err; 119 | three_interpolate_kernel<<<b, opt_block_config(n, c), 0, stream>>>( 120 | b, c, m, n, points, idx, weight, out); 121 | 122 | err = cudaGetLastError(); 123 | if (cudaSuccess != err) { 124 | fprintf(stderr, "CUDA kernel " 125 | "failed : %s\n", 126 | cudaGetErrorString(err)); 127 | exit(-1); 128 | } 129 | } 130 | 131 | // input: grad_out(b, c, n), idx(b, n, 3), weight(b, n, 3) 132 | // output: grad_points(b, c, m) 133 | 134 | __global__ void three_interpolate_grad_kernel( 135 | int b, int c, int n, int m, const float *__restrict__ grad_out, 136 | const int *__restrict__ idx, const float *__restrict__ weight, 137 | float *__restrict__ grad_points) { 138 | int batch_index = blockIdx.x; 139 | grad_out += batch_index * n * c; 140 | idx += batch_index * n * 3; 141 | weight += batch_index * n * 3; 142 | grad_points += batch_index * m * c; 143 | 144 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 145 | const int stride = blockDim.y * blockDim.x; 146 | for (int i = index; i < c * n; i += stride) { 147 | const int l = i / n; 148 | const int j = i % n; 149 | float w1 = weight[j * 3 + 0]; 150 | float w2 = weight[j * 3 + 1]; 151 | float w3 = weight[j * 3 + 2]; 152 | 153 | int i1 = idx[j * 3 + 0]; 154 | int i2 = idx[j * 3 + 1]; 155 | int i3 = idx[j * 3 + 2]; 156 | 157 | atomicAdd(grad_points + l * m + i1, grad_out[i] * w1); 158 | atomicAdd(grad_points + l * m + i2, grad_out[i] * w2); 159 | atomicAdd(grad_points + l * m + i3, grad_out[i] * w3); 160 | } 161 | } 162 | 163 | void three_interpolate_grad_kernel_wrapper(int b, int n, int c, int m, 164 | const float *grad_out, 165 | const int *idx, const float *weight, 166 | float *grad_points, 167 | cudaStream_t stream) { 168 | 169 | cudaError_t err; 170 | three_interpolate_grad_kernel<<<b, opt_block_config(n, c), 0, stream>>>( 171 | b, n, c, m, grad_out, idx, weight, grad_points); 172 | 173 | err = cudaGetLastError(); 174 | if (cudaSuccess != err) { 175 | fprintf(stderr, "CUDA kernel " 176 | "failed : %s\n", 177 | cudaGetErrorString(err)); 178 | exit(-1); 179 | } 180 | } 181 | -------------------------------------------------------------------------------- /RSCNNEQ/utils/csrc/sampling.c: -------------------------------------------------------------------------------- 1 | #include <THC/THC.h> 2 | 3 | #include "sampling_gpu.h" 4 | 5 | extern THCState *state; 6 | 7 | int gather_points_wrapper(int b, int c, int n, int npoints, 8 | THCudaTensor *points_tensor, 9 | THCudaIntTensor *idx_tensor, 10 | THCudaTensor *out_tensor) { 11 | 12 | const float *points = THCudaTensor_data(state, points_tensor); 13 | const int *idx = THCudaIntTensor_data(state, idx_tensor); 14 | float *out = THCudaTensor_data(state, out_tensor); 15 | 16 | cudaStream_t stream = THCState_getCurrentStream(state); 17 | 18 | gather_points_kernel_wrapper(b, c, n, npoints, points, idx, out, stream); 19 | return 1; 20 | } 21 | 22 | int gather_points_grad_wrapper(int b, int c, int n, int npoints, 23 | THCudaTensor *grad_out_tensor, 24 | THCudaIntTensor *idx_tensor, 25 | THCudaTensor *grad_points_tensor) { 26 | 27 | const float *grad_out = THCudaTensor_data(state, 
--------------------------------------------------------------------------------
/RSCNNEQ/utils/csrc/sampling_gpu.cu:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | #include <stdlib.h>
3 | 
4 | #include "cuda_utils.h"
5 | #include "sampling_gpu.h"
6 | 
7 | // input: points(b, c, n) idx(b, m)
8 | // output: out(b, c, m)
9 | __global__ void gather_points_kernel(int b, int c, int n, int m,
10 |                                      const float *__restrict__ points,
11 |                                      const int *__restrict__ idx,
12 |                                      float *__restrict__ out) {
13 |   for (int i = blockIdx.x; i < b; i += gridDim.x) {
14 |     for (int l = blockIdx.y; l < c; l += gridDim.y) {
15 |       for (int j = threadIdx.x; j < m; j += blockDim.x) {
16 |         int a = idx[i * m + j];
17 |         out[(i * c + l) * m + j] = points[(i * c + l) * n + a];
18 |       }
19 |     }
20 |   }
21 | }
22 | 
23 | void gather_points_kernel_wrapper(int b, int c, int n, int npoints,
24 |                                   const float *points, const int *idx,
25 |                                   float *out, cudaStream_t stream) {
26 | 
27 |   cudaError_t err;
28 |   gather_points_kernel<<<dim3(b, c, 1), opt_n_threads(npoints), 0, stream>>>(
29 |       b, c, n, npoints, points, idx, out);
30 | 
31 |   err = cudaGetLastError();
32 |   if (cudaSuccess != err) {
33 |     fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
34 |     exit(-1);
35 |   }
36 | }
37 | 
38 | // input: grad_out(b, c, m) idx(b, m)
39 | // output: grad_points(b, c, n)
40 | __global__ void gather_points_grad_kernel(int b, int c, int n, int m,
41 |                                           const float *__restrict__ grad_out,
42 |                                           const int *__restrict__ idx,
43 |                                           float *__restrict__ grad_points) {
44 |   for (int i = blockIdx.x; i < b; i += gridDim.x) {
45 |     for (int l = blockIdx.y; l < c; l += gridDim.y) {
46 |       for (int j = threadIdx.x; j < m; j += blockDim.x) {
47 |         int a = idx[i * m + j];
48 |         atomicAdd(grad_points + (i * c + l) * n + a,
49 |                   grad_out[(i * c + l) * m + j]);
50 |       }
51 |     }
52 |   }
53 | }
54 | 
55 | void gather_points_grad_kernel_wrapper(int b, int c, int n, int npoints,
56 |                                        const float *grad_out, const int *idx,
57 |                                        float *grad_points,
58 |                                        cudaStream_t stream) {
59 | 
60 |   cudaError_t err;
61 |   gather_points_grad_kernel<<<dim3(b, c, 1), opt_n_threads(npoints), 0,
62 |                               stream>>>(b, c, n, npoints, grad_out, idx,
63 |                                         grad_points);
64 | 
65 |   err = cudaGetLastError();
66 |   if (cudaSuccess != err) {
67 |     fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
68 |     exit(-1);
69 |   }
70 | }
71 | 
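`gather_points` is an index-select along the point dimension, and its grad kernel is the matching scatter-add. A quick PyTorch equivalence sketch (helper name is mine, for illustration only):

```python
import torch

def gather_points_ref(points, idx):
    # points: (B, C, N), idx: (B, npoint) -> (B, C, npoint)
    B, C, N = points.shape
    return torch.gather(points, 2,
                        idx.long().unsqueeze(1).expand(B, C, idx.shape[1]))
```

Autograd's derivative of `gather` is exactly the `atomicAdd` scatter in `gather_points_grad_kernel`, so the custom backward and the eager version agree.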
72 | __device__ void __update(float *__restrict__ dists, int *__restrict__ dists_i,
73 |                          int idx1, int idx2) {
74 |   const float v1 = dists[idx1], v2 = dists[idx2];
75 |   const int i1 = dists_i[idx1], i2 = dists_i[idx2];
76 |   dists[idx1] = max(v1, v2);
77 |   dists_i[idx1] = v2 > v1 ? i2 : i1;
78 | }
79 | 
80 | // Input dataset: (b, n, 3), tmp: (b, n)
81 | // Output idxs (b, m)
82 | template <unsigned int block_size>
83 | __global__ void furthest_point_sampling_kernel(
84 |     int b, int n, int m, const float *__restrict__ dataset,
85 |     float *__restrict__ temp, int *__restrict__ idxs) {
86 |   if (m <= 0)
87 |     return;
88 |   __shared__ float dists[block_size];
89 |   __shared__ int dists_i[block_size];
90 | 
91 |   int batch_index = blockIdx.x;
92 |   dataset += batch_index * n * 3;
93 |   temp += batch_index * n;
94 |   idxs += batch_index * m;
95 | 
96 |   int tid = threadIdx.x;
97 |   const int stride = block_size;
98 | 
99 |   int old = 0;
100 |   if (threadIdx.x == 0)
101 |     idxs[0] = old;
102 | 
103 |   __syncthreads();
104 |   for (int j = 1; j < m; j++) {
105 |     int besti = 0;
106 |     float best = -1;
107 |     float x1 = dataset[old * 3 + 0];
108 |     float y1 = dataset[old * 3 + 1];
109 |     float z1 = dataset[old * 3 + 2];
110 |     for (int k = tid; k < n; k += stride) {
111 |       float x2, y2, z2;
112 |       x2 = dataset[k * 3 + 0];
113 |       y2 = dataset[k * 3 + 1];
114 |       z2 = dataset[k * 3 + 2];
115 |       float mag = (x2 * x2) + (y2 * y2) + (z2 * z2);
116 |       if (mag <= 1e-3)
117 |         continue;
118 | 
119 |       float d = (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) +
120 |                 (z2 - z1) * (z2 - z1);
121 | 
122 |       float d2 = min(d, temp[k]);
123 |       temp[k] = d2;
124 |       besti = d2 > best ? k : besti;
125 |       best = d2 > best ? d2 : best;
126 |     }
127 |     dists[tid] = best;
128 |     dists_i[tid] = besti;
129 |     __syncthreads();
130 | 
131 |     if (block_size >= 512) {
132 |       if (tid < 256) {
133 |         __update(dists, dists_i, tid, tid + 256);
134 |       }
135 |       __syncthreads();
136 |     }
137 |     if (block_size >= 256) {
138 |       if (tid < 128) {
139 |         __update(dists, dists_i, tid, tid + 128);
140 |       }
141 |       __syncthreads();
142 |     }
143 |     if (block_size >= 128) {
144 |       if (tid < 64) {
145 |         __update(dists, dists_i, tid, tid + 64);
146 |       }
147 |       __syncthreads();
148 |     }
149 |     if (block_size >= 64) {
150 |       if (tid < 32) {
151 |         __update(dists, dists_i, tid, tid + 32);
152 |       }
153 |       __syncthreads();
154 |     }
155 |     if (block_size >= 32) {
156 |       if (tid < 16) {
157 |         __update(dists, dists_i, tid, tid + 16);
158 |       }
159 |       __syncthreads();
160 |     }
161 |     if (block_size >= 16) {
162 |       if (tid < 8) {
163 |         __update(dists, dists_i, tid, tid + 8);
164 |       }
165 |       __syncthreads();
166 |     }
167 |     if (block_size >= 8) {
168 |       if (tid < 4) {
169 |         __update(dists, dists_i, tid, tid + 4);
170 |       }
171 |       __syncthreads();
172 |     }
173 |     if (block_size >= 4) {
174 |       if (tid < 2) {
175 |         __update(dists, dists_i, tid, tid + 2);
176 |       }
177 |       __syncthreads();
178 |     }
179 |     if (block_size >= 2) {
180 |       if (tid < 1) {
181 |         __update(dists, dists_i, tid, tid + 1);
182 |       }
183 |       __syncthreads();
184 |     }
185 | 
186 |     old = dists_i[0];
187 |     if (tid == 0)
188 |       idxs[j] = old;
189 |   }
190 | }
191 | 
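The unrolled `if (block_size >= ...)` ladder above is a shared-memory argmax reduction; the algorithm itself is easier to read serially. A rough PyTorch sketch of farthest point sampling for a single cloud (illustrative only; unlike the kernel, it does not skip near-zero-magnitude points):

```python
import torch

def fps_ref(xyz, m):
    # xyz: (N, 3) single cloud -> indices of m farthest-point samples
    N = xyz.shape[0]
    idxs = torch.zeros(m, dtype=torch.long)
    temp = torch.full((N,), 1e10)      # running min-distance to the chosen set
    old = 0                            # the kernel also seeds with index 0
    for j in range(1, m):
        d = ((xyz - xyz[old]) ** 2).sum(-1)
        temp = torch.min(temp, d)      # update distance to nearest chosen point
        old = int(torch.argmax(temp))  # pick the point farthest from the set
        idxs[j] = old
    return idxs
```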
192 | void furthest_point_sampling_kernel_wrapper(int b, int n, int m,
193 |                                             const float *dataset, float *temp,
194 |                                             int *idxs, cudaStream_t stream) {
195 | 
196 |   cudaError_t err;
197 |   unsigned int n_threads = opt_n_threads(n);
198 | 
199 |   switch (n_threads) {
200 |   case 512:
201 |     furthest_point_sampling_kernel<512><<<b, n_threads, 0, stream>>>(
202 |         b, n, m, dataset, temp, idxs);
203 |     break;
204 |   case 256:
205 |     furthest_point_sampling_kernel<256><<<b, n_threads, 0, stream>>>(
206 |         b, n, m, dataset, temp, idxs);
207 |     break;
208 |   case 128:
209 |     furthest_point_sampling_kernel<128><<<b, n_threads, 0, stream>>>(
210 |         b, n, m, dataset, temp, idxs);
211 |     break;
212 |   case 64:
213 |     furthest_point_sampling_kernel<64><<<b, n_threads, 0, stream>>>(
214 |         b, n, m, dataset, temp, idxs);
215 |     break;
216 |   case 32:
217 |     furthest_point_sampling_kernel<32><<<b, n_threads, 0, stream>>>(
218 |         b, n, m, dataset, temp, idxs);
219 |     break;
220 |   case 16:
221 |     furthest_point_sampling_kernel<16><<<b, n_threads, 0, stream>>>(
222 |         b, n, m, dataset, temp, idxs);
223 |     break;
224 |   case 8:
225 |     furthest_point_sampling_kernel<8><<<b, n_threads, 0, stream>>>(
226 |         b, n, m, dataset, temp, idxs);
227 |     break;
228 |   case 4:
229 |     furthest_point_sampling_kernel<4><<<b, n_threads, 0, stream>>>(
230 |         b, n, m, dataset, temp, idxs);
231 |     break;
232 |   case 2:
233 |     furthest_point_sampling_kernel<2><<<b, n_threads, 0, stream>>>(
234 |         b, n, m, dataset, temp, idxs);
235 |     break;
236 |   case 1:
237 |     furthest_point_sampling_kernel<1><<<b, n_threads, 0, stream>>>(
238 |         b, n, m, dataset, temp, idxs);
239 |     break;
240 |   default:
241 |     furthest_point_sampling_kernel<512><<<b, n_threads, 0, stream>>>(
242 |         b, n, m, dataset, temp, idxs);
243 |   }
244 | 
245 |   err = cudaGetLastError();
246 |   if (cudaSuccess != err) {
247 |     fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
248 |     exit(-1);
249 |   }
250 | }
251 | 
--------------------------------------------------------------------------------
/RSCNNEQ/utils/linalg_utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from enum import Enum
3 | 
4 | PDist2Order = Enum('PDist2Order', 'd_first d_second')
5 | 
6 | 
7 | def pdist2(
8 |         X: torch.Tensor,
9 |         Z: torch.Tensor = None,
10 |         order: PDist2Order = PDist2Order.d_second
11 | ) -> torch.Tensor:
12 |     r""" Calculates the pairwise distance between X and Z
13 | 
14 |     D[b, i, j] = l2 distance X[b, i] and Z[b, j]
15 | 
16 |     Parameters
17 |     ---------
18 |     X : torch.Tensor
19 |         X is a (B, N, d) tensor.  There are B batches, and N vectors of dimension d
20 |     Z: torch.Tensor
21 |         Z is a (B, M, d) tensor.  If Z is None, then Z = X
22 | 
23 |     Returns
24 |     -------
25 |     torch.Tensor
26 |         Distance matrix is size (B, N, M)
27 |     """
28 | 
29 |     if order == PDist2Order.d_second:
30 |         if X.dim() == 2:
31 |             X = X.unsqueeze(0)
32 |         if Z is None:
33 |             Z = X
34 |             G = X @ Z.transpose(-2, -1)
35 |             S = (X * X).sum(-1, keepdim=True)
36 |             R = S.transpose(-2, -1)
37 |         else:
38 |             if Z.dim() == 2:
39 |                 Z = Z.unsqueeze(0)
40 |             G = X @ Z.transpose(-2, -1)
41 |             S = (X * X).sum(-1, keepdim=True)
42 |             R = (Z * Z).sum(-1, keepdim=True).transpose(-2, -1)
43 |     else:
44 |         if X.dim() == 2:
45 |             X = X.unsqueeze(0)
46 |         if Z is None:
47 |             Z = X
48 |             G = X.transpose(-2, -1) @ Z
49 |             R = (X * X).sum(-2, keepdim=True)
50 |             S = R.transpose(-2, -1)
51 |         else:
52 |             if Z.dim() == 2:
53 |                 Z = Z.unsqueeze(0)
54 |             G = X.transpose(-2, -1) @ Z
55 |             S = (X * X).sum(-2, keepdim=True).transpose(-2, -1)
56 |             R = (Z * Z).sum(-2, keepdim=True)
57 | 
58 |     return torch.abs(R + S - 2 * G).squeeze(0)
59 | 
60 | 
61 | def pdist2_slow(X, Z=None):
62 |     if Z is None: Z = X
63 |     D = torch.zeros(X.size(0), X.size(2), Z.size(2))
64 | 
65 |     for b in range(D.size(0)):
66 |         for i in range(D.size(1)):
67 |             for j in range(D.size(2)):
68 |                 D[b, i, j] = torch.dist(X[b, :, i], Z[b, :, j])
69 |     return D
70 | 
71 | 
72 | if __name__ == "__main__":
73 |     X = torch.randn(2, 3, 5)
74 |     Z = torch.randn(2, 3, 3)
75 | 
76 |     print(pdist2(X, order=PDist2Order.d_first))
77 |     print(pdist2_slow(X))
78 |     print(torch.dist(pdist2(X, order=PDist2Order.d_first), pdist2_slow(X)))
79 | 
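`pdist2` relies on the expansion ‖x − z‖² = ‖x‖² + ‖z‖² − 2⟨x, z⟩; the `torch.abs` guards against tiny negative values from floating-point cancellation. A short numeric self-check (note: `torch.cdist` is newer than the PyTorch 0.4.1 pinned above, so this snippet assumes a more recent install):

```python
import torch

X = torch.randn(2, 4, 3)                        # (B, N, d), d_second layout
D = torch.abs((X * X).sum(-1, keepdim=True)     # ||x||^2, shape (B, N, 1)
              + (X * X).sum(-1).unsqueeze(1)    # ||z||^2, shape (B, 1, N); Z = X here
              - 2 * X @ X.transpose(-2, -1))    # -2 <x, z>
print(torch.allclose(D.sqrt(), torch.cdist(X, X), atol=1e-5))
```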
--------------------------------------------------------------------------------
/RSCNNEQ/utils/pointnet2_modules.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | 
5 | import pointnet2_utils
6 | import pytorch_utils as pt_utils
7 | from typing import List
8 | import numpy as np
9 | import time
10 | import math
11 | 
12 | class _PointnetSAModuleBase(nn.Module):
13 | 
14 |     def __init__(self):
15 |         super().__init__()
16 |         self.npoint = None
17 |         self.groupers = None
18 |         self.mlps = None
19 | 
20 |     def forward(self, xyz: torch.Tensor,
21 |                 features: torch.Tensor = None) -> (torch.Tensor, torch.Tensor):
22 |         r"""
23 |         Parameters
24 |         ----------
25 |         xyz : torch.Tensor
26 |             (B, N, 3) tensor of the xyz coordinates of the points
27 |         features : torch.Tensor
28 |             (B, N, C) tensor of the descriptors of the points
29 | 
30 |         Returns
31 |         -------
32 |         new_xyz : torch.Tensor
33 |             (B, npoint, 3) tensor of the new points' xyz
34 |         new_features : torch.Tensor
35 |             (B, npoint, \sum_k(mlps[k][-1])) tensor of the new_points descriptors
36 |         """
37 | 
38 |         new_features_list = []
39 |         xyz_flipped = xyz.transpose(1, 2).contiguous()
40 |         if self.npoint is not None:
41 |             fps_idx = pointnet2_utils.furthest_point_sample(xyz, self.npoint)  # (B, npoint)
42 |             new_xyz = pointnet2_utils.gather_operation(xyz_flipped, fps_idx).transpose(1, 2).contiguous()
43 |             fps_idx = fps_idx.data
44 |         else:
45 |             new_xyz = None
46 |             fps_idx = None
47 | 
48 |         for i in range(len(self.groupers)):
49 |             new_features = self.groupers[i](xyz, new_xyz, features, fps_idx) if self.npoint is not None else self.groupers[i](xyz, new_xyz, features)  # (B, C, npoint, nsample)
50 |             new_features = self.mlps[i](
51 |                 new_features
52 |             )  # (B, mlp[-1], npoint)
53 | 
54 |             new_features_list.append(new_features)
55 | 
56 |         return new_xyz, torch.cat(new_features_list, dim=1)
57 | 
58 | 
59 | class PointnetSAModuleMSG(_PointnetSAModuleBase):
60 |     r"""Pointnet set abstraction layer with multiscale grouping
61 | 
62 |     Parameters
63 |     ----------
64 |     npoint : int
65 |         Number of points
66 |     e1s : list of float32
67 |     e2s : list of float32
68 |     e3s : list of float32
69 |         lists of ellipsoid semi-axis lengths to group with
70 |     nsamples : list of int32
71 |         Number of samples in each ellipsoid query
72 |     mlps : list of list of int32
73 |         Spec of the pointnet before the global max_pool for each scale
74 |     bn : bool
75 |         Use batchnorm
76 |     """
77 | 
78 |     def __init__(
79 |             self,
80 |             *,
81 |             npoint: int,
82 |             e1s: List[float],
83 |             e2s: List[float],
84 |             e3s: List[float],
85 |             nsamples: List[int],
86 |             mlps: List[List[int]],
87 |             use_xyz: bool = True,
88 |             bias = True,
89 |             init = nn.init.kaiming_normal_,
90 |             first_layer = False,
91 |             relation_prior = 1
92 |     ):
93 |         super().__init__()
94 |         assert len(e1s) == len(nsamples) == len(mlps)
95 |         self.npoint = npoint
96 |         self.groupers = nn.ModuleList()
97 |         self.mlps = nn.ModuleList()
98 | 
99 |         # initialize shared mapping functions
100 |         C_in = (mlps[0][0] + 3) if use_xyz else mlps[0][0]
101 |         C_out = mlps[0][1]
102 | 
103 |         if relation_prior == 0:
104 |             in_channels = 1
105 |         elif relation_prior == 1 or relation_prior == 2:
106 |             in_channels = 10
107 |         else:
108 |             assert False, "relation_prior can only be 0, 1, 2."
109 | 
110 |         if first_layer:
111 |             mapping_func1 = nn.Conv2d(in_channels = in_channels, out_channels = math.floor(C_out / 2), kernel_size = (1, 1),
112 |                                       stride = (1, 1), bias = bias)
113 |             mapping_func2 = nn.Conv2d(in_channels = math.floor(C_out / 2), out_channels = 16, kernel_size = (1, 1),
114 |                                       stride = (1, 1), bias = bias)
115 |             xyz_raising = nn.Conv2d(in_channels = C_in, out_channels = 16, kernel_size = (1, 1),
116 |                                     stride = (1, 1), bias = bias)
117 |             init(xyz_raising.weight)
118 |             if bias:
119 |                 nn.init.constant_(xyz_raising.bias, 0)
120 |         elif npoint is not None:
121 |             mapping_func1 = nn.Conv2d(in_channels = in_channels, out_channels = math.floor(C_out / 4), kernel_size = (1, 1),
122 |                                       stride = (1, 1), bias = bias)
123 |             mapping_func2 = nn.Conv2d(in_channels = math.floor(C_out / 4), out_channels = C_in, kernel_size = (1, 1),
124 |                                       stride = (1, 1), bias = bias)
125 |         if npoint is not None:
126 |             init(mapping_func1.weight)
127 |             init(mapping_func2.weight)
128 |             if bias:
129 |                 nn.init.constant_(mapping_func1.bias, 0)
130 |                 nn.init.constant_(mapping_func2.bias, 0)
131 | 
132 |         # channel raising mapping
133 |         cr_mapping = nn.Conv1d(in_channels = C_in if not first_layer else 16, out_channels = C_out, kernel_size = 1,
134 |                                stride = 1, bias = bias)
135 |         init(cr_mapping.weight)
136 |         nn.init.constant_(cr_mapping.bias, 0)
137 | 
138 |         if first_layer:
139 |             mapping = [mapping_func1, mapping_func2, cr_mapping, xyz_raising]
140 |         elif npoint is not None:
141 |             mapping = [mapping_func1, mapping_func2, cr_mapping]
142 | 
143 |         for i in range(len(e1s)):
144 |             e1 = e1s[i]
145 |             e2 = e2s[i]
146 |             e3 = e3s[i]
147 |             nsample = nsamples[i]
148 |             self.groupers.append(
149 |                 pointnet2_utils.QueryAndGroupE(e1, e2, e3, nsample, use_xyz=use_xyz)
150 |                 if npoint is not None else pointnet2_utils.GroupAll(use_xyz)
151 |             )
152 |             mlp_spec = mlps[i]
153 |             if use_xyz:
154 |                 mlp_spec[0] += 3
155 |             if npoint is not None:
156 |                 self.mlps.append(pt_utils.SharedRSConv(mlp_spec, mapping = mapping, relation_prior = relation_prior, first_layer = first_layer))
157 |             else:   # global convolutional pooling
158 |                 self.mlps.append(pt_utils.GloAvgConv(C_in = C_in, C_out = C_out))
159 | 
160 | 
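As a shape-level illustration of how this module is wired (the semi-axis and channel values below are made up, not the paper's settings; assumes the CUDA extension is built and the script runs from RSCNNEQ/utils):

```python
import torch
from pointnet2_modules import PointnetSAModuleMSG

# hypothetical single-scale config: 512 centroids, one ellipsoid, 48 neighbors
sa = PointnetSAModuleMSG(
    npoint=512,
    e1s=[0.23], e2s=[0.20], e3s=[0.17],   # ellipsoid semi-axes (illustrative)
    nsamples=[48],
    mlps=[[0, 128]],                      # 0 input feature channels; +3 added for xyz
    first_layer=True,
).cuda()

xyz = torch.randn(8, 1024, 3).cuda()
new_xyz, new_features = sa(xyz)           # (8, 512, 3), (8, 128, 512)
```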
161 | class PointnetSAModule(PointnetSAModuleMSG):
162 |     r"""Pointnet set abstraction layer
163 | 
164 |     Parameters
165 |     ----------
166 |     npoint : int
167 |         Number of features
168 |     e1 : float
169 |     e2 : float
170 |     e3 : float
171 |         semi-axis lengths of the ellipsoid
172 |     nsample : int
173 |         Number of samples in the ellipsoid query
174 |     mlp : list
175 |         Spec of the pointnet before the global max_pool
176 |     bn : bool
177 |         Use batchnorm
178 |     """
179 | 
180 |     def __init__(
181 |             self,
182 |             *,
183 |             mlp: List[int],
184 |             npoint: int = None,
185 |             e1: float = None,
186 |             e2: float = None,
187 |             e3: float = None,
188 |             nsample: int = None,
189 |             use_xyz: bool = True
190 |     ):
191 |         super().__init__(
192 |             mlps=[mlp],
193 |             npoint=npoint,
194 |             e1s=[e1],
195 |             e2s=[e2],
196 |             e3s=[e3],
197 |             nsamples=[nsample],
198 |             use_xyz=use_xyz
199 |         )
200 | 
201 | 
202 | class PointnetFPModule(nn.Module):
203 |     r"""Propagates the features of one set to another
204 | 
205 |     Parameters
206 |     ----------
207 |     mlp : list
208 |         Pointnet module parameters
209 |     bn : bool
210 |         Use batchnorm
211 |     """
212 | 
213 |     def __init__(self, *, mlp: List[int], bn: bool = True):
214 |         super().__init__()
215 |         self.mlp = pt_utils.SharedMLP(mlp, bn=bn)
216 | 
217 |     def forward(
218 |             self, unknown: torch.Tensor, known: torch.Tensor,
219 |             unknow_feats: torch.Tensor, known_feats: torch.Tensor
220 |     ) -> torch.Tensor:
221 |         r"""
222 |         Parameters
223 |         ----------
224 |         unknown : torch.Tensor
225 |             (B, n, 3) tensor of the xyz positions of the unknown features
226 |         known : torch.Tensor
227 |             (B, m, 3) tensor of the xyz positions of the known features
228 |         unknow_feats : torch.Tensor
229 |             (B, C1, n) tensor of the features to be propagated to
230 |         known_feats : torch.Tensor
231 |             (B, C2, m) tensor of features to be propagated
232 | 
233 |         Returns
234 |         -------
235 |         new_features : torch.Tensor
236 |             (B, mlp[-1], n) tensor of the features of the unknown features
237 |         """
238 | 
239 |         dist, idx = pointnet2_utils.three_nn(unknown, known)
240 |         dist_recip = 1.0 / (dist + 1e-8)
241 |         norm = torch.sum(dist_recip, dim=2, keepdim=True)
242 |         weight = dist_recip / norm
243 | 
244 |         interpolated_feats = pointnet2_utils.three_interpolate(
245 |             known_feats, idx, weight
246 |         )
247 |         if unknow_feats is not None:
248 |             new_features = torch.cat([interpolated_feats, unknow_feats],
249 |                                      dim=1)  # (B, C2 + C1, n)
250 |         else:
251 |             new_features = interpolated_feats
252 | 
253 |         new_features = new_features.unsqueeze(-1)
254 |         new_features = self.mlp(new_features)
255 | 
256 |         return new_features.squeeze(-1)
257 | 
258 | 
259 | if __name__ == "__main__":
260 |     from torch.autograd import Variable
261 |     torch.manual_seed(1)
262 |     torch.cuda.manual_seed_all(1)
263 |     xyz = Variable(torch.randn(2, 9, 3).cuda(), requires_grad=True)
264 |     xyz_feats = Variable(torch.randn(2, 9, 6).cuda(), requires_grad=True)
265 | 
266 |     test_module = PointnetSAModuleMSG(
267 |         npoint=2, e1s=[5.0, 10.0], e2s=[5.0, 10.0], e3s=[5.0, 10.0], nsamples=[6, 3], mlps=[[9, 3], [9, 6]]
268 |     )
269 |     test_module.cuda()
270 |     print(test_module(xyz, xyz_feats))
271 | 
272 |     # test_module = PointnetFPModule(mlp=[6, 6])
273 |     # test_module.cuda()
274 |     # from torch.autograd import gradcheck
275 |     # inputs = (xyz, xyz, None, xyz_feats)
276 |     # test = gradcheck(test_module, inputs, eps=1e-6, atol=1e-4)
277 |     # print(test)
278 | 
279 |     for _ in range(1):
280 |         _, new_features = test_module(xyz, xyz_feats)
281 |         new_features.backward(
282 |             torch.cuda.FloatTensor(*new_features.size()).fill_(1)
283 |         )
284 |         print(new_features)
285 |         print(xyz.grad)
286 | 
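PointnetFPModule's weights are plain inverse-distance: w_k = (1/d_k) / Σ_j (1/d_j). For example, neighbors at distances 1, 2 and 4 receive weights 4/7, 2/7 and 1/7, exactly what lines 240-242 compute:

```python
import torch

dist = torch.tensor([[[1.0, 2.0, 4.0]]])   # (B=1, n=1, 3 neighbors)
dist_recip = 1.0 / (dist + 1e-8)
weight = dist_recip / dist_recip.sum(dim=2, keepdim=True)
print(weight)   # tensor([[[0.5714, 0.2857, 0.1429]]])  i.e. 4/7, 2/7, 1/7
```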
--------------------------------------------------------------------------------
/RSCNNEQ/utils/pointnet2_utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Variable
3 | from torch.autograd import Function
4 | import torch.nn.functional as F
5 | import torch.nn as nn
6 | from linalg_utils import pdist2, PDist2Order
7 | from collections import namedtuple
8 | import pytorch_utils as pt_utils
9 | from typing import List, Tuple
10 | 
11 | from _ext import pointnet2
12 | 
13 | 
14 | class RandomDropout(nn.Module):
15 | 
16 |     def __init__(self, p=0.5, inplace=False):
17 |         super().__init__()
18 |         self.p = p
19 |         self.inplace = inplace
20 | 
21 |     def forward(self, X):
22 |         theta = torch.Tensor(1).uniform_(0, self.p)[0]
23 |         return pt_utils.feature_dropout_no_scaling(
24 |             X, theta, self.train, self.inplace
25 |         )
26 | 
27 | 
28 | class FurthestPointSampling(Function):
29 | 
30 |     @staticmethod
31 |     def forward(ctx, xyz: torch.Tensor, npoint: int) -> torch.Tensor:
32 |         r"""
33 |         Uses iterative furthest point sampling to select a set of npoint features that have the largest
34 |         minimum distance
35 | 
36 |         Parameters
37 |         ----------
38 |         xyz : torch.Tensor
39 |             (B, N, 3) tensor where N > npoint
40 |         npoint : int32
41 |             number of features in the sampled set
42 | 
43 |         Returns
44 |         -------
45 |         torch.Tensor
46 |             (B, npoint) tensor containing the set
47 |         """
48 |         assert xyz.is_contiguous()
49 | 
50 |         B, N, _ = xyz.size()
51 | 
52 |         output = torch.cuda.IntTensor(B, npoint)
53 |         temp = torch.cuda.FloatTensor(B, N).fill_(1e10)
54 |         pointnet2.furthest_point_sampling_wrapper(
55 |             B, N, npoint, xyz, temp, output
56 |         )
57 |         return output
58 | 
59 |     @staticmethod
60 |     def backward(xyz, a=None):
61 |         return None, None
62 | 
63 | 
64 | furthest_point_sample = FurthestPointSampling.apply
65 | 
66 | 
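Sampling is non-differentiable, hence the `None` gradients above. Typical use pairs `furthest_point_sample` with `gather_operation` to downsample a cloud, exactly as `_PointnetSAModuleBase` does; a minimal sketch (CUDA build assumed, run from RSCNNEQ/utils):

```python
import torch
import pointnet2_utils

xyz = torch.randn(4, 1024, 3).cuda()                       # (B, N, 3), contiguous
fps_idx = pointnet2_utils.furthest_point_sample(xyz, 512)  # (B, 512) int indices
new_xyz = pointnet2_utils.gather_operation(
    xyz.transpose(1, 2).contiguous(), fps_idx              # (B, 3, N) -> (B, 3, 512)
).transpose(1, 2).contiguous()                             # back to (B, 512, 3)
```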
67 | class GatherOperation(Function):
68 | 
69 |     @staticmethod
70 |     def forward(ctx, features: torch.Tensor, idx: torch.Tensor) -> torch.Tensor:
71 |         r"""
72 | 
73 |         Parameters
74 |         ----------
75 |         features : torch.Tensor
76 |             (B, C, N) tensor
77 | 
78 |         idx : torch.Tensor
79 |             (B, npoint) tensor of the features to gather
80 | 
81 |         Returns
82 |         -------
83 |         torch.Tensor
84 |             (B, C, npoint) tensor
85 |         """
86 |         assert features.is_contiguous()
87 |         assert idx.is_contiguous()
88 | 
89 |         B, npoint = idx.size()
90 |         _, C, N = features.size()
91 | 
92 |         output = torch.cuda.FloatTensor(B, C, npoint)
93 | 
94 |         pointnet2.gather_points_wrapper(
95 |             B, C, N, npoint, features, idx, output
96 |         )
97 | 
98 |         ctx.for_backwards = (idx, C, N)
99 | 
100 |         return output
101 | 
102 |     @staticmethod
103 |     def backward(ctx, grad_out):
104 |         idx, C, N = ctx.for_backwards
105 |         B, npoint = idx.size()
106 | 
107 |         grad_features = Variable(torch.cuda.FloatTensor(B, C, N).zero_())
108 |         grad_out_data = grad_out.data.contiguous()
109 |         pointnet2.gather_points_grad_wrapper(
110 |             B, C, N, npoint, grad_out_data, idx, grad_features.data
111 |         )
112 | 
113 |         return grad_features, None
114 | 
115 | 
116 | gather_operation = GatherOperation.apply
117 | 
118 | 
119 | class ThreeNN(Function):
120 | 
121 |     @staticmethod
122 |     def forward(ctx, unknown: torch.Tensor,
123 |                 known: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
124 |         r"""
125 |         Find the three nearest neighbors of unknown in known
126 |         Parameters
127 |         ----------
128 |         unknown : torch.Tensor
129 |             (B, n, 3) tensor of the unknown features
130 |         known : torch.Tensor
131 |             (B, m, 3) tensor of the known features
132 | 
133 |         Returns
134 |         -------
135 |         dist : torch.Tensor
136 |             (B, n, 3) l2 distance to the three nearest neighbors
137 |         idx : torch.Tensor
138 |             (B, n, 3) index of 3 nearest neighbors
139 |         """
140 |         assert unknown.is_contiguous()
141 |         assert known.is_contiguous()
142 | 
143 |         B, N, _ = unknown.size()
144 |         m = known.size(1)
145 |         dist2 = torch.cuda.FloatTensor(B, N, 3)
146 |         idx = torch.cuda.IntTensor(B, N, 3)
147 | 
148 |         pointnet2.three_nn_wrapper(B, N, m, unknown, known, dist2, idx)
149 | 
150 |         return torch.sqrt(dist2), idx
151 | 
152 |     @staticmethod
153 |     def backward(ctx, a=None, b=None):
154 |         return None, None
155 | 
156 | 
157 | three_nn = ThreeNN.apply
158 | 
159 | 
160 | class ThreeInterpolate(Function):
161 | 
162 |     @staticmethod
163 |     def forward(
164 |             ctx, features: torch.Tensor, idx: torch.Tensor, weight: torch.Tensor
165 |     ) -> torch.Tensor:
166 |         r"""
167 |         Performs weighted linear interpolation on 3 features
168 |         Parameters
169 |         ----------
170 |         features : torch.Tensor
171 |             (B, c, m) Features descriptors to be interpolated from
172 |         idx : torch.Tensor
173 |             (B, n, 3) three nearest neighbors of the target features in features
174 |         weight : torch.Tensor
175 |             (B, n, 3) weights
176 | 
177 |         Returns
178 |         -------
179 |         torch.Tensor
180 |             (B, c, n) tensor of the interpolated features
181 |         """
182 |         assert features.is_contiguous()
183 |         assert idx.is_contiguous()
184 |         assert weight.is_contiguous()
185 | 
186 |         B, c, m = features.size()
187 |         n = idx.size(1)
188 | 
189 |         ctx.three_interpolate_for_backward = (idx, weight, m)
190 | 
191 |         output = torch.cuda.FloatTensor(B, c, n)
192 | 
193 |         pointnet2.three_interpolate_wrapper(
194 |             B, c, m, n, features, idx, weight, output
195 |         )
196 | 
197 |         return output
198 | 
199 |     @staticmethod
200 |     def backward(ctx, grad_out: torch.Tensor
201 |                  ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
202 |         r"""
203 |         Parameters
204 |         ----------
205 |         grad_out : torch.Tensor
206 |             (B, c, n) tensor with gradients of outputs
207 | 
208 |         Returns
209 |         -------
210 |         grad_features : torch.Tensor
211 |             (B, c, m) tensor with gradients of features
212 | 
213 |         None
214 | 
215 |         None
216 |         """
217 |         idx, weight, m = ctx.three_interpolate_for_backward
218 |         B, c, n = grad_out.size()
219 | 
220 |         grad_features = Variable(torch.cuda.FloatTensor(B, c, m).zero_())
221 | 
222 |         grad_out_data = grad_out.data.contiguous()
223 |         pointnet2.three_interpolate_grad_wrapper(
224 |             B, c, n, m, grad_out_data, idx, weight, grad_features.data
225 |         )
226 | 
227 |         return grad_features, None, None
228 | 
229 | 
230 | three_interpolate = ThreeInterpolate.apply
231 | 
232 | 
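A quick way to check `three_interpolate` against eager PyTorch (assumes the kernels are built; the tolerance is loose because the kernel accumulates in float32):

```python
import torch
import pointnet2_utils

unknown = torch.randn(2, 32, 3).cuda()   # target positions (B, n, 3)
known   = torch.randn(2, 16, 3).cuda()   # source positions (B, m, 3)
feats   = torch.randn(2, 8, 16).cuda()   # source features  (B, c, m)

dist, idx = pointnet2_utils.three_nn(unknown, known)
w = 1.0 / (dist + 1e-8)
w = w / w.sum(2, keepdim=True)
out = pointnet2_utils.three_interpolate(feats, idx, w)     # (B, c, n)

# eager reference: pick the three neighbor columns and mix by weight
g = feats.unsqueeze(2).expand(2, 8, 32, 16).gather(
        3, idx.long().unsqueeze(1).expand(2, 8, 32, 3))    # (B, c, n, 3)
print(torch.allclose(out, (g * w.unsqueeze(1)).sum(-1), atol=1e-4))
```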
233 | class GroupingOperation(Function):
234 | 
235 |     @staticmethod
236 |     def forward(ctx, features: torch.Tensor, idx: torch.Tensor) -> torch.Tensor:
237 |         r"""
238 | 
239 |         Parameters
240 |         ----------
241 |         features : torch.Tensor
242 |             (B, C, N) tensor of points to group
243 |         idx : torch.Tensor
244 |             (B, npoint, nsample) tensor containing the indices of points to group with
245 | 
246 |         Returns
247 |         -------
248 |         torch.Tensor
249 |             (B, C, npoint, nsample) tensor
250 |         """
251 |         assert features.is_contiguous()
252 |         assert idx.is_contiguous()
253 | 
254 |         B, nfeatures, nsample = idx.size()
255 |         _, C, N = features.size()
256 | 
257 |         output = torch.cuda.FloatTensor(B, C, nfeatures, nsample)
258 | 
259 |         pointnet2.group_points_wrapper(
260 |             B, C, N, nfeatures, nsample, features, idx, output
261 |         )
262 | 
263 |         ctx.for_backwards = (idx, N)
264 |         return output
265 | 
266 |     @staticmethod
267 |     def backward(ctx,
268 |                  grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
269 |         r"""
270 | 
271 |         Parameters
272 |         ----------
273 |         grad_out : torch.Tensor
274 |             (B, C, npoint, nsample) tensor of the gradients of the output from forward
275 | 
276 |         Returns
277 |         -------
278 |         torch.Tensor
279 |             (B, C, N) gradient of the features
280 |         None
281 |         """
282 |         idx, N = ctx.for_backwards
283 | 
284 |         B, C, npoint, nsample = grad_out.size()
285 |         grad_features = Variable(torch.cuda.FloatTensor(B, C, N).zero_())
286 | 
287 |         grad_out_data = grad_out.data.contiguous()
288 |         pointnet2.group_points_grad_wrapper(
289 |             B, C, N, npoint, nsample, grad_out_data, idx, grad_features.data
290 |         )
291 | 
292 |         return grad_features, None
293 | 
294 | 
295 | grouping_operation = GroupingOperation.apply
296 | 
297 | 
298 | class BallQuery(Function):
299 | 
300 |     @staticmethod
301 |     def forward(
302 |             ctx, radius: float, nsample: int, xyz: torch.Tensor,
303 |             new_xyz: torch.Tensor, fps_idx: torch.IntTensor
304 |     ) -> torch.Tensor:
305 |         r"""
306 | 
307 |         Parameters
308 |         ----------
309 |         radius : float
310 |             radius of the balls
311 |         nsample : int
312 |             maximum number of features in the balls
313 |         xyz : torch.Tensor
314 |             (B, N, 3) xyz coordinates of the features
315 |         new_xyz : torch.Tensor
316 |             (B, npoint, 3) centers of the ball query
317 | 
318 |         Returns
319 |         -------
320 |         torch.Tensor
321 |             (B, npoint, nsample + 1) tensor with the indices of the features that form the query balls (the query point's own index is prepended)
322 |         """
323 |         assert new_xyz.is_contiguous()
324 |         assert xyz.is_contiguous()
325 | 
326 |         B, N, _ = xyz.size()
327 |         npoint = new_xyz.size(1)
328 |         idx = torch.cuda.IntTensor(B, npoint, nsample).zero_()
329 | 
330 |         pointnet2.ball_query_wrapper(
331 |             B, N, npoint, radius, nsample, new_xyz, xyz, fps_idx, idx
332 |         )
333 | 
334 |         return torch.cat([fps_idx.unsqueeze(2), idx], dim = 2)
335 | 
336 |     @staticmethod
337 |     def backward(ctx, a=None):
338 |         return None, None, None, None
339 | 
340 | 
341 | ball_query = BallQuery.apply
342 | 
343 | class EllipsoidQuery(Function):
344 | 
345 |     @staticmethod
346 |     def forward(
347 |             ctx, e1, e2, e3: float, nsample: int, xyz: torch.Tensor,
348 |             new_xyz: torch.Tensor, fps_idx: torch.IntTensor
349 |     ) -> torch.Tensor:
350 |         r"""
351 | 
352 |         Parameters
353 |         ----------
354 |         e1, e2, e3 : float
355 |             semi-axis lengths of the ellipsoid
356 |         nsample : int
357 |             maximum number of features in the ellipsoids
358 |         xyz : torch.Tensor
359 |             (B, N, 3) xyz coordinates of the features
360 |         new_xyz : torch.Tensor
361 |             (B, npoint, 3) centers of the ellipsoid query
362 | 
363 |         Returns
364 |         -------
365 |         torch.Tensor
366 |             (B, npoint, nsample + 1) tensor with the indices of the features inside each query ellipsoid (centroid index prepended), plus the (B, npoint, 3) buffer d filled by the kernel
367 |         """
368 |         assert new_xyz.is_contiguous()
369 |         assert xyz.is_contiguous()
370 | 
371 |         B, N, _ = xyz.size()
372 |         npoint = new_xyz.size(1)
373 |         idx = torch.cuda.IntTensor(B, npoint, nsample).zero_()
374 |         ingroup_pts_cnt = torch.cuda.IntTensor(B, npoint).zero_()
375 |         ingroup_out = torch.cuda.FloatTensor(B, npoint, nsample, 3).zero_()
376 |         ingroup_cva = torch.cuda.FloatTensor(B, npoint, 3*3).zero_()
377 |         v = torch.cuda.FloatTensor(B, npoint, 3*3).zero_()
378 |         d = torch.cuda.FloatTensor(B, npoint, 3).zero_()
379 | 
380 |         pointnet2.ellipsoid_query_wrapper(
381 |             B, N, npoint, e1, e2, e3, nsample, new_xyz, xyz, fps_idx, idx, ingroup_pts_cnt, ingroup_out, ingroup_cva, v, d
382 |         )
383 | 
384 |         return torch.cat([fps_idx.unsqueeze(2), idx], dim = 2), d
385 | 
386 |     @staticmethod
387 |     def backward(ctx, a=None):
388 |         return None, None, None, None
389 | 
390 | 
391 | ellipsoid_query = EllipsoidQuery.apply
392 | 
393 | 
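Before the kernel's covariance-driven re-orientation (the `ingroup_cva`, `v` and `d` buffers appear to hold the per-group covariance, eigenvectors and eigenvalues it works with), the basic membership test behind an ellipsoid query is the standard quadric inequality. A reference sketch with a hypothetical helper name, for intuition only:

```python
import torch

def in_ellipsoid(xyz, center, e1, e2, e3):
    # xyz: (N, 3), center: (3,); True where, around the centroid,
    # (x / e1)^2 + (y / e2)^2 + (z / e3)^2 <= 1
    rel = xyz - center
    q = (rel[:, 0] / e1) ** 2 + (rel[:, 1] / e2) ** 2 + (rel[:, 2] / e3) ** 2
    return q <= 1.0
```

As with `ball_query`, hits beyond `nsample` are dropped by the kernel, so `nsample` caps the group size.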
394 | class QueryAndGroup(nn.Module):
395 |     r"""
396 |     Groups with a ball query of radius
397 | 
398 |     Parameters
399 |     ---------
400 |     radius : float32
401 |         Radius of ball
402 |     nsample : int32
403 |         Maximum number of points to gather in the ball
404 |     """
405 | 
406 |     def __init__(self, radius: float, nsample: int, use_xyz: bool = True):
407 |         super().__init__()
408 |         self.radius, self.nsample, self.use_xyz = radius, nsample, use_xyz
409 | 
410 |     def forward(
411 |             self,
412 |             xyz: torch.Tensor,
413 |             new_xyz: torch.Tensor,
414 |             features: torch.Tensor = None,
415 |             fps_idx: torch.IntTensor = None
416 |     ) -> Tuple[torch.Tensor]:
417 |         r"""
418 |         Parameters
419 |         ----------
420 |         xyz : torch.Tensor
421 |             xyz coordinates of the features (B, N, 3)
422 |         new_xyz : torch.Tensor
423 |             centroids (B, npoint, 3)
424 |         features : torch.Tensor
425 |             Descriptors of the features (B, C, N)
426 | 
427 |         Returns
428 |         -------
429 |         new_features : torch.Tensor
430 |             (B, 3 + 3 + C, npoint, nsample + 1) tensor
431 |         """
432 | 
433 |         idx = ball_query(self.radius, self.nsample, xyz, new_xyz, fps_idx)
434 |         xyz_trans = xyz.transpose(1, 2).contiguous()
435 |         grouped_xyz = grouping_operation(
436 |             xyz_trans, idx
437 |         )  # (B, 3, npoint, nsample)
438 |         raw_grouped_xyz = grouped_xyz
439 |         grouped_xyz -= new_xyz.transpose(1, 2).unsqueeze(-1)
440 | 
441 |         if features is not None:
442 |             grouped_features = grouping_operation(features, idx)
443 |             if self.use_xyz:
444 |                 new_features = torch.cat([raw_grouped_xyz, grouped_xyz, grouped_features],
445 |                                          dim=1)  # (B, C + 3 + 3, npoint, nsample)
446 |             else:
447 |                 new_features = grouped_features
448 |         else:
449 |             assert self.use_xyz, "Cannot have features=None and use_xyz=False!"
450 |             new_features = torch.cat([raw_grouped_xyz, grouped_xyz], dim = 1)
451 | 
452 |         return new_features
453 | 
454 | class QueryAndGroupE(nn.Module):
455 |     r"""
456 |     Groups with an ellipsoid query
457 | 
458 |     Parameters
459 |     ---------
460 |     e1, e2, e3 : float32
461 |         semi-axis lengths of the ellipsoid
462 |     nsample : int32
463 |         Maximum number of points to gather in the ellipsoid
464 |     """
465 | 
466 |     def __init__(self, e1: float, e2: float, e3: float, nsample: int, use_xyz: bool = True):
467 |         super().__init__()
468 |         self.e1, self.e2, self.e3, self.nsample, self.use_xyz = e1, e2, e3, nsample, use_xyz
469 | 
470 |     def forward(
471 |             self,
472 |             xyz: torch.Tensor,
473 |             new_xyz: torch.Tensor,
474 |             features: torch.Tensor = None,
475 |             fps_idx: torch.IntTensor = None
476 |     ) -> Tuple[torch.Tensor]:
477 |         r"""
478 |         Parameters
479 |         ----------
480 |         xyz : torch.Tensor
481 |             xyz coordinates of the features (B, N, 3)
482 |         new_xyz : torch.Tensor
483 |             centroids (B, npoint, 3)
484 |         features : torch.Tensor
485 |             Descriptors of the features (B, C, N)
486 | 
487 |         Returns
488 |         -------
489 |         new_features : torch.Tensor
490 |             (B, 3 + 3 + C, npoint, nsample + 1) tensor
491 |         """
492 | 
493 |         idx, d = ellipsoid_query(self.e1, self.e2, self.e3, self.nsample, xyz, new_xyz, fps_idx)  # ,ingroup_pts_cnt, ingroup_out, ingroup_cva, v, d)
494 |         xyz_trans = xyz.transpose(1, 2).contiguous()
495 |         d = d.transpose(1, 2).unsqueeze(-1)
496 |         grouped_xyz = grouping_operation(
497 |             xyz_trans, idx
498 |         )  # (B, 3, npoint, nsample)
499 |         raw_grouped_xyz = grouped_xyz
500 |         grouped_xyz -= new_xyz.transpose(1, 2).unsqueeze(-1)
501 | 
502 |         if features is not None:
503 |             grouped_features = grouping_operation(features, idx)
504 |             if self.use_xyz:
505 |                 new_features = torch.cat([raw_grouped_xyz, grouped_xyz, grouped_features],
506 |                                          dim=1)  # (B, C + 3 + 3, npoint, nsample)
507 |             else:
508 |                 new_features = grouped_features
509 |         else:
510 |             assert self.use_xyz, "Cannot have features=None and use_xyz=False!"
511 | new_features = torch.cat([raw_grouped_xyz, grouped_xyz], dim = 1) 512 | # d = d.repeat([1,1,1,new_features.size(3)]) 513 | # print(d.size()) 514 | # print(new_features.size()) 515 | # new_features = torch.cat([new_features, d],dim=1) 516 | # print(new_features.size()) 517 | 518 | return new_features 519 | 520 | class GroupAll(nn.Module): 521 | r""" 522 | Groups all features 523 | 524 | Parameters 525 | --------- 526 | """ 527 | 528 | def __init__(self, use_xyz: bool = True): 529 | super().__init__() 530 | self.use_xyz = use_xyz 531 | 532 | def forward( 533 | self, 534 | xyz: torch.Tensor, 535 | new_xyz: torch.Tensor, 536 | features: torch.Tensor = None 537 | ) -> Tuple[torch.Tensor]: 538 | r""" 539 | Parameters 540 | ---------- 541 | xyz : torch.Tensor 542 | xyz coordinates of the features (B, N, 3) 543 | new_xyz : torch.Tensor 544 | Ignored 545 | features : torch.Tensor 546 | Descriptors of the features (B, C, N) 547 | 548 | Returns 549 | ------- 550 | new_features : torch.Tensor 551 | (B, C + 3, 1, N) tensor 552 | """ 553 | 554 | grouped_xyz = xyz.transpose(1, 2).unsqueeze(2) 555 | if features is not None: 556 | grouped_features = features.unsqueeze(2) 557 | if self.use_xyz: 558 | new_features = torch.cat([grouped_xyz, grouped_features], 559 | dim=1) # (B, 3 + C, 1, N) 560 | else: 561 | new_features = grouped_features 562 | else: 563 | new_features = grouped_xyz 564 | 565 | return new_features 566 | -------------------------------------------------------------------------------- /RSCNNEQ/utils/pytorch_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .pytorch_utils import * 2 | -------------------------------------------------------------------------------- /RSCNNEQ/utils/pytorch_utils/pytorch_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | from torch.autograd.function import InplaceFunction 6 | from itertools import repeat 7 | import numpy as np 8 | import shutil, os 9 | from typing import List, Tuple 10 | from scipy.stats import t as student_t 11 | import statistics as stats 12 | import math 13 | 14 | ########## Relation-Shape Convolution begin ############ 15 | class RSConv(nn.Module): 16 | ''' 17 | Input shape: (B, C_in, npoint, nsample) 18 | Output shape: (B, C_out, npoint) 19 | ''' 20 | def __init__( 21 | self, 22 | C_in, 23 | C_out, 24 | activation = nn.ReLU(inplace=True), 25 | mapping = None, 26 | relation_prior = 1, 27 | first_layer = False 28 | ): 29 | super(RSConv, self).__init__() 30 | self.bn_rsconv = nn.BatchNorm2d(C_in) if not first_layer else nn.BatchNorm2d(16) 31 | self.bn_channel_raising = nn.BatchNorm1d(C_out) 32 | self.bn_xyz_raising = nn.BatchNorm2d(16) 33 | if first_layer: 34 | self.bn_mapping = nn.BatchNorm2d(math.floor(C_out / 2)) 35 | else: 36 | self.bn_mapping = nn.BatchNorm2d(math.floor(C_out / 4)) 37 | self.activation = activation 38 | self.relation_prior = relation_prior 39 | self.first_layer = first_layer 40 | self.mapping_func1 = mapping[0] 41 | self.mapping_func2 = mapping[1] 42 | self.cr_mapping = mapping[2] 43 | if first_layer: 44 | self.xyz_raising = mapping[3] 45 | 46 | def forward(self, input): # input: (B, 3 + 3 + C_in, npoint, centroid + nsample) 47 | 48 | x = input[:, 3:, :, :] # (B, C_in, npoint, nsample+1), input features 49 | C_in = x.size()[1] 50 | nsample = x.size()[3] 51 | if self.relation_prior == 2: 52 | abs_coord 
= input[:, 0:2, :, :] 53 | delta_x = input[:, 3:5, :, :] 54 | zero_vec = Variable(torch.zeros(x.size()[0], 1, x.size()[2], nsample).cuda()) 55 | else: 56 | abs_coord = input[:, 0:3, :, :] # (B, 3, npoint, nsample+1), absolute coordinates 57 | delta_x = input[:, 3:6, :, :] # (B, 3, npoint, nsample+1), normalized coordinates 58 | 59 | coord_xi = abs_coord[:, :, :, 0:1].repeat(1, 1, 1, nsample) # (B, 3, npoint, nsample), centroid point 60 | h_xi_xj = torch.norm(delta_x, p = 2, dim = 1).unsqueeze(1) 61 | if self.relation_prior == 1: 62 | h_xi_xj = torch.cat((h_xi_xj, coord_xi, abs_coord, delta_x), dim = 1) 63 | elif self.relation_prior == 2: 64 | h_xi_xj = torch.cat((h_xi_xj, coord_xi, zero_vec, abs_coord, zero_vec, delta_x, zero_vec), dim = 1) 65 | del coord_xi, abs_coord, delta_x 66 | 67 | h_xi_xj = self.mapping_func2(self.activation(self.bn_mapping(self.mapping_func1(h_xi_xj)))) 68 | if self.first_layer: 69 | x = self.activation(self.bn_xyz_raising(self.xyz_raising(x))) 70 | x = F.max_pool2d(self.activation(self.bn_rsconv(torch.mul(h_xi_xj, x))), kernel_size = (1, nsample)).squeeze(3) # (B, C_in, npoint) 71 | del h_xi_xj 72 | x = self.activation(self.bn_channel_raising(self.cr_mapping(x))) 73 | 74 | return x 75 | 76 | class RSConvLayer(nn.Sequential): 77 | 78 | def __init__( 79 | self, 80 | in_size: int, 81 | out_size: int, 82 | activation=nn.ReLU(inplace=True), 83 | conv=RSConv, 84 | mapping = None, 85 | relation_prior = 1, 86 | first_layer = False 87 | ): 88 | super(RSConvLayer, self).__init__() 89 | 90 | conv_unit = conv( 91 | in_size, 92 | out_size, 93 | activation = activation, 94 | mapping = mapping, 95 | relation_prior = relation_prior, 96 | first_layer = first_layer 97 | ) 98 | 99 | self.add_module('RS_Conv', conv_unit) 100 | 101 | class SharedRSConv(nn.Sequential): 102 | 103 | def __init__( 104 | self, 105 | args: List[int], 106 | *, 107 | activation=nn.ReLU(inplace=True), 108 | mapping = None, 109 | relation_prior = 1, 110 | first_layer = False 111 | ): 112 | super().__init__() 113 | 114 | for i in range(len(args) - 1): 115 | self.add_module( 116 | 'RSConvLayer{}'.format(i), 117 | RSConvLayer( 118 | args[i], 119 | args[i + 1], 120 | activation = activation, 121 | mapping = mapping, 122 | relation_prior = relation_prior, 123 | first_layer = first_layer 124 | ) 125 | ) 126 | 127 | ########## Relation-Shape Convolution end ############ 128 | 129 | 130 | 131 | ########## global convolutional pooling begin ############ 132 | 133 | class GloAvgConv(nn.Module): 134 | ''' 135 | Input shape: (B, C_in, 1, nsample) 136 | Output shape: (B, C_out, npoint) 137 | ''' 138 | def __init__( 139 | self, 140 | C_in, 141 | C_out, 142 | init=nn.init.kaiming_normal_, 143 | bias = True, 144 | activation = nn.ReLU(inplace=True) 145 | ): 146 | super(GloAvgConv, self).__init__() 147 | 148 | self.conv_avg = nn.Conv2d(in_channels = C_in, out_channels = C_out, kernel_size = (1, 1), 149 | stride = (1, 1), bias = bias) 150 | self.bn_avg = nn.BatchNorm2d(C_out) 151 | self.activation = activation 152 | 153 | init(self.conv_avg.weight) 154 | if bias: 155 | nn.init.constant_(self.conv_avg.bias, 0) 156 | 157 | def forward(self, x): 158 | nsample = x.size()[3] 159 | x = self.activation(self.bn_avg(self.conv_avg(x))) 160 | x = F.max_pool2d(x, kernel_size = (1, nsample)).squeeze(3) 161 | 162 | return x 163 | 164 | ########## global convolutional pooling end ############ 165 | 166 | 167 | class SharedMLP(nn.Sequential): 168 | 169 | def __init__( 170 | self, 171 | args: List[int], 172 | *, 173 | bn: bool = False, 174 | 
activation=nn.ReLU(inplace=True), 175 | preact: bool = False, 176 | first: bool = False, 177 | name: str = "" 178 | ): 179 | super().__init__() 180 | 181 | for i in range(len(args) - 1): 182 | self.add_module( 183 | name + 'layer{}'.format(i), 184 | Conv2d( 185 | args[i], 186 | args[i + 1], 187 | bn=(not first or not preact or (i != 0)) and bn, 188 | activation=activation 189 | if (not first or not preact or (i != 0)) else None, 190 | preact=preact 191 | ) 192 | ) 193 | 194 | 195 | class _BNBase(nn.Sequential): 196 | 197 | def __init__(self, in_size, batch_norm=None, name=""): 198 | super().__init__() 199 | self.add_module(name + "bn", batch_norm(in_size)) 200 | 201 | nn.init.constant_(self[0].weight, 1.0) 202 | nn.init.constant_(self[0].bias, 0) 203 | 204 | 205 | class BatchNorm1d(_BNBase): 206 | 207 | def __init__(self, in_size: int, *, name: str = ""): 208 | super().__init__(in_size, batch_norm=nn.BatchNorm1d, name=name) 209 | 210 | 211 | class BatchNorm2d(_BNBase): 212 | 213 | def __init__(self, in_size: int, name: str = ""): 214 | super().__init__(in_size, batch_norm=nn.BatchNorm2d, name=name) 215 | 216 | 217 | class BatchNorm3d(_BNBase): 218 | 219 | def __init__(self, in_size: int, name: str = ""): 220 | super().__init__(in_size, batch_norm=nn.BatchNorm3d, name=name) 221 | 222 | 223 | class _ConvBase(nn.Sequential): 224 | 225 | def __init__( 226 | self, 227 | in_size, 228 | out_size, 229 | kernel_size, 230 | stride, 231 | padding, 232 | activation, 233 | bn, 234 | init, 235 | conv=None, 236 | batch_norm=None, 237 | bias=True, 238 | preact=False, 239 | name="" 240 | ): 241 | super().__init__() 242 | 243 | bias = bias and (not bn) 244 | conv_unit = conv( 245 | in_size, 246 | out_size, 247 | kernel_size=kernel_size, 248 | stride=stride, 249 | padding=padding, 250 | bias=bias 251 | ) 252 | init(conv_unit.weight) 253 | if bias: 254 | nn.init.constant_(conv_unit.bias, 0) 255 | 256 | if bn: 257 | if not preact: 258 | bn_unit = batch_norm(out_size) 259 | else: 260 | bn_unit = batch_norm(in_size) 261 | 262 | if preact: 263 | if bn: 264 | self.add_module(name + 'bn', bn_unit) 265 | 266 | if activation is not None: 267 | self.add_module(name + 'activation', activation) 268 | 269 | self.add_module(name + 'conv', conv_unit) 270 | 271 | if not preact: 272 | if bn: 273 | self.add_module(name + 'bn', bn_unit) 274 | 275 | if activation is not None: 276 | self.add_module(name + 'activation', activation) 277 | 278 | 279 | class Conv1d(_ConvBase): 280 | 281 | def __init__( 282 | self, 283 | in_size: int, 284 | out_size: int, 285 | *, 286 | kernel_size: int = 1, 287 | stride: int = 1, 288 | padding: int = 0, 289 | activation=nn.ReLU(inplace=True), 290 | bn: bool = False, 291 | init=nn.init.kaiming_normal_, 292 | bias: bool = True, 293 | preact: bool = False, 294 | name: str = "" 295 | ): 296 | super().__init__( 297 | in_size, 298 | out_size, 299 | kernel_size, 300 | stride, 301 | padding, 302 | activation, 303 | bn, 304 | init, 305 | conv=nn.Conv1d, 306 | batch_norm=BatchNorm1d, 307 | bias=bias, 308 | preact=preact, 309 | name=name 310 | ) 311 | 312 | 313 | class Conv2d(_ConvBase): 314 | 315 | def __init__( 316 | self, 317 | in_size: int, 318 | out_size: int, 319 | *, 320 | kernel_size: Tuple[int, int] = (1, 1), 321 | stride: Tuple[int, int] = (1, 1), 322 | padding: Tuple[int, int] = (0, 0), 323 | activation=nn.ReLU(inplace=True), 324 | bn: bool = False, 325 | init=nn.init.kaiming_normal_, 326 | bias: bool = True, 327 | preact: bool = False, 328 | name: str = "" 329 | ): 330 | super().__init__( 331 | 
in_size, 332 | out_size, 333 | kernel_size, 334 | stride, 335 | padding, 336 | activation, 337 | bn, 338 | init, 339 | conv=nn.Conv2d, 340 | batch_norm=BatchNorm2d, 341 | bias=bias, 342 | preact=preact, 343 | name=name 344 | ) 345 | 346 | 347 | class Conv3d(_ConvBase): 348 | 349 | def __init__( 350 | self, 351 | in_size: int, 352 | out_size: int, 353 | *, 354 | kernel_size: Tuple[int, int, int] = (1, 1, 1), 355 | stride: Tuple[int, int, int] = (1, 1, 1), 356 | padding: Tuple[int, int, int] = (0, 0, 0), 357 | activation=nn.ReLU(inplace=True), 358 | bn: bool = False, 359 | init=nn.init.kaiming_normal_, 360 | bias: bool = True, 361 | preact: bool = False, 362 | name: str = "" 363 | ): 364 | super().__init__( 365 | in_size, 366 | out_size, 367 | kernel_size, 368 | stride, 369 | padding, 370 | activation, 371 | bn, 372 | init, 373 | conv=nn.Conv3d, 374 | batch_norm=BatchNorm3d, 375 | bias=bias, 376 | preact=preact, 377 | name=name 378 | ) 379 | 380 | 381 | class FC(nn.Sequential): 382 | 383 | def __init__( 384 | self, 385 | in_size: int, 386 | out_size: int, 387 | *, 388 | activation=nn.ReLU(inplace=True), 389 | bn: bool = False, 390 | init=None, 391 | preact: bool = False, 392 | name: str = "" 393 | ): 394 | super().__init__() 395 | 396 | fc = nn.Linear(in_size, out_size, bias=not bn) 397 | if init is not None: 398 | init(fc.weight) 399 | if not bn: 400 | nn.init.constant_(fc.bias, 0) 401 | 402 | if preact: 403 | if bn: 404 | self.add_module(name + 'bn', BatchNorm1d(in_size)) 405 | 406 | if activation is not None: 407 | self.add_module(name + 'activation', activation) 408 | 409 | self.add_module(name + 'fc', fc) 410 | 411 | if not preact: 412 | if bn: 413 | self.add_module(name + 'bn', BatchNorm1d(out_size)) 414 | 415 | if activation is not None: 416 | self.add_module(name + 'activation', activation) 417 | 418 | 419 | class _DropoutNoScaling(InplaceFunction): 420 | 421 | @staticmethod 422 | def _make_noise(input): 423 | return input.new().resize_as_(input) 424 | 425 | @staticmethod 426 | def symbolic(g, input, p=0.5, train=False, inplace=False): 427 | if inplace: 428 | return None 429 | n = g.appendNode( 430 | g.create("Dropout", [input]).f_("ratio", 431 | p).i_("is_test", not train) 432 | ) 433 | real = g.appendNode(g.createSelect(n, 0)) 434 | g.appendNode(g.createSelect(n, 1)) 435 | return real 436 | 437 | @classmethod 438 | def forward(cls, ctx, input, p=0.5, train=False, inplace=False): 439 | if p < 0 or p > 1: 440 | raise ValueError( 441 | "dropout probability has to be between 0 and 1, " 442 | "but got {}".format(p) 443 | ) 444 | ctx.p = p 445 | ctx.train = train 446 | ctx.inplace = inplace 447 | 448 | if ctx.inplace: 449 | ctx.mark_dirty(input) 450 | output = input 451 | else: 452 | output = input.clone() 453 | 454 | if ctx.p > 0 and ctx.train: 455 | ctx.noise = cls._make_noise(input) 456 | if ctx.p == 1: 457 | ctx.noise.fill_(0) 458 | else: 459 | ctx.noise.bernoulli_(1 - ctx.p) 460 | ctx.noise = ctx.noise.expand_as(input) 461 | output.mul_(ctx.noise) 462 | 463 | return output 464 | 465 | @staticmethod 466 | def backward(ctx, grad_output): 467 | if ctx.p > 0 and ctx.train: 468 | return grad_output.mul(Variable(ctx.noise)), None, None, None 469 | else: 470 | return grad_output, None, None, None 471 | 472 | 473 | dropout_no_scaling = _DropoutNoScaling.apply 474 | 475 | 476 | class _FeatureDropoutNoScaling(_DropoutNoScaling): 477 | 478 | @staticmethod 479 | def symbolic(input, p=0.5, train=False, inplace=False): 480 | return None 481 | 482 | @staticmethod 483 | def _make_noise(input): 484 
| return input.new().resize_(
485 |             input.size(0), input.size(1), *repeat(1,
486 |                                                    input.dim() - 2)
487 |         )
488 | 
489 | 
490 | feature_dropout_no_scaling = _FeatureDropoutNoScaling.apply
491 | 
492 | 
493 | def group_model_params(model: nn.Module):
494 |     decay_group = []
495 |     no_decay_group = []
496 | 
497 |     for name, param in model.named_parameters():
498 |         if name.find("bn") != -1 or name.find("bias") != -1:
499 |             no_decay_group.append(param)
500 |         else:
501 |             decay_group.append(param)
502 | 
503 |     assert len(list(model.parameters())
504 |               ) == len(decay_group) + len(no_decay_group)
505 | 
506 |     return [
507 |         dict(params=decay_group),
508 |         dict(params=no_decay_group, weight_decay=0.0)
509 |     ]
510 | 
511 | 
512 | def checkpoint_state(model=None, optimizer=None, best_prec=None, epoch=None):
513 |     optim_state = optimizer.state_dict() if optimizer is not None else None
514 |     if model is not None:
515 |         if isinstance(model, torch.nn.DataParallel):
516 |             model_state = model.module.state_dict()
517 |         else:
518 |             model_state = model.state_dict()
519 |     else:
520 |         model_state = None
521 | 
522 |     return {
523 |         'epoch': epoch,
524 |         'best_prec': best_prec,
525 |         'model_state': model_state,
526 |         'optimizer_state': optim_state
527 |     }
528 | 
529 | 
530 | def save_checkpoint(
531 |         state, is_best, filename='checkpoint', bestname='model_best'
532 | ):
533 |     filename = '{}.pth.tar'.format(filename)
534 |     torch.save(state, filename)
535 |     if is_best:
536 |         shutil.copyfile(filename, '{}.pth.tar'.format(bestname))
537 | 
538 | 
539 | def load_checkpoint(model=None, optimizer=None, filename='checkpoint'):
540 |     filename = "{}.pth.tar".format(filename)
541 |     if os.path.isfile(filename):
542 |         print("==> Loading from checkpoint '{}'".format(filename))
543 |         checkpoint = torch.load(filename)
544 |         epoch = checkpoint['epoch']
545 |         best_prec = checkpoint['best_prec']
546 |         if model is not None and checkpoint['model_state'] is not None:
547 |             model.load_state_dict(checkpoint['model_state'])
548 |         if optimizer is not None and checkpoint['optimizer_state'] is not None:
549 |             optimizer.load_state_dict(checkpoint['optimizer_state'])
550 |         print("==> Done")
551 |     else:
552 |         print("==> Checkpoint '{}' not found".format(filename))
553 |         epoch, best_prec = 0, 0.0  # avoid returning unbound locals when no checkpoint exists
554 |     return epoch, best_prec
555 | 
556 | 
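The training script wires these helpers together roughly as follows (a sketch; the stand-in model, optimizer and file names below are mine, not the repo's):

```python
import torch.nn as nn
import torch.optim as optim
from pytorch_utils import checkpoint_state, save_checkpoint, load_checkpoint

model = nn.Linear(3, 40)                 # stand-in for RSCNN_SSN_Cls
optimizer = optim.Adam(model.parameters())

# note: save_checkpoint appends ".pth.tar" to both names
save_checkpoint(checkpoint_state(model, optimizer, best_prec=0.92, epoch=10),
                is_best=True, filename='cls/checkpoint', bestname='cls/model_best')
epoch, best_prec = load_checkpoint(model, optimizer, filename='cls/checkpoint')
```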
557 | def variable_size_collate(pad_val=0, use_shared_memory=True):
558 |     import collections, re  # re is needed below for the numpy string-dtype check
559 |     _numpy_type_map = {
560 |         'float64': torch.DoubleTensor,
561 |         'float32': torch.FloatTensor,
562 |         'float16': torch.HalfTensor,
563 |         'int64': torch.LongTensor,
564 |         'int32': torch.IntTensor,
565 |         'int16': torch.ShortTensor,
566 |         'int8': torch.CharTensor,
567 |         'uint8': torch.ByteTensor,
568 |     }
569 | 
570 |     def wrapped(batch):
571 |         "Puts each data field into a tensor with outer dimension batch size"
572 | 
573 |         error_msg = "batch must contain tensors, numbers, dicts or lists; found {}"
574 |         elem_type = type(batch[0])
575 |         if torch.is_tensor(batch[0]):
576 |             max_len = 0
577 |             for b in batch:
578 |                 max_len = max(max_len, b.size(0))
579 | 
580 |             numel = sum([int(b.numel() / b.size(0) * max_len) for b in batch])
581 |             if use_shared_memory:
582 |                 # If we're in a background process, concatenate directly into a
583 |                 # shared memory tensor to avoid an extra copy
584 |                 storage = batch[0].storage()._new_shared(numel)
585 |                 out = batch[0].new(storage)
586 |             else:
587 |                 out = batch[0].new(numel)
588 | 
589 |             out = out.view(
590 |                 len(batch), max_len,
591 |                 *[batch[0].size(i) for i in range(1, batch[0].dim())]
592 |             )
593 |             out.fill_(pad_val)
594 |             for i in range(len(batch)):
595 |                 out[i, 0:batch[i].size(0)] = batch[i]
596 | 
597 |             return out
598 |         elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \
599 |                 and elem_type.__name__ != 'string_':
600 |             elem = batch[0]
601 |             if elem_type.__name__ == 'ndarray':
602 |                 # array of string classes and object
603 |                 if re.search('[SaUO]', elem.dtype.str) is not None:
604 |                     raise TypeError(error_msg.format(elem.dtype))
605 | 
606 |                 return wrapped([torch.from_numpy(b) for b in batch])
607 |             if elem.shape == ():  # scalars
608 |                 py_type = float if elem.dtype.name.startswith('float') else int
609 |                 return _numpy_type_map[elem.dtype.name](
610 |                     list(map(py_type, batch))
611 |                 )
612 |         elif isinstance(batch[0], int):
613 |             return torch.LongTensor(batch)
614 |         elif isinstance(batch[0], float):
615 |             return torch.DoubleTensor(batch)
616 |         elif isinstance(batch[0], collections.Mapping):
617 |             return {key: wrapped([d[key] for d in batch]) for key in batch[0]}
618 |         elif isinstance(batch[0], collections.Sequence):
619 |             transposed = zip(*batch)
620 |             return [wrapped(samples) for samples in transposed]
621 | 
622 |         raise TypeError((error_msg.format(type(batch[0]))))
623 | 
624 |     return wrapped
625 | 
626 | 
627 | class TrainValSplitter():
628 |     r"""
629 |     Creates a training and validation split to be used as the sampler in a pytorch DataLoader
630 |     Parameters
631 |     ---------
632 |     numel : int
633 |         Number of elements in the entire training dataset
634 |     percent_train : float
635 |         Percentage of data in the training split
636 |     shuffled : bool
637 |         Whether or not to shuffle which data goes to which split
638 |     """
639 | 
640 |     def __init__(
641 |             self, *, numel: int, percent_train: float, shuffled: bool = False
642 |     ):
643 |         indices = np.array([i for i in range(numel)])
644 |         if shuffled:
645 |             np.random.shuffle(indices)
646 | 
647 |         self.train = torch.utils.data.sampler.SubsetRandomSampler(
648 |             indices[0:int(percent_train * numel)]
649 |         )
650 |         self.val = torch.utils.data.sampler.SubsetRandomSampler(
651 |             indices[int(percent_train * numel):]
652 |         )
653 | 
654 | 
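The two samplers plug straight into a DataLoader; a minimal sketch (the dataset below is a placeholder):

```python
import torch
from torch.utils.data import DataLoader, TensorDataset
from pytorch_utils import TrainValSplitter

dataset = TensorDataset(torch.randn(100, 3))          # placeholder dataset
split = TrainValSplitter(numel=len(dataset), percent_train=0.9, shuffled=True)
train_loader = DataLoader(dataset, batch_size=16, sampler=split.train)
val_loader   = DataLoader(dataset, batch_size=16, sampler=split.val)
```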
655 | class CrossValSplitter():
656 |     r"""
657 |     Class that creates cross validation splits.  The train and val splits can be used in pytorch DataLoaders.  The splits can be updated
658 |     by calling next(self) or using a loop:
659 |         for _ in self:
660 |             ....
661 |     Parameters
662 |     ---------
663 |     numel : int
664 |         Number of elements in the training set
665 |     k_folds : int
666 |         Number of folds
667 |     shuffled : bool
668 |         Whether or not to shuffle which data goes in which fold
669 |     """
670 | 
671 |     def __init__(self, *, numel: int, k_folds: int, shuffled: bool = False):
672 |         indices = np.array([i for i in range(numel)])
673 |         if shuffled:
674 |             np.random.shuffle(indices)
675 | 
676 |         self.folds = np.array(np.array_split(indices, k_folds), dtype=object)
677 |         self.current_v_ind = -1
678 | 
679 |         self.val = torch.utils.data.sampler.SubsetRandomSampler(self.folds[0])
680 |         self.train = torch.utils.data.sampler.SubsetRandomSampler(
681 |             np.concatenate(self.folds[1:], axis=0)
682 |         )
683 | 
684 |         self.metrics = {}
685 | 
686 |     def __iter__(self):
687 |         self.current_v_ind = -1
688 |         return self
689 | 
690 |     def __len__(self):
691 |         return len(self.folds)
692 | 
693 |     def __getitem__(self, idx):
694 |         assert idx >= 0 and idx < len(self)
695 |         self.val.indices = self.folds[idx]
696 |         self.train.indices = np.concatenate(
697 |             self.folds[np.arange(len(self)) != idx], axis=0
698 |         )
699 | 
700 |     def __next__(self):
701 |         self.current_v_ind += 1
702 |         if self.current_v_ind >= len(self):
703 |             raise StopIteration
704 | 
705 |         self[self.current_v_ind]
706 | 
707 |     def update_metrics(self, to_post: dict):
708 |         for k, v in to_post.items():
709 |             if k in self.metrics:
710 |                 self.metrics[k].append(v)
711 |             else:
712 |                 self.metrics[k] = [v]
713 | 
714 |     def print_metrics(self):
715 |         for name, samples in self.metrics.items():
716 |             xbar = stats.mean(samples)
717 |             sx = stats.stdev(samples, xbar)
718 |             tstar = student_t.ppf(1.0 - 0.025, len(samples) - 1)
719 |             margin_of_error = tstar * sx / math.sqrt(len(samples))
720 |             print("{}: {} +/- {}".format(name, xbar, margin_of_error))
721 | 
722 | 
723 | def set_bn_momentum_default(bn_momentum):
724 | 
725 |     def fn(m):
726 |         if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
727 |             m.momentum = bn_momentum
728 | 
729 |     return fn
730 | 
731 | 
732 | class BNMomentumScheduler(object):
733 | 
734 |     def __init__(
735 |             self, model, bn_lambda, last_epoch=-1,
736 |             setter=set_bn_momentum_default
737 |     ):
738 |         if not isinstance(model, nn.Module):
739 |             raise RuntimeError(
740 |                 "Class '{}' is not a PyTorch nn Module".format(
741 |                     type(model).__name__
742 |                 )
743 |             )
744 | 
745 |         self.model = model
746 |         self.setter = setter
747 |         self.lmbd = bn_lambda
748 | 
749 |         self.step(last_epoch + 1)
750 |         self.last_epoch = last_epoch
751 | 
752 |     def step(self, epoch=None):
753 |         if epoch is None:
754 |             epoch = self.last_epoch + 1
755 | 
756 |         self.last_epoch = epoch
757 |         self.model.apply(self.setter(self.lmbd(epoch)))
758 | 
759 |     def get_momentum(self, epoch=None):
760 |         if epoch is None:
761 |             epoch = self.last_epoch + 1
762 |         return self.lmbd(epoch)
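`BNMomentumScheduler` pairs with the bn_momentum / bn_decay / decay_step / bnm_clip values in cfgs/config_ssn_cls.yaml; a sketch of the usual wiring (the stand-in model is mine, and the lambda mirrors those config values):

```python
import torch.nn as nn
from pytorch_utils import BNMomentumScheduler

model = nn.Sequential(nn.Conv1d(3, 16, 1), nn.BatchNorm1d(16))  # stand-in model

# bn_momentum * bn_decay ** (epoch // decay_step), clipped at bnm_clip
bn_lambda = lambda e: max(0.9 * 0.5 ** (e // 25), 0.01)
bnm_scheduler = BNMomentumScheduler(model, bn_lambda=bn_lambda)

for epoch in range(5):
    bnm_scheduler.step(epoch)   # sets .momentum on every BatchNorm layer
```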
--------------------------------------------------------------------------------
/RSCNNEQ/voting_evaluate_cls.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.optim as optim
3 | import torch.optim.lr_scheduler as lr_sched
4 | import torch.nn as nn
5 | from torch.utils.data import DataLoader
6 | from torch.autograd import Variable
7 | import torch.nn.functional as F
8 | import numpy as np
9 | import os
10 | from torchvision import transforms
11 | from models import RSCNN_SSN_Cls as RSCNN_SSN
12 | from data import ModelNet40Cls
13 | import utils.pytorch_utils as pt_utils
14 | import utils.pointnet2_utils as pointnet2_utils
15 | import data.data_utils as d_utils
16 | import argparse
17 | import random
18 | import yaml
19 | os.environ["CUDA_VISIBLE_DEVICES"] = "0"
20 | torch.backends.cudnn.enabled = True
21 | torch.backends.cudnn.benchmark = True
22 | torch.backends.cudnn.deterministic = True
23 | 
24 | seed = 123
25 | random.seed(seed)
26 | np.random.seed(seed)
27 | torch.manual_seed(seed)
28 | torch.cuda.manual_seed(seed)
29 | torch.cuda.manual_seed_all(seed)
30 | 
31 | parser = argparse.ArgumentParser(description='Relation-Shape CNN Shape Classification Voting Evaluation')
32 | parser.add_argument('--config', default='cfgs/config_ssn_cls.yaml', type=str)
33 | 
34 | NUM_REPEAT = 100
35 | NUM_VOTE = 10
36 | 
37 | def main():
38 |     args = parser.parse_args()
39 |     with open(args.config) as f:
40 |         config = yaml.load(f)
41 |     for k, v in config['common'].items():
42 |         setattr(args, k, v)
43 | 
44 |     test_transforms = transforms.Compose([
45 |         d_utils.PointcloudToTensor()
46 |     ])
47 | 
48 |     test_dataset = ModelNet40Cls(num_points = args.num_points, root = args.data_root, transforms=test_transforms, train=False)
49 |     test_dataloader = DataLoader(
50 |         test_dataset,
51 |         batch_size=args.batch_size,
52 |         shuffle=False,
53 |         num_workers=int(args.workers),
54 |         pin_memory=True
55 |     )
56 | 
57 |     model = RSCNN_SSN(num_classes = args.num_classes, input_channels = args.input_channels, relation_prior = args.relation_prior, use_xyz = True)
58 |     model.cuda()
59 | 
60 |     if args.checkpoint != '':
61 |         model.load_state_dict(torch.load(args.checkpoint))
62 |         print('Load model successfully: %s' % (args.checkpoint))
63 | 
64 |     # evaluate
65 |     PointcloudScale = d_utils.PointcloudScale()  # initialize random scaling
66 |     model.eval()
67 |     global_acc = 0
68 |     for i in range(NUM_REPEAT):
69 |         preds = []
70 |         labels = []
71 |         with torch.no_grad():
72 |             for j, data in enumerate(test_dataloader, 0):
73 |                 points, target = data
74 |                 points, target = points.cuda(), target.cuda()
75 |                 #points, target = Variable(points, volatile=True), Variable(target, volatile=True)
76 | 
77 |                 # furthest point sampling
78 |                 fps_idx = pointnet2_utils.furthest_point_sample(points, 1200)  # (B, npoint)
79 |                 pred = 0
80 |                 for v in range(NUM_VOTE):
81 |                     new_fps_idx = fps_idx[:, np.random.choice(1200, args.num_points, False)]
82 |                     new_points = pointnet2_utils.gather_operation(points.transpose(1, 2).contiguous(), new_fps_idx).transpose(1, 2).contiguous()
83 |                     if v > 0:
84 |                         new_points.data = PointcloudScale(new_points.data)
85 |                     pred += F.softmax(model(new_points), dim = 1)
86 |                 pred /= NUM_VOTE
87 |                 target = target.view(-1)
88 |                 _, pred_choice = torch.max(pred.data, -1)
89 | 
90 |                 preds.append(pred_choice)
91 |                 labels.append(target.data)
92 | 
93 |         preds = torch.cat(preds, 0)
94 |         labels = torch.cat(labels, 0)
95 |         acc = (preds == labels).sum().item() / labels.numel()
96 |         if acc > global_acc:
97 |             global_acc = acc
98 |         print('Repeat %3d \t Acc: %0.6f' % (i + 1, acc))
99 |     print('\nBest voting acc: %0.6f' % (global_acc))
100 | 
101 | if __name__ == '__main__':
102 |     main()
103 | 
--------------------------------------------------------------------------------
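The loop above implements soft voting: it averages softmax distributions over NUM_VOTE randomly re-sampled (and, after the first vote, randomly scaled) versions of each cloud, then takes the argmax. A toy numeric illustration of why averaging distributions can beat majority voting on borderline inputs:

```python
import torch
import torch.nn.functional as F

logits = torch.tensor([[2.0, 1.9],   # vote 0: barely prefers class 0
                       [1.0, 3.0],   # vote 1: confidently prefers class 1
                       [2.5, 2.4]])  # vote 2: barely prefers class 0
pred = F.softmax(logits, dim=1).mean(0)   # average the three distributions
print(pred.argmax().item())   # 1 -- the confident vote outweighs two weak ones
```

A hard majority over the per-vote argmaxes would have picked class 0 here, which is the intuition behind averaging the softmax outputs instead.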