├── README.md
└── RSCNNEQ
├── CMakeLists.txt
├── cfgs
└── config_ssn_cls.yaml
├── cls
└── model_cls_ssn_iter_70917_acc_0.925446.pth
├── data
├── ModelNet40Loader.py
├── ShapeNetPartLoader.py
├── __init__.py
└── data_utils.py
├── models
├── __init__.py
└── rscnn_ssn_cls.py
├── train_cls.py
├── train_cls.sh
├── utils
├── build_ffi.py
├── cinclude
│ ├── cuda_utils.h
│ ├── ellipsoid_query_gpu.h
│ ├── ellipsoid_query_wrapper.h
│ ├── group_points_gpu.h
│ ├── group_points_wrapper.h
│ ├── helper_cuda.h
│ ├── helper_string.h
│ ├── interpolate_gpu.h
│ ├── interpolate_wrapper.h
│ ├── sampling_gpu.h
│ └── sampling_wrapper.h
├── csrc
│ ├── ellipsoid_query.c
│ ├── ellipsoid_query_gpu.cu
│ ├── group_points.c
│ ├── group_points_gpu.cu
│ ├── interpolate.c
│ ├── interpolate_gpu.cu
│ ├── sampling.c
│ └── sampling_gpu.cu
├── linalg_utils.py
├── pointnet2_modules.py
├── pointnet2_utils.py
└── pytorch_utils
│ ├── __init__.py
│ └── pytorch_utils.py
└── voting_evaluate_cls.py
/README.md:
--------------------------------------------------------------------------------
1 | # EllipsoidQuery
2 |
3 | This code submission is to reproduce the impact of re-oriented ellipsoid querying on RS-CNN Shape Classification.
4 |
5 | contact email: ssheshap@udel.edu
6 |
7 | Software requirements
8 | ----------------------
9 | Ubuntu 18.04
10 | Python 3.5 (recommend Anaconda3)
11 | Pytorch 0.4.1
12 | CMake 3.10.2
13 | CUDA 10.0 + cuDNN 7
14 | Cudatoolkit V10.0.130
15 |
16 | Note: This code also works in the environment suggested by the authors of RS-CNN (https://github.com/Yochengliu/Relation-Shape-CNN/).
17 |
18 | Download
19 | --------
20 | git clone https://github.com/VimsLab/EllipsoidQuery.git
21 | cd EllipsoidQuery/RSCNNEQ
22 |
23 | Building Kernel
24 | ---------------
25 | mkdir build && cd build
26 | cmake .. && make
27 |
28 | Dataset
29 | -------
30 | Download and unzip ModelNet40 (415M) in data directory.
31 | https://shapenet.cs.stanford.edu/media/modelnet40_ply_hdf5_2048.zip
32 |
33 | Usage: Train
34 | ------------
35 | sh train_cls.sh
36 |
37 | Note: We have trained a Single-Scale-Neighborhood classification model, stored in the cls folder, whose training accuracy is 92.55% ('cls/model_cls_ssn_iter_70917_acc_0.925446.pth').
38 |
39 | Usage: Evaluation
40 | -----------------
41 | Set the checkpoint entry in cfgs/config_ssn_cls.yaml to the *.pth file in the cls/ folder that has the highest accuracy.
42 | python voting_evaluate_cls.py
43 |
44 | Note: You can use our model cls/model_cls_ssn_iter_70917_acc_0.925446.pth as the checkpoint in config_ssn_cls.yaml, and with majority voting you will get an accuracy of 93.51%. Due to randomness the accuracy might vary.
45 |
46 | This code has been heavily borrowed from https://github.com/Yochengliu/Relation-Shape-CNN/ and https://github.com/erikwijmans/Pointnet2_PyTorch
47 |
48 |
49 | To cite our paper, please use the BibTeX entry below.
50 |
51 | ```BibTex
52 | @InProceedings{Sheshappanavar_2020_CVPR_Workshops,
53 | author = {Venkanna Sheshappanavar, Shivanand and Kambhamettu, Chandra},
54 | title = {A Novel Local Geometry Capture in PointNet++ for 3D Classification},
55 | booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
56 | month = {June},
57 | year = {2020}
58 | }
59 | ```
60 |
--------------------------------------------------------------------------------
/RSCNNEQ/CMakeLists.txt:
--------------------------------------------------------------------------------
# Builds the CUDA kernels under utils/csrc and then runs utils/build_ffi.py to
# link them into the `_pointnet2.so` extension module.

# cmake_minimum_required() must come before project() so that policy defaults
# are in place when the project is configured.
cmake_minimum_required(VERSION 2.8)
project(PointNet2)

find_package(CUDA REQUIRED)

include_directories("${CMAKE_CURRENT_SOURCE_DIR}/utils/cinclude")
cuda_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/utils/cinclude")
file(GLOB cuda_kernels_src "${CMAKE_CURRENT_SOURCE_DIR}/utils/csrc/*.cu")
cuda_compile(cuda_kernels SHARED ${cuda_kernels_src} OPTIONS -O3)

set(BUILD_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/utils/build_ffi.py")
set(BUILD_CMD python "${BUILD_SCRIPT}")
file(GLOB wrapper_headers "${CMAKE_CURRENT_SOURCE_DIR}/utils/cinclude/*wrapper.h")
# The C wrappers live in utils/csrc; the previous pattern pointed at a
# non-existent "csrs" directory, so edits to them never triggered a rebuild.
file(GLOB wrapper_sources "${CMAKE_CURRENT_SOURCE_DIR}/utils/csrc/*.c")

# build_ffi.py resolves its header/source globs relative to the working
# directory, hence WORKING_DIRECTORY is utils/.
add_custom_command(
  OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/utils/_ext/pointnet2/_pointnet2.so"
  WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/utils"
  COMMAND ${BUILD_CMD} --build --objs ${cuda_kernels}
  DEPENDS ${cuda_kernels}
  DEPENDS ${wrapper_headers}
  DEPENDS ${wrapper_sources}
  DEPENDS "${BUILD_SCRIPT}"
  VERBATIM)

add_custom_target(pointnet2_ext ALL
  DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/utils/_ext/pointnet2/_pointnet2.so")
24 |
25 |
--------------------------------------------------------------------------------
/RSCNNEQ/cfgs/config_ssn_cls.yaml:
--------------------------------------------------------------------------------
1 | common:
2 | workers: 4
3 |
4 | num_points: 1024
5 | num_classes: 40
6 | batch_size: 16
7 |
8 | base_lr: 0.002
9 | lr_clip: 0.00001
10 | lr_decay: 0.7
11 | decay_step: 25
12 | epochs: 200
13 |
14 | weight_decay: 0
15 | bn_momentum: 0.9
16 | bnm_clip: 0.01
17 | bn_decay: 0.5
18 |
19 | evaluate: 1
20 | val_freq_epoch: 0.5 # frequency in epoch for validation, can be decimal
21 | print_freq_iter: 20 # frequency in iteration for printing information
22 |
23 | input_channels: 0 # feature channels except (x, y, z)
24 |
25 | # h_ij: 0 for 3D Euclidean distance (3D Ed), channels = 1
26 | # 1 for (3D Ed, x_i, x_j, x_j - x_i), channels = 10
27 | # 2 for (2D Ed, x'_i, x'_j, x'_j - x'_i), channels = 10, x' indicates 2D coordinates
28 | relation_prior: 1
29 |
30 | checkpoint: '' # the model to start from
31 | save_path: cls
32 | data_root: data
33 |
--------------------------------------------------------------------------------
/RSCNNEQ/cls/model_cls_ssn_iter_70917_acc_0.925446.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VimsLab/EllipsoidQuery/7279e2920991c60a5be650ac37832af0b4c8cd76/RSCNNEQ/cls/model_cls_ssn_iter_70917_acc_0.925446.pth
--------------------------------------------------------------------------------
/RSCNNEQ/data/ModelNet40Loader.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.utils.data as data
3 | import numpy as np
4 | import os, sys, h5py
5 |
6 | BASE_DIR = os.path.dirname(os.path.abspath(__file__))
7 | sys.path.append(BASE_DIR)
8 |
9 | def _get_data_files(list_filename):
10 | with open(list_filename) as f:
11 | return [line.rstrip()[5:] for line in f]
12 |
def _load_data_file(name):
    """Load the ``data`` and ``label`` datasets from one HDF5 file.

    The file is opened read-only and closed before returning; ``[:]`` copies
    each dataset into memory, so the returned arrays stay valid afterwards.

    Returns
    -------
    tuple
        ``(data, label)`` as read from the file.
    """
    with h5py.File(name, 'r') as f:
        data = f['data'][:]
        label = f['label'][:]
    return data, label
18 |
class ModelNet40Cls(data.Dataset):
    """ModelNet40 classification dataset backed by HDF5 shards.

    All shards named in ``train_files.txt`` (or ``test_files.txt``) inside
    ``<root>/modelnet40_ply_hdf5_2048`` are loaded and concatenated in memory
    at construction time.

    Parameters
    ----------
    num_points : int
        Stored on the instance; not used for sub-sampling inside this class.
    root : str
        Directory that contains the ``modelnet40_ply_hdf5_2048`` folder.
    transforms : callable, optional
        Applied to each sample's point array in ``__getitem__``.
    train : bool
        Selects the train or test listing and enables per-item point shuffling.
    """

    def __init__(
        self, num_points, root, transforms=None, train=True
    ):
        super().__init__()

        self.transforms = transforms
        self.train, self.num_points = train, num_points

        root = os.path.abspath(root)
        self.folder = "modelnet40_ply_hdf5_2048"
        self.data_dir = os.path.join(root, self.folder)

        listing = 'train_files.txt' if self.train else 'test_files.txt'
        self.files = _get_data_files(os.path.join(self.data_dir, listing))

        # Listing entries are joined onto `root`, not onto `data_dir`.
        shards = [_load_data_file(os.path.join(root, rel)) for rel in self.files]
        self.points = np.concatenate([pts for pts, _ in shards], 0)
        self.labels = np.concatenate([lbl for _, lbl in shards], 0)

    def __getitem__(self, idx):
        """Return ``(points, label)`` for sample ``idx``; points are shuffled when training."""
        order = np.arange(0, self.points.shape[1])
        if self.train:
            np.random.shuffle(order)

        sample = self.points[idx, order].copy()
        label = torch.from_numpy(self.labels[idx]).type(torch.LongTensor)

        if self.transforms is None:
            return sample, label
        return self.transforms(sample), label

    def __len__(self):
        """Number of samples across all loaded shards."""
        return self.points.shape[0]
64 |
if __name__ == "__main__":
    # Smoke test: load the training split from the current directory and print
    # one sample, its label and the dataset size.
    from torchvision import transforms
    import data_utils as d_utils

    # Only the tensor conversion is applied per sample. The other transforms in
    # data_utils index their input as pc[i, :, 0:3] (a batch of clouds) and some
    # move data to CUDA, and data_utils defines no PointcloudRotate, so the
    # previous pipeline could not run on a single (N, 3) sample.
    transforms = transforms.Compose([
        d_utils.PointcloudToTensor(),
    ])
    dset = ModelNet40Cls(16, "./", train=True, transforms=transforms)
    print(dset[0][0])
    print(dset[0][1])
    print(len(dset))
    dloader = torch.utils.data.DataLoader(dset, batch_size=32, shuffle=True)
81 |
--------------------------------------------------------------------------------
/RSCNNEQ/data/ShapeNetPartLoader.py:
--------------------------------------------------------------------------------
1 | import os
2 | import os.path
3 | import torch
4 | import json
5 | import numpy as np
6 | import sys
7 | import torchvision.transforms as transforms
8 |
def pc_normalize(pc):
    """Center a point cloud on its centroid and scale it to unit radius.

    Parameters
    ----------
    pc : np.ndarray
        Array indexed as (point, coordinate); the mean is taken over axis 0.

    Returns
    -------
    np.ndarray
        The centered points divided by the largest point norm. When every point
        coincides with the centroid (largest norm is 0) the centered, all-zero
        cloud is returned instead of dividing by zero.
    """
    centroid = np.mean(pc, axis=0)
    pc = pc - centroid
    m = np.max(np.sqrt(np.sum(pc**2, axis=1)))
    if m > 0:
        pc = pc / m
    return pc
16 |
class ShapeNetPart():
    """ShapeNet part-segmentation dataset read from per-shape text files.

    Parameters
    ----------
    root : str
        Dataset directory containing ``synsetoffset2category.txt``, the
        ``train_test_split`` JSON lists and one sub-directory per category.
    num_points : int
        Number of points drawn (with replacement) for every returned sample.
    split : str
        One of ``'train'``, ``'val'``, ``'trainval'`` or ``'test'``.
    normalize : bool
        Whether to run ``pc_normalize`` on the xyz coordinates when loading.
    transforms : callable, optional
        Applied to the resampled point array.
    """

    def __init__(self, root, num_points = 2048, split='train', normalize=True, transforms = None):
        self.transforms = transforms
        self.num_points = num_points
        self.root = root
        self.catfile = os.path.join(self.root, 'synsetoffset2category.txt')
        self.normalize = normalize

        # First column of the category file -> second column.
        name_to_dir = {}
        with open(self.catfile, 'r') as handle:
            for row in handle:
                fields = row.strip().split()
                name_to_dir[fields[0]] = fields[1]
        self.cat = name_to_dir

        self.meta = {}

        def _read_ids(list_name):
            # Each JSON entry is a '/'-separated path; the third component is the shape id.
            with open(os.path.join(self.root, 'train_test_split', list_name), 'r') as fh:
                return set([str(entry.split('/')[2]) for entry in json.load(fh)])

        train_ids = _read_ids('shuffled_train_file_list.json')
        val_ids = _read_ids('shuffled_val_file_list.json')
        test_ids = _read_ids('shuffled_test_file_list.json')
        ids_by_split = {
            'trainval': train_ids | val_ids,
            'train': train_ids,
            'val': val_ids,
            'test': test_ids,
        }

        for item in self.cat:
            dir_point = os.path.join(self.root, self.cat[item])
            candidates = sorted(os.listdir(dir_point))
            if split not in ids_by_split:
                print('Unknown split: %s. Exiting..'%(split))
                exit(-1)
            wanted = ids_by_split[split]
            # fn[0:-4] drops the last four characters (the file extension) to get the id.
            self.meta[item] = [
                os.path.join(dir_point, os.path.splitext(os.path.basename(fn))[0] + '.txt')
                for fn in candidates
                if fn[0:-4] in wanted
            ]

        self.datapath = [(item, fn) for item in self.cat for fn in self.meta[item]]

        self.classes = {name: position for position, name in enumerate(self.cat)}
        # Mapping from category ('Chair') to a list of int [10,11,12,13] as segmentation labels
        self.seg_classes = {'Earphone': [16, 17, 18], 'Motorbike': [30, 31, 32, 33, 34, 35], 'Rocket': [41, 42, 43], 'Car': [8, 9, 10, 11], 'Laptop': [28, 29], 'Cap': [6, 7], 'Skateboard': [44, 45, 46], 'Mug': [36, 37], 'Guitar': [19, 20, 21], 'Bag': [4, 5], 'Lamp': [24, 25, 26, 27], 'Table': [47, 48, 49], 'Airplane': [0, 1, 2, 3], 'Pistol': [38, 39, 40], 'Chair': [12, 13, 14, 15], 'Knife': [22, 23]}

        self.cache = {}
        self.cache_size = 20000

    def __getitem__(self, index):
        """Return ``(points, seg_labels, class_label)`` for one shape.

        Parsed shapes are kept in ``self.cache`` until it holds
        ``self.cache_size`` entries; resampling happens on every call.
        """
        cached = self.cache.get(index)
        if cached is not None:
            point_set, seg, cls = cached
        else:
            category, path = self.datapath[index]
            cls = np.array([self.classes[category]]).astype(np.int64)
            raw = np.loadtxt(path).astype(np.float32)
            point_set = raw[:, 0:3]
            if self.normalize:
                point_set = pc_normalize(point_set)
            seg = raw[:, -1].astype(np.int64)
            if len(self.cache) < self.cache_size:
                self.cache[index] = (point_set, seg, cls)

        # Resample to a fixed size, drawing indices with replacement.
        choice = np.random.choice(len(seg), self.num_points, replace=True)
        point_set = point_set[choice, :]
        seg = seg[choice]
        if self.transforms is not None:
            point_set = self.transforms(point_set)

        return point_set, torch.from_numpy(seg), torch.from_numpy(cls)

    def __len__(self):
        """Number of shapes in the selected split."""
        return len(self.datapath)
98 |
99 |
--------------------------------------------------------------------------------
/RSCNNEQ/data/__init__.py:
--------------------------------------------------------------------------------
1 | from .ModelNet40Loader import ModelNet40Cls
2 | from .ShapeNetPartLoader import ShapeNetPart
--------------------------------------------------------------------------------
/RSCNNEQ/data/data_utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 |
class PointcloudToTensor(object):
    """Callable that turns a NumPy point array into a float32 torch tensor."""

    def __call__(self, points):
        as_tensor = torch.from_numpy(points)
        return as_tensor.float()
7 |
def angle_axis(angle: float, axis: np.ndarray):
    r"""Build the 3x3 rotation matrix for a rotation of ``angle`` about ``axis``.

    The axis is normalized first, then the matrix is assembled as
    ``cos(a) * I + sin(a) * [u]_x + (1 - cos(a)) * u u^T`` (Rodrigues' formula).

    Parameters
    ----------
    angle : float
        Angle to rotate by, passed to ``np.cos`` / ``np.sin``
    axis: np.ndarray
        Axis to rotate about; need not be unit length

    Returns
    -------
    torch.Tensor
        3x3 rotation matrix as a float32 tensor
    """
    unit = axis / np.linalg.norm(axis)
    ux, uy, uz = unit[0], unit[1], unit[2]
    cos_a = np.cos(angle)
    sin_a = np.sin(angle)

    # Skew-symmetric cross-product matrix of the unit axis.
    skew = np.array([
        [0.0, -uz, uy],
        [uz, 0.0, -ux],
        [-uy, ux, 0.0],
    ])

    identity_term = cos_a * np.eye(3)
    skew_term = sin_a * skew
    outer_term = (1.0 - cos_a) * np.outer(unit, unit)

    rotation = torch.from_numpy(identity_term + skew_term + outer_term)
    return rotation.float()
38 |
class PointcloudRotatebyAngle(object):
    """Rotate every cloud in a batch by a fixed angle, in place.

    Points are multiplied on the right by
    ``[[cos, 0, sin], [0, 1, 0], [-sin, 0, cos]]``, which leaves the second
    coordinate unchanged.

    Parameters
    ----------
    rotation_angle : float
        Angle passed to ``np.cos`` / ``np.sin``.
    """

    def __init__(self, rotation_angle = 0.0):
        self.rotation_angle = rotation_angle

    def __call__(self, pc):
        """Rotate ``pc`` (indexed as batch, point, channel) and return it.

        Channels 0:3 are rotated; when there are more than three channels, the
        channels from index 3 onward are rotated with the same matrix (they are
        treated as normals). The input tensor is modified in place.
        """
        normals = pc.size(2) > 3
        bsize = pc.size()[0]

        # The matrix depends only on the configured angle, so build it once and
        # place it on the input's device instead of assuming CUDA.
        cosval = np.cos(self.rotation_angle)
        sinval = np.sin(self.rotation_angle)
        rotation_matrix = np.array([[cosval, 0, sinval],
                                    [0, 1, 0],
                                    [-sinval, 0, cosval]])
        rotation_matrix = torch.from_numpy(rotation_matrix).float().to(pc.device)

        for i in range(bsize):
            pc[i, :, 0:3] = pc[i, :, 0:3] @ rotation_matrix
            if normals:
                pc[i, :, 3:] = pc[i, :, 3:] @ rotation_matrix

        return pc
66 |
class PointcloudJitter(object):
    """Add clipped Gaussian noise to the xyz channels of every cloud, in place.

    Parameters
    ----------
    std : float
        Standard deviation of the zero-mean noise.
    clip : float
        Noise values are clamped to ``[-clip, clip]`` before being added.
    """

    def __init__(self, std=0.01, clip=0.05):
        self.std, self.clip = std, clip

    def __call__(self, pc):
        n_points = pc.size(1)
        for sample_idx in range(pc.size()[0]):
            # pc.new(...) allocates with the same type/device as the input.
            noise = pc.new(n_points, 3)
            noise.normal_(mean=0.0, std=self.std)
            noise.clamp_(-self.clip, self.clip)
            pc[sample_idx, :, 0:3] += noise

        return pc
80 |
class PointcloudScaleAndTranslate(object):
    """Randomly scale and shift the xyz channels of every cloud, in place.

    Parameters
    ----------
    scale_low, scale_high : float
        Bounds of the uniform distribution for the three per-axis scale factors.
    translate_range : float
        Per-axis offsets are drawn uniformly from ``[-translate_range, translate_range]``.
    """

    def __init__(self, scale_low=2. / 3., scale_high=3. / 2., translate_range=0.2):
        self.scale_low = scale_low
        self.scale_high = scale_high
        self.translate_range = translate_range

    def __call__(self, pc):
        """Transform ``pc`` (indexed as batch, point, channel) and return it."""
        bsize = pc.size()[0]
        for i in range(bsize):
            xyz1 = np.random.uniform(low=self.scale_low, high=self.scale_high, size=[3])
            xyz2 = np.random.uniform(low=-self.translate_range, high=self.translate_range, size=[3])

            # Follow the input's device rather than assuming CUDA, so the
            # transform also works on CPU tensors.
            scale = torch.from_numpy(xyz1).float().to(pc.device)
            shift = torch.from_numpy(xyz2).float().to(pc.device)
            pc[i, :, 0:3] = torch.mul(pc[i, :, 0:3], scale) + shift

        return pc
96 |
class PointcloudScale(object):
    """Randomly scale the xyz channels of every cloud, in place.

    Parameters
    ----------
    scale_low, scale_high : float
        Bounds of the uniform distribution for the three per-axis scale factors.
    """

    def __init__(self, scale_low=2. / 3., scale_high=3. / 2.):
        self.scale_low = scale_low
        self.scale_high = scale_high

    def __call__(self, pc):
        """Scale ``pc`` (indexed as batch, point, channel) and return it."""
        bsize = pc.size()[0]
        for i in range(bsize):
            xyz1 = np.random.uniform(low=self.scale_low, high=self.scale_high, size=[3])

            # Follow the input's device rather than assuming CUDA.
            scale = torch.from_numpy(xyz1).float().to(pc.device)
            pc[i, :, 0:3] = torch.mul(pc[i, :, 0:3], scale)

        return pc
110 |
class PointcloudTranslate(object):
    """Randomly shift the xyz channels of every cloud, in place.

    Parameters
    ----------
    translate_range : float
        Per-axis offsets are drawn uniformly from ``[-translate_range, translate_range]``.
    """

    def __init__(self, translate_range=0.2):
        self.translate_range = translate_range

    def __call__(self, pc):
        """Shift ``pc`` (indexed as batch, point, channel) and return it."""
        bsize = pc.size()[0]
        for i in range(bsize):
            xyz2 = np.random.uniform(low=-self.translate_range, high=self.translate_range, size=[3])

            # Follow the input's device rather than assuming CUDA.
            shift = torch.from_numpy(xyz2).float().to(pc.device)
            pc[i, :, 0:3] = pc[i, :, 0:3] + shift

        return pc
123 |
class PointcloudRandomInputDropout(object):
    """Randomly collapse points of every cloud onto that cloud's first point.

    For each cloud a dropout ratio is drawn uniformly from
    ``[0, max_dropout_ratio)``; every point whose own uniform draw is at most
    that ratio has its xyz replaced by the xyz of point 0. The tensor is
    modified in place.

    Parameters
    ----------
    max_dropout_ratio : float
        Upper bound of the per-cloud dropout ratio; must lie in ``[0, 1)``.
    """

    def __init__(self, max_dropout_ratio=0.875):
        assert 0 <= max_dropout_ratio < 1
        self.max_dropout_ratio = max_dropout_ratio

    def __call__(self, pc):
        for sample_idx in range(pc.size()[0]):
            ratio = np.random.random() * self.max_dropout_ratio  # 0~0.875
            draws = np.random.random((pc.size()[1]))
            dropped = np.where(draws <= ratio)[0]
            if len(dropped) == 0:
                continue
            cloud = pc[sample_idx, :, :]
            anchor = cloud[0, 0:3]
            cloud[dropped.tolist(), 0:3] = anchor.repeat(len(dropped), 1)  # set to the first point
            pc[sample_idx, :, :] = cloud

        return pc
140 |
--------------------------------------------------------------------------------
/RSCNNEQ/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .rscnn_ssn_cls import RSCNN_SSN as RSCNN_SSN_Cls
2 |
--------------------------------------------------------------------------------
/RSCNNEQ/models/rscnn_ssn_cls.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 | BASE_DIR = os.path.dirname(os.path.abspath(__file__))
3 | sys.path.append(BASE_DIR)
4 | sys.path.append(os.path.join(BASE_DIR, "../utils"))
5 | import torch
6 | import torch.nn as nn
7 | from torch.autograd import Variable
8 | import pytorch_utils as pt_utils
9 | from pointnet2_modules import PointnetSAModule, PointnetSAModuleMSG
10 | import numpy as np
11 |
12 | # Relation-Shape CNN: Single-Scale Neighborhood
class RSCNN_SSN(nn.Module):
    r"""
        Relation-Shape CNN classifier with a single-scale neighborhood.

        Three set-abstraction stages (two local ``PointnetSAModuleMSG`` stages and
        one ``PointnetSAModule`` used as global pooling) feed a three-layer
        fully connected head that outputs ``num_classes`` scores per cloud.

        Parameters
        ----------
        num_classes: int
            Size of the final fully connected layer
        input_channels: int = 0
            Number of feature channels per point besides xyz; used as the input
            width of the first stage's MLP
        relation_prior: int = 1
            Forwarded unchanged to both local stages
        use_xyz: bool = True
            Forwarded unchanged to every stage
    """

    def __init__(self, num_classes, input_channels=0, relation_prior=1, use_xyz=True):
        super().__init__()

        # NOTE(review): e1s/e2s/e3s look like the ellipsoid axis lengths used by
        # the ellipsoid query -- confirm in pointnet2_modules.
        first_local = PointnetSAModuleMSG(
            npoint=512,
            e1s=[0.25],
            e2s=[0.15],
            e3s=[0.15],
            nsamples=[48],
            mlps=[[input_channels, 128]],
            first_layer=True,
            use_xyz=use_xyz,
            relation_prior=relation_prior
        )
        second_local = PointnetSAModuleMSG(
            npoint=128,
            e1s=[0.50],
            e2s=[0.30],
            e3s=[0.30],
            nsamples=[64],
            mlps=[[128, 512]],
            use_xyz=use_xyz,
            relation_prior=relation_prior
        )
        # global convolutional pooling
        global_pool = PointnetSAModule(
            nsample = 128,
            mlp=[512, 1024],
            use_xyz=use_xyz
        )
        self.SA_modules = nn.ModuleList([first_local, second_local, global_pool])

        self.FC_layer = nn.Sequential(
            pt_utils.FC(1024, 512, activation=nn.ReLU(inplace=True), bn=True),
            nn.Dropout(p=0.5),
            pt_utils.FC(512, 256, activation=nn.ReLU(inplace=True), bn=True),
            nn.Dropout(p=0.5),
            pt_utils.FC(256, num_classes, activation=None)
        )

    def _break_up_pc(self, pc):
        """Split ``pc`` into contiguous xyz and channel-first features (``None`` if only xyz)."""
        xyz = pc[..., 0:3].contiguous()
        if pc.size(-1) > 3:
            features = pc[..., 3:].transpose(1, 2).contiguous()
        else:
            features = None
        return xyz, features

    def forward(self, pointcloud: torch.cuda.FloatTensor):
        r"""
            Forward pass of the network

            Parameters
            ----------
            pointcloud: Variable(torch.cuda.FloatTensor)
                (B, N, 3 + input_channels) tensor
                Point cloud to run predictions on
                Each point in the point-cloud MUST
                be formatted as (x, y, z, features...)
        """
        xyz, features = self._break_up_pc(pointcloud)
        for stage in self.SA_modules:
            xyz, features = stage(xyz, features)
        return self.FC_layer(features.squeeze(-1))
102 |
103 |
if __name__ == "__main__":
    # Smoke test: push one random batch through the network on the GPU.
    sim_data = Variable(torch.rand(32, 2048, 6))
    sim_data = sim_data.cuda()

    seg = RSCNN_SSN(num_classes=50, input_channels=3, use_xyz=True)
    seg = seg.cuda()
    # forward() accepts only the point cloud; the previous call also passed a
    # class tensor, which raised a TypeError.
    out = seg(sim_data)
    print('seg', out.size())
--------------------------------------------------------------------------------
/RSCNNEQ/train_cls.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.optim as optim
3 | import torch.optim.lr_scheduler as lr_sched
4 | import torch.nn as nn
5 | from torch.utils.data import DataLoader
6 | from torch.autograd import Variable
7 | import numpy as np
8 | import os
9 | from torchvision import transforms
10 | from models import RSCNN_SSN_Cls as RSCNN_SSN
11 | from data import ModelNet40Cls
12 | import utils.pytorch_utils as pt_utils
13 | import utils.pointnet2_utils as pointnet2_utils
14 | import data.data_utils as d_utils
15 | import argparse
16 | import random
17 | import yaml
18 | import gc
19 |
# cuDNN configuration for the whole process.
# NOTE(review): benchmark=True asks cuDNN to auto-tune its algorithms, which may
# work against the determinism requested on the following line -- confirm intent.
torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.deterministic = True

# Seed every random number generator used by the script with the same value.
seed = 123
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

# Command-line interface: the only option is the path of the YAML config file.
parser = argparse.ArgumentParser(description='Relation-Shape CNN Shape Classification Training')
parser.add_argument('--config', default='cfgs/config_ssn_cls.yaml', type=str)
33 |
def main():
    """Parse the config, build datasets, model and schedulers, then start training.

    Every key under ``common`` in the YAML file named by ``--config`` is copied
    onto the parsed ``args`` namespace and printed.
    """
    # os.system('cp models/rscnn_ssn_cls.py cls/') # bkp of train procedure
    # os.system('cp utils/pointnet2_utils.py cls/')
    # os.system('cp utils/pointnet2_modules.py cls/')
    # os.system('cp utils/csrc/ellipsoid_query_gpu.cu cls/')
    # os.system('cp utils/csrc/ellipsoid_query.c cls/')
    # os.system('cp cfgs/config_ssn_cls.yaml cls/')
    args = parser.parse_args()
    with open(args.config) as f:
        # safe_load builds only plain Python objects; yaml.load without an
        # explicit Loader is unsafe and rejected by recent PyYAML releases.
        config = yaml.safe_load(f)
    print("\n**************************")
    for k, v in config['common'].items():
        setattr(args, k, v)
        print('\n[%s]:'%(k), v)
    print("\n**************************\n")

    # Create the checkpoint directory if needed; an existing one is fine.
    os.makedirs(args.save_path, exist_ok=True)

    train_transforms = transforms.Compose([
        d_utils.PointcloudToTensor()
    ])
    test_transforms = transforms.Compose([
        d_utils.PointcloudToTensor()
    ])

    train_dataset = ModelNet40Cls(num_points = args.num_points, root = args.data_root, transforms=train_transforms)
    train_dataloader = DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=int(args.workers),
        pin_memory=True
    )

    test_dataset = ModelNet40Cls(num_points = args.num_points, root = args.data_root, transforms=test_transforms, train=False)
    test_dataloader = DataLoader(
        test_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=int(args.workers),
        pin_memory=True
    )

    model = RSCNN_SSN(num_classes = args.num_classes, input_channels = args.input_channels, relation_prior = args.relation_prior, use_xyz = True)
    model.cuda()
    optimizer = optim.Adam(
        model.parameters(), lr=args.base_lr, weight_decay=args.weight_decay)

    # Step-wise decay, floored at lr_clip (as a multiplier of base_lr) and bnm_clip.
    lr_lbmd = lambda e: max(args.lr_decay**(e // args.decay_step), args.lr_clip / args.base_lr)
    bnm_lmbd = lambda e: max(args.bn_momentum * args.bn_decay**(e // args.decay_step), args.bnm_clip)
    lr_scheduler = lr_sched.LambdaLR(optimizer, lr_lbmd)
    bnm_scheduler = pt_utils.BNMomentumScheduler(model, bnm_lmbd)

    # Truthiness check instead of `is not ''`: identity comparison with a string
    # literal is unreliable, and an empty/None checkpoint must be skipped.
    if args.checkpoint:
        model.load_state_dict(torch.load(args.checkpoint))
        print('Load model successfully: %s' % (args.checkpoint))

    criterion = nn.CrossEntropyLoss()
    # Float on purpose: train() multiplies it by val_freq_epoch before truncating.
    num_batch = len(train_dataset)/args.batch_size

    # training
    train(train_dataloader, test_dataloader, model, criterion, optimizer, lr_scheduler, bnm_scheduler, args, num_batch)
99 |
100 |
def train(train_dataloader, test_dataloader, model, criterion, optimizer, lr_scheduler, bnm_scheduler, args, num_batch):
    """Run the training loop for ``args.epochs`` epochs with periodic validation.

    Parameters
    ----------
    train_dataloader, test_dataloader
        Loaders yielding ``(points, target)`` batches.
    model, criterion, optimizer
        Network, loss and optimizer used for every step.
    lr_scheduler, bnm_scheduler
        Stepped once per batch with the current epoch; either may be ``None``
        for the stepping itself (the progress print still reads ``lr_scheduler``).
    args
        Namespace carrying the config values read here (epochs, num_points,
        print_freq_iter, evaluate, val_freq_epoch).
    num_batch : float
        Batches per epoch, used for progress output and the validation interval.
    """
    PointcloudScaleAndTranslate = d_utils.PointcloudScaleAndTranslate()   # initialize augmentation
    global g_acc
    g_acc = 0.91    # only save the model whose acc > 0.91
    batch_count = 0
    # Validate every `val_interval` batches. Clamp to 1 so a tiny dataset (where
    # the product truncates to 0) cannot cause a modulo-by-zero below.
    val_interval = max(1, int(args.val_freq_epoch * num_batch))
    model.train()
    for epoch in range(args.epochs):
        for i, data in enumerate(train_dataloader, 0):
            if lr_scheduler is not None:
                lr_scheduler.step(epoch)
            if bnm_scheduler is not None:
                bnm_scheduler.step(epoch-1)
            points, target = data
            points, target = points.cuda(), target.cuda()
            points, target = Variable(points), Variable(target)

            # farthest point sampling: pick 1200 points, then a random subset of num_points
            fps_idx = pointnet2_utils.furthest_point_sample(points, 1200)  # (B, npoint)
            fps_idx = fps_idx[:, np.random.choice(1200, args.num_points, False)]
            points = pointnet2_utils.gather_operation(points.transpose(1, 2).contiguous(), fps_idx).transpose(1, 2).contiguous()  # (B, N, 3)

            # augmentation
            points.data = PointcloudScaleAndTranslate(points.data)

            optimizer.zero_grad()

            pred = model(points)
            target = target.view(-1)
            loss = criterion(pred, target)
            loss.backward()
            optimizer.step()
            if i % args.print_freq_iter == 0:
                print('[epoch %3d: %3d/%3d] \t train loss: %0.6f \t lr: %0.5f' %(epoch+1, i, num_batch, loss.data.clone(), lr_scheduler.get_lr()[0]))
            batch_count += 1

            # validation in between an epoch
            if args.evaluate and batch_count % val_interval == 0:
                validate(test_dataloader, model, criterion, args, batch_count)
139 |
140 |
def validate(test_dataloader, model, criterion, args, iter):
    """Evaluate on the test loader, print loss/accuracy and checkpoint good models.

    A checkpoint is written to ``args.save_path`` when the accuracy reaches the
    best value seen so far (``g_acc``) or exceeds 0.9250. The model is put back
    into training mode before returning.

    Parameters
    ----------
    test_dataloader
        Loader yielding ``(points, target)`` batches.
    model, criterion
        Network and loss to evaluate.
    args
        Namespace providing ``num_points`` and ``save_path``.
    iter : int
        Global batch counter, embedded in the checkpoint file name.
    """
    global g_acc
    model.eval()
    losses, preds, labels = [], [], []
    gc.collect()
    with torch.no_grad():
        for j, data in enumerate(test_dataloader, 0):
            points, target = data
            points, target = points.cuda(), target.cuda()
            # points, target = Variable(points, volatile=True), Variable(target, volatile=True)

            # farthest point sampling
            fps_idx = pointnet2_utils.furthest_point_sample(points, args.num_points)  # (B, npoint)
            # fps_idx = fps_idx[:, np.random.choice(1200, args.num_points, False)]
            points = pointnet2_utils.gather_operation(points.transpose(1, 2).contiguous(), fps_idx).transpose(1, 2).contiguous()

            pred = model(points)
            target = target.view(-1)
            loss = criterion(pred, target)
            # Store plain Python floats: building a NumPy array from a list of
            # CUDA tensors is not portable across PyTorch versions.
            losses.append(loss.item())
            _, pred_choice = torch.max(pred.data, -1)

            preds.append(pred_choice)
            labels.append(target.data)

        preds = torch.cat(preds, 0)
        labels = torch.cat(labels, 0)
        acc = (preds == labels).sum().item() / labels.numel()
        print('\nval loss: %0.6f \t acc: %0.6f\n' %(np.array(losses).mean(), acc))
        if acc >= g_acc or acc > 0.9250:
            g_acc = acc
            torch.save(model.state_dict(), '%s/cls_ssn_iter_%d_acc_%0.6f.pth' % (args.save_path, iter, acc))
    model.train()
174 |
# Start training only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    main()
177 |
--------------------------------------------------------------------------------
/RSCNNEQ/train_cls.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env sh
# Launch classification training on GPU 0 in the background, mirroring stdout
# and stderr into a timestamped log file under ./log.
mkdir -p log
now=$(date +"%Y%m%d_%H%M%S")
log_name="Cls_LOG_${now}"
export CUDA_VISIBLE_DEVICES=0
python -u train_cls.py \
    --config cfgs/config_ssn_cls.yaml \
    2>&1 | tee "log/${log_name}.log" &
9 |
--------------------------------------------------------------------------------
/RSCNNEQ/utils/build_ffi.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import torch
3 | import os.path as osp
4 | from torch.utils.ffi import create_extension
5 | import sys, argparse, shutil
6 |
7 | base_dir = osp.dirname(osp.abspath(__file__))
8 |
9 |
def parse_args():
    """Parse the command line for the FFI build script.

    ``--build`` and ``--clean`` are mutually exclusive and one of them must be
    given (enforced by an assertion); ``--objs`` collects zero or more object
    file paths.
    """
    parser = argparse.ArgumentParser(
        description="Arguments for building pointnet2 ffi extension"
    )
    parser.add_argument("--objs", nargs="*")

    mode = parser.add_mutually_exclusive_group()
    for flag in ("build", "clean"):
        mode.add_argument("--" + flag, dest=flag, action="store_true")
    parser.set_defaults(build=False, clean=False)

    parsed = parser.parse_args()
    assert parsed.build or parsed.clean

    return parsed
24 |
25 |
def build(args):
    """Build the ``_ext.pointnet2`` FFI extension.

    The object files from ``args.objs`` plus every static library found under
    ``/usr/local/cuda/lib64`` are passed as extra objects. The header and source
    globs are relative paths, so the script must be run from the directory that
    contains ``cinclude`` and ``csrc``.
    """
    # Copy so the parsed namespace is not mutated, and tolerate a missing
    # --objs option (argparse leaves it as None).
    extra_objects = list(args.objs or [])
    extra_objects += glob.glob('/usr/local/cuda/lib64/*.a')

    ffi = create_extension(
        '_ext.pointnet2',
        headers=glob.glob("cinclude/*_wrapper.h"),
        sources=glob.glob("csrc/*.c"),
        define_macros=[('WITH_CUDA', None)],
        relative_to=__file__,
        with_cuda=True,
        extra_objects=extra_objects,
        include_dirs=[osp.join(base_dir, 'cinclude')],
        verbose=False,
        package=False
    )
    ffi.build()
43 |
44 |
def clean(args):
    """Delete the generated ``_ext`` directory next to this script, if present.

    ``args`` is accepted for symmetry with ``build`` and is not used.
    """
    ext_dir = osp.join(base_dir, "_ext")
    # Cleaning an already-clean tree is a no-op rather than an error.
    if osp.isdir(ext_dir):
        shutil.rmtree(ext_dir)
47 |
48 |
if __name__ == "__main__":
    # Dispatch on the parsed mode: --clean removes build output, otherwise build.
    args = parse_args()
    action = clean if args.clean else build
    action(args)
55 |
--------------------------------------------------------------------------------
/RSCNNEQ/utils/cinclude/cuda_utils.h:
--------------------------------------------------------------------------------
1 | #ifndef _CUDA_UTILS_H
2 | #define _CUDA_UTILS_H
3 |
4 | #include
5 |
6 | #define TOTAL_THREADS 512
7 |
8 | inline int opt_n_threads(int work_size) {
9 | const int pow_2 = std::log(static_cast(work_size)) / std::log(2.0);
10 |
11 | return max(min(1 << pow_2, TOTAL_THREADS), 1);
12 | }
13 |
14 | inline dim3 opt_block_config(int x, int y) {
15 | const int x_threads = opt_n_threads(x);
16 | const int y_threads =
17 | max(min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1);
18 | dim3 block_config(x_threads, y_threads, 1);
19 |
20 | return block_config;
21 | }
22 |
23 | #endif
24 |
--------------------------------------------------------------------------------
/RSCNNEQ/utils/cinclude/ellipsoid_query_gpu.h:
--------------------------------------------------------------------------------
#ifndef _ELLIPSOID_QUERY_GPU
#define _ELLIPSOID_QUERY_GPU

#ifdef __cplusplus
extern "C" {
#endif

// Declaration of the ellipsoid-query entry point, exported with C linkage so it
// can be called from C translation units.
// Reads xyz, new_xyz and fps_idx (const pointers); the remaining pointers are
// non-const and are presumably outputs -- confirm in ellipsoid_query_gpu.cu.
// NOTE(review): b/n/m look like batch size, number of source points and number
// of query points, and e1/e2/e3 like the ellipsoid axis lengths -- confirm.
// `stream` is the CUDA stream handed to the implementation.
void query_ellipsoid_point_kernel_wrapper(int b, int n, int m, float e1, float e2, float e3,
                                          int nsample, const float *xyz,
                                          const float *new_xyz, const int *fps_idx, int *idx, int *ingroup_pts_cnt,
                                          float *ingroup_out, float *ingroup_cva, float *v, float *d,
                                          cudaStream_t stream);

#ifdef __cplusplus
}
#endif
#endif
18 |
--------------------------------------------------------------------------------
/RSCNNEQ/utils/cinclude/ellipsoid_query_wrapper.h:
--------------------------------------------------------------------------------
1 |
// Tensor-level entry point for the ellipsoid query.
// Takes the scalar sizes and axis parameters plus THC CUDA tensors: float
// tensors for new_xyz, xyz, ingroup_out, ingroup_cva, v and d, and int tensors
// for fps_idx, idx and ingroup_pts_cnt.
// NOTE(review): note that new_xyz precedes xyz here, the reverse of the order in
// query_ellipsoid_point_kernel_wrapper -- confirm the call site in ellipsoid_query.c.
int ellipsoid_query_wrapper(int b, int n, int m, float e1, float e2, float e3, int nsample,
                            THCudaTensor *new_xyz_tensor, THCudaTensor *xyz_tensor, THCudaIntTensor *fps_idx_tensor,
                            THCudaIntTensor *idx_tensor, THCudaIntTensor *ingroup_pts_cnt_tensor,
                            THCudaTensor *ingroup_out_tensor, THCudaTensor *ingroup_cva_tensor,
                            THCudaTensor *v_tensor, THCudaTensor *d_tensor);
--------------------------------------------------------------------------------
/RSCNNEQ/utils/cinclude/group_points_gpu.h:
--------------------------------------------------------------------------------
1 | #ifndef _GROUP_POINTS_GPU
2 | #define _GROUP_POINTS_GPU
3 | // Guard is named after this file so that no other header's guard can suppress these declarations.
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 | // Reads `points` and `idx` (both const) and writes `out`. cudaStream_t must already be declared by the includer.
8 | void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample,
9 |                                  const float *points, const int *idx,
10 |                                  float *out, cudaStream_t stream);
11 | // Reads `grad_out` and `idx` (both const) and writes `grad_points`. NOTE(review): presumably the backward pass of the function above - confirm in csrc/group_points_gpu.cu.
12 | void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints,
13 |                                       int nsample, const float *grad_out,
14 |                                       const int *idx, float *grad_points,
15 |                                       cudaStream_t stream);
16 | #ifdef __cplusplus
17 | }
18 | #endif
19 | #endif  // _GROUP_POINTS_GPU
20 |
--------------------------------------------------------------------------------
/RSCNNEQ/utils/cinclude/group_points_wrapper.h:
--------------------------------------------------------------------------------
1 | int group_points_wrapper(int b, int c, int n, int npoints, int nsample, // same five sizes, in the same order, as group_points_kernel_wrapper
2 | THCudaTensor *points_tensor,
3 | THCudaIntTensor *idx_tensor, THCudaTensor *out); // NOTE(review): `out` is presumably the result tensor - confirm in csrc/group_points.c
4 | int group_points_grad_wrapper(int b, int c, int n, int npoints, int nsample, // gradient counterpart of group_points_wrapper, going by the name and the grad_* parameters
5 | THCudaTensor *grad_out_tensor,
6 | THCudaIntTensor *idx_tensor,
7 | THCudaTensor *grad_points_tensor); // NOTE(review): no include guard or TH #include in this header - presumably required by the FFI build; confirm before changing
8 |
--------------------------------------------------------------------------------
/RSCNNEQ/utils/cinclude/helper_cuda.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
3 | *
4 | * Please refer to the NVIDIA end user license agreement (EULA) associated
5 | * with this source code for terms and conditions that govern your use of
6 | * this software. Any use, reproduction, disclosure, or distribution of
7 | * this software and related documentation outside the terms of the EULA
8 | * is strictly prohibited.
9 | *
10 | */
11 |
12 | ////////////////////////////////////////////////////////////////////////////////
13 | // These are CUDA Helper functions for initialization and error checking
14 |
15 | #ifndef HELPER_CUDA_H
16 | #define HELPER_CUDA_H
17 |
18 | #pragma once
19 |
20 | #include <stdio.h>
21 | #include <stdlib.h>
22 | #include <string.h>
23 |
24 | #include "helper_string.h"
25 |
26 | #ifndef EXIT_WAIVED
27 | #define EXIT_WAIVED 2
28 | #endif
29 |
30 | // Note: your SDK sample is required to include the proper header files. Please
31 | // refer to the CUDA examples for the needed CUDA headers, which may change depending
32 | // on which CUDA functions are used.
33 |
34 | // Maps a CUDA Runtime API status (cudaError_t) to the spelling of its enumerator; returns "" when no case below matches.
35 | #ifdef __DRIVER_TYPES_H__ // compiled only if this macro is already defined when the header is included
36 | static const char *_cudaGetErrorEnum(cudaError_t error)
37 | {
38 | switch (error)
39 | {
40 | case cudaSuccess:
41 | return "cudaSuccess";
42 |
43 | case cudaErrorMissingConfiguration:
44 | return "cudaErrorMissingConfiguration";
45 |
46 | case cudaErrorMemoryAllocation:
47 | return "cudaErrorMemoryAllocation";
48 |
49 | case cudaErrorInitializationError:
50 | return "cudaErrorInitializationError";
51 |
52 | case cudaErrorLaunchFailure:
53 | return "cudaErrorLaunchFailure";
54 |
55 | case cudaErrorPriorLaunchFailure:
56 | return "cudaErrorPriorLaunchFailure";
57 |
58 | case cudaErrorLaunchTimeout:
59 | return "cudaErrorLaunchTimeout";
60 |
61 | case cudaErrorLaunchOutOfResources:
62 | return "cudaErrorLaunchOutOfResources";
63 |
64 | case cudaErrorInvalidDeviceFunction:
65 | return "cudaErrorInvalidDeviceFunction";
66 |
67 | case cudaErrorInvalidConfiguration:
68 | return "cudaErrorInvalidConfiguration";
69 |
70 | case cudaErrorInvalidDevice:
71 | return "cudaErrorInvalidDevice";
72 |
73 | case cudaErrorInvalidValue:
74 | return "cudaErrorInvalidValue";
75 |
76 | case cudaErrorInvalidPitchValue:
77 | return "cudaErrorInvalidPitchValue";
78 |
79 | case cudaErrorInvalidSymbol:
80 | return "cudaErrorInvalidSymbol";
81 |
82 | case cudaErrorMapBufferObjectFailed:
83 | return "cudaErrorMapBufferObjectFailed";
84 |
85 | case cudaErrorUnmapBufferObjectFailed:
86 | return "cudaErrorUnmapBufferObjectFailed";
87 |
88 | case cudaErrorInvalidHostPointer:
89 | return "cudaErrorInvalidHostPointer";
90 |
91 | case cudaErrorInvalidDevicePointer:
92 | return "cudaErrorInvalidDevicePointer";
93 |
94 | case cudaErrorInvalidTexture:
95 | return "cudaErrorInvalidTexture";
96 |
97 | case cudaErrorInvalidTextureBinding:
98 | return "cudaErrorInvalidTextureBinding";
99 |
100 | case cudaErrorInvalidChannelDescriptor:
101 | return "cudaErrorInvalidChannelDescriptor";
102 |
103 | case cudaErrorInvalidMemcpyDirection:
104 | return "cudaErrorInvalidMemcpyDirection";
105 |
106 | case cudaErrorAddressOfConstant:
107 | return "cudaErrorAddressOfConstant";
108 |
109 | case cudaErrorTextureFetchFailed:
110 | return "cudaErrorTextureFetchFailed";
111 |
112 | case cudaErrorTextureNotBound:
113 | return "cudaErrorTextureNotBound";
114 |
115 | case cudaErrorSynchronizationError:
116 | return "cudaErrorSynchronizationError";
117 |
118 | case cudaErrorInvalidFilterSetting:
119 | return "cudaErrorInvalidFilterSetting";
120 |
121 | case cudaErrorInvalidNormSetting:
122 | return "cudaErrorInvalidNormSetting";
123 |
124 | case cudaErrorMixedDeviceExecution:
125 | return "cudaErrorMixedDeviceExecution";
126 |
127 | case cudaErrorCudartUnloading:
128 | return "cudaErrorCudartUnloading";
129 |
130 | case cudaErrorUnknown:
131 | return "cudaErrorUnknown";
132 |
133 | case cudaErrorNotYetImplemented:
134 | return "cudaErrorNotYetImplemented";
135 |
136 | case cudaErrorMemoryValueTooLarge:
137 | return "cudaErrorMemoryValueTooLarge";
138 |
139 | case cudaErrorInvalidResourceHandle:
140 | return "cudaErrorInvalidResourceHandle";
141 |
142 | case cudaErrorNotReady:
143 | return "cudaErrorNotReady";
144 |
145 | case cudaErrorInsufficientDriver:
146 | return "cudaErrorInsufficientDriver";
147 |
148 | case cudaErrorSetOnActiveProcess:
149 | return "cudaErrorSetOnActiveProcess";
150 |
151 | case cudaErrorInvalidSurface:
152 | return "cudaErrorInvalidSurface";
153 |
154 | case cudaErrorNoDevice:
155 | return "cudaErrorNoDevice";
156 |
157 | case cudaErrorECCUncorrectable:
158 | return "cudaErrorECCUncorrectable";
159 |
160 | case cudaErrorSharedObjectSymbolNotFound:
161 | return "cudaErrorSharedObjectSymbolNotFound";
162 |
163 | case cudaErrorSharedObjectInitFailed:
164 | return "cudaErrorSharedObjectInitFailed";
165 |
166 | case cudaErrorUnsupportedLimit:
167 | return "cudaErrorUnsupportedLimit";
168 |
169 | case cudaErrorDuplicateVariableName:
170 | return "cudaErrorDuplicateVariableName";
171 |
172 | case cudaErrorDuplicateTextureName:
173 | return "cudaErrorDuplicateTextureName";
174 |
175 | case cudaErrorDuplicateSurfaceName:
176 | return "cudaErrorDuplicateSurfaceName";
177 |
178 | case cudaErrorDevicesUnavailable:
179 | return "cudaErrorDevicesUnavailable";
180 |
181 | case cudaErrorInvalidKernelImage:
182 | return "cudaErrorInvalidKernelImage";
183 |
184 | case cudaErrorNoKernelImageForDevice:
185 | return "cudaErrorNoKernelImageForDevice";
186 |
187 | case cudaErrorIncompatibleDriverContext:
188 | return "cudaErrorIncompatibleDriverContext";
189 |
190 | case cudaErrorPeerAccessAlreadyEnabled:
191 | return "cudaErrorPeerAccessAlreadyEnabled";
192 |
193 | case cudaErrorPeerAccessNotEnabled:
194 | return "cudaErrorPeerAccessNotEnabled";
195 |
196 | case cudaErrorDeviceAlreadyInUse:
197 | return "cudaErrorDeviceAlreadyInUse";
198 |
199 | case cudaErrorProfilerDisabled:
200 | return "cudaErrorProfilerDisabled";
201 |
202 | case cudaErrorProfilerNotInitialized:
203 | return "cudaErrorProfilerNotInitialized";
204 |
205 | case cudaErrorProfilerAlreadyStarted:
206 | return "cudaErrorProfilerAlreadyStarted";
207 |
208 | case cudaErrorProfilerAlreadyStopped:
209 | return "cudaErrorProfilerAlreadyStopped";
210 |
211 | /* Since CUDA 4.0*/
212 | case cudaErrorAssert:
213 | return "cudaErrorAssert";
214 |
215 | case cudaErrorTooManyPeers:
216 | return "cudaErrorTooManyPeers";
217 |
218 | case cudaErrorHostMemoryAlreadyRegistered:
219 | return "cudaErrorHostMemoryAlreadyRegistered";
220 |
221 | case cudaErrorHostMemoryNotRegistered:
222 | return "cudaErrorHostMemoryNotRegistered";
223 |
224 | /* Since CUDA 5.0 */
225 | case cudaErrorOperatingSystem:
226 | return "cudaErrorOperatingSystem";
227 |
228 | case cudaErrorPeerAccessUnsupported:
229 | return "cudaErrorPeerAccessUnsupported";
230 |
231 | case cudaErrorLaunchMaxDepthExceeded:
232 | return "cudaErrorLaunchMaxDepthExceeded";
233 |
234 | case cudaErrorLaunchFileScopedTex:
235 | return "cudaErrorLaunchFileScopedTex";
236 |
237 | case cudaErrorLaunchFileScopedSurf:
238 | return "cudaErrorLaunchFileScopedSurf";
239 |
240 | case cudaErrorSyncDepthExceeded:
241 | return "cudaErrorSyncDepthExceeded";
242 |
243 | case cudaErrorLaunchPendingCountExceeded:
244 | return "cudaErrorLaunchPendingCountExceeded";
245 |
246 | case cudaErrorNotPermitted:
247 | return "cudaErrorNotPermitted";
248 |
249 | case cudaErrorNotSupported:
250 | return "cudaErrorNotSupported";
251 |
252 | /* Since CUDA 6.0 */
253 | case cudaErrorHardwareStackError:
254 | return "cudaErrorHardwareStackError";
255 |
256 | case cudaErrorIllegalInstruction:
257 | return "cudaErrorIllegalInstruction";
258 |
259 | case cudaErrorMisalignedAddress:
260 | return "cudaErrorMisalignedAddress";
261 |
262 | case cudaErrorInvalidAddressSpace:
263 | return "cudaErrorInvalidAddressSpace";
264 |
265 | case cudaErrorInvalidPc:
266 | return "cudaErrorInvalidPc";
267 |
268 | case cudaErrorIllegalAddress:
269 | return "cudaErrorIllegalAddress";
270 |
271 | /* Since CUDA 6.5*/
272 | case cudaErrorInvalidPtx:
273 | return "cudaErrorInvalidPtx";
274 |
275 | case cudaErrorInvalidGraphicsContext:
276 | return "cudaErrorInvalidGraphicsContext";
277 |
278 | case cudaErrorStartupFailure:
279 | return "cudaErrorStartupFailure";
280 |
281 | case cudaErrorApiFailureBase:
282 | return "cudaErrorApiFailureBase";
283 |
284 | /* Since CUDA 8.0*/
285 | case cudaErrorNvlinkUncorrectable :
286 | return "cudaErrorNvlinkUncorrectable";
287 | }
288 | // No case matched: report an empty name instead of failing.
289 | return "";
290 | }
291 | #endif
292 |
293 | #ifdef __cuda_cuda_h__ // compiled only if this macro is already defined when the header is included
294 | // Maps a CUDA Driver API status (CUresult) to the spelling of its enumerator; returns "" when no case below matches.
295 | static const char *_cudaGetErrorEnum(CUresult error)
296 | {
297 | switch (error)
298 | {
299 | case CUDA_SUCCESS:
300 | return "CUDA_SUCCESS";
301 |
302 | case CUDA_ERROR_INVALID_VALUE:
303 | return "CUDA_ERROR_INVALID_VALUE";
304 |
305 | case CUDA_ERROR_OUT_OF_MEMORY:
306 | return "CUDA_ERROR_OUT_OF_MEMORY";
307 |
308 | case CUDA_ERROR_NOT_INITIALIZED:
309 | return "CUDA_ERROR_NOT_INITIALIZED";
310 |
311 | case CUDA_ERROR_DEINITIALIZED:
312 | return "CUDA_ERROR_DEINITIALIZED";
313 |
314 | case CUDA_ERROR_PROFILER_DISABLED:
315 | return "CUDA_ERROR_PROFILER_DISABLED";
316 |
317 | case CUDA_ERROR_PROFILER_NOT_INITIALIZED:
318 | return "CUDA_ERROR_PROFILER_NOT_INITIALIZED";
319 |
320 | case CUDA_ERROR_PROFILER_ALREADY_STARTED:
321 | return "CUDA_ERROR_PROFILER_ALREADY_STARTED";
322 |
323 | case CUDA_ERROR_PROFILER_ALREADY_STOPPED:
324 | return "CUDA_ERROR_PROFILER_ALREADY_STOPPED";
325 |
326 | case CUDA_ERROR_NO_DEVICE:
327 | return "CUDA_ERROR_NO_DEVICE";
328 |
329 | case CUDA_ERROR_INVALID_DEVICE:
330 | return "CUDA_ERROR_INVALID_DEVICE";
331 |
332 | case CUDA_ERROR_INVALID_IMAGE:
333 | return "CUDA_ERROR_INVALID_IMAGE";
334 |
335 | case CUDA_ERROR_INVALID_CONTEXT:
336 | return "CUDA_ERROR_INVALID_CONTEXT";
337 |
338 | case CUDA_ERROR_CONTEXT_ALREADY_CURRENT:
339 | return "CUDA_ERROR_CONTEXT_ALREADY_CURRENT";
340 |
341 | case CUDA_ERROR_MAP_FAILED:
342 | return "CUDA_ERROR_MAP_FAILED";
343 |
344 | case CUDA_ERROR_UNMAP_FAILED:
345 | return "CUDA_ERROR_UNMAP_FAILED";
346 |
347 | case CUDA_ERROR_ARRAY_IS_MAPPED:
348 | return "CUDA_ERROR_ARRAY_IS_MAPPED";
349 |
350 | case CUDA_ERROR_ALREADY_MAPPED:
351 | return "CUDA_ERROR_ALREADY_MAPPED";
352 |
353 | case CUDA_ERROR_NO_BINARY_FOR_GPU:
354 | return "CUDA_ERROR_NO_BINARY_FOR_GPU";
355 |
356 | case CUDA_ERROR_ALREADY_ACQUIRED:
357 | return "CUDA_ERROR_ALREADY_ACQUIRED";
358 |
359 | case CUDA_ERROR_NOT_MAPPED:
360 | return "CUDA_ERROR_NOT_MAPPED";
361 |
362 | case CUDA_ERROR_NOT_MAPPED_AS_ARRAY:
363 | return "CUDA_ERROR_NOT_MAPPED_AS_ARRAY";
364 |
365 | case CUDA_ERROR_NOT_MAPPED_AS_POINTER:
366 | return "CUDA_ERROR_NOT_MAPPED_AS_POINTER";
367 |
368 | case CUDA_ERROR_ECC_UNCORRECTABLE:
369 | return "CUDA_ERROR_ECC_UNCORRECTABLE";
370 |
371 | case CUDA_ERROR_UNSUPPORTED_LIMIT:
372 | return "CUDA_ERROR_UNSUPPORTED_LIMIT";
373 |
374 | case CUDA_ERROR_CONTEXT_ALREADY_IN_USE:
375 | return "CUDA_ERROR_CONTEXT_ALREADY_IN_USE";
376 |
377 | case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED:
378 | return "CUDA_ERROR_PEER_ACCESS_UNSUPPORTED";
379 |
380 | case CUDA_ERROR_INVALID_PTX:
381 | return "CUDA_ERROR_INVALID_PTX";
382 |
383 | case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT:
384 | return "CUDA_ERROR_INVALID_GRAPHICS_CONTEXT";
385 |
386 | case CUDA_ERROR_NVLINK_UNCORRECTABLE:
387 | return "CUDA_ERROR_NVLINK_UNCORRECTABLE";
388 |
389 | case CUDA_ERROR_INVALID_SOURCE:
390 | return "CUDA_ERROR_INVALID_SOURCE";
391 |
392 | case CUDA_ERROR_FILE_NOT_FOUND:
393 | return "CUDA_ERROR_FILE_NOT_FOUND";
394 |
395 | case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND:
396 | return "CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND";
397 |
398 | case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED:
399 | return "CUDA_ERROR_SHARED_OBJECT_INIT_FAILED";
400 |
401 | case CUDA_ERROR_OPERATING_SYSTEM:
402 | return "CUDA_ERROR_OPERATING_SYSTEM";
403 |
404 | case CUDA_ERROR_INVALID_HANDLE:
405 | return "CUDA_ERROR_INVALID_HANDLE";
406 |
407 | case CUDA_ERROR_NOT_FOUND:
408 | return "CUDA_ERROR_NOT_FOUND";
409 |
410 | case CUDA_ERROR_NOT_READY:
411 | return "CUDA_ERROR_NOT_READY";
412 |
413 | case CUDA_ERROR_ILLEGAL_ADDRESS:
414 | return "CUDA_ERROR_ILLEGAL_ADDRESS";
415 |
416 | case CUDA_ERROR_LAUNCH_FAILED:
417 | return "CUDA_ERROR_LAUNCH_FAILED";
418 |
419 | case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES:
420 | return "CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES";
421 |
422 | case CUDA_ERROR_LAUNCH_TIMEOUT:
423 | return "CUDA_ERROR_LAUNCH_TIMEOUT";
424 |
425 | case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING:
426 | return "CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING";
427 |
428 | case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED:
429 | return "CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED";
430 |
431 | case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED:
432 | return "CUDA_ERROR_PEER_ACCESS_NOT_ENABLED";
433 |
434 | case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE:
435 | return "CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE";
436 |
437 | case CUDA_ERROR_CONTEXT_IS_DESTROYED:
438 | return "CUDA_ERROR_CONTEXT_IS_DESTROYED";
439 |
440 | case CUDA_ERROR_ASSERT:
441 | return "CUDA_ERROR_ASSERT";
442 |
443 | case CUDA_ERROR_TOO_MANY_PEERS:
444 | return "CUDA_ERROR_TOO_MANY_PEERS";
445 |
446 | case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED:
447 | return "CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED";
448 |
449 | case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED:
450 | return "CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED";
451 |
452 | case CUDA_ERROR_HARDWARE_STACK_ERROR:
453 | return "CUDA_ERROR_HARDWARE_STACK_ERROR";
454 |
455 | case CUDA_ERROR_ILLEGAL_INSTRUCTION:
456 | return "CUDA_ERROR_ILLEGAL_INSTRUCTION";
457 |
458 | case CUDA_ERROR_MISALIGNED_ADDRESS:
459 | return "CUDA_ERROR_MISALIGNED_ADDRESS";
460 |
461 | case CUDA_ERROR_INVALID_ADDRESS_SPACE:
462 | return "CUDA_ERROR_INVALID_ADDRESS_SPACE";
463 |
464 | case CUDA_ERROR_INVALID_PC:
465 | return "CUDA_ERROR_INVALID_PC";
466 |
467 | case CUDA_ERROR_NOT_PERMITTED:
468 | return "CUDA_ERROR_NOT_PERMITTED";
469 |
470 | case CUDA_ERROR_NOT_SUPPORTED:
471 | return "CUDA_ERROR_NOT_SUPPORTED";
472 |
473 | case CUDA_ERROR_UNKNOWN:
474 | return "CUDA_ERROR_UNKNOWN";
475 | }
476 | // No case matched: report an empty name instead of failing.
477 | return "";
478 | }
479 | #endif
480 |
481 | #ifdef CUBLAS_API_H_ // compiled only if this macro is already defined when the header is included
482 | // Maps a cuBLAS status (cublasStatus_t) to the spelling of its enumerator; returns "" when no case below matches.
483 | static const char *_cudaGetErrorEnum(cublasStatus_t error)
484 | {
485 | switch (error)
486 | {
487 | case CUBLAS_STATUS_SUCCESS:
488 | return "CUBLAS_STATUS_SUCCESS";
489 |
490 | case CUBLAS_STATUS_NOT_INITIALIZED:
491 | return "CUBLAS_STATUS_NOT_INITIALIZED";
492 |
493 | case CUBLAS_STATUS_ALLOC_FAILED:
494 | return "CUBLAS_STATUS_ALLOC_FAILED";
495 |
496 | case CUBLAS_STATUS_INVALID_VALUE:
497 | return "CUBLAS_STATUS_INVALID_VALUE";
498 |
499 | case CUBLAS_STATUS_ARCH_MISMATCH:
500 | return "CUBLAS_STATUS_ARCH_MISMATCH";
501 |
502 | case CUBLAS_STATUS_MAPPING_ERROR:
503 | return "CUBLAS_STATUS_MAPPING_ERROR";
504 |
505 | case CUBLAS_STATUS_EXECUTION_FAILED:
506 | return "CUBLAS_STATUS_EXECUTION_FAILED";
507 |
508 | case CUBLAS_STATUS_INTERNAL_ERROR:
509 | return "CUBLAS_STATUS_INTERNAL_ERROR";
510 |
511 | case CUBLAS_STATUS_NOT_SUPPORTED:
512 | return "CUBLAS_STATUS_NOT_SUPPORTED";
513 |
514 | case CUBLAS_STATUS_LICENSE_ERROR:
515 | return "CUBLAS_STATUS_LICENSE_ERROR";
516 | }
517 | // No case matched: report an empty name instead of failing.
518 | return "";
519 | }
520 | #endif
521 |
522 | #ifdef _CUFFT_H_ // compiled only if this macro is already defined when the header is included
523 | // Maps a cuFFT status (cufftResult) to the spelling of its enumerator; returns "" when no case below matches.
524 | static const char *_cudaGetErrorEnum(cufftResult error)
525 | {
526 | switch (error)
527 | {
528 | case CUFFT_SUCCESS:
529 | return "CUFFT_SUCCESS";
530 |
531 | case CUFFT_INVALID_PLAN:
532 | return "CUFFT_INVALID_PLAN";
533 |
534 | case CUFFT_ALLOC_FAILED:
535 | return "CUFFT_ALLOC_FAILED";
536 |
537 | case CUFFT_INVALID_TYPE:
538 | return "CUFFT_INVALID_TYPE";
539 |
540 | case CUFFT_INVALID_VALUE:
541 | return "CUFFT_INVALID_VALUE";
542 |
543 | case CUFFT_INTERNAL_ERROR:
544 | return "CUFFT_INTERNAL_ERROR";
545 |
546 | case CUFFT_EXEC_FAILED:
547 | return "CUFFT_EXEC_FAILED";
548 |
549 | case CUFFT_SETUP_FAILED:
550 | return "CUFFT_SETUP_FAILED";
551 |
552 | case CUFFT_INVALID_SIZE:
553 | return "CUFFT_INVALID_SIZE";
554 |
555 | case CUFFT_UNALIGNED_DATA:
556 | return "CUFFT_UNALIGNED_DATA";
557 |
558 | case CUFFT_INCOMPLETE_PARAMETER_LIST:
559 | return "CUFFT_INCOMPLETE_PARAMETER_LIST";
560 |
561 | case CUFFT_INVALID_DEVICE:
562 | return "CUFFT_INVALID_DEVICE";
563 |
564 | case CUFFT_PARSE_ERROR:
565 | return "CUFFT_PARSE_ERROR";
566 |
567 | case CUFFT_NO_WORKSPACE:
568 | return "CUFFT_NO_WORKSPACE";
569 |
570 | case CUFFT_NOT_IMPLEMENTED:
571 | return "CUFFT_NOT_IMPLEMENTED";
572 |
573 | case CUFFT_LICENSE_ERROR:
574 | return "CUFFT_LICENSE_ERROR";
575 |
576 | case CUFFT_NOT_SUPPORTED:
577 | return "CUFFT_NOT_SUPPORTED";
578 | }
579 | // No case matched: report an empty name instead of failing.
580 | return "";
581 | }
582 | #endif
583 |
584 |
585 | #ifdef CUSPARSEAPI // compiled only if this macro is already defined when the header is included
586 | // Maps a cuSPARSE status (cusparseStatus_t) to the spelling of its enumerator; returns "" when no case below matches.
587 | static const char *_cudaGetErrorEnum(cusparseStatus_t error)
588 | {
589 | switch (error)
590 | {
591 | case CUSPARSE_STATUS_SUCCESS:
592 | return "CUSPARSE_STATUS_SUCCESS";
593 |
594 | case CUSPARSE_STATUS_NOT_INITIALIZED:
595 | return "CUSPARSE_STATUS_NOT_INITIALIZED";
596 |
597 | case CUSPARSE_STATUS_ALLOC_FAILED:
598 | return "CUSPARSE_STATUS_ALLOC_FAILED";
599 |
600 | case CUSPARSE_STATUS_INVALID_VALUE:
601 | return "CUSPARSE_STATUS_INVALID_VALUE";
602 |
603 | case CUSPARSE_STATUS_ARCH_MISMATCH:
604 | return "CUSPARSE_STATUS_ARCH_MISMATCH";
605 |
606 | case CUSPARSE_STATUS_MAPPING_ERROR:
607 | return "CUSPARSE_STATUS_MAPPING_ERROR";
608 |
609 | case CUSPARSE_STATUS_EXECUTION_FAILED:
610 | return "CUSPARSE_STATUS_EXECUTION_FAILED";
611 |
612 | case CUSPARSE_STATUS_INTERNAL_ERROR:
613 | return "CUSPARSE_STATUS_INTERNAL_ERROR";
614 |
615 | case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
616 | return "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
617 | }
618 | // No case matched: report an empty name instead of failing.
619 | return "";
620 | }
621 | #endif
622 |
623 | #ifdef CUSOLVER_COMMON_H_ // compiled only if this macro is already defined when the header is included
624 | // Maps a cuSOLVER status (cusolverStatus_t) to the spelling of its enumerator; returns "" when no case below matches.
625 | static const char *_cudaGetErrorEnum(cusolverStatus_t error)
626 | {
627 |     switch(error)
628 |     {
629 |         case CUSOLVER_STATUS_SUCCESS:
630 |             return "CUSOLVER_STATUS_SUCCESS";
631 |         case CUSOLVER_STATUS_NOT_INITIALIZED:
632 |             return "CUSOLVER_STATUS_NOT_INITIALIZED";
633 |         case CUSOLVER_STATUS_ALLOC_FAILED:
634 |             return "CUSOLVER_STATUS_ALLOC_FAILED";
635 |         case CUSOLVER_STATUS_INVALID_VALUE:
636 |             return "CUSOLVER_STATUS_INVALID_VALUE";
637 |         case CUSOLVER_STATUS_ARCH_MISMATCH:
638 |             return "CUSOLVER_STATUS_ARCH_MISMATCH";
639 |         case CUSOLVER_STATUS_MAPPING_ERROR:
640 |             return "CUSOLVER_STATUS_MAPPING_ERROR";
641 |         case CUSOLVER_STATUS_EXECUTION_FAILED:
642 |             return "CUSOLVER_STATUS_EXECUTION_FAILED";
643 |         case CUSOLVER_STATUS_INTERNAL_ERROR:
644 |             return "CUSOLVER_STATUS_INTERNAL_ERROR";
645 |         case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
646 |             return "CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
647 |         case CUSOLVER_STATUS_NOT_SUPPORTED:
648 |             return "CUSOLVER_STATUS_NOT_SUPPORTED"; // exact enumerator spelling, no trailing space
649 |         case CUSOLVER_STATUS_ZERO_PIVOT:
650 |             return "CUSOLVER_STATUS_ZERO_PIVOT";
651 |         case CUSOLVER_STATUS_INVALID_LICENSE:
652 |             return "CUSOLVER_STATUS_INVALID_LICENSE";
653 |     }
654 |     // No case matched: report an empty name instead of failing.
655 |     return "";
656 |
657 | }
658 | #endif
659 |
660 | #ifdef CURAND_H_ // compiled only if this macro is already defined when the header is included
661 | // Maps a cuRAND status (curandStatus_t) to the spelling of its enumerator; returns "" when no case below matches.
662 | static const char *_cudaGetErrorEnum(curandStatus_t error)
663 | {
664 | switch (error)
665 | {
666 | case CURAND_STATUS_SUCCESS:
667 | return "CURAND_STATUS_SUCCESS";
668 |
669 | case CURAND_STATUS_VERSION_MISMATCH:
670 | return "CURAND_STATUS_VERSION_MISMATCH";
671 |
672 | case CURAND_STATUS_NOT_INITIALIZED:
673 | return "CURAND_STATUS_NOT_INITIALIZED";
674 |
675 | case CURAND_STATUS_ALLOCATION_FAILED:
676 | return "CURAND_STATUS_ALLOCATION_FAILED";
677 |
678 | case CURAND_STATUS_TYPE_ERROR:
679 | return "CURAND_STATUS_TYPE_ERROR";
680 |
681 | case CURAND_STATUS_OUT_OF_RANGE:
682 | return "CURAND_STATUS_OUT_OF_RANGE";
683 |
684 | case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
685 | return "CURAND_STATUS_LENGTH_NOT_MULTIPLE";
686 |
687 | case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
688 | return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED";
689 |
690 | case CURAND_STATUS_LAUNCH_FAILURE:
691 | return "CURAND_STATUS_LAUNCH_FAILURE";
692 |
693 | case CURAND_STATUS_PREEXISTING_FAILURE:
694 | return "CURAND_STATUS_PREEXISTING_FAILURE";
695 |
696 | case CURAND_STATUS_INITIALIZATION_FAILED:
697 | return "CURAND_STATUS_INITIALIZATION_FAILED";
698 |
699 | case CURAND_STATUS_ARCH_MISMATCH:
700 | return "CURAND_STATUS_ARCH_MISMATCH";
701 |
702 | case CURAND_STATUS_INTERNAL_ERROR:
703 | return "CURAND_STATUS_INTERNAL_ERROR";
704 | }
705 | // No case matched: report an empty name instead of failing.
706 | return "";
707 | }
708 | #endif
709 |
710 | #ifdef NV_NPPIDEFS_H // compiled only if this macro is already defined when the header is included
711 | // Maps an NPP status (NppStatus) to a name string; returns "" when no case below matches. Which cases exist depends on NPP_VERSION_MAJOR/NPP_VERSION_MINOR.
712 | static const char *_cudaGetErrorEnum(NppStatus error)
713 | {
714 | switch (error)
715 | {
716 | case NPP_NOT_SUPPORTED_MODE_ERROR:
717 | return "NPP_NOT_SUPPORTED_MODE_ERROR";
718 |
719 | case NPP_ROUND_MODE_NOT_SUPPORTED_ERROR:
720 | return "NPP_ROUND_MODE_NOT_SUPPORTED_ERROR";
721 |
722 | case NPP_RESIZE_NO_OPERATION_ERROR:
723 | return "NPP_RESIZE_NO_OPERATION_ERROR";
724 |
725 | case NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY:
726 | return "NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY";
727 |
728 | #if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
729 | // Old short enumerator names; several are reported under the longer spelling used in the #else branch.
730 | case NPP_BAD_ARG_ERROR:
731 | return "NPP_BAD_ARGUMENT_ERROR";
732 |
733 | case NPP_COEFF_ERROR:
734 | return "NPP_COEFFICIENT_ERROR";
735 |
736 | case NPP_RECT_ERROR:
737 | return "NPP_RECTANGLE_ERROR";
738 |
739 | case NPP_QUAD_ERROR:
740 | return "NPP_QUADRANGLE_ERROR";
741 |
742 | case NPP_MEM_ALLOC_ERR:
743 | return "NPP_MEMORY_ALLOCATION_ERROR";
744 |
745 | case NPP_HISTO_NUMBER_OF_LEVELS_ERROR:
746 | return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
747 |
748 | case NPP_INVALID_INPUT:
749 | return "NPP_INVALID_INPUT";
750 |
751 | case NPP_POINTER_ERROR:
752 | return "NPP_POINTER_ERROR";
753 |
754 | case NPP_WARNING:
755 | return "NPP_WARNING";
756 |
757 | case NPP_ODD_ROI_WARNING:
758 | return "NPP_ODD_ROI_WARNING";
759 | #else
760 |
761 | // These are for CUDA 5.5 or higher
762 | case NPP_BAD_ARGUMENT_ERROR:
763 | return "NPP_BAD_ARGUMENT_ERROR";
764 |
765 | case NPP_COEFFICIENT_ERROR:
766 | return "NPP_COEFFICIENT_ERROR";
767 |
768 | case NPP_RECTANGLE_ERROR:
769 | return "NPP_RECTANGLE_ERROR";
770 |
771 | case NPP_QUADRANGLE_ERROR:
772 | return "NPP_QUADRANGLE_ERROR";
773 |
774 | case NPP_MEMORY_ALLOCATION_ERR:
775 | return "NPP_MEMORY_ALLOCATION_ERROR";
776 |
777 | case NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR:
778 | return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
779 |
780 | case NPP_INVALID_HOST_POINTER_ERROR:
781 | return "NPP_INVALID_HOST_POINTER_ERROR";
782 |
783 | case NPP_INVALID_DEVICE_POINTER_ERROR:
784 | return "NPP_INVALID_DEVICE_POINTER_ERROR";
785 | #endif
786 |
787 | case NPP_LUT_NUMBER_OF_LEVELS_ERROR:
788 | return "NPP_LUT_NUMBER_OF_LEVELS_ERROR";
789 |
790 | case NPP_TEXTURE_BIND_ERROR:
791 | return "NPP_TEXTURE_BIND_ERROR";
792 |
793 | case NPP_WRONG_INTERSECTION_ROI_ERROR:
794 | return "NPP_WRONG_INTERSECTION_ROI_ERROR";
795 |
796 | case NPP_NOT_EVEN_STEP_ERROR:
797 | return "NPP_NOT_EVEN_STEP_ERROR";
798 |
799 | case NPP_INTERPOLATION_ERROR:
800 | return "NPP_INTERPOLATION_ERROR";
801 |
802 | case NPP_RESIZE_FACTOR_ERROR:
803 | return "NPP_RESIZE_FACTOR_ERROR";
804 |
805 | case NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR:
806 | return "NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR";
807 |
808 |
809 | #if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
810 |
811 | case NPP_MEMFREE_ERR:
812 | return "NPP_MEMFREE_ERR";
813 |
814 | case NPP_MEMSET_ERR:
815 | return "NPP_MEMSET_ERR";
816 |
817 | case NPP_MEMCPY_ERR:
818 | return "NPP_MEMCPY_ERROR"; // NOTE(review): the only case in this group whose string differs from its enumerator (_ERROR vs _ERR) - confirm whether that is intended
819 |
820 | case NPP_MIRROR_FLIP_ERR:
821 | return "NPP_MIRROR_FLIP_ERR";
822 | #else
823 |
824 | case NPP_MEMFREE_ERROR:
825 | return "NPP_MEMFREE_ERROR";
826 |
827 | case NPP_MEMSET_ERROR:
828 | return "NPP_MEMSET_ERROR";
829 |
830 | case NPP_MEMCPY_ERROR:
831 | return "NPP_MEMCPY_ERROR";
832 |
833 | case NPP_MIRROR_FLIP_ERROR:
834 | return "NPP_MIRROR_FLIP_ERROR";
835 | #endif
836 |
837 | case NPP_ALIGNMENT_ERROR:
838 | return "NPP_ALIGNMENT_ERROR";
839 |
840 | case NPP_STEP_ERROR:
841 | return "NPP_STEP_ERROR";
842 |
843 | case NPP_SIZE_ERROR:
844 | return "NPP_SIZE_ERROR";
845 |
846 | case NPP_NULL_POINTER_ERROR:
847 | return "NPP_NULL_POINTER_ERROR";
848 |
849 | case NPP_CUDA_KERNEL_EXECUTION_ERROR:
850 | return "NPP_CUDA_KERNEL_EXECUTION_ERROR";
851 |
852 | case NPP_NOT_IMPLEMENTED_ERROR:
853 | return "NPP_NOT_IMPLEMENTED_ERROR";
854 |
855 | case NPP_ERROR:
856 | return "NPP_ERROR";
857 |
858 | case NPP_SUCCESS:
859 | return "NPP_SUCCESS";
860 |
861 | case NPP_WRONG_INTERSECTION_QUAD_WARNING:
862 | return "NPP_WRONG_INTERSECTION_QUAD_WARNING";
863 |
864 | case NPP_MISALIGNED_DST_ROI_WARNING:
865 | return "NPP_MISALIGNED_DST_ROI_WARNING";
866 |
867 | case NPP_AFFINE_QUAD_INCORRECT_WARNING:
868 | return "NPP_AFFINE_QUAD_INCORRECT_WARNING";
869 |
870 | case NPP_DOUBLE_SIZE_WARNING:
871 | return "NPP_DOUBLE_SIZE_WARNING";
872 |
873 | case NPP_WRONG_INTERSECTION_ROI_WARNING:
874 | return "NPP_WRONG_INTERSECTION_ROI_WARNING";
875 |
876 | #if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x6000
877 | /* These are 6.0 or higher */
878 | case NPP_LUT_PALETTE_BITSIZE_ERROR:
879 | return "NPP_LUT_PALETTE_BITSIZE_ERROR";
880 |
881 | case NPP_ZC_MODE_NOT_SUPPORTED_ERROR:
882 | return "NPP_ZC_MODE_NOT_SUPPORTED_ERROR";
883 |
884 | case NPP_QUALITY_INDEX_ERROR:
885 | return "NPP_QUALITY_INDEX_ERROR";
886 |
887 | case NPP_CHANNEL_ORDER_ERROR:
888 | return "NPP_CHANNEL_ORDER_ERROR";
889 |
890 | case NPP_ZERO_MASK_VALUE_ERROR:
891 | return "NPP_ZERO_MASK_VALUE_ERROR";
892 |
893 | case NPP_NUMBER_OF_CHANNELS_ERROR:
894 | return "NPP_NUMBER_OF_CHANNELS_ERROR";
895 |
896 | case NPP_COI_ERROR:
897 | return "NPP_COI_ERROR";
898 |
899 | case NPP_DIVISOR_ERROR:
900 | return "NPP_DIVISOR_ERROR";
901 |
902 | case NPP_CHANNEL_ERROR:
903 | return "NPP_CHANNEL_ERROR";
904 |
905 | case NPP_STRIDE_ERROR:
906 | return "NPP_STRIDE_ERROR";
907 |
908 | case NPP_ANCHOR_ERROR:
909 | return "NPP_ANCHOR_ERROR";
910 |
911 | case NPP_MASK_SIZE_ERROR:
912 | return "NPP_MASK_SIZE_ERROR";
913 |
914 | case NPP_MOMENT_00_ZERO_ERROR:
915 | return "NPP_MOMENT_00_ZERO_ERROR";
916 |
917 | case NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR:
918 | return "NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR";
919 |
920 | case NPP_THRESHOLD_ERROR:
921 | return "NPP_THRESHOLD_ERROR";
922 |
923 | case NPP_CONTEXT_MATCH_ERROR:
924 | return "NPP_CONTEXT_MATCH_ERROR";
925 |
926 | case NPP_FFT_FLAG_ERROR:
927 | return "NPP_FFT_FLAG_ERROR";
928 |
929 | case NPP_FFT_ORDER_ERROR:
930 | return "NPP_FFT_ORDER_ERROR";
931 |
932 | case NPP_SCALE_RANGE_ERROR:
933 | return "NPP_SCALE_RANGE_ERROR";
934 |
935 | case NPP_DATA_TYPE_ERROR:
936 | return "NPP_DATA_TYPE_ERROR";
937 |
938 | case NPP_OUT_OFF_RANGE_ERROR:
939 | return "NPP_OUT_OFF_RANGE_ERROR";
940 |
941 | case NPP_DIVIDE_BY_ZERO_ERROR:
942 | return "NPP_DIVIDE_BY_ZERO_ERROR";
943 |
944 | case NPP_RANGE_ERROR:
945 | return "NPP_RANGE_ERROR";
946 |
947 | case NPP_NO_MEMORY_ERROR:
948 | return "NPP_NO_MEMORY_ERROR";
949 |
950 | case NPP_ERROR_RESERVED:
951 | return "NPP_ERROR_RESERVED";
952 |
953 | case NPP_NO_OPERATION_WARNING:
954 | return "NPP_NO_OPERATION_WARNING";
955 |
956 | case NPP_DIVIDE_BY_ZERO_WARNING:
957 | return "NPP_DIVIDE_BY_ZERO_WARNING";
958 | #endif
959 |
960 | #if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x7000
961 | /* These are 7.0 or higher */
962 | case NPP_OVERFLOW_ERROR:
963 | return "NPP_OVERFLOW_ERROR";
964 |
965 | case NPP_CORRUPTED_DATA_ERROR:
966 | return "NPP_CORRUPTED_DATA_ERROR";
967 | #endif
968 | }
969 | // No case matched: report an empty name instead of failing.
970 | return "";
971 | }
972 | #endif
973 |
974 | #ifdef __DRIVER_TYPES_H__ // with this macro defined, DEVICE_RESET expands to a cudaDeviceReset() call that carries its own ';'
975 | #ifndef DEVICE_RESET // a definition supplied before this header is included takes precedence
976 | #define DEVICE_RESET cudaDeviceReset();
977 | #endif
978 | #else // without it, DEVICE_RESET expands to nothing
979 | #ifndef DEVICE_RESET
980 | #define DEVICE_RESET
981 | #endif
982 | #endif
983 |
984 | template< typename T > // T: a status type that has a matching _cudaGetErrorEnum overload
985 | void check(T result, char const *const func, const char *const file, int const line) // func: text of the checked expression; file/line: call site
986 | {
987 |     if (result) // any non-zero status is treated as a failure; zero returns silently
988 |     {
989 |         fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n",
990 |                 file, line, static_cast<int>(result), _cudaGetErrorEnum(result), func); // cast to int so the argument matches %d
991 |         DEVICE_RESET
992 |         // Make sure we call CUDA Device Reset before exiting
993 |         exit(EXIT_FAILURE); // does not return: the process terminates on the first failed check
994 |     }
995 | }
996 |
997 | #ifdef __DRIVER_TYPES_H__
998 | // Wraps a CUDA call: forwards its status, the stringified expression (#val) and the call site to check().
999 | #define checkCudaErrors(val) check ( (val), #val, __FILE__, __LINE__ )
1000 |
1001 | // Reports the status returned by cudaGetLastError(), tagged with `msg` and the call site.
1002 | #define getLastCudaError(msg) __getLastCudaError (msg, __FILE__, __LINE__)
1003 | // Reads cudaGetLastError(); on anything but cudaSuccess it prints file(line), errorMessage, the numeric code and cudaGetErrorString(), runs DEVICE_RESET and exits with EXIT_FAILURE.
1004 | inline void __getLastCudaError(const char *errorMessage, const char *file, const int line)
1005 | {
1006 | cudaError_t err = cudaGetLastError();
1007 |
1008 | if (cudaSuccess != err)
1009 | {
1010 | fprintf(stderr, "%s(%i) : getLastCudaError() CUDA error : %s : (%d) %s.\n",
1011 | file, line, errorMessage, (int)err, cudaGetErrorString(err));
1012 | DEVICE_RESET
1013 | exit(EXIT_FAILURE);
1014 | }
1015 | }
1016 | #endif
1017 |
1018 | #ifndef MAX // Larger of a and b. Arguments are parenthesized so operators inside them cannot re-associate; each may be evaluated twice, so avoid side effects.
1019 | #define MAX(a,b) ((a) > (b) ? (a) : (b))
1020 | #endif
1021 |
1022 | // Float To Int conversion
1023 | inline int ftoi(float value) // rounds to the nearest int, with halves going away from zero
1024 | {
1025 |     return (int)(value + (value >= 0 ? 0.5 : -0.5)); // shift by half a unit toward the sign, then truncate
1026 | }
1027 |
1028 | // Beginning of GPU Architecture definitions
1029 | inline int _ConvertSMVer2Cores(int major, int minor)
1030 | {
1031 |     // Returns the number of cores per SM for compute capability major.minor, looked up in the table below.
1032 |     typedef struct
1033 |     {
1034 |         int SM; // 0xMm (hexadecimal notation), M = SM Major version, and m = SM minor version
1035 |         int Cores;
1036 |     } sSMtoCores;
1037 |
1038 |     sSMtoCores nGpuArchCoresPerSM[] =
1039 |     {
1040 |         { 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class
1041 |         { 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class
1042 |         { 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class
1043 |         { 0x32, 192}, // Kepler Generation (SM 3.2) GK10x class
1044 |         { 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class
1045 |         { 0x37, 192}, // Kepler Generation (SM 3.7) GK21x class
1046 |         { 0x50, 128}, // Maxwell Generation (SM 5.0) GM10x class
1047 |         { 0x52, 128}, // Maxwell Generation (SM 5.2) GM20x class
1048 |         { 0x53, 128}, // Maxwell Generation (SM 5.3) GM20x class
1049 |         { 0x60, 64 }, // Pascal Generation (SM 6.0) GP100 class
1050 |         { 0x61, 128}, // Pascal Generation (SM 6.1) GP10x class
1051 |         { 0x62, 128}, // Pascal Generation (SM 6.2) GP10x class
1052 |         { 0x70, 64 }, // Volta Generation (SM 7.0)
1053 |         { 0x72, 64 }, // Volta Generation (SM 7.2)
1054 |         { 0x75, 64 }, // Turing Generation (SM 7.5)
1055 |         { -1, -1 }    // sentinel: terminates the search loop below
1056 |     };
1057 |     int index = 0;
1058 |     while (nGpuArchCoresPerSM[index].SM != -1)
1059 |     {
1060 |         if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) // pack major.minor as 0xMm to match the table key
1061 |         {
1062 |             return nGpuArchCoresPerSM[index].Cores;
1063 |         }
1064 |         index++;
1065 |     }
1066 |
1067 |     // Unknown SM version: warn and fall back to the last real table row (index - 1 is the entry just before the sentinel)
1068 |     printf("MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM\n", major, minor, nGpuArchCoresPerSM[index-1].Cores);
1069 |     return nGpuArchCoresPerSM[index-1].Cores;
1070 | }
1071 | // end of GPU Architecture definitions
1072 |
1073 | #ifdef __CUDA_RUNTIME_H__
1074 | // General GPU Device CUDA Initialization
1075 | inline int gpuDeviceInit(int devID)
1076 | {
1077 | int device_count;
1078 | checkCudaErrors(cudaGetDeviceCount(&device_count));
1079 |
1080 | if (device_count == 0)
1081 | {
1082 | fprintf(stderr, "gpuDeviceInit() CUDA error: no devices supporting CUDA.\n");
1083 | exit(EXIT_FAILURE);
1084 | }
1085 |
1086 | if (devID < 0)
1087 | {
1088 | devID = 0;
1089 | }
1090 |
1091 | if (devID > device_count-1)
1092 | {
1093 | fprintf(stderr, "\n");
1094 | fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n", device_count);
1095 | fprintf(stderr, ">> gpuDeviceInit (-device=%d) is not a valid GPU device. <<\n", devID);
1096 | fprintf(stderr, "\n");
1097 | return -devID;
1098 | }
1099 |
1100 | cudaDeviceProp deviceProp;
1101 | checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID));
1102 |
1103 | if (deviceProp.computeMode == cudaComputeModeProhibited)
1104 | {
1105 | fprintf(stderr, "Error: device is running in , no threads can use ::cudaSetDevice().\n");
1106 | return -1;
1107 | }
1108 |
1109 | if (deviceProp.major < 1)
1110 | {
1111 | fprintf(stderr, "gpuDeviceInit(): GPU device does not support CUDA.\n");
1112 | exit(EXIT_FAILURE);
1113 | }
1114 |
1115 | checkCudaErrors(cudaSetDevice(devID));
1116 | printf("gpuDeviceInit() CUDA Device [%d]: \"%s\n", devID, deviceProp.name);
1117 |
1118 | return devID;
1119 | }
1120 |
1121 | // This function returns the best GPU (with maximum GFLOPS)
1122 | inline int gpuGetMaxGflopsDeviceId()
1123 | {
1124 | int current_device = 0, sm_per_multiproc = 0;
1125 | int max_perf_device = 0;
1126 | int device_count = 0, best_SM_arch = 0;
1127 | int devices_prohibited = 0;
1128 |
1129 | unsigned long long max_compute_perf = 0;
1130 | cudaDeviceProp deviceProp;
1131 | cudaGetDeviceCount(&device_count);
1132 |
1133 | checkCudaErrors(cudaGetDeviceCount(&device_count));
1134 |
1135 | if (device_count == 0)
1136 | {
1137 | fprintf(stderr, "gpuGetMaxGflopsDeviceId() CUDA error: no devices supporting CUDA.\n");
1138 | exit(EXIT_FAILURE);
1139 | }
1140 |
1141 | // Find the best major SM Architecture GPU device
1142 | while (current_device < device_count)
1143 | {
1144 | cudaGetDeviceProperties(&deviceProp, current_device);
1145 |
1146 | // If this GPU is not running on Compute Mode prohibited, then we can add it to the list
1147 | if (deviceProp.computeMode != cudaComputeModeProhibited)
1148 | {
1149 | if (deviceProp.major > 0 && deviceProp.major < 9999)
1150 | {
1151 | best_SM_arch = MAX(best_SM_arch, deviceProp.major);
1152 | }
1153 | }
1154 | else
1155 | {
1156 | devices_prohibited++;
1157 | }
1158 |
1159 | current_device++;
1160 | }
1161 |
1162 | if (devices_prohibited == device_count)
1163 | {
1164 | fprintf(stderr, "gpuGetMaxGflopsDeviceId() CUDA error: all devices have compute mode prohibited.\n");
1165 | exit(EXIT_FAILURE);
1166 | }
1167 |
1168 | // Find the best CUDA capable GPU device
1169 | current_device = 0;
1170 |
1171 | while (current_device < device_count)
1172 | {
1173 | cudaGetDeviceProperties(&deviceProp, current_device);
1174 |
1175 | // If this GPU is not running on Compute Mode prohibited, then we can add it to the list
1176 | if (deviceProp.computeMode != cudaComputeModeProhibited)
1177 | {
1178 | if (deviceProp.major == 9999 && deviceProp.minor == 9999)
1179 | {
1180 | sm_per_multiproc = 1;
1181 | }
1182 | else
1183 | {
1184 | sm_per_multiproc = _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor);
1185 | }
1186 |
1187 | unsigned long long compute_perf = (unsigned long long) deviceProp.multiProcessorCount * sm_per_multiproc * deviceProp.clockRate;
1188 |
1189 | if (compute_perf > max_compute_perf)
1190 | {
1191 | // If we find GPU with SM major > 2, search only these
1192 | if (best_SM_arch > 2)
1193 | {
1194 | // If our device==dest_SM_arch, choose this, or else pass
1195 | if (deviceProp.major == best_SM_arch)
1196 | {
1197 | max_compute_perf = compute_perf;
1198 | max_perf_device = current_device;
1199 | }
1200 | }
1201 | else
1202 | {
1203 | max_compute_perf = compute_perf;
1204 | max_perf_device = current_device;
1205 | }
1206 | }
1207 | }
1208 |
1209 | ++current_device;
1210 | }
1211 |
1212 | return max_perf_device;
1213 | }
1214 |
1215 |
1216 | // Initialization code to find the best CUDA Device
1217 | inline int findCudaDevice(int argc, const char **argv)
1218 | {
1219 | cudaDeviceProp deviceProp;
1220 | int devID = 0;
1221 |
1222 | // If the command-line has a device number specified, use it
1223 | if (checkCmdLineFlag(argc, argv, "device"))
1224 | {
1225 | devID = getCmdLineArgumentInt(argc, argv, "device=");
1226 |
1227 | if (devID < 0)
1228 | {
1229 | printf("Invalid command line parameter\n ");
1230 | exit(EXIT_FAILURE);
1231 | }
1232 | else
1233 | {
1234 | devID = gpuDeviceInit(devID);
1235 |
1236 | if (devID < 0)
1237 | {
1238 | printf("exiting...\n");
1239 | exit(EXIT_FAILURE);
1240 | }
1241 | }
1242 | }
1243 | else
1244 | {
1245 | // Otherwise pick the device with highest Gflops/s
1246 | devID = gpuGetMaxGflopsDeviceId();
1247 | checkCudaErrors(cudaSetDevice(devID));
1248 | checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID));
1249 | printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n", devID, deviceProp.name, deviceProp.major, deviceProp.minor);
1250 | }
1251 |
1252 | return devID;
1253 | }
1254 |
1255 | // General check for CUDA GPU SM Capabilities
1256 | inline bool checkCudaCapabilities(int major_version, int minor_version)
1257 | {
1258 | cudaDeviceProp deviceProp;
1259 | deviceProp.major = 0;
1260 | deviceProp.minor = 0;
1261 | int dev;
1262 |
1263 | checkCudaErrors(cudaGetDevice(&dev));
1264 | checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev));
1265 |
1266 | if ((deviceProp.major > major_version) ||
1267 | (deviceProp.major == major_version && deviceProp.minor >= minor_version))
1268 | {
1269 | printf(" Device %d: <%16s >, Compute SM %d.%d detected\n", dev, deviceProp.name, deviceProp.major, deviceProp.minor);
1270 | return true;
1271 | }
1272 | else
1273 | {
1274 | printf(" No GPU device was found that can support CUDA compute capability %d.%d.\n", major_version, minor_version);
1275 | return false;
1276 | }
1277 | }
1278 | #endif
1279 |
1280 | // end of CUDA Helper Functions
1281 |
1282 |
1283 | #endif
1284 |
--------------------------------------------------------------------------------
/RSCNNEQ/utils/cinclude/helper_string.h:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
3 | *
4 | * Please refer to the NVIDIA end user license agreement (EULA) associated
5 | * with this source code for terms and conditions that govern your use of
6 | * this software. Any use, reproduction, disclosure, or distribution of
7 | * this software and related documentation outside the terms of the EULA
8 | * is strictly prohibited.
9 | *
10 | */
11 |
12 | // These are helper functions for the SDK samples (string parsing, timers, etc)
13 | #ifndef STRING_HELPER_H
14 | #define STRING_HELPER_H
15 |
// Standard headers used by the helpers below:
// printf/FILE/fopen, atoi/atof/malloc, and std::string.
#include <stdio.h>
#include <stdlib.h>
#include <fstream>
#include <string>

// Portability layer: every helper below goes through these macros so that the
// same code builds with the MSVC "secure" CRT functions and with the POSIX/C ones.
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
#ifndef _CRT_SECURE_NO_DEPRECATE
#define _CRT_SECURE_NO_DEPRECATE
#endif
#ifndef STRCASECMP
#define STRCASECMP  _stricmp
#endif
#ifndef STRNCASECMP
#define STRNCASECMP _strnicmp
#endif
#ifndef STRCPY
#define STRCPY(sFilePath, nLength, sPath) strcpy_s(sFilePath, nLength, sPath)
#endif

#ifndef FOPEN
#define FOPEN(fHandle,filename,mode) fopen_s(&fHandle, filename, mode)
#endif
#ifndef FOPEN_FAIL
#define FOPEN_FAIL(result) (result != 0)
#endif
#ifndef SSCANF
#define SSCANF sscanf_s
#endif
#ifndef SPRINTF
#define SPRINTF sprintf_s
#endif
#else // Linux Includes
// strlen/strchr/strcpy and strcasecmp/strncasecmp
#include <string.h>
#include <strings.h>

#ifndef STRCASECMP
#define STRCASECMP  strcasecmp
#endif
#ifndef STRNCASECMP
#define STRNCASECMP strncasecmp
#endif
#ifndef STRCPY
// nLength is accepted for signature parity with the Windows variant and is unused here
#define STRCPY(sFilePath, nLength, sPath) strcpy(sFilePath, sPath)
#endif

#ifndef FOPEN
#define FOPEN(fHandle,filename,mode) (fHandle = fopen(filename, mode))
#endif
#ifndef FOPEN_FAIL
#define FOPEN_FAIL(result) (result == NULL)
#endif
#ifndef SSCANF
#define SSCANF sscanf
#endif
#ifndef SPRINTF
#define SPRINTF sprintf
#endif
#endif

// Exit status defined for samples that are waived rather than failed
#ifndef EXIT_WAIVED
#define EXIT_WAIVED 2
#endif
78 |
// CUDA Utility Helper Functions

// Counts the leading `delimiter` characters of `string`.
// Returns the index of the first character that is not `delimiter`, or 0 when that
// index is not strictly before the last character of the string (so a string such
// as "-a", or one made only of delimiters, is reported as having nothing to strip).
inline int stringRemoveDelimiter(char delimiter, const char *string)
{
    int skipped = 0;

    for (; string[skipped] == delimiter; ++skipped)
    {
        // just advance past the delimiter run
    }

    const int last_index = (int)strlen(string) - 1;

    return (skipped < last_index) ? skipped : 0;
}
96 |
// Locates the extension of `filename` (the text after its last '.').
//   filename  - NUL-terminated file name; not modified
//   extension - receives a pointer into `filename` just past the last '.',
//               or NULL when there is no extension
// Returns the index in `filename` where the extension starts, or 0 when there is
// none. A name without a dot, a name whose only dot is its first character, an
// empty name and a NULL name all count as "no extension".
inline int getFileExtension(char *filename, char **extension)
{
    char *last_dot = (filename != NULL) ? strrchr(filename, '.') : NULL;

    // strrchr never reads before the start of the buffer, so empty names are safe;
    // a dot at index 0 keeps the historical "no extension" answer
    if (last_dot == NULL || last_dot == filename)
    {
        *extension = NULL;
        return 0;
    }

    *extension = last_dot + 1;
    return (int)(*extension - filename);
}
116 |
117 |
118 | inline bool checkCmdLineFlag(const int argc, const char **argv, const char *string_ref)
119 | {
120 | bool bFound = false;
121 |
122 | if (argc >= 1)
123 | {
124 | for (int i=1; i < argc; i++)
125 | {
126 | int string_start = stringRemoveDelimiter('-', argv[i]);
127 | const char *string_argv = &argv[i][string_start];
128 |
129 | const char *equal_pos = strchr(string_argv, '=');
130 | int argv_length = (int)(equal_pos == 0 ? strlen(string_argv) : equal_pos - string_argv);
131 |
132 | int length = (int)strlen(string_ref);
133 |
134 | if (length == argv_length && !STRNCASECMP(string_argv, string_ref, length))
135 | {
136 | bFound = true;
137 | continue;
138 | }
139 | }
140 | }
141 |
142 | return bFound;
143 | }
144 |
145 | // This function wraps the CUDA Driver API into a template function
146 | template
147 | inline bool getCmdLineArgumentValue(const int argc, const char **argv, const char *string_ref, T *value)
148 | {
149 | bool bFound = false;
150 |
151 | if (argc >= 1)
152 | {
153 | for (int i=1; i < argc; i++)
154 | {
155 | int string_start = stringRemoveDelimiter('-', argv[i]);
156 | const char *string_argv = &argv[i][string_start];
157 | int length = (int)strlen(string_ref);
158 |
159 | if (!STRNCASECMP(string_argv, string_ref, length))
160 | {
161 | if (length+1 <= (int)strlen(string_argv))
162 | {
163 | int auto_inc = (string_argv[length] == '=') ? 1 : 0;
164 | *value = (T)atoi(&string_argv[length + auto_inc]);
165 | }
166 |
167 | bFound = true;
168 | i=argc;
169 | }
170 | }
171 | }
172 |
173 | return bFound;
174 | }
175 |
176 | inline int getCmdLineArgumentInt(const int argc, const char **argv, const char *string_ref)
177 | {
178 | bool bFound = false;
179 | int value = -1;
180 |
181 | if (argc >= 1)
182 | {
183 | for (int i=1; i < argc; i++)
184 | {
185 | int string_start = stringRemoveDelimiter('-', argv[i]);
186 | const char *string_argv = &argv[i][string_start];
187 | int length = (int)strlen(string_ref);
188 |
189 | if (!STRNCASECMP(string_argv, string_ref, length))
190 | {
191 | if (length+1 <= (int)strlen(string_argv))
192 | {
193 | int auto_inc = (string_argv[length] == '=') ? 1 : 0;
194 | value = atoi(&string_argv[length + auto_inc]);
195 | }
196 | else
197 | {
198 | value = 0;
199 | }
200 |
201 | bFound = true;
202 | continue;
203 | }
204 | }
205 | }
206 |
207 | if (bFound)
208 | {
209 | return value;
210 | }
211 | else
212 | {
213 | return 0;
214 | }
215 | }
216 |
217 | inline float getCmdLineArgumentFloat(const int argc, const char **argv, const char *string_ref)
218 | {
219 | bool bFound = false;
220 | float value = -1;
221 |
222 | if (argc >= 1)
223 | {
224 | for (int i=1; i < argc; i++)
225 | {
226 | int string_start = stringRemoveDelimiter('-', argv[i]);
227 | const char *string_argv = &argv[i][string_start];
228 | int length = (int)strlen(string_ref);
229 |
230 | if (!STRNCASECMP(string_argv, string_ref, length))
231 | {
232 | if (length+1 <= (int)strlen(string_argv))
233 | {
234 | int auto_inc = (string_argv[length] == '=') ? 1 : 0;
235 | value = (float)atof(&string_argv[length + auto_inc]);
236 | }
237 | else
238 | {
239 | value = 0.f;
240 | }
241 |
242 | bFound = true;
243 | continue;
244 | }
245 | }
246 | }
247 |
248 | if (bFound)
249 | {
250 | return value;
251 | }
252 | else
253 | {
254 | return 0;
255 | }
256 | }
257 |
258 | inline bool getCmdLineArgumentString(const int argc, const char **argv,
259 | const char *string_ref, char **string_retval)
260 | {
261 | bool bFound = false;
262 |
263 | if (argc >= 1)
264 | {
265 | for (int i=1; i < argc; i++)
266 | {
267 | int string_start = stringRemoveDelimiter('-', argv[i]);
268 | char *string_argv = (char *)&argv[i][string_start];
269 | int length = (int)strlen(string_ref);
270 |
271 | if (!STRNCASECMP(string_argv, string_ref, length))
272 | {
273 | *string_retval = &string_argv[length+1];
274 | bFound = true;
275 | continue;
276 | }
277 | }
278 | }
279 |
280 | if (!bFound)
281 | {
282 | *string_retval = NULL;
283 | }
284 |
285 | return bFound;
286 | }
287 |
288 | //////////////////////////////////////////////////////////////////////////////
289 | //! Find the path for a file assuming that
290 | //! files are found in the searchPath.
291 | //!
292 | //! @return the path if succeeded, otherwise 0
293 | //! @param filename name of the file
294 | //! @param executable_path optional absolute path of the executable
295 | //////////////////////////////////////////////////////////////////////////////
296 | inline char *sdkFindFilePath(const char *filename, const char *executable_path)
297 | {
298 | // defines a variable that is replaced with the name of the executable
299 |
300 | // Typical relative search paths to locate needed companion files (e.g. sample input data, or JIT source files)
301 | // The origin for the relative search may be the .exe file, a .bat file launching an .exe, a browser .exe launching the .exe or .bat, etc
302 | const char *searchPath[] =
303 | {
304 | "./", // same dir
305 | "./_data_files/",
306 | "./common/", // "/common/" subdir
307 | "./common/data/", // "/common/data/" subdir
308 | "./data/", // "/data/" subdir
309 | "./src/", // "/src/" subdir
310 | "./src//data/", // "/src//data/" subdir
311 | "./inc/", // "/inc/" subdir
312 | "./0_Simple/", // "/0_Simple/" subdir
313 | "./1_Utilities/", // "/1_Utilities/" subdir
314 | "./2_Graphics/", // "/2_Graphics/" subdir
315 | "./3_Imaging/", // "/3_Imaging/" subdir
316 | "./4_Finance/", // "/4_Finance/" subdir
317 | "./5_Simulations/", // "/5_Simulations/" subdir
318 | "./6_Advanced/", // "/6_Advanced/" subdir
319 | "./7_CUDALibraries/", // "/7_CUDALibraries/" subdir
320 | "./8_Android/", // "/8_Android/" subdir
321 | "./samples/", // "/samples/" subdir
322 |
323 | "./0_Simple//data/", // "/0_Simple//data/" subdir
324 | "./1_Utilities//data/", // "/1_Utilities//data/" subdir
325 | "./2_Graphics//data/", // "/2_Graphics//data/" subdir
326 | "./3_Imaging//data/", // "/3_Imaging//data/" subdir
327 | "./4_Finance//data/", // "/4_Finance//data/" subdir
328 | "./5_Simulations//data/", // "/5_Simulations//data/" subdir
329 | "./6_Advanced//data/", // "/6_Advanced//data/" subdir
330 | "./7_CUDALibraries//", // "/7_CUDALibraries//" subdir
331 | "./7_CUDALibraries//data/", // "/7_CUDALibraries//data/" subdir
332 |
333 | "../", // up 1 in tree
334 | "../common/", // up 1 in tree, "/common/" subdir
335 | "../common/data/", // up 1 in tree, "/common/data/" subdir
336 | "../data/", // up 1 in tree, "/data/" subdir
337 | "../src/", // up 1 in tree, "/src/" subdir
338 | "../inc/", // up 1 in tree, "/inc/" subdir
339 |
340 | "../0_Simple//data/", // up 1 in tree, "/0_Simple//" subdir
341 | "../1_Utilities//data/", // up 1 in tree, "/1_Utilities//" subdir
342 | "../2_Graphics//data/", // up 1 in tree, "/2_Graphics//" subdir
343 | "../3_Imaging//data/", // up 1 in tree, "/3_Imaging//" subdir
344 | "../4_Finance//data/", // up 1 in tree, "/4_Finance//" subdir
345 | "../5_Simulations//data/", // up 1 in tree, "/5_Simulations//" subdir
346 | "../6_Advanced//data/", // up 1 in tree, "/6_Advanced//" subdir
347 | "../7_CUDALibraries//data/",// up 1 in tree, "/7_CUDALibraries//" subdir
348 | "../8_Android//data/", // up 1 in tree, "/8_Android//" subdir
349 | "../samples//data/", // up 1 in tree, "/samples//" subdir
350 | "../../", // up 2 in tree
351 | "../../common/", // up 2 in tree, "/common/" subdir
352 | "../../common/data/", // up 2 in tree, "/common/data/" subdir
353 | "../../data/", // up 2 in tree, "/data/" subdir
354 | "../../src/", // up 2 in tree, "/src/" subdir
355 | "../../inc/", // up 2 in tree, "/inc/" subdir
356 | "../../sandbox//data/", // up 2 in tree, "/sandbox//" subdir
357 | "../../0_Simple//data/", // up 2 in tree, "/0_Simple//" subdir
358 | "../../1_Utilities//data/", // up 2 in tree, "/1_Utilities//" subdir
359 | "../../2_Graphics//data/", // up 2 in tree, "/2_Graphics//" subdir
360 | "../../3_Imaging//data/", // up 2 in tree, "/3_Imaging//" subdir
361 | "../../4_Finance//data/", // up 2 in tree, "/4_Finance//" subdir
362 | "../../5_Simulations//data/", // up 2 in tree, "/5_Simulations//" subdir
363 | "../../6_Advanced//data/", // up 2 in tree, "/6_Advanced//" subdir
364 | "../../7_CUDALibraries//data/", // up 2 in tree, "/7_CUDALibraries//" subdir
365 | "../../8_Android//data/", // up 2 in tree, "/8_Android//" subdir
366 | "../../samples//data/", // up 2 in tree, "/samples//" subdir
367 | "../../../", // up 3 in tree
368 | "../../../src//", // up 3 in tree, "/src//" subdir
369 | "../../../src//data/", // up 3 in tree, "/src//data/" subdir
370 | "../../../src//src/", // up 3 in tree, "/src//src/" subdir
371 | "../../../src//inc/", // up 3 in tree, "/src//inc/" subdir
372 | "../../../sandbox//", // up 3 in tree, "/sandbox//" subdir
373 | "../../../sandbox//data/", // up 3 in tree, "/sandbox//data/" subdir
374 | "../../../sandbox//src/", // up 3 in tree, "/sandbox//src/" subdir
375 | "../../../sandbox//inc/", // up 3 in tree, "/sandbox//inc/" subdir
376 | "../../../0_Simple//data/", // up 3 in tree, "/0_Simple//" subdir
377 | "../../../1_Utilities//data/", // up 3 in tree, "/1_Utilities//" subdir
378 | "../../../2_Graphics//data/", // up 3 in tree, "/2_Graphics//" subdir
379 | "../../../3_Imaging//data/", // up 3 in tree, "/3_Imaging//" subdir
380 | "../../../4_Finance//data/", // up 3 in tree, "/4_Finance//" subdir
381 | "../../../5_Simulations//data/", // up 3 in tree, "/5_Simulations//" subdir
382 | "../../../6_Advanced//data/", // up 3 in tree, "/6_Advanced//" subdir
383 | "../../../7_CUDALibraries//data/", // up 3 in tree, "/7_CUDALibraries//" subdir
384 | "../../../8_Android//data/", // up 3 in tree, "/8_Android//" subdir
385 | "../../../0_Simple//", // up 3 in tree, "/0_Simple//" subdir
386 | "../../../1_Utilities//", // up 3 in tree, "/1_Utilities//" subdir
387 | "../../../2_Graphics//", // up 3 in tree, "/2_Graphics//" subdir
388 | "../../../3_Imaging//", // up 3 in tree, "/3_Imaging//" subdir
389 | "../../../4_Finance//", // up 3 in tree, "/4_Finance//" subdir
390 | "../../../5_Simulations//", // up 3 in tree, "/5_Simulations//" subdir
391 | "../../../6_Advanced//", // up 3 in tree, "/6_Advanced//" subdir
392 | "../../../7_CUDALibraries//", // up 3 in tree, "/7_CUDALibraries//" subdir
393 | "../../../8_Android//", // up 3 in tree, "/8_Android//" subdir
394 | "../../../samples//data/", // up 3 in tree, "/samples//" subdir
395 | "../../../common/", // up 3 in tree, "../../../common/" subdir
396 | "../../../common/data/", // up 3 in tree, "../../../common/data/" subdir
397 | "../../../data/", // up 3 in tree, "../../../data/" subdir
398 | "../../../../", // up 4 in tree
399 | "../../../../src//", // up 4 in tree, "/src//" subdir
400 | "../../../../src//data/", // up 4 in tree, "/src//data/" subdir
401 | "../../../../src//src/", // up 4 in tree, "/src//src/" subdir
402 | "../../../../src//inc/", // up 4 in tree, "/src//inc/" subdir
403 | "../../../../sandbox//", // up 4 in tree, "/sandbox//" subdir
404 | "../../../../sandbox//data/", // up 4 in tree, "/sandbox//data/" subdir
405 | "../../../../sandbox//src/", // up 4 in tree, "/sandbox//src/" subdir
406 | "../../../../sandbox//inc/", // up 4 in tree, "/sandbox//inc/" subdir
407 | "../../../../0_Simple//data/", // up 4 in tree, "/0_Simple//" subdir
408 | "../../../../1_Utilities//data/", // up 4 in tree, "/1_Utilities//" subdir
409 | "../../../../2_Graphics//data/", // up 4 in tree, "/2_Graphics//" subdir
410 | "../../../../3_Imaging//data/", // up 4 in tree, "/3_Imaging//" subdir
411 | "../../../../4_Finance//data/", // up 4 in tree, "/4_Finance//" subdir
412 | "../../../../5_Simulations//data/",// up 4 in tree, "/5_Simulations//" subdir
413 | "../../../../6_Advanced//data/", // up 4 in tree, "/6_Advanced//" subdir
414 | "../../../../7_CUDALibraries//data/", // up 4 in tree, "/7_CUDALibraries//" subdir
415 | "../../../../8_Android//data/", // up 4 in tree, "/8_Android//" subdir
416 | "../../../../0_Simple//", // up 4 in tree, "/0_Simple//" subdir
417 | "../../../../1_Utilities//", // up 4 in tree, "/1_Utilities//" subdir
418 | "../../../../2_Graphics//", // up 4 in tree, "/2_Graphics//" subdir
419 | "../../../../3_Imaging//", // up 4 in tree, "/3_Imaging//" subdir
420 | "../../../../4_Finance//", // up 4 in tree, "/4_Finance//" subdir
421 | "../../../../5_Simulations//",// up 4 in tree, "/5_Simulations//" subdir
422 | "../../../../6_Advanced//", // up 4 in tree, "/6_Advanced//" subdir
423 | "../../../../7_CUDALibraries//", // up 4 in tree, "/7_CUDALibraries//" subdir
424 | "../../../../8_Android//", // up 4 in tree, "/8_Android//" subdir
425 | "../../../../samples//data/", // up 4 in tree, "/samples//" subdir
426 | "../../../../common/", // up 4 in tree, "../../../common/" subdir
427 | "../../../../common/data/", // up 4 in tree, "../../../common/data/" subdir
428 | "../../../../data/", // up 4 in tree, "../../../data/" subdir
429 | "../../../../../", // up 5 in tree
430 | "../../../../../src//", // up 5 in tree, "/src//" subdir
431 | "../../../../../src//data/", // up 5 in tree, "/src//data/" subdir
432 | "../../../../../src//src/", // up 5 in tree, "/src//src/" subdir
433 | "../../../../../src//inc/", // up 5 in tree, "/src//inc/" subdir
434 | "../../../../../sandbox//", // up 5 in tree, "/sandbox//" subdir
435 | "../../../../../sandbox//data/", // up 5 in tree, "/sandbox//data/" subdir
436 | "../../../../../sandbox//src/", // up 5 in tree, "/sandbox//src/" subdir
437 | "../../../../../sandbox//inc/", // up 5 in tree, "/sandbox//inc/" subdir
438 | "../../../../../0_Simple//data/", // up 5 in tree, "/0_Simple//" subdir
439 | "../../../../../1_Utilities//data/", // up 5 in tree, "/1_Utilities//" subdir
440 | "../../../../../2_Graphics//data/", // up 5 in tree, "/2_Graphics//" subdir
441 | "../../../../../3_Imaging//data/", // up 5 in tree, "/3_Imaging//" subdir
442 | "../../../../../4_Finance//data/", // up 5 in tree, "/4_Finance//" subdir
443 | "../../../../../5_Simulations//data/",// up 5 in tree, "/5_Simulations//" subdir
444 | "../../../../../6_Advanced//data/", // up 5 in tree, "/6_Advanced//" subdir
445 | "../../../../../7_CUDALibraries//data/", // up 5 in tree, "/7_CUDALibraries//" subdir
446 | "../../../../../8_Android//data/", // up 5 in tree, "/8_Android//" subdir
447 | "../../../../../samples//data/", // up 5 in tree, "/samples//" subdir
448 | "../../../../../common/", // up 5 in tree, "../../../common/" subdir
449 | "../../../../../common/data/", // up 5 in tree, "../../../common/data/" subdir
450 | };
451 |
452 | // Extract the executable name
453 | std::string executable_name;
454 |
455 | if (executable_path != 0)
456 | {
457 | executable_name = std::string(executable_path);
458 |
459 | #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
460 | // Windows path delimiter
461 | size_t delimiter_pos = executable_name.find_last_of('\\');
462 | executable_name.erase(0, delimiter_pos + 1);
463 |
464 | if (executable_name.rfind(".exe") != std::string::npos)
465 | {
466 | // we strip .exe, only if the .exe is found
467 | executable_name.resize(executable_name.size() - 4);
468 | }
469 |
470 | #else
471 | // Linux & OSX path delimiter
472 | size_t delimiter_pos = executable_name.find_last_of('/');
473 | executable_name.erase(0,delimiter_pos+1);
474 | #endif
475 | }
476 |
477 | // Loop over all search paths and return the first hit
478 | for (unsigned int i = 0; i < sizeof(searchPath)/sizeof(char *); ++i)
479 | {
480 | std::string path(searchPath[i]);
481 | size_t executable_name_pos = path.find("");
482 |
483 | // If there is executable_name variable in the searchPath
484 | // replace it with the value
485 | if (executable_name_pos != std::string::npos)
486 | {
487 | if (executable_path != 0)
488 | {
489 | path.replace(executable_name_pos, strlen(""), executable_name);
490 | }
491 | else
492 | {
493 | // Skip this path entry if no executable argument is given
494 | continue;
495 | }
496 | }
497 |
498 | #ifdef _DEBUG
499 | printf("sdkFindFilePath <%s> in %s\n", filename, path.c_str());
500 | #endif
501 |
502 | // Test if the file exists
503 | path.append(filename);
504 | FILE *fp;
505 | FOPEN(fp, path.c_str(), "rb");
506 |
507 | if (fp != NULL)
508 | {
509 | fclose(fp);
510 | // File found
511 | // returning an allocated array here for backwards compatibility reasons
512 | char *file_path = (char *) malloc(path.length() + 1);
513 | STRCPY(file_path, path.length() + 1, path.c_str());
514 | return file_path;
515 | }
516 |
517 | if (fp)
518 | {
519 | fclose(fp);
520 | }
521 | }
522 |
523 | // File not found
524 | return 0;
525 | }
526 |
527 | #endif
528 |
--------------------------------------------------------------------------------
/RSCNNEQ/utils/cinclude/interpolate_gpu.h:
--------------------------------------------------------------------------------
1 | #ifndef _INTERPOLATE_GPU_H
2 | #define _INTERPOLATE_GPU_H
3 |
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 |
8 | void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown,
9 | const float *known, float *dist2, int *idx,
10 | cudaStream_t stream);
11 |
12 | void three_interpolate_kernel_wrapper(int b, int c, int m, int n,
13 | const float *points, const int *idx,
14 | const float *weight, float *out,
15 | cudaStream_t stream);
16 |
17 | void three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m,
18 | const float *grad_out,
19 | const int *idx, const float *weight,
20 | float *grad_points,
21 | cudaStream_t stream);
22 |
23 | #ifdef __cplusplus
24 | }
25 | #endif
26 |
27 | #endif
28 |
--------------------------------------------------------------------------------
/RSCNNEQ/utils/cinclude/interpolate_wrapper.h:
--------------------------------------------------------------------------------
1 |
2 |
3 | void three_nn_wrapper(int b, int n, int m, THCudaTensor *unknown_tensor,
4 | THCudaTensor *known_tensor, THCudaTensor *dist2_tensor,
5 | THCudaIntTensor *idx_tensor);
6 | void three_interpolate_wrapper(int b, int c, int m, int n,
7 | THCudaTensor *points_tensor,
8 | THCudaIntTensor *idx_tensor,
9 | THCudaTensor *weight_tensor,
10 | THCudaTensor *out_tensor);
11 |
12 | void three_interpolate_grad_wrapper(int b, int c, int n, int m,
13 | THCudaTensor *grad_out_tensor,
14 | THCudaIntTensor *idx_tensor,
15 | THCudaTensor *weight_tensor,
16 | THCudaTensor *grad_points_tensor);
17 |
--------------------------------------------------------------------------------
/RSCNNEQ/utils/cinclude/sampling_gpu.h:
--------------------------------------------------------------------------------
1 | #ifndef _SAMPLING_GPU_H
2 | #define _SAMPLING_GPU_H
3 |
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 |
8 | void gather_points_kernel_wrapper(int b, int c, int n, int npoints,
9 | const float *points, const int *idx,
10 | float *out, cudaStream_t stream);
11 |
12 | void gather_points_grad_kernel_wrapper(int b, int c, int n, int npoints,
13 | const float *grad_out, const int *idx,
14 | float *grad_points, cudaStream_t stream);
15 |
16 | void furthest_point_sampling_kernel_wrapper(int b, int n, int m,
17 | const float *dataset, float *temp,
18 | int *idxs, cudaStream_t stream);
19 |
20 | #ifdef __cplusplus
21 | }
22 | #endif
23 | #endif
24 |
--------------------------------------------------------------------------------
/RSCNNEQ/utils/cinclude/sampling_wrapper.h:
--------------------------------------------------------------------------------
1 |
2 | int gather_points_wrapper(int b, int c, int n, int npoints,
3 | THCudaTensor *points_tensor,
4 | THCudaIntTensor *idx_tensor,
5 | THCudaTensor *out_tensor);
6 | int gather_points_grad_wrapper(int b, int c, int n, int npoints,
7 | THCudaTensor *grad_out_tensor,
8 | THCudaIntTensor *idx_tensor,
9 | THCudaTensor *grad_points_tensor);
10 |
11 | int furthest_point_sampling_wrapper(int b, int n, int m,
12 | THCudaTensor *points_tensor,
13 | THCudaTensor *temp_tensor,
14 | THCudaIntTensor *idx_tensor);
15 |
--------------------------------------------------------------------------------
/RSCNNEQ/utils/csrc/ellipsoid_query.c:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #include "ellipsoid_query_gpu.h"
4 |
5 | extern THCState *state;
6 |
7 | int ellipsoid_query_wrapper(int b, int n, int m, float e1, float e2, float e3, int nsample,
8 | THCudaTensor *new_xyz_tensor, THCudaTensor *xyz_tensor, THCudaIntTensor *fps_idx_tensor,
9 | THCudaIntTensor *idx_tensor,THCudaIntTensor *ingroup_pts_cnt_tensor, THCudaTensor *ingroup_out_tensor, THCudaTensor *ingroup_cva_tensor, THCudaTensor *v_tensor,THCudaTensor *d_tensor) {
10 |
11 | const float *new_xyz = THCudaTensor_data(state, new_xyz_tensor);
12 | const float *xyz = THCudaTensor_data(state, xyz_tensor);
13 | const int *fps_idx = THCudaIntTensor_data(state, fps_idx_tensor);
14 | int *idx = THCudaIntTensor_data(state, idx_tensor);
15 | //below tensors added by me
16 | int *ingroup_pts_cnt = THCudaIntTensor_data(state, ingroup_pts_cnt_tensor);
17 | float *ingroup_out = THCudaTensor_data(state, ingroup_out_tensor);
18 | float *ingroup_cva = THCudaTensor_data(state, ingroup_cva_tensor);
19 | float *v = THCudaTensor_data(state, v_tensor);
20 | float *d = THCudaTensor_data(state, d_tensor);
21 |
22 | cudaStream_t stream = THCState_getCurrentStream(state);
23 |
24 | query_ellipsoid_point_kernel_wrapper(b, n, m, e1, e2, e3, nsample, new_xyz, xyz, fps_idx, idx, ingroup_pts_cnt, ingroup_out, ingroup_cva, v, d,
25 | stream);
26 | return 1;
27 | }
28 |
--------------------------------------------------------------------------------
/RSCNNEQ/utils/csrc/ellipsoid_query_gpu.cu:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include // memset
8 | #include // rand, RAND_MAX
9 | #include // sqrtf
10 | #include
11 | #include
12 | #include
13 | #include
14 | #include
15 | #include
16 | #include
17 | #include
18 | #include
19 | #include
20 | #include
21 | #include "helper_cuda.h"
22 | #include
23 | #include