├── data ├── __init__.py ├── datagen.py ├── get_label.py ├── make_ali_lidar.py └── make_kitti_xyzic.py ├── model ├── DLA.py ├── ResNet.py ├── __init__.py └── loss.py ├── tensorrt ├── BatchStream.h ├── Makefile ├── common.h ├── lasernet.cpp ├── lasernet.trt ├── make_input.cpp ├── make_input.h └── test.py └── train.py /data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feicay/LaserNet/863a91aa8f542bd278f7bc03f899c0363a898570/data/__init__.py -------------------------------------------------------------------------------- /data/datagen.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.utils.data as data 3 | import numpy as np 4 | import random 5 | import cv2 6 | 7 | def make_xyzic_image(xyzic, yaw_start=-45, yaw_end=45, v_start=-30, v_end=10, d_yaw=0.225, d_v=0.2): 8 | num, _ = xyzic.shape 9 | width = int((yaw_end - yaw_start) / d_yaw + 0.01) 10 | height = int((v_end - v_start) / d_v + 0.01) 11 | im_ref = np.zeros((height, width), dtype=np.float32) 12 | im_height = np.zeros((height, width), dtype=np.float32) 13 | im_cls = np.zeros((height, width), dtype=np.float32) 14 | im_range = np.zeros((height, width), dtype=np.float32) 15 | x = xyzic[:, 0] 16 | y = xyzic[:, 1] 17 | z = xyzic[:, 2] 18 | L = np.sqrt(x**2 + y**2 + z**2) 19 | yaw = np.arctan2(y, x) 20 | v_angel = np.arctan2(z, np.sqrt(x*x + y*y)) 21 | i = ((yaw_end - yaw*180/np.pi) / d_yaw).astype(np.int32) 22 | j = ((v_end - v_angel*180/np.pi) / d_v).astype(np.int32) 23 | mask = (i > -1) & (i < width) & (j > -1) & (j < height) 24 | i = i[mask] 25 | j = j[mask] 26 | xyzic = xyzic[mask, :] 27 | im_cls[j, i] = (xyzic[:, 4] + 0.01) 28 | im_ref[j, i] = xyzic[:, 3] 29 | im_height[j, i] = xyzic[:, 2] + 1.73 30 | im_range[j, i] = L[mask] 31 | #convert to -1~1 32 | im_range = im_range / 100 33 | im_height = im_height / 10 34 | im = np.stack((im_range, im_ref, im_height)) 35 | return im, im_cls.astype(np.int32) 36 | 37 | class Lidar_xyzic_dataset(data.Dataset): 38 | def __init__(self, filelist, train=1): 39 | with open(filelist, 'r') as fp: 40 | self.filelist = fp.readlines() 41 | self.len = len(self.filelist) 42 | fp.close() 43 | self.train = train 44 | 45 | def __len__(self): 46 | return self.len 47 | 48 | def __getitem__(self, index): 49 | binfile = self.filelist[index].replace('\n', '') 50 | data = np.fromfile(binfile, dtype=np.float32).reshape(-1, 5) 51 | if self.train: 52 | yaw_start = (random.random() - 0.5) * 360 53 | yaw_end = yaw_start + 90 54 | im, im_cls = make_xyzic_image(data, yaw_start=yaw_start, yaw_end=yaw_end) 55 | else: 56 | im, im_cls = make_xyzic_image(data) 57 | image = torch.from_numpy(im) 58 | truth = torch.from_numpy(im_cls) 59 | truth = truth.long().unsqueeze(0) 60 | return image, truth 61 | 62 | '''category 63 | ‘DontCare’: 0 64 | ‘cyclist’: 1 65 | ‘tricycle’: 2 66 | ‘smallMot’: 3 67 | ‘bigMot’: 4 68 | ‘pedestrian’: 5 69 | ‘crowds’: 6 70 | ‘unknown’: 7 71 | ''' 72 | 73 | def test(): 74 | color = np.array([[0, 0, 0], 75 | [0, 0, 250], 76 | [0, 250, 250], 77 | [0, 250, 0], 78 | [250, 250, 0], 79 | [250, 0, 0], 80 | [250, 0, 250], 81 | [150, 150, 150]]) 82 | datalist = '/home/adas/data/pytorch_ws/LaserNet/trainlist.txt' 83 | dataset = Lidar_xyzic_dataset(datalist) 84 | image, truth = dataset.__getitem__(1000) 85 | print(image.size(), truth.size()) 86 | _, h, w = image.size() 87 | image = image.permute(1, 2, 0).numpy() 88 | truth = truth.numpy() 89 | cv2.imshow('image', image) 90 | #cv2.waitKey(0) 91 | im_cls = np.zeros((h, w, 3), dtype=np.uint8) 92 | im_cls[:,:] = color[truth[:,:]] 93 | cv2.imshow('cls', im_cls) 94 | cv2.waitKey(0) 95 | 96 | #test() -------------------------------------------------------------------------------- /data/get_label.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | def get_class(class_name): 5 | cls_ = 0 6 | if class_name == 'Pedestrian' or class_name == 'Person_sitting': 7 | cls_ = 1 8 | elif class_name == 'Cyclist': 9 | cls_ = 2 10 | elif class_name == 'Car' or class_name == 'Van': 11 | cls_ = 3 12 | elif class_name == 'Truck': 13 | cls_ = 4 14 | elif class_name == 'Tram': 15 | cls_ = 5 16 | else: 17 | cls_ = 0 18 | return cls_ 19 | 20 | def get_kitti_label(label_dir, coord='camera'): 21 | with open(label_dir, 'r') as fp: 22 | label_list = fp.readlines() 23 | result_list = [] 24 | for i in range(len(label_list)): 25 | label = label_list[i].replace('\n', '') 26 | obj = label.split(' ') 27 | cls_ = float(get_class(obj[0])) 28 | if cls_ > 0: 29 | x = float(obj[11]) 30 | y = float(obj[12]) 31 | z = float(obj[13]) 32 | l = float(obj[10]) 33 | w = float(obj[9]) 34 | h = float(obj[8]) 35 | r = float(obj[14]) 36 | if coord == 'velodyne': 37 | x = float(obj[13]) + 0.27 38 | y = - float(obj[11]) 39 | z = -0.08 - float(obj[12]) + (h/2) 40 | r = -float(obj[14]) - np.pi/2 41 | if r < -np.pi: 42 | r = r + np.pi * 2 43 | label_obj = np.array([cls_, x, y, z, l, w, h, r]).reshape(1, 8) 44 | result_list.append(label_obj) 45 | if len(result_list) > 0: 46 | result = np.concatenate(result_list, axis=0) 47 | else: 48 | result = None 49 | return result 50 | 51 | def sort_label(label): 52 | x = label[:, 1] 53 | y = label[:, 2] 54 | z = label[:, 3] 55 | L = np.sqrt(x**2 + y**2 + z**2) 56 | idx = np.argsort(L) 57 | result = label[idx, :] 58 | result = result[::-1, :] 59 | return result 60 | 61 | def test(): 62 | label_dir = '/home/yifeihu/data/Kitti/object/training/label_2/000010.txt' 63 | objs = get_kitti_label(label_dir, coord='velodyne') 64 | print(objs) 65 | 66 | #test() -------------------------------------------------------------------------------- /data/make_ali_lidar.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pandas as pd 4 | from PIL import Image 5 | import random 6 | 7 | def get_points(pts_file, intensity_file, cat_file=None): 8 | pts = pd.read_csv(pts_file, header=None) 9 | points_loc = np.array(pts, dtype=np.float32) 10 | N, _ = points_loc.shape 11 | Inten = pd.read_csv(intensity_file, header=None) 12 | points_i = np.array(Inten, dtype=np.float32).reshape(-1, 1) 13 | if cat_file is not None: 14 | cat = pd.read_csv(cat_file, header=None) 15 | points_cat = np.array(cat, dtype=np.float32).reshape(-1, 1) 16 | points_xyzi = np.concatenate([points_loc, points_i, points_cat], axis=1) 17 | else: 18 | points_cat = np.zeros((N, 1)) 19 | points_xyzi = np.concatenate([points_loc, points_i, points_cat], axis=1) 20 | #return points_filter(points_xyzi) 21 | return points_xyzi 22 | 23 | def make_ali_lidar_xyzic(): 24 | ali_lidar_dir = '/raid/alibaba-lidar/training/' 25 | int_dir = ali_lidar_dir + 'intensity/' 26 | pts_dir = ali_lidar_dir + 'pts/' 27 | cat_dir = ali_lidar_dir + 'category/' 28 | out_dir = ali_lidar_dir + 'xyzic/' 29 | filelist = os.listdir(pts_dir) 30 | for i in range(len(filelist)): 31 | print(i) 32 | filename = filelist[i] 33 | ptsfile = pts_dir + filename 34 | intfile = int_dir + filename 35 | catfile = cat_dir + filename 36 | outfile = out_dir + filename.replace('csv', 'bin') 37 | xyzic = get_points(ptsfile, intfile, cat_file=catfile) 38 | xyzic = xyzic.astype(np.float32) 39 | xyzic.tofile(outfile) 40 | 41 | def make_train_list(): 42 | fdir = '/raid/alibaba-lidar/training/xyzic' 43 | filelist = os.listdir(fdir) 44 | with open('trainlist.txt', 'w') as fp1, open('validlist.txt', 'w') as fp2: 45 | for i in range(len(filelist)): 46 | filename = fdir + '/' + filelist[i] + '\n' 47 | a = random.random() 48 | if a < 0.9: 49 | fp1.write(filename) 50 | else: 51 | fp2.write(filename) 52 | 53 | 54 | #make_ali_lidar_xyzic() 55 | #make_train_list() -------------------------------------------------------------------------------- /data/make_kitti_xyzic.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from PIL import Image 4 | from get_label import get_kitti_label, sort_label 5 | import cv2 6 | 7 | KITTI_DIR = '/home/adas/data/Kitti/object/testing' 8 | color = np.array([[0, 0, 0], 9 | [0, 0, 250], 10 | [0, 250, 250], 11 | [0, 250, 0], 12 | [250, 250, 0], 13 | [250, 0, 0], 14 | [250, 0, 250]]) 15 | 16 | def get_calib(calibfile): 17 | with open(calibfile, 'r') as fp: 18 | text = fp.readlines() 19 | p2 = text[2].replace('\n', '').split(': ')[1] 20 | r0 = text[4].replace('\n', '').split(': ')[1] 21 | velo2cam = text[5].replace('\n', '').split(': ')[1] 22 | p2 = np.array(p2.split(' ')).reshape(3, 4).astype(np.float32) 23 | r0 = np.array(r0.split(' ')).reshape(3, 3).astype(np.float32) 24 | velo2cam = np.array(velo2cam.split(' ')).reshape(3, 4).astype(np.float32) 25 | zero1 = np.zeros((3,1)) 26 | const = np.array([[0, 0, 0, 1]]) 27 | r0 = np.concatenate((r0, zero1), axis=1) 28 | r0 = np.concatenate((r0, const), axis=0) 29 | velo2cam = np.concatenate((velo2cam, const), axis=0) 30 | return p2, r0, velo2cam 31 | 32 | def make_velodyne_reduce(): 33 | bin_dir = KITTI_DIR + '/velodyne/' 34 | im_dir = KITTI_DIR + '/image_2/' 35 | calib_dir = KITTI_DIR + '/calib/' 36 | bin_reduce_dir = KITTI_DIR + '/velodyne_reduce/' 37 | binlist = os.listdir(bin_dir) 38 | for i in range(len(binlist)): 39 | print(i, binlist[i]) 40 | binfile = binlist[i] 41 | calibfile = calib_dir + binfile.replace('bin', 'txt') 42 | imfile = im_dir + binfile.replace('bin', 'png') 43 | rawbinfile = bin_dir + binfile 44 | outbinfile = bin_reduce_dir + binfile 45 | data = np.fromfile(rawbinfile, dtype=np.float32).reshape(-1, 4) 46 | num, _ = data.shape 47 | xyz = data[:, 0:3] 48 | xyz1 = np.concatenate((xyz, np.ones((num, 1))), axis=1) 49 | p2, r0, velo2cam = get_calib(calibfile) 50 | Tmat = np.dot(p2, np.dot(r0, velo2cam)).T 51 | imdata = np.dot(xyz1, Tmat) 52 | imdata[:, 0] = imdata[:, 0] / imdata[:, 2] 53 | imdata[:, 1] = imdata[:, 1] / imdata[:, 2] 54 | im = Image.open(imfile) 55 | width, height = im.size 56 | mask = (imdata[:, 0] > 0) & (imdata[:, 0] < width) & (imdata[:, 1] > 0) & (imdata[:, 1] < height) & (data[:, 0] > 0) 57 | outdata = data[mask, :] 58 | outdata.tofile(outbinfile) 59 | 60 | def get_xyzic(data, label, h_offset=1.73): 61 | if label is None: 62 | return data 63 | num, _ = data.shape 64 | n, _ = label.shape 65 | c = np.zeros((num, 1), dtype=np.float32) 66 | for i in range(n): 67 | obj = label[i, :] 68 | print(obj) 69 | cls_ , x, y, z, l, w, h, r = obj 70 | delta_x = data[:, 0] - np.ones(num)*x 71 | delta_y = data[:, 1] - np.ones(num)*y 72 | delta_z = data[:, 2] - np.ones(num)*z 73 | theta = np.arctan2(delta_y, delta_x) - np.ones(num)*r 74 | L = np.sqrt(delta_x*delta_x + delta_y*delta_y) 75 | delta_w = L * np.sin(theta) 76 | delta_l = L * np.cos(theta) 77 | mask = (delta_w > (-w/2)) & (delta_w < (w/2)) & (delta_l > (-l/2)) & (delta_l < (l/2)) & (delta_z > (-h/2)) & (delta_z < (h/2)) 78 | c[mask, :] = cls_ 79 | xyzic = np.concatenate((data, c), axis=1) 80 | return xyzic 81 | 82 | def make_xyzic_image(xyzic, yaw_start=-45, yaw_end=45, v_start=-30, v_end=10, d_yaw=0.2, d_v=0.2): 83 | num, _ = xyzic.shape 84 | width = int((yaw_end - yaw_start) / d_yaw + 0.01) 85 | height = int((v_end - v_start) / d_v + 0.01) 86 | im_ref = np.zeros((height, width), dtype=np.float32) 87 | im_height = np.zeros((height, width), dtype=np.float32) 88 | im_cls = np.zeros((height, width, 3), dtype=np.float32) 89 | im_range = np.zeros((height, width), dtype=np.float32) 90 | x = xyzic[:, 0] 91 | y = xyzic[:, 1] 92 | z = xyzic[:, 2] 93 | L = np.sqrt(x**2 + y**2 + z**2) 94 | yaw = np.arctan2(y, x) 95 | v_angel = np.arctan2(z, np.sqrt(x*x + y*y)) 96 | i = ((yaw_end - yaw*180/np.pi) / d_yaw).astype(np.int32) 97 | j = ((v_end - v_angel*180/np.pi) / d_v).astype(np.int32) 98 | mask = (i > -1) & (i < width) & (j > -1) & (j < height) 99 | i = i[mask] 100 | j = j[mask] 101 | xyzic = xyzic[mask, :] 102 | im_ref[j, i] = xyzic[:, 3] 103 | im_height[j, i] = xyzic[:, 2] + 1.73 104 | im_cls[j, i] = color[xyzic[:, 4].astype(np.int32), :] 105 | im_range[j, i] = L 106 | return im_cls, im_ref, im_height, im_range 107 | 108 | 109 | def make_kitti_xyzic(): 110 | bin_dir = KITTI_DIR + '/velodyne_reduce/' 111 | bin_xyzic_dir = KITTI_DIR + '/velodyne_xyzic/' 112 | label_dir = KITTI_DIR + '/label_2/' 113 | binlist = os.listdir(bin_dir) 114 | for i in range(len(binlist)): 115 | #for i in range(1): 116 | print(i, binlist[i]) 117 | binfile = binlist[i] 118 | labelfile = label_dir + binfile.replace('bin', 'txt') 119 | rawbinfile = bin_dir + binfile 120 | outbinfile = bin_xyzic_dir + binfile 121 | label = get_kitti_label(labelfile, 'velodyne') 122 | label = sort_label(label) 123 | data = np.fromfile(rawbinfile, dtype=np.float32).reshape(-1, 4) 124 | #data[:, 2] = data[:, 2] 125 | xyzic = get_xyzic(data, label) 126 | im_cls, im_ref, im_height, im_range = make_xyzic_image(xyzic) 127 | # im1 = Image.fromarray(im_cls.astype('uint8')).convert('RGB') 128 | # im1.show() 129 | # im2 = Image.fromarray((im_ref*255).astype('uint8')).convert('RGB') 130 | # im2.show() 131 | # im3 = Image.fromarray((im_height*255).astype('uint8')).convert('RGB') 132 | # im3.show() 133 | # im4 = Image.fromarray((im_range/100*255).astype('uint8')).convert('RGB') 134 | # im4.show() 135 | xyzic.tofile(outbinfile) 136 | 137 | 138 | def test(): 139 | calibfile = '/home/yifeihu/data/Kitti/object/training/calib/000000.txt' 140 | binfile = '/home/yifeihu/data/Kitti/object/training/velodyne/000000.bin' 141 | imfile = '/home/yifeihu/data/Kitti/object/training/image_2/000000.png' 142 | p2, r0, velo2cam = get_calib(calibfile) 143 | print(p2) 144 | print(r0) 145 | print(velo2cam) 146 | T = np.dot(p2, np.dot(r0, velo2cam)) 147 | print(T) 148 | data = np.fromfile(binfile, dtype=np.float32).reshape(-1, 4) 149 | data[:, 3] = 1 150 | print(data.shape) 151 | imdata = np.dot(data, T.T) 152 | imdata[:, 0] = imdata[:, 0] / imdata[:, 2] 153 | imdata[:, 1] = imdata[:, 1] / imdata[:, 2] 154 | print(imdata[:10, :]) 155 | im = Image.open(imfile) 156 | width, height = im.size 157 | print(width, height) 158 | mask = (imdata[:, 0] > 0) & (imdata[:, 0] < width) & (imdata[:, 1] > 0) & (imdata[:, 1] < height) 159 | outdata = data[mask, :] 160 | print(outdata.shape) 161 | 162 | #make_velodyne_reduce() 163 | #test() 164 | make_kitti_xyzic() -------------------------------------------------------------------------------- /model/DLA.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn import functional as F 4 | import torch.nn.init as init 5 | from model.ResNet import BasicBlock, make_layer 6 | 7 | #reference: LaserNet: An Efficient Probabilistic 3D Object Detector for Autonomous Driving 8 | class FeatureExtractor(nn.Module): 9 | def __init__(self, inplanes, planes, blocks, stride=1): 10 | super(FeatureExtractor, self).__init__() 11 | self.name = 'FeatureExtractor' 12 | self.block = make_layer(BasicBlock, inplanes, planes, blocks, stride=stride) 13 | 14 | def forward(self, x): 15 | x = self.block(x) 16 | return x 17 | 18 | class FeatureAggregator(nn.Module): 19 | def __init__(self, FinePlanes, CoarsePlanes, planes): 20 | super(FeatureAggregator, self).__init__() 21 | self.block1 = nn.Sequential( 22 | nn.ConvTranspose2d(CoarsePlanes, planes, 3, stride=2, padding=1, output_padding=1,bias=False), 23 | nn.BatchNorm2d(planes), 24 | nn.ReLU(inplace=True) 25 | ) 26 | self.block2 = make_layer(BasicBlock, FinePlanes+planes, planes, 2, stride=1) 27 | 28 | def forward(self, xFine, xCoarse): 29 | x1 = self.block1(xCoarse) 30 | x = torch.cat((x1, xFine), 1) 31 | x = self.block2(x) 32 | return x 33 | 34 | class DLA(nn.Module): 35 | def __init__(self, num_class): 36 | super(DLA, self).__init__() 37 | self.block1a = FeatureExtractor(3, 64, 4, stride=1) 38 | self.block2a = FeatureExtractor(64, 64, 5, stride=2) 39 | self.block3a = FeatureExtractor(64, 128, 6, stride=2) 40 | self.block1b = FeatureAggregator(64, 64, 64) 41 | self.block1c = FeatureAggregator(64, 128, 128) 42 | self.block2b = FeatureAggregator(64, 128, 128) 43 | 44 | self.cls = nn.Conv2d(128, num_class, 1) 45 | 46 | def forward(self, x): 47 | x1 = self.block1a(x) 48 | x2 = self.block2a(x1) 49 | y1 = self.block1b(x1, x2) 50 | x3 = self.block3a(x2) 51 | y2 = self.block2b(x2, x3) 52 | y = self.block1c(y1, y2) 53 | y = self.cls(y) 54 | #y = y.exp() 55 | return y 56 | 57 | 58 | def test(): 59 | net = DLA(5) 60 | x = torch.randn(1, 3, 200, 500) 61 | y = net(x) 62 | print(y[0,0,0,:]) 63 | 64 | #test() -------------------------------------------------------------------------------- /model/ResNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.nn.init as init 5 | 6 | class BasicBlock(nn.Module): 7 | expansion = 1 8 | def __init__(self, inplanes, planes, stride=1, downsample=None): 9 | super(BasicBlock, self).__init__() 10 | self.conv1 = nn.Conv2d(inplanes, planes, 3, stride=stride, padding=1) 11 | self.bn1 = nn.BatchNorm2d(planes) 12 | self.relu1 = nn.ReLU(inplace=True) 13 | self.conv2 = nn.Conv2d(planes, planes, 3, stride=1, padding=1) 14 | self.bn2 = nn.BatchNorm2d(planes) 15 | self.relu2 = nn.ReLU(inplace=True) 16 | self.downsample = downsample 17 | self.stride = stride 18 | 19 | init.xavier_normal_(self.conv1.weight.data) 20 | init.xavier_normal_(self.conv2.weight.data) 21 | 22 | def forward(self, x): 23 | residual = x 24 | 25 | out = self.conv1(x) 26 | out = self.bn1(out) 27 | out = self.relu1(out) 28 | 29 | out = self.conv2(out) 30 | out = self.bn2(out) 31 | 32 | if self.downsample is not None: 33 | residual = self.downsample(x) 34 | 35 | out += residual 36 | out = self.relu2(out) 37 | 38 | return out 39 | 40 | class Bottleneck(nn.Module): 41 | expansion = 4 42 | def __init__(self, inplanes, planes, stride=1, downsample=None): 43 | super(Bottleneck, self).__init__() 44 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 45 | self.bn1 = nn.BatchNorm2d(planes) 46 | self.relu1 = nn.ReLU(inplace=True) 47 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 48 | self.bn2 = nn.BatchNorm2d(planes) 49 | self.relu2 = nn.ReLU(inplace=True) 50 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 51 | self.bn3 = nn.BatchNorm2d(planes * 4) 52 | self.relu3 = nn.ReLU(inplace=True) 53 | self.downsample = downsample 54 | self.stride = stride 55 | 56 | init.xavier_normal_(self.conv1.weight.data) 57 | init.xavier_normal_(self.conv2.weight.data) 58 | init.xavier_normal_(self.conv3.weight.data) 59 | 60 | def forward(self, x): 61 | residual = x 62 | 63 | out = self.conv1(x) 64 | out = self.bn1(out) 65 | out = self.relu1(out) 66 | 67 | out = self.conv2(out) 68 | out = self.bn2(out) 69 | out = self.relu2(out) 70 | 71 | out = self.conv3(out) 72 | out = self.bn3(out) 73 | 74 | if self.downsample is not None: 75 | residual = self.downsample(x) 76 | 77 | out += residual 78 | out = self.relu3(out) 79 | 80 | return out 81 | 82 | def make_layer(block, inplanes, planes, blocks, stride=1): 83 | downsample = None 84 | if stride != 1 or inplanes != planes * block.expansion: 85 | downsample = nn.Sequential( 86 | nn.Conv2d(inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False), 87 | nn.BatchNorm2d(planes * block.expansion), 88 | ) 89 | init.xavier_normal_(downsample[0].weight.data) 90 | 91 | layers = [] 92 | layers.append(block(inplanes, planes, stride, downsample)) 93 | inplanes = planes * block.expansion 94 | for i in range(1, blocks): 95 | layers.append(block(inplanes, planes)) 96 | 97 | return nn.Sequential(*layers) 98 | 99 | class ResNet34(nn.Module): 100 | def __init__(self, inplanes): 101 | super(ResNet34, self).__init__() 102 | self.conv1 = nn.Conv2d(inplanes, 64, kernel_size=7, stride=2, padding=3, bias=False) 103 | self.bn1 = nn.BatchNorm2d(64) 104 | self.relu = nn.ReLU(inplace=True) 105 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 106 | self.layer1 = make_layer(BasicBlock, 64, 64, 3) 107 | self.layer2 = make_layer(BasicBlock, 64, 128, 4, stride=2) 108 | self.layer3 = make_layer(BasicBlock, 128, 256, 6, stride=2) 109 | self.layer4 = make_layer(BasicBlock, 256, 512, 3, stride=2) 110 | 111 | init.xavier_normal_(self.conv1.weight.data) 112 | 113 | def forward(self, x): 114 | x = self.conv1(x) 115 | x = self.bn1(x) 116 | x = self.relu(x) 117 | x = self.maxpool(x) 118 | x = self.layer1(x) 119 | x = self.layer2(x) 120 | x = self.layer3(x) 121 | x = self.layer4(x) 122 | return x 123 | 124 | class ResNet50(nn.Module): 125 | def __init__(self, inplanes): 126 | super(ResNet50, self).__init__() 127 | self.conv1 = nn.Conv2d(inplanes, 64, kernel_size=7, stride=2, padding=3, bias=False) 128 | self.bn1 = nn.BatchNorm2d(64) 129 | self.relu = nn.ReLU(inplace=True) 130 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 131 | self.layer1 = make_layer(Bottleneck, 64, 64, 3) 132 | self.layer2 = make_layer(Bottleneck, 64, 128, 4, stride=2) 133 | self.layer3 = make_layer(Bottleneck, 128, 256, 6, stride=2) 134 | self.layer4 = make_layer(Bottleneck, 256, 512, 3, stride=2) 135 | 136 | init.xavier_normal_(self.conv1.weight.data) 137 | 138 | def forward(self, x): 139 | x = self.conv1(x) 140 | x = self.bn1(x) 141 | x = self.relu(x) 142 | x = self.maxpool(x) 143 | x = self.layer1(x) 144 | x = self.layer2(x) 145 | x = self.layer3(x) 146 | x = self.layer4(x) 147 | return x 148 | 149 | class ResNet101(nn.Module): 150 | def __init__(self, inplanes): 151 | super(ResNet101, self).__init__() 152 | self.conv1 = nn.Conv2d(inplanes, 64, kernel_size=7, stride=2, padding=3, bias=False) 153 | self.bn1 = nn.BatchNorm2d(64) 154 | self.relu = nn.ReLU(inplace=True) 155 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 156 | self.layer1 = make_layer(Bottleneck, 64, 64, 3) 157 | self.layer2 = make_layer(Bottleneck, 64, 128, 4, stride=2) 158 | self.layer3 = make_layer(Bottleneck, 128, 256, 23, stride=2) 159 | self.layer4 = make_layer(Bottleneck, 256, 512, 3, stride=2) 160 | 161 | init.xavier_normal_(self.conv1.weight.data) 162 | 163 | def forward(self, x): 164 | x = self.conv1(x) 165 | x = self.bn1(x) 166 | x = self.relu(x) 167 | x = self.maxpool(x) 168 | x = self.layer1(x) 169 | x = self.layer2(x) 170 | x = self.layer3(x) 171 | x = self.layer4(x) 172 | return x -------------------------------------------------------------------------------- /model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feicay/LaserNet/863a91aa8f542bd278f7bc03f899c0363a898570/model/__init__.py -------------------------------------------------------------------------------- /model/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | 6 | class FocalLossClassify(nn.Module): 7 | def __init__(self, num_classes, background=0, cuda=1): 8 | super(FocalLossClassify, self).__init__() 9 | self.num_cls = num_classes + background 10 | self.one_hot = torch.eye(num_classes + background) 11 | if cuda: 12 | self.one_hot = self.one_hot.cuda() 13 | 14 | def focal_loss(self, x, y): 15 | alpha = 0.25 16 | gamma = 2 17 | t = self.one_hot[y.data, :] 18 | t = Variable(t) 19 | p = x.sigmoid() 20 | pt = p*t + (1-p)*(1-t) # pt = p if t > 0 else 1-p 21 | w = alpha*t + (1-alpha)*(1-t) # w = alpha if t > 0 else 1-alpha 22 | w = w * (1-pt).pow(gamma) 23 | w = w.detach() 24 | return F.binary_cross_entropy_with_logits(x, t, w, size_average=False) 25 | 26 | def forward(self, cls_pred, cls_truth): 27 | batch, C, L, W = cls_pred.size() 28 | cls_pred = cls_pred.permute(0,2,3,1).contiguous().view(batch,-1,self.num_cls) 29 | cls_truth = cls_truth.permute(0,2,3,1).contiguous().view(batch,-1) 30 | 31 | pos = cls_truth > 0 32 | num_obj = pos.data.sum() 33 | 34 | cls_loss = self.focal_loss(cls_pred, cls_truth) 35 | 36 | self.cls_loss = cls_loss.data 37 | self.loss = cls_loss/num_obj.float() 38 | return self.loss 39 | 40 | class FocalLoss(nn.Module): 41 | def __init__(self, num_classes, box_len, num_anchor, background=1, cuda=1): 42 | super(FocalLoss, self).__init__() 43 | self.num_cls = num_classes + background 44 | self.box_len = box_len 45 | self.num_anchor = num_anchor 46 | self.one_hot = torch.eye(num_classes + background) 47 | if cuda: 48 | self.one_hot = self.one_hot.cuda() 49 | 50 | def focal_loss(self, x, y): 51 | alpha = 0.25 52 | gamma = 2 53 | t = self.one_hot[y.data, :] 54 | t = Variable(t) 55 | p = x.sigmoid() 56 | pt = p*t + (1-p)*(1-t) # pt = p if t > 0 else 1-p 57 | w = alpha*t + (1-alpha)*(1-t) # w = alpha if t > 0 else 1-alpha 58 | w = w * (1-pt).pow(gamma) 59 | w = w.detach() 60 | return F.binary_cross_entropy_with_logits(x, t, w, size_average=False) 61 | 62 | def forward(self, cls_pred, cls_truth, box_pred, box_truth): 63 | batch, C, L, W = cls_pred.size() 64 | box_pred = box_pred.permute(0,2,3,1).contiguous().view(batch,-1,self.box_len) 65 | cls_pred = cls_pred.permute(0,2,3,1).contiguous().view(batch,-1,self.num_cls) 66 | box_truth = box_truth.permute(0,2,3,1).contiguous().view(batch,-1,self.box_len) 67 | cls_truth = cls_truth.permute(0,2,3,1).contiguous().view(batch,-1) 68 | 69 | pos = cls_truth > 0 70 | num_obj = pos.data.sum() 71 | 72 | # box_loss = SmoothL1Loss(pos_box_pred, pos_box_targets) 73 | mask = pos.unsqueeze(2).expand_as(box_pred) # [batch, anchors, 8] 74 | masked_box_pred = box_pred[mask].view(-1,self.box_len) # [#pos,8] 75 | masked_box_truth = box_truth[mask].view(-1,self.box_len) # [#pos,8] 76 | box_loss = F.smooth_l1_loss(masked_box_pred, masked_box_truth, size_average=False) 77 | 78 | # cls_loss = FocalLoss(loc_preds, loc_targets) 79 | pos_neg = cls_truth > -1 # exclude ignored anchors 80 | mask = pos_neg.unsqueeze(2).expand_as(cls_pred) 81 | masked_cls_pred = cls_pred[mask].view(-1,self.num_cls) 82 | cls_loss = self.focal_loss(masked_cls_pred, cls_truth[pos_neg]) 83 | 84 | self.cls_loss = cls_loss.data 85 | self.box_loss = box_loss.data 86 | self.loss = (cls_loss + box_loss)/num_obj.float() 87 | return self.loss -------------------------------------------------------------------------------- /tensorrt/BatchStream.h: -------------------------------------------------------------------------------- 1 | #ifndef BATCH_STREAM_H 2 | #define BATCH_STREAM_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "NvInfer.h" 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "make_input.h" 13 | 14 | #define PCLOUD_SIZE 1000000 //the size of buffer for the point cloud 15 | 16 | class BatchStream 17 | { 18 | public: 19 | BatchStream(int batchSize, int maxBatches, std::string calibList) 20 | : mBatchSize(batchSize) 21 | , mMaxBatches(maxBatches) 22 | , mCalibList(calibList) 23 | { 24 | //get calib file names 25 | std::fstream fin(mCalibList); 26 | std::string ReadLine; 27 | while(std::getline (fin, ReadLine)) 28 | { 29 | mFileList.push_back(ReadLine); 30 | } 31 | //set network input dims 32 | mDims = nvinfer1::DimsNCHW{1, 3, 200, 400}; 33 | mImageSize = mDims.c() * mDims.h() * mDims.w(); 34 | mTruthSize = mDims.h() * mDims.w() ; 35 | mBatch.resize(mBatchSize * mImageSize, 0); 36 | mLabels.resize(mBatchSize * mTruthSize, 0); 37 | mFileBatch.resize(mDims.n() * mImageSize, 0); // one input image buffer 38 | mFileLabels.resize(mDims.n() * mTruthSize, 0); 39 | mPointsBuf.resize(PCLOUD_SIZE, 0); 40 | reset(0); 41 | } 42 | 43 | void reset(int firstBatch) 44 | { 45 | mBatchCount = 0; 46 | mFileCount = 0; 47 | mFileBatchPos = mDims.n(); 48 | skip(firstBatch); 49 | } 50 | 51 | bool next() 52 | { 53 | if (mBatchCount == mMaxBatches) 54 | return false; 55 | 56 | for (int csize = 1, batchPos = 0; batchPos < mBatchSize; batchPos += csize) 57 | { 58 | if (!update()) 59 | return false; 60 | std::copy_n(getFileBatch(), csize * mImageSize, getBatch() + batchPos * mImageSize); 61 | std::copy_n(getFileLabels(), csize * mTruthSize, getLabels() + batchPos * mTruthSize); 62 | } 63 | mBatchCount++; 64 | return true; 65 | } 66 | 67 | void skip(int skipCount) 68 | { 69 | mFileCount = skipCount; 70 | } 71 | 72 | float* getBatch() { return &mBatch[0]; } 73 | float* getLabels() { return &mLabels[0]; } 74 | int getBatchesRead() const { return mBatchCount; } 75 | int getBatchSize() const { return mBatchSize; } 76 | nvinfer1::DimsNCHW getDims() const { return mDims; } 77 | 78 | private: 79 | float* getFileBatch() { return &mFileBatch[0]; } 80 | float* getFileLabels() { return &mFileLabels[0]; } 81 | float* getPointsBuf() { return &mPointsBuf[0]; } 82 | 83 | bool update() 84 | { 85 | memset(getPointsBuf(), 0, PCLOUD_SIZE*sizeof(float)); 86 | memset(getFileBatch(), 0, mImageSize*sizeof(float)); 87 | memset(getFileLabels(), 0, mTruthSize*sizeof(float)); 88 | char buf_s[64]; 89 | int size = 0; 90 | mFileCount++; 91 | std::string inputFileName = mFileList[mFileCount]; 92 | //std::cout< mFileList; 120 | std::vector mBatch; 121 | std::vector mLabels; 122 | std::vector mFileBatch; 123 | std::vector mFileLabels; 124 | std::vector mPointsBuf;//the buffer for the point cloud 125 | }; 126 | 127 | #endif 128 | -------------------------------------------------------------------------------- /tensorrt/Makefile: -------------------------------------------------------------------------------- 1 | OUTNAME=lidar_seg_int8 2 | 3 | CC = g++ 4 | CUCC =$(CUDA_INSTALL_DIR)/bin/nvcc -m64 5 | 6 | TENSORRT_DIR=/usr/local/TensorRT-5.0.2.6 7 | CUDA_INSTALL_DIR=/usr/local/cuda 8 | CUDA_LIBDIR=lib64 9 | 10 | LIBPATHS=-L/usr/local/lib -L"$(CUDA_INSTALL_DIR)/$(CUDA_LIBDIR)" -L"$(TENSORRT_DIR)/lib" 11 | TENSORRT_LIB=-lnvinfer -lnvparsers -lnvinfer_plugin -lnvonnxparser -lnvonnxparser_runtime 12 | INCPATHS=-I/usr/local/include -I"$(CUDA_INSTALL_DIR)/include" -I"$(TENSORRT_DIR)/include" 13 | 14 | COMMON_FLAGS += -Wall -std=c++11 $(INCPATHS) -Wl,-rpath "$(TENSORRT_DIR)/lib" 15 | COMMON_LD_FLAGS += $(LIBPATHS) 16 | COMMON_LIBS = -lcudnn -lcublas -lnvToolsExt -lcudart $(TENSORRT_LIB) 17 | 18 | OBJS=make_input.o lasernet.o 19 | BIN=demo.bin 20 | 21 | all:$(OBJS) 22 | $(CC) $(OBJS) $(COMMON_FLAGS) $(COMMON_LIBS) $(COMMON_LD_FLAGS) -o $(BIN) 23 | 24 | %.o:%.cpp 25 | $(CC) $(COMMON_FLAGS) $(COMMON_LIBS) $(COMMON_LD_FLAGS) -c $< -o $@ 26 | 27 | clean: 28 | rm *.o $(BIN) -------------------------------------------------------------------------------- /tensorrt/common.h: -------------------------------------------------------------------------------- 1 | #ifndef TENSORRT_COMMON_H 2 | #define TENSORRT_COMMON_H 3 | 4 | #include "NvInfer.h" 5 | #include "NvInferPlugin.h" 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | using namespace std; 27 | using namespace nvinfer1; 28 | using namespace plugin; 29 | 30 | #define CHECK(status) \ 31 | do \ 32 | { \ 33 | auto ret = (status); \ 34 | if (ret != 0) \ 35 | { \ 36 | std::cout << "Cuda failure: " << ret; \ 37 | abort(); \ 38 | } \ 39 | } while (0) 40 | 41 | constexpr long double operator"" _GB(long double val) 42 | { 43 | return val * (1 << 30); 44 | } 45 | constexpr long double operator"" _MB(long double val) { return val * (1 << 20); } 46 | constexpr long double operator"" _KB(long double val) { return val * (1 << 10); } 47 | 48 | // These is necessary if we want to be able to write 1_GB instead of 1.0_GB. 49 | // Since the return type is signed, -1_GB will work as expected. 50 | constexpr long long int operator"" _GB(long long unsigned int val) { return val * (1 << 30); } 51 | constexpr long long int operator"" _MB(long long unsigned int val) { return val * (1 << 20); } 52 | constexpr long long int operator"" _KB(long long unsigned int val) { return val * (1 << 10); } 53 | 54 | // Logger for TensorRT info/warning/errors 55 | class Logger : public nvinfer1::ILogger 56 | { 57 | public: 58 | Logger(Severity severity = Severity::kWARNING) 59 | : reportableSeverity(severity) 60 | { 61 | } 62 | 63 | void log(Severity severity, const char* msg) override 64 | { 65 | // suppress messages with severity enum value greater than the reportable 66 | if (severity > reportableSeverity) 67 | return; 68 | 69 | switch (severity) 70 | { 71 | case Severity::kINTERNAL_ERROR: std::cerr << "INTERNAL_ERROR: "; break; 72 | case Severity::kERROR: std::cerr << "ERROR: "; break; 73 | case Severity::kWARNING: std::cerr << "WARNING: "; break; 74 | case Severity::kINFO: std::cerr << "INFO: "; break; 75 | default: std::cerr << "UNKNOWN: "; break; 76 | } 77 | std::cerr << msg << std::endl; 78 | } 79 | 80 | Severity reportableSeverity; 81 | }; 82 | 83 | struct SimpleProfiler : public nvinfer1::IProfiler 84 | { 85 | struct Record 86 | { 87 | float time{0}; 88 | int count{0}; 89 | }; 90 | 91 | virtual void reportLayerTime(const char* layerName, float ms) 92 | { 93 | mProfile[layerName].count++; 94 | mProfile[layerName].time += ms; 95 | } 96 | 97 | SimpleProfiler( 98 | const char* name, 99 | const std::vector& srcProfilers = std::vector()) 100 | : mName(name) 101 | { 102 | for (const auto& srcProfiler : srcProfilers) 103 | { 104 | for (const auto& rec : srcProfiler.mProfile) 105 | { 106 | auto it = mProfile.find(rec.first); 107 | if (it == mProfile.end()) 108 | { 109 | mProfile.insert(rec); 110 | } 111 | else 112 | { 113 | it->second.time += rec.second.time; 114 | it->second.count += rec.second.count; 115 | } 116 | } 117 | } 118 | } 119 | 120 | friend std::ostream& operator<<(std::ostream& out, const SimpleProfiler& value) 121 | { 122 | out << "========== " << value.mName << " profile ==========" << std::endl; 123 | float totalTime = 0; 124 | std::string layerNameStr = "TensorRT layer name"; 125 | int maxLayerNameLength = std::max(static_cast(layerNameStr.size()), 70); 126 | for (const auto& elem : value.mProfile) 127 | { 128 | totalTime += elem.second.time; 129 | maxLayerNameLength = std::max(maxLayerNameLength, static_cast(elem.first.size())); 130 | } 131 | 132 | auto old_settings = out.flags(); 133 | auto old_precision = out.precision(); 134 | // Output header 135 | { 136 | out << std::setw(maxLayerNameLength) << layerNameStr << " "; 137 | out << std::setw(12) << "Runtime, " 138 | << "%" 139 | << " "; 140 | out << std::setw(12) << "Invocations" 141 | << " "; 142 | out << std::setw(12) << "Runtime, ms" << std::endl; 143 | } 144 | for (const auto& elem : value.mProfile) 145 | { 146 | out << std::setw(maxLayerNameLength) << elem.first << " "; 147 | out << std::setw(12) << std::fixed << std::setprecision(1) << (elem.second.time * 100.0F / totalTime) << "%" 148 | << " "; 149 | out << std::setw(12) << elem.second.count << " "; 150 | out << std::setw(12) << std::fixed << std::setprecision(2) << elem.second.time << std::endl; 151 | } 152 | out.flags(old_settings); 153 | out.precision(old_precision); 154 | out << "========== " << value.mName << " total runtime = " << totalTime << " ms ==========" << std::endl; 155 | 156 | return out; 157 | } 158 | 159 | private: 160 | std::string mName; 161 | std::map mProfile; 162 | }; 163 | 164 | // Locate path to file, given its filename or filepath suffix and possible dirs it might lie in 165 | // Function will also walk back MAX_DEPTH dirs from CWD to check for such a file path 166 | inline std::string locateFile(const std::string& filepathSuffix, const std::vector& directories) 167 | { 168 | const int MAX_DEPTH{10}; 169 | bool found{false}; 170 | std::string filepath; 171 | 172 | for (auto& dir : directories) 173 | { 174 | filepath = dir + filepathSuffix; 175 | 176 | for (int i = 0; i < MAX_DEPTH && !found; i++) 177 | { 178 | std::ifstream checkFile(filepath); 179 | found = checkFile.is_open(); 180 | if (found) 181 | break; 182 | filepath = "../" + filepath; // Try again in parent dir 183 | } 184 | 185 | if (found) 186 | { 187 | break; 188 | } 189 | 190 | filepath.clear(); 191 | } 192 | 193 | if (filepath.empty()) 194 | { 195 | std::string directoryList = std::accumulate(directories.begin() + 1, directories.end(), directories.front(), 196 | [](const std::string& a, const std::string& b) { return a + "\n\t" + b; }); 197 | std::cout << "Could not find " << filepathSuffix << " in data directories:\n\t" << directoryList << std::endl; 198 | exit(EXIT_FAILURE); 199 | } 200 | return filepath; 201 | } 202 | 203 | inline void readPGMFile(const std::string& fileName, uint8_t* buffer, int inH, int inW) 204 | { 205 | std::ifstream infile(fileName, std::ifstream::binary); 206 | assert(infile.is_open() && "Attempting to read from a file that is not open."); 207 | std::string magic, h, w, max; 208 | infile >> magic >> h >> w >> max; 209 | infile.seekg(1, infile.cur); 210 | infile.read(reinterpret_cast(buffer), inH * inW); 211 | } 212 | 213 | namespace samplesCommon 214 | { 215 | 216 | inline void* safeCudaMalloc(size_t memSize) 217 | { 218 | void* deviceMem; 219 | CHECK(cudaMalloc(&deviceMem, memSize)); 220 | if (deviceMem == nullptr) 221 | { 222 | std::cerr << "Out of memory" << std::endl; 223 | exit(1); 224 | } 225 | return deviceMem; 226 | } 227 | 228 | inline bool isDebug() 229 | { 230 | return (std::getenv("TENSORRT_DEBUG") ? true : false); 231 | } 232 | 233 | struct InferDeleter 234 | { 235 | template 236 | void operator()(T* obj) const 237 | { 238 | if (obj) 239 | { 240 | obj->destroy(); 241 | } 242 | } 243 | }; 244 | 245 | template 246 | inline std::shared_ptr infer_object(T* obj) 247 | { 248 | if (!obj) 249 | { 250 | throw std::runtime_error("Failed to create object"); 251 | } 252 | return std::shared_ptr(obj, InferDeleter()); 253 | } 254 | 255 | template 256 | inline std::vector argsort(Iter begin, Iter end, bool reverse = false) 257 | { 258 | std::vector inds(end - begin); 259 | std::iota(inds.begin(), inds.end(), 0); 260 | if (reverse) 261 | { 262 | std::sort(inds.begin(), inds.end(), [&begin](size_t i1, size_t i2) { 263 | return begin[i2] < begin[i1]; 264 | }); 265 | } 266 | else 267 | { 268 | std::sort(inds.begin(), inds.end(), [&begin](size_t i1, size_t i2) { 269 | return begin[i1] < begin[i2]; 270 | }); 271 | } 272 | return inds; 273 | } 274 | 275 | inline bool readReferenceFile(const std::string& fileName, std::vector& refVector) 276 | { 277 | std::ifstream infile(fileName); 278 | if (!infile.is_open()) 279 | { 280 | cout << "ERROR: readReferenceFile: Attempting to read from a file that is not open." << endl; 281 | return false; 282 | } 283 | std::string line; 284 | while (std::getline(infile, line)) 285 | { 286 | if (line.empty()) 287 | continue; 288 | refVector.push_back(line); 289 | } 290 | infile.close(); 291 | return true; 292 | } 293 | 294 | template 295 | inline std::vector classify(const vector& refVector, const result_vector_t& output, const size_t topK) 296 | { 297 | auto inds = samplesCommon::argsort(output.cbegin(), output.cend(), true); 298 | std::vector result; 299 | for (size_t k = 0; k < topK; ++k) 300 | { 301 | result.push_back(refVector[inds[k]]); 302 | } 303 | return result; 304 | } 305 | 306 | //...LG returns top K indices, not values. 307 | template 308 | inline vector topK(const vector inp, const size_t k) 309 | { 310 | vector result; 311 | std::vector inds = samplesCommon::argsort(inp.cbegin(), inp.cend(), true); 312 | result.assign(inds.begin(), inds.begin() + k); 313 | return result; 314 | } 315 | 316 | template 317 | inline bool readASCIIFile(const string& fileName, const size_t size, vector& out) 318 | { 319 | std::ifstream infile(fileName); 320 | if (!infile.is_open()) 321 | { 322 | cout << "ERROR readASCIIFile: Attempting to read from a file that is not open." << endl; 323 | return false; 324 | } 325 | out.clear(); 326 | out.reserve(size); 327 | out.assign(std::istream_iterator(infile), std::istream_iterator()); 328 | infile.close(); 329 | return true; 330 | } 331 | 332 | template 333 | inline bool writeASCIIFile(const string& fileName, const vector& in) 334 | { 335 | std::ofstream outfile(fileName); 336 | if (!outfile.is_open()) 337 | { 338 | cout << "ERROR: writeASCIIFile: Attempting to write to a file that is not open." << endl; 339 | return false; 340 | } 341 | for (auto fn : in) 342 | { 343 | outfile << fn << " "; 344 | } 345 | outfile.close(); 346 | return true; 347 | } 348 | 349 | inline void print_version() 350 | { 351 | //... This can be only done after statically linking this support into parserONNX.library 352 | #if 0 353 | std::cout << "Parser built against:" << std::endl; 354 | std::cout << " ONNX IR version: " << nvonnxparser::onnx_ir_version_string(onnx::IR_VERSION) << std::endl; 355 | #endif 356 | std::cout << " TensorRT version: " 357 | << NV_TENSORRT_MAJOR << "." 358 | << NV_TENSORRT_MINOR << "." 359 | << NV_TENSORRT_PATCH << "." 360 | << NV_TENSORRT_BUILD << std::endl; 361 | } 362 | 363 | inline string getFileType(const string& filepath) 364 | { 365 | return filepath.substr(filepath.find_last_of(".") + 1); 366 | } 367 | 368 | inline string toLower(const string& inp) 369 | { 370 | string out = inp; 371 | std::transform(out.begin(), out.end(), out.begin(), ::tolower); 372 | return out; 373 | } 374 | 375 | inline void enableDLA(IBuilder* b, int useDLACore) 376 | { 377 | if (useDLACore >= 0) 378 | { 379 | b->allowGPUFallback(true); 380 | b->setFp16Mode(true); 381 | b->setDefaultDeviceType(DeviceType::kDLA); 382 | b->setDLACore(useDLACore); 383 | } 384 | } 385 | 386 | inline int parseDLA(int argc, char** argv) 387 | { 388 | for (int i = 1; i < argc; i++) 389 | { 390 | std::string arg(argv[i]); 391 | if (strncmp(argv[i], "--useDLACore=", 13) == 0) 392 | return stoi(argv[i] + 13); 393 | } 394 | return -1; 395 | } 396 | 397 | inline unsigned int getElementSize(nvinfer1::DataType t) 398 | { 399 | switch (t) 400 | { 401 | case nvinfer1::DataType::kINT32: return 4; 402 | case nvinfer1::DataType::kFLOAT: return 4; 403 | case nvinfer1::DataType::kHALF: return 2; 404 | case nvinfer1::DataType::kINT8: return 1; 405 | } 406 | throw std::runtime_error("Invalid DataType."); 407 | return 0; 408 | } 409 | 410 | inline int64_t volume(const nvinfer1::Dims& d) 411 | { 412 | return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies()); 413 | } 414 | 415 | template 416 | struct PPM 417 | { 418 | std::string magic, fileName; 419 | int h, w, max; 420 | uint8_t buffer[C * H * W]; 421 | }; 422 | 423 | struct BBox 424 | { 425 | float x1, y1, x2, y2; 426 | }; 427 | 428 | template 429 | inline void readPPMFile(const std::string& filename, samplesCommon::PPM& ppm) 430 | { 431 | ppm.fileName = filename; 432 | std::ifstream infile(filename, std::ifstream::binary); 433 | assert(infile.is_open() && "Attempting to read from a file that is not open."); 434 | infile >> ppm.magic >> ppm.w >> ppm.h >> ppm.max; 435 | infile.seekg(1, infile.cur); 436 | infile.read(reinterpret_cast(ppm.buffer), ppm.w * ppm.h * 3); 437 | } 438 | 439 | template 440 | inline void writePPMFileWithBBox(const std::string& filename, PPM& ppm, const BBox& bbox) 441 | { 442 | std::ofstream outfile("./" + filename, std::ofstream::binary); 443 | assert(!outfile.fail()); 444 | outfile << "P6" 445 | << "\n" 446 | << ppm.w << " " << ppm.h << "\n" 447 | << ppm.max << "\n"; 448 | auto round = [](float x) -> int { return int(std::floor(x + 0.5f)); }; 449 | const int x1 = std::min(std::max(0, round(int(bbox.x1))), W - 1); 450 | const int x2 = std::min(std::max(0, round(int(bbox.x2))), W - 1); 451 | const int y1 = std::min(std::max(0, round(int(bbox.y1))), H - 1); 452 | const int y2 = std::min(std::max(0, round(int(bbox.y2))), H - 1); 453 | for (int x = x1; x <= x2; ++x) 454 | { 455 | // bbox top border 456 | ppm.buffer[(y1 * ppm.w + x) * 3] = 255; 457 | ppm.buffer[(y1 * ppm.w + x) * 3 + 1] = 0; 458 | ppm.buffer[(y1 * ppm.w + x) * 3 + 2] = 0; 459 | // bbox bottom border 460 | ppm.buffer[(y2 * ppm.w + x) * 3] = 255; 461 | ppm.buffer[(y2 * ppm.w + x) * 3 + 1] = 0; 462 | ppm.buffer[(y2 * ppm.w + x) * 3 + 2] = 0; 463 | } 464 | for (int y = y1; y <= y2; ++y) 465 | { 466 | // bbox left border 467 | ppm.buffer[(y * ppm.w + x1) * 3] = 255; 468 | ppm.buffer[(y * ppm.w + x1) * 3 + 1] = 0; 469 | ppm.buffer[(y * ppm.w + x1) * 3 + 2] = 0; 470 | // bbox right border 471 | ppm.buffer[(y * ppm.w + x2) * 3] = 255; 472 | ppm.buffer[(y * ppm.w + x2) * 3 + 1] = 0; 473 | ppm.buffer[(y * ppm.w + x2) * 3 + 2] = 0; 474 | } 475 | outfile.write(reinterpret_cast(ppm.buffer), ppm.w * ppm.h * 3); 476 | } 477 | 478 | class TimerBase 479 | { 480 | public: 481 | virtual void start() {} 482 | virtual void stop() {} 483 | float microseconds() const noexcept { return mMs * 1000.f; } 484 | float milliseconds() const noexcept { return mMs; } 485 | float seconds() const noexcept { return mMs / 1000.f; } 486 | void reset() noexcept { mMs = 0.f; } 487 | 488 | protected: 489 | float mMs{0.0f}; 490 | }; 491 | 492 | class GpuTimer : public TimerBase 493 | { 494 | public: 495 | GpuTimer(cudaStream_t stream) 496 | : mStream(stream) 497 | { 498 | CHECK(cudaEventCreate(&mStart)); 499 | CHECK(cudaEventCreate(&mStop)); 500 | } 501 | ~GpuTimer() 502 | { 503 | CHECK(cudaEventDestroy(mStart)); 504 | CHECK(cudaEventDestroy(mStop)); 505 | } 506 | void start() { CHECK(cudaEventRecord(mStart, mStream)); } 507 | void stop() 508 | { 509 | CHECK(cudaEventRecord(mStop, mStream)); 510 | float ms{0.0f}; 511 | CHECK(cudaEventSynchronize(mStop)); 512 | CHECK(cudaEventElapsedTime(&ms, mStart, mStop)); 513 | mMs += ms; 514 | } 515 | 516 | private: 517 | cudaEvent_t mStart, mStop; 518 | cudaStream_t mStream; 519 | }; // class GpuTimer 520 | 521 | template 522 | class CpuTimer : public TimerBase 523 | { 524 | public: 525 | using clock_type = Clock; 526 | 527 | void start() { mStart = Clock::now(); } 528 | void stop() 529 | { 530 | mStop = Clock::now(); 531 | mMs += std::chrono::duration{mStop - mStart}.count(); 532 | } 533 | 534 | private: 535 | std::chrono::time_point mStart, mStop; 536 | }; // class CpuTimer 537 | 538 | using PreciseCpuTimer = CpuTimer; 539 | 540 | } // namespace samplesCommon 541 | 542 | #endif // TENSORRT_COMMON_H 543 | -------------------------------------------------------------------------------- /tensorrt/lasernet.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include "NvInfer.h" 18 | #include "NvOnnxParser.h" 19 | #include "NvOnnxParserRuntime.h" 20 | 21 | #include "BatchStream.h" 22 | #include "common.h" 23 | 24 | using namespace nvinfer1; 25 | using namespace nvonnxparser; 26 | 27 | static Logger gLogger; 28 | static int gUseDLACore = -1; 29 | 30 | static const int CAL_BATCH_SIZE = 4; 31 | static const int FIRST_CAL_BATCH = 500, NB_CAL_BATCHES = 2000; // calibrate over images 0-600 32 | static const int FIRST_CAL_SCORE_BATCH = 500, NB_CAL_SCORE_BATCHES = 200; // score over images 500-5000 33 | 34 | const char* gNetworkName{nullptr}; 35 | std::string calibPath("/home/adas/data/alibaba-lidar/training/xyzic/"); 36 | std::string calibList("/home/adas/data/pytorch_ws/LaserNet/validlist.txt"); 37 | 38 | bool onnxToTRTModel(const std::string& modelFile, 39 | int& maxBatchSize, 40 | DataType dataType, 41 | IInt8Calibrator* calibrator, 42 | nvinfer1::IHostMemory*& trtModelStream) 43 | { 44 | int verbosity = (int) nvinfer1::ILogger::Severity::kWARNING; 45 | // create the builder 46 | IBuilder* builder = createInferBuilder(gLogger); 47 | // parse the onnx model to populate the network, then set the outputs 48 | INetworkDefinition* network = builder->createNetwork(); 49 | nvonnxparser::IParser* parser = nvonnxparser::createParser(*network, gLogger); 50 | if (!parser->parseFromFile(modelFile.c_str(), verbosity)) 51 | { 52 | std::string msg("failed to parse onnx file"); 53 | gLogger.log(nvinfer1::ILogger::Severity::kERROR, msg.c_str()); 54 | exit(EXIT_FAILURE); 55 | } 56 | //check platform and datatype 57 | if ((dataType == DataType::kINT8 && !builder->platformHasFastInt8())) 58 | { 59 | std::cout<<"Current Device does not support INT8 inference!"<platformHasFastFp16()) 63 | { 64 | std::cout<<"Current Device does not support FP16 inference!"<getNbLayers()<getLayer(network->getNbLayers() - i - 1); 72 | std::string layername(layer->getName()); 73 | std::cout<<" layer name: "<getMaxWorkspaceSize()<setMaxWorkspaceSize(WorkspaceSize); 80 | std::cout<getMaxWorkspaceSize()<setAverageFindIterations(1); 82 | builder->setMinFindIterations(1); 83 | builder->setDebugSync(true); 84 | builder->setInt8Mode(dataType == DataType::kINT8); 85 | builder->setFp16Mode(dataType == DataType::kHALF); 86 | builder->setInt8Calibrator(calibrator); 87 | if (gUseDLACore >= 0) 88 | { 89 | samplesCommon::enableDLA(builder, gUseDLACore); 90 | if (maxBatchSize > builder->getMaxDLABatchSize()) 91 | { 92 | std::cerr << "Requested batch size " << maxBatchSize << " is greater than the max DLA batch size of " 93 | << builder->getMaxDLABatchSize() << ". Reducing batch size accordingly." << std::endl; 94 | maxBatchSize = builder->getMaxDLABatchSize(); 95 | } 96 | } 97 | if(dataType == DataType::kINT8) 98 | { 99 | builder->setStrictTypeConstraints(true); 100 | for(int i = 0; i < 20; i++) 101 | { 102 | // ILayer* layer = network->getLayer(network->getNbLayers() - i - 1); 103 | // layer->setPrecision(DataType::kFLOAT); 104 | // for (int j = 0; j < layer->getNbOutputs(); ++j) 105 | // { 106 | // layer->setOutputType(j, nvinfer1::DataType::kFLOAT); 107 | // } 108 | ILayer* layer1 = network->getLayer(i); 109 | layer1->setPrecision(DataType::kFLOAT); 110 | for (int j = 0; j < layer1->getNbOutputs(); ++j) 111 | { 112 | layer1->setOutputType(j, nvinfer1::DataType::kFLOAT); 113 | } 114 | } 115 | //builder->setStrictTypeConstraints(true); 116 | } 117 | builder->setMaxBatchSize(4); 118 | std::cout<<"000"<buildCudaEngine(*network); 120 | assert(engine); 121 | 122 | // serialize the engine, then close everything down 123 | 124 | std::cout<<"111"<serialize(); 126 | std::cout<<"222"<data(), 1, trtModelStream->size(), fp); 129 | fclose(fp); 130 | 131 | // we don't need the network any more, and we can destroy the parser 132 | parser->destroy(); 133 | engine->destroy(); 134 | network->destroy(); 135 | builder->destroy(); 136 | std::cout<<"Create TensorRT model finished!"<(context.getEngine().getBindingDimensions(inputIndex)); 156 | Dims3 outputDims = static_cast(context.getEngine().getBindingDimensions(outputIndex_cls)); 157 | 158 | size_t inputSize = batchSize * inputDims.d[0] * inputDims.d[1] * inputDims.d[2] * sizeof(float); 159 | size_t outputSize = batchSize * outputDims.d[0] * outputDims.d[1] * outputDims.d[2] * sizeof(float); 160 | CHECK(cudaMalloc(&buffers[inputIndex], inputSize)); 161 | CHECK(cudaMalloc(&buffers[outputIndex_cls], outputSize)); 162 | 163 | CHECK(cudaMemcpy(buffers[inputIndex], input, inputSize, cudaMemcpyHostToDevice)); 164 | 165 | cudaStream_t stream; 166 | CHECK(cudaStreamCreate(&stream)); 167 | cudaEvent_t start, end; 168 | CHECK(cudaEventCreateWithFlags(&start, cudaEventBlockingSync)); 169 | CHECK(cudaEventCreateWithFlags(&end, cudaEventBlockingSync)); 170 | cudaEventRecord(start, stream); 171 | context.enqueue(batchSize, buffers, stream, nullptr); 172 | cudaEventRecord(end, stream); 173 | cudaEventSynchronize(end); 174 | cudaEventElapsedTime(&ms, start, end); 175 | cudaEventDestroy(start); 176 | cudaEventDestroy(end); 177 | 178 | CHECK(cudaMemcpy(output, buffers[outputIndex_cls], outputSize, cudaMemcpyDeviceToHost)); 179 | CHECK(cudaFree(buffers[inputIndex])); 180 | CHECK(cudaFree(buffers[outputIndex_cls])); 181 | CHECK(cudaStreamDestroy(stream)); 182 | return ms; 183 | } 184 | 185 | float calculateScore(float* batchProb, float* labels, int batchSize, int outputSize, int height, int width) 186 | { 187 | float miou = 0.0; 188 | int TP = 0; 189 | int FPTN = 0; 190 | float *pred, *truth; 191 | float p_cls, t_cls, val; 192 | for (int i = 0; i < batchSize; i++) 193 | { 194 | pred = batchProb + outputSize * i; 195 | truth = labels + height*width*i; 196 | for(int h=0; h 1){ 208 | TP++; 209 | }else if(val==1){ 210 | FPTN++; 211 | } 212 | } 213 | } 214 | } 215 | if( (TP+FPTN) == 0 ){ 216 | miou = 1; 217 | }else{ 218 | miou = ((float)TP)/(TP+FPTN); 219 | } 220 | return miou; 221 | } 222 | 223 | class Int8EntropyCalibrator : public IInt8EntropyCalibrator 224 | { 225 | public: 226 | Int8EntropyCalibrator(BatchStream& stream, int firstBatch, bool readCache = true) 227 | : mStream(stream) 228 | , mReadCache(readCache) 229 | { 230 | DimsNCHW dims = mStream.getDims(); 231 | mInputCount = mStream.getBatchSize() * dims.c() * dims.h() * dims.w(); 232 | CHECK(cudaMalloc(&mDeviceInput, mInputCount * sizeof(float))); 233 | mStream.reset(firstBatch); 234 | } 235 | 236 | virtual ~Int8EntropyCalibrator() 237 | { 238 | CHECK(cudaFree(mDeviceInput)); 239 | } 240 | 241 | int getBatchSize() const override { return mStream.getBatchSize(); } 242 | 243 | bool getBatch(void* bindings[], const char* names[], int nbBindings) override 244 | { 245 | if (!mStream.next()) 246 | return false; 247 | 248 | CHECK(cudaMemcpy(mDeviceInput, mStream.getBatch(), mInputCount * sizeof(float), cudaMemcpyHostToDevice)); 249 | //assert(!strcmp(names[0], INPUT_BLOB_NAME)); 250 | bindings[0] = mDeviceInput; 251 | return true; 252 | } 253 | 254 | const void* readCalibrationCache(size_t& length) override 255 | { 256 | mCalibrationCache.clear(); 257 | std::ifstream input(calibrationTableName(), std::ios::binary); 258 | input >> std::noskipws; 259 | if (mReadCache && input.good()) 260 | std::copy(std::istream_iterator(input), std::istream_iterator(), std::back_inserter(mCalibrationCache)); 261 | 262 | length = mCalibrationCache.size(); 263 | return length ? &mCalibrationCache[0] : nullptr; 264 | } 265 | 266 | void writeCalibrationCache(const void* cache, size_t length) override 267 | { 268 | std::ofstream output(calibrationTableName(), std::ios::binary); 269 | output.write(reinterpret_cast(cache), length); 270 | } 271 | 272 | private: 273 | static std::string calibrationTableName() 274 | { 275 | assert(gNetworkName); 276 | return std::string("CalibrationTable") + gNetworkName; 277 | } 278 | BatchStream mStream; 279 | bool mReadCache{true}; 280 | 281 | size_t mInputCount; 282 | void* mDeviceInput{nullptr}; 283 | std::vector mCalibrationCache; 284 | }; 285 | 286 | float scoreModel(std::string modelname, int batchSize, int firstBatch, int nbScoreBatches, DataType datatype, IInt8Calibrator* calibrator, bool quiet = false) 287 | { 288 | IHostMemory* trtModelStream{nullptr}; 289 | bool valid = onnxToTRTModel(modelname, batchSize, datatype, calibrator, trtModelStream); 290 | 291 | if (!valid) 292 | { 293 | std::cout << "Engine could not be created at this precision" << std::endl; 294 | return 0; 295 | } 296 | 297 | assert(trtModelStream != nullptr); 298 | 299 | // Create engine and deserialize model. 300 | std::cout<<"000"<= 0) 304 | { 305 | infer->setDLACore(gUseDLACore); 306 | } 307 | ICudaEngine* engine = infer->deserializeCudaEngine(trtModelStream->data(), trtModelStream->size(), nullptr); 308 | std::cout<<"111"<destroy(); 311 | IExecutionContext* context = engine->createExecutionContext(); 312 | assert(context != nullptr); 313 | 314 | std::cout<<"222"<(context->getEngine().getBindingDimensions(1)); 319 | int outputSize = outputDims.d[0] * outputDims.d[1] * outputDims.d[2]; 320 | std::cout< prob(batchSize * outputSize, 0); 324 | 325 | while (stream.next()) 326 | { 327 | totalTime += doInference(*context, stream.getBatch(), &prob[0], batchSize); 328 | 329 | mIoU += calculateScore(&prob[0], stream.getLabels(), batchSize, outputSize, outputDims.d[1], outputDims.d[2]); 330 | } 331 | int imagesRead = stream.getBatchesRead() * batchSize; 332 | 333 | if (!quiet) 334 | { 335 | std::cout << "\nmIoU: " << (mIoU / imagesRead * batchSize) << std::endl; 336 | std::cout << "Processing " << imagesRead << " images averaged " << totalTime / imagesRead << " ms/image and " << totalTime / stream.getBatchesRead() << " ms/batch." << std::endl; 337 | } 338 | 339 | context->destroy(); 340 | engine->destroy(); 341 | infer->destroy(); 342 | return mIoU; 343 | } 344 | 345 | static void printUsage() 346 | { 347 | std::cout << std::endl; 348 | std::cout << "Usage: ./sample_int8 " << std::endl; 349 | std::cout << std::endl; 350 | std::cout << "Optional params" << std::endl; 351 | std::cout << " batch=N Set batch size (default = 100)" << std::endl; 352 | std::cout << " start=N Set the first batch to be scored (default = 100). All batches before this batch will be used for calibration." << std::endl; 353 | std::cout << " score=N Set the number of batches to be scored (default = 400)" << std::endl; 354 | std::cout << " search Search for best calibration. Can only be used with legacy calibration algorithm" << std::endl; 355 | std::cout << " legacy Use legacy calibration algorithm" << std::endl; 356 | std::cout << " useDLACore=N Enable execution on DLA for all layers that support dla. Value can range from 0 to n-1, where n is the number of DLA engines on the platform." << std::endl; 357 | } 358 | 359 | int main(int argc, char** argv) 360 | { 361 | if (argc < 2 || !strncmp(argv[1], "help", 4) || !strncmp(argv[1], "--help", 6) || !strncmp(argv[1], "--h", 3)) 362 | { 363 | printUsage(); 364 | exit(0); 365 | } 366 | gNetworkName = argv[1]; 367 | std::string modelname(gNetworkName); 368 | 369 | int batchSize = CAL_BATCH_SIZE; 370 | int firstScoreBatch = FIRST_CAL_SCORE_BATCH; 371 | int nbScoreBatches = NB_CAL_SCORE_BATCHES; 372 | bool search = false; 373 | CalibrationAlgoType calibrationAlgo = CalibrationAlgoType::kENTROPY_CALIBRATION; 374 | 375 | for (int i = 2; i < argc; i++) 376 | { 377 | if (!strncmp(argv[i], "batch=", 6)) 378 | batchSize = atoi(argv[i] + 6); 379 | else if (!strncmp(argv[i], "start=", 6)) 380 | firstScoreBatch = atoi(argv[i] + 6); 381 | else if (!strncmp(argv[i], "score=", 6)) 382 | nbScoreBatches = atoi(argv[i] + 6); 383 | else if (!strncmp(argv[i], "search", 6)) 384 | search = true; 385 | else if (!strncmp(argv[i], "legacy", 6)) 386 | calibrationAlgo = CalibrationAlgoType::kLEGACY_CALIBRATION; 387 | else if (!strncmp(argv[i], "useDLACore=", 11)) 388 | gUseDLACore = stoi(argv[i] + 11); 389 | else 390 | { 391 | std::cout << "Unrecognized argument " << argv[i] << std::endl; 392 | exit(0); 393 | } 394 | } 395 | 396 | if (calibrationAlgo == CalibrationAlgoType::kENTROPY_CALIBRATION) 397 | { 398 | search = false; 399 | } 400 | 401 | if (batchSize > 128) 402 | { 403 | std::cout << "Please provide batch size <= 128" << std::endl; 404 | exit(0); 405 | } 406 | 407 | if ((firstScoreBatch + nbScoreBatches) * batchSize > 5000) 408 | { 409 | std::cout << "Only 5000 images available" << std::endl; 410 | exit(0); 411 | } 412 | 413 | std::cout.precision(6); 414 | 415 | BatchStream calibrationStream(CAL_BATCH_SIZE, NB_CAL_BATCHES, calibPath); 416 | int dla{gUseDLACore}; 417 | 418 | // Set gUseDLACore to -1 here since FP16 mode is not enabled. 419 | if (gUseDLACore >= 0) 420 | { 421 | std::cout << "\nDLA requested. Disabling for FP32 run since its not supported." << std::endl; 422 | gUseDLACore = -1; 423 | } 424 | std::cout << "\nFP32 run:" << nbScoreBatches << " batches of size " << batchSize << " starting at " << firstScoreBatch << std::endl; 425 | scoreModel(modelname, batchSize, firstScoreBatch, nbScoreBatches, DataType::kFLOAT, nullptr); 426 | 427 | // Set gUseDLACore correctly to enable DLA if requested. 428 | gUseDLACore = dla; 429 | std::cout << "\nFP16 run:" << nbScoreBatches << " batches of size " << batchSize << " starting at " << firstScoreBatch << std::endl; 430 | scoreModel(modelname, batchSize, firstScoreBatch, nbScoreBatches, DataType::kHALF, nullptr); 431 | 432 | // reset DLA to -1 for int8 mode. 433 | if (gUseDLACore >= 0) 434 | { 435 | std::cout << "\nDLA requested. Disabling for Int8 run since its not supported." << std::endl; 436 | gUseDLACore = -1; 437 | } 438 | std::cout << "\nINT8 run:" << nbScoreBatches << " batches of size " << batchSize << " starting at " << firstScoreBatch << std::endl; 439 | if (calibrationAlgo == CalibrationAlgoType::kENTROPY_CALIBRATION) 440 | { 441 | Int8EntropyCalibrator calibrator(calibrationStream, FIRST_CAL_BATCH); 442 | scoreModel(modelname, batchSize, firstScoreBatch, nbScoreBatches, DataType::kINT8, &calibrator); 443 | } 444 | 445 | return 0; 446 | } 447 | -------------------------------------------------------------------------------- /tensorrt/lasernet.trt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feicay/LaserNet/863a91aa8f542bd278f7bc03f899c0363a898570/tensorrt/lasernet.trt -------------------------------------------------------------------------------- /tensorrt/make_input.cpp: -------------------------------------------------------------------------------- 1 | #include "make_input.h" 2 | #include 3 | #include 4 | 5 | #define PI (3.14159) 6 | 7 | void xyzic_to_image(float* xyzic, int num, float h_start, float h_end, float v_start, float v_end, float dh, float dv, float* im, float* im_cls) 8 | { 9 | int H = int((v_end - v_start + 0.01)/dv); 10 | int W = int((h_end - h_start + 0.01)/dh); 11 | float x,y,z,intensity,yaw,v_angle; 12 | int c; 13 | int w,h; 14 | memset(im, 0, W*H*3); 15 | memset(im_cls, 0, W*H); 16 | for(int i=0; i=0) && (w=0) && (h loss: %f'%(ii,i,criterion.loss)) 78 | if args.vis: 79 | vis.line(Y=loss.data.cpu().view(1,1).numpy(),X=np.array([ii]),win='loss',update='append' if ii>0 else None) 80 | if i < 3: 81 | loss_train = loss.data 82 | else: 83 | loss_train = loss_train / ii 84 | loss_val = 0.0 85 | net.eval() 86 | for jj, (image, cls_truth) in enumerate(loader_val): 87 | image = Variable(image).cuda() 88 | cls_truth = Variable(cls_truth).cuda() 89 | optimizer.zero_grad() 90 | cls_pred = net(image) 91 | loss = criterion(cls_pred, cls_truth) 92 | loss_val += loss.data 93 | print('val: %3d/%3d => loss: %f'%(jj,i,criterion.loss)) 94 | loss_val = loss_val / jj 95 | if args.vis: 96 | vis.line(Y=torch.cat((loss_val.view(1,1), loss_train.view(1,1)),1).cpu().numpy(),X=np.array([i]),\ 97 | win='eval-train loss',update='append' if i>0 else None) 98 | print('Saving weights..') 99 | state = { 100 | 'net': net.module.state_dict(), 101 | 'loss': loss_val, 102 | 'epoch': i, 103 | } 104 | if not os.path.isdir('checkpoint'): 105 | os.mkdir('checkpoint') 106 | torch.save(state, './checkpoint/lasernet%d.pth'%i) 107 | del image, cls_truth 108 | del cls_pred 109 | gc.collect() 110 | time.sleep(1) 111 | if i==50: 112 | lr = lr*0.1 113 | print('learning rate: %f'%lr) 114 | for para_group in optimizer.param_groups: 115 | para_group['lr'] = lr 116 | torch.save(network,'lasernet_model_final.pkl') 117 | print('finish training!') 118 | 119 | def test(): 120 | color = np.array([[0, 0, 0], 121 | [0, 0, 250], 122 | [0, 250, 250], 123 | [0, 250, 0], 124 | [250, 250, 0], 125 | [250, 0, 0], 126 | [250, 0, 250], 127 | [150, 150, 150]]) 128 | validlist = '/raid/pytorch_ws/LaserNet/validlist.txt' 129 | validset = Lidar_xyzic_dataset(validlist, train=0) 130 | loader_test = data.DataLoader(validset, batch_size=1, shuffle=1, num_workers=1, drop_last=True) 131 | 132 | network = DLA(8) 133 | checkpoint = torch.load('./checkpoint/lasernet69.pth') 134 | network.load_state_dict(checkpoint['net']) 135 | network = network.cuda().eval() 136 | if args.onnx == 1: 137 | dummy_input = torch.randn(4, 3, 200, 400, device='cuda') 138 | torch.onnx.export(network, dummy_input, "lasernet.onnx", verbose=True) 139 | return 140 | for i, (image, cls_truth) in enumerate(loader_test): 141 | image = Variable(image).cuda() 142 | cls_truth = Variable(cls_truth).cuda() 143 | t1 = time.time() 144 | cls_pred = network(image) 145 | t2 = time.time() 146 | print('inference time %f'%(t2-t1)) 147 | print(cls_pred.size()) 148 | im = image.squeeze(0).permute(1, 2, 0).cpu().numpy() 149 | cv2.imshow('image', im) 150 | pred = F.softmax(cls_pred, dim=1).squeeze(0).cpu() 151 | prob, cls_ = pred.max(dim=0) 152 | im_cls = np.zeros((200, 400, 3), dtype=np.uint8) 153 | im_cls[:,:] = color[cls_[:,:]] 154 | im_truth = np.zeros((200, 400, 3), dtype=np.uint8) 155 | im_truth[:,:] = color[cls_truth[:,:].cpu().numpy()] 156 | cv2.imshow('cls', im_cls) 157 | cv2.imshow('truth', im_truth) 158 | cv2.waitKey(0) 159 | 160 | if args.test: 161 | test() 162 | else: 163 | train() --------------------------------------------------------------------------------