├── data
    ├── __init__.py
    ├── datagen.py
    ├── get_label.py
    ├── make_ali_lidar.py
    └── make_kitti_xyzic.py
├── model
    ├── DLA.py
    ├── ResNet.py
    ├── __init__.py
    └── loss.py
├── tensorrt
    ├── BatchStream.h
    ├── Makefile
    ├── common.h
    ├── lasernet.cpp
    ├── lasernet.trt
    ├── make_input.cpp
    ├── make_input.h
    └── test.py
└── train.py


/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/feicay/LaserNet/863a91aa8f542bd278f7bc03f899c0363a898570/data/__init__.py


--------------------------------------------------------------------------------
/data/datagen.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.utils.data as data
 3 | import numpy as np
 4 | import random
 5 | import cv2
 6 | 
 7 | def make_xyzic_image(xyzic, yaw_start=-45, yaw_end=45, v_start=-30, v_end=10, d_yaw=0.225, d_v=0.2):
 8 |     num, _ = xyzic.shape
 9 |     width = int((yaw_end - yaw_start) / d_yaw + 0.01)
10 |     height = int((v_end - v_start) / d_v + 0.01)
11 |     im_ref = np.zeros((height, width), dtype=np.float32)
12 |     im_height = np.zeros((height, width), dtype=np.float32)
13 |     im_cls = np.zeros((height, width), dtype=np.float32)
14 |     im_range = np.zeros((height, width), dtype=np.float32)
15 |     x = xyzic[:, 0]
16 |     y = xyzic[:, 1]
17 |     z = xyzic[:, 2]
18 |     L = np.sqrt(x**2 + y**2 + z**2)
19 |     yaw = np.arctan2(y, x)
20 |     v_angel = np.arctan2(z, np.sqrt(x*x + y*y))
21 |     i = ((yaw_end - yaw*180/np.pi) / d_yaw).astype(np.int32)
22 |     j = ((v_end - v_angel*180/np.pi) / d_v).astype(np.int32)
23 |     mask = (i > -1) & (i < width) & (j > -1) & (j < height)
24 |     i = i[mask]
25 |     j = j[mask]
26 |     xyzic = xyzic[mask, :]
27 |     im_cls[j, i] = (xyzic[:, 4] + 0.01)
28 |     im_ref[j, i] = xyzic[:, 3]
29 |     im_height[j, i] = xyzic[:, 2] + 1.73
30 |     im_range[j, i] = L[mask]
31 |     #convert to -1~1
32 |     im_range = im_range / 100
33 |     im_height = im_height / 10
34 |     im = np.stack((im_range, im_ref, im_height))
35 |     return im, im_cls.astype(np.int32)
36 | 
class Lidar_xyzic_dataset(data.Dataset):
    """Dataset of binary point-cloud files listed one path per line.

    Each file is read as float32 and reshaped to (-1, 5), then rendered with
    ``make_xyzic_image``.

    Args:
        filelist: path of a text file with one ``.bin`` path per line.
            Blank lines are ignored.
        train: when truthy, every sample uses a randomly placed 90-degree
            yaw window; otherwise the default window of ``make_xyzic_image``
            is used.
    """

    def __init__(self, filelist, train=1):
        # The context manager closes the file; blank lines are dropped so
        # they neither inflate len() nor turn into empty paths.
        with open(filelist, 'r') as fp:
            self.filelist = [line for line in fp if line.strip()]
        self.len = len(self.filelist)
        self.train = train

    def __len__(self):
        return self.len

    def __getitem__(self, index):
        """Return (image, truth): a float (3, H, W) tensor and a long (1, H, W) tensor."""
        binfile = self.filelist[index].strip()
        # named `points` so the module alias `data` is not shadowed
        points = np.fromfile(binfile, dtype=np.float32).reshape(-1, 5)
        if self.train:
            # random window start in [-180, 180), always 90 degrees wide
            yaw_start = (random.random() - 0.5) * 360
            yaw_end = yaw_start + 90
            im, im_cls = make_xyzic_image(points, yaw_start=yaw_start, yaw_end=yaw_end)
        else:
            im, im_cls = make_xyzic_image(points)
        image = torch.from_numpy(im)
        truth = torch.from_numpy(im_cls).long().unsqueeze(0)
        return image, truth
61 | 
62 | '''category
63 | ‘DontCare’: 0
64 | ‘cyclist’: 1 
65 | ‘tricycle’: 2 
66 | ‘smallMot’: 3 
67 | ‘bigMot’: 4 
68 | ‘pedestrian’: 5 
69 | ‘crowds’: 6 
70 | ‘unknown’: 7
71 | '''
72 | 
def test():
    """Manual visual check: render sample 1000 of the training list with OpenCV.

    Shows the 3-channel input image and a color-coded class map, then blocks
    until a key is pressed.
    """
    # one color triple per class id 0..7
    palette = np.array([[0, 0, 0],
                        [0, 0, 250],
                        [0, 250, 250],
                        [0, 250, 0],
                        [250, 250, 0],
                        [250, 0, 0],
                        [250, 0, 250],
                        [150, 150, 150]])
    list_path = '/home/adas/data/pytorch_ws/LaserNet/trainlist.txt'
    samples = Lidar_xyzic_dataset(list_path)
    image, truth = samples[1000]
    print(image.size(), truth.size())
    h, w = image.size()[1:]
    # channels-last layout for display
    cv2.imshow('image', image.permute(1, 2, 0).numpy())
    labels = truth.numpy()
    cls_view = np.zeros((h, w, 3), dtype=np.uint8)
    cls_view[...] = palette[labels]
    cv2.imshow('cls', cls_view)
    cv2.waitKey(0)
95 | 
96 | #test()


--------------------------------------------------------------------------------
/data/get_label.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import numpy as np 
 3 | 
 4 | def get_class(class_name):
 5 |     cls_ = 0
 6 |     if class_name == 'Pedestrian' or class_name == 'Person_sitting':
 7 |         cls_ = 1
 8 |     elif class_name == 'Cyclist':
 9 |         cls_ = 2
10 |     elif class_name == 'Car' or class_name == 'Van':
11 |         cls_ = 3
12 |     elif class_name == 'Truck':
13 |         cls_ = 4
14 |     elif class_name == 'Tram':
15 |         cls_ = 5
16 |     else:
17 |         cls_ = 0
18 |     return cls_
19 | 
def get_kitti_label(label_dir, coord='camera'):
    """Parse one space-separated label file into an (n, 8) array.

    Only lines whose first field maps to a class id > 0 (see ``get_class``)
    are kept. Each kept row is ``[cls, x, y, z, l, w, h, r]`` where h, w, l
    come from fields 8, 9, 10 and x, y, z, r from fields 11..14.

    Args:
        label_dir: path of the label text file (despite the name, a file).
        coord: 'camera' keeps the values as stored; 'velodyne' remaps the
            axes and applies the fixed offsets hard-coded below
            (presumably an approximate camera-to-lidar transform - verify).

    Returns:
        float array of shape (n, 8), or None when no line is kept.
    """
    rows = []
    with open(label_dir, 'r') as fp:
        for line in fp.readlines():
            fields = line.replace('\n', '').split(' ')
            cls_ = float(get_class(fields[0]))
            if not cls_ > 0:
                continue
            x = float(fields[11])
            y = float(fields[12])
            z = float(fields[13])
            l = float(fields[10])
            w = float(fields[9])
            h = float(fields[8])
            r = float(fields[14])
            if coord == 'velodyne':
                x = float(fields[13]) + 0.27
                y = - float(fields[11])
                # shift z by half the box height
                z = -0.08 - float(fields[12]) + (h/2)
                r = -float(fields[14]) - np.pi/2
                # wrap the rotated heading back above -pi
                if r < -np.pi:
                    r = r + np.pi * 2
            rows.append(np.array([cls_, x, y, z, l, w, h, r]).reshape(1, 8))
    if len(rows) > 0:
        return np.concatenate(rows, axis=0)
    return None
50 | 
def sort_label(label):
    """Order label rows from the farthest object to the nearest one.

    Distance is the Euclidean norm of columns 1..3 (x, y, z).

    Args:
        label: array of shape (n, 8+) as produced by ``get_kitti_label``,
            or None when a frame has no labelled objects.

    Returns:
        The rows of ``label`` sorted by decreasing distance, or None when
        ``label`` is None (so the result can be passed on unchanged).
    """
    if label is None:
        # get_kitti_label returns None for frames without kept objects
        return None
    x = label[:, 1]
    y = label[:, 2]
    z = label[:, 3]
    dist = np.sqrt(x**2 + y**2 + z**2)
    nearest_first = label[np.argsort(dist), :]
    return nearest_first[::-1, :]
60 | 
def test():
    """Manual check: print the velodyne-frame labels parsed from one sample file."""
    sample = '/home/yifeihu/data/Kitti/object/training/label_2/000010.txt'
    print(get_kitti_label(sample, coord='velodyne'))
65 | 
66 | #test()


--------------------------------------------------------------------------------
/data/make_ali_lidar.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import numpy as np 
 3 | import pandas as pd 
 4 | from PIL import Image
 5 | import random
 6 | 
 7 | def get_points(pts_file, intensity_file, cat_file=None):
 8 |     pts = pd.read_csv(pts_file, header=None)
 9 |     points_loc = np.array(pts, dtype=np.float32)
10 |     N, _ = points_loc.shape
11 |     Inten = pd.read_csv(intensity_file, header=None)
12 |     points_i = np.array(Inten, dtype=np.float32).reshape(-1, 1)
13 |     if cat_file is not None:
14 |         cat = pd.read_csv(cat_file, header=None)
15 |         points_cat = np.array(cat, dtype=np.float32).reshape(-1, 1)
16 |         points_xyzi = np.concatenate([points_loc, points_i, points_cat], axis=1)
17 |     else:
18 |         points_cat = np.zeros((N, 1))
19 |         points_xyzi = np.concatenate([points_loc, points_i, points_cat], axis=1)
20 |     #return points_filter(points_xyzi)
21 |     return points_xyzi
22 | 
def make_ali_lidar_xyzic():
    """Convert every frame under the hard-coded dataset root to a float32 .bin file.

    For each file name found in ``pts/`` the matching files in
    ``intensity/`` and ``category/`` are merged with ``get_points`` and the
    result is written to ``xyzic/`` with 'csv' replaced by 'bin' in the name.
    """
    root = '/raid/alibaba-lidar/training/'
    int_dir = root + 'intensity/'
    pts_dir = root + 'pts/'
    cat_dir = root + 'category/'
    out_dir = root + 'xyzic/'
    for idx, filename in enumerate(os.listdir(pts_dir)):
        print(idx)
        cloud = get_points(pts_dir + filename,
                           int_dir + filename,
                           cat_file=cat_dir + filename)
        outfile = out_dir + filename.replace('csv', 'bin')
        cloud.astype(np.float32).tofile(outfile)
40 | 
def make_train_list():
    """Randomly split the converted frames into trainlist.txt and validlist.txt.

    Each file in the hard-coded ``xyzic`` directory goes to the training list
    with probability 0.9 and to the validation list otherwise; both lists
    are written to the current working directory, one full path per line.
    """
    fdir = '/raid/alibaba-lidar/training/xyzic'
    names = os.listdir(fdir)
    with open('trainlist.txt', 'w') as train_fp, open('validlist.txt', 'w') as valid_fp:
        for name in names:
            entry = fdir + '/' + name + '\n'
            target = train_fp if random.random() < 0.9 else valid_fp
            target.write(entry)
52 | 
53 |     
54 | #make_ali_lidar_xyzic()
55 | #make_train_list()


--------------------------------------------------------------------------------
/data/make_kitti_xyzic.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import numpy as np 
  3 | from PIL import Image
  4 | from get_label import get_kitti_label, sort_label
  5 | import cv2
  6 | 
# Root directory of the KITTI object split processed by this script; the
# functions below append sub-directories such as '/velodyne/' or '/label_2/'.
KITTI_DIR = '/home/adas/data/Kitti/object/testing'
# Lookup table with one 3-component color per class id (rows 0..6);
# make_xyzic_image indexes it with the class column to paint its class image.
color = np.array([[0, 0, 0],
                  [0, 0, 250],
                  [0, 250, 250],
                  [0, 250, 0],
                  [250, 250, 0],
                  [250, 0, 0],
                  [250, 0, 250]])
 15 | 
 16 | def get_calib(calibfile):
 17 |     with open(calibfile, 'r') as fp:
 18 |         text = fp.readlines()
 19 |         p2 = text[2].replace('\n', '').split(': ')[1]
 20 |         r0 = text[4].replace('\n', '').split(': ')[1]
 21 |         velo2cam = text[5].replace('\n', '').split(': ')[1]
 22 |     p2 = np.array(p2.split(' ')).reshape(3, 4).astype(np.float32)
 23 |     r0 = np.array(r0.split(' ')).reshape(3, 3).astype(np.float32)
 24 |     velo2cam = np.array(velo2cam.split(' ')).reshape(3, 4).astype(np.float32)
 25 |     zero1 = np.zeros((3,1))
 26 |     const = np.array([[0, 0, 0, 1]])
 27 |     r0 = np.concatenate((r0, zero1), axis=1)
 28 |     r0 = np.concatenate((r0, const), axis=0)
 29 |     velo2cam = np.concatenate((velo2cam, const), axis=0)
 30 |     return p2, r0, velo2cam
 31 | 
 32 | def make_velodyne_reduce():
 33 |     bin_dir = KITTI_DIR + '/velodyne/'
 34 |     im_dir =  KITTI_DIR + '/image_2/'
 35 |     calib_dir = KITTI_DIR + '/calib/'
 36 |     bin_reduce_dir = KITTI_DIR + '/velodyne_reduce/'
 37 |     binlist = os.listdir(bin_dir)
 38 |     for i in range(len(binlist)):
 39 |         print(i, binlist[i])
 40 |         binfile = binlist[i]
 41 |         calibfile = calib_dir + binfile.replace('bin', 'txt')
 42 |         imfile = im_dir + binfile.replace('bin', 'png')
 43 |         rawbinfile = bin_dir + binfile
 44 |         outbinfile = bin_reduce_dir + binfile
 45 |         data = np.fromfile(rawbinfile, dtype=np.float32).reshape(-1, 4)
 46 |         num, _ = data.shape
 47 |         xyz = data[:, 0:3]
 48 |         xyz1 = np.concatenate((xyz, np.ones((num, 1))), axis=1)
 49 |         p2, r0, velo2cam = get_calib(calibfile)
 50 |         Tmat = np.dot(p2, np.dot(r0, velo2cam)).T
 51 |         imdata = np.dot(xyz1, Tmat)
 52 |         imdata[:, 0] = imdata[:, 0] / imdata[:, 2]
 53 |         imdata[:, 1] = imdata[:, 1] / imdata[:, 2]
 54 |         im = Image.open(imfile)
 55 |         width, height = im.size
 56 |         mask = (imdata[:, 0] > 0) & (imdata[:, 0] < width) & (imdata[:, 1] > 0) & (imdata[:, 1] < height) & (data[:, 0] > 0)
 57 |         outdata = data[mask, :]
 58 |         outdata.tofile(outbinfile)
 59 | 
 60 | def get_xyzic(data, label, h_offset=1.73):
 61 |     if label is None:
 62 |         return data
 63 |     num, _ = data.shape
 64 |     n, _ = label.shape
 65 |     c = np.zeros((num, 1), dtype=np.float32)
 66 |     for i in range(n):
 67 |         obj = label[i, :]
 68 |         print(obj)
 69 |         cls_ , x, y, z, l, w, h, r = obj
 70 |         delta_x = data[:, 0] - np.ones(num)*x
 71 |         delta_y = data[:, 1] - np.ones(num)*y
 72 |         delta_z = data[:, 2] - np.ones(num)*z 
 73 |         theta = np.arctan2(delta_y, delta_x) - np.ones(num)*r
 74 |         L = np.sqrt(delta_x*delta_x + delta_y*delta_y)
 75 |         delta_w = L * np.sin(theta)
 76 |         delta_l = L * np.cos(theta)
 77 |         mask = (delta_w > (-w/2)) & (delta_w < (w/2)) & (delta_l > (-l/2)) & (delta_l < (l/2)) & (delta_z > (-h/2)) & (delta_z < (h/2))
 78 |         c[mask, :] = cls_
 79 |     xyzic = np.concatenate((data, c), axis=1)
 80 |     return xyzic
 81 | 
 82 | def make_xyzic_image(xyzic, yaw_start=-45, yaw_end=45, v_start=-30, v_end=10, d_yaw=0.2, d_v=0.2):
 83 |     num, _ = xyzic.shape
 84 |     width = int((yaw_end - yaw_start) / d_yaw + 0.01)
 85 |     height = int((v_end - v_start) / d_v + 0.01)
 86 |     im_ref = np.zeros((height, width), dtype=np.float32)
 87 |     im_height = np.zeros((height, width), dtype=np.float32)
 88 |     im_cls = np.zeros((height, width, 3), dtype=np.float32)
 89 |     im_range = np.zeros((height, width), dtype=np.float32)
 90 |     x = xyzic[:, 0]
 91 |     y = xyzic[:, 1]
 92 |     z = xyzic[:, 2]
 93 |     L = np.sqrt(x**2 + y**2 + z**2)
 94 |     yaw = np.arctan2(y, x)
 95 |     v_angel = np.arctan2(z, np.sqrt(x*x + y*y))
 96 |     i = ((yaw_end - yaw*180/np.pi) / d_yaw).astype(np.int32)
 97 |     j = ((v_end - v_angel*180/np.pi) / d_v).astype(np.int32)
 98 |     mask = (i > -1) & (i < width) & (j > -1) & (j < height)
 99 |     i = i[mask]
100 |     j = j[mask]
101 |     xyzic = xyzic[mask, :]
102 |     im_ref[j, i] = xyzic[:, 3]
103 |     im_height[j, i] = xyzic[:, 2] + 1.73
104 |     im_cls[j, i] = color[xyzic[:, 4].astype(np.int32), :]
105 |     im_range[j, i] = L
106 |     return im_cls, im_ref, im_height, im_range
107 | 
108 | 
def make_kitti_xyzic():
    """Write a 5-column (x, y, z, i, class) .bin file for every reduced frame.

    Reads each file of ``KITTI_DIR/velodyne_reduce/`` as float32 (-1, 4),
    labels its points with the boxes from the matching ``label_2`` file
    (converted to the velodyne frame, farthest box applied first) and
    writes the result to ``KITTI_DIR/velodyne_xyzic/``. Frames without any
    kept object get an all-zero class column.
    """
    bin_dir = KITTI_DIR + '/velodyne_reduce/'
    bin_xyzic_dir = KITTI_DIR + '/velodyne_xyzic/'
    label_dir = KITTI_DIR + '/label_2/'
    binlist = os.listdir(bin_dir)
    for idx, binfile in enumerate(binlist):
        print(idx, binfile)
        labelfile = label_dir + binfile.replace('bin', 'txt')
        rawbinfile = bin_dir + binfile
        outbinfile = bin_xyzic_dir + binfile
        label = get_kitti_label(labelfile, 'velodyne')
        data = np.fromfile(rawbinfile, dtype=np.float32).reshape(-1, 4)
        if label is None:
            # No labelled object in this frame: sorting None would raise,
            # and the output must still have five columns per point.
            no_class = np.zeros((data.shape[0], 1), dtype=np.float32)
            xyzic = np.concatenate((data, no_class), axis=1)
        else:
            xyzic = get_xyzic(data, sort_label(label))
        # The range/class images were rendered here only for a disabled
        # debug preview; call make_xyzic_image(xyzic) manually to inspect.
        xyzic.tofile(outbinfile)
136 | 
137 | 
def test():
    """Manual check: project one training frame into its image and print diagnostics."""
    calibfile = '/home/yifeihu/data/Kitti/object/training/calib/000000.txt'
    binfile = '/home/yifeihu/data/Kitti/object/training/velodyne/000000.bin'
    imfile = '/home/yifeihu/data/Kitti/object/training/image_2/000000.png'
    p2, r0, velo2cam = get_calib(calibfile)
    for matrix in (p2, r0, velo2cam):
        print(matrix)
    T = np.dot(p2, np.dot(r0, velo2cam))
    print(T)
    data = np.fromfile(binfile, dtype=np.float32).reshape(-1, 4)
    # overwrite the fourth column with 1 to get homogeneous coordinates
    data[:, 3] = 1
    print(data.shape)
    imdata = np.dot(data, T.T)
    depth = imdata[:, 2]
    imdata[:, 0] = imdata[:, 0] / depth
    imdata[:, 1] = imdata[:, 1] / depth
    print(imdata[:10, :])
    im = Image.open(imfile)
    width, height = im.size
    print(width, height)
    u = imdata[:, 0]
    v = imdata[:, 1]
    mask = (u > 0) & (u < width) & (v > 0) & (v < height)
    print(data[mask, :].shape)
161 | 
# Run the conversion only when this file is executed as a script, so that
# importing the module does not start processing the whole dataset.
if __name__ == '__main__':
    #make_velodyne_reduce()
    #test()
    make_kitti_xyzic()


--------------------------------------------------------------------------------
/model/DLA.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn 
 3 | from torch.nn import functional as F 
 4 | import torch.nn.init as init
 5 | from model.ResNet import BasicBlock, make_layer
 6 | 
 7 | #reference: LaserNet: An Efficient Probabilistic 3D Object Detector for Autonomous Driving
 8 | class FeatureExtractor(nn.Module):
 9 |     def __init__(self, inplanes, planes, blocks, stride=1):
10 |         super(FeatureExtractor, self).__init__()
11 |         self.name = 'FeatureExtractor'
12 |         self.block = make_layer(BasicBlock, inplanes, planes, blocks, stride=stride)
13 | 
14 |     def forward(self, x):
15 |         x = self.block(x)
16 |         return x 
17 | 
class FeatureAggregator(nn.Module):
    """Fuse a fine feature map with an upsampled coarse feature map.

    The coarse input goes through a stride-2 transposed convolution,
    batch norm and ReLU, is concatenated with the fine input along the
    channel axis, and the result is passed through two BasicBlock units.

    Args:
        FinePlanes: channels of the fine input.
        CoarsePlanes: channels of the coarse input.
        planes: channels after upsampling and of the output.
    """

    def __init__(self, FinePlanes, CoarsePlanes, planes):
        super().__init__()
        upsample = nn.ConvTranspose2d(CoarsePlanes, planes, 3, stride=2,
                                      padding=1, output_padding=1, bias=False)
        self.block1 = nn.Sequential(upsample,
                                    nn.BatchNorm2d(planes),
                                    nn.ReLU(inplace=True))
        self.block2 = make_layer(BasicBlock, FinePlanes+planes, planes, 2, stride=1)

    def forward(self, xFine, xCoarse):
        upsampled = self.block1(xCoarse)
        # upsampled coarse features first, then the fine ones
        fused = torch.cat((upsampled, xFine), 1)
        return self.block2(fused)
33 | 
class DLA(nn.Module):
    """Three-level extractor/aggregator network with a 1x1 classification head.

    Three extractors produce feature maps at full, half and quarter
    resolution (64, 64 and 128 channels); three aggregators merge them back
    to full resolution, and a 1x1 convolution maps the 128 fused channels
    to ``num_class`` per-pixel scores (no activation is applied).
    """

    def __init__(self, num_class):
        super().__init__()
        # extractors: 3 input channels -> 64 -> 64 (stride 2) -> 128 (stride 2)
        self.block1a = FeatureExtractor(3, 64, 4, stride=1)
        self.block2a = FeatureExtractor(64, 64, 5, stride=2)
        self.block3a = FeatureExtractor(64, 128, 6, stride=2)
        # aggregators: (fine channels, coarse channels, output channels)
        self.block1b = FeatureAggregator(64, 64, 64)
        self.block1c = FeatureAggregator(64, 128, 128)
        self.block2b = FeatureAggregator(64, 128, 128)

        self.cls = nn.Conv2d(128, num_class, 1)

    def forward(self, x):
        level1 = self.block1a(x)
        level2 = self.block2a(level1)
        level3 = self.block3a(level2)
        merged12 = self.block1b(level1, level2)
        merged23 = self.block2b(level2, level3)
        fused = self.block1c(merged12, merged23)
        return self.cls(fused)
56 | 
57 | 
def test():
    """Manual smoke test: run a 5-class DLA on random input and print one output row."""
    model = DLA(5)
    scores = model(torch.randn(1, 3, 200, 500))
    print(scores[0,0,0,:])
63 | 
64 | #test()


--------------------------------------------------------------------------------
/model/ResNet.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | import torch.nn.init as init
  5 | 
  6 | class BasicBlock(nn.Module):
  7 |     expansion = 1
  8 |     def __init__(self, inplanes, planes, stride=1, downsample=None):
  9 |         super(BasicBlock, self).__init__()
 10 |         self.conv1 = nn.Conv2d(inplanes, planes, 3, stride=stride, padding=1)
 11 |         self.bn1 = nn.BatchNorm2d(planes)
 12 |         self.relu1 = nn.ReLU(inplace=True)
 13 |         self.conv2 = nn.Conv2d(planes, planes, 3, stride=1, padding=1)
 14 |         self.bn2 = nn.BatchNorm2d(planes)
 15 |         self.relu2 = nn.ReLU(inplace=True)
 16 |         self.downsample = downsample
 17 |         self.stride = stride
 18 | 
 19 |         init.xavier_normal_(self.conv1.weight.data)
 20 |         init.xavier_normal_(self.conv2.weight.data)
 21 | 
 22 |     def forward(self, x):
 23 |         residual = x
 24 | 
 25 |         out = self.conv1(x)
 26 |         out = self.bn1(out)
 27 |         out = self.relu1(out)
 28 | 
 29 |         out = self.conv2(out)
 30 |         out = self.bn2(out)
 31 | 
 32 |         if self.downsample is not None:
 33 |             residual = self.downsample(x)
 34 | 
 35 |         out += residual
 36 |         out = self.relu2(out)
 37 | 
 38 |         return out
 39 | 
 40 | class Bottleneck(nn.Module):
 41 |     expansion = 4
 42 |     def __init__(self, inplanes, planes, stride=1, downsample=None):
 43 |         super(Bottleneck, self).__init__()
 44 |         self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
 45 |         self.bn1 = nn.BatchNorm2d(planes)
 46 |         self.relu1 = nn.ReLU(inplace=True)
 47 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
 48 |         self.bn2 = nn.BatchNorm2d(planes)
 49 |         self.relu2 = nn.ReLU(inplace=True)
 50 |         self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
 51 |         self.bn3 = nn.BatchNorm2d(planes * 4)
 52 |         self.relu3 = nn.ReLU(inplace=True)
 53 |         self.downsample = downsample
 54 |         self.stride = stride
 55 | 
 56 |         init.xavier_normal_(self.conv1.weight.data)
 57 |         init.xavier_normal_(self.conv2.weight.data)
 58 |         init.xavier_normal_(self.conv3.weight.data)
 59 | 
 60 |     def forward(self, x):
 61 |         residual = x
 62 | 
 63 |         out = self.conv1(x)
 64 |         out = self.bn1(out)
 65 |         out = self.relu1(out)
 66 | 
 67 |         out = self.conv2(out)
 68 |         out = self.bn2(out)
 69 |         out = self.relu2(out)
 70 | 
 71 |         out = self.conv3(out)
 72 |         out = self.bn3(out)
 73 | 
 74 |         if self.downsample is not None:
 75 |             residual = self.downsample(x)
 76 | 
 77 |         out += residual
 78 |         out = self.relu3(out)
 79 | 
 80 |         return out
 81 | 
 82 | def make_layer(block, inplanes, planes, blocks, stride=1):
 83 |         downsample = None
 84 |         if stride != 1 or inplanes != planes * block.expansion:
 85 |             downsample = nn.Sequential(
 86 |                 nn.Conv2d(inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False),
 87 |                 nn.BatchNorm2d(planes * block.expansion),
 88 |             )
 89 |             init.xavier_normal_(downsample[0].weight.data)
 90 | 
 91 |         layers = []
 92 |         layers.append(block(inplanes, planes, stride, downsample))
 93 |         inplanes = planes * block.expansion
 94 |         for i in range(1, blocks):
 95 |             layers.append(block(inplanes, planes))
 96 | 
 97 |         return nn.Sequential(*layers)
 98 | 
 99 | class ResNet34(nn.Module):
100 |     def __init__(self, inplanes):
101 |         super(ResNet34, self).__init__()
102 |         self.conv1 = nn.Conv2d(inplanes, 64, kernel_size=7, stride=2, padding=3, bias=False)
103 |         self.bn1 = nn.BatchNorm2d(64)
104 |         self.relu = nn.ReLU(inplace=True)
105 |         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
106 |         self.layer1 = make_layer(BasicBlock, 64, 64, 3)
107 |         self.layer2 = make_layer(BasicBlock, 64, 128, 4, stride=2)
108 |         self.layer3 = make_layer(BasicBlock, 128, 256, 6, stride=2)
109 |         self.layer4 = make_layer(BasicBlock, 256, 512, 3, stride=2)
110 | 
111 |         init.xavier_normal_(self.conv1.weight.data)
112 | 
113 |     def forward(self, x):
114 |         x = self.conv1(x)
115 |         x = self.bn1(x)
116 |         x = self.relu(x)
117 |         x = self.maxpool(x)
118 |         x = self.layer1(x)
119 |         x = self.layer2(x)
120 |         x = self.layer3(x)
121 |         x = self.layer4(x)
122 |         return x
123 | 
class ResNet50(nn.Module):
    """Backbone built from Bottleneck stages of 3, 4, 6 and 3 units.

    A 7x7 stride-2 convolution, batch norm, ReLU and a 3x3 stride-2 max
    pool form the stem. Every Bottleneck stage outputs
    ``planes * Bottleneck.expansion`` channels, so the stages produce
    256, 512, 1024 and 2048 channels. ``forward`` returns the output of
    the last stage.

    Args:
        inplanes: number of input channels.
    """

    def __init__(self, inplanes):
        super(ResNet50, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # Each stage must take the *expanded* channel count of the previous
        # stage as its input width; passing the unexpanded width makes the
        # first 1x1 convolution of the next stage reject its input.
        expansion = Bottleneck.expansion
        self.layer1 = make_layer(Bottleneck, 64, 64, 3)
        self.layer2 = make_layer(Bottleneck, 64 * expansion, 128, 4, stride=2)
        self.layer3 = make_layer(Bottleneck, 128 * expansion, 256, 6, stride=2)
        self.layer4 = make_layer(Bottleneck, 256 * expansion, 512, 3, stride=2)

        init.xavier_normal_(self.conv1.weight.data)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        return x
148 | 
class ResNet101(nn.Module):
    """Backbone built from Bottleneck stages of 3, 4, 23 and 3 units.

    A 7x7 stride-2 convolution, batch norm, ReLU and a 3x3 stride-2 max
    pool form the stem. Every Bottleneck stage outputs
    ``planes * Bottleneck.expansion`` channels, so the stages produce
    256, 512, 1024 and 2048 channels. ``forward`` returns the output of
    the last stage.

    Args:
        inplanes: number of input channels.
    """

    def __init__(self, inplanes):
        super(ResNet101, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # Each stage must take the *expanded* channel count of the previous
        # stage as its input width; passing the unexpanded width makes the
        # first 1x1 convolution of the next stage reject its input.
        expansion = Bottleneck.expansion
        self.layer1 = make_layer(Bottleneck, 64, 64, 3)
        self.layer2 = make_layer(Bottleneck, 64 * expansion, 128, 4, stride=2)
        self.layer3 = make_layer(Bottleneck, 128 * expansion, 256, 23, stride=2)
        self.layer4 = make_layer(Bottleneck, 256 * expansion, 512, 3, stride=2)

        init.xavier_normal_(self.conv1.weight.data)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        return x


--------------------------------------------------------------------------------
/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/feicay/LaserNet/863a91aa8f542bd278f7bc03f899c0363a898570/model/__init__.py


--------------------------------------------------------------------------------
/model/loss.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torch.nn.functional as F
 4 | from torch.autograd import Variable
 5 | 
 6 | class FocalLossClassify(nn.Module):
 7 |     def __init__(self, num_classes, background=0, cuda=1):
 8 |         super(FocalLossClassify, self).__init__()
 9 |         self.num_cls = num_classes + background
10 |         self.one_hot = torch.eye(num_classes + background)
11 |         if cuda:
12 |             self.one_hot = self.one_hot.cuda()
13 | 
14 |     def focal_loss(self, x, y):
15 |         alpha = 0.25
16 |         gamma = 2
17 |         t = self.one_hot[y.data, :]
18 |         t = Variable(t)
19 |         p = x.sigmoid()
20 |         pt = p*t + (1-p)*(1-t)         # pt = p if t > 0 else 1-p
21 |         w = alpha*t + (1-alpha)*(1-t)  # w = alpha if t > 0 else 1-alpha
22 |         w = w * (1-pt).pow(gamma)
23 |         w = w.detach()
24 |         return F.binary_cross_entropy_with_logits(x, t, w, size_average=False)
25 |     
26 |     def forward(self, cls_pred, cls_truth):
27 |         batch, C, L, W = cls_pred.size()
28 |         cls_pred = cls_pred.permute(0,2,3,1).contiguous().view(batch,-1,self.num_cls)
29 |         cls_truth = cls_truth.permute(0,2,3,1).contiguous().view(batch,-1)
30 |         
31 |         pos = cls_truth > 0
32 |         num_obj = pos.data.sum()
33 | 
34 |         cls_loss = self.focal_loss(cls_pred, cls_truth)
35 | 
36 |         self.cls_loss = cls_loss.data
37 |         self.loss = cls_loss/num_obj.float()
38 |         return self.loss
39 | 
class FocalLoss(nn.Module):
    """Sigmoid focal loss for classification plus smooth-L1 loss for boxes.

    Args:
        num_classes: number of object classes.
        box_len: number of regression values per location.
        num_anchor: stored as an attribute; not used by the computation.
        background: extra class slots added to ``num_classes``; predictions
            must have ``num_classes + background`` channels.
        cuda: when truthy the one-hot table is created on the GPU.

    After ``forward`` the attributes ``cls_loss`` and ``box_loss`` (detached
    summed losses) and ``loss`` (normalized total) hold the latest values.
    """

    def __init__(self, num_classes, box_len, num_anchor, background=1, cuda=1):
        super(FocalLoss, self).__init__()
        self.num_cls = num_classes + background
        self.box_len = box_len
        self.num_anchor = num_anchor
        self.one_hot = torch.eye(num_classes + background)
        if cuda:
            self.one_hot = self.one_hot.cuda()

    def focal_loss(self, x, y):
        """Return the summed focal loss of logits ``x`` against class ids ``y``.

        ``x`` has one more (last) dimension than ``y``, of size ``num_cls``.
        Uses alpha = 0.25 and gamma = 2; the weights are treated as
        constants (no gradient flows through them).
        """
        alpha = 0.25
        gamma = 2
        # follow the device of the logits so CPU and GPU inputs both work
        t = self.one_hot.to(x.device)[y]
        p = x.sigmoid()
        pt = p*t + (1-p)*(1-t)         # pt = p if t > 0 else 1-p
        w = alpha*t + (1-alpha)*(1-t)  # w = alpha if t > 0 else 1-alpha
        w = (w * (1-pt).pow(gamma)).detach()
        return F.binary_cross_entropy_with_logits(x, t, weight=w, reduction='sum')

    def forward(self, cls_pred, cls_truth, box_pred, box_truth):
        """Compute the combined loss.

        Args:
            cls_pred: logits of shape (batch, num_cls, H, W).
            cls_truth: integer class ids of shape (batch, 1, H, W);
                ids > 0 are positives, 0 is negative, and ids below 0 are
                excluded from the classification loss.
            box_pred, box_truth: shape (batch, box_len, H, W).

        Returns:
            (focal loss + smooth-L1 loss over positive locations) divided
            by the number of positives; when there are none the divisor is
            1, so the result stays finite.
        """
        batch = cls_pred.size(0)
        box_pred = box_pred.permute(0,2,3,1).contiguous().view(batch,-1,self.box_len)
        cls_pred = cls_pred.permute(0,2,3,1).contiguous().view(batch,-1,self.num_cls)
        box_truth = box_truth.permute(0,2,3,1).contiguous().view(batch,-1,self.box_len)
        cls_truth = cls_truth.permute(0,2,3,1).contiguous().view(batch,-1)

        pos = cls_truth > 0
        # number of positives, at least 1 to avoid dividing by zero
        num_obj = pos.sum().clamp(min=1)

        # box loss: smooth L1 over the positive locations only
        mask = pos.unsqueeze(2).expand_as(box_pred)      # [batch, locations, box_len]
        masked_box_pred = box_pred[mask].view(-1,self.box_len)      # [#pos, box_len]
        masked_box_truth = box_truth[mask].view(-1,self.box_len)    # [#pos, box_len]
        box_loss = F.smooth_l1_loss(masked_box_pred, masked_box_truth, reduction='sum')

        # class loss: focal loss over every location that is not ignored
        pos_neg = cls_truth > -1  # exclude ignored anchors
        mask = pos_neg.unsqueeze(2).expand_as(cls_pred)
        masked_cls_pred = cls_pred[mask].view(-1,self.num_cls)
        cls_loss = self.focal_loss(masked_cls_pred, cls_truth[pos_neg])

        self.cls_loss = cls_loss.detach()
        self.box_loss = box_loss.detach()
        self.loss = (cls_loss + box_loss)/num_obj.float()
        return self.loss


--------------------------------------------------------------------------------
/tensorrt/BatchStream.h:
--------------------------------------------------------------------------------
  1 | #ifndef BATCH_STREAM_H
  2 | #define BATCH_STREAM_H
  3 | 
  4 | #include <vector>
  5 | #include <assert.h>
  6 | #include <algorithm>
  7 | #include "NvInfer.h"
  8 | #include <iostream>
  9 | #include <string>
 10 | #include <stdio.h>
 11 | #include <fstream>
 12 | #include "make_input.h"
 13 | 
 14 | #define PCLOUD_SIZE 1000000 //the size of buffer for the point cloud
 15 | 
 16 | class BatchStream
 17 | {
 18 | public:
 19 |     BatchStream(int batchSize, int maxBatches, std::string calibList)
 20 |         : mBatchSize(batchSize)
 21 |         , mMaxBatches(maxBatches)
 22 |         , mCalibList(calibList)
 23 |     {
 24 |         //get calib file names
 25 |         std::fstream fin(mCalibList);
 26 |         std::string ReadLine;
 27 |         while(std::getline (fin, ReadLine))
 28 |         {
 29 |             mFileList.push_back(ReadLine);
 30 |         }
 31 |         //set network input dims
 32 |         mDims = nvinfer1::DimsNCHW{1, 3, 200, 400};
 33 |         mImageSize = mDims.c() * mDims.h() * mDims.w();
 34 |         mTruthSize = mDims.h() * mDims.w() ;
 35 |         mBatch.resize(mBatchSize * mImageSize, 0);
 36 |         mLabels.resize(mBatchSize * mTruthSize, 0);
 37 |         mFileBatch.resize(mDims.n() * mImageSize, 0); // one input image buffer
 38 |         mFileLabels.resize(mDims.n() * mTruthSize, 0);
 39 |         mPointsBuf.resize(PCLOUD_SIZE, 0);
 40 |         reset(0);
 41 |     }
 42 | 
 43 |     void reset(int firstBatch)
 44 |     {
 45 |         mBatchCount = 0;
 46 |         mFileCount = 0;
 47 |         mFileBatchPos = mDims.n();
 48 |         skip(firstBatch);
 49 |     }
 50 | 
 51 |     bool next()
 52 |     {
 53 |         if (mBatchCount == mMaxBatches)
 54 |             return false;
 55 | 
 56 |         for (int csize = 1, batchPos = 0; batchPos < mBatchSize; batchPos += csize)
 57 |         {
 58 |             if (!update())
 59 |                 return false;
 60 |             std::copy_n(getFileBatch(), csize * mImageSize, getBatch() + batchPos * mImageSize);
 61 |             std::copy_n(getFileLabels(), csize * mTruthSize, getLabels() + batchPos * mTruthSize);
 62 |         }
 63 |         mBatchCount++;
 64 |         return true;
 65 |     }
 66 | 
 67 |     void skip(int skipCount)
 68 |     {
 69 |         mFileCount = skipCount;
 70 |     }
 71 | 
 72 |     float* getBatch() { return &mBatch[0]; }
 73 |     float* getLabels() { return &mLabels[0]; }
 74 |     int getBatchesRead() const { return mBatchCount; }
 75 |     int getBatchSize() const { return mBatchSize; }
 76 |     nvinfer1::DimsNCHW getDims() const { return mDims; }
 77 | 
 78 | private:
 79 |     float* getFileBatch() { return &mFileBatch[0]; }
 80 |     float* getFileLabels() { return &mFileLabels[0]; }
 81 |     float* getPointsBuf() { return &mPointsBuf[0]; }
 82 | 
 83 |     bool update()
 84 |     {
 85 |         memset(getPointsBuf(), 0, PCLOUD_SIZE*sizeof(float));
 86 |         memset(getFileBatch(), 0, mImageSize*sizeof(float));
 87 |         memset(getFileLabels(), 0, mTruthSize*sizeof(float));
 88 |         char buf_s[64];
 89 |         int size = 0;
 90 |         mFileCount++;
 91 |         std::string inputFileName = mFileList[mFileCount];
 92 |         //std::cout<<inputFileName<<std::endl;
 93 |         FILE* file = fopen(inputFileName.c_str(), "rb");
 94 |         if (!file)
 95 |             return false;
 96 |         else{
 97 |             fseek(file,0L,SEEK_END); 
 98 |             size = ftell(file);
 99 |             fseek(file,0L,SEEK_SET);
100 |             fread(getPointsBuf(), 1, size, file);
101 |         }
102 |         int points_num = size / sizeof(float) / 5;
103 |         xyzic_to_image(getPointsBuf(), points_num, -45, 45, -30, 10, 0.225, 0.2, getFileBatch(), getFileLabels());
104 |         fclose(file);
105 |         mFileBatchPos = 0;
106 |         return true;
107 |     }
108 | 
109 |     int mBatchSize{0};
110 |     int mMaxBatches{0};
111 |     int mBatchCount{0};
112 | 
113 |     int mFileCount{0}, mFileBatchPos{0};
114 |     int mImageSize{0};
115 |     int mTruthSize{0};
116 | 
117 |     nvinfer1::DimsNCHW mDims;
118 |     std::string mCalibList;
119 |     std::vector<std::string> mFileList;
120 |     std::vector<float> mBatch;
121 |     std::vector<float> mLabels;
122 |     std::vector<float> mFileBatch;
123 |     std::vector<float> mFileLabels;
124 |     std::vector<float> mPointsBuf;//the buffer for the point cloud
125 | };
126 | 
127 | #endif
128 | 


--------------------------------------------------------------------------------
/tensorrt/Makefile:
--------------------------------------------------------------------------------
 1 | OUTNAME=lidar_seg_int8
 2 | 
 3 | CC = g++
 4 | CUCC =$(CUDA_INSTALL_DIR)/bin/nvcc -m64
 5 | 
 6 | TENSORRT_DIR=/usr/local/TensorRT-5.0.2.6
 7 | CUDA_INSTALL_DIR=/usr/local/cuda
 8 | CUDA_LIBDIR=lib64
 9 | 
10 | LIBPATHS=-L/usr/local/lib -L"$(CUDA_INSTALL_DIR)/$(CUDA_LIBDIR)" -L"$(TENSORRT_DIR)/lib"
11 | TENSORRT_LIB=-lnvinfer -lnvparsers -lnvinfer_plugin -lnvonnxparser -lnvonnxparser_runtime
12 | INCPATHS=-I/usr/local/include -I"$(CUDA_INSTALL_DIR)/include"  -I"$(TENSORRT_DIR)/include" 
13 | 
14 | COMMON_FLAGS += -Wall -std=c++11  $(INCPATHS) -Wl,-rpath "$(TENSORRT_DIR)/lib"
15 | COMMON_LD_FLAGS += $(LIBPATHS)
16 | COMMON_LIBS = -lcudnn -lcublas -lnvToolsExt -lcudart $(TENSORRT_LIB)
17 | 
18 | OBJS=make_input.o lasernet.o 
19 | BIN=demo.bin
20 | 
21 | all:$(OBJS)
22 | 	$(CC) $(OBJS) $(COMMON_FLAGS) $(COMMON_LIBS) $(COMMON_LD_FLAGS) -o $(BIN) 
23 | 
24 | %.o:%.cpp
25 | 	$(CC) $(COMMON_FLAGS) $(COMMON_LIBS) $(COMMON_LD_FLAGS) -c $< -o $@
26 | 
27 | clean:
28 | 	rm *.o $(BIN) 


--------------------------------------------------------------------------------
/tensorrt/common.h:
--------------------------------------------------------------------------------
  1 | #ifndef TENSORRT_COMMON_H
  2 | #define TENSORRT_COMMON_H
  3 | 
  4 | #include "NvInfer.h"
  5 | #include "NvInferPlugin.h"
  6 | 
  7 | #include <algorithm>
  8 | #include <cassert>
  9 | #include <chrono>
 10 | #include <cmath>
 11 | #include <cstring>
 12 | #include <cuda_runtime_api.h>
 13 | #include <fstream>
 14 | #include <iostream>
 15 | #include <iomanip>
 16 | #include <iterator>
 17 | #include <map>
 18 | #include <memory>
 19 | #include <new>
 20 | #include <numeric>
 21 | #include <ratio>
 22 | #include <string>
 23 | #include <utility>
 24 | #include <vector>
 25 | 
 26 | using namespace std;
 27 | using namespace nvinfer1;
 28 | using namespace plugin;
 29 | 
 30 | #define CHECK(status)                             \
 31 |     do                                            \
 32 |     {                                             \
 33 |         auto ret = (status);                      \
 34 |         if (ret != 0)                             \
 35 |         {                                         \
 36 |             std::cout << "Cuda failure: " << ret; \
 37 |             abort();                              \
 38 |         }                                         \
 39 |     } while (0)
 40 | 
 41 | constexpr long double operator"" _GB(long double val)
 42 | {
 43 |     return val * (1 << 30);
 44 | }
 45 | constexpr long double operator"" _MB(long double val) { return val * (1 << 20); }
 46 | constexpr long double operator"" _KB(long double val) { return val * (1 << 10); }
 47 | 
 48 | // These is necessary if we want to be able to write 1_GB instead of 1.0_GB.
 49 | // Since the return type is signed, -1_GB will work as expected.
 50 | constexpr long long int operator"" _GB(long long unsigned int val) { return val * (1 << 30); }
 51 | constexpr long long int operator"" _MB(long long unsigned int val) { return val * (1 << 20); }
 52 | constexpr long long int operator"" _KB(long long unsigned int val) { return val * (1 << 10); }
 53 | 
 54 | // Logger for TensorRT info/warning/errors
 55 | class Logger : public nvinfer1::ILogger
 56 | {
 57 | public:
 58 |     Logger(Severity severity = Severity::kWARNING)
 59 |         : reportableSeverity(severity)
 60 |     {
 61 |     }
 62 | 
 63 |     void log(Severity severity, const char* msg) override
 64 |     {
 65 |         // suppress messages with severity enum value greater than the reportable
 66 |         if (severity > reportableSeverity)
 67 |             return;
 68 | 
 69 |         switch (severity)
 70 |         {
 71 |         case Severity::kINTERNAL_ERROR: std::cerr << "INTERNAL_ERROR: "; break;
 72 |         case Severity::kERROR: std::cerr << "ERROR: "; break;
 73 |         case Severity::kWARNING: std::cerr << "WARNING: "; break;
 74 |         case Severity::kINFO: std::cerr << "INFO: "; break;
 75 |         default: std::cerr << "UNKNOWN: "; break;
 76 |         }
 77 |         std::cerr << msg << std::endl;
 78 |     }
 79 | 
 80 |     Severity reportableSeverity;
 81 | };
 82 | 
// Per-layer timing accumulator implementing nvinfer1::IProfiler.
// Times reported through reportLayerTime() are summed per layer name together
// with an invocation count; operator<< prints them as a table.
struct SimpleProfiler : public nvinfer1::IProfiler
{
    // Accumulated time (as reported, in ms) and number of reports for one layer.
    struct Record
    {
        float time{0};
        int count{0};
    };

    // Add one timing sample for `layerName`; unknown names get a new record.
    virtual void reportLayerTime(const char* layerName, float ms)
    {
        mProfile[layerName].count++;
        mProfile[layerName].time += ms;
    }

    // Create a profiler called `name`. When `srcProfilers` is given, their
    // records are merged in: times and counts of equal layer names are summed.
    SimpleProfiler(
        const char* name,
        const std::vector<SimpleProfiler>& srcProfilers = std::vector<SimpleProfiler>())
        : mName(name)
    {
        for (const auto& srcProfiler : srcProfilers)
        {
            for (const auto& rec : srcProfiler.mProfile)
            {
                auto it = mProfile.find(rec.first);
                if (it == mProfile.end())
                {
                    mProfile.insert(rec);
                }
                else
                {
                    it->second.time += rec.second.time;
                    it->second.count += rec.second.count;
                }
            }
        }
    }

    // Print one row per layer (share of total time, invocations, time in ms)
    // followed by the total runtime. Rows appear in std::map key order.
    friend std::ostream& operator<<(std::ostream& out, const SimpleProfiler& value)
    {
        out << "========== " << value.mName << " profile ==========" << std::endl;
        float totalTime = 0;
        std::string layerNameStr = "TensorRT layer name";
        // The name column is at least 70 characters wide and grows to the longest layer name.
        int maxLayerNameLength = std::max(static_cast<int>(layerNameStr.size()), 70);
        for (const auto& elem : value.mProfile)
        {
            totalTime += elem.second.time;
            maxLayerNameLength = std::max(maxLayerNameLength, static_cast<int>(elem.first.size()));
        }

        // Remember the caller's stream formatting so it can be restored below.
        auto old_settings = out.flags();
        auto old_precision = out.precision();
        // Output header
        {
            out << std::setw(maxLayerNameLength) << layerNameStr << " ";
            out << std::setw(12) << "Runtime, "
                << "%"
                << " ";
            out << std::setw(12) << "Invocations"
                << " ";
            out << std::setw(12) << "Runtime, ms" << std::endl;
        }
        for (const auto& elem : value.mProfile)
        {
            out << std::setw(maxLayerNameLength) << elem.first << " ";
            // NOTE(review): no guard for totalTime == 0; the percentage is then a non-finite float.
            out << std::setw(12) << std::fixed << std::setprecision(1) << (elem.second.time * 100.0F / totalTime) << "%"
                << " ";
            out << std::setw(12) << elem.second.count << " ";
            out << std::setw(12) << std::fixed << std::setprecision(2) << elem.second.time << std::endl;
        }
        out.flags(old_settings);
        out.precision(old_precision);
        out << "========== " << value.mName << " total runtime = " << totalTime << " ms ==========" << std::endl;

        return out;
    }

private:
    std::string mName;                      // label used in the printed report
    std::map<std::string, Record> mProfile; // layer name -> accumulated record
};
163 | 
// Locate a file given its filename (or filepath suffix) and the directories it
// might lie in.
//
// For each directory, `dir + filepathSuffix` is tried first and then the same
// path with up to MAX_DEPTH - 1 leading "../" components, i.e. walking back
// from the current working directory. The first path that can be opened is
// returned. If nothing is found, the searched directories are printed and the
// process exits with EXIT_FAILURE; an empty `directories` list is handled the
// same way.
inline std::string locateFile(const std::string& filepathSuffix, const std::vector<std::string>& directories)
{
    const int MAX_DEPTH{10};

    for (const auto& dir : directories)
    {
        std::string candidate = dir + filepathSuffix;
        for (int depth = 0; depth < MAX_DEPTH; depth++)
        {
            std::ifstream checkFile(candidate);
            if (checkFile.is_open())
            {
                return candidate;
            }
            candidate = "../" + candidate; // Try again in parent dir
        }
    }

    // Build the report without touching front()/begin() + 1, which would be
    // undefined for an empty directory list.
    std::string directoryList;
    bool firstEntry = true;
    for (const auto& dir : directories)
    {
        if (!firstEntry)
        {
            directoryList += "\n\t";
        }
        directoryList += dir;
        firstEntry = false;
    }
    std::cout << "Could not find " << filepathSuffix << " in data directories:\n\t" << directoryList << std::endl;
    exit(EXIT_FAILURE);
}
202 | 
// Read the pixel data of a binary PGM file into `buffer`.
// The four whitespace-separated header tokens are consumed but their values
// are not used: exactly inH * inW bytes are read after the header.
inline void readPGMFile(const std::string& fileName, uint8_t* buffer, int inH, int inW)
{
    std::ifstream pgm(fileName, std::ifstream::binary);
    assert(pgm.is_open() && "Attempting to read from a file that is not open.");

    // Header: magic number, two dimensions, maximum value.
    std::string headerToken;
    for (int field = 0; field < 4; ++field)
    {
        pgm >> headerToken;
    }

    // Skip the single separator byte between the header and the pixel data.
    pgm.seekg(1, pgm.cur);
    const int pixelBytes = inH * inW;
    pgm.read(reinterpret_cast<char*>(buffer), pixelBytes);
}
212 | 
213 | namespace samplesCommon
214 | {
215 | 
216 | inline void* safeCudaMalloc(size_t memSize)
217 | {
218 |     void* deviceMem;
219 |     CHECK(cudaMalloc(&deviceMem, memSize));
220 |     if (deviceMem == nullptr)
221 |     {
222 |         std::cerr << "Out of memory" << std::endl;
223 |         exit(1);
224 |     }
225 |     return deviceMem;
226 | }
227 | 
// True when the environment variable TENSORRT_DEBUG is set (to any value).
inline bool isDebug()
{
    return std::getenv("TENSORRT_DEBUG") != nullptr;
}
232 | 
// Deleter functor for objects that are released through a destroy() member
// instead of `delete`. Null pointers are ignored.
struct InferDeleter
{
    template <typename T>
    void operator()(T* obj) const
    {
        if (obj == nullptr)
        {
            return;
        }
        obj->destroy();
    }
};
244 | 
245 | template <typename T>
246 | inline std::shared_ptr<T> infer_object(T* obj)
247 | {
248 |     if (!obj)
249 |     {
250 |         throw std::runtime_error("Failed to create object");
251 |     }
252 |     return std::shared_ptr<T>(obj, InferDeleter());
253 | }
254 | 
// Return the indices that sort the range [begin, end).
// Ascending by default; descending when `reverse` is true. `Iter` must be a
// random-access iterator (it is indexed with operator[]).
template <class Iter>
inline std::vector<size_t> argsort(Iter begin, Iter end, bool reverse = false)
{
    std::vector<size_t> order(end - begin);
    std::iota(order.begin(), order.end(), 0);

    // One comparator for both directions: swap the operands when reversed.
    std::sort(order.begin(), order.end(), [&begin, reverse](size_t lhs, size_t rhs) {
        return reverse ? begin[rhs] < begin[lhs] : begin[lhs] < begin[rhs];
    });
    return order;
}
274 | 
// Append every non-empty line of `fileName` to `refVector`.
// Returns false (after printing an error) when the file cannot be opened;
// `refVector` is left untouched in that case.
inline bool readReferenceFile(const std::string& fileName, std::vector<std::string>& refVector)
{
    std::ifstream infile(fileName);
    if (!infile.is_open())
    {
        std::cout << "ERROR: readReferenceFile: Attempting to read from a file that is not open." << std::endl;
        return false;
    }

    for (std::string line; std::getline(infile, line);)
    {
        if (!line.empty())
        {
            refVector.push_back(line);
        }
    }
    infile.close();
    return true;
}
293 | 
294 | template <typename result_vector_t>
295 | inline std::vector<std::string> classify(const vector<string>& refVector, const result_vector_t& output, const size_t topK)
296 | {
297 |     auto inds = samplesCommon::argsort(output.cbegin(), output.cend(), true);
298 |     std::vector<std::string> result;
299 |     for (size_t k = 0; k < topK; ++k)
300 |     {
301 |         result.push_back(refVector[inds[k]]);
302 |     }
303 |     return result;
304 | }
305 | 
306 | //...LG returns top K indices, not values.
307 | template <typename T>
308 | inline vector<size_t> topK(const vector<T> inp, const size_t k)
309 | {
310 |     vector<size_t> result;
311 |     std::vector<size_t> inds = samplesCommon::argsort(inp.cbegin(), inp.cend(), true);
312 |     result.assign(inds.begin(), inds.begin() + k);
313 |     return result;
314 | }
315 | 
// Replace the contents of `out` with all whitespace-separated values of type T
// parsed from `fileName`. `size` is only a capacity hint passed to reserve().
// Returns false (after printing an error) when the file cannot be opened.
template <typename T>
inline bool readASCIIFile(const string& fileName, const size_t size, vector<T>& out)
{
    std::ifstream infile(fileName);
    if (infile.is_open())
    {
        out.clear();
        out.reserve(size);
        std::istream_iterator<T> first(infile);
        std::istream_iterator<T> last;
        out.assign(first, last);
        infile.close();
        return true;
    }

    std::cout << "ERROR readASCIIFile: Attempting to read from a file that is not open." << std::endl;
    return false;
}
331 | 
// Write every element of `in` to `fileName`, each followed by a single space.
// Returns false (after printing an error) when the file cannot be opened.
template <typename T>
inline bool writeASCIIFile(const string& fileName, const vector<T>& in)
{
    std::ofstream outfile(fileName);
    if (outfile.is_open())
    {
        std::copy(in.begin(), in.end(), std::ostream_iterator<T>(outfile, " "));
        outfile.close();
        return true;
    }

    std::cout << "ERROR: writeASCIIFile: Attempting to write to a file that is not open." << std::endl;
    return false;
}
348 | 
349 | inline void print_version()
350 | {
351 | //... This can be only done after statically linking this support into parserONNX.library
352 | #if 0
353 |     std::cout << "Parser built against:" << std::endl;
354 |     std::cout << "  ONNX IR version:  " << nvonnxparser::onnx_ir_version_string(onnx::IR_VERSION) << std::endl;
355 | #endif
356 |     std::cout << "  TensorRT version: "
357 |               << NV_TENSORRT_MAJOR << "."
358 |               << NV_TENSORRT_MINOR << "."
359 |               << NV_TENSORRT_PATCH << "."
360 |               << NV_TENSORRT_BUILD << std::endl;
361 | }
362 | 
// Return the text after the last '.' in `filepath`.
// A path without any '.' is returned unchanged.
inline string getFileType(const string& filepath)
{
    // find_last_of yields npos when there is no dot; npos + 1 wraps to 0,
    // which makes substr return the whole string.
    const auto lastDot = filepath.find_last_of(".");
    return filepath.substr(lastDot + 1);
}
367 | 
// Return a copy of `inp` with every character passed through ::tolower.
inline string toLower(const string& inp)
{
    string lowered(inp.size(), '\0');
    std::transform(inp.begin(), inp.end(), lowered.begin(), ::tolower);
    return lowered;
}
374 | 
375 | inline void enableDLA(IBuilder* b, int useDLACore)
376 | {
377 |     if (useDLACore >= 0)
378 |     {
379 |         b->allowGPUFallback(true);
380 |         b->setFp16Mode(true);
381 |         b->setDefaultDeviceType(DeviceType::kDLA);
382 |         b->setDLACore(useDLACore);
383 |     }
384 | }
385 | 
// Scan the command line for "--useDLACore=<n>" and return <n> for the first
// such argument, or -1 when the option is absent.
// A value that is not a number makes std::stoi throw (std::invalid_argument
// or std::out_of_range), as before.
inline int parseDLA(int argc, char** argv)
{
    static const char kOption[] = "--useDLACore=";
    const size_t kOptionLen = sizeof(kOption) - 1; // 13, without the terminating NUL

    for (int i = 1; i < argc; i++)
    {
        if (strncmp(argv[i], kOption, kOptionLen) == 0)
            return std::stoi(argv[i] + kOptionLen);
    }
    return -1;
}
396 | 
397 | inline unsigned int getElementSize(nvinfer1::DataType t)
398 | {
399 |     switch (t)
400 |     {
401 |     case nvinfer1::DataType::kINT32: return 4;
402 |     case nvinfer1::DataType::kFLOAT: return 4;
403 |     case nvinfer1::DataType::kHALF: return 2;
404 |     case nvinfer1::DataType::kINT8: return 1;
405 |     }
406 |     throw std::runtime_error("Invalid DataType.");
407 |     return 0;
408 | }
409 | 
410 | inline int64_t volume(const nvinfer1::Dims& d)
411 | {
412 |     return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies<int64_t>());
413 | }
414 | 
// In-memory PPM image with a fixed-size pixel buffer of C * H * W bytes.
// magic, w, h and max hold the header fields as read from the file.
template <int C, int H, int W>
struct PPM
{
    std::string magic;
    std::string fileName;
    int h;
    int w;
    int max;
    uint8_t buffer[C * H * W];
};
422 | 
// Axis-aligned box given by two corner points (x1, y1) and (x2, y2).
struct BBox
{
    float x1;
    float y1;
    float x2;
    float y2;
};
427 | 
428 | template <int C, int H, int W>
429 | inline void readPPMFile(const std::string& filename, samplesCommon::PPM<C, H, W>& ppm)
430 | {
431 |     ppm.fileName = filename;
432 |     std::ifstream infile(filename, std::ifstream::binary);
433 |     assert(infile.is_open() && "Attempting to read from a file that is not open.");
434 |     infile >> ppm.magic >> ppm.w >> ppm.h >> ppm.max;
435 |     infile.seekg(1, infile.cur);
436 |     infile.read(reinterpret_cast<char*>(ppm.buffer), ppm.w * ppm.h * 3);
437 | }
438 | 
439 | template <int C, int H, int W>
440 | inline void writePPMFileWithBBox(const std::string& filename, PPM<C, H, W>& ppm, const BBox& bbox)
441 | {
442 |     std::ofstream outfile("./" + filename, std::ofstream::binary);
443 |     assert(!outfile.fail());
444 |     outfile << "P6"
445 |             << "\n"
446 |             << ppm.w << " " << ppm.h << "\n"
447 |             << ppm.max << "\n";
448 |     auto round = [](float x) -> int { return int(std::floor(x + 0.5f)); };
449 |     const int x1 = std::min(std::max(0, round(int(bbox.x1))), W - 1);
450 |     const int x2 = std::min(std::max(0, round(int(bbox.x2))), W - 1);
451 |     const int y1 = std::min(std::max(0, round(int(bbox.y1))), H - 1);
452 |     const int y2 = std::min(std::max(0, round(int(bbox.y2))), H - 1);
453 |     for (int x = x1; x <= x2; ++x)
454 |     {
455 |         // bbox top border
456 |         ppm.buffer[(y1 * ppm.w + x) * 3] = 255;
457 |         ppm.buffer[(y1 * ppm.w + x) * 3 + 1] = 0;
458 |         ppm.buffer[(y1 * ppm.w + x) * 3 + 2] = 0;
459 |         // bbox bottom border
460 |         ppm.buffer[(y2 * ppm.w + x) * 3] = 255;
461 |         ppm.buffer[(y2 * ppm.w + x) * 3 + 1] = 0;
462 |         ppm.buffer[(y2 * ppm.w + x) * 3 + 2] = 0;
463 |     }
464 |     for (int y = y1; y <= y2; ++y)
465 |     {
466 |         // bbox left border
467 |         ppm.buffer[(y * ppm.w + x1) * 3] = 255;
468 |         ppm.buffer[(y * ppm.w + x1) * 3 + 1] = 0;
469 |         ppm.buffer[(y * ppm.w + x1) * 3 + 2] = 0;
470 |         // bbox right border
471 |         ppm.buffer[(y * ppm.w + x2) * 3] = 255;
472 |         ppm.buffer[(y * ppm.w + x2) * 3 + 1] = 0;
473 |         ppm.buffer[(y * ppm.w + x2) * 3 + 2] = 0;
474 |     }
475 |     outfile.write(reinterpret_cast<char*>(ppm.buffer), ppm.w * ppm.h * 3);
476 | }
477 | 
// Common base of the timers: accumulates elapsed time in milliseconds (mMs)
// and exposes it in several units. start()/stop() do nothing here and are
// meant to be overridden.
class TimerBase
{
public:
    // Virtual so that a derived timer destroyed through a TimerBase pointer
    // runs its own destructor.
    virtual ~TimerBase() = default;

    virtual void start() {}
    virtual void stop() {}
    float microseconds() const noexcept { return mMs * 1000.f; }
    float milliseconds() const noexcept { return mMs; }
    float seconds() const noexcept { return mMs / 1000.f; }
    // Discard the accumulated time.
    void reset() noexcept { mMs = 0.f; }

protected:
    float mMs{0.0f}; // accumulated elapsed time in milliseconds
};
491 | 
// Timer based on a pair of CUDA events recorded on a given stream.
// Every start()/stop() pair adds the elapsed time between the two events to
// the accumulated mMs of TimerBase. Any failing CUDA call aborts via CHECK.
class GpuTimer : public TimerBase
{
public:
    // Creates the start/stop events; `stream` is the stream they are recorded on.
    GpuTimer(cudaStream_t stream)
        : mStream(stream)
    {
        CHECK(cudaEventCreate(&mStart));
        CHECK(cudaEventCreate(&mStop));
    }
    ~GpuTimer()
    {
        CHECK(cudaEventDestroy(mStart));
        CHECK(cudaEventDestroy(mStop));
    }
    // Record the start event on the stream.
    void start() { CHECK(cudaEventRecord(mStart, mStream)); }
    // Record the stop event, wait for it to complete, then accumulate the
    // elapsed time between the start and stop events.
    void stop()
    {
        CHECK(cudaEventRecord(mStop, mStream));
        float ms{0.0f};
        CHECK(cudaEventSynchronize(mStop));
        CHECK(cudaEventElapsedTime(&ms, mStart, mStop));
        mMs += ms;
    }

private:
    cudaEvent_t mStart, mStop; // events bracketing the timed region
    cudaStream_t mStream;      // stream the events are recorded on
}; // class GpuTimer
520 | 
521 | template <typename Clock>
522 | class CpuTimer : public TimerBase
523 | {
524 | public:
525 |     using clock_type = Clock;
526 | 
527 |     void start() { mStart = Clock::now(); }
528 |     void stop()
529 |     {
530 |         mStop = Clock::now();
531 |         mMs += std::chrono::duration<float, std::milli>{mStop - mStart}.count();
532 |     }
533 | 
534 | private:
535 |     std::chrono::time_point<Clock> mStart, mStop;
536 | }; // class CpuTimer
537 | 
538 | using PreciseCpuTimer = CpuTimer<std::chrono::high_resolution_clock>;
539 | 
540 | } // namespace samplesCommon
541 | 
542 | #endif // TENSORRT_COMMON_H
543 | 


--------------------------------------------------------------------------------
/tensorrt/lasernet.cpp:
--------------------------------------------------------------------------------
  1 | #include <assert.h>
  2 | #include <fstream>
  3 | #include <sstream>
  4 | #include <iostream>
  5 | #include <cmath>
  6 | #include <sys/stat.h>
  7 | #include <cmath>
  8 | #include <time.h>
  9 | #include <cuda_runtime_api.h>
 10 | #include <unordered_map>
 11 | #include <algorithm>
 12 | #include <float.h>
 13 | #include <string.h>
 14 | #include <chrono>
 15 | #include <iterator>
 16 | 
 17 | #include "NvInfer.h"
 18 | #include "NvOnnxParser.h"
 19 | #include "NvOnnxParserRuntime.h"
 20 | 
 21 | #include "BatchStream.h"
 22 | #include "common.h"
 23 | 
 24 | using namespace nvinfer1;
 25 | using namespace nvonnxparser;
 26 | 
 27 | static Logger gLogger;
 28 | static int gUseDLACore = -1;
 29 | 
// Calibration / scoring configuration.
// NOTE(review): the previous trailing comments ("calibrate over images 0-600",
// "score over images 500-5000") did not match the values below. The code that
// consumes these constants is not visible here — confirm the intended ranges.
static const int CAL_BATCH_SIZE = 4;
static const int FIRST_CAL_BATCH = 500, NB_CAL_BATCHES = 2000;            // first batch index and batch count for calibration
static const int FIRST_CAL_SCORE_BATCH = 500, NB_CAL_SCORE_BATCHES = 200; // first batch index and batch count for scoring
 33 | 
 34 | const char* gNetworkName{nullptr};
 35 | std::string calibPath("/home/adas/data/alibaba-lidar/training/xyzic/");
 36 | std::string calibList("/home/adas/data/pytorch_ws/LaserNet/validlist.txt");
 37 | 
 38 | bool onnxToTRTModel(const std::string& modelFile,
 39 |                     int& maxBatchSize, 
 40 |                     DataType dataType,
 41 |                     IInt8Calibrator* calibrator,
 42 |                     nvinfer1::IHostMemory*& trtModelStream)
 43 | {
 44 |     int verbosity = (int) nvinfer1::ILogger::Severity::kWARNING;
 45 |     // create the builder
 46 |     IBuilder* builder = createInferBuilder(gLogger);
 47 |     // parse the onnx model to populate the network, then set the outputs
 48 |     INetworkDefinition* network = builder->createNetwork();
 49 |     nvonnxparser::IParser* parser = nvonnxparser::createParser(*network, gLogger);
 50 |     if (!parser->parseFromFile(modelFile.c_str(), verbosity))
 51 |     {
 52 |         std::string msg("failed to parse onnx file");
 53 |         gLogger.log(nvinfer1::ILogger::Severity::kERROR, msg.c_str());
 54 |         exit(EXIT_FAILURE);
 55 |     }
 56 |     //check platform and datatype
 57 |     if ((dataType == DataType::kINT8 && !builder->platformHasFastInt8()))
 58 |     {
 59 |         std::cout<<"Current Device does not support INT8 inference!"<<std::endl;
 60 |         return false;
 61 |     }
 62 |     if(dataType == DataType::kHALF && !builder->platformHasFastFp16())
 63 |     {
 64 |         std::cout<<"Current Device does not support FP16 inference!"<<std::endl;
 65 |         return false;
 66 |     }
 67 | 
 68 |     std::cout<<"network layer number: "<<network->getNbLayers()<<std::endl;
 69 |     for(int i=0; i<2; i++)
 70 |     {
 71 |         ILayer* layer = network->getLayer(network->getNbLayers() - i - 1);
 72 |         std::string layername(layer->getName());
 73 |         std::cout<<" layer name: "<<layername<<std::endl;
 74 |     }
 75 | 
 76 |     // Build the engine
 77 |     std::cout<<builder->getMaxWorkspaceSize()<<std::endl;
 78 |     std::size_t WorkspaceSize = (1l<<30) * 4;
 79 |     builder->setMaxWorkspaceSize(WorkspaceSize);
 80 |     std::cout<<builder->getMaxWorkspaceSize()<<std::endl;
 81 |     builder->setAverageFindIterations(1);
 82 |     builder->setMinFindIterations(1);
 83 |     builder->setDebugSync(true);
 84 |     builder->setInt8Mode(dataType == DataType::kINT8);
 85 |     builder->setFp16Mode(dataType == DataType::kHALF);
 86 |     builder->setInt8Calibrator(calibrator);
 87 |     if (gUseDLACore >= 0)
 88 |     {
 89 |         samplesCommon::enableDLA(builder, gUseDLACore);
 90 |         if (maxBatchSize > builder->getMaxDLABatchSize())
 91 |         {
 92 |             std::cerr << "Requested batch size " << maxBatchSize << " is greater than the max DLA batch size of "
 93 |                       << builder->getMaxDLABatchSize() << ". Reducing batch size accordingly." << std::endl;
 94 |             maxBatchSize = builder->getMaxDLABatchSize();
 95 |         }
 96 |     }
 97 |     if(dataType == DataType::kINT8)
 98 |     {
 99 |         builder->setStrictTypeConstraints(true);
100 |         for(int i = 0; i < 20; i++)
101 |         {
102 |             // ILayer* layer = network->getLayer(network->getNbLayers() - i - 1);
103 |             // layer->setPrecision(DataType::kFLOAT);
104 |             // for (int j = 0; j < layer->getNbOutputs(); ++j)
105 |             // {
106 |             //     layer->setOutputType(j, nvinfer1::DataType::kFLOAT);
107 |             // }
108 |             ILayer* layer1 = network->getLayer(i);
109 |             layer1->setPrecision(DataType::kFLOAT);
110 |             for (int j = 0; j < layer1->getNbOutputs(); ++j)
111 |             {
112 |                 layer1->setOutputType(j, nvinfer1::DataType::kFLOAT);
113 |             }
114 |         }
115 |         //builder->setStrictTypeConstraints(true);
116 |     }
117 |     builder->setMaxBatchSize(4);
118 |     std::cout<<"000"<<std::endl;
119 |     ICudaEngine* engine = builder->buildCudaEngine(*network);
120 |     assert(engine);
121 | 
122 |     // serialize the engine, then close everything down
123 |     
124 |     std::cout<<"111"<<std::endl;
125 |     trtModelStream = engine->serialize();
126 |     std::cout<<"222"<<std::endl;
127 |     FILE* fp = fopen("lasernet.trt", "wb");
128 |     fwrite(trtModelStream->data(), 1, trtModelStream->size(), fp);
129 |     fclose(fp);
130 | 
131 |  // we don't need the network any more, and we can destroy the parser
132 |     parser->destroy();
133 |     engine->destroy();
134 |     network->destroy();
135 |     builder->destroy();
136 |     std::cout<<"Create TensorRT model finished!"<<std::endl;
137 |     
138 |     return true;
139 | }
140 | 
// Run one inference for `batchSize` samples and return the time, in
// milliseconds, measured between two CUDA events placed around enqueue().
//
// `input` is copied to a freshly allocated device buffer, `output` receives
// the content of the output binding. Device buffers, the stream and the
// events are created and released on every call.
float doInference(IExecutionContext& context, float* input, float* output, int batchSize)
{
    const ICudaEngine& engine = context.getEngine();
    // input and output buffer pointers that we pass to the engine - the engine requires exactly IEngine::getNbBindings(),
    // of these, but in this case we know that there is exactly one input and one output.
    assert(engine.getNbBindings() == 2);
    // Three slots are declared although only indices 0 and 1 are used below.
    void* buffers[3];
    float ms{0.0f};

    // Binding indices are hard-coded rather than looked up by tensor name:
    // index 0 is used as the input and index 1 as the output.
    int inputIndex = 0;
    int outputIndex_cls = 1;
    // create GPU buffers and a stream
    Dims3 inputDims = static_cast<Dims3&&>(context.getEngine().getBindingDimensions(inputIndex));
    Dims3 outputDims = static_cast<Dims3&&>(context.getEngine().getBindingDimensions(outputIndex_cls));

    // Byte sizes: batch size times the three binding dimensions, as floats.
    size_t inputSize = batchSize * inputDims.d[0] * inputDims.d[1] * inputDims.d[2] * sizeof(float);
    size_t outputSize = batchSize * outputDims.d[0] * outputDims.d[1] * outputDims.d[2] * sizeof(float);
    CHECK(cudaMalloc(&buffers[inputIndex], inputSize));
    CHECK(cudaMalloc(&buffers[outputIndex_cls], outputSize));

    CHECK(cudaMemcpy(buffers[inputIndex], input, inputSize, cudaMemcpyHostToDevice));

    cudaStream_t stream;
    CHECK(cudaStreamCreate(&stream));
    cudaEvent_t start, end;
    CHECK(cudaEventCreateWithFlags(&start, cudaEventBlockingSync));
    CHECK(cudaEventCreateWithFlags(&end, cudaEventBlockingSync));
    // Only the enqueue call lies between the two events; the host/device
    // copies are outside the measured interval.
    // NOTE(review): the return values of the event calls and of enqueue() are not checked.
    cudaEventRecord(start, stream);
    context.enqueue(batchSize, buffers, stream, nullptr);
    cudaEventRecord(end, stream);
    cudaEventSynchronize(end);
    cudaEventElapsedTime(&ms, start, end);
    cudaEventDestroy(start);
    cudaEventDestroy(end);

    CHECK(cudaMemcpy(output, buffers[outputIndex_cls], outputSize, cudaMemcpyDeviceToHost));
    CHECK(cudaFree(buffers[inputIndex]));
    CHECK(cudaFree(buffers[outputIndex_cls]));
    CHECK(cudaStreamDestroy(stream));
    return ms;
}
184 | 
// Scores a batch of two-channel predictions against per-pixel labels.
//
// batchProb  : batchSize consecutive outputs of outputSize floats each; the first
//              height*width floats are channel 0, the next height*width channel 1.
// labels     : batchSize consecutive label planes of height*width floats.
// A pixel is predicted as class 1 when channel 1 is strictly greater than channel 0.
// Pixels where label + prediction is above 1 go into the numerator; pixels where the
// sum is exactly 1 only enlarge the denominator.
// Returns numerator / (numerator + mismatches), or 1 when both counters are zero.
float calculateScore(float* batchProb, float* labels, int batchSize, int outputSize, int height, int width)
{
    int bothSet = 0;    // label + prediction > 1
    int oneSet = 0;     // label + prediction == 1

    for (int sample = 0; sample < batchSize; ++sample)
    {
        const float* channel0 = batchProb + outputSize * sample;
        const float* channel1 = channel0 + height * width;
        const float* target = labels + height * width * sample;

        for (int row = 0; row < height; ++row)
        {
            const int rowStart = row * width;
            for (int col = 0; col < width; ++col)
            {
                const int px = rowStart + col;
                const float predicted = (channel0[px] < channel1[px]) ? 1 : 0;
                const float sum = target[px] + predicted;
                if (sum > 1)
                {
                    ++bothSet;
                }
                else if (sum == 1)
                {
                    ++oneSet;
                }
            }
        }
    }

    const int total = bothSet + oneSet;
    if (total == 0)
    {
        return 1;
    }
    return ((float) bothSet) / total;
}
222 | 
223 | class Int8EntropyCalibrator : public IInt8EntropyCalibrator
224 | {
225 | public:
226 |     Int8EntropyCalibrator(BatchStream& stream, int firstBatch, bool readCache = true)
227 |         : mStream(stream)
228 |         , mReadCache(readCache)
229 |     {
230 |         DimsNCHW dims = mStream.getDims();
231 |         mInputCount = mStream.getBatchSize() * dims.c() * dims.h() * dims.w();
232 |         CHECK(cudaMalloc(&mDeviceInput, mInputCount * sizeof(float)));
233 |         mStream.reset(firstBatch);
234 |     }
235 | 
236 |     virtual ~Int8EntropyCalibrator()
237 |     {
238 |         CHECK(cudaFree(mDeviceInput));
239 |     }
240 | 
241 |     int getBatchSize() const override { return mStream.getBatchSize(); }
242 | 
243 |     bool getBatch(void* bindings[], const char* names[], int nbBindings) override
244 |     {
245 |         if (!mStream.next())
246 |             return false;
247 | 
248 |         CHECK(cudaMemcpy(mDeviceInput, mStream.getBatch(), mInputCount * sizeof(float), cudaMemcpyHostToDevice));
249 |         //assert(!strcmp(names[0], INPUT_BLOB_NAME));
250 |         bindings[0] = mDeviceInput;
251 |         return true;
252 |     }
253 | 
254 |     const void* readCalibrationCache(size_t& length) override
255 |     {
256 |         mCalibrationCache.clear();
257 |         std::ifstream input(calibrationTableName(), std::ios::binary);
258 |         input >> std::noskipws;
259 |         if (mReadCache && input.good())
260 |             std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(), std::back_inserter(mCalibrationCache));
261 | 
262 |         length = mCalibrationCache.size();
263 |         return length ? &mCalibrationCache[0] : nullptr;
264 |     }
265 | 
266 |     void writeCalibrationCache(const void* cache, size_t length) override
267 |     {
268 |         std::ofstream output(calibrationTableName(), std::ios::binary);
269 |         output.write(reinterpret_cast<const char*>(cache), length);
270 |     }
271 | 
272 | private:
273 |     static std::string calibrationTableName()
274 |     {
275 |         assert(gNetworkName);
276 |         return std::string("CalibrationTable") + gNetworkName;
277 |     }
278 |     BatchStream mStream;
279 |     bool mReadCache{true};
280 | 
281 |     size_t mInputCount;
282 |     void* mDeviceInput{nullptr};
283 |     std::vector<char> mCalibrationCache;
284 | };
285 | 
286 | float scoreModel(std::string modelname, int batchSize, int firstBatch, int nbScoreBatches, DataType datatype, IInt8Calibrator* calibrator, bool quiet = false)
287 | {
288 |     IHostMemory* trtModelStream{nullptr};
289 |     bool valid = onnxToTRTModel(modelname,  batchSize, datatype, calibrator, trtModelStream);
290 | 
291 |     if (!valid)
292 |     {
293 |         std::cout << "Engine could not be created at this precision" << std::endl;
294 |         return 0;
295 |     }
296 | 
297 |     assert(trtModelStream != nullptr);
298 | 
299 |     // Create engine and deserialize model.
300 |     std::cout<<"000"<<std::endl;
301 |     IRuntime* infer = createInferRuntime(gLogger);
302 |     assert(infer != nullptr);
303 |     if (gUseDLACore >= 0)
304 |     {
305 |         infer->setDLACore(gUseDLACore);
306 |     }
307 |     ICudaEngine* engine = infer->deserializeCudaEngine(trtModelStream->data(), trtModelStream->size(), nullptr);
308 |     std::cout<<"111"<<std::endl;
309 |     assert(engine != nullptr);
310 |     trtModelStream->destroy();
311 |     IExecutionContext* context = engine->createExecutionContext();
312 |     assert(context != nullptr);
313 | 
314 |     std::cout<<"222"<<std::endl;
315 |     BatchStream stream(batchSize, nbScoreBatches, calibList);
316 |     stream.skip(firstBatch);
317 | 
318 |     Dims3 outputDims = static_cast<Dims3&&>(context->getEngine().getBindingDimensions(1));
319 |     int outputSize = outputDims.d[0] * outputDims.d[1] * outputDims.d[2];
320 |     std::cout<<outputDims.d[0]<<"  "<<outputDims.d[1]<<"  "<<outputDims.d[2]<<"  "<<std::endl;
321 |     float mIoU = 0.0;
322 |     float totalTime{0.0f};
323 |     std::vector<float> prob(batchSize * outputSize, 0);
324 | 
325 |     while (stream.next())
326 |     {
327 |         totalTime += doInference(*context, stream.getBatch(), &prob[0], batchSize);
328 |         
329 |         mIoU += calculateScore(&prob[0], stream.getLabels(), batchSize, outputSize, outputDims.d[1], outputDims.d[2]);
330 |     }
331 |     int imagesRead = stream.getBatchesRead() * batchSize;
332 | 
333 |     if (!quiet)
334 |     {
335 |         std::cout << "\nmIoU: " << (mIoU / imagesRead * batchSize)  << std::endl;
336 |         std::cout << "Processing " << imagesRead << " images averaged " << totalTime / imagesRead << " ms/image and " << totalTime / stream.getBatchesRead() << " ms/batch." << std::endl;
337 |     }
338 | 
339 |     context->destroy();
340 |     engine->destroy();
341 |     infer->destroy();
342 |     return mIoU;
343 | }
344 | 
// Prints the command-line help text to stdout, one line per entry.
static void printUsage()
{
    static const char* const kHelpLines[] = {
        "",
        "Usage: ./sample_int8 <network name> <optional params>",
        "",
        "Optional params",
        "  batch=N            Set batch size (default = 100)",
        "  start=N            Set the first batch to be scored (default = 100). All batches before this batch will be used for calibration.",
        "  score=N            Set the number of batches to be scored (default = 400)",
        "  search             Search for best calibration. Can only be used with legacy calibration algorithm",
        "  legacy             Use legacy calibration algorithm",
        "  useDLACore=N       Enable execution on DLA for all layers that support dla. Value can range from 0 to n-1, where n is the number of DLA engines on the platform.",
    };

    for (const char* line : kHelpLines)
    {
        std::cout << line << std::endl;
    }
}
358 | 
359 | int main(int argc, char** argv)
360 | {
361 |     if (argc < 2 || !strncmp(argv[1], "help", 4) || !strncmp(argv[1], "--help", 6) || !strncmp(argv[1], "--h", 3))
362 |     {
363 |         printUsage();
364 |         exit(0);
365 |     }
366 |     gNetworkName = argv[1];
367 |     std::string modelname(gNetworkName);
368 | 
369 |     int batchSize = CAL_BATCH_SIZE;
370 |     int firstScoreBatch = FIRST_CAL_SCORE_BATCH;
371 |     int nbScoreBatches = NB_CAL_SCORE_BATCHES;
372 |     bool search = false;
373 |     CalibrationAlgoType calibrationAlgo = CalibrationAlgoType::kENTROPY_CALIBRATION;
374 | 
375 |     for (int i = 2; i < argc; i++)
376 |     {
377 |         if (!strncmp(argv[i], "batch=", 6))
378 |             batchSize = atoi(argv[i] + 6);
379 |         else if (!strncmp(argv[i], "start=", 6))
380 |             firstScoreBatch = atoi(argv[i] + 6);
381 |         else if (!strncmp(argv[i], "score=", 6))
382 |             nbScoreBatches = atoi(argv[i] + 6);
383 |         else if (!strncmp(argv[i], "search", 6))
384 |             search = true;
385 |         else if (!strncmp(argv[i], "legacy", 6))
386 |             calibrationAlgo = CalibrationAlgoType::kLEGACY_CALIBRATION;
387 |         else if (!strncmp(argv[i], "useDLACore=", 11))
388 |             gUseDLACore = stoi(argv[i] + 11);
389 |         else
390 |         {
391 |             std::cout << "Unrecognized argument " << argv[i] << std::endl;
392 |             exit(0);
393 |         }
394 |     }
395 | 
396 |     if (calibrationAlgo == CalibrationAlgoType::kENTROPY_CALIBRATION)
397 |     {
398 |         search = false;
399 |     }
400 | 
401 |     if (batchSize > 128)
402 |     {
403 |         std::cout << "Please provide batch size <= 128" << std::endl;
404 |         exit(0);
405 |     }
406 | 
407 |     if ((firstScoreBatch + nbScoreBatches) * batchSize > 5000)
408 |     {
409 |         std::cout << "Only 5000 images available" << std::endl;
410 |         exit(0);
411 |     }
412 | 
413 |     std::cout.precision(6);
414 | 
415 |     BatchStream calibrationStream(CAL_BATCH_SIZE, NB_CAL_BATCHES, calibPath);
416 |     int dla{gUseDLACore};
417 | 
418 |     // Set gUseDLACore to -1 here since FP16 mode is not enabled.
419 |     if (gUseDLACore >= 0)
420 |     {
421 |         std::cout << "\nDLA requested. Disabling for FP32 run since its not supported." << std::endl;
422 |         gUseDLACore = -1;
423 |     }
424 |     std::cout << "\nFP32 run:" << nbScoreBatches << " batches of size " << batchSize << " starting at " << firstScoreBatch << std::endl;
425 |     scoreModel(modelname, batchSize, firstScoreBatch, nbScoreBatches, DataType::kFLOAT, nullptr);
426 | 
427 |     // Set gUseDLACore correctly to enable DLA if requested.
428 |     gUseDLACore = dla;
429 |     std::cout << "\nFP16 run:" << nbScoreBatches << " batches of size " << batchSize << " starting at " << firstScoreBatch << std::endl;
430 |     scoreModel(modelname, batchSize, firstScoreBatch, nbScoreBatches, DataType::kHALF, nullptr);
431 | 
432 |     // reset DLA to -1 for int8 mode.
433 |     if (gUseDLACore >= 0)
434 |     {
435 |         std::cout << "\nDLA requested. Disabling for Int8 run since its not supported." << std::endl;
436 |         gUseDLACore = -1;
437 |     }
438 |     std::cout << "\nINT8 run:" << nbScoreBatches << " batches of size " << batchSize << " starting at " << firstScoreBatch << std::endl;
439 |     if (calibrationAlgo == CalibrationAlgoType::kENTROPY_CALIBRATION)
440 |     {
441 |         Int8EntropyCalibrator calibrator(calibrationStream, FIRST_CAL_BATCH);
442 |         scoreModel(modelname, batchSize, firstScoreBatch, nbScoreBatches, DataType::kINT8, &calibrator);
443 |     }
444 | 
445 |     return 0;
446 | }
447 | 


--------------------------------------------------------------------------------
/tensorrt/lasernet.trt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/feicay/LaserNet/863a91aa8f542bd278f7bc03f899c0363a898570/tensorrt/lasernet.trt


--------------------------------------------------------------------------------
/tensorrt/make_input.cpp:
--------------------------------------------------------------------------------
 1 | #include "make_input.h"
 2 | #include <math.h>
 3 | #include <string.h>
 4 | 
 5 | #define PI (3.14159)
 6 | 
 7 | void xyzic_to_image(float* xyzic, int num, float h_start, float h_end, float v_start, float v_end, float dh, float dv, float* im, float* im_cls)
 8 | {
 9 |     int H = int((v_end - v_start + 0.01)/dv);
10 |     int W = int((h_end - h_start + 0.01)/dh);
11 |     float x,y,z,intensity,yaw,v_angle;
12 |     int c;
13 |     int w,h;
14 |     memset(im, 0, W*H*3);
15 |     memset(im_cls, 0, W*H);
16 |     for(int i=0; i<num; i++)
17 |     {
18 |         x = xyzic[i*5 + 0];
19 |         y = xyzic[i*5 + 1];
20 |         z = xyzic[i*5 + 2];
21 |         intensity = xyzic[i*5 + 3];
22 |         c = int(xyzic[i*5 + 4] + 0.01);
23 |         yaw = atan2(y, x);
24 |         v_angle = atan2(z, sqrt(x*x + y*y));
25 |         w = int((yaw - h_start)/dh + 0.5);
26 |         h = int((v_angle - v_end)/dv + 0.5);
27 |         if((w>=0) && (w<W) && (h>=0) && (h<H))
28 |         {
29 |             //range  reflict  height
30 |             im[H*W*0 + h*W + w] = sqrt(x*x + y*y + z*z);
31 |             im[H*W*1 + h*W + w] = intensity;
32 |             im[H*W*1 + h*W + w] = z;
33 |             im_cls[h*W + w] = c;
34 |         }
35 |     }
36 | }
37 | 
38 | void make_xyzi_to_image(float* xyzic, int num, float* im, float* im_cls)
39 | {
40 |     int W = 400;
41 |     int H = 200;
42 |     float dh = 0.225;
43 |     float dv = 0.2;
44 |     xyzic_to_image(xyzic, num, 0, 90, -30, 10, dh, dv, im, im_cls);
45 |     xyzic_to_image(xyzic, num, 90, 180, -30, 10, dh, dv, (im+W*H*3), (im_cls+W*H));
46 |     xyzic_to_image(xyzic, num, -180, -90, -30, 10, dh, dv, (im+2*W*H*3), (im_cls+2*W*H));
47 |     xyzic_to_image(xyzic, num, -90, 0, -30, 10, dh, dv, (im+3*W*H*3), (im_cls+3*W*H));
48 | }


--------------------------------------------------------------------------------
/tensorrt/make_input.h:
--------------------------------------------------------------------------------
#ifndef __MAKE_INPUT__
#define __MAKE_INPUT__

// Prototypes for the point-cloud-to-image routines implemented in make_input.cpp.

// Projects `num` points from `xyzic` (read as 5 floats per point) into the image
// buffers `im` and `im_cls`. h_start/h_end and v_start/v_end bound the horizontal and
// vertical field of view, dh/dv are the per-pixel steps.
void xyzic_to_image(float* xyzic, int num, float h_start, float h_end, float v_start, float v_end, float dh, float dv, float* im, float* im_cls);

// Calls xyzic_to_image() four times, once per 90-degree horizontal sector, writing the
// sectors one after another into `im` and `im_cls`.
void make_xyzi_to_image(float* xyzic, int num, float* im, float* im_cls);

#endif


--------------------------------------------------------------------------------
/tensorrt/test.py:
--------------------------------------------------------------------------------
 1 | import cv2
 2 | import numpy as np 
 3 | 
 4 | def test_truth():
 5 |     i = 537
 6 |     fname = 'bin/%06d.bin'%i
 7 |     im = np.fromfile(fname, dtype=np.float32)
 8 |     im = im.reshape(400,320)
 9 |     cv2.imshow("truth", im)
10 |     cv2.waitKey(0)
11 | 
12 | test_truth()


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.optim as optim
  4 | import torch.nn.functional as F
  5 | import torchvision
  6 | import visdom
  7 | from torch.utils import data
  8 | from torch.autograd import Variable
  9 | from data.datagen import Lidar_xyzic_dataset
 10 | from model.DLA import DLA
 11 | from model.loss import FocalLossClassify
 12 | import time
 13 | import argparse
 14 | import numpy as np
 15 | import os
 16 | import gc
 17 | import cv2
 18 | 
 19 | parser = argparse.ArgumentParser(description='PyTorch LaserNet Training')
 20 | parser.add_argument('--lr', default=1e-4, type=float, help='learning rate')
 21 | parser.add_argument('--resume', '-r', action='store_true', help='resume from checkpoint')
 22 | parser.add_argument('--vis', default=1, type=int, help='visdom')
 23 | parser.add_argument('--test', default=0, type=int, help='test')
 24 | parser.add_argument('--onnx', default=0, type=int, help='onnx')
 25 | args = parser.parse_args()
 26 | 
 27 | max_epoch = 70
 28 | 
 29 | def train():
 30 |     start_epoch = 0
 31 |     if args.vis:
 32 |         vis = visdom.Visdom(env=u'test1')
 33 | 
 34 |     trainlist = '/raid/pytorch_ws/LaserNet/trainlist.txt'
 35 |     validlist = '/raid/pytorch_ws/LaserNet/validlist.txt'
 36 |     trainset = Lidar_xyzic_dataset(trainlist)
 37 |     validset = Lidar_xyzic_dataset(validlist, train=0)
 38 |     loader_train = data.DataLoader(trainset, batch_size=16, shuffle=1, num_workers=4, drop_last=True)
 39 |     loader_val = data.DataLoader(validset, batch_size=4, shuffle=1, num_workers=4, drop_last=True)
 40 | 
 41 |     network = DLA(8)
 42 |     if args.resume:
 43 |         print('Resuming from checkpoint..')
 44 |         checkpoint = torch.load('./checkpoint/lasernet45.pth')
 45 |         network.load_state_dict(checkpoint['net'])
 46 |         best_loss = checkpoint['loss']
 47 |         start_epoch = checkpoint['epoch'] + 1
 48 |     net = torch.nn.DataParallel(network).cuda()
 49 |     criterion = FocalLossClassify(8)
 50 |     lr = args.lr
 51 |     optimizer = optim.Adam(net.parameters(), lr=lr, weight_decay=1e-4)
 52 | 
 53 |     for i in range(start_epoch, max_epoch):
 54 |         print('--------start training epoch %d --------'%i)
 55 |         loss_train = 0.0
 56 |         net.train()
 57 |         for ii, (image, cls_truth) in enumerate(loader_train):
 58 |             #input
 59 |             image = Variable(image).cuda()
 60 |             cls_truth = Variable(cls_truth).cuda()
 61 |             #forward
 62 |             optimizer.zero_grad()
 63 |             t0 = time.time()
 64 |             cls_pred = net(image)
 65 |             t1 = time.time()
 66 |             #loss
 67 |             loss = criterion(cls_pred, cls_truth)
 68 |             t2 = time.time()
 69 |             #backward
 70 |             loss.backward()
 71 |             t3 = time.time()
 72 |             #update
 73 |             optimizer.step()
 74 |             t4 = time.time()
 75 |             loss_train += loss.data
 76 |             print('forward time: %f, loss time: %f, backward time: %f, update time: %f'%((t1-t0),(t2-t1),(t3-t2),(t4-t3)))
 77 |             print('%3d/%3d => loss: %f'%(ii,i,criterion.loss))
 78 |             if args.vis:
 79 |                 vis.line(Y=loss.data.cpu().view(1,1).numpy(),X=np.array([ii]),win='loss',update='append' if ii>0 else None)
 80 |         if i < 3:
 81 |             loss_train = loss.data
 82 |         else:
 83 |             loss_train = loss_train / ii
 84 |         loss_val = 0.0
 85 |         net.eval()
 86 |         for jj, (image, cls_truth) in enumerate(loader_val):
 87 |             image = Variable(image).cuda()
 88 |             cls_truth = Variable(cls_truth).cuda()
 89 |             optimizer.zero_grad()
 90 |             cls_pred = net(image)
 91 |             loss = criterion(cls_pred, cls_truth)
 92 |             loss_val += loss.data
 93 |             print('val: %3d/%3d => loss: %f'%(jj,i,criterion.loss))
 94 |         loss_val = loss_val / jj
 95 |         if args.vis:
 96 |             vis.line(Y=torch.cat((loss_val.view(1,1), loss_train.view(1,1)),1).cpu().numpy(),X=np.array([i]),\
 97 |                         win='eval-train loss',update='append' if i>0 else None)
 98 |         print('Saving weights..')
 99 |         state = {
100 |             'net': net.module.state_dict(),
101 |             'loss': loss_val,
102 |             'epoch': i,
103 |         }
104 |         if not os.path.isdir('checkpoint'):
105 |             os.mkdir('checkpoint')
106 |         torch.save(state, './checkpoint/lasernet%d.pth'%i)
107 |         del image, cls_truth
108 |         del cls_pred
109 |         gc.collect()
110 |         time.sleep(1)
111 |         if i==50:
112 |             lr = lr*0.1
113 |             print('learning rate: %f'%lr)
114 |             for para_group in optimizer.param_groups:
115 |                 para_group['lr'] = lr
116 |     torch.save(network,'lasernet_model_final.pkl')
117 |     print('finish training!')
118 | 
def test():
    """Run the trained network on the validation list and display the results.

    Loads ``./checkpoint/lasernet69.pth`` into ``DLA(8)``. With ``--onnx 1``
    the network is exported to ``lasernet.onnx`` using a (4, 3, 200, 400)
    dummy input and the function returns. Otherwise each validation sample
    is pushed through the network and three OpenCV windows are shown (input
    image, predicted classes, ground-truth classes); a key press advances to
    the next sample.
    """
    # One display colour per class index 0..7.
    color = np.array([[0, 0, 0],
                  [0, 0, 250],
                  [0, 250, 250],
                  [0, 250, 0],
                  [250, 250, 0],
                  [250, 0, 0],
                  [250, 0, 250],
                  [150, 150, 150]])
    validlist = '/raid/pytorch_ws/LaserNet/validlist.txt'
    validset = Lidar_xyzic_dataset(validlist, train=0)
    loader_test = data.DataLoader(validset, batch_size=1, shuffle=1, num_workers=1, drop_last=True)

    network = DLA(8)
    checkpoint = torch.load('./checkpoint/lasernet69.pth')
    network.load_state_dict(checkpoint['net'])
    network = network.cuda().eval()
    if args.onnx == 1:
        dummy_input = torch.randn(4, 3, 200, 400, device='cuda')
        torch.onnx.export(network, dummy_input, "lasernet.onnx", verbose=True)
        return
    # Pure inference: disable autograd so no graph is kept for each sample.
    with torch.no_grad():
        for i, (image, cls_truth) in enumerate(loader_test):
            image = Variable(image).cuda()
            cls_truth = Variable(cls_truth).cuda()
            t1 = time.time()
            cls_pred = network(image)
            t2 = time.time()
            print('inference time %f'%(t2-t1))
            print(cls_pred.size())
            # Channels-last layout for display.
            im = image.squeeze(0).permute(1, 2, 0).cpu().numpy()
            cv2.imshow('image', im)
            pred = F.softmax(cls_pred, dim=1).squeeze(0).cpu()
            # Most probable class per pixel.
            prob, cls_ = pred.max(dim=0)
            im_cls = np.zeros((200, 400, 3), dtype=np.uint8)
            im_cls[:,:] = color[cls_[:,:]]
            im_truth = np.zeros((200, 400, 3), dtype=np.uint8)
            im_truth[:,:] = color[cls_truth[:,:].cpu().numpy()]
            cv2.imshow('cls', im_cls)
            cv2.imshow('truth', im_truth)
            cv2.waitKey(0)
159 | 
# Script entry: a non-zero --test runs test(), otherwise training starts.
(test if args.test else train)()


--------------------------------------------------------------------------------