├── README.md
├── main.py
└── dector_trt.py

/README.md:
--------------------------------------------------------------------------------
# yolov5-tensorrt-python

Runs YOLOv5 without depending on PyTorch, using only TensorRT and NumPy for acceleration; it reached 160 FPS in tests on a 1080 Ti.

1. Install the TensorRT Python bindings.

2. Install pycuda.

3. Convert your trained model (here, an "s" model trained with YOLOv5 v4.0) into the libmyplugins.so and yolov5s.engine files, following the method at https://github.com/wang-xinyu/tensorrtx/tree/yolov5-v4.0/yolov5 (a quick way to verify the result is sketched below).

4. Change the classes in `categories` in dector_trt.py to your own classes.

5. Update the `PLUGIN_LIBRARY` and `engine_file_path` paths in main.py and it is ready to run.
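Before wiring everything into main.py, you can sanity-check the files from step 3 with a short script like the minimal sketch below (it assumes the two files landed under `weights/`). If the plugin library is missing, or the engine was built against a different TensorRT version, deserialization fails and returns `None`:

```python
import ctypes
import tensorrt as trt

ctypes.CDLL("weights/libmyplugins.so")  # the plugin library must be loaded first
logger = trt.Logger(trt.Logger.INFO)
with open("weights/yolov5s.engine", "rb") as f, trt.Runtime(logger) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())
assert engine is not None, "engine failed to deserialize (TensorRT version mismatch?)"
print("bindings:", [(b, tuple(engine.get_binding_shape(b))) for b in engine])
```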
References:

1. https://github.com/wang-xinyu/tensorrtx/tree/yolov5-v4.0/yolov5

2. https://github.com/ultralytics/yolov5/tree/v4.0

3. https://github.com/cong/yolov5_deepsort_tensorrt

4. https://gitee.com/chaucerg/yolov5-tensorrt
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
import cv2
import time
import ctypes
from dector_trt import Detector
import pycuda.autoinit


def detect(engine_file_path):
    detector = Detector(engine_file_path)
    capture = cv2.VideoCapture(0)
    fps = 0.0
    while True:
        ret, img = capture.read()
        if img is None:
            print('No image input!')
            break

        t1 = time.time()
        result_img = detector.detect(img)

        # Running average of the instantaneous frame rate
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(result_img, 'FPS: {:.2f}'.format(fps), (50, 30), 0, 1, (0, 255, 0), 2)
        cv2.putText(result_img, 'Time: {:.3f}'.format(time.time() - t1), (50, 60), 0, 1, (0, 255, 0), 2)
        if ret:
            cv2.imshow('frame', result_img)
            if cv2.waitKey(5) & 0xFF == ord('q'):
                break
        else:
            break

    capture.release()
    cv2.destroyAllWindows()
    detector.destroy()


if __name__ == '__main__':

    PLUGIN_LIBRARY = "weights/libmyplugins.so"
    ctypes.CDLL(PLUGIN_LIBRARY)
    engine_file_path = 'weights/yolov5s.engine'
    detect(engine_file_path)
--------------------------------------------------------------------------------
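A quick worked example of the letterbox arithmetic that dector_trt.py applies may help when reading `preprocess_image` and `xywh2xyxy` below; the 640x480 frame size is just an assumption for illustration:

```python
# Letterbox math from dector_trt.py, worked for a hypothetical 640x480 frame
INPUT_W = INPUT_H = 352
w, h = 640, 480
r = min(INPUT_W / w, INPUT_H / h)   # 0.55: the scale is set by the long side
tw, th = int(r * w), int(r * h)     # 352 x 264 after the aspect-preserving resize
pad_y = (INPUT_H - th) // 2         # 44 px of gray padding on top and bottom
# xywh2xyxy undoes this when mapping a network-space coordinate back to the
# frame: y_frame = (y_net - pad_y) / r, and x_frame = x_net / r (no x padding).
print(tw, th, pad_y)                # 352 264 44
```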
/dector_trt.py:
--------------------------------------------------------------------------------
import cv2
import numpy as np
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit

TRT_LOGGER = trt.Logger()
INPUT_W = 352
INPUT_H = 352
CONF_THRESH = 0.1
IOU_THRESHOLD = 0.4

# Detection class labels (replace with your own; see README step 4)
categories = ["battery", "pills", "bananas", "cans", "apples", "bottles",
              "ceram", "green vegetable", "broccoli", "boxes", "glass",
              "cigarette", "orange", "eggshell", "spitball", "tile"]


class Detector:

    def __init__(self, engine_file_path):
        self.img_size = 352
        self.threshold = 0.3
        self.stride = 1

        # Create a CUDA context on this device
        self.cfx = cuda.Device(0).make_context()
        stream = cuda.Stream()
        TRT_LOGGER = trt.Logger(trt.Logger.INFO)
        runtime = trt.Runtime(TRT_LOGGER)

        # Deserialize the engine from file
        with open(engine_file_path, "rb") as f:
            engine = runtime.deserialize_cuda_engine(f.read())
        context = engine.create_execution_context()

        host_inputs = []
        cuda_inputs = []
        host_outputs = []
        cuda_outputs = []
        bindings = []

        for binding in engine:
            size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            # Allocate page-locked host and device buffers
            host_mem = cuda.pagelocked_empty(size, dtype)
            cuda_mem = cuda.mem_alloc(host_mem.nbytes)
            # Append the device buffer address to device bindings
            bindings.append(int(cuda_mem))
            # Append to the appropriate list
            if engine.binding_is_input(binding):
                host_inputs.append(host_mem)
                cuda_inputs.append(cuda_mem)
            else:
                host_outputs.append(host_mem)
                cuda_outputs.append(cuda_mem)

        # Store
        self.stream = stream
        self.context = context
        self.engine = engine
        self.host_inputs = host_inputs
        self.cuda_inputs = cuda_inputs
        self.host_outputs = host_outputs
        self.cuda_outputs = cuda_outputs
        self.bindings = bindings

    def detect(self, im):
        # Make self the active context, pushing it on top of the context stack
        self.cfx.push()
        # Restore
        stream = self.stream
        context = self.context
        engine = self.engine
        host_inputs = self.host_inputs
        cuda_inputs = self.cuda_inputs
        host_outputs = self.host_outputs
        cuda_outputs = self.cuda_outputs
        bindings = self.bindings
        # Do image preprocess
        input_image, image_raw, origin_h, origin_w = self.preprocess_image(im)
        # Copy input image to host buffer
        np.copyto(host_inputs[0], input_image.ravel())
        # Transfer input data to the GPU
        cuda.memcpy_htod_async(cuda_inputs[0], host_inputs[0], stream)
        # Run inference
        context.execute_async(bindings=bindings, stream_handle=stream.handle)
        # Transfer predictions back from the GPU
        cuda.memcpy_dtoh_async(host_outputs[0], cuda_outputs[0], stream)
        # Synchronize the stream
        stream.synchronize()
        # Remove any context from the top of the context stack, deactivating it
        self.cfx.pop()
        # Use the first row of output, since batch_size = 1
        trt_outputs = host_outputs[0]
        # Do postprocess
        results_trt = self.post_process_new(im, trt_outputs, origin_h, origin_w)

        return results_trt

    def preprocess_image(self, image_raw):
        """
        description: Convert a BGR image to RGB, resize and pad it to the
                     target size, normalize to [0,1], and transform it to
                     NCHW format.
        param:
            image_raw: np.ndarray, the original BGR image
        return:
            image: the processed image
            image_raw: the original image
            h: original height
            w: original width
        """
        h, w, c = image_raw.shape
        image = cv2.cvtColor(image_raw, cv2.COLOR_BGR2RGB)
        # Calculate width, height and paddings
        r_w = INPUT_W / w
        r_h = INPUT_H / h
        if r_h > r_w:
            tw = INPUT_W
            th = int(r_w * h)
            tx1 = tx2 = 0
            ty1 = int((INPUT_H - th) / 2)
            ty2 = INPUT_H - th - ty1
        else:
            tw = int(r_h * w)
            th = INPUT_H
            tx1 = int((INPUT_W - tw) / 2)
            tx2 = INPUT_W - tw - tx1
            ty1 = ty2 = 0
        # Resize the long side to the target size while keeping the aspect ratio
        image = cv2.resize(image, (tw, th))
        # Pad the short side with gray (128, 128, 128)
        image = cv2.copyMakeBorder(
            image, ty1, ty2, tx1, tx2, cv2.BORDER_CONSTANT, value=(128, 128, 128)
        )
        image = image.astype(np.float32)
        # Normalize to [0,1]
        image /= 255.0
        # HWC to CHW format
        image = np.transpose(image, [2, 0, 1])
        # CHW to NCHW format
        image = np.expand_dims(image, axis=0)
        # Convert the image to row-major ("C") order
        image = np.ascontiguousarray(image)
        return image, image_raw, h, w

    def xywh2xyxy(self, origin_h, origin_w, x):
        """
        description: Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2],
                     where xy1 = top-left and xy2 = bottom-right
        param:
            origin_h: height of original image
            origin_w: width of original image
            x: a boxes array, each row is a box [center_x, center_y, w, h]
        return:
            y: a boxes array, each row is a box [x1, y1, x2, y2]
        """
        y = np.zeros_like(x)
        r_w = INPUT_W / origin_w
        r_h = INPUT_H / origin_h
        if r_h > r_w:
            y[:, 0] = x[:, 0] - x[:, 2] / 2
            y[:, 2] = x[:, 0] + x[:, 2] / 2
            y[:, 1] = x[:, 1] - x[:, 3] / 2 - (INPUT_H - r_w * origin_h) / 2
            y[:, 3] = x[:, 1] + x[:, 3] / 2 - (INPUT_H - r_w * origin_h) / 2
            y /= r_w
        else:
            y[:, 0] = x[:, 0] - x[:, 2] / 2 - (INPUT_W - r_h * origin_w) / 2
            y[:, 2] = x[:, 0] + x[:, 2] / 2 - (INPUT_W - r_h * origin_w) / 2
            y[:, 1] = x[:, 1] - x[:, 3] / 2
            y[:, 3] = x[:, 1] + x[:, 3] / 2
            y /= r_h

        return y

    def non_max_suppression(self, boxes, confs, classes, iou_thres=IOU_THRESHOLD):
        """
        description: Greedy NMS; repeatedly keep the highest-scoring box and
                     drop every remaining box whose IoU with it exceeds iou_thres.
        """
        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = boxes[:, 2]
        y2 = boxes[:, 3]
        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        order = confs.flatten().argsort()[::-1]
        keep = []
        while order.size > 0:
            i = order[0]
            keep.append(i)
            xx1 = np.maximum(x1[i], x1[order[1:]])
            yy1 = np.maximum(y1[i], y1[order[1:]])
            xx2 = np.minimum(x2[i], x2[order[1:]])
            yy2 = np.minimum(y2[i], y2[order[1:]])
            w = np.maximum(0.0, xx2 - xx1 + 1)
            h = np.maximum(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (areas[i] + areas[order[1:]] - inter)
            inds = np.where(ovr <= iou_thres)[0]
            order = order[inds + 1]
        boxes = boxes[keep]
        confs = confs[keep]
        classes = classes[keep]
        return boxes, confs, classes
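
    # Output layout from the tensorrtx yolov5 plugin (an assumption, inferred
    # from the decoding below): output[0] holds the number of detections,
    # followed by flat rows of 6 floats each, [center_x, center_y, w, h, conf,
    # cls_id], in network (352x352) coordinates. post_process_new decodes this
    # buffer, filters by CONF_THRESH, maps boxes back to the original frame,
    # runs NMS, and draws the surviving detections.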

    def post_process_new(self, im, output, origin_h, origin_w):
        """
        description: postprocess the prediction and draw the results on the image
        param:
            im: the original image to draw on
            output: a flat array like [num_boxes, cx, cy, w, h, conf, cls_id,
                    cx, cy, w, h, conf, cls_id, ...]
            origin_h: height of original image
            origin_w: width of original image
        return:
            im: the input image with the final boxes, labels and scores drawn on it
        """
        # Get the number of boxes detected
        num = int(output[0])
        # Reshape to a two-dimensional ndarray
        pred = np.reshape(output[1:], (-1, 6))[:num, :]
        # Get the boxes
        boxes = pred[:, :4]
        # Get the scores
        scores = pred[:, 4]
        # Get the class ids
        classid = pred[:, 5]
        # Keep only boxes with score > CONF_THRESH
        si = scores > CONF_THRESH
        boxes = boxes[si, :]
        scores = scores[si]
        classid = classid[si]
        # Transform bbox from [center_x, center_y, w, h] to [x1, y1, x2, y2]
        boxes = self.xywh2xyxy(origin_h, origin_w, boxes)
        # Do NMS
        result_boxes, result_scores, result_classid = self.non_max_suppression(boxes, scores, classid)
        # Draw boxes and labels on the original image
        for i in range(len(result_boxes)):
            x1, y1 = int(result_boxes[i][0]), int(result_boxes[i][1])
            x2, y2 = int(result_boxes[i][2]), int(result_boxes[i][3])
            cid = result_classid[i]
            label = categories[int(cid)]
            conf = result_scores[i]
            c1, c2 = (x1, y1), (x2, y2)
            cv2.rectangle(im, c1, c2, [225, 255, 0], thickness=1, lineType=cv2.LINE_AA)
            cv2.putText(
                im,
                label,
                (c1[0], c1[1] - 2),
                0,
                1 / 3,
                [225, 255, 255],
                thickness=1,
                lineType=cv2.LINE_AA,
            )

        return im

    def destroy(self):
        # Remove any context from the top of the context stack, deactivating it
        self.cfx.pop()
--------------------------------------------------------------------------------
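
For reference, a minimal single-image driver would look like the sketch below (the `weights/` paths match main.py; `test.jpg` is a placeholder):

```python
import ctypes
import cv2
from dector_trt import Detector

ctypes.CDLL("weights/libmyplugins.so")   # load the TensorRT plugins first
detector = Detector("weights/yolov5s.engine")
img = cv2.imread("test.jpg")             # placeholder input image
result = detector.detect(img)            # returns the frame with boxes drawn
cv2.imwrite("result.jpg", result)
detector.destroy()
```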