├── README.md
├── main.py
└── dector_trt.py

/README.md:
--------------------------------------------------------------------------------
# yolov5-tensorrt-python

Runs YOLOv5 without depending on PyTorch, using only TensorRT and NumPy for acceleration; it reached 160 FPS in tests on a 1080 Ti.

1. Install the TensorRT Python bindings.

2. Install pycuda.

3. Convert your trained model (here, an "s" model trained with YOLOv5 v4.0) into the libmyplugins.so and yolov5s.engine files, following the method at https://github.com/wang-xinyu/tensorrtx/tree/yolov5-v4.0/yolov5 (a quick way to verify the result is sketched below).

4. Change the classes in `categories` in dector_trt.py to your own classes.

5. Update the `PLUGIN_LIBRARY` and `engine_file_path` paths in main.py and it is ready to run.
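Before wiring everything into main.py, you can sanity-check the files from step 3 with a short script like the minimal sketch below (it assumes the two files landed under `weights/`). If the plugin library is missing, or the engine was built against a different TensorRT version, deserialization fails and returns `None`:

```python
import ctypes
import tensorrt as trt

ctypes.CDLL("weights/libmyplugins.so")  # the plugin library must be loaded first
logger = trt.Logger(trt.Logger.INFO)
with open("weights/yolov5s.engine", "rb") as f, trt.Runtime(logger) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())
assert engine is not None, "engine failed to deserialize (TensorRT version mismatch?)"
print("bindings:", [(b, tuple(engine.get_binding_shape(b))) for b in engine])
```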
References:

1. https://github.com/wang-xinyu/tensorrtx/tree/yolov5-v4.0/yolov5

2. https://github.com/ultralytics/yolov5/tree/v4.0

3. https://github.com/cong/yolov5_deepsort_tensorrt

4. https://gitee.com/chaucerg/yolov5-tensorrt
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
import cv2
import time
import ctypes
from dector_trt import Detector
import pycuda.autoinit


def detect(engine_file_path):
    detector = Detector(engine_file_path)
    capture = cv2.VideoCapture(0)
    fps = 0.0
    while True:
        ret, img = capture.read()
        if img is None:
            print('No image input!')
            break

        t1 = time.time()
        result_img = detector.detect(img)

        # Running average of the instantaneous frame rate
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(result_img, 'FPS: {:.2f}'.format(fps), (50, 30), 0, 1, (0, 255, 0), 2)
        cv2.putText(result_img, 'Time: {:.3f}'.format(time.time() - t1), (50, 60), 0, 1, (0, 255, 0), 2)
        if ret:
            cv2.imshow('frame', result_img)
            if cv2.waitKey(5) & 0xFF == ord('q'):
                break
        else:
            break

    capture.release()
    cv2.destroyAllWindows()
    detector.destroy()


if __name__ == '__main__':

    PLUGIN_LIBRARY = "weights/libmyplugins.so"
    ctypes.CDLL(PLUGIN_LIBRARY)
    engine_file_path = 'weights/yolov5s.engine'
    detect(engine_file_path)
--------------------------------------------------------------------------------
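A quick worked example of the letterbox arithmetic that dector_trt.py applies may help when reading `preprocess_image` and `xywh2xyxy` below; the 640x480 frame size is just an assumption for illustration:

```python
# Letterbox math from dector_trt.py, worked for a hypothetical 640x480 frame
INPUT_W = INPUT_H = 352
w, h = 640, 480
r = min(INPUT_W / w, INPUT_H / h)   # 0.55: the scale is set by the long side
tw, th = int(r * w), int(r * h)     # 352 x 264 after the aspect-preserving resize
pad_y = (INPUT_H - th) // 2         # 44 px of gray padding on top and bottom
# xywh2xyxy undoes this when mapping a network-space coordinate back to the
# frame: y_frame = (y_net - pad_y) / r, and x_frame = x_net / r (no x padding).
print(tw, th, pad_y)                # 352 264 44
```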
/dector_trt.py:
--------------------------------------------------------------------------------
import cv2
import numpy as np
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit

TRT_LOGGER = trt.Logger()
INPUT_W = 352
INPUT_H = 352
CONF_THRESH = 0.1
IOU_THRESHOLD = 0.4

# Detection class labels (replace with your own; see README step 4)
categories = ["battery", "pills", "bananas", "cans", "apples", "bottles",
              "ceram", "green vegetable", "broccoli", "boxes", "glass",
              "cigarette", "orange", "eggshell", "spitball", "tile"]


class Detector:

    def __init__(self, engine_file_path):
        self.img_size = 352
        self.threshold = 0.3
        self.stride = 1

        # Create a CUDA context on this device
        self.cfx = cuda.Device(0).make_context()
        stream = cuda.Stream()
        TRT_LOGGER = trt.Logger(trt.Logger.INFO)
        runtime = trt.Runtime(TRT_LOGGER)

        # Deserialize the engine from file
        with open(engine_file_path, "rb") as f:
            engine = runtime.deserialize_cuda_engine(f.read())
        context = engine.create_execution_context()

        host_inputs = []
        cuda_inputs = []
        host_outputs = []
        cuda_outputs = []
        bindings = []

        for binding in engine:
            size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            # Allocate page-locked host and device buffers
            host_mem = cuda.pagelocked_empty(size, dtype)
            cuda_mem = cuda.mem_alloc(host_mem.nbytes)
            # Append the device buffer address to device bindings
            bindings.append(int(cuda_mem))
            # Append to the appropriate list
            if engine.binding_is_input(binding):
                host_inputs.append(host_mem)
                cuda_inputs.append(cuda_mem)
            else:
                host_outputs.append(host_mem)
                cuda_outputs.append(cuda_mem)

        # Store
        self.stream = stream
        self.context = context
        self.engine = engine
        self.host_inputs = host_inputs
        self.cuda_inputs = cuda_inputs
        self.host_outputs = host_outputs
        self.cuda_outputs = cuda_outputs
        self.bindings = bindings

    def detect(self, im):
        # Make self the active context, pushing it on top of the context stack
        self.cfx.push()
        # Restore
        stream = self.stream
        context = self.context
        engine = self.engine
        host_inputs = self.host_inputs
        cuda_inputs = self.cuda_inputs
        host_outputs = self.host_outputs
        cuda_outputs = self.cuda_outputs
        bindings = self.bindings
        # Do image preprocess
        input_image, image_raw, origin_h, origin_w = self.preprocess_image(im)
        # Copy input image to host buffer
        np.copyto(host_inputs[0], input_image.ravel())
        # Transfer input data to the GPU
        cuda.memcpy_htod_async(cuda_inputs[0], host_inputs[0], stream)
        # Run inference
        context.execute_async(bindings=bindings, stream_handle=stream.handle)
        # Transfer predictions back from the GPU
        cuda.memcpy_dtoh_async(host_outputs[0], cuda_outputs[0], stream)
        # Synchronize the stream
        stream.synchronize()
        # Remove any context from the top of the context stack, deactivating it
        self.cfx.pop()
        # Use the first row of output, since batch_size = 1
        trt_outputs = host_outputs[0]
        # Do postprocess
        results_trt = self.post_process_new(im, trt_outputs, origin_h, origin_w)

        return results_trt

    def preprocess_image(self, image_raw):
        """
        description: Convert a BGR image to RGB, resize and pad it to the
                     target size, normalize to [0,1], and transform it to
                     NCHW format.
        param:
            image_raw: np.ndarray, the original BGR image
        return:
            image: the processed image
            image_raw: the original image
            h: original height
            w: original width
        """
        h, w, c = image_raw.shape
        image = cv2.cvtColor(image_raw, cv2.COLOR_BGR2RGB)
        # Calculate width, height and paddings
        r_w = INPUT_W / w
        r_h = INPUT_H / h
        if r_h > r_w:
            tw = INPUT_W
            th = int(r_w * h)
            tx1 = tx2 = 0
            ty1 = int((INPUT_H - th) / 2)
            ty2 = INPUT_H - th - ty1
        else:
            tw = int(r_h * w)
            th = INPUT_H
            tx1 = int((INPUT_W - tw) / 2)
            tx2 = INPUT_W - tw - tx1
            ty1 = ty2 = 0
        # Resize the long side to the target size while keeping the aspect ratio
        image = cv2.resize(image, (tw, th))
        # Pad the short side with gray (128, 128, 128)
        image = cv2.copyMakeBorder(
            image, ty1, ty2, tx1, tx2, cv2.BORDER_CONSTANT, value=(128, 128, 128)
        )
        image = image.astype(np.float32)
        # Normalize to [0,1]
        image /= 255.0
        # HWC to CHW format
        image = np.transpose(image, [2, 0, 1])
        # CHW to NCHW format
        image = np.expand_dims(image, axis=0)
        # Convert the image to row-major ("C") order
        image = np.ascontiguousarray(image)
        return image, image_raw, h, w

    def xywh2xyxy(self, origin_h, origin_w, x):
        """
        description: Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2],
                     where xy1 = top-left and xy2 = bottom-right
        param:
            origin_h: height of original image
            origin_w: width of original image
            x: a boxes array, each row is a box [center_x, center_y, w, h]
        return:
            y: a boxes array, each row is a box [x1, y1, x2, y2]
        """
        y = np.zeros_like(x)
        r_w = INPUT_W / origin_w
        r_h = INPUT_H / origin_h
        if r_h > r_w:
            y[:, 0] = x[:, 0] - x[:, 2] / 2
            y[:, 2] = x[:, 0] + x[:, 2] / 2
            y[:, 1] = x[:, 1] - x[:, 3] / 2 - (INPUT_H - r_w * origin_h) / 2
            y[:, 3] = x[:, 1] + x[:, 3] / 2 - (INPUT_H - r_w * origin_h) / 2
            y /= r_w
        else:
            y[:, 0] = x[:, 0] - x[:, 2] / 2 - (INPUT_W - r_h * origin_w) / 2
            y[:, 2] = x[:, 0] + x[:, 2] / 2 - (INPUT_W - r_h * origin_w) / 2
            y[:, 1] = x[:, 1] - x[:, 3] / 2
            y[:, 3] = x[:, 1] + x[:, 3] / 2
            y /= r_h

        return y

    def non_max_suppression(self, boxes, confs, classes, iou_thres=IOU_THRESHOLD):
        """
        description: Greedy NMS; repeatedly keep the highest-scoring box and
                     drop every remaining box whose IoU with it exceeds iou_thres.
        """
        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = boxes[:, 2]
        y2 = boxes[:, 3]
        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        order = confs.flatten().argsort()[::-1]
        keep = []
        while order.size > 0:
            i = order[0]
            keep.append(i)
            xx1 = np.maximum(x1[i], x1[order[1:]])
            yy1 = np.maximum(y1[i], y1[order[1:]])
            xx2 = np.minimum(x2[i], x2[order[1:]])
            yy2 = np.minimum(y2[i], y2[order[1:]])
            w = np.maximum(0.0, xx2 - xx1 + 1)
            h = np.maximum(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (areas[i] + areas[order[1:]] - inter)
            inds = np.where(ovr <= iou_thres)[0]
            order = order[inds + 1]
        boxes = boxes[keep]
        confs = confs[keep]
        classes = classes[keep]
        return boxes, confs, classes
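
    # Output layout from the tensorrtx yolov5 plugin (an assumption, inferred
    # from the decoding below): output[0] holds the number of detections,
    # followed by flat rows of 6 floats each, [center_x, center_y, w, h, conf,
    # cls_id], in network (352x352) coordinates. post_process_new decodes this
    # buffer, filters by CONF_THRESH, maps boxes back to the original frame,
    # runs NMS, and draws the surviving detections.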

    def post_process_new(self, im, output, origin_h, origin_w):
        """
        description: postprocess the prediction and draw the results on the image
        param:
            im: the original image to draw on
            output: a flat array like [num_boxes, cx, cy, w, h, conf, cls_id,
                    cx, cy, w, h, conf, cls_id, ...]
            origin_h: height of original image
            origin_w: width of original image
        return:
            im: the input image with the final boxes, labels and scores drawn on it
        """
        # Get the number of boxes detected
        num = int(output[0])
        # Reshape to a two-dimensional ndarray
        pred = np.reshape(output[1:], (-1, 6))[:num, :]
        # Get the boxes
        boxes = pred[:, :4]
        # Get the scores
        scores = pred[:, 4]
        # Get the class ids
        classid = pred[:, 5]
        # Keep only boxes with score > CONF_THRESH
        si = scores > CONF_THRESH
        boxes = boxes[si, :]
        scores = scores[si]
        classid = classid[si]
        # Transform bbox from [center_x, center_y, w, h] to [x1, y1, x2, y2]
        boxes = self.xywh2xyxy(origin_h, origin_w, boxes)
        # Do NMS
        result_boxes, result_scores, result_classid = self.non_max_suppression(boxes, scores, classid)
        # Draw boxes and labels on the original image
        for i in range(len(result_boxes)):
            x1, y1 = int(result_boxes[i][0]), int(result_boxes[i][1])
            x2, y2 = int(result_boxes[i][2]), int(result_boxes[i][3])
            cid = result_classid[i]
            label = categories[int(cid)]
            conf = result_scores[i]
            c1, c2 = (x1, y1), (x2, y2)
            cv2.rectangle(im, c1, c2, [225, 255, 0], thickness=1, lineType=cv2.LINE_AA)
            cv2.putText(
                im,
                label,
                (c1[0], c1[1] - 2),
                0,
                1 / 3,
                [225, 255, 255],
                thickness=1,
                lineType=cv2.LINE_AA,
            )

        return im

    def destroy(self):
        # Remove any context from the top of the context stack, deactivating it
        self.cfx.pop()
--------------------------------------------------------------------------------
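
For reference, a minimal single-image driver would look like the sketch below (the `weights/` paths match main.py; `test.jpg` is a placeholder):

```python
import ctypes
import cv2
from dector_trt import Detector

ctypes.CDLL("weights/libmyplugins.so")   # load the TensorRT plugins first
detector = Detector("weights/yolov5s.engine")
img = cv2.imread("test.jpg")             # placeholder input image
result = detector.detect(img)            # returns the frame with boxes drawn
cv2.imwrite("result.jpg", result)
detector.destroy()
```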