├── README.md
├── YOLOv5-seg_openvino_onnx.py
├── YOLOv5_bytetrack.py
├── YOLOv5_openvino_onnx.py
├── YOLOv6_openvino_onnx.py
├── YOLOv7-pose_openvino_onnx.py
├── YOLOv7_bytetrack.py
├── YOLOv7_openvino_onnx.py
├── YOLOv8-pose_openvino_onnx.py
├── YOLOv8-seg_openvino_onnx.py
├── YOLOv8_bytetrack.py
├── YOLOv8_openvino_onnx.py
├── YOLOv9_openvino_onnx.py
├── bytetrack
│   ├── __pycache__
│   │   ├── basetrack.cpython-36.pyc
│   │   ├── basetrack.cpython-38.pyc
│   │   ├── byte_tracker.cpython-36.pyc
│   │   ├── byte_tracker.cpython-38.pyc
│   │   ├── kalman_filter.cpython-36.pyc
│   │   ├── kalman_filter.cpython-38.pyc
│   │   ├── matching.cpython-36.pyc
│   │   └── matching.cpython-38.pyc
│   ├── basetrack.py
│   ├── byte_tracker.py
│   ├── kalman_filter.py
│   └── matching.py
└── test.mp4
/README.md:
--------------------------------------------------------------------------------
1 | # YOLOv8_Openvino
2 | ## 0 Contents
3 | Detection, tracking, instance segmentation, and keypoint detection for YOLOv5/v6/v7/v8/v9, all running on CPU under both OpenVINO and ONNXRuntime.
4 | 
5 | ## 1 Environment
6 | CPU: i5-12500
7 | 
8 | Python: 3.8.18
9 | 
10 | VS2019
11 | 
12 | Note: the lap and cython_bbox packages used by ByteTrack must be built from source during installation; a plain pip install fails without a C++ toolchain, hence VS2019.
13 | ## 2 Installing OpenVINO and ONNXRuntime
14 | ### 2.1 OpenVINO overview
15 | OpenVINO is an open-source toolkit developed by Intel for optimizing and deploying AI inference; its main purpose is accelerating deep learning inference.
16 | 
17 | OpenVINO integrates OpenCV and TensorFlow support internally, and beyond that it offers a powerful plugin development framework that lets developers optimize the inference pipeline on top of OpenVINO.
18 | 
19 | The overall architecture is: OpenVINO frontend → plugin middle layer → backend.
20 | OpenVINO's strength is that it hides the backend interfaces behind a single, unified frontend API: developers do not need to care how the backend is implemented (it might be TensorFlow, Keras, or ARM-NN), since plugins expose it to the frontend. One codebase written against OpenVINO can therefore run on multiple inference engines; OpenVINO works like an aggregation layer.
21 | 
22 | ### 2.2 ONNXRuntime overview
23 | ONNXRuntime is an inference framework from Microsoft that makes it very easy to run an ONNX model. It supports multiple execution backends, including CPU, GPU, TensorRT, and DML, and can fairly be called the most native runtime for ONNX models.
24 | 
25 | Although ONNX is mostly used as an intermediate representation — export from PyTorch to ONNX, then feed the model straight into TensorRT, MNN, or some other backend — that does not make ONNXRuntime any less excellent as an inference framework. Because it contains only the inference path (the latest ONNXRuntime can even train), reading its source is a good way to learn the core mechanics of a deep learning framework: op registration, memory management, execution logic, and so on.
26 | Overall, an ONNXRuntime run breaks down into three phases: session construction, model loading and initialization, and execution. As with all other mainstream frameworks, ONNXRuntime is most often driven from Python, while the code that actually executes the graph is C++.
27 | 
28 | ### 2.3 Installation
29 | pip install openvino -i https://pypi.tuna.tsinghua.edu.cn/simple
30 | 
31 | pip install onnxruntime -i https://pypi.tuna.tsinghua.edu.cn/simple
32 | 
33 | 
34 | 
35 | 
--------------------------------------------------------------------------------
/YOLOv5-seg_openvino_onnx.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import time
3 | import cv2
4 | import numpy as np
5 | from openvino.runtime import Core  # pip install openvino -i https://pypi.tuna.tsinghua.edu.cn/simple
6 | import onnxruntime as ort  # used for ONNXRuntime inference; pip install onnxruntime (installs the CPU build by default)
7 | 
8 | 
9 | # The 80 default COCO classes
10 | CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
11 |            'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
12 |            'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
13 |            'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
14 |            'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
15 |            'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
16 |            'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
17 |            'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
18 | 
19 | 
20 | class 
OpenvinoInference(object): 21 | def __init__(self, onnx_path): 22 | self.onnx_path = onnx_path 23 | ie = Core() 24 | self.model_onnx = ie.read_model(model=self.onnx_path) 25 | self.compiled_model_onnx = ie.compile_model(model=self.model_onnx, device_name="CPU") 26 | 27 | def predict(self, datas): 28 | # 注:self.compiled_model_onnx([datas])是一个字典,self.compiled_model_onnx.output(0)是字典键,第一种读取所有值方法(0.11s) 比 第二种按键取值的方法(0.20s) 耗时减半 29 | predict_data = list(self.compiled_model_onnx([datas]).values()) 30 | # predict_data = [self.compiled_model_onnx([datas])[self.compiled_model_onnx.output(0)], 31 | # self.compiled_model_onnx([datas])[self.compiled_model_onnx.output(1)]] 32 | return predict_data 33 | 34 | 35 | class YOLOv5_seg: 36 | """YOLOv5 segmentation model class for handling inference and visualization.""" 37 | 38 | def __init__(self, onnx_model, imgsz=(640, 640), infer_tool='openvino'): 39 | """ 40 | Initialization. 41 | 42 | Args: 43 | onnx_model (str): Path to the ONNX model. 44 | """ 45 | self.infer_tool = infer_tool 46 | if self.infer_tool == 'openvino': 47 | # 构建openvino推理引擎 48 | self.openvino = OpenvinoInference(onnx_model) 49 | self.ndtype = np.single 50 | else: 51 | # 构建onnxruntime推理引擎 52 | self.ort_session = ort.InferenceSession(onnx_model, 53 | providers=['CUDAExecutionProvider', 'CPUExecutionProvider'] 54 | if ort.get_device() == 'GPU' else ['CPUExecutionProvider']) 55 | 56 | # Numpy dtype: support both FP32 and FP16 onnx model 57 | self.ndtype = np.half if self.ort_session.get_inputs()[0].type == 'tensor(float16)' else np.single 58 | 59 | self.classes = CLASSES # 加载模型类别 60 | self.model_height, self.model_width = imgsz[0], imgsz[1] # 图像resize大小 61 | self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3)) # 为每个类别生成调色板 62 | 63 | def __call__(self, im0, conf_threshold=0.4, iou_threshold=0.45, nm=32): 64 | """ 65 | The whole pipeline: pre-process -> inference -> post-process. 66 | 67 | Args: 68 | im0 (Numpy.ndarray): original input image. 69 | conf_threshold (float): confidence threshold for filtering predictions. 70 | iou_threshold (float): iou threshold for NMS. 71 | nm (int): the number of masks. 72 | 73 | Returns: 74 | boxes (List): list of bounding boxes. 75 | segments (List): list of segments. 76 | masks (np.ndarray): [N, H, W], output masks. 77 | """ 78 | # 前处理Pre-process 79 | t1 = time.time() 80 | im, ratio, (pad_w, pad_h) = self.preprocess(im0) 81 | print('预处理时间:{:.3f}s'.format(time.time() - t1)) 82 | 83 | # 推理 inference 84 | t2 = time.time() 85 | if self.infer_tool == 'openvino': 86 | preds = self.openvino.predict(im) 87 | else: 88 | preds = self.ort_session.run(None, {self.ort_session.get_inputs()[0].name: im}) # 与bbox区别,输出是个列表,[检测头的输出(1, 116, 8400), 分割头的输出(1, 32, 160, 160)] 89 | print('推理时间:{:.2f}s'.format(time.time() - t2)) 90 | 91 | # 后处理Post-process 92 | t3 = time.time() 93 | boxes, segments, masks = self.postprocess(preds, 94 | im0=im0, 95 | ratio=ratio, 96 | pad_w=pad_w, 97 | pad_h=pad_h, 98 | conf_threshold=conf_threshold, 99 | iou_threshold=iou_threshold, 100 | nm=nm 101 | ) 102 | print('后处理时间:{:.3f}s'.format(time.time() - t3)) 103 | 104 | return boxes, segments, masks 105 | 106 | # 前处理,包括:resize, pad, HWC to CHW,BGR to RGB,归一化,增加维度CHW -> BCHW 107 | def preprocess(self, img): 108 | """ 109 | Pre-processes the input image. 110 | 111 | Args: 112 | img (Numpy.ndarray): image about to be processed. 113 | 114 | Returns: 115 | img_process (Numpy.ndarray): image preprocessed for inference. 116 | ratio (tuple): width, height ratios in letterbox. 
117 | pad_w (float): width padding in letterbox. 118 | pad_h (float): height padding in letterbox. 119 | """ 120 | # Resize and pad input image using letterbox() (Borrowed from Ultralytics) 121 | shape = img.shape[:2] # original image shape 122 | new_shape = (self.model_height, self.model_width) 123 | r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) 124 | ratio = r, r 125 | new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) 126 | pad_w, pad_h = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2 # wh padding 127 | if shape[::-1] != new_unpad: # resize 128 | img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) 129 | top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1)) 130 | left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1)) 131 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)) # 填充 132 | 133 | # Transforms: HWC to CHW -> BGR to RGB -> div(255) -> contiguous -> add axis(optional) 134 | img = np.ascontiguousarray(np.einsum('HWC->CHW', img)[::-1], dtype=self.ndtype) / 255.0 135 | img_process = img[None] if len(img.shape) == 3 else img 136 | return img_process, ratio, (pad_w, pad_h) 137 | 138 | # 后处理,包括:阈值过滤+NMS+masks处理 139 | def postprocess(self, preds, im0, ratio, pad_w, pad_h, conf_threshold, iou_threshold, nm=32): 140 | """ 141 | Post-process the prediction. 142 | 143 | Args: 144 | preds (Numpy.ndarray): predictions come from ort.session.run(). 145 | im0 (Numpy.ndarray): [h, w, c] original input image. 146 | ratio (tuple): width, height ratios in letterbox. 147 | pad_w (float): width padding in letterbox. 148 | pad_h (float): height padding in letterbox. 149 | conf_threshold (float): conf threshold. 150 | iou_threshold (float): iou threshold. 151 | nm (int): the number of masks. 152 | 153 | Returns: 154 | boxes (List): list of bounding boxes. 155 | segments (List): list of segments. 156 | masks (np.ndarray): [N, H, W], output masks. 
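        Illustrative layout (a sketch, assuming the stock yolov5s-seg COCO export):
        each of the 8400*3 = 25200 candidate rows in preds[0] carries 117 channels,
        4 (cx, cy, w, h) + 1 objectness + 80 class scores + 32 mask coefficients;
        after filtering, np.c_ repacks every kept row into 4 + 1 + 1 + 32 = 38
        columns (box, score, class id, mask coefficients).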
157 | """ 158 | # (Batch_size, Num_anchors, xywh_score_conf_cls), v5和v6_1.0的[..., 4]是置信度分数,v8v9采用类别里面最大的概率作为置信度score 159 | x, protos = preds[0], preds[1] # 与bbox区别:Two outputs: 检测头的输出(1, 8400*3, 117), 分割头的输出(1, 32, 160, 160) 160 | 161 | # Predictions filtering by conf-threshold 162 | x = x[x[..., 4] > conf_threshold] 163 | 164 | # Create a new matrix which merge these(box, score, cls, nm) into one 165 | # For more details about `numpy.c_()`: https://numpy.org/doc/1.26/reference/generated/numpy.c_.html 166 | x = np.c_[x[..., :4], x[..., 4], np.argmax(x[..., 5:-nm], axis=-1), x[..., -nm:]] 167 | 168 | # NMS filtering 169 | # 经过NMS后的值, np.array([[x, y, w, h, conf, cls, nm], ...]), shape=(-1, 4 + 1 + 1 + 32) 170 | x = x[cv2.dnn.NMSBoxes(x[:, :4], x[:, 4], conf_threshold, iou_threshold)] 171 | 172 | # 重新缩放边界框,为画图做准备 173 | if len(x) > 0: 174 | # Bounding boxes format change: cxcywh -> xyxy 175 | x[..., [0, 1]] -= x[..., [2, 3]] / 2 176 | x[..., [2, 3]] += x[..., [0, 1]] 177 | 178 | # Rescales bounding boxes from model shape(model_height, model_width) to the shape of original image 179 | x[..., :4] -= [pad_w, pad_h, pad_w, pad_h] 180 | x[..., :4] /= min(ratio) 181 | 182 | # Bounding boxes boundary clamp 183 | x[..., [0, 2]] = x[:, [0, 2]].clip(0, im0.shape[1]) 184 | x[..., [1, 3]] = x[:, [1, 3]].clip(0, im0.shape[0]) 185 | 186 | # 与bbox区别:增加masks处理 187 | # Process masks 188 | masks = self.process_mask(protos[0], x[:, 6:], x[:, :4], im0.shape) 189 | # Masks -> Segments(contours) 190 | segments = self.masks2segments(masks) 191 | 192 | return x[..., :6], segments, masks # boxes, segments, masks 193 | else: 194 | return [], [], [] 195 | 196 | @staticmethod 197 | def masks2segments(masks): 198 | """ 199 | It takes a list of masks(n,h,w) and returns a list of segments(n,xy) (Borrowed from 200 | https://github.com/ultralytics/ultralytics/blob/465df3024f44fa97d4fad9986530d5a13cdabdca/ultralytics/utils/ops.py#L750) 201 | 202 | Args: 203 | masks (numpy.ndarray): the output of the model, which is a tensor of shape (batch_size, 160, 160). 204 | 205 | Returns: 206 | segments (List): list of segment masks. 207 | """ 208 | segments = [] 209 | for x in masks.astype('uint8'): 210 | c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[0] # CHAIN_APPROX_SIMPLE 该函数用于查找二值图像中的轮廓。 211 | if c: 212 | # 这段代码的目的是找到图像x中的最外层轮廓,并从中选择最长的轮廓,然后将其转换为NumPy数组的形式。 213 | c = np.array(c[np.array([len(x) for x in c]).argmax()]).reshape(-1, 2) 214 | else: 215 | c = np.zeros((0, 2)) # no segments found 216 | segments.append(c.astype('float32')) 217 | return segments 218 | 219 | 220 | def process_mask(self, protos, masks_in, bboxes, im0_shape): 221 | """ 222 | Takes the output of the mask head, and applies the mask to the bounding boxes. This produces masks of higher quality 223 | but is slower. (Borrowed from https://github.com/ultralytics/ultralytics/blob/465df3024f44fa97d4fad9986530d5a13cdabdca/ultralytics/utils/ops.py#L618) 224 | 225 | Args: 226 | protos (numpy.ndarray): [mask_dim, mask_h, mask_w]. 227 | masks_in (numpy.ndarray): [n, mask_dim], n is number of masks after nms. 228 | bboxes (numpy.ndarray): bboxes re-scaled to original image shape. 229 | im0_shape (tuple): the size of the input image (h,w,c). 230 | 231 | Returns: 232 | (numpy.ndarray): The upsampled masks. 
233 | """ 234 | c, mh, mw = protos.shape 235 | masks = np.matmul(masks_in, protos.reshape((c, -1))).reshape((-1, mh, mw)).transpose(1, 2, 0) # HWN 236 | masks = np.ascontiguousarray(masks) 237 | masks = self.scale_mask(masks, im0_shape) # re-scale mask from P3 shape to original input image shape 238 | masks = np.einsum('HWN -> NHW', masks) # HWN -> NHW 239 | masks = self.crop_mask(masks, bboxes) 240 | return np.greater(masks, 0.5) 241 | 242 | @staticmethod 243 | def scale_mask(masks, im0_shape, ratio_pad=None): 244 | """ 245 | Takes a mask, and resizes it to the original image size. (Borrowed from 246 | https://github.com/ultralytics/ultralytics/blob/465df3024f44fa97d4fad9986530d5a13cdabdca/ultralytics/utils/ops.py#L305) 247 | 248 | Args: 249 | masks (np.ndarray): resized and padded masks/images, [h, w, num]/[h, w, 3]. 250 | im0_shape (tuple): the original image shape. 251 | ratio_pad (tuple): the ratio of the padding to the original image. 252 | 253 | Returns: 254 | masks (np.ndarray): The masks that are being returned. 255 | """ 256 | im1_shape = masks.shape[:2] 257 | if ratio_pad is None: # calculate from im0_shape 258 | gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1]) # gain = old / new 259 | pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2 # wh padding 260 | else: 261 | pad = ratio_pad[1] 262 | 263 | # Calculate tlbr of mask 264 | top, left = int(round(pad[1] - 0.1)), int(round(pad[0] - 0.1)) # y, x 265 | bottom, right = int(round(im1_shape[0] - pad[1] + 0.1)), int(round(im1_shape[1] - pad[0] + 0.1)) 266 | if len(masks.shape) < 2: 267 | raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}') 268 | masks = masks[top:bottom, left:right] 269 | masks = cv2.resize(masks, (im0_shape[1], im0_shape[0]), 270 | interpolation=cv2.INTER_LINEAR) # INTER_CUBIC would be better 271 | if len(masks.shape) == 2: 272 | masks = masks[:, :, None] 273 | return masks 274 | 275 | @staticmethod 276 | def crop_mask(masks, boxes): 277 | """ 278 | It takes a mask and a bounding box, and returns a mask that is cropped to the bounding box. (Borrowed from 279 | https://github.com/ultralytics/ultralytics/blob/465df3024f44fa97d4fad9986530d5a13cdabdca/ultralytics/utils/ops.py#L599) 280 | 281 | Args: 282 | masks (Numpy.ndarray): [n, h, w] tensor of masks. 283 | boxes (Numpy.ndarray): [n, 4] tensor of bbox coordinates in relative point form. 284 | 285 | Returns: 286 | (Numpy.ndarray): The masks are being cropped to the bounding box. 287 | """ 288 | n, h, w = masks.shape 289 | x1, y1, x2, y2 = np.split(boxes[:, :, None], 4, 1) 290 | r = np.arange(w, dtype=x1.dtype)[None, None, :] 291 | c = np.arange(h, dtype=x1.dtype)[None, :, None] 292 | return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2)) 293 | 294 | # 绘框,与bbox区别:增加masks可视化 295 | def draw_and_visualize(self, im, bboxes, segments, vis=False, save=True): 296 | """ 297 | Draw and visualize results. 298 | 299 | Args: 300 | im (np.ndarray): original image, shape [h, w, c]. 301 | bboxes (numpy.ndarray): [n, 6], n is number of bboxes. 302 | segments (List): list of segment masks. 303 | vis (bool): imshow using OpenCV. 304 | save (bool): save image annotated. 
305 | 306 | Returns: 307 | None 308 | """ 309 | # Draw rectangles and polygons 310 | im_canvas = im.copy() 311 | # Draw rectangles 312 | for (*box, conf, cls_), segment in zip(bboxes, segments): 313 | # draw contour and fill mask 314 | cv2.polylines(im, np.int32([segment]), True, (255, 255, 255), 2) # white borderline 315 | cv2.fillPoly(im_canvas, np.int32([segment]), (255, 0, 0)) 316 | 317 | # draw bbox rectangle 318 | cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), 319 | self.color_palette[int(cls_)], 1, cv2.LINE_AA) 320 | cv2.putText(im, f'{self.classes[int(cls_)]}: {conf:.3f}', (int(box[0]), int(box[1] - 9)), 321 | cv2.FONT_HERSHEY_SIMPLEX, 0.7, self.color_palette[int(cls_)], 2, cv2.LINE_AA) 322 | 323 | # Mix image 324 | im = cv2.addWeighted(im_canvas, 0.3, im, 0.7, 0) 325 | 326 | # Show image 327 | if vis: 328 | cv2.imshow('demo', im) 329 | cv2.waitKey(0) 330 | cv2.destroyAllWindows() 331 | 332 | # Save image 333 | if save: 334 | cv2.imwrite('demo.jpg', im) 335 | 336 | 337 | if __name__ == '__main__': 338 | # Create an argument parser to handle command-line arguments 339 | parser = argparse.ArgumentParser() 340 | parser.add_argument('--model', type=str, default='weights\\yolov5s-seg.onnx', help='Path to ONNX model') 341 | parser.add_argument('--source', type=str, default=str('bus.jpg'), help='Path to input image') 342 | parser.add_argument('--imgsz', type=tuple, default=(640, 640), help='Image input size') 343 | parser.add_argument('--conf', type=float, default=0.25, help='Confidence threshold') 344 | parser.add_argument('--iou', type=float, default=0.45, help='NMS IoU threshold') 345 | parser.add_argument('--infer_tool', type=str, default='openvino', choices=("openvino", "onnxruntime"), help='选择推理引擎') 346 | args = parser.parse_args() 347 | 348 | # Build model 349 | model = YOLOv5_seg(args.model, args.imgsz, args.infer_tool) 350 | 351 | # Read image by OpenCV 352 | img = cv2.imread(args.source) 353 | 354 | # Inference 355 | boxes, segments, _ = model(img, conf_threshold=args.conf, iou_threshold=args.iou) 356 | 357 | # Visualize, Draw bboxes and polygons 358 | if len(boxes) > 0: 359 | model.draw_and_visualize(img, boxes, segments, vis=False, save=True) 360 | 361 | -------------------------------------------------------------------------------- /YOLOv5_bytetrack.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | import cv2 4 | import numpy as np 5 | from openvino.runtime import Core # pip install openvino -i https://pypi.tuna.tsinghua.edu.cn/simple 6 | import onnxruntime as ort # 使用onnxruntime推理用上,pip install onnxruntime,默认安装CPU 7 | 8 | import copy 9 | from bytetrack.byte_tracker import BYTETracker 10 | 11 | # COCO默认的80类 12 | CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 13 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 14 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 15 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 16 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 17 | 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 18 | 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 
'microwave', 'oven', 19 | 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] 20 | 21 | 22 | class OpenvinoInference(object): 23 | def __init__(self, onnx_path): 24 | self.onnx_path = onnx_path 25 | ie = Core() 26 | self.model_onnx = ie.read_model(model=self.onnx_path) 27 | self.compiled_model_onnx = ie.compile_model(model=self.model_onnx, device_name="CPU") 28 | self.output_layer_onnx = self.compiled_model_onnx.output(0) 29 | 30 | def predict(self, datas): 31 | predict_data = self.compiled_model_onnx([datas])[self.output_layer_onnx] 32 | return predict_data 33 | 34 | 35 | class YOLOv5: 36 | """YOLOv5 object detection model class for handling inference and visualization.""" 37 | 38 | def __init__(self, onnx_model, imgsz=(640, 640), infer_tool='openvino'): 39 | """ 40 | Initialization. 41 | 42 | Args: 43 | onnx_model (str): Path to the ONNX model. 44 | """ 45 | self.infer_tool = infer_tool 46 | if self.infer_tool == 'openvino': 47 | # 构建openvino推理引擎 48 | self.openvino = OpenvinoInference(onnx_model) 49 | self.ndtype = np.single 50 | else: 51 | # 构建onnxruntime推理引擎 52 | self.ort_session = ort.InferenceSession(onnx_model, 53 | providers=['CUDAExecutionProvider', 'CPUExecutionProvider'] 54 | if ort.get_device() == 'GPU' else ['CPUExecutionProvider']) 55 | 56 | # Numpy dtype: support both FP32 and FP16 onnx model 57 | self.ndtype = np.half if self.ort_session.get_inputs()[0].type == 'tensor(float16)' else np.single 58 | 59 | self.classes = CLASSES # 加载模型类别 60 | self.model_height, self.model_width = imgsz[0], imgsz[1] # 图像resize大小 61 | self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3)) # 为每个类别生成调色板 62 | 63 | def __call__(self, im0, conf_threshold=0.4, iou_threshold=0.45): 64 | """ 65 | The whole pipeline: pre-process -> inference -> post-process. 66 | 67 | Args: 68 | im0 (Numpy.ndarray): original input image. 69 | conf_threshold (float): confidence threshold for filtering predictions. 70 | iou_threshold (float): iou threshold for NMS. 71 | 72 | Returns: 73 | boxes (List): list of bounding boxes. 74 | """ 75 | # 前处理Pre-process 76 | t1 = time.time() 77 | im, ratio, (pad_w, pad_h) = self.preprocess(im0) 78 | print('预处理时间:{:.3f}s'.format(time.time() - t1)) 79 | 80 | # 推理 inference 81 | t2 = time.time() 82 | if self.infer_tool == 'openvino': 83 | preds = self.openvino.predict(im) 84 | else: 85 | preds = self.ort_session.run(None, {self.ort_session.get_inputs()[0].name: im})[0] 86 | print('推理时间:{:.3f}s'.format(time.time() - t2)) 87 | 88 | # 后处理Post-process 89 | t3 = time.time() 90 | boxes = self.postprocess(preds, 91 | im0=im0, 92 | ratio=ratio, 93 | pad_w=pad_w, 94 | pad_h=pad_h, 95 | conf_threshold=conf_threshold, 96 | iou_threshold=iou_threshold, 97 | ) 98 | print('后处理时间:{:.3f}s'.format(time.time() - t3)) 99 | 100 | return boxes 101 | 102 | # 前处理,包括:resize, pad, HWC to CHW,BGR to RGB,归一化,增加维度CHW -> BCHW 103 | def preprocess(self, img): 104 | """ 105 | Pre-processes the input image. 106 | 107 | Args: 108 | img (Numpy.ndarray): image about to be processed. 109 | 110 | Returns: 111 | img_process (Numpy.ndarray): image preprocessed for inference. 112 | ratio (tuple): width, height ratios in letterbox. 113 | pad_w (float): width padding in letterbox. 114 | pad_h (float): height padding in letterbox. 
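        Worked example (hypothetical 1920x1080 frame with a 640x640 model):
        r = min(640/1080, 640/1920) = 1/3, so new_unpad = (640, 360),
        pad_w = 0 and pad_h = 140, i.e. the frame is resized to 640x360 and
        padded top and bottom with 140 gray (114, 114, 114) rows each.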
115 | """ 116 | # Resize and pad input image using letterbox() (Borrowed from Ultralytics) 117 | shape = img.shape[:2] # original image shape 118 | new_shape = (self.model_height, self.model_width) 119 | r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) 120 | ratio = r, r 121 | new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) 122 | pad_w, pad_h = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2 # wh padding 123 | if shape[::-1] != new_unpad: # resize 124 | img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) 125 | top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1)) 126 | left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1)) 127 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)) # 填充 128 | 129 | # Transforms: HWC to CHW -> BGR to RGB -> div(255) -> contiguous -> add axis(optional) 130 | img = np.ascontiguousarray(np.einsum('HWC->CHW', img)[::-1], dtype=self.ndtype) / 255.0 131 | img_process = img[None] if len(img.shape) == 3 else img 132 | return img_process, ratio, (pad_w, pad_h) 133 | 134 | # 后处理,包括:阈值过滤与NMS 135 | def postprocess(self, preds, im0, ratio, pad_w, pad_h, conf_threshold, iou_threshold): 136 | """ 137 | Post-process the prediction. 138 | 139 | Args: 140 | preds (Numpy.ndarray): predictions come from ort.session.run(). 141 | im0 (Numpy.ndarray): [h, w, c] original input image. 142 | ratio (tuple): width, height ratios in letterbox. 143 | pad_w (float): width padding in letterbox. 144 | pad_h (float): height padding in letterbox. 145 | conf_threshold (float): conf threshold. 146 | iou_threshold (float): iou threshold. 147 | 148 | Returns: 149 | boxes (List): list of bounding boxes. 150 | """ 151 | # (Batch_size, Num_anchors, xywh_score_conf_cls), v5和v6_1.0的[..., 4]是置信度分数,v8v9采用类别里面最大的概率作为置信度score 152 | x = preds # outputs: predictions (1, 8400*3, 85) 153 | 154 | # Predictions filtering by conf-threshold 155 | x = x[x[..., 4] > conf_threshold] 156 | 157 | # Create a new matrix which merge these(box, score, cls) into one 158 | # For more details about `numpy.c_()`: https://numpy.org/doc/1.26/reference/generated/numpy.c_.html 159 | x = np.c_[x[..., :4], x[..., 4], np.argmax(x[..., 5:], axis=-1)] 160 | 161 | # NMS filtering 162 | # 经过NMS后的值, np.array([[x, y, w, h, conf, cls], ...]), shape=(-1, 4 + 1 + 1) 163 | x = x[cv2.dnn.NMSBoxes(x[:, :4], x[:, 4], conf_threshold, iou_threshold)] 164 | 165 | # 重新缩放边界框,为画图做准备 166 | if len(x) > 0: 167 | # Bounding boxes format change: cxcywh -> xyxy 168 | x[..., [0, 1]] -= x[..., [2, 3]] / 2 169 | x[..., [2, 3]] += x[..., [0, 1]] 170 | 171 | # Rescales bounding boxes from model shape(model_height, model_width) to the shape of original image 172 | x[..., :4] -= [pad_w, pad_h, pad_w, pad_h] 173 | x[..., :4] /= min(ratio) 174 | 175 | # Bounding boxes boundary clamp 176 | x[..., [0, 2]] = x[:, [0, 2]].clip(0, im0.shape[1]) 177 | x[..., [1, 3]] = x[:, [1, 3]].clip(0, im0.shape[0]) 178 | 179 | return x[..., :6] # boxes 180 | else: 181 | return [] 182 | 183 | # 绘框 184 | def draw_and_visualize(self, im, bboxes, video_writer, vis=False, save=False, is_track=False): 185 | """ 186 | Draw and visualize results. 187 | 188 | Args: 189 | im (np.ndarray): original image, shape [h, w, c]. 190 | bboxes (numpy.ndarray): [n, 6], n is number of bboxes. 191 | vis (bool): imshow using OpenCV. 192 | save (bool): save image annotated. 
193 | 194 | Returns: 195 | None 196 | """ 197 | # Draw rectangles 198 | if not is_track: 199 | for (*box, conf, cls_) in bboxes: 200 | # draw bbox rectangle 201 | cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), 202 | self.color_palette[int(cls_)], 1, cv2.LINE_AA) 203 | cv2.putText(im, f'{self.classes[int(cls_)]}: {conf:.3f}', (int(box[0]), int(box[1] - 9)), 204 | cv2.FONT_HERSHEY_SIMPLEX, 0.7, self.color_palette[int(cls_)], 2, cv2.LINE_AA) 205 | else: 206 | for (*box, conf, id_) in bboxes: 207 | # draw bbox rectangle 208 | cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), 209 | (0, 0, 255), 1, cv2.LINE_AA) 210 | cv2.putText(im, f'{id_}: {conf:.3f}', (int(box[0]), int(box[1] - 9)), 211 | cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2, cv2.LINE_AA) 212 | 213 | # Show image 214 | if vis: 215 | cv2.imshow('demo', im) 216 | cv2.waitKey(1) 217 | 218 | # Save video 219 | if save: 220 | video_writer.write(im) 221 | 222 | 223 | 224 | class ByteTrackerONNX(object): 225 | def __init__(self, args): 226 | self.args = args 227 | self.tracker = BYTETracker(args, frame_rate=30) 228 | 229 | def _tracker_update(self, dets, image): 230 | online_targets = [] 231 | if dets is not None: 232 | online_targets = self.tracker.update( 233 | dets[:, :5], 234 | [image.shape[0], image.shape[1]], 235 | [image.shape[0], image.shape[1]], 236 | ) 237 | 238 | online_tlwhs = [] 239 | online_ids = [] 240 | online_scores = [] 241 | for online_target in online_targets: 242 | tlwh = online_target.tlwh 243 | track_id = online_target.track_id 244 | vertical = tlwh[2] / tlwh[3] > 1.6 245 | if tlwh[2] * tlwh[3] > self.args.min_box_area and not vertical: 246 | online_tlwhs.append(tlwh) 247 | online_ids.append(track_id) 248 | online_scores.append(online_target.score) 249 | 250 | return online_tlwhs, online_ids, online_scores 251 | 252 | 253 | def inference(self, image, dets): 254 | """ 255 | Args: dets: 检测结果, [x1, y1, x2, y2, conf, cls] 256 | Returns: np.array([[x1, y1, x2, y2, conf, ids], ...]) 257 | """ 258 | bboxes, ids, scores = self._tracker_update(dets, image) 259 | if len(bboxes) == 0: 260 | return [] 261 | # Bounding boxes format change: tlwh -> xyxy 262 | bboxes = np.array(bboxes) 263 | bboxes[..., [2, 3]] += bboxes[..., [0, 1]] 264 | bboxes = np.c_[bboxes, np.array(scores), np.array(ids)] 265 | return bboxes 266 | 267 | 268 | if __name__ == '__main__': 269 | # Create an argument parser to handle command-line arguments 270 | parser = argparse.ArgumentParser() 271 | parser.add_argument('--model', type=str, default='yolov5s.onnx', help='Path to ONNX model') 272 | parser.add_argument('--source', type=str, default=str('test.mp4'), help='Path to input image') 273 | parser.add_argument('--imgsz', type=tuple, default=(640, 640), help='Image input size') 274 | parser.add_argument('--conf', type=float, default=0.25, help='Confidence threshold') 275 | parser.add_argument('--iou', type=float, default=0.45, help='NMS IoU threshold') 276 | parser.add_argument('--infer_tool', type=str, default='openvino', choices=("openvino", "onnxruntime"), help='选择推理引擎') 277 | 278 | parser.add_argument('--is_track', type=bool, default=True, help='是否启用跟踪') 279 | parser.add_argument('--track_thresh', type=float, default=0.5, help='tracking confidence threshold') 280 | parser.add_argument('--track_buffer', type=int, default=30, help='the frames for keep lost tracks, usually as same with FPS') 281 | parser.add_argument('--match_thresh', type=float, default=0.8, help='matching threshold for tracking') 282 | 
parser.add_argument('--min_box_area', type=float, default=10, help='filter out tiny boxes',) 283 | parser.add_argument('--mot20', dest='mot20', default=False, action='store_true', help='test mot20.',) 284 | args = parser.parse_args() 285 | 286 | # Build model 287 | model = YOLOv5(args.model, args.imgsz, args.infer_tool) 288 | 289 | bytetrack = ByteTrackerONNX(args) 290 | 291 | # 读取视频,解析帧数宽高,保存视频 292 | cap = cv2.VideoCapture(args.source) 293 | width = cap.get(cv2.CAP_PROP_FRAME_WIDTH) 294 | height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) 295 | fps = cap.get(cv2.CAP_PROP_FPS) 296 | frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT) 297 | video_writer = cv2.VideoWriter('demo.mp4', cv2.VideoWriter_fourcc(*"mp4v"), fps, (int(width), int(height))) 298 | frame_id = 1 299 | 300 | while True: 301 | start_time = time.time() 302 | ret, img = cap.read() 303 | if not ret: 304 | break 305 | 306 | # Inference 307 | boxes = model(img, conf_threshold=args.conf, iou_threshold=args.iou) 308 | 309 | # track 310 | if args.is_track: 311 | boxes = bytetrack.inference(img, boxes) 312 | 313 | # Visualize 314 | if len(boxes) > 0: 315 | model.draw_and_visualize(copy.deepcopy(img), boxes, video_writer, vis=False, save=True, is_track=args.is_track) 316 | 317 | end_time = time.time() - start_time 318 | print('frame {}/{} (Total time: {:.2f} ms)'.format(frame_id, int(frame_count), end_time * 1000)) 319 | frame_id += 1 320 | 321 | 322 | 323 | -------------------------------------------------------------------------------- /YOLOv5_openvino_onnx.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | import cv2 4 | import numpy as np 5 | from openvino.runtime import Core # pip install openvino -i https://pypi.tuna.tsinghua.edu.cn/simple 6 | import onnxruntime as ort # 使用onnxruntime推理用上,pip install onnxruntime,默认安装CPU 7 | 8 | 9 | # COCO默认的80类 10 | CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 11 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 12 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 13 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 14 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 15 | 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 16 | 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 17 | 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] 18 | 19 | 20 | class OpenvinoInference(object): 21 | def __init__(self, onnx_path): 22 | self.onnx_path = onnx_path 23 | ie = Core() 24 | self.model_onnx = ie.read_model(model=self.onnx_path) 25 | self.compiled_model_onnx = ie.compile_model(model=self.model_onnx, device_name="CPU") 26 | self.output_layer_onnx = self.compiled_model_onnx.output(0) 27 | 28 | def predict(self, datas): 29 | predict_data = self.compiled_model_onnx([datas])[self.output_layer_onnx] 30 | return predict_data 31 | 32 | 33 | class YOLOv5: 34 | """YOLOv5 object detection model class for handling inference and visualization.""" 35 | 36 | def __init__(self, onnx_model, imgsz=(640, 640), infer_tool='openvino'): 37 | """ 38 | Initialization. 
39 | 40 | Args: 41 | onnx_model (str): Path to the ONNX model. 42 | """ 43 | self.infer_tool = infer_tool 44 | if self.infer_tool == 'openvino': 45 | # 构建openvino推理引擎 46 | self.openvino = OpenvinoInference(onnx_model) 47 | self.ndtype = np.single 48 | else: 49 | # 构建onnxruntime推理引擎 50 | self.ort_session = ort.InferenceSession(onnx_model, 51 | providers=['CUDAExecutionProvider', 'CPUExecutionProvider'] 52 | if ort.get_device() == 'GPU' else ['CPUExecutionProvider']) 53 | 54 | # Numpy dtype: support both FP32 and FP16 onnx model 55 | self.ndtype = np.half if self.ort_session.get_inputs()[0].type == 'tensor(float16)' else np.single 56 | 57 | self.classes = CLASSES # 加载模型类别 58 | self.model_height, self.model_width = imgsz[0], imgsz[1] # 图像resize大小 59 | self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3)) # 为每个类别生成调色板 60 | 61 | def __call__(self, im0, conf_threshold=0.4, iou_threshold=0.45): 62 | """ 63 | The whole pipeline: pre-process -> inference -> post-process. 64 | 65 | Args: 66 | im0 (Numpy.ndarray): original input image. 67 | conf_threshold (float): confidence threshold for filtering predictions. 68 | iou_threshold (float): iou threshold for NMS. 69 | 70 | Returns: 71 | boxes (List): list of bounding boxes. 72 | """ 73 | # 前处理Pre-process 74 | t1 = time.time() 75 | im, ratio, (pad_w, pad_h) = self.preprocess(im0) 76 | print('预处理时间:{:.3f}s'.format(time.time() - t1)) 77 | 78 | # 推理 inference 79 | t2 = time.time() 80 | if self.infer_tool == 'openvino': 81 | preds = self.openvino.predict(im) 82 | else: 83 | preds = self.ort_session.run(None, {self.ort_session.get_inputs()[0].name: im})[0] 84 | print('推理时间:{:.2f}s'.format(time.time() - t2)) 85 | 86 | # 后处理Post-process 87 | t3 = time.time() 88 | boxes = self.postprocess(preds, 89 | im0=im0, 90 | ratio=ratio, 91 | pad_w=pad_w, 92 | pad_h=pad_h, 93 | conf_threshold=conf_threshold, 94 | iou_threshold=iou_threshold, 95 | ) 96 | print('后处理时间:{:.3f}s'.format(time.time() - t3)) 97 | 98 | return boxes 99 | 100 | # 前处理,包括:resize, pad, HWC to CHW,BGR to RGB,归一化,增加维度CHW -> BCHW 101 | def preprocess(self, img): 102 | """ 103 | Pre-processes the input image. 104 | 105 | Args: 106 | img (Numpy.ndarray): image about to be processed. 107 | 108 | Returns: 109 | img_process (Numpy.ndarray): image preprocessed for inference. 110 | ratio (tuple): width, height ratios in letterbox. 111 | pad_w (float): width padding in letterbox. 112 | pad_h (float): height padding in letterbox. 
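        Note on the transform step below: np.einsum('HWC->CHW', img) moves the
        channel axis to the front, so the following [::-1] reverses the channel
        axis (BGR -> RGB); division by 255.0 scales pixels to [0, 1], and
        img[None] prepends the batch axis, yielding a (1, 3, 640, 640) blob
        (float32 for an FP32 model) at the default 640x640 input size.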
113 | """ 114 | # Resize and pad input image using letterbox() (Borrowed from Ultralytics) 115 | shape = img.shape[:2] # original image shape 116 | new_shape = (self.model_height, self.model_width) 117 | r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) 118 | ratio = r, r 119 | new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) 120 | pad_w, pad_h = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2 # wh padding 121 | if shape[::-1] != new_unpad: # resize 122 | img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) 123 | top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1)) 124 | left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1)) 125 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)) # 填充 126 | 127 | # Transforms: HWC to CHW -> BGR to RGB -> div(255) -> contiguous -> add axis(optional) 128 | img = np.ascontiguousarray(np.einsum('HWC->CHW', img)[::-1], dtype=self.ndtype) / 255.0 129 | img_process = img[None] if len(img.shape) == 3 else img 130 | return img_process, ratio, (pad_w, pad_h) 131 | 132 | # 后处理,包括:阈值过滤与NMS 133 | def postprocess(self, preds, im0, ratio, pad_w, pad_h, conf_threshold, iou_threshold): 134 | """ 135 | Post-process the prediction. 136 | 137 | Args: 138 | preds (Numpy.ndarray): predictions come from ort.session.run(). 139 | im0 (Numpy.ndarray): [h, w, c] original input image. 140 | ratio (tuple): width, height ratios in letterbox. 141 | pad_w (float): width padding in letterbox. 142 | pad_h (float): height padding in letterbox. 143 | conf_threshold (float): conf threshold. 144 | iou_threshold (float): iou threshold. 145 | 146 | Returns: 147 | boxes (List): list of bounding boxes. 148 | """ 149 | # (Batch_size, Num_anchors, xywh_score_conf_cls), v5和v6_1.0的[..., 4]是置信度分数,v8v9采用类别里面最大的概率作为置信度score 150 | x = preds # outputs: predictions (1, 8400*3, 85) 151 | 152 | # Predictions filtering by conf-threshold 153 | x = x[x[..., 4] > conf_threshold] 154 | 155 | # Create a new matrix which merge these(box, score, cls) into one 156 | # For more details about `numpy.c_()`: https://numpy.org/doc/1.26/reference/generated/numpy.c_.html 157 | x = np.c_[x[..., :4], x[..., 4], np.argmax(x[..., 5:], axis=-1)] 158 | 159 | # NMS filtering 160 | # 经过NMS后的值, np.array([[x, y, w, h, conf, cls], ...]), shape=(-1, 4 + 1 + 1) 161 | x = x[cv2.dnn.NMSBoxes(x[:, :4], x[:, 4], conf_threshold, iou_threshold)] 162 | 163 | # 重新缩放边界框,为画图做准备 164 | if len(x) > 0: 165 | # Bounding boxes format change: cxcywh -> xyxy 166 | x[..., [0, 1]] -= x[..., [2, 3]] / 2 167 | x[..., [2, 3]] += x[..., [0, 1]] 168 | 169 | # Rescales bounding boxes from model shape(model_height, model_width) to the shape of original image 170 | x[..., :4] -= [pad_w, pad_h, pad_w, pad_h] 171 | x[..., :4] /= min(ratio) 172 | 173 | # Bounding boxes boundary clamp 174 | x[..., [0, 2]] = x[:, [0, 2]].clip(0, im0.shape[1]) 175 | x[..., [1, 3]] = x[:, [1, 3]].clip(0, im0.shape[0]) 176 | 177 | return x[..., :6] # boxes 178 | else: 179 | return [] 180 | 181 | # 绘框 182 | def draw_and_visualize(self, im, bboxes, vis=False, save=True): 183 | """ 184 | Draw and visualize results. 185 | 186 | Args: 187 | im (np.ndarray): original image, shape [h, w, c]. 188 | bboxes (numpy.ndarray): [n, 6], n is number of bboxes. 189 | vis (bool): imshow using OpenCV. 190 | save (bool): save image annotated. 
191 | 
192 |         Returns:
193 |             None
194 |         """
195 |         # Draw rectangles
196 |         for (*box, conf, cls_) in bboxes:
197 |             # draw bbox rectangle
198 |             cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])),
199 |                           self.color_palette[int(cls_)], 1, cv2.LINE_AA)
200 |             cv2.putText(im, f'{self.classes[int(cls_)]}: {conf:.3f}', (int(box[0]), int(box[1] - 9)),
201 |                         cv2.FONT_HERSHEY_SIMPLEX, 0.7, self.color_palette[int(cls_)], 2, cv2.LINE_AA)
202 | 
203 |         # Show image
204 |         if vis:
205 |             cv2.imshow('demo', im)
206 |             cv2.waitKey(0)
207 |             cv2.destroyAllWindows()
208 | 
209 |         # Save image
210 |         if save:
211 |             cv2.imwrite('demo.jpg', im)
212 | 
213 | 
214 | if __name__ == '__main__':
215 |     # Create an argument parser to handle command-line arguments
216 |     parser = argparse.ArgumentParser()
217 |     parser.add_argument('--model', type=str, default='yolov5s.onnx', help='Path to ONNX model')
218 |     parser.add_argument('--source', type=str, default=str('bus.jpg'), help='Path to input image')
219 |     parser.add_argument('--imgsz', type=tuple, default=(640, 640), help='Image input size')
220 |     parser.add_argument('--conf', type=float, default=0.25, help='Confidence threshold')
221 |     parser.add_argument('--iou', type=float, default=0.45, help='NMS IoU threshold')
222 |     parser.add_argument('--infer_tool', type=str, default='openvino', choices=("openvino", "onnxruntime"), help='Inference engine to use')
223 |     args = parser.parse_args()
224 | 
225 |     # Build model
226 |     model = YOLOv5(args.model, args.imgsz, args.infer_tool)
227 | 
228 |     # Read image by OpenCV
229 |     img = cv2.imread(args.source)
230 | 
231 |     # Inference
232 |     boxes = model(img, conf_threshold=args.conf, iou_threshold=args.iou)
233 | 
234 |     # Visualize
235 |     if len(boxes) > 0:
236 |         model.draw_and_visualize(img, boxes, vis=False, save=True)
237 | 
238 | 
239 | 
--------------------------------------------------------------------------------
/YOLOv6_openvino_onnx.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import time
3 | import cv2
4 | import numpy as np
5 | from openvino.runtime import Core  # pip install openvino -i https://pypi.tuna.tsinghua.edu.cn/simple
6 | import onnxruntime as ort  # used for ONNXRuntime inference; pip install onnxruntime (installs the CPU build by default)
7 | 
8 | 
9 | # The 80 default COCO classes
10 | CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
11 |            'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
12 |            'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
13 |            'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
14 |            'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
15 |            'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
16 |            'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
17 |            'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
18 | 
19 | 
20 | class OpenvinoInference(object):
21 |     def __init__(self, onnx_path):
22 |         self.onnx_path = onnx_path
23 |         ie = Core()
24 |         self.model_onnx = ie.read_model(model=self.onnx_path)
25 |         self.compiled_model_onnx = ie.compile_model(model=self.model_onnx, device_name="CPU")
26 |         self.output_layer_onnx = self.compiled_model_onnx.output(0)
27 | 
28 | 
def predict(self, datas): 29 | predict_data = self.compiled_model_onnx([datas])[self.output_layer_onnx] 30 | return predict_data 31 | 32 | 33 | class YOLOv6: 34 | """YOLOv6 object detection model class for handling inference and visualization.""" 35 | 36 | def __init__(self, onnx_model, imgsz=(640, 640), infer_tool='openvino'): 37 | """ 38 | Initialization. 39 | 40 | Args: 41 | onnx_model (str): Path to the ONNX model. 42 | """ 43 | self.infer_tool = infer_tool 44 | if self.infer_tool == 'openvino': 45 | # 构建openvino推理引擎 46 | self.openvino = OpenvinoInference(onnx_model) 47 | self.ndtype = np.single 48 | else: 49 | # 构建onnxruntime推理引擎 50 | self.ort_session = ort.InferenceSession(onnx_model, 51 | providers=['CUDAExecutionProvider', 'CPUExecutionProvider'] 52 | if ort.get_device() == 'GPU' else ['CPUExecutionProvider']) 53 | 54 | # Numpy dtype: support both FP32 and FP16 onnx model 55 | self.ndtype = np.half if self.ort_session.get_inputs()[0].type == 'tensor(float16)' else np.single 56 | 57 | self.classes = CLASSES # 加载模型类别 58 | self.model_height, self.model_width = imgsz[0], imgsz[1] # 图像resize大小 59 | self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3)) # 为每个类别生成调色板 60 | 61 | def __call__(self, im0, conf_threshold=0.4, iou_threshold=0.45): 62 | """ 63 | The whole pipeline: pre-process -> inference -> post-process. 64 | 65 | Args: 66 | im0 (Numpy.ndarray): original input image. 67 | conf_threshold (float): confidence threshold for filtering predictions. 68 | iou_threshold (float): iou threshold for NMS. 69 | 70 | Returns: 71 | boxes (List): list of bounding boxes. 72 | """ 73 | # 前处理Pre-process 74 | t1 = time.time() 75 | im, ratio, (pad_w, pad_h) = self.preprocess(im0) 76 | print('预处理时间:{:.3f}s'.format(time.time() - t1)) 77 | 78 | # 推理 inference 79 | t2 = time.time() 80 | if self.infer_tool == 'openvino': 81 | preds = self.openvino.predict(im) 82 | else: 83 | preds = self.ort_session.run(None, {self.ort_session.get_inputs()[0].name: im})[0] 84 | print('推理时间:{:.2f}s'.format(time.time() - t2)) 85 | 86 | # 后处理Post-process 87 | t3 = time.time() 88 | boxes = self.postprocess(preds, 89 | im0=im0, 90 | ratio=ratio, 91 | pad_w=pad_w, 92 | pad_h=pad_h, 93 | conf_threshold=conf_threshold, 94 | iou_threshold=iou_threshold, 95 | ) 96 | print('后处理时间:{:.3f}s'.format(time.time() - t3)) 97 | 98 | return boxes 99 | 100 | # 前处理,包括:resize, pad, HWC to CHW,BGR to RGB,归一化,增加维度CHW -> BCHW 101 | def preprocess(self, img): 102 | """ 103 | Pre-processes the input image. 104 | 105 | Args: 106 | img (Numpy.ndarray): image about to be processed. 107 | 108 | Returns: 109 | img_process (Numpy.ndarray): image preprocessed for inference. 110 | ratio (tuple): width, height ratios in letterbox. 111 | pad_w (float): width padding in letterbox. 112 | pad_h (float): height padding in letterbox. 
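        Note on the rounding below (worked example): the +/-0.1 offsets split an
        odd padding deterministically, e.g. pad_h = 12.5 gives top = round(12.4)
        = 12 and bottom = round(12.6) = 13, so the two sides always sum to the
        exact padding required.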
113 | """ 114 | # Resize and pad input image using letterbox() (Borrowed from Ultralytics) 115 | shape = img.shape[:2] # original image shape 116 | new_shape = (self.model_height, self.model_width) 117 | r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) 118 | ratio = r, r 119 | new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) 120 | pad_w, pad_h = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2 # wh padding 121 | if shape[::-1] != new_unpad: # resize 122 | img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) 123 | top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1)) 124 | left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1)) 125 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)) # 填充 126 | 127 | # Transforms: HWC to CHW -> BGR to RGB -> div(255) -> contiguous -> add axis(optional) 128 | img = np.ascontiguousarray(np.einsum('HWC->CHW', img)[::-1], dtype=self.ndtype) / 255.0 129 | img_process = img[None] if len(img.shape) == 3 else img 130 | return img_process, ratio, (pad_w, pad_h) 131 | 132 | # 后处理,包括:阈值过滤与NMS 133 | def postprocess(self, preds, im0, ratio, pad_w, pad_h, conf_threshold, iou_threshold): 134 | """ 135 | Post-process the prediction. 136 | 137 | Args: 138 | preds (Numpy.ndarray): predictions come from ort.session.run(). 139 | im0 (Numpy.ndarray): [h, w, c] original input image. 140 | ratio (tuple): width, height ratios in letterbox. 141 | pad_w (float): width padding in letterbox. 142 | pad_h (float): height padding in letterbox. 143 | conf_threshold (float): conf threshold. 144 | iou_threshold (float): iou threshold. 145 | 146 | Returns: 147 | boxes (List): list of bounding boxes. 148 | """ 149 | # (Batch_size, Num_anchors, xywh_score_conf_cls), v5和v6_1.0的[..., 4]是置信度分数,v8v9采用类别里面最大的概率作为置信度score 150 | x = preds # outputs: predictions (1, 8400, 85) 151 | 152 | # Predictions filtering by conf-threshold 153 | x = x[x[..., 4] > conf_threshold] 154 | 155 | # Create a new matrix which merge these(box, score, cls) into one 156 | # For more details about `numpy.c_()`: https://numpy.org/doc/1.26/reference/generated/numpy.c_.html 157 | x = np.c_[x[..., :4], x[..., 4], np.argmax(x[..., 5:], axis=-1)] 158 | 159 | # NMS filtering 160 | # 经过NMS后的值, np.array([[x, y, w, h, conf, cls], ...]), shape=(-1, 4 + 1 + 1) 161 | x = x[cv2.dnn.NMSBoxes(x[:, :4], x[:, 4], conf_threshold, iou_threshold)] 162 | 163 | # 重新缩放边界框,为画图做准备 164 | if len(x) > 0: 165 | # Bounding boxes format change: cxcywh -> xyxy 166 | x[..., [0, 1]] -= x[..., [2, 3]] / 2 167 | x[..., [2, 3]] += x[..., [0, 1]] 168 | 169 | # Rescales bounding boxes from model shape(model_height, model_width) to the shape of original image 170 | x[..., :4] -= [pad_w, pad_h, pad_w, pad_h] 171 | x[..., :4] /= min(ratio) 172 | 173 | # Bounding boxes boundary clamp 174 | x[..., [0, 2]] = x[:, [0, 2]].clip(0, im0.shape[1]) 175 | x[..., [1, 3]] = x[:, [1, 3]].clip(0, im0.shape[0]) 176 | 177 | return x[..., :6] # boxes 178 | else: 179 | return [] 180 | 181 | # 绘框 182 | def draw_and_visualize(self, im, bboxes, vis=False, save=True): 183 | """ 184 | Draw and visualize results. 185 | 186 | Args: 187 | im (np.ndarray): original image, shape [h, w, c]. 188 | bboxes (numpy.ndarray): [n, 6], n is number of bboxes. 189 | vis (bool): imshow using OpenCV. 190 | save (bool): save image annotated. 
191 | 192 | Returns: 193 | None 194 | """ 195 | # Draw rectangles 196 | for (*box, conf, cls_) in bboxes: 197 | # draw bbox rectangle 198 | cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), 199 | self.color_palette[int(cls_)], 1, cv2.LINE_AA) 200 | cv2.putText(im, f'{self.classes[int(cls_)]}: {conf:.3f}', (int(box[0]), int(box[1] - 9)), 201 | cv2.FONT_HERSHEY_SIMPLEX, 0.7, self.color_palette[int(cls_)], 2, cv2.LINE_AA) 202 | 203 | # Show image 204 | if vis: 205 | cv2.imshow('demo', im) 206 | cv2.waitKey(0) 207 | cv2.destroyAllWindows() 208 | 209 | # Save image 210 | if save: 211 | cv2.imwrite('demo.jpg', im) 212 | 213 | 214 | if __name__ == '__main__': 215 | # Create an argument parser to handle command-line arguments 216 | parser = argparse.ArgumentParser() 217 | parser.add_argument('--model', type=str, default='yolov6s_1.0.onnx', help='Path to ONNX model') 218 | parser.add_argument('--source', type=str, default=str('bus.jpg'), help='Path to input image') 219 | parser.add_argument('--imgsz', type=tuple, default=(640, 640), help='Image input size') 220 | parser.add_argument('--conf', type=float, default=0.25, help='Confidence threshold') 221 | parser.add_argument('--iou', type=float, default=0.45, help='NMS IoU threshold') 222 | parser.add_argument('--infer_tool', type=str, default='openvino', choices=("openvino", "onnxruntime"), help='选择推理引擎') 223 | args = parser.parse_args() 224 | 225 | # Build model 226 | model = YOLOv6(args.model, args.imgsz, args.infer_tool) 227 | 228 | # Read image by OpenCV 229 | img = cv2.imread(args.source) 230 | 231 | # Inference 232 | boxes = model(img, conf_threshold=args.conf, iou_threshold=args.iou) 233 | 234 | # Visualize 235 | if len(boxes) > 0: 236 | model.draw_and_visualize(img, boxes, vis=False, save=True) 237 | 238 | -------------------------------------------------------------------------------- /YOLOv7-pose_openvino_onnx.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | import cv2 4 | import numpy as np 5 | from openvino.runtime import Core # pip install openvino -i https://pypi.tuna.tsinghua.edu.cn/simple 6 | import onnxruntime as ort # 使用onnxruntime推理用上,pip install onnxruntime,默认安装CPU 7 | 8 | 9 | # Pose默认的person类 10 | CLASSES = ['person'] 11 | 12 | class OpenvinoInference(object): 13 | def __init__(self, onnx_path): 14 | self.onnx_path = onnx_path 15 | ie = Core() 16 | self.model_onnx = ie.read_model(model=self.onnx_path) 17 | self.compiled_model_onnx = ie.compile_model(model=self.model_onnx, device_name="CPU") 18 | self.output_layer_onnx = self.compiled_model_onnx.output(0) 19 | 20 | def predict(self, datas): 21 | predict_data = self.compiled_model_onnx([datas])[self.output_layer_onnx] 22 | return predict_data 23 | 24 | 25 | class KeyPoint_draw(object): 26 | def __init__(self): 27 | # 定义一个调色板数组,其中每个元素是一个包含RGB值的列表,用于表示不同的颜色 28 | self.palette = np.array([[255, 128, 0], [255, 153, 51], [255, 178, 102], 29 | [230, 230, 0], [255, 153, 255], [153, 204, 255], 30 | [255, 102, 255], [255, 51, 255], [102, 178, 255], 31 | [51, 153, 255], [255, 153, 153], [255, 102, 102], 32 | [255, 51, 51], [153, 255, 153], [102, 255, 102], 33 | [51, 255, 51], [0, 255, 0], [0, 0, 255], [255, 0, 0], 34 | [255, 255, 255]]) 35 | # 定义人体17个关键点的连接顺序,每个子列表包含两个数字,代表要连接的关键点的索引, 1鼻子 2左眼 3右眼 4左耳 5右耳 6左肩 7右肩 36 | # 8左肘 9右肘 10左手腕 11右手腕 12左髋 13右髋 14左膝 15右膝 16左踝 17右踝 37 | self.skeleton = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], 38 | [7, 13], [6, 7], [6, 8], [7, 9], [8, 10], 
[9, 11], [2, 3], 39 | [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7]] 40 | # 通过索引从调色板中选择颜色,用于绘制人体骨架的线条,每个索引对应一种颜色 41 | self.pose_limb_color = self.palette[[9, 9, 9, 9, 7, 7, 7, 0, 0, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16]] 42 | # 通过索引从调色板中选择颜色,用于绘制人体的关键点,每个索引对应一种颜色 43 | self.pose_kpt_color = self.palette[[16, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9]] 44 | 45 | def plot_skeleton_kpts(self, im, kpts, steps=3): 46 | num_kpts = len(kpts) // steps # 51 / 3 =17 47 | # 画点 48 | for kid in range(num_kpts): 49 | r, g, b = self.pose_kpt_color[kid] 50 | x_coord, y_coord = kpts[steps * kid], kpts[steps * kid + 1] 51 | conf = kpts[steps * kid + 2] 52 | if conf > 0.5: # 关键点的置信度必须大于 0.5 53 | cv2.circle(im, (int(x_coord), int(y_coord)), 10, (int(r), int(g), int(b)), -1) 54 | # 画骨架 55 | for sk_id, sk in enumerate(self.skeleton): 56 | r, g, b = self.pose_limb_color[sk_id] 57 | pos1 = (int(kpts[(sk[0] - 1) * steps]), int(kpts[(sk[0] - 1) * steps + 1])) 58 | pos2 = (int(kpts[(sk[1] - 1) * steps]), int(kpts[(sk[1] - 1) * steps + 1])) 59 | conf1 = kpts[(sk[0] - 1) * steps + 2] 60 | conf2 = kpts[(sk[1] - 1) * steps + 2] 61 | if conf1 > 0.5 and conf2 > 0.5: # 对于肢体,相连的两个关键点置信度 必须同时大于 0.5 62 | cv2.line(im, pos1, pos2, (int(r), int(g), int(b)), thickness=2) 63 | 64 | 65 | class YOLOv7_pose: 66 | """YOLOv7_pose detection model class for handling inference and visualization.""" 67 | 68 | def __init__(self, onnx_model, imgsz=(640, 640), infer_tool='openvino'): 69 | """ 70 | Initialization. 71 | 72 | Args: 73 | onnx_model (str): Path to the ONNX model. 74 | """ 75 | self.infer_tool = infer_tool 76 | if self.infer_tool == 'openvino': 77 | # 构建openvino推理引擎 78 | self.openvino = OpenvinoInference(onnx_model) 79 | self.ndtype = np.single 80 | else: 81 | # 构建onnxruntime推理引擎 82 | self.ort_session = ort.InferenceSession(onnx_model, 83 | providers=['CUDAExecutionProvider', 'CPUExecutionProvider'] 84 | if ort.get_device() == 'GPU' else ['CPUExecutionProvider']) 85 | 86 | # Numpy dtype: support both FP32 and FP16 onnx model 87 | self.ndtype = np.half if self.ort_session.get_inputs()[0].type == 'tensor(float16)' else np.single 88 | 89 | self.classes = CLASSES # 加载模型类别 90 | self.model_height, self.model_width = imgsz[0], imgsz[1] # 图像resize大小 91 | self.color = (0, 0, 255) # 为类别生成调色板 92 | 93 | def __call__(self, im0, conf_threshold=0.4, iou_threshold=0.45): 94 | """ 95 | The whole pipeline: pre-process -> inference -> post-process. 96 | 97 | Args: 98 | im0 (Numpy.ndarray): original input image. 99 | conf_threshold (float): confidence threshold for filtering predictions. 100 | iou_threshold (float): iou threshold for NMS. 101 | 102 | Returns: 103 | boxes (List): list of bounding boxes. 
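        Illustrative row layout (assuming the 17-keypoint COCO pose model): each
        returned row holds 4 + 1 + 17*3 = 56 values, i.e. an xyxy box, one
        confidence score, and 17 (x, y, conf) keypoint triplets.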
104 | """ 105 | # 前处理Pre-process 106 | t1 = time.time() 107 | im, ratio, (pad_w, pad_h) = self.preprocess(im0) 108 | print('预处理时间:{:.3f}s'.format(time.time() - t1)) 109 | 110 | # 推理 inference 111 | t2 = time.time() 112 | if self.infer_tool == 'openvino': 113 | preds = self.openvino.predict(im) 114 | else: 115 | preds = self.ort_session.run(None, {self.ort_session.get_inputs()[0].name: im})[0] 116 | print('推理时间:{:.2f}s'.format(time.time() - t2)) 117 | 118 | # 后处理Post-process 119 | t3 = time.time() 120 | boxes = self.postprocess(preds, 121 | im0=im0, 122 | ratio=ratio, 123 | pad_w=pad_w, 124 | pad_h=pad_h, 125 | conf_threshold=conf_threshold, 126 | iou_threshold=iou_threshold, 127 | ) 128 | print('后处理时间:{:.3f}s'.format(time.time() - t3)) 129 | 130 | return boxes 131 | 132 | # 前处理,包括:resize, pad, HWC to CHW,BGR to RGB,归一化,增加维度CHW -> BCHW 133 | def preprocess(self, img): 134 | """ 135 | Pre-processes the input image. 136 | 137 | Args: 138 | img (Numpy.ndarray): image about to be processed. 139 | 140 | Returns: 141 | img_process (Numpy.ndarray): image preprocessed for inference. 142 | ratio (tuple): width, height ratios in letterbox. 143 | pad_w (float): width padding in letterbox. 144 | pad_h (float): height padding in letterbox. 145 | """ 146 | # Resize and pad input image using letterbox() (Borrowed from Ultralytics) 147 | shape = img.shape[:2] # original image shape 148 | new_shape = (self.model_height, self.model_width) 149 | r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) 150 | ratio = r, r 151 | new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) 152 | pad_w, pad_h = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2 # wh padding 153 | if shape[::-1] != new_unpad: # resize 154 | img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) 155 | top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1)) 156 | left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1)) 157 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)) # 填充 158 | 159 | # Transforms: HWC to CHW -> BGR to RGB -> div(255) -> contiguous -> add axis(optional) 160 | img = np.ascontiguousarray(np.einsum('HWC->CHW', img)[::-1], dtype=self.ndtype) / 255.0 161 | img_process = img[None] if len(img.shape) == 3 else img 162 | return img_process, ratio, (pad_w, pad_h) 163 | 164 | # 后处理,包括:阈值过滤与NMS 165 | def postprocess(self, preds, im0, ratio, pad_w, pad_h, conf_threshold, iou_threshold): 166 | """ 167 | Post-process the prediction. 168 | 169 | Args: 170 | preds (Numpy.ndarray): predictions come from ort.session.run(). 171 | im0 (Numpy.ndarray): [h, w, c] original input image. 172 | ratio (tuple): width, height ratios in letterbox. 173 | pad_w (float): width padding in letterbox. 174 | pad_h (float): height padding in letterbox. 175 | conf_threshold (float): conf threshold. 176 | iou_threshold (float): iou threshold. 177 | 178 | Returns: 179 | boxes (List): list of bounding boxes. 
180 | """ 181 | x = preds # outputs: predictions (1, num_anchors, 57),其中57=4+1+1+17*3,即box(4)+obj_conf(1)+cls_conf(1)+17个关键点(x,y,conf) 182 | 183 | # Predictions filtering by conf-threshold 184 | x = x[x[..., 4] > conf_threshold] 185 | 186 | # Create a new matrix which merge these(box, score, pose) into one 187 | # For more details about `numpy.c_()`: https://numpy.org/doc/1.26/reference/generated/numpy.c_.html 188 | x = np.c_[x[..., :4], x[..., 4], x[..., 6:]] 189 | 190 | # NMS filtering 191 | # 经过NMS后的值, np.array([[x, y, w, h, conf, pose], ...]), shape=(-1, 4 + 1 + 17*3) 192 | x = x[cv2.dnn.NMSBoxes(x[:, :4], x[:, 4], conf_threshold, iou_threshold)] 193 | 194 | # 重新缩放边界框,为画图做准备 195 | if len(x) > 0: 196 | # Bounding boxes format change: cxcywh -> xyxy 197 | x[..., [0, 1]] -= x[..., [2, 3]] / 2 198 | x[..., [2, 3]] += x[..., [0, 1]] 199 | 200 | # Rescales bounding boxes from model shape(model_height, model_width) to the shape of original image 201 | x[..., :4] -= [pad_w, pad_h, pad_w, pad_h] 202 | x[..., :4] /= min(ratio) 203 | 204 | # Bounding boxes boundary clamp 205 | x[..., [0, 2]] = x[:, [0, 2]].clip(0, im0.shape[1]) # clip避免边界框超出图像边界 206 | x[..., [1, 3]] = x[:, [1, 3]].clip(0, im0.shape[0]) 207 | 208 | # 关键点坐标映射到原图上,从[:, 5:]开始算 209 | num_kpts = x.shape[1] // 3 # 56 // 3 = 18,循环从kid=2起,恰好覆盖17个关键点的x、y列 210 | for kid in range(2, num_kpts + 1): 211 | x[:, kid * 3 - 1] = (x[:, kid * 3 - 1] - pad_w) / min(ratio) 212 | x[:, kid * 3] = (x[:, kid * 3] - pad_h) / min(ratio) 213 | 214 | return x 215 | else: 216 | return [] 217 | 218 | # 绘框 219 | def draw_and_visualize(self, im, bboxes, keypoint_draw, vis=False, save=True): 220 | """ 221 | Draw and visualize results. 222 | 223 | Args: 224 | im (np.ndarray): original image, shape [h, w, c]. 225 | bboxes (numpy.ndarray): [n, 56], n is number of bboxes. 226 | vis (bool): imshow using OpenCV. 227 | save (bool): save image annotated.
228 | 229 | Returns: 230 | None 231 | """ 232 | 233 | # Draw rectangles 234 | for bbox in bboxes: 235 | box, conf, kpts = bbox[:4], bbox[4], bbox[5:] 236 | # draw bbox rectangle 237 | cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), 238 | self.color, 1, cv2.LINE_AA) 239 | cv2.putText(im, f'{self.classes[0]}: {conf:.3f}', (int(box[0]), int(box[1] - 9)), 240 | cv2.FONT_HERSHEY_SIMPLEX, 0.7, self.color, 2, cv2.LINE_AA) 241 | 242 | # 画关键点,连线 243 | keypoint_draw.plot_skeleton_kpts(im, kpts) 244 | 245 | # Show image 246 | if vis: 247 | cv2.imshow('demo', im) 248 | cv2.waitKey(0) 249 | cv2.destroyAllWindows() 250 | 251 | # Save image 252 | if save: 253 | cv2.imwrite('demo.jpg', im) 254 | 255 | 256 | if __name__ == '__main__': 257 | # Create an argument parser to handle command-line arguments 258 | parser = argparse.ArgumentParser() 259 | parser.add_argument('--model', type=str, default='weights/yolov7-w6-pose.onnx', help='Path to ONNX model') 260 | parser.add_argument('--source', type=str, default=str('bus.jpg'), help='Path to input image') 261 | parser.add_argument('--imgsz', type=tuple, default=(640, 640), help='Image input size') 262 | parser.add_argument('--conf', type=float, default=0.25, help='Confidence threshold') 263 | parser.add_argument('--iou', type=float, default=0.45, help='NMS IoU threshold') 264 | parser.add_argument('--infer_tool', type=str, default='openvino', choices=("openvino", "onnxruntime"), help='选择推理引擎') 265 | args = parser.parse_args() 266 | 267 | # Build model 268 | model = YOLOv7_pose(args.model, args.imgsz, args.infer_tool) 269 | keypoint_draw = KeyPoint_draw() # 可视化关键点 270 | 271 | # Read image by OpenCV 272 | img = cv2.imread(args.source) 273 | 274 | # Inference 275 | boxes = model(img, conf_threshold=args.conf, iou_threshold=args.iou) 276 | 277 | # Visualize 278 | if len(boxes) > 0: 279 | model.draw_and_visualize(img, boxes, keypoint_draw, vis=False, save=True) 280 | 281 | -------------------------------------------------------------------------------- /YOLOv7_bytetrack.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | import cv2 4 | import numpy as np 5 | from openvino.runtime import Core # pip install openvino -i https://pypi.tuna.tsinghua.edu.cn/simple 6 | import onnxruntime as ort # 使用onnxruntime推理用上,pip install onnxruntime,默认安装CPU 7 | 8 | import copy 9 | from bytetrack.byte_tracker import BYTETracker 10 | 11 | # COCO默认的80类 12 | CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 13 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 14 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 15 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 16 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 17 | 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 18 | 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 19 | 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] 20 | 21 | 22 | class OpenvinoInference(object): 23 | def __init__(self, onnx_path): 24 | self.onnx_path = onnx_path 25 | ie = Core() 26 | 
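# OpenVINO reads the ONNX graph directly: read_model() parses it into an openvino
# Model object, and compile_model() lowers it for the chosen device ("CPU" here).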
self.model_onnx = ie.read_model(model=self.onnx_path) 27 | self.compiled_model_onnx = ie.compile_model(model=self.model_onnx, device_name="CPU") 28 | self.output_layer_onnx = self.compiled_model_onnx.output(0) 29 | 30 | def predict(self, datas): 31 | predict_data = self.compiled_model_onnx([datas])[self.output_layer_onnx] 32 | return predict_data 33 | 34 | 35 | class YOLOv7: 36 | """YOLOv7 object detection model class for handling inference and visualization.""" 37 | 38 | def __init__(self, onnx_model, imgsz=(640, 640), infer_tool='openvino'): 39 | """ 40 | Initialization. 41 | 42 | Args: 43 | onnx_model (str): Path to the ONNX model. 44 | """ 45 | self.infer_tool = infer_tool 46 | if self.infer_tool == 'openvino': 47 | # 构建openvino推理引擎 48 | self.openvino = OpenvinoInference(onnx_model) 49 | self.ndtype = np.single 50 | else: 51 | # 构建onnxruntime推理引擎 52 | self.ort_session = ort.InferenceSession(onnx_model, 53 | providers=['CUDAExecutionProvider', 'CPUExecutionProvider'] 54 | if ort.get_device() == 'GPU' else ['CPUExecutionProvider']) 55 | 56 | # Numpy dtype: support both FP32 and FP16 onnx model 57 | self.ndtype = np.half if self.ort_session.get_inputs()[0].type == 'tensor(float16)' else np.single 58 | 59 | self.classes = CLASSES # 加载模型类别 60 | self.model_height, self.model_width = imgsz[0], imgsz[1] # 图像resize大小 61 | self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3)) # 为每个类别生成调色板 62 | 63 | def __call__(self, im0, conf_threshold=0.4, iou_threshold=0.45): 64 | """ 65 | The whole pipeline: pre-process -> inference -> post-process. 66 | 67 | Args: 68 | im0 (Numpy.ndarray): original input image. 69 | conf_threshold (float): confidence threshold for filtering predictions. 70 | iou_threshold (float): iou threshold for NMS. 71 | 72 | Returns: 73 | boxes (List): list of bounding boxes. 74 | """ 75 | # 前处理Pre-process 76 | t1 = time.time() 77 | im, ratio, (pad_w, pad_h) = self.preprocess(im0) 78 | print('预处理时间:{:.3f}s'.format(time.time() - t1)) 79 | 80 | # 推理 inference 81 | t2 = time.time() 82 | if self.infer_tool == 'openvino': 83 | preds = self.openvino.predict(im) 84 | else: 85 | preds = self.ort_session.run(None, {self.ort_session.get_inputs()[0].name: im})[0] 86 | print('推理时间:{:.3f}s'.format(time.time() - t2)) 87 | 88 | # 后处理Post-process 89 | t3 = time.time() 90 | boxes = self.postprocess(preds, 91 | im0=im0, 92 | ratio=ratio, 93 | pad_w=pad_w, 94 | pad_h=pad_h, 95 | conf_threshold=conf_threshold, 96 | iou_threshold=iou_threshold, 97 | ) 98 | print('后处理时间:{:.3f}s'.format(time.time() - t3)) 99 | 100 | return boxes 101 | 102 | # 前处理,包括:resize, pad, HWC to CHW,BGR to RGB,归一化,增加维度CHW -> BCHW 103 | def preprocess(self, img): 104 | """ 105 | Pre-processes the input image. 106 | 107 | Args: 108 | img (Numpy.ndarray): image about to be processed. 109 | 110 | Returns: 111 | img_process (Numpy.ndarray): image preprocessed for inference. 112 | ratio (tuple): width, height ratios in letterbox. 113 | pad_w (float): width padding in letterbox. 114 | pad_h (float): height padding in letterbox. 
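        Example (worked letterbox numbers; a 1080x810 frame and the 640x640 model
        input are assumed purely for illustration):
            r = min(640 / 1080, 640 / 810)       # ~0.5926, uniform scale
            new_unpad = (480, 640)               # (w, h) after cv2.resize
            pad_w, pad_h = 80.0, 0.0             # (640 - 480) / 2 and (640 - 640) / 2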
115 | """ 116 | # Resize and pad input image using letterbox() (Borrowed from Ultralytics) 117 | shape = img.shape[:2] # original image shape 118 | new_shape = (self.model_height, self.model_width) 119 | r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) 120 | ratio = r, r 121 | new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) 122 | pad_w, pad_h = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2 # wh padding 123 | if shape[::-1] != new_unpad: # resize 124 | img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) 125 | top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1)) 126 | left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1)) 127 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)) # 填充 128 | 129 | # Transforms: HWC to CHW -> BGR to RGB -> div(255) -> contiguous -> add axis(optional) 130 | img = np.ascontiguousarray(np.einsum('HWC->CHW', img)[::-1], dtype=self.ndtype) / 255.0 131 | img_process = img[None] if len(img.shape) == 3 else img 132 | return img_process, ratio, (pad_w, pad_h) 133 | 134 | # 后处理,包括:阈值过滤与NMS 135 | def postprocess(self, preds, im0, ratio, pad_w, pad_h, conf_threshold, iou_threshold): 136 | """ 137 | Post-process the prediction. 138 | 139 | Args: 140 | preds (Numpy.ndarray): predictions come from ort.session.run(). 141 | im0 (Numpy.ndarray): [h, w, c] original input image. 142 | ratio (tuple): width, height ratios in letterbox. 143 | pad_w (float): width padding in letterbox. 144 | pad_h (float): height padding in letterbox. 145 | conf_threshold (float): conf threshold. 146 | iou_threshold (float): iou threshold. 147 | 148 | Returns: 149 | boxes (List): list of bounding boxes. 150 | """ 151 | # (Batch_size, Num_anchors, xywh_score_conf_cls), v5和v6_1.0的[..., 4]是置信度分数,v8v9采用类别里面最大的概率作为置信度score 152 | x = preds # outputs: predictions (1, 8400*3, 85) 153 | 154 | # Predictions filtering by conf-threshold 155 | x = x[x[..., 4] > conf_threshold] 156 | 157 | # Create a new matrix which merge these(box, score, cls) into one 158 | # For more details about `numpy.c_()`: https://numpy.org/doc/1.26/reference/generated/numpy.c_.html 159 | x = np.c_[x[..., :4], x[..., 4], np.argmax(x[..., 5:], axis=-1)] 160 | 161 | # NMS filtering 162 | # 经过NMS后的值, np.array([[x, y, w, h, conf, cls], ...]), shape=(-1, 4 + 1 + 1) 163 | x = x[cv2.dnn.NMSBoxes(x[:, :4], x[:, 4], conf_threshold, iou_threshold)] 164 | 165 | # 重新缩放边界框,为画图做准备 166 | if len(x) > 0: 167 | # Bounding boxes format change: cxcywh -> xyxy 168 | x[..., [0, 1]] -= x[..., [2, 3]] / 2 169 | x[..., [2, 3]] += x[..., [0, 1]] 170 | 171 | # Rescales bounding boxes from model shape(model_height, model_width) to the shape of original image 172 | x[..., :4] -= [pad_w, pad_h, pad_w, pad_h] 173 | x[..., :4] /= min(ratio) 174 | 175 | # Bounding boxes boundary clamp 176 | x[..., [0, 2]] = x[:, [0, 2]].clip(0, im0.shape[1]) 177 | x[..., [1, 3]] = x[:, [1, 3]].clip(0, im0.shape[0]) 178 | 179 | return x[..., :6] # boxes 180 | else: 181 | return [] 182 | 183 | # 绘框 184 | def draw_and_visualize(self, im, bboxes, video_writer, vis=False, save=False, is_track=False): 185 | """ 186 | Draw and visualize results. 187 | 188 | Args: 189 | im (np.ndarray): original image, shape [h, w, c]. 190 | bboxes (numpy.ndarray): [n, 6], n is number of bboxes. 191 | vis (bool): imshow using OpenCV. 192 | save (bool): save image annotated. 
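            Note: with is_track=False each row is [x1, y1, x2, y2, conf, cls]; with
            is_track=True the last column is the ByteTrack id returned by
            ByteTrackerONNX.inference() instead of the class index.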
193 | 194 | Returns: 195 | None 196 | """ 197 | # Draw rectangles 198 | if not is_track: 199 | for (*box, conf, cls_) in bboxes: 200 | # draw bbox rectangle 201 | cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), 202 | self.color_palette[int(cls_)], 1, cv2.LINE_AA) 203 | cv2.putText(im, f'{self.classes[int(cls_)]}: {conf:.3f}', (int(box[0]), int(box[1] - 9)), 204 | cv2.FONT_HERSHEY_SIMPLEX, 0.7, self.color_palette[int(cls_)], 2, cv2.LINE_AA) 205 | else: 206 | for (*box, conf, id_) in bboxes: 207 | # draw bbox rectangle 208 | cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), 209 | (0, 0, 255), 1, cv2.LINE_AA) 210 | cv2.putText(im, f'{id_}: {conf:.3f}', (int(box[0]), int(box[1] - 9)), 211 | cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2, cv2.LINE_AA) 212 | 213 | # Show image 214 | if vis: 215 | cv2.imshow('demo', im) 216 | cv2.waitKey(1) 217 | 218 | # Save video 219 | if save: 220 | video_writer.write(im) 221 | 222 | 223 | class ByteTrackerONNX(object): 224 | def __init__(self, args): 225 | self.args = args 226 | self.tracker = BYTETracker(args, frame_rate=30) 227 | 228 | def _tracker_update(self, dets, image): 229 | online_targets = [] 230 | if dets is not None: 231 | online_targets = self.tracker.update( 232 | dets[:, :5], 233 | [image.shape[0], image.shape[1]], 234 | [image.shape[0], image.shape[1]], 235 | ) 236 | 237 | online_tlwhs = [] 238 | online_ids = [] 239 | online_scores = [] 240 | for online_target in online_targets: 241 | tlwh = online_target.tlwh 242 | track_id = online_target.track_id 243 | vertical = tlwh[2] / tlwh[3] > 1.6 244 | if tlwh[2] * tlwh[3] > self.args.min_box_area and not vertical: 245 | online_tlwhs.append(tlwh) 246 | online_ids.append(track_id) 247 | online_scores.append(online_target.score) 248 | 249 | return online_tlwhs, online_ids, online_scores 250 | 251 | 252 | def inference(self, image, dets): 253 | """ 254 | Args: dets: 检测结果, [x1, y1, x2, y2, conf, cls] 255 | Returns: np.array([[x1, y1, x2, y2, conf, ids], ...]) 256 | """ 257 | bboxes, ids, scores = self._tracker_update(dets, image) 258 | if len(bboxes) == 0: 259 | return [] 260 | # Bounding boxes format change: tlwh -> xyxy 261 | bboxes = np.array(bboxes) 262 | bboxes[..., [2, 3]] += bboxes[..., [0, 1]] 263 | bboxes = np.c_[bboxes, np.array(scores), np.array(ids)] 264 | return bboxes 265 | 266 | 267 | if __name__ == '__main__': 268 | # Create an argument parser to handle command-line arguments 269 | parser = argparse.ArgumentParser() 270 | parser.add_argument('--model', type=str, default='weights/yolov7.onnx', help='Path to ONNX model') 271 | parser.add_argument('--source', type=str, default=str('test.mp4'), help='Path to input image') 272 | parser.add_argument('--imgsz', type=tuple, default=(640, 640), help='Image input size') 273 | parser.add_argument('--conf', type=float, default=0.25, help='Confidence threshold') 274 | parser.add_argument('--iou', type=float, default=0.45, help='NMS IoU threshold') 275 | parser.add_argument('--infer_tool', type=str, default='openvino', choices=("openvino", "onnxruntime"), help='选择推理引擎') 276 | 277 | parser.add_argument('--is_track', type=bool, default=True, help='是否启用跟踪') 278 | parser.add_argument('--track_thresh', type=float, default=0.5, help='tracking confidence threshold') 279 | parser.add_argument('--track_buffer', type=int, default=30, help='the frames for keep lost tracks, usually as same with FPS') 280 | parser.add_argument('--match_thresh', type=float, default=0.8, help='matching threshold for tracking') 281 | 
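    # Note: argparse's type=bool treats any non-empty string as True, so passing
    # --is_track False on the command line still yields True; an
    # action='store_true' style flag is the usual fix.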
parser.add_argument('--min_box_area', type=float, default=10, help='filter out tiny boxes',) 282 | parser.add_argument('--mot20', dest='mot20', default=False, action='store_true', help='test mot20.',) 283 | args = parser.parse_args() 284 | 285 | # Build model 286 | model = YOLOv7(args.model, args.imgsz, args.infer_tool) 287 | 288 | bytetrack = ByteTrackerONNX(args) 289 | 290 | # 读取视频,解析帧数宽高,保存视频 291 | cap = cv2.VideoCapture(args.source) 292 | width = cap.get(cv2.CAP_PROP_FRAME_WIDTH) 293 | height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) 294 | fps = cap.get(cv2.CAP_PROP_FPS) 295 | frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT) 296 | video_writer = cv2.VideoWriter('demo.mp4', cv2.VideoWriter_fourcc(*"mp4v"), fps, (int(width), int(height))) 297 | frame_id = 1 298 | 299 | while True: 300 | start_time = time.time() 301 | ret, img = cap.read() 302 | if not ret: 303 | break 304 | 305 | # Inference 306 | boxes = model(img, conf_threshold=args.conf, iou_threshold=args.iou) 307 | 308 | # track 309 | if args.is_track: 310 | boxes = bytetrack.inference(img, boxes) 311 | 312 | # Visualize 313 | if len(boxes) > 0: 314 | model.draw_and_visualize(copy.deepcopy(img), boxes, video_writer, vis=True, save=False, is_track=args.is_track) 315 | 316 | end_time = time.time() - start_time 317 | print('frame {}/{} (Total time: {:.2f} ms)'.format(frame_id, int(frame_count), end_time * 1000)) 318 | frame_id += 1 319 | 320 | 321 | 322 | -------------------------------------------------------------------------------- /YOLOv7_openvino_onnx.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | import cv2 4 | import numpy as np 5 | from openvino.runtime import Core # pip install openvino -i https://pypi.tuna.tsinghua.edu.cn/simple 6 | import onnxruntime as ort # 使用onnxruntime推理用上,pip install onnxruntime,默认安装CPU 7 | 8 | 9 | # COCO默认的80类 10 | CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 11 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 12 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 13 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 14 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 15 | 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 16 | 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 17 | 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] 18 | 19 | 20 | class OpenvinoInference(object): 21 | def __init__(self, onnx_path): 22 | self.onnx_path = onnx_path 23 | ie = Core() 24 | self.model_onnx = ie.read_model(model=self.onnx_path) 25 | self.compiled_model_onnx = ie.compile_model(model=self.model_onnx, device_name="CPU") 26 | self.output_layer_onnx = self.compiled_model_onnx.output(0) 27 | 28 | def predict(self, datas): 29 | predict_data = self.compiled_model_onnx([datas])[self.output_layer_onnx] 30 | return predict_data 31 | 32 | 33 | class YOLOv7: 34 | """YOLOv7 object detection model class for handling inference and visualization.""" 35 | 36 | def __init__(self, onnx_model, imgsz=(640, 640), infer_tool='openvino'): 37 | """ 38 | Initialization. 
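        Note: with infer_tool='onnxruntime', CUDAExecutionProvider is only requested
        when ort.get_device() reports 'GPU' (i.e. the onnxruntime-gpu build is
        installed); otherwise inference stays on CPUExecutionProvider, and the numpy
        dtype follows the model input (FP16 -> np.half, FP32 -> np.single).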
39 | 40 | Args: 41 | onnx_model (str): Path to the ONNX model. 42 | """ 43 | self.infer_tool = infer_tool 44 | if self.infer_tool == 'openvino': 45 | # 构建openvino推理引擎 46 | self.openvino = OpenvinoInference(onnx_model) 47 | self.ndtype = np.single 48 | else: 49 | # 构建onnxruntime推理引擎 50 | self.ort_session = ort.InferenceSession(onnx_model, 51 | providers=['CUDAExecutionProvider', 'CPUExecutionProvider'] 52 | if ort.get_device() == 'GPU' else ['CPUExecutionProvider']) 53 | 54 | # Numpy dtype: support both FP32 and FP16 onnx model 55 | self.ndtype = np.half if self.ort_session.get_inputs()[0].type == 'tensor(float16)' else np.single 56 | 57 | self.classes = CLASSES # 加载模型类别 58 | self.model_height, self.model_width = imgsz[0], imgsz[1] # 图像resize大小 59 | self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3)) # 为每个类别生成调色板 60 | 61 | def __call__(self, im0, conf_threshold=0.4, iou_threshold=0.45): 62 | """ 63 | The whole pipeline: pre-process -> inference -> post-process. 64 | 65 | Args: 66 | im0 (Numpy.ndarray): original input image. 67 | conf_threshold (float): confidence threshold for filtering predictions. 68 | iou_threshold (float): iou threshold for NMS. 69 | 70 | Returns: 71 | boxes (List): list of bounding boxes. 72 | """ 73 | # 前处理Pre-process 74 | t1 = time.time() 75 | im, ratio, (pad_w, pad_h) = self.preprocess(im0) 76 | print('预处理时间:{:.3f}s'.format(time.time() - t1)) 77 | 78 | # 推理 inference 79 | t2 = time.time() 80 | if self.infer_tool == 'openvino': 81 | preds = self.openvino.predict(im) 82 | else: 83 | preds = self.ort_session.run(None, {self.ort_session.get_inputs()[0].name: im})[0] 84 | print('推理时间:{:.2f}s'.format(time.time() - t2)) 85 | 86 | # 后处理Post-process 87 | t3 = time.time() 88 | boxes = self.postprocess(preds, 89 | im0=im0, 90 | ratio=ratio, 91 | pad_w=pad_w, 92 | pad_h=pad_h, 93 | conf_threshold=conf_threshold, 94 | iou_threshold=iou_threshold, 95 | ) 96 | print('后处理时间:{:.3f}s'.format(time.time() - t3)) 97 | 98 | return boxes 99 | 100 | # 前处理,包括:resize, pad, HWC to CHW,BGR to RGB,归一化,增加维度CHW -> BCHW 101 | def preprocess(self, img): 102 | """ 103 | Pre-processes the input image. 104 | 105 | Args: 106 | img (Numpy.ndarray): image about to be processed. 107 | 108 | Returns: 109 | img_process (Numpy.ndarray): image preprocessed for inference. 110 | ratio (tuple): width, height ratios in letterbox. 111 | pad_w (float): width padding in letterbox. 112 | pad_h (float): height padding in letterbox. 
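        Note: np.einsum('HWC->CHW', img) moves the channel axis first, and the [::-1]
        applied afterwards reverses that channel axis, i.e. OpenCV's BGR order becomes
        the RGB order the model expects, before the / 255.0 normalization.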
113 | """ 114 | # Resize and pad input image using letterbox() (Borrowed from Ultralytics) 115 | shape = img.shape[:2] # original image shape 116 | new_shape = (self.model_height, self.model_width) 117 | r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) 118 | ratio = r, r 119 | new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) 120 | pad_w, pad_h = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2 # wh padding 121 | if shape[::-1] != new_unpad: # resize 122 | img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) 123 | top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1)) 124 | left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1)) 125 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)) # 填充 126 | 127 | # Transforms: HWC to CHW -> BGR to RGB -> div(255) -> contiguous -> add axis(optional) 128 | img = np.ascontiguousarray(np.einsum('HWC->CHW', img)[::-1], dtype=self.ndtype) / 255.0 129 | img_process = img[None] if len(img.shape) == 3 else img 130 | return img_process, ratio, (pad_w, pad_h) 131 | 132 | # 后处理,包括:阈值过滤与NMS 133 | def postprocess(self, preds, im0, ratio, pad_w, pad_h, conf_threshold, iou_threshold): 134 | """ 135 | Post-process the prediction. 136 | 137 | Args: 138 | preds (Numpy.ndarray): predictions come from ort.session.run(). 139 | im0 (Numpy.ndarray): [h, w, c] original input image. 140 | ratio (tuple): width, height ratios in letterbox. 141 | pad_w (float): width padding in letterbox. 142 | pad_h (float): height padding in letterbox. 143 | conf_threshold (float): conf threshold. 144 | iou_threshold (float): iou threshold. 145 | 146 | Returns: 147 | boxes (List): list of bounding boxes. 148 | """ 149 | # (Batch_size, Num_anchors, xywh_score_conf_cls), v5和v6_1.0的[..., 4]是置信度分数,v8v9采用类别里面最大的概率作为置信度score 150 | x = preds # outputs: predictions (1, 8400*3, 85) 151 | 152 | # Predictions filtering by conf-threshold 153 | x = x[x[..., 4] > conf_threshold] 154 | 155 | # Create a new matrix which merge these(box, score, cls) into one 156 | # For more details about `numpy.c_()`: https://numpy.org/doc/1.26/reference/generated/numpy.c_.html 157 | x = np.c_[x[..., :4], x[..., 4], np.argmax(x[..., 5:], axis=-1)] 158 | 159 | # NMS filtering 160 | # 经过NMS后的值, np.array([[x, y, w, h, conf, cls], ...]), shape=(-1, 4 + 1 + 1) 161 | x = x[cv2.dnn.NMSBoxes(x[:, :4], x[:, 4], conf_threshold, iou_threshold)] 162 | 163 | # 重新缩放边界框,为画图做准备 164 | if len(x) > 0: 165 | # Bounding boxes format change: cxcywh -> xyxy 166 | x[..., [0, 1]] -= x[..., [2, 3]] / 2 167 | x[..., [2, 3]] += x[..., [0, 1]] 168 | 169 | # Rescales bounding boxes from model shape(model_height, model_width) to the shape of original image 170 | x[..., :4] -= [pad_w, pad_h, pad_w, pad_h] 171 | x[..., :4] /= min(ratio) 172 | 173 | # Bounding boxes boundary clamp 174 | x[..., [0, 2]] = x[:, [0, 2]].clip(0, im0.shape[1]) 175 | x[..., [1, 3]] = x[:, [1, 3]].clip(0, im0.shape[0]) 176 | 177 | return x[..., :6] # boxes 178 | else: 179 | return [] 180 | 181 | # 绘框 182 | def draw_and_visualize(self, im, bboxes, vis=False, save=True): 183 | """ 184 | Draw and visualize results. 185 | 186 | Args: 187 | im (np.ndarray): original image, shape [h, w, c]. 188 | bboxes (numpy.ndarray): [n, 6], n is number of bboxes. 189 | vis (bool): imshow using OpenCV. 190 | save (bool): save image annotated. 
191 | 192 | Returns: 193 | None 194 | """ 195 | # Draw rectangles 196 | for (*box, conf, cls_) in bboxes: 197 | # draw bbox rectangle 198 | cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), 199 | self.color_palette[int(cls_)], 1, cv2.LINE_AA) 200 | cv2.putText(im, f'{self.classes[int(cls_)]}: {conf:.3f}', (int(box[0]), int(box[1] - 9)), 201 | cv2.FONT_HERSHEY_SIMPLEX, 0.7, self.color_palette[int(cls_)], 2, cv2.LINE_AA) 202 | 203 | # Show image 204 | if vis: 205 | cv2.imshow('demo', im) 206 | cv2.waitKey(0) 207 | cv2.destroyAllWindows() 208 | 209 | # Save image 210 | if save: 211 | cv2.imwrite('demo.jpg', im) 212 | 213 | 214 | if __name__ == '__main__': 215 | # Create an argument parser to handle command-line arguments 216 | parser = argparse.ArgumentParser() 217 | parser.add_argument('--model', type=str, default='weights/yolov7.onnx', help='Path to ONNX model') 218 | parser.add_argument('--source', type=str, default=str('bus.jpg'), help='Path to input image') 219 | parser.add_argument('--imgsz', type=tuple, default=(640, 640), help='Image input size') 220 | parser.add_argument('--conf', type=float, default=0.25, help='Confidence threshold') 221 | parser.add_argument('--iou', type=float, default=0.45, help='NMS IoU threshold') 222 | parser.add_argument('--infer_tool', type=str, default='openvino', choices=("openvino", "onnxruntime"), help='选择推理引擎') 223 | args = parser.parse_args() 224 | 225 | # Build model 226 | model = YOLOv7(args.model, args.imgsz, args.infer_tool) 227 | 228 | # Read image by OpenCV 229 | img = cv2.imread(args.source) 230 | 231 | # Inference 232 | boxes = model(img, conf_threshold=args.conf, iou_threshold=args.iou) 233 | 234 | # Visualize 235 | if len(boxes) > 0: 236 | model.draw_and_visualize(img, boxes, vis=False, save=True) 237 | 238 | 239 | -------------------------------------------------------------------------------- /YOLOv8-pose_openvino_onnx.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | import cv2 4 | import numpy as np 5 | from openvino.runtime import Core # pip install openvino -i https://pypi.tuna.tsinghua.edu.cn/simple 6 | import onnxruntime as ort # 使用onnxruntime推理用上,pip install onnxruntime,默认安装CPU 7 | 8 | 9 | # Pose默认的person类 10 | CLASSES = ['person'] 11 | 12 | class OpenvinoInference(object): 13 | def __init__(self, onnx_path): 14 | self.onnx_path = onnx_path 15 | ie = Core() 16 | self.model_onnx = ie.read_model(model=self.onnx_path) 17 | self.compiled_model_onnx = ie.compile_model(model=self.model_onnx, device_name="CPU") 18 | self.output_layer_onnx = self.compiled_model_onnx.output(0) 19 | 20 | def predict(self, datas): 21 | predict_data = self.compiled_model_onnx([datas])[self.output_layer_onnx] 22 | return predict_data 23 | 24 | 25 | class KeyPoint_draw(object): 26 | def __init__(self): 27 | # 定义一个调色板数组,其中每个元素是一个包含RGB值的列表,用于表示不同的颜色 28 | self.palette = np.array([[255, 128, 0], [255, 153, 51], [255, 178, 102], 29 | [230, 230, 0], [255, 153, 255], [153, 204, 255], 30 | [255, 102, 255], [255, 51, 255], [102, 178, 255], 31 | [51, 153, 255], [255, 153, 153], [255, 102, 102], 32 | [255, 51, 51], [153, 255, 153], [102, 255, 102], 33 | [51, 255, 51], [0, 255, 0], [0, 0, 255], [255, 0, 0], 34 | [255, 255, 255]]) 35 | # 定义人体17个关键点的连接顺序,每个子列表包含两个数字,代表要连接的关键点的索引, 1鼻子 2左眼 3右眼 4左耳 5右耳 6左肩 7右肩 36 | # 8左肘 9右肘 10左手腕 11右手腕 12左髋 13右髋 14左膝 15右膝 16左踝 17右踝 37 | self.skeleton = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], 38 | [7, 13], [6, 7], [6, 8], [7, 9],
[8, 10], [9, 11], [2, 3], 39 | [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7]] 40 | # 通过索引从调色板中选择颜色,用于绘制人体骨架的线条,每个索引对应一种颜色 41 | self.pose_limb_color = self.palette[[9, 9, 9, 9, 7, 7, 7, 0, 0, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16]] 42 | # 通过索引从调色板中选择颜色,用于绘制人体的关键点,每个索引对应一种颜色 43 | self.pose_kpt_color = self.palette[[16, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9]] 44 | 45 | def plot_skeleton_kpts(self, im, kpts, steps=3): 46 | num_kpts = len(kpts) // steps # 51 / 3 =17 47 | # 画点 48 | for kid in range(num_kpts): 49 | r, g, b = self.pose_kpt_color[kid] 50 | x_coord, y_coord = kpts[steps * kid], kpts[steps * kid + 1] 51 | conf = kpts[steps * kid + 2] 52 | if conf > 0.5: # 关键点的置信度必须大于 0.5 53 | cv2.circle(im, (int(x_coord), int(y_coord)), 10, (int(r), int(g), int(b)), -1) 54 | # 画骨架 55 | for sk_id, sk in enumerate(self.skeleton): 56 | r, g, b = self.pose_limb_color[sk_id] 57 | pos1 = (int(kpts[(sk[0] - 1) * steps]), int(kpts[(sk[0] - 1) * steps + 1])) 58 | pos2 = (int(kpts[(sk[1] - 1) * steps]), int(kpts[(sk[1] - 1) * steps + 1])) 59 | conf1 = kpts[(sk[0] - 1) * steps + 2] 60 | conf2 = kpts[(sk[1] - 1) * steps + 2] 61 | if conf1 > 0.5 and conf2 > 0.5: # 对于肢体,相连的两个关键点置信度 必须同时大于 0.5 62 | cv2.line(im, pos1, pos2, (int(r), int(g), int(b)), thickness=2) 63 | 64 | 65 | class YOLOv8_pose: 66 | """YOLOv8_pose detection model class for handling inference and visualization.""" 67 | 68 | def __init__(self, onnx_model, imgsz=(640, 640), infer_tool='openvino'): 69 | """ 70 | Initialization. 71 | 72 | Args: 73 | onnx_model (str): Path to the ONNX model. 74 | """ 75 | self.infer_tool = infer_tool 76 | if self.infer_tool == 'openvino': 77 | # 构建openvino推理引擎 78 | self.openvino = OpenvinoInference(onnx_model) 79 | self.ndtype = np.single 80 | else: 81 | # 构建onnxruntime推理引擎 82 | self.ort_session = ort.InferenceSession(onnx_model, 83 | providers=['CUDAExecutionProvider', 'CPUExecutionProvider'] 84 | if ort.get_device() == 'GPU' else ['CPUExecutionProvider']) 85 | 86 | # Numpy dtype: support both FP32 and FP16 onnx model 87 | self.ndtype = np.half if self.ort_session.get_inputs()[0].type == 'tensor(float16)' else np.single 88 | 89 | self.classes = CLASSES # 加载模型类别 90 | self.model_height, self.model_width = imgsz[0], imgsz[1] # 图像resize大小 91 | self.color = (0, 0, 255) # 为类别生成调色板 92 | 93 | def __call__(self, im0, conf_threshold=0.4, iou_threshold=0.45): 94 | """ 95 | The whole pipeline: pre-process -> inference -> post-process. 96 | 97 | Args: 98 | im0 (Numpy.ndarray): original input image. 99 | conf_threshold (float): confidence threshold for filtering predictions. 100 | iou_threshold (float): iou threshold for NMS. 101 | 102 | Returns: 103 | boxes (List): list of bounding boxes. 
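            Note: unlike YOLOv7-pose there is no separate objectness column here;
            the raw YOLOv8-pose output is (1, 56, 8400) with 56 = 4 box + 1 person
            score + 17*3 keypoints, which postprocess() transposes to (1, 8400, 56)
            before filtering.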
104 | """ 105 | # 前处理Pre-process 106 | t1 = time.time() 107 | im, ratio, (pad_w, pad_h) = self.preprocess(im0) 108 | print('预处理时间:{:.3f}s'.format(time.time() - t1)) 109 | 110 | # 推理 inference 111 | t2 = time.time() 112 | if self.infer_tool == 'openvino': 113 | preds = self.openvino.predict(im) 114 | else: 115 | preds = self.ort_session.run(None, {self.ort_session.get_inputs()[0].name: im})[0] 116 | print('推理时间:{:.2f}s'.format(time.time() - t2)) 117 | 118 | # 后处理Post-process 119 | t3 = time.time() 120 | boxes = self.postprocess(preds, 121 | im0=im0, 122 | ratio=ratio, 123 | pad_w=pad_w, 124 | pad_h=pad_h, 125 | conf_threshold=conf_threshold, 126 | iou_threshold=iou_threshold, 127 | ) 128 | print('后处理时间:{:.3f}s'.format(time.time() - t3)) 129 | 130 | return boxes 131 | 132 | # 前处理,包括:resize, pad, HWC to CHW,BGR to RGB,归一化,增加维度CHW -> BCHW 133 | def preprocess(self, img): 134 | """ 135 | Pre-processes the input image. 136 | 137 | Args: 138 | img (Numpy.ndarray): image about to be processed. 139 | 140 | Returns: 141 | img_process (Numpy.ndarray): image preprocessed for inference. 142 | ratio (tuple): width, height ratios in letterbox. 143 | pad_w (float): width padding in letterbox. 144 | pad_h (float): height padding in letterbox. 145 | """ 146 | # Resize and pad input image using letterbox() (Borrowed from Ultralytics) 147 | shape = img.shape[:2] # original image shape 148 | new_shape = (self.model_height, self.model_width) 149 | r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) 150 | ratio = r, r 151 | new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) 152 | pad_w, pad_h = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2 # wh padding 153 | if shape[::-1] != new_unpad: # resize 154 | img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) 155 | top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1)) 156 | left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1)) 157 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)) # 填充 158 | 159 | # Transforms: HWC to CHW -> BGR to RGB -> div(255) -> contiguous -> add axis(optional) 160 | img = np.ascontiguousarray(np.einsum('HWC->CHW', img)[::-1], dtype=self.ndtype) / 255.0 161 | img_process = img[None] if len(img.shape) == 3 else img 162 | return img_process, ratio, (pad_w, pad_h) 163 | 164 | # 后处理,包括:阈值过滤与NMS 165 | def postprocess(self, preds, im0, ratio, pad_w, pad_h, conf_threshold, iou_threshold): 166 | """ 167 | Post-process the prediction. 168 | 169 | Args: 170 | preds (Numpy.ndarray): predictions come from ort.session.run(). 171 | im0 (Numpy.ndarray): [h, w, c] original input image. 172 | ratio (tuple): width, height ratios in letterbox. 173 | pad_w (float): width padding in letterbox. 174 | pad_h (float): height padding in letterbox. 175 | conf_threshold (float): conf threshold. 176 | iou_threshold (float): iou threshold. 177 | 178 | Returns: 179 | boxes (List): list of bounding boxes. 
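            Example (how the keypoint rescale loop walks the columns): a row
            after NMS is [x, y, w, h, conf, k1x, k1y, k1c, ..., k17x, k17y, k17c],
            so keypoint j (1-based) has its x at column 3*j + 2 and y at 3*j + 3;
            `for kid in range(2, 19)` touches columns kid*3 - 1 and kid*3, i.e.
            exactly the 17 x- and y-columns, leaving each keypoint's confidence
            untouched.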
180 | """ 181 | x = preds # outputs: predictions (1, 56, 8400),其中56=4+1+17*3,17个关键点(x,y,visibility) 182 | # Transpose the first output: (Batch_size, xywh_conf_pose, Num_anchors) -> (Batch_size, Num_anchors, xywh_conf_pose) 183 | x = np.einsum('bcn->bnc', x) # (1, 8400, 56) 184 | 185 | # Predictions filtering by conf-threshold 186 | x = x[x[..., 4] > conf_threshold] 187 | 188 | # Create a new matrix which merge these(box, score, pose) into one 189 | # For more details about `numpy.c_()`: https://numpy.org/doc/1.26/reference/generated/numpy.c_.html 190 | x = np.c_[x[..., :4], x[..., 4], x[..., 5:]] 191 | 192 | # NMS filtering 193 | # 经过NMS后的值, np.array([[x, y, w, h, conf, pose], ...]), shape=(-1, 4 + 1 + 17*3) 194 | x = x[cv2.dnn.NMSBoxes(x[:, :4], x[:, 4], conf_threshold, iou_threshold)] 195 | 196 | # 重新缩放边界框,为画图做准备 197 | if len(x) > 0: 198 | # Bounding boxes format change: cxcywh -> xyxy 199 | x[..., [0, 1]] -= x[..., [2, 3]] / 2 200 | x[..., [2, 3]] += x[..., [0, 1]] 201 | 202 | # Rescales bounding boxes from model shape(model_height, model_width) to the shape of original image 203 | x[..., :4] -= [pad_w, pad_h, pad_w, pad_h] 204 | x[..., :4] /= min(ratio) 205 | 206 | # Bounding boxes boundary clamp 207 | x[..., [0, 2]] = x[:, [0, 2]].clip(0, im0.shape[1]) # clip避免边界框超出图像边界 208 | x[..., [1, 3]] = x[:, [1, 3]].clip(0, im0.shape[0]) 209 | 210 | # 关键点坐标映射到原图上,从[:, 5:]开始算 211 | num_kpts = x.shape[1] // 3 # 56 // 3 = 18 212 | for kid in range(2, num_kpts + 1): 213 | x[:, kid * 3 - 1] = (x[:, kid * 3 - 1] - pad_w) / min(ratio) 214 | x[:, kid * 3] = (x[:, kid * 3] - pad_h) / min(ratio) 215 | 216 | return x 217 | else: 218 | return [] 219 | 220 | # 绘框 221 | def draw_and_visualize(self, im, bboxes, keypoint_draw, vis=False, save=True): 222 | """ 223 | Draw and visualize results. 224 | 225 | Args: 226 | im (np.ndarray): original image, shape [h, w, c]. 227 | bboxes (numpy.ndarray): [n, 56], n is number of bboxes. 228 | vis (bool): imshow using OpenCV. 229 | save (bool): save image annotated. 
230 | 231 | Returns: 232 | None 233 | """ 234 | 235 | # Draw rectangles 236 | for bbox in bboxes: 237 | box, conf, kpts = bbox[:4], bbox[4], bbox[5:] 238 | # draw bbox rectangle 239 | cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), 240 | self.color, 1, cv2.LINE_AA) 241 | cv2.putText(im, f'{self.classes[0]}: {conf:.3f}', (int(box[0]), int(box[1] - 9)), 242 | cv2.FONT_HERSHEY_SIMPLEX, 0.7, self.color, 2, cv2.LINE_AA) 243 | 244 | # 画关键点,连线 245 | keypoint_draw.plot_skeleton_kpts(im, kpts) 246 | 247 | # Show image 248 | if vis: 249 | cv2.imshow('demo', im) 250 | cv2.waitKey(0) 251 | cv2.destroyAllWindows() 252 | 253 | # Save image 254 | if save: 255 | cv2.imwrite('demo.jpg', im) 256 | 257 | 258 | if __name__ == '__main__': 259 | # Create an argument parser to handle command-line arguments 260 | parser = argparse.ArgumentParser() 261 | parser.add_argument('--model', type=str, default='weights/yolov8s-pose.onnx', help='Path to ONNX model') 262 | parser.add_argument('--source', type=str, default=str('bus.jpg'), help='Path to input image') 263 | parser.add_argument('--imgsz', type=tuple, default=(640, 640), help='Image input size') 264 | parser.add_argument('--conf', type=float, default=0.25, help='Confidence threshold') 265 | parser.add_argument('--iou', type=float, default=0.45, help='NMS IoU threshold') 266 | parser.add_argument('--infer_tool', type=str, default='openvino', choices=("openvino", "onnxruntime"), help='选择推理引擎') 267 | args = parser.parse_args() 268 | 269 | # Build model 270 | model = YOLOv8_pose(args.model, args.imgsz, args.infer_tool) 271 | keypoint_draw = KeyPoint_draw() # 可视化关键点 272 | 273 | # Read image by OpenCV 274 | img = cv2.imread(args.source) 275 | 276 | # Inference 277 | boxes = model(img, conf_threshold=args.conf, iou_threshold=args.iou) 278 | 279 | # Visualize 280 | if len(boxes) > 0: 281 | model.draw_and_visualize(img, boxes, keypoint_draw, vis=False, save=True) 282 | 283 | -------------------------------------------------------------------------------- /YOLOv8-seg_openvino_onnx.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | import cv2 4 | import numpy as np 5 | from openvino.runtime import Core # pip install openvino -i https://pypi.tuna.tsinghua.edu.cn/simple 6 | import onnxruntime as ort # 使用onnxruntime推理用上,pip install onnxruntime,默认安装CPU 7 | 8 | 9 | # COCO默认的80类 10 | CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 11 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 12 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 13 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 14 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 15 | 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 16 | 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 17 | 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] 18 | 19 | 20 | class OpenvinoInference(object): 21 | def __init__(self, onnx_path): 22 | self.onnx_path = onnx_path 23 | ie = Core() 24 | self.model_onnx = ie.read_model(model=self.onnx_path) 25 | 
self.compiled_model_onnx = ie.compile_model(model=self.model_onnx, device_name="CPU") 26 | 27 | def predict(self, datas): 28 | # 注:self.compiled_model_onnx([datas])是一个字典,self.compiled_model_onnx.output(0)是字典键,第一种读取所有值方法(0.11s) 比 第二种按键取值的方法(0.20s) 耗时减半 29 | predict_data = list(self.compiled_model_onnx([datas]).values()) 30 | # predict_data = [self.compiled_model_onnx([datas])[self.compiled_model_onnx.output(0)], 31 | # self.compiled_model_onnx([datas])[self.compiled_model_onnx.output(1)]] 32 | return predict_data 33 | 34 | 35 | class YOLOv8_seg: 36 | """YOLOv8 segmentation model class for handling inference and visualization.""" 37 | 38 | def __init__(self, onnx_model, imgsz=(640, 640), infer_tool='openvino'): 39 | """ 40 | Initialization. 41 | 42 | Args: 43 | onnx_model (str): Path to the ONNX model. 44 | """ 45 | self.infer_tool = infer_tool 46 | if self.infer_tool == 'openvino': 47 | # 构建openvino推理引擎 48 | self.openvino = OpenvinoInference(onnx_model) 49 | self.ndtype = np.single 50 | else: 51 | # 构建onnxruntime推理引擎 52 | self.ort_session = ort.InferenceSession(onnx_model, 53 | providers=['CUDAExecutionProvider', 'CPUExecutionProvider'] 54 | if ort.get_device() == 'GPU' else ['CPUExecutionProvider']) 55 | 56 | # Numpy dtype: support both FP32 and FP16 onnx model 57 | self.ndtype = np.half if self.ort_session.get_inputs()[0].type == 'tensor(float16)' else np.single 58 | 59 | self.classes = CLASSES # 加载模型类别 60 | self.model_height, self.model_width = imgsz[0], imgsz[1] # 图像resize大小 61 | self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3)) # 为每个类别生成调色板 62 | 63 | def __call__(self, im0, conf_threshold=0.4, iou_threshold=0.45, nm=32): 64 | """ 65 | The whole pipeline: pre-process -> inference -> post-process. 66 | 67 | Args: 68 | im0 (Numpy.ndarray): original input image. 69 | conf_threshold (float): confidence threshold for filtering predictions. 70 | iou_threshold (float): iou threshold for NMS. 71 | nm (int): the number of masks. 72 | 73 | Returns: 74 | boxes (List): list of bounding boxes. 75 | segments (List): list of segments. 76 | masks (np.ndarray): [N, H, W], output masks. 77 | """ 78 | # 前处理Pre-process 79 | t1 = time.time() 80 | im, ratio, (pad_w, pad_h) = self.preprocess(im0) 81 | print('预处理时间:{:.3f}s'.format(time.time() - t1)) 82 | 83 | # 推理 inference 84 | t2 = time.time() 85 | if self.infer_tool == 'openvino': 86 | preds = self.openvino.predict(im) 87 | else: 88 | preds = self.ort_session.run(None, {self.ort_session.get_inputs()[0].name: im}) # 与bbox区别,输出是个列表,[检测头的输出(1, 116, 8400), 分割头的输出(1, 32, 160, 160)] 89 | print('推理时间:{:.2f}s'.format(time.time() - t2)) 90 | 91 | # 后处理Post-process 92 | t3 = time.time() 93 | boxes, segments, masks = self.postprocess(preds, 94 | im0=im0, 95 | ratio=ratio, 96 | pad_w=pad_w, 97 | pad_h=pad_h, 98 | conf_threshold=conf_threshold, 99 | iou_threshold=iou_threshold, 100 | nm=nm 101 | ) 102 | print('后处理时间:{:.3f}s'.format(time.time() - t3)) 103 | 104 | return boxes, segments, masks 105 | 106 | # 前处理,包括:resize, pad, HWC to CHW,BGR to RGB,归一化,增加维度CHW -> BCHW 107 | def preprocess(self, img): 108 | """ 109 | Pre-processes the input image. 110 | 111 | Args: 112 | img (Numpy.ndarray): image about to be processed. 113 | 114 | Returns: 115 | img_process (Numpy.ndarray): image preprocessed for inference. 116 | ratio (tuple): width, height ratios in letterbox. 117 | pad_w (float): width padding in letterbox. 118 | pad_h (float): height padding in letterbox. 
119 | """ 120 | # Resize and pad input image using letterbox() (Borrowed from Ultralytics) 121 | shape = img.shape[:2] # original image shape 122 | new_shape = (self.model_height, self.model_width) 123 | r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) 124 | ratio = r, r 125 | new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) 126 | pad_w, pad_h = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2 # wh padding 127 | if shape[::-1] != new_unpad: # resize 128 | img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) 129 | top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1)) 130 | left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1)) 131 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)) # 填充 132 | 133 | # Transforms: HWC to CHW -> BGR to RGB -> div(255) -> contiguous -> add axis(optional) 134 | img = np.ascontiguousarray(np.einsum('HWC->CHW', img)[::-1], dtype=self.ndtype) / 255.0 135 | img_process = img[None] if len(img.shape) == 3 else img 136 | return img_process, ratio, (pad_w, pad_h) 137 | 138 | # 后处理,包括:阈值过滤+NMS+masks处理 139 | def postprocess(self, preds, im0, ratio, pad_w, pad_h, conf_threshold, iou_threshold, nm=32): 140 | """ 141 | Post-process the prediction. 142 | 143 | Args: 144 | preds (Numpy.ndarray): predictions come from ort.session.run(). 145 | im0 (Numpy.ndarray): [h, w, c] original input image. 146 | ratio (tuple): width, height ratios in letterbox. 147 | pad_w (float): width padding in letterbox. 148 | pad_h (float): height padding in letterbox. 149 | conf_threshold (float): conf threshold. 150 | iou_threshold (float): iou threshold. 151 | nm (int): the number of masks. 152 | 153 | Returns: 154 | boxes (List): list of bounding boxes. 155 | segments (List): list of segments. 156 | masks (np.ndarray): [N, H, W], output masks. 
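            Note: each of the 8400 candidates carries 116 values: 4 box + 80 class
            scores + nm=32 mask coefficients. The confidence filter and argmax below
            deliberately exclude the trailing nm columns; those coefficients are only
            consumed later by process_mask() against the 32 prototype masks.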
157 | """ 158 | x, protos = preds[0], preds[1] # 与bbox区别:Two outputs: 检测头的输出(1, 116, 8400), 分割头的输出(1, 32, 160, 160) 159 | 160 | # Transpose the first output: (Batch_size, xywh_conf_cls_nm, Num_anchors) -> (Batch_size, Num_anchors, xywh_conf_cls_nm) 161 | x = np.einsum('bcn->bnc', x) # (1, 8400, 116) 162 | 163 | # Predictions filtering by conf-threshold,不包括后32维的向量(32维的向量可以看作是与每个检测框关联的分割 mask 的系数或权重) 164 | x = x[np.amax(x[..., 4:-nm], axis=-1) > conf_threshold] 165 | 166 | # Create a new matrix which merge these(box, score, cls, nm) into one 167 | # For more details about `numpy.c_()`: https://numpy.org/doc/1.26/reference/generated/numpy.c_.html 168 | x = np.c_[x[..., :4], np.amax(x[..., 4:-nm], axis=-1), np.argmax(x[..., 4:-nm], axis=-1), x[..., -nm:]] 169 | 170 | # NMS filtering 171 | # 经过NMS后的值, np.array([[x, y, w, h, conf, cls, nm], ...]), shape=(-1, 4 + 1 + 1 + 32) 172 | x = x[cv2.dnn.NMSBoxes(x[:, :4], x[:, 4], conf_threshold, iou_threshold)] 173 | 174 | # 重新缩放边界框,为画图做准备 175 | if len(x) > 0: 176 | # Bounding boxes format change: cxcywh -> xyxy 177 | x[..., [0, 1]] -= x[..., [2, 3]] / 2 178 | x[..., [2, 3]] += x[..., [0, 1]] 179 | 180 | # Rescales bounding boxes from model shape(model_height, model_width) to the shape of original image 181 | x[..., :4] -= [pad_w, pad_h, pad_w, pad_h] 182 | x[..., :4] /= min(ratio) 183 | 184 | # Bounding boxes boundary clamp 185 | x[..., [0, 2]] = x[:, [0, 2]].clip(0, im0.shape[1]) 186 | x[..., [1, 3]] = x[:, [1, 3]].clip(0, im0.shape[0]) 187 | 188 | # 与bbox区别:增加masks处理 189 | # Process masks 190 | masks = self.process_mask(protos[0], x[:, 6:], x[:, :4], im0.shape) 191 | # Masks -> Segments(contours) 192 | segments = self.masks2segments(masks) 193 | 194 | return x[..., :6], segments, masks # boxes, segments, masks 195 | else: 196 | return [], [], [] 197 | 198 | @staticmethod 199 | def masks2segments(masks): 200 | """ 201 | It takes a list of masks(n,h,w) and returns a list of segments(n,xy) (Borrowed from 202 | https://github.com/ultralytics/ultralytics/blob/465df3024f44fa97d4fad9986530d5a13cdabdca/ultralytics/utils/ops.py#L750) 203 | 204 | Args: 205 | masks (numpy.ndarray): the output of the model, which is a tensor of shape (batch_size, 160, 160). 206 | 207 | Returns: 208 | segments (List): list of segment masks. 209 | """ 210 | segments = [] 211 | for x in masks.astype('uint8'): 212 | c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[0] # CHAIN_APPROX_SIMPLE 该函数用于查找二值图像中的轮廓。 213 | if c: 214 | # 这段代码的目的是找到图像x中的最外层轮廓,并从中选择最长的轮廓,然后将其转换为NumPy数组的形式。 215 | c = np.array(c[np.array([len(x) for x in c]).argmax()]).reshape(-1, 2) 216 | else: 217 | c = np.zeros((0, 2)) # no segments found 218 | segments.append(c.astype('float32')) 219 | return segments 220 | 221 | 222 | def process_mask(self, protos, masks_in, bboxes, im0_shape): 223 | """ 224 | Takes the output of the mask head, and applies the mask to the bounding boxes. This produces masks of higher quality 225 | but is slower. (Borrowed from https://github.com/ultralytics/ultralytics/blob/465df3024f44fa97d4fad9986530d5a13cdabdca/ultralytics/utils/ops.py#L618) 226 | 227 | Args: 228 | protos (numpy.ndarray): [mask_dim, mask_h, mask_w]. 229 | masks_in (numpy.ndarray): [n, mask_dim], n is number of masks after nms. 230 | bboxes (numpy.ndarray): bboxes re-scaled to original image shape. 231 | im0_shape (tuple): the size of the input image (h,w,c). 232 | 233 | Returns: 234 | (numpy.ndarray): The upsampled masks. 
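            Example (shape walk-through; n detections and the usual 32 prototypes
            at 160x160 are assumed):
                protos.reshape((32, -1))        # (32, 25600)
                np.matmul(masks_in, ...)        # (n, 32) @ (32, 25600) -> (n, 25600)
                .reshape((-1, 160, 160))        # (n, 160, 160); then transpose to HWN,
                                                # rescale to im0, crop to boxes, > 0.5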
235 | """ 236 | c, mh, mw = protos.shape 237 | masks = np.matmul(masks_in, protos.reshape((c, -1))).reshape((-1, mh, mw)).transpose(1, 2, 0) # HWN 238 | masks = np.ascontiguousarray(masks) 239 | masks = self.scale_mask(masks, im0_shape) # re-scale mask from P3 shape to original input image shape 240 | masks = np.einsum('HWN -> NHW', masks) # HWN -> NHW 241 | masks = self.crop_mask(masks, bboxes) 242 | return np.greater(masks, 0.5) 243 | 244 | @staticmethod 245 | def scale_mask(masks, im0_shape, ratio_pad=None): 246 | """ 247 | Takes a mask, and resizes it to the original image size. (Borrowed from 248 | https://github.com/ultralytics/ultralytics/blob/465df3024f44fa97d4fad9986530d5a13cdabdca/ultralytics/utils/ops.py#L305) 249 | 250 | Args: 251 | masks (np.ndarray): resized and padded masks/images, [h, w, num]/[h, w, 3]. 252 | im0_shape (tuple): the original image shape. 253 | ratio_pad (tuple): the ratio of the padding to the original image. 254 | 255 | Returns: 256 | masks (np.ndarray): The masks that are being returned. 257 | """ 258 | im1_shape = masks.shape[:2] 259 | if ratio_pad is None: # calculate from im0_shape 260 | gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1]) # gain = old / new 261 | pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2 # wh padding 262 | else: 263 | pad = ratio_pad[1] 264 | 265 | # Calculate tlbr of mask 266 | top, left = int(round(pad[1] - 0.1)), int(round(pad[0] - 0.1)) # y, x 267 | bottom, right = int(round(im1_shape[0] - pad[1] + 0.1)), int(round(im1_shape[1] - pad[0] + 0.1)) 268 | if len(masks.shape) < 2: 269 | raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}') 270 | masks = masks[top:bottom, left:right] 271 | masks = cv2.resize(masks, (im0_shape[1], im0_shape[0]), 272 | interpolation=cv2.INTER_LINEAR) # INTER_CUBIC would be better 273 | if len(masks.shape) == 2: 274 | masks = masks[:, :, None] 275 | return masks 276 | 277 | @staticmethod 278 | def crop_mask(masks, boxes): 279 | """ 280 | It takes a mask and a bounding box, and returns a mask that is cropped to the bounding box. (Borrowed from 281 | https://github.com/ultralytics/ultralytics/blob/465df3024f44fa97d4fad9986530d5a13cdabdca/ultralytics/utils/ops.py#L599) 282 | 283 | Args: 284 | masks (Numpy.ndarray): [n, h, w] tensor of masks. 285 | boxes (Numpy.ndarray): [n, 4] tensor of bbox coordinates in relative point form. 286 | 287 | Returns: 288 | (Numpy.ndarray): The masks are being cropped to the bounding box. 289 | """ 290 | n, h, w = masks.shape 291 | x1, y1, x2, y2 = np.split(boxes[:, :, None], 4, 1) 292 | r = np.arange(w, dtype=x1.dtype)[None, None, :] 293 | c = np.arange(h, dtype=x1.dtype)[None, :, None] 294 | return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2)) 295 | 296 | # 绘框,与bbox区别:增加masks可视化 297 | def draw_and_visualize(self, im, bboxes, segments, vis=False, save=True): 298 | """ 299 | Draw and visualize results. 300 | 301 | Args: 302 | im (np.ndarray): original image, shape [h, w, c]. 303 | bboxes (numpy.ndarray): [n, 6], n is number of bboxes. 304 | segments (List): list of segment masks. 305 | vis (bool): imshow using OpenCV. 306 | save (bool): save image annotated. 
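            Note: the polygons are filled on a copy (im_canvas) and blended back
            with cv2.addWeighted(im_canvas, 0.3, im, 0.7, 0), so masks render at
            roughly 30% opacity over the original pixels.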
307 | 308 | Returns: 309 | None 310 | """ 311 | # Draw rectangles and polygons 312 | im_canvas = im.copy() 313 | # Draw rectangles 314 | for (*box, conf, cls_), segment in zip(bboxes, segments): 315 | # draw contour and fill mask 316 | cv2.polylines(im, np.int32([segment]), True, (255, 255, 255), 2) # white borderline 317 | cv2.fillPoly(im_canvas, np.int32([segment]), (255, 0, 0)) 318 | 319 | # draw bbox rectangle 320 | cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), 321 | self.color_palette[int(cls_)], 1, cv2.LINE_AA) 322 | cv2.putText(im, f'{self.classes[int(cls_)]}: {conf:.3f}', (int(box[0]), int(box[1] - 9)), 323 | cv2.FONT_HERSHEY_SIMPLEX, 0.7, self.color_palette[int(cls_)], 2, cv2.LINE_AA) 324 | 325 | # Mix image 326 | im = cv2.addWeighted(im_canvas, 0.3, im, 0.7, 0) 327 | 328 | # Show image 329 | if vis: 330 | cv2.imshow('demo', im) 331 | cv2.waitKey(0) 332 | cv2.destroyAllWindows() 333 | 334 | # Save image 335 | if save: 336 | cv2.imwrite('demo.jpg', im) 337 | 338 | 339 | if __name__ == '__main__': 340 | # Create an argument parser to handle command-line arguments 341 | parser = argparse.ArgumentParser() 342 | parser.add_argument('--model', type=str, default='weights\\yolov8s-seg.onnx', help='Path to ONNX model') 343 | parser.add_argument('--source', type=str, default=str('bus.jpg'), help='Path to input image') 344 | parser.add_argument('--imgsz', type=tuple, default=(640, 640), help='Image input size') 345 | parser.add_argument('--conf', type=float, default=0.25, help='Confidence threshold') 346 | parser.add_argument('--iou', type=float, default=0.45, help='NMS IoU threshold') 347 | parser.add_argument('--infer_tool', type=str, default='openvino', choices=("openvino", "onnxruntime"), help='选择推理引擎') 348 | args = parser.parse_args() 349 | 350 | # Build model 351 | model = YOLOv8_seg(args.model, args.imgsz, args.infer_tool) 352 | 353 | # Read image by OpenCV 354 | img = cv2.imread(args.source) 355 | 356 | # Inference 357 | boxes, segments, _ = model(img, conf_threshold=args.conf, iou_threshold=args.iou) 358 | 359 | # Visualize, Draw bboxes and polygons 360 | if len(boxes) > 0: 361 | model.draw_and_visualize(img, boxes, segments, vis=False, save=True) 362 | 363 | -------------------------------------------------------------------------------- /YOLOv8_bytetrack.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | import cv2 4 | import numpy as np 5 | from openvino.runtime import Core # pip install openvino -i https://pypi.tuna.tsinghua.edu.cn/simple 6 | import onnxruntime as ort # 使用onnxruntime推理用上,pip install onnxruntime,默认安装CPU 7 | 8 | import copy 9 | from bytetrack.byte_tracker import BYTETracker 10 | 11 | # COCO默认的80类 12 | CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 13 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 14 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 15 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 16 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 17 | 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 18 | 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 
19 |            'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
20 | 
21 | 
22 | class OpenvinoInference(object):
23 |     def __init__(self, onnx_path):
24 |         self.onnx_path = onnx_path
25 |         ie = Core()
26 |         self.model_onnx = ie.read_model(model=self.onnx_path)
27 |         self.compiled_model_onnx = ie.compile_model(model=self.model_onnx, device_name="CPU")
28 |         self.output_layer_onnx = self.compiled_model_onnx.output(0)
29 | 
30 |     def predict(self, datas):
31 |         predict_data = self.compiled_model_onnx([datas])[self.output_layer_onnx]
32 |         return predict_data
33 | 
34 | 
35 | class YOLOv8:
36 |     """YOLOv8 object detection model class for handling inference and visualization."""
37 | 
38 |     def __init__(self, onnx_model, imgsz=(640, 640), infer_tool='openvino'):
39 |         """
40 |         Initialization.
41 | 
42 |         Args:
43 |             onnx_model (str): Path to the ONNX model.
44 |         """
45 |         self.infer_tool = infer_tool
46 |         if self.infer_tool == 'openvino':
47 |             # Build the OpenVINO inference engine
48 |             self.openvino = OpenvinoInference(onnx_model)
49 |             self.ndtype = np.single
50 |         else:
51 |             # Build the ONNXRuntime inference engine
52 |             self.ort_session = ort.InferenceSession(onnx_model,
53 |                                                     providers=['CUDAExecutionProvider', 'CPUExecutionProvider']
54 |                                                     if ort.get_device() == 'GPU' else ['CPUExecutionProvider'])
55 | 
56 |             # Numpy dtype: support both FP32 and FP16 onnx model
57 |             self.ndtype = np.half if self.ort_session.get_inputs()[0].type == 'tensor(float16)' else np.single
58 | 
59 |         self.classes = CLASSES  # model class names
60 |         self.model_height, self.model_width = imgsz[0], imgsz[1]  # image resize size (model input size)
61 |         self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3))  # one color per class
62 | 
63 |     def __call__(self, im0, conf_threshold=0.4, iou_threshold=0.45):
64 |         """
65 |         The whole pipeline: pre-process -> inference -> post-process.
66 | 
67 |         Args:
68 |             im0 (Numpy.ndarray): original input image.
69 |             conf_threshold (float): confidence threshold for filtering predictions.
70 |             iou_threshold (float): iou threshold for NMS.
71 | 
72 |         Returns:
73 |             boxes (List): list of bounding boxes.
74 |         """
75 |         # Pre-process
76 |         t1 = time.time()
77 |         im, ratio, (pad_w, pad_h) = self.preprocess(im0)
78 |         print('Pre-process time: {:.3f}s'.format(time.time() - t1))
79 | 
80 |         # Inference
81 |         t2 = time.time()
82 |         if self.infer_tool == 'openvino':
83 |             preds = self.openvino.predict(im)
84 |         else:
85 |             preds = self.ort_session.run(None, {self.ort_session.get_inputs()[0].name: im})[0]
86 |         print('Inference time: {:.2f}s'.format(time.time() - t2))
87 | 
88 |         # Post-process
89 |         t3 = time.time()
90 |         boxes = self.postprocess(preds,
91 |                                  im0=im0,
92 |                                  ratio=ratio,
93 |                                  pad_w=pad_w,
94 |                                  pad_h=pad_h,
95 |                                  conf_threshold=conf_threshold,
96 |                                  iou_threshold=iou_threshold,
97 |                                  )
98 |         print('Post-process time: {:.3f}s'.format(time.time() - t3))
99 | 
100 |         return boxes
101 | 
102 |     # Pre-processing: resize, pad, HWC to CHW, BGR to RGB, normalize, add batch dim CHW -> BCHW
103 |     def preprocess(self, img):
104 |         """
105 |         Pre-processes the input image.
106 | 
107 |         Args:
108 |             img (Numpy.ndarray): image about to be processed.
109 | 
110 |         Returns:
111 |             img_process (Numpy.ndarray): image preprocessed for inference.
112 |             ratio (tuple): width, height ratios in letterbox.
113 |             pad_w (float): width padding in letterbox.
114 |             pad_h (float): height padding in letterbox.
115 |         """
116 |         # Resize and pad input image using letterbox() (Borrowed from Ultralytics)
117 |         shape = img.shape[:2]  # original image shape
118 |         new_shape = (self.model_height, self.model_width)
119 |         r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
120 |         ratio = r, r
121 |         new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
122 |         pad_w, pad_h = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2  # wh padding
123 |         if shape[::-1] != new_unpad:  # resize
124 |             img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
125 |         top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1))
126 |         left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1))
127 |         img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114))  # pad
128 | 
129 |         # Transforms: HWC to CHW -> BGR to RGB -> div(255) -> contiguous -> add axis(optional)
130 |         img = np.ascontiguousarray(np.einsum('HWC->CHW', img)[::-1], dtype=self.ndtype) / 255.0
131 |         img_process = img[None] if len(img.shape) == 3 else img
132 |         return img_process, ratio, (pad_w, pad_h)
133 | 
134 |     # Post-processing: confidence-threshold filtering and NMS
135 |     def postprocess(self, preds, im0, ratio, pad_w, pad_h, conf_threshold, iou_threshold):
136 |         """
137 |         Post-process the prediction.
138 | 
139 |         Args:
140 |             preds (Numpy.ndarray): predictions come from ort.session.run().
141 |             im0 (Numpy.ndarray): [h, w, c] original input image.
142 |             ratio (tuple): width, height ratios in letterbox.
143 |             pad_w (float): width padding in letterbox.
144 |             pad_h (float): height padding in letterbox.
145 |             conf_threshold (float): conf threshold.
146 |             iou_threshold (float): iou threshold.
147 | 
148 |         Returns:
149 |             boxes (List): list of bounding boxes.
150 |         """
151 |         x = preds  # outputs: predictions (1, 84, 8400)
152 |         # Transpose the first output: (Batch_size, xywh_conf_cls, Num_anchors) -> (Batch_size, Num_anchors, xywh_conf_cls)
153 |         x = np.einsum('bcn->bnc', x)  # (1, 8400, 84)
154 | 
155 |         # Predictions filtering by conf-threshold
156 |         x = x[np.amax(x[..., 4:], axis=-1) > conf_threshold]
157 | 
158 |         # Create a new matrix which merge these(box, score, cls) into one
159 |         # For more details about `numpy.c_()`: https://numpy.org/doc/1.26/reference/generated/numpy.c_.html
160 |         x = np.c_[x[..., :4], np.amax(x[..., 4:], axis=-1), np.argmax(x[..., 4:], axis=-1)]
161 | 
162 |         # NMS filtering
163 |         # Values kept after NMS: np.array([[x, y, w, h, conf, cls], ...]), shape=(-1, 4 + 1 + 1)
164 |         x = x[cv2.dnn.NMSBoxes(x[:, :4], x[:, 4], conf_threshold, iou_threshold)]
165 | 
166 |         # Rescale bounding boxes back to the original image, ready for drawing
167 |         if len(x) > 0:
168 |             # Bounding boxes format change: cxcywh -> xyxy
169 |             x[..., [0, 1]] -= x[..., [2, 3]] / 2
170 |             x[..., [2, 3]] += x[..., [0, 1]]
171 | 
172 |             # Rescales bounding boxes from model shape(model_height, model_width) to the shape of original image
173 |             x[..., :4] -= [pad_w, pad_h, pad_w, pad_h]
174 |             x[..., :4] /= min(ratio)
175 | 
176 |             # Bounding boxes boundary clamp
177 |             x[..., [0, 2]] = x[:, [0, 2]].clip(0, im0.shape[1])
178 |             x[..., [1, 3]] = x[:, [1, 3]].clip(0, im0.shape[0])
179 | 
180 |             return x[..., :6]  # boxes
181 |         else:
182 |             return []
183 | 
184 |     # Draw boxes
185 |     def draw_and_visualize(self, im, bboxes, video_writer, vis=False, save=False, is_track=False):
186 |         """
187 |         Draw and visualize results.
188 | 
189 |         Args:
190 |             im (np.ndarray): original image, shape [h, w, c].
191 |             bboxes (numpy.ndarray): [n, 6], n is number of bboxes.
192 |             vis (bool): imshow using OpenCV.
193 |             save (bool): save image annotated.
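            video_writer (cv2.VideoWriter): writer that receives each annotated frame when save is True.
            is_track (bool): if True, the last column of bboxes holds a track id instead of a class id.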
194 | 
195 |         Returns:
196 |             None
197 |         """
198 |         # Draw rectangles
199 |         if not is_track:
200 |             for (*box, conf, cls_) in bboxes:
201 |                 # draw bbox rectangle
202 |                 cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])),
203 |                               self.color_palette[int(cls_)], 1, cv2.LINE_AA)
204 |                 cv2.putText(im, f'{self.classes[int(cls_)]}: {conf:.3f}', (int(box[0]), int(box[1] - 9)),
205 |                             cv2.FONT_HERSHEY_SIMPLEX, 0.7, self.color_palette[int(cls_)], 2, cv2.LINE_AA)
206 |         else:
207 |             for (*box, conf, id_) in bboxes:
208 |                 # draw bbox rectangle
209 |                 cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])),
210 |                               (0, 0, 255), 1, cv2.LINE_AA)
211 |                 cv2.putText(im, f'{id_}: {conf:.3f}', (int(box[0]), int(box[1] - 9)),
212 |                             cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2, cv2.LINE_AA)
213 | 
214 |         # Show image
215 |         if vis:
216 |             cv2.imshow('demo', im)
217 |             cv2.waitKey(1)
218 | 
219 |         # Save video
220 |         if save:
221 |             video_writer.write(im)
222 | 
223 | 
224 | 
225 | class ByteTrackerONNX(object):
226 |     def __init__(self, args):
227 |         self.args = args
228 |         self.tracker = BYTETracker(args, frame_rate=30)
229 | 
230 |     def _tracker_update(self, dets, image):
231 |         online_targets = []
232 |         if dets is not None:
233 |             online_targets = self.tracker.update(
234 |                 dets[:, :5],
235 |                 [image.shape[0], image.shape[1]],
236 |                 [image.shape[0], image.shape[1]],
237 |             )
238 | 
239 |         online_tlwhs = []
240 |         online_ids = []
241 |         online_scores = []
242 |         for online_target in online_targets:
243 |             tlwh = online_target.tlwh
244 |             track_id = online_target.track_id
245 |             vertical = tlwh[2] / tlwh[3] > 1.6
246 |             if tlwh[2] * tlwh[3] > self.args.min_box_area and not vertical:
247 |                 online_tlwhs.append(tlwh)
248 |                 online_ids.append(track_id)
249 |                 online_scores.append(online_target.score)
250 | 
251 |         return online_tlwhs, online_ids, online_scores
252 | 
253 | 
254 |     def inference(self, image, dets):
255 |         """
256 |         Args: dets: detection results, [x1, y1, x2, y2, conf, cls]
257 |         Returns: np.array([[x1, y1, x2, y2, conf, ids], ...])
258 |         """
259 |         bboxes, ids, scores = self._tracker_update(dets, image)
260 |         if len(bboxes) == 0:
261 |             return []
262 |         # Bounding boxes format change: tlwh -> xyxy
263 |         bboxes = np.array(bboxes)
264 |         bboxes[..., [2, 3]] += bboxes[..., [0, 1]]
265 |         bboxes = np.c_[bboxes, np.array(scores), np.array(ids)]
266 |         return bboxes
267 | 
268 | 
269 | if __name__ == '__main__':
270 |     # Create an argument parser to handle command-line arguments
271 |     parser = argparse.ArgumentParser()
272 |     parser.add_argument('--model', type=str, default='yolov8s.onnx', help='Path to ONNX model')
273 |     parser.add_argument('--source', type=str, default=str('test.mp4'), help='Path to input video')
274 |     parser.add_argument('--imgsz', type=tuple, default=(640, 640), help='Image input size')
275 |     parser.add_argument('--conf', type=float, default=0.25, help='Confidence threshold')
276 |     parser.add_argument('--iou', type=float, default=0.45, help='NMS IoU threshold')
277 |     parser.add_argument('--infer_tool', type=str, default='openvino', choices=("openvino", "onnxruntime"), help='inference engine to use')
278 | 
279 |     parser.add_argument('--is_track', type=lambda x: str(x).lower() == 'true', default=True, help='enable tracking')
280 |     parser.add_argument('--track_thresh', type=float, default=0.5, help='tracking confidence threshold')
281 |     parser.add_argument('--track_buffer', type=int, default=30, help='the frames for keep lost tracks, usually as same with FPS')
282 |     parser.add_argument('--match_thresh', type=float, default=0.8, help='matching threshold for tracking')
283 |     parser.add_argument('--min_box_area', type=float, default=10, help='filter out tiny boxes',)
284 |     parser.add_argument('--mot20', dest='mot20', default=False, action='store_true', help='test mot20.',)
285 |     args = parser.parse_args()
286 | 
287 |     # Build model
288 |     model = YOLOv8(args.model, args.imgsz, args.infer_tool)
289 | 
290 |     bytetrack = ByteTrackerONNX(args)
291 | 
292 |     # Open the video, read frame count / width / height / FPS, and set up the output writer
293 |     cap = cv2.VideoCapture(args.source)
294 |     width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
295 |     height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
296 |     fps = cap.get(cv2.CAP_PROP_FPS)
297 |     frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
298 |     video_writer = cv2.VideoWriter('demo.mp4', cv2.VideoWriter_fourcc(*"mp4v"), fps, (int(width), int(height)))
299 |     frame_id = 1
300 | 
301 |     while True:
302 |         start_time = time.time()
303 |         ret, img = cap.read()
304 |         if not ret:
305 |             break
306 | 
307 |         # Inference
308 |         boxes = model(img, conf_threshold=args.conf, iou_threshold=args.iou)
309 | 
310 |         # track
311 |         if args.is_track:
312 |             boxes = bytetrack.inference(img, boxes)
313 | 
314 |         # Visualize
315 |         if len(boxes) > 0:
316 |             model.draw_and_visualize(copy.deepcopy(img), boxes, video_writer, vis=False, save=True, is_track=args.is_track)
317 | 
318 |         end_time = time.time() - start_time
319 |         print('frame {}/{} (Total time: {:.2f} ms)'.format(frame_id, int(frame_count), end_time * 1000))
320 |         frame_id += 1
321 | 
322 | 
--------------------------------------------------------------------------------
/YOLOv8_openvino_onnx.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import time
 3 | import cv2
 4 | import numpy as np
 5 | from openvino.runtime import Core  # pip install openvino -i https://pypi.tuna.tsinghua.edu.cn/simple
 6 | import onnxruntime as ort  # used for ONNXRuntime inference; pip install onnxruntime installs the CPU build by default
 7 | 
 8 | 
 9 | # The 80 default COCO classes
10 | CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
11 |            'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
12 |            'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
13 |            'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
14 |            'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
15 |            'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
16 |            'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
17 |            'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
18 | 
19 | 
20 | class OpenvinoInference(object):
21 |     def __init__(self, onnx_path):
22 |         self.onnx_path = onnx_path
23 |         ie = Core()
24 |         self.model_onnx = ie.read_model(model=self.onnx_path)
25 |         self.compiled_model_onnx = ie.compile_model(model=self.model_onnx, device_name="CPU")
26 |         self.output_layer_onnx = self.compiled_model_onnx.output(0)
27 | 
28 |     def predict(self, datas):
29 |         predict_data = self.compiled_model_onnx([datas])[self.output_layer_onnx]
30 |         return predict_data
31 | 
32 | 
33 | class YOLOv8:
34 |     """YOLOv8 object detection model class for handling inference and visualization."""
35 | 
36 |     def __init__(self, onnx_model, imgsz=(640, 640), infer_tool='openvino'):
37 |         """
38 |         Initialization.
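        Note: with infer_tool='openvino' the model is compiled for CPU and FP32
        input is assumed; with 'onnxruntime' the input dtype follows the model
        itself (np.half for FP16 models, np.single for FP32 ones).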
39 | 40 | Args: 41 | onnx_model (str): Path to the ONNX model. 42 | """ 43 | self.infer_tool = infer_tool 44 | if self.infer_tool == 'openvino': 45 | # 构建openvino推理引擎 46 | self.openvino = OpenvinoInference(onnx_model) 47 | self.ndtype = np.single 48 | else: 49 | # 构建onnxruntime推理引擎 50 | self.ort_session = ort.InferenceSession(onnx_model, 51 | providers=['CUDAExecutionProvider', 'CPUExecutionProvider'] 52 | if ort.get_device() == 'GPU' else ['CPUExecutionProvider']) 53 | 54 | # Numpy dtype: support both FP32 and FP16 onnx model 55 | self.ndtype = np.half if self.ort_session.get_inputs()[0].type == 'tensor(float16)' else np.single 56 | 57 | self.classes = CLASSES # 加载模型类别 58 | self.model_height, self.model_width = imgsz[0], imgsz[1] # 图像resize大小 59 | self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3)) # 为每个类别生成调色板 60 | 61 | def __call__(self, im0, conf_threshold=0.4, iou_threshold=0.45): 62 | """ 63 | The whole pipeline: pre-process -> inference -> post-process. 64 | 65 | Args: 66 | im0 (Numpy.ndarray): original input image. 67 | conf_threshold (float): confidence threshold for filtering predictions. 68 | iou_threshold (float): iou threshold for NMS. 69 | 70 | Returns: 71 | boxes (List): list of bounding boxes. 72 | """ 73 | # 前处理Pre-process 74 | t1 = time.time() 75 | im, ratio, (pad_w, pad_h) = self.preprocess(im0) 76 | print('预处理时间:{:.3f}s'.format(time.time() - t1)) 77 | 78 | # 推理 inference 79 | t2 = time.time() 80 | if self.infer_tool == 'openvino': 81 | preds = self.openvino.predict(im) 82 | else: 83 | preds = self.ort_session.run(None, {self.ort_session.get_inputs()[0].name: im})[0] 84 | print('推理时间:{:.2f}s'.format(time.time() - t2)) 85 | 86 | # 后处理Post-process 87 | t3 = time.time() 88 | boxes = self.postprocess(preds, 89 | im0=im0, 90 | ratio=ratio, 91 | pad_w=pad_w, 92 | pad_h=pad_h, 93 | conf_threshold=conf_threshold, 94 | iou_threshold=iou_threshold, 95 | ) 96 | print('后处理时间:{:.3f}s'.format(time.time() - t3)) 97 | 98 | return boxes 99 | 100 | # 前处理,包括:resize, pad, HWC to CHW,BGR to RGB,归一化,增加维度CHW -> BCHW 101 | def preprocess(self, img): 102 | """ 103 | Pre-processes the input image. 104 | 105 | Args: 106 | img (Numpy.ndarray): image about to be processed. 107 | 108 | Returns: 109 | img_process (Numpy.ndarray): image preprocessed for inference. 110 | ratio (tuple): width, height ratios in letterbox. 111 | pad_w (float): width padding in letterbox. 112 | pad_h (float): height padding in letterbox. 
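
        Example (illustrative values, assuming a 1920x1080 frame and a 640x640 model input):
            r = min(640 / 1080, 640 / 1920) = 1/3, so new_unpad = (640, 360),
            pad_w = (640 - 640) / 2 = 0 and pad_h = (640 - 360) / 2 = 140,
            i.e. 140 px of gray (114, 114, 114) padding above and below the resized image.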
113 | """ 114 | # Resize and pad input image using letterbox() (Borrowed from Ultralytics) 115 | shape = img.shape[:2] # original image shape 116 | new_shape = (self.model_height, self.model_width) 117 | r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) 118 | ratio = r, r 119 | new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) 120 | pad_w, pad_h = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2 # wh padding 121 | if shape[::-1] != new_unpad: # resize 122 | img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) 123 | top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1)) 124 | left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1)) 125 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)) # 填充 126 | 127 | # Transforms: HWC to CHW -> BGR to RGB -> div(255) -> contiguous -> add axis(optional) 128 | img = np.ascontiguousarray(np.einsum('HWC->CHW', img)[::-1], dtype=self.ndtype) / 255.0 129 | img_process = img[None] if len(img.shape) == 3 else img 130 | return img_process, ratio, (pad_w, pad_h) 131 | 132 | # 后处理,包括:阈值过滤与NMS 133 | def postprocess(self, preds, im0, ratio, pad_w, pad_h, conf_threshold, iou_threshold): 134 | """ 135 | Post-process the prediction. 136 | 137 | Args: 138 | preds (Numpy.ndarray): predictions come from ort.session.run(). 139 | im0 (Numpy.ndarray): [h, w, c] original input image. 140 | ratio (tuple): width, height ratios in letterbox. 141 | pad_w (float): width padding in letterbox. 142 | pad_h (float): height padding in letterbox. 143 | conf_threshold (float): conf threshold. 144 | iou_threshold (float): iou threshold. 145 | 146 | Returns: 147 | boxes (List): list of bounding boxes. 148 | """ 149 | x = preds # outputs: predictions (1, 84, 8400) 150 | # Transpose the first output: (Batch_size, xywh_conf_cls, Num_anchors) -> (Batch_size, Num_anchors, xywh_conf_cls) 151 | x = np.einsum('bcn->bnc', x) # (1, 8400, 84) 152 | 153 | # Predictions filtering by conf-threshold 154 | x = x[np.amax(x[..., 4:], axis=-1) > conf_threshold] 155 | 156 | # Create a new matrix which merge these(box, score, cls) into one 157 | # For more details about `numpy.c_()`: https://numpy.org/doc/1.26/reference/generated/numpy.c_.html 158 | x = np.c_[x[..., :4], np.amax(x[..., 4:], axis=-1), np.argmax(x[..., 4:], axis=-1)] 159 | 160 | # NMS filtering 161 | # 经过NMS后的值, np.array([[x, y, w, h, conf, cls], ...]), shape=(-1, 4 + 1 + 1) 162 | x = x[cv2.dnn.NMSBoxes(x[:, :4], x[:, 4], conf_threshold, iou_threshold)] 163 | 164 | # 重新缩放边界框,为画图做准备 165 | if len(x) > 0: 166 | # Bounding boxes format change: cxcywh -> xyxy 167 | x[..., [0, 1]] -= x[..., [2, 3]] / 2 168 | x[..., [2, 3]] += x[..., [0, 1]] 169 | 170 | # Rescales bounding boxes from model shape(model_height, model_width) to the shape of original image 171 | x[..., :4] -= [pad_w, pad_h, pad_w, pad_h] 172 | x[..., :4] /= min(ratio) 173 | 174 | # Bounding boxes boundary clamp 175 | x[..., [0, 2]] = x[:, [0, 2]].clip(0, im0.shape[1]) 176 | x[..., [1, 3]] = x[:, [1, 3]].clip(0, im0.shape[0]) 177 | 178 | return x[..., :6] # boxes 179 | else: 180 | return [] 181 | 182 | # 绘框 183 | def draw_and_visualize(self, im, bboxes, vis=False, save=True): 184 | """ 185 | Draw and visualize results. 186 | 187 | Args: 188 | im (np.ndarray): original image, shape [h, w, c]. 189 | bboxes (numpy.ndarray): [n, 6], n is number of bboxes. 190 | vis (bool): imshow using OpenCV. 191 | save (bool): save image annotated. 
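
        Example (typical call, mirroring the __main__ block below):
            >>> model.draw_and_visualize(img, boxes, vis=False, save=True)  # writes demo.jpg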
192 | 193 | Returns: 194 | None 195 | """ 196 | # Draw rectangles 197 | for (*box, conf, cls_) in bboxes: 198 | # draw bbox rectangle 199 | cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), 200 | self.color_palette[int(cls_)], 1, cv2.LINE_AA) 201 | cv2.putText(im, f'{self.classes[int(cls_)]}: {conf:.3f}', (int(box[0]), int(box[1] - 9)), 202 | cv2.FONT_HERSHEY_SIMPLEX, 0.7, self.color_palette[int(cls_)], 2, cv2.LINE_AA) 203 | 204 | # Show image 205 | if vis: 206 | cv2.imshow('demo', im) 207 | cv2.waitKey(0) 208 | cv2.destroyAllWindows() 209 | 210 | # Save image 211 | if save: 212 | cv2.imwrite('demo.jpg', im) 213 | 214 | 215 | if __name__ == '__main__': 216 | # Create an argument parser to handle command-line arguments 217 | parser = argparse.ArgumentParser() 218 | parser.add_argument('--model', type=str, default='yolov8s.onnx', help='Path to ONNX model') 219 | parser.add_argument('--source', type=str, default=str('bus.jpg'), help='Path to input image') 220 | parser.add_argument('--imgsz', type=tuple, default=(640, 640), help='Image input size') 221 | parser.add_argument('--conf', type=float, default=0.25, help='Confidence threshold') 222 | parser.add_argument('--iou', type=float, default=0.45, help='NMS IoU threshold') 223 | parser.add_argument('--infer_tool', type=str, default='openvino', choices=("openvino", "onnxruntime"), help='选择推理引擎') 224 | args = parser.parse_args() 225 | 226 | # Build model 227 | model = YOLOv8(args.model, args.imgsz, args.infer_tool) 228 | 229 | # Read image by OpenCV 230 | img = cv2.imread(args.source) 231 | 232 | # Inference 233 | boxes = model(img, conf_threshold=args.conf, iou_threshold=args.iou) 234 | 235 | # Visualize 236 | if len(boxes) > 0: 237 | model.draw_and_visualize(img, boxes, vis=False, save=True) 238 | 239 | -------------------------------------------------------------------------------- /YOLOv9_openvino_onnx.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | import cv2 4 | import numpy as np 5 | from openvino.runtime import Core # pip install openvino -i https://pypi.tuna.tsinghua.edu.cn/simple 6 | import onnxruntime as ort # 使用onnxruntime推理用上,pip install onnxruntime,默认安装CPU 7 | 8 | 9 | # COCO默认的80类 10 | CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 11 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 12 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 13 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 14 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 15 | 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 16 | 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 17 | 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] 18 | 19 | 20 | class OpenvinoInference(object): 21 | def __init__(self, onnx_path): 22 | self.onnx_path = onnx_path 23 | ie = Core() 24 | self.model_onnx = ie.read_model(model=self.onnx_path) 25 | self.compiled_model_onnx = ie.compile_model(model=self.model_onnx, device_name="CPU") 26 | self.output_layer_onnx = self.compiled_model_onnx.output(0) 27 | 28 | def 
predict(self, datas): 29 | predict_data = self.compiled_model_onnx([datas])[self.output_layer_onnx] 30 | return predict_data 31 | 32 | 33 | class YOLOv9: 34 | """YOLOv9 object detection model class for handling inference and visualization.""" 35 | 36 | def __init__(self, onnx_model, imgsz=(640, 640), infer_tool='openvino'): 37 | """ 38 | Initialization. 39 | 40 | Args: 41 | onnx_model (str): Path to the ONNX model. 42 | """ 43 | self.infer_tool = infer_tool 44 | if self.infer_tool == 'openvino': 45 | # 构建openvino推理引擎 46 | self.openvino = OpenvinoInference(onnx_model) 47 | self.ndtype = np.single 48 | else: 49 | # 构建onnxruntime推理引擎 50 | self.ort_session = ort.InferenceSession(onnx_model, 51 | providers=['CUDAExecutionProvider', 'CPUExecutionProvider'] 52 | if ort.get_device() == 'GPU' else ['CPUExecutionProvider']) 53 | 54 | # Numpy dtype: support both FP32 and FP16 onnx model 55 | self.ndtype = np.half if self.ort_session.get_inputs()[0].type == 'tensor(float16)' else np.single 56 | 57 | self.classes = CLASSES # 加载模型类别 58 | self.model_height, self.model_width = imgsz[0], imgsz[1] # 图像resize大小 59 | self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3)) # 为每个类别生成调色板 60 | 61 | def __call__(self, im0, conf_threshold=0.4, iou_threshold=0.45): 62 | """ 63 | The whole pipeline: pre-process -> inference -> post-process. 64 | 65 | Args: 66 | im0 (Numpy.ndarray): original input image. 67 | conf_threshold (float): confidence threshold for filtering predictions. 68 | iou_threshold (float): iou threshold for NMS. 69 | 70 | Returns: 71 | boxes (List): list of bounding boxes. 72 | """ 73 | # 前处理Pre-process 74 | t1 = time.time() 75 | im, ratio, (pad_w, pad_h) = self.preprocess(im0) 76 | print('预处理时间:{:.3f}s'.format(time.time() - t1)) 77 | 78 | # 推理 inference 79 | t2 = time.time() 80 | if self.infer_tool == 'openvino': 81 | preds = self.openvino.predict(im) 82 | else: 83 | preds = self.ort_session.run(None, {self.ort_session.get_inputs()[0].name: im})[0] 84 | print('推理时间:{:.2f}s'.format(time.time() - t2)) 85 | 86 | # 后处理Post-process 87 | t3 = time.time() 88 | boxes = self.postprocess(preds, 89 | im0=im0, 90 | ratio=ratio, 91 | pad_w=pad_w, 92 | pad_h=pad_h, 93 | conf_threshold=conf_threshold, 94 | iou_threshold=iou_threshold, 95 | ) 96 | print('后处理时间:{:.3f}s'.format(time.time() - t3)) 97 | 98 | return boxes 99 | 100 | # 前处理,包括:resize, pad, HWC to CHW,BGR to RGB,归一化,增加维度CHW -> BCHW 101 | def preprocess(self, img): 102 | """ 103 | Pre-processes the input image. 104 | 105 | Args: 106 | img (Numpy.ndarray): image about to be processed. 107 | 108 | Returns: 109 | img_process (Numpy.ndarray): image preprocessed for inference. 110 | ratio (tuple): width, height ratios in letterbox. 111 | pad_w (float): width padding in letterbox. 112 | pad_h (float): height padding in letterbox. 
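
        Note: the np.einsum('HWC->CHW', img)[::-1] line below is equivalent to
        img.transpose(2, 0, 1)[::-1], i.e. HWC -> CHW followed by a BGR -> RGB
        channel flip, before pixel values are scaled to [0, 1].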
113 | """ 114 | # Resize and pad input image using letterbox() (Borrowed from Ultralytics) 115 | shape = img.shape[:2] # original image shape 116 | new_shape = (self.model_height, self.model_width) 117 | r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) 118 | ratio = r, r 119 | new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) 120 | pad_w, pad_h = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2 # wh padding 121 | if shape[::-1] != new_unpad: # resize 122 | img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) 123 | top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1)) 124 | left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1)) 125 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)) # 填充 126 | 127 | # Transforms: HWC to CHW -> BGR to RGB -> div(255) -> contiguous -> add axis(optional) 128 | img = np.ascontiguousarray(np.einsum('HWC->CHW', img)[::-1], dtype=self.ndtype) / 255.0 129 | img_process = img[None] if len(img.shape) == 3 else img 130 | return img_process, ratio, (pad_w, pad_h) 131 | 132 | # 后处理,包括:阈值过滤与NMS 133 | def postprocess(self, preds, im0, ratio, pad_w, pad_h, conf_threshold, iou_threshold): 134 | """ 135 | Post-process the prediction. 136 | 137 | Args: 138 | preds (Numpy.ndarray): predictions come from ort.session.run(). 139 | im0 (Numpy.ndarray): [h, w, c] original input image. 140 | ratio (tuple): width, height ratios in letterbox. 141 | pad_w (float): width padding in letterbox. 142 | pad_h (float): height padding in letterbox. 143 | conf_threshold (float): conf threshold. 144 | iou_threshold (float): iou threshold. 145 | 146 | Returns: 147 | boxes (List): list of bounding boxes. 148 | """ 149 | x = preds # outputs: predictions (1, 84, 8400) 150 | # Transpose the first output: (Batch_size, xywh_conf_cls, Num_anchors) -> (Batch_size, Num_anchors, xywh_conf_cls) 151 | x = np.einsum('bcn->bnc', x) # (1, 8400, 84) 152 | 153 | # Predictions filtering by conf-threshold 154 | x = x[np.amax(x[..., 4:], axis=-1) > conf_threshold] 155 | 156 | # Create a new matrix which merge these(box, score, cls) into one 157 | # For more details about `numpy.c_()`: https://numpy.org/doc/1.26/reference/generated/numpy.c_.html 158 | x = np.c_[x[..., :4], np.amax(x[..., 4:], axis=-1), np.argmax(x[..., 4:], axis=-1)] 159 | 160 | # NMS filtering 161 | # 经过NMS后的值, np.array([[x, y, w, h, conf, cls], ...]), shape=(-1, 4 + 1 + 1) 162 | x = x[cv2.dnn.NMSBoxes(x[:, :4], x[:, 4], conf_threshold, iou_threshold)] 163 | 164 | # 重新缩放边界框,为画图做准备 165 | if len(x) > 0: 166 | # Bounding boxes format change: cxcywh -> xyxy 167 | x[..., [0, 1]] -= x[..., [2, 3]] / 2 168 | x[..., [2, 3]] += x[..., [0, 1]] 169 | 170 | # Rescales bounding boxes from model shape(model_height, model_width) to the shape of original image 171 | x[..., :4] -= [pad_w, pad_h, pad_w, pad_h] 172 | x[..., :4] /= min(ratio) 173 | 174 | # Bounding boxes boundary clamp 175 | x[..., [0, 2]] = x[:, [0, 2]].clip(0, im0.shape[1]) 176 | x[..., [1, 3]] = x[:, [1, 3]].clip(0, im0.shape[0]) 177 | 178 | return x[..., :6] # boxes 179 | else: 180 | return [] 181 | 182 | # 绘框 183 | def draw_and_visualize(self, im, bboxes, vis=False, save=True): 184 | """ 185 | Draw and visualize results. 186 | 187 | Args: 188 | im (np.ndarray): original image, shape [h, w, c]. 189 | bboxes (numpy.ndarray): [n, 6], n is number of bboxes. 190 | vis (bool): imshow using OpenCV. 191 | save (bool): save image annotated. 
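
        Note: bboxes is the (n, 6) array returned by postprocess(), one row per
        box in [x1, y1, x2, y2, conf, cls] order, already rescaled to the
        original image.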
192 | 193 | Returns: 194 | None 195 | """ 196 | # Draw rectangles 197 | for (*box, conf, cls_) in bboxes: 198 | # draw bbox rectangle 199 | cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), 200 | self.color_palette[int(cls_)], 1, cv2.LINE_AA) 201 | cv2.putText(im, f'{self.classes[int(cls_)]}: {conf:.3f}', (int(box[0]), int(box[1] - 9)), 202 | cv2.FONT_HERSHEY_SIMPLEX, 0.7, self.color_palette[int(cls_)], 2, cv2.LINE_AA) 203 | 204 | # Show image 205 | if vis: 206 | cv2.imshow('demo', im) 207 | cv2.waitKey(0) 208 | cv2.destroyAllWindows() 209 | 210 | # Save image 211 | if save: 212 | cv2.imwrite('demo.jpg', im) 213 | 214 | 215 | if __name__ == '__main__': 216 | # Create an argument parser to handle command-line arguments 217 | parser = argparse.ArgumentParser() 218 | parser.add_argument('--model', type=str, default='yolov9c.onnx', help='Path to ONNX model') 219 | parser.add_argument('--source', type=str, default=str('bus.jpg'), help='Path to input image') 220 | parser.add_argument('--imgsz', type=tuple, default=(640,640), help='Image input size') 221 | parser.add_argument('--conf', type=float, default=0.25, help='Confidence threshold') 222 | parser.add_argument('--iou', type=float, default=0.45, help='NMS IoU threshold') 223 | parser.add_argument('--infer_tool', type=str, default='openvino', choices=("openvino", "onnxruntime"), help='选择推理引擎') 224 | args = parser.parse_args() 225 | 226 | # Build model 227 | model = YOLOv9(args.model, args.imgsz, args.infer_tool) 228 | 229 | # Read image by OpenCV 230 | img = cv2.imread(args.source) 231 | 232 | # Inference 233 | boxes = model(img, conf_threshold=args.conf, iou_threshold=args.iou) 234 | 235 | # Visualize 236 | if len(boxes) > 0: 237 | model.draw_and_visualize(img, boxes, vis=False, save=True) 238 | 239 | -------------------------------------------------------------------------------- /bytetrack/__pycache__/basetrack.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bigtuo/YOLOv8_Openvino/61fd58feb40d288905a2e7ee78fbdece094d10f6/bytetrack/__pycache__/basetrack.cpython-36.pyc -------------------------------------------------------------------------------- /bytetrack/__pycache__/basetrack.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bigtuo/YOLOv8_Openvino/61fd58feb40d288905a2e7ee78fbdece094d10f6/bytetrack/__pycache__/basetrack.cpython-38.pyc -------------------------------------------------------------------------------- /bytetrack/__pycache__/byte_tracker.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bigtuo/YOLOv8_Openvino/61fd58feb40d288905a2e7ee78fbdece094d10f6/bytetrack/__pycache__/byte_tracker.cpython-36.pyc -------------------------------------------------------------------------------- /bytetrack/__pycache__/byte_tracker.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bigtuo/YOLOv8_Openvino/61fd58feb40d288905a2e7ee78fbdece094d10f6/bytetrack/__pycache__/byte_tracker.cpython-38.pyc -------------------------------------------------------------------------------- /bytetrack/__pycache__/kalman_filter.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Bigtuo/YOLOv8_Openvino/61fd58feb40d288905a2e7ee78fbdece094d10f6/bytetrack/__pycache__/kalman_filter.cpython-36.pyc
--------------------------------------------------------------------------------
/bytetrack/__pycache__/kalman_filter.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bigtuo/YOLOv8_Openvino/61fd58feb40d288905a2e7ee78fbdece094d10f6/bytetrack/__pycache__/kalman_filter.cpython-38.pyc
--------------------------------------------------------------------------------
/bytetrack/__pycache__/matching.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bigtuo/YOLOv8_Openvino/61fd58feb40d288905a2e7ee78fbdece094d10f6/bytetrack/__pycache__/matching.cpython-36.pyc
--------------------------------------------------------------------------------
/bytetrack/__pycache__/matching.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bigtuo/YOLOv8_Openvino/61fd58feb40d288905a2e7ee78fbdece094d10f6/bytetrack/__pycache__/matching.cpython-38.pyc
--------------------------------------------------------------------------------
/bytetrack/basetrack.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from collections import OrderedDict
 3 | 
 4 | 
 5 | class TrackState(object):
 6 |     New = 0
 7 |     Tracked = 1
 8 |     Lost = 2
 9 |     Removed = 3
10 | 
11 | 
12 | class BaseTrack(object):
13 |     _count = 0
14 | 
15 |     track_id = 0
16 |     is_activated = False
17 |     state = TrackState.New
18 | 
19 |     history = OrderedDict()
20 |     features = []
21 |     curr_feature = None
22 |     score = 0
23 |     start_frame = 0
24 |     frame_id = 0
25 |     time_since_update = 0
26 | 
27 |     # multi-camera
28 |     location = (np.inf, np.inf)
29 | 
30 |     @property
31 |     def end_frame(self):
32 |         return self.frame_id
33 | 
34 |     @staticmethod
35 |     def next_id():
36 |         BaseTrack._count += 1
37 |         return BaseTrack._count
38 | 
39 |     def activate(self, *args):
40 |         raise NotImplementedError
41 | 
42 |     def predict(self):
43 |         raise NotImplementedError
44 | 
45 |     def update(self, *args, **kwargs):
46 |         raise NotImplementedError
47 | 
48 |     def mark_lost(self):
49 |         self.state = TrackState.Lost
50 | 
51 |     def mark_removed(self):
52 |         self.state = TrackState.Removed
--------------------------------------------------------------------------------
/bytetrack/byte_tracker.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from collections import deque
 3 | import os
 4 | import os.path as osp
 5 | import copy
 6 | 
 7 | from .kalman_filter import KalmanFilter
 8 | from bytetrack import matching
 9 | from .basetrack import BaseTrack, TrackState
10 | 
11 | class STrack(BaseTrack):
12 |     shared_kalman = KalmanFilter()
13 |     def __init__(self, tlwh, score):
14 | 
15 |         # wait activate
16 |         self._tlwh = np.asarray(tlwh, dtype=float)  # builtin float: np.float is removed in NumPy >= 1.24
17 |         self.kalman_filter = None
18 |         self.mean, self.covariance = None, None
19 |         self.is_activated = False
20 | 
21 |         self.score = score
22 |         self.tracklet_len = 0
23 | 
24 |     def predict(self):
25 |         mean_state = self.mean.copy()
26 |         if self.state != TrackState.Tracked:
27 |             mean_state[7] = 0
28 |         self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance)
29 | 
30 |     @staticmethod
31 |     def multi_predict(stracks):
32 |         if len(stracks) > 0:
33 |             multi_mean = 
np.asarray([st.mean.copy() for st in stracks]) 34 | multi_covariance = np.asarray([st.covariance for st in stracks]) 35 | for i, st in enumerate(stracks): 36 | if st.state != TrackState.Tracked: 37 | multi_mean[i][7] = 0 38 | multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance) 39 | for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)): 40 | stracks[i].mean = mean 41 | stracks[i].covariance = cov 42 | 43 | def activate(self, kalman_filter, frame_id): 44 | """Start a new tracklet""" 45 | self.kalman_filter = kalman_filter 46 | self.track_id = self.next_id() 47 | self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(self._tlwh)) 48 | 49 | self.tracklet_len = 0 50 | self.state = TrackState.Tracked 51 | if frame_id == 1: 52 | self.is_activated = True 53 | # self.is_activated = True 54 | self.frame_id = frame_id 55 | self.start_frame = frame_id 56 | 57 | def re_activate(self, new_track, frame_id, new_id=False): 58 | self.mean, self.covariance = self.kalman_filter.update( 59 | self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh) 60 | ) 61 | self.tracklet_len = 0 62 | self.state = TrackState.Tracked 63 | self.is_activated = True 64 | self.frame_id = frame_id 65 | if new_id: 66 | self.track_id = self.next_id() 67 | self.score = new_track.score 68 | 69 | def update(self, new_track, frame_id): 70 | """ 71 | Update a matched track 72 | :type new_track: STrack 73 | :type frame_id: int 74 | :type update_feature: bool 75 | :return: 76 | """ 77 | self.frame_id = frame_id 78 | self.tracklet_len += 1 79 | 80 | new_tlwh = new_track.tlwh 81 | self.mean, self.covariance = self.kalman_filter.update( 82 | self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh)) 83 | self.state = TrackState.Tracked 84 | self.is_activated = True 85 | 86 | self.score = new_track.score 87 | 88 | @property 89 | # @jit(nopython=True) 90 | def tlwh(self): 91 | """Get current position in bounding box format `(top left x, top left y, 92 | width, height)`. 93 | """ 94 | if self.mean is None: 95 | return self._tlwh.copy() 96 | ret = self.mean[:4].copy() 97 | ret[2] *= ret[3] 98 | ret[:2] -= ret[2:] / 2 99 | return ret 100 | 101 | @property 102 | # @jit(nopython=True) 103 | def tlbr(self): 104 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., 105 | `(top left, bottom right)`. 106 | """ 107 | ret = self.tlwh.copy() 108 | ret[2:] += ret[:2] 109 | return ret 110 | 111 | @staticmethod 112 | # @jit(nopython=True) 113 | def tlwh_to_xyah(tlwh): 114 | """Convert bounding box to format `(center x, center y, aspect ratio, 115 | height)`, where the aspect ratio is `width / height`. 
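
        Example (illustrative): tlwh (10, 20, 50, 100) -> xyah (35, 70, 0.5, 100),
        since the center is (10 + 50/2, 20 + 100/2) and the aspect ratio is 50/100.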
116 | """ 117 | ret = np.asarray(tlwh).copy() 118 | ret[:2] += ret[2:] / 2 119 | ret[2] /= ret[3] 120 | return ret 121 | 122 | def to_xyah(self): 123 | return self.tlwh_to_xyah(self.tlwh) 124 | 125 | @staticmethod 126 | # @jit(nopython=True) 127 | def tlbr_to_tlwh(tlbr): 128 | ret = np.asarray(tlbr).copy() 129 | ret[2:] -= ret[:2] 130 | return ret 131 | 132 | @staticmethod 133 | # @jit(nopython=True) 134 | def tlwh_to_tlbr(tlwh): 135 | ret = np.asarray(tlwh).copy() 136 | ret[2:] += ret[:2] 137 | return ret 138 | 139 | def __repr__(self): 140 | return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame) 141 | 142 | 143 | class BYTETracker(object): 144 | def __init__(self, args, frame_rate=30): 145 | self.tracked_stracks = [] # type: list[STrack] 146 | self.lost_stracks = [] # type: list[STrack] 147 | self.removed_stracks = [] # type: list[STrack] 148 | 149 | self.frame_id = 0 150 | self.args = args 151 | #self.det_thresh = args.track_thresh 152 | self.det_thresh = args.track_thresh + 0.1 153 | self.buffer_size = int(frame_rate / 30.0 * args.track_buffer) 154 | self.max_time_lost = self.buffer_size 155 | self.kalman_filter = KalmanFilter() 156 | 157 | def update(self, output_results, img_info, img_size): 158 | self.frame_id += 1 159 | activated_starcks = [] 160 | refind_stracks = [] 161 | lost_stracks = [] 162 | removed_stracks = [] 163 | 164 | if output_results.shape[1] == 5: 165 | scores = output_results[:, 4] 166 | bboxes = output_results[:, :4] 167 | else: 168 | output_results = output_results.cpu().numpy() 169 | scores = output_results[:, 4] * output_results[:, 5] 170 | bboxes = output_results[:, :4] # x1y1x2y2 171 | img_h, img_w = img_info[0], img_info[1] 172 | scale = min(img_size[0] / float(img_h), img_size[1] / float(img_w)) 173 | bboxes /= scale 174 | 175 | remain_inds = scores > self.args.track_thresh 176 | inds_low = scores > 0.1 177 | inds_high = scores < self.args.track_thresh 178 | 179 | inds_second = np.logical_and(inds_low, inds_high) 180 | dets_second = bboxes[inds_second] 181 | dets = bboxes[remain_inds] 182 | scores_keep = scores[remain_inds] 183 | scores_second = scores[inds_second] 184 | 185 | if len(dets) > 0: 186 | '''Detections''' 187 | detections = [STrack(STrack.tlbr_to_tlwh(tlbr), s) for 188 | (tlbr, s) in zip(dets, scores_keep)] 189 | else: 190 | detections = [] 191 | 192 | ''' Add newly detected tracklets to tracked_stracks''' 193 | unconfirmed = [] 194 | tracked_stracks = [] # type: list[STrack] 195 | for track in self.tracked_stracks: 196 | if not track.is_activated: 197 | unconfirmed.append(track) 198 | else: 199 | tracked_stracks.append(track) 200 | 201 | ''' Step 2: First association, with high score detection boxes''' 202 | strack_pool = joint_stracks(tracked_stracks, self.lost_stracks) 203 | # Predict the current location with KF 204 | STrack.multi_predict(strack_pool) 205 | dists = matching.iou_distance(strack_pool, detections) 206 | if not self.args.mot20: 207 | dists = matching.fuse_score(dists, detections) 208 | matches, u_track, u_detection = matching.linear_assignment(dists, thresh=self.args.match_thresh) 209 | 210 | for itracked, idet in matches: 211 | track = strack_pool[itracked] 212 | det = detections[idet] 213 | if track.state == TrackState.Tracked: 214 | track.update(detections[idet], self.frame_id) 215 | activated_starcks.append(track) 216 | else: 217 | track.re_activate(det, self.frame_id, new_id=False) 218 | refind_stracks.append(track) 219 | 220 | ''' Step 3: Second association, with low score detection boxes''' 221 | # 
association the untrack to the low score detections 222 | if len(dets_second) > 0: 223 | '''Detections''' 224 | detections_second = [STrack(STrack.tlbr_to_tlwh(tlbr), s) for 225 | (tlbr, s) in zip(dets_second, scores_second)] 226 | else: 227 | detections_second = [] 228 | r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked] 229 | dists = matching.iou_distance(r_tracked_stracks, detections_second) 230 | matches, u_track, u_detection_second = matching.linear_assignment(dists, thresh=0.5) 231 | for itracked, idet in matches: 232 | track = r_tracked_stracks[itracked] 233 | det = detections_second[idet] 234 | if track.state == TrackState.Tracked: 235 | track.update(det, self.frame_id) 236 | activated_starcks.append(track) 237 | else: 238 | track.re_activate(det, self.frame_id, new_id=False) 239 | refind_stracks.append(track) 240 | 241 | for it in u_track: 242 | track = r_tracked_stracks[it] 243 | if not track.state == TrackState.Lost: 244 | track.mark_lost() 245 | lost_stracks.append(track) 246 | 247 | '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' 248 | detections = [detections[i] for i in u_detection] 249 | dists = matching.iou_distance(unconfirmed, detections) 250 | if not self.args.mot20: 251 | dists = matching.fuse_score(dists, detections) 252 | matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7) 253 | for itracked, idet in matches: 254 | unconfirmed[itracked].update(detections[idet], self.frame_id) 255 | activated_starcks.append(unconfirmed[itracked]) 256 | for it in u_unconfirmed: 257 | track = unconfirmed[it] 258 | track.mark_removed() 259 | removed_stracks.append(track) 260 | 261 | """ Step 4: Init new stracks""" 262 | for inew in u_detection: 263 | track = detections[inew] 264 | if track.score < self.det_thresh: 265 | continue 266 | track.activate(self.kalman_filter, self.frame_id) 267 | activated_starcks.append(track) 268 | """ Step 5: Update state""" 269 | for track in self.lost_stracks: 270 | if self.frame_id - track.end_frame > self.max_time_lost: 271 | track.mark_removed() 272 | removed_stracks.append(track) 273 | 274 | # print('Ramained match {} s'.format(t4-t3)) 275 | 276 | self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked] 277 | self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks) 278 | self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks) 279 | self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks) 280 | self.lost_stracks.extend(lost_stracks) 281 | self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks) 282 | self.removed_stracks.extend(removed_stracks) 283 | self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks) 284 | # get scores of lost tracks 285 | output_stracks = [track for track in self.tracked_stracks if track.is_activated] 286 | 287 | return output_stracks 288 | 289 | 290 | def joint_stracks(tlista, tlistb): 291 | exists = {} 292 | res = [] 293 | for t in tlista: 294 | exists[t.track_id] = 1 295 | res.append(t) 296 | for t in tlistb: 297 | tid = t.track_id 298 | if not exists.get(tid, 0): 299 | exists[tid] = 1 300 | res.append(t) 301 | return res 302 | 303 | 304 | def sub_stracks(tlista, tlistb): 305 | stracks = {} 306 | for t in tlista: 307 | stracks[t.track_id] = t 308 | for t in tlistb: 309 | tid = t.track_id 310 | if stracks.get(tid, 0): 311 | del stracks[tid] 312 | return 
list(stracks.values()) 313 | 314 | 315 | def remove_duplicate_stracks(stracksa, stracksb): 316 | pdist = matching.iou_distance(stracksa, stracksb) 317 | pairs = np.where(pdist < 0.15) 318 | dupa, dupb = list(), list() 319 | for p, q in zip(*pairs): 320 | timep = stracksa[p].frame_id - stracksa[p].start_frame 321 | timeq = stracksb[q].frame_id - stracksb[q].start_frame 322 | if timep > timeq: 323 | dupb.append(q) 324 | else: 325 | dupa.append(p) 326 | resa = [t for i, t in enumerate(stracksa) if not i in dupa] 327 | resb = [t for i, t in enumerate(stracksb) if not i in dupb] 328 | return resa, resb 329 | -------------------------------------------------------------------------------- /bytetrack/kalman_filter.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import scipy.linalg 4 | 5 | 6 | """ 7 | Table for the 0.95 quantile of the chi-square distribution with N degrees of 8 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv 9 | function and used as Mahalanobis gating threshold. 10 | """ 11 | chi2inv95 = { 12 | 1: 3.8415, 13 | 2: 5.9915, 14 | 3: 7.8147, 15 | 4: 9.4877, 16 | 5: 11.070, 17 | 6: 12.592, 18 | 7: 14.067, 19 | 8: 15.507, 20 | 9: 16.919} 21 | 22 | 23 | class KalmanFilter(object): 24 | """ 25 | A simple Kalman filter for tracking bounding boxes in image space. 26 | 27 | The 8-dimensional state space 28 | 29 | x, y, a, h, vx, vy, va, vh 30 | 31 | contains the bounding box center position (x, y), aspect ratio a, height h, 32 | and their respective velocities. 33 | 34 | Object motion follows a constant velocity model. The bounding box location 35 | (x, y, a, h) is taken as direct observation of the state space (linear 36 | observation model). 37 | 38 | """ 39 | 40 | def __init__(self): 41 | ndim, dt = 4, 1. 42 | 43 | # Create Kalman filter model matrices. 44 | self._motion_mat = np.eye(2 * ndim, 2 * ndim) 45 | for i in range(ndim): 46 | self._motion_mat[i, ndim + i] = dt 47 | self._update_mat = np.eye(ndim, 2 * ndim) 48 | 49 | # Motion and observation uncertainty are chosen relative to the current 50 | # state estimate. These weights control the amount of uncertainty in 51 | # the model. This is a bit hacky. 52 | self._std_weight_position = 1. / 20 53 | self._std_weight_velocity = 1. / 160 54 | 55 | def initiate(self, measurement): 56 | """Create track from unassociated measurement. 57 | 58 | Parameters 59 | ---------- 60 | measurement : ndarray 61 | Bounding box coordinates (x, y, a, h) with center position (x, y), 62 | aspect ratio a, and height h. 63 | 64 | Returns 65 | ------- 66 | (ndarray, ndarray) 67 | Returns the mean vector (8 dimensional) and covariance matrix (8x8 68 | dimensional) of the new track. Unobserved velocities are initialized 69 | to 0 mean. 70 | 71 | """ 72 | mean_pos = measurement 73 | mean_vel = np.zeros_like(mean_pos) 74 | mean = np.r_[mean_pos, mean_vel] 75 | 76 | std = [ 77 | 2 * self._std_weight_position * measurement[3], 78 | 2 * self._std_weight_position * measurement[3], 79 | 1e-2, 80 | 2 * self._std_weight_position * measurement[3], 81 | 10 * self._std_weight_velocity * measurement[3], 82 | 10 * self._std_weight_velocity * measurement[3], 83 | 1e-5, 84 | 10 * self._std_weight_velocity * measurement[3]] 85 | covariance = np.diag(np.square(std)) 86 | return mean, covariance 87 | 88 | def predict(self, mean, covariance): 89 | """Run Kalman filter prediction step. 
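
        In matrix form this computes x' = F x and P' = F P F^T + Q, where F is the
        constant-velocity motion matrix (self._motion_mat) and the process noise Q
        (motion_cov below) scales with the box height.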
90 | 91 | Parameters 92 | ---------- 93 | mean : ndarray 94 | The 8 dimensional mean vector of the object state at the previous 95 | time step. 96 | covariance : ndarray 97 | The 8x8 dimensional covariance matrix of the object state at the 98 | previous time step. 99 | 100 | Returns 101 | ------- 102 | (ndarray, ndarray) 103 | Returns the mean vector and covariance matrix of the predicted 104 | state. Unobserved velocities are initialized to 0 mean. 105 | 106 | """ 107 | std_pos = [ 108 | self._std_weight_position * mean[3], 109 | self._std_weight_position * mean[3], 110 | 1e-2, 111 | self._std_weight_position * mean[3]] 112 | std_vel = [ 113 | self._std_weight_velocity * mean[3], 114 | self._std_weight_velocity * mean[3], 115 | 1e-5, 116 | self._std_weight_velocity * mean[3]] 117 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) 118 | 119 | #mean = np.dot(self._motion_mat, mean) 120 | mean = np.dot(mean, self._motion_mat.T) 121 | covariance = np.linalg.multi_dot(( 122 | self._motion_mat, covariance, self._motion_mat.T)) + motion_cov 123 | 124 | return mean, covariance 125 | 126 | def project(self, mean, covariance): 127 | """Project state distribution to measurement space. 128 | 129 | Parameters 130 | ---------- 131 | mean : ndarray 132 | The state's mean vector (8 dimensional array). 133 | covariance : ndarray 134 | The state's covariance matrix (8x8 dimensional). 135 | 136 | Returns 137 | ------- 138 | (ndarray, ndarray) 139 | Returns the projected mean and covariance matrix of the given state 140 | estimate. 141 | 142 | """ 143 | std = [ 144 | self._std_weight_position * mean[3], 145 | self._std_weight_position * mean[3], 146 | 1e-1, 147 | self._std_weight_position * mean[3]] 148 | innovation_cov = np.diag(np.square(std)) 149 | 150 | mean = np.dot(self._update_mat, mean) 151 | covariance = np.linalg.multi_dot(( 152 | self._update_mat, covariance, self._update_mat.T)) 153 | return mean, covariance + innovation_cov 154 | 155 | def multi_predict(self, mean, covariance): 156 | """Run Kalman filter prediction step (Vectorized version). 157 | Parameters 158 | ---------- 159 | mean : ndarray 160 | The Nx8 dimensional mean matrix of the object states at the previous 161 | time step. 162 | covariance : ndarray 163 | The Nx8x8 dimensional covariance matrics of the object states at the 164 | previous time step. 165 | Returns 166 | ------- 167 | (ndarray, ndarray) 168 | Returns the mean vector and covariance matrix of the predicted 169 | state. Unobserved velocities are initialized to 0 mean. 170 | """ 171 | std_pos = [ 172 | self._std_weight_position * mean[:, 3], 173 | self._std_weight_position * mean[:, 3], 174 | 1e-2 * np.ones_like(mean[:, 3]), 175 | self._std_weight_position * mean[:, 3]] 176 | std_vel = [ 177 | self._std_weight_velocity * mean[:, 3], 178 | self._std_weight_velocity * mean[:, 3], 179 | 1e-5 * np.ones_like(mean[:, 3]), 180 | self._std_weight_velocity * mean[:, 3]] 181 | sqr = np.square(np.r_[std_pos, std_vel]).T 182 | 183 | motion_cov = [] 184 | for i in range(len(mean)): 185 | motion_cov.append(np.diag(sqr[i])) 186 | motion_cov = np.asarray(motion_cov) 187 | 188 | mean = np.dot(mean, self._motion_mat.T) 189 | left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2)) 190 | covariance = np.dot(left, self._motion_mat.T) + motion_cov 191 | 192 | return mean, covariance 193 | 194 | def update(self, mean, covariance, measurement): 195 | """Run Kalman filter correction step. 
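
        Implements the standard Kalman update: with projected mean H x and innovation
        covariance S = H P H^T + R (from project()), the gain K = P H^T S^{-1} is
        obtained via a Cholesky solve, then x' = x + K (z - H x) and P' = P - K S K^T.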
196 | 197 | Parameters 198 | ---------- 199 | mean : ndarray 200 | The predicted state's mean vector (8 dimensional). 201 | covariance : ndarray 202 | The state's covariance matrix (8x8 dimensional). 203 | measurement : ndarray 204 | The 4 dimensional measurement vector (x, y, a, h), where (x, y) 205 | is the center position, a the aspect ratio, and h the height of the 206 | bounding box. 207 | 208 | Returns 209 | ------- 210 | (ndarray, ndarray) 211 | Returns the measurement-corrected state distribution. 212 | 213 | """ 214 | projected_mean, projected_cov = self.project(mean, covariance) 215 | 216 | chol_factor, lower = scipy.linalg.cho_factor( 217 | projected_cov, lower=True, check_finite=False) 218 | kalman_gain = scipy.linalg.cho_solve( 219 | (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, 220 | check_finite=False).T 221 | innovation = measurement - projected_mean 222 | 223 | new_mean = mean + np.dot(innovation, kalman_gain.T) 224 | new_covariance = covariance - np.linalg.multi_dot(( 225 | kalman_gain, projected_cov, kalman_gain.T)) 226 | return new_mean, new_covariance 227 | 228 | def gating_distance(self, mean, covariance, measurements, 229 | only_position=False, metric='maha'): 230 | """Compute gating distance between state distribution and measurements. 231 | A suitable distance threshold can be obtained from `chi2inv95`. If 232 | `only_position` is False, the chi-square distribution has 4 degrees of 233 | freedom, otherwise 2. 234 | Parameters 235 | ---------- 236 | mean : ndarray 237 | Mean vector over the state distribution (8 dimensional). 238 | covariance : ndarray 239 | Covariance of the state distribution (8x8 dimensional). 240 | measurements : ndarray 241 | An Nx4 dimensional matrix of N measurements, each in 242 | format (x, y, a, h) where (x, y) is the bounding box center 243 | position, a the aspect ratio, and h the height. 244 | only_position : Optional[bool] 245 | If True, distance computation is done with respect to the bounding 246 | box center position only. 247 | Returns 248 | ------- 249 | ndarray 250 | Returns an array of length N, where the i-th element contains the 251 | squared Mahalanobis distance between (mean, covariance) and 252 | `measurements[i]`. 
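
        For metric='maha' this is d^2 = (z - mu)^T S^{-1} (z - mu), evaluated by
        solving the Cholesky factor of S against the residuals; metric='gaussian'
        falls back to the plain squared Euclidean distance.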
253 |         """
254 |         mean, covariance = self.project(mean, covariance)
255 |         if only_position:
256 |             mean, covariance = mean[:2], covariance[:2, :2]
257 |             measurements = measurements[:, :2]
258 | 
259 |         d = measurements - mean
260 |         if metric == 'gaussian':
261 |             return np.sum(d * d, axis=1)
262 |         elif metric == 'maha':
263 |             cholesky_factor = np.linalg.cholesky(covariance)
264 |             z = scipy.linalg.solve_triangular(
265 |                 cholesky_factor, d.T, lower=True, check_finite=False,
266 |                 overwrite_b=True)
267 |             squared_maha = np.sum(z * z, axis=0)
268 |             return squared_maha
269 |         else:
270 |             raise ValueError('invalid distance metric')
--------------------------------------------------------------------------------
/bytetrack/matching.py:
--------------------------------------------------------------------------------
  1 | import cv2
  2 | import numpy as np
  3 | import scipy
  4 | import lap
  5 | from scipy.spatial.distance import cdist
  6 | 
  7 | from cython_bbox import bbox_overlaps as bbox_ious
  8 | from bytetrack import kalman_filter
  9 | import time
 10 | 
 11 | def merge_matches(m1, m2, shape):
 12 |     O,P,Q = shape
 13 |     m1 = np.asarray(m1)
 14 |     m2 = np.asarray(m2)
 15 | 
 16 |     M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P))
 17 |     M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q))
 18 | 
 19 |     mask = M1*M2
 20 |     match = mask.nonzero()
 21 |     match = list(zip(match[0], match[1]))
 22 |     unmatched_O = tuple(set(range(O)) - set([i for i, j in match]))
 23 |     unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match]))
 24 | 
 25 |     return match, unmatched_O, unmatched_Q
 26 | 
 27 | 
 28 | def _indices_to_matches(cost_matrix, indices, thresh):
 29 |     matched_cost = cost_matrix[tuple(zip(*indices))]
 30 |     matched_mask = (matched_cost <= thresh)
 31 | 
 32 |     matches = indices[matched_mask]
 33 |     unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0]))
 34 |     unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1]))
 35 | 
 36 |     return matches, unmatched_a, unmatched_b
 37 | 
 38 | 
 39 | def linear_assignment(cost_matrix, thresh):
 40 |     if cost_matrix.size == 0:
 41 |         return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1]))
 42 |     matches, unmatched_a, unmatched_b = [], [], []
 43 |     cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
 44 |     for ix, mx in enumerate(x):
 45 |         if mx >= 0:
 46 |             matches.append([ix, mx])
 47 |     unmatched_a = np.where(x < 0)[0]
 48 |     unmatched_b = np.where(y < 0)[0]
 49 |     matches = np.asarray(matches)
 50 |     return matches, unmatched_a, unmatched_b
 51 | 
 52 | 
 53 | def ious(atlbrs, btlbrs):
 54 |     """
 55 |     Compute cost based on IoU
 56 |     :type atlbrs: list[tlbr] | np.ndarray
 57 |     :type btlbrs: list[tlbr] | np.ndarray
 58 | 
 59 |     :rtype ious np.ndarray
 60 |     """
 61 |     ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=float)
 62 |     if ious.size == 0:
 63 |         return ious
 64 | 
 65 |     ious = bbox_ious(
 66 |         np.ascontiguousarray(atlbrs, dtype=float),
 67 |         np.ascontiguousarray(btlbrs, dtype=float)
 68 |     )
 69 | 
 70 |     return ious
 71 | 
 72 | 
 73 | def iou_distance(atracks, btracks):
 74 |     """
 75 |     Compute cost based on IoU
 76 |     :type atracks: list[STrack]
 77 |     :type btracks: list[STrack]
 78 | 
 79 |     :rtype cost_matrix np.ndarray
 80 |     """
 81 | 
 82 |     if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
 83 |         atlbrs = atracks
 84 |         btlbrs = btracks
 85 |     else:
 86 |         atlbrs = [track.tlbr for track in atracks]
 87 |         btlbrs = [track.tlbr for track in btracks]
 88 |     _ious = ious(atlbrs, btlbrs)
 89 |     cost_matrix = 1 - _ious
 90 | 
 91 |     return cost_matrix
 92 | 
 93 | def v_iou_distance(atracks, btracks):
 94 |     """
 95 |     Compute cost based on IoU
 96 |     :type atracks: list[STrack]
 97 |     :type btracks: list[STrack]
 98 | 
 99 |     :rtype cost_matrix np.ndarray
100 |     """
101 | 
102 |     if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
103 |         atlbrs = atracks
104 |         btlbrs = btracks
105 |     else:
106 |         atlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in atracks]
107 |         btlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in btracks]
108 |     _ious = ious(atlbrs, btlbrs)
109 |     cost_matrix = 1 - _ious
110 | 
111 |     return cost_matrix
112 | 
113 | def embedding_distance(tracks, detections, metric='cosine'):
114 |     """
115 |     :param tracks: list[STrack]
116 |     :param detections: list[BaseTrack]
117 |     :param metric:
118 |     :return: cost_matrix np.ndarray
119 |     """
120 | 
121 |     cost_matrix = np.zeros((len(tracks), len(detections)), dtype=float)
122 |     if cost_matrix.size == 0:
123 |         return cost_matrix
124 |     det_features = np.asarray([track.curr_feat for track in detections], dtype=float)
125 |     #for i, track in enumerate(tracks):
126 |         #cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric))
127 |     track_features = np.asarray([track.smooth_feat for track in tracks], dtype=float)
128 |     cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric))  # Normalized features
129 |     return cost_matrix
130 | 
131 | 
132 | def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False):
133 |     if cost_matrix.size == 0:
134 |         return cost_matrix
135 |     gating_dim = 2 if only_position else 4
136 |     gating_threshold = kalman_filter.chi2inv95[gating_dim]
137 |     measurements = np.asarray([det.to_xyah() for det in detections])
138 |     for row, track in enumerate(tracks):
139 |         gating_distance = kf.gating_distance(
140 |             track.mean, track.covariance, measurements, only_position)
141 |         cost_matrix[row, gating_distance > gating_threshold] = np.inf
142 |     return cost_matrix
143 | 
144 | 
145 | def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98):
146 |     if cost_matrix.size == 0:
147 |         return cost_matrix
148 |     gating_dim = 2 if only_position else 4
149 |     gating_threshold = kalman_filter.chi2inv95[gating_dim]
150 |     measurements = np.asarray([det.to_xyah() for det in detections])
151 |     for row, track in enumerate(tracks):
152 |         gating_distance = kf.gating_distance(
153 |             track.mean, track.covariance, measurements, only_position, metric='maha')
154 |         cost_matrix[row, gating_distance > gating_threshold] = np.inf
155 |         cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance
156 |     return cost_matrix
157 | 
158 | 
159 | def fuse_iou(cost_matrix, tracks, detections):
160 |     if cost_matrix.size == 0:
161 |         return cost_matrix
162 |     reid_sim = 1 - cost_matrix
163 |     iou_dist = iou_distance(tracks, detections)
164 |     iou_sim = 1 - iou_dist
165 |     fuse_sim = reid_sim * (1 + iou_sim) / 2
166 |     det_scores = np.array([det.score for det in detections])
167 |     det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
168 |     #fuse_sim = fuse_sim * (1 + det_scores) / 2
169 |     fuse_cost = 1 - fuse_sim
170 |     return fuse_cost
171 | 
172 | 
173 | def fuse_score(cost_matrix, detections):
174 |     if cost_matrix.size == 0:
175 |         return cost_matrix
176 |     iou_sim = 1 - cost_matrix
177 |     det_scores = 
np.array([det.score for det in detections]) 178 | det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0) 179 | fuse_sim = iou_sim * det_scores 180 | fuse_cost = 1 - fuse_sim 181 | return fuse_cost -------------------------------------------------------------------------------- /test.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bigtuo/YOLOv8_Openvino/61fd58feb40d288905a2e7ee78fbdece094d10f6/test.mp4 --------------------------------------------------------------------------------