├── README.md
├── YOLOv5-seg_openvino_onnx.py
├── YOLOv5_bytetrack.py
├── YOLOv5_openvino_onnx.py
├── YOLOv6_openvino_onnx.py
├── YOLOv7-pose_openvino_onnx.py
├── YOLOv7_bytetrack.py
├── YOLOv7_openvino_onnx.py
├── YOLOv8-pose_openvino_onnx.py
├── YOLOv8-seg_openvino_onnx.py
├── YOLOv8_bytetrack.py
├── YOLOv8_openvino_onnx.py
├── YOLOv9_openvino_onnx.py
├── bytetrack
│   ├── __pycache__
│   │   ├── basetrack.cpython-36.pyc
│   │   ├── basetrack.cpython-38.pyc
│   │   ├── byte_tracker.cpython-36.pyc
│   │   ├── byte_tracker.cpython-38.pyc
│   │   ├── kalman_filter.cpython-36.pyc
│   │   ├── kalman_filter.cpython-38.pyc
│   │   ├── matching.cpython-36.pyc
│   │   └── matching.cpython-38.pyc
│   ├── basetrack.py
│   ├── byte_tracker.py
│   ├── kalman_filter.py
│   └── matching.py
└── test.mp4
/README.md:
--------------------------------------------------------------------------------
1 | # YOLOv8_Openvino
2 | ## 0 Contents
3 | Detection, tracking, instance segmentation, and keypoint detection for YOLOv5/v6/v7/v8/v9, all running on CPU under both OpenVINO and ONNXRuntime.
4 | 
5 | ## 1 Environment
6 | CPU: i5-12500
7 | 
8 | Python: 3.8.18
9 | 
10 | VS2019
11 | 
12 | Note: the lap and cython_bbox packages used by ByteTrack must be built from source during installation; a plain pip install fails without a C++ toolchain, hence VS2019.
13 | ## 2 Installing OpenVINO and ONNXRuntime
14 | ### 2.1 OpenVINO overview
15 | OpenVINO is an open-source toolkit developed by Intel for optimizing and deploying AI inference; its main purpose is accelerating deep learning inference.
16 | 
17 | OpenVINO integrates OpenCV and TensorFlow support internally, and beyond that it offers a powerful plugin development framework that lets developers optimize the inference pipeline on top of OpenVINO.
18 | 
19 | The overall architecture is: OpenVINO frontend → plugin middle layer → backend.
20 | OpenVINO's strength is that it hides the backend interfaces behind a single, unified frontend API: developers do not need to care how the backend is implemented (it might be TensorFlow, Keras, or ARM-NN), since plugins expose it to the frontend. One codebase written against OpenVINO can therefore run on multiple inference engines; OpenVINO works like an aggregation layer.
21 | 
22 | ### 2.2 ONNXRuntime overview
23 | ONNXRuntime is an inference framework from Microsoft that makes it very easy to run an ONNX model. It supports multiple execution backends, including CPU, GPU, TensorRT, and DML, and can fairly be called the most native runtime for ONNX models.
24 | 
25 | Although ONNX is mostly used as an intermediate representation — export from PyTorch to ONNX, then feed the model straight into TensorRT, MNN, or some other backend — that does not make ONNXRuntime any less excellent as an inference framework. Because it contains only the inference path (the latest ONNXRuntime can even train), reading its source is a good way to learn the core mechanics of a deep learning framework: op registration, memory management, execution logic, and so on.
26 | Overall, an ONNXRuntime run breaks down into three phases: session construction, model loading and initialization, and execution. As with all other mainstream frameworks, ONNXRuntime is most often driven from Python, while the code that actually executes the graph is C++.
27 | 
28 | ### 2.3 Installation
29 | pip install openvino -i https://pypi.tuna.tsinghua.edu.cn/simple
30 | 
31 | pip install onnxruntime -i https://pypi.tuna.tsinghua.edu.cn/simple
32 | 
33 | 
34 | 
35 | 
--------------------------------------------------------------------------------
/YOLOv5-seg_openvino_onnx.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import time
3 | import cv2
4 | import numpy as np
5 | from openvino.runtime import Core  # pip install openvino -i https://pypi.tuna.tsinghua.edu.cn/simple
6 | import onnxruntime as ort  # used for ONNXRuntime inference; pip install onnxruntime (installs the CPU build by default)
7 | 
8 | 
9 | # The 80 default COCO classes
10 | CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
11 |            'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
12 |            'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
13 |            'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
14 |            'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
15 |            'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
16 |            'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
17 |            'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
18 | 
19 | 
20 | class 
OpenvinoInference(object): 21 | def __init__(self, onnx_path): 22 | self.onnx_path = onnx_path 23 | ie = Core() 24 | self.model_onnx = ie.read_model(model=self.onnx_path) 25 | self.compiled_model_onnx = ie.compile_model(model=self.model_onnx, device_name="CPU") 26 | 27 | def predict(self, datas): 28 | # 注:self.compiled_model_onnx([datas])是一个字典,self.compiled_model_onnx.output(0)是字典键,第一种读取所有值方法(0.11s) 比 第二种按键取值的方法(0.20s) 耗时减半 29 | predict_data = list(self.compiled_model_onnx([datas]).values()) 30 | # predict_data = [self.compiled_model_onnx([datas])[self.compiled_model_onnx.output(0)], 31 | # self.compiled_model_onnx([datas])[self.compiled_model_onnx.output(1)]] 32 | return predict_data 33 | 34 | 35 | class YOLOv5_seg: 36 | """YOLOv5 segmentation model class for handling inference and visualization.""" 37 | 38 | def __init__(self, onnx_model, imgsz=(640, 640), infer_tool='openvino'): 39 | """ 40 | Initialization. 41 | 42 | Args: 43 | onnx_model (str): Path to the ONNX model. 44 | """ 45 | self.infer_tool = infer_tool 46 | if self.infer_tool == 'openvino': 47 | # 构建openvino推理引擎 48 | self.openvino = OpenvinoInference(onnx_model) 49 | self.ndtype = np.single 50 | else: 51 | # 构建onnxruntime推理引擎 52 | self.ort_session = ort.InferenceSession(onnx_model, 53 | providers=['CUDAExecutionProvider', 'CPUExecutionProvider'] 54 | if ort.get_device() == 'GPU' else ['CPUExecutionProvider']) 55 | 56 | # Numpy dtype: support both FP32 and FP16 onnx model 57 | self.ndtype = np.half if self.ort_session.get_inputs()[0].type == 'tensor(float16)' else np.single 58 | 59 | self.classes = CLASSES # 加载模型类别 60 | self.model_height, self.model_width = imgsz[0], imgsz[1] # 图像resize大小 61 | self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3)) # 为每个类别生成调色板 62 | 63 | def __call__(self, im0, conf_threshold=0.4, iou_threshold=0.45, nm=32): 64 | """ 65 | The whole pipeline: pre-process -> inference -> post-process. 66 | 67 | Args: 68 | im0 (Numpy.ndarray): original input image. 69 | conf_threshold (float): confidence threshold for filtering predictions. 70 | iou_threshold (float): iou threshold for NMS. 71 | nm (int): the number of masks. 72 | 73 | Returns: 74 | boxes (List): list of bounding boxes. 75 | segments (List): list of segments. 76 | masks (np.ndarray): [N, H, W], output masks. 77 | """ 78 | # 前处理Pre-process 79 | t1 = time.time() 80 | im, ratio, (pad_w, pad_h) = self.preprocess(im0) 81 | print('预处理时间:{:.3f}s'.format(time.time() - t1)) 82 | 83 | # 推理 inference 84 | t2 = time.time() 85 | if self.infer_tool == 'openvino': 86 | preds = self.openvino.predict(im) 87 | else: 88 | preds = self.ort_session.run(None, {self.ort_session.get_inputs()[0].name: im}) # 与bbox区别,输出是个列表,[检测头的输出(1, 116, 8400), 分割头的输出(1, 32, 160, 160)] 89 | print('推理时间:{:.2f}s'.format(time.time() - t2)) 90 | 91 | # 后处理Post-process 92 | t3 = time.time() 93 | boxes, segments, masks = self.postprocess(preds, 94 | im0=im0, 95 | ratio=ratio, 96 | pad_w=pad_w, 97 | pad_h=pad_h, 98 | conf_threshold=conf_threshold, 99 | iou_threshold=iou_threshold, 100 | nm=nm 101 | ) 102 | print('后处理时间:{:.3f}s'.format(time.time() - t3)) 103 | 104 | return boxes, segments, masks 105 | 106 | # 前处理,包括:resize, pad, HWC to CHW,BGR to RGB,归一化,增加维度CHW -> BCHW 107 | def preprocess(self, img): 108 | """ 109 | Pre-processes the input image. 110 | 111 | Args: 112 | img (Numpy.ndarray): image about to be processed. 113 | 114 | Returns: 115 | img_process (Numpy.ndarray): image preprocessed for inference. 116 | ratio (tuple): width, height ratios in letterbox. 
117 | pad_w (float): width padding in letterbox. 118 | pad_h (float): height padding in letterbox. 119 | """ 120 | # Resize and pad input image using letterbox() (Borrowed from Ultralytics) 121 | shape = img.shape[:2] # original image shape 122 | new_shape = (self.model_height, self.model_width) 123 | r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) 124 | ratio = r, r 125 | new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) 126 | pad_w, pad_h = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2 # wh padding 127 | if shape[::-1] != new_unpad: # resize 128 | img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) 129 | top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1)) 130 | left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1)) 131 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)) # 填充 132 | 133 | # Transforms: HWC to CHW -> BGR to RGB -> div(255) -> contiguous -> add axis(optional) 134 | img = np.ascontiguousarray(np.einsum('HWC->CHW', img)[::-1], dtype=self.ndtype) / 255.0 135 | img_process = img[None] if len(img.shape) == 3 else img 136 | return img_process, ratio, (pad_w, pad_h) 137 | 138 | # 后处理,包括:阈值过滤+NMS+masks处理 139 | def postprocess(self, preds, im0, ratio, pad_w, pad_h, conf_threshold, iou_threshold, nm=32): 140 | """ 141 | Post-process the prediction. 142 | 143 | Args: 144 | preds (Numpy.ndarray): predictions come from ort.session.run(). 145 | im0 (Numpy.ndarray): [h, w, c] original input image. 146 | ratio (tuple): width, height ratios in letterbox. 147 | pad_w (float): width padding in letterbox. 148 | pad_h (float): height padding in letterbox. 149 | conf_threshold (float): conf threshold. 150 | iou_threshold (float): iou threshold. 151 | nm (int): the number of masks. 152 | 153 | Returns: 154 | boxes (List): list of bounding boxes. 155 | segments (List): list of segments. 156 | masks (np.ndarray): [N, H, W], output masks. 
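        Illustrative layout (a sketch, assuming the stock yolov5s-seg COCO export):
        each of the 8400*3 = 25200 candidate rows in preds[0] carries 117 channels,
        4 (cx, cy, w, h) + 1 objectness + 80 class scores + 32 mask coefficients;
        after filtering, np.c_ repacks every kept row into 4 + 1 + 1 + 32 = 38
        columns (box, score, class id, mask coefficients).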
157 | """ 158 | # (Batch_size, Num_anchors, xywh_score_conf_cls), v5和v6_1.0的[..., 4]是置信度分数,v8v9采用类别里面最大的概率作为置信度score 159 | x, protos = preds[0], preds[1] # 与bbox区别:Two outputs: 检测头的输出(1, 8400*3, 117), 分割头的输出(1, 32, 160, 160) 160 | 161 | # Predictions filtering by conf-threshold 162 | x = x[x[..., 4] > conf_threshold] 163 | 164 | # Create a new matrix which merge these(box, score, cls, nm) into one 165 | # For more details about `numpy.c_()`: https://numpy.org/doc/1.26/reference/generated/numpy.c_.html 166 | x = np.c_[x[..., :4], x[..., 4], np.argmax(x[..., 5:-nm], axis=-1), x[..., -nm:]] 167 | 168 | # NMS filtering 169 | # 经过NMS后的值, np.array([[x, y, w, h, conf, cls, nm], ...]), shape=(-1, 4 + 1 + 1 + 32) 170 | x = x[cv2.dnn.NMSBoxes(x[:, :4], x[:, 4], conf_threshold, iou_threshold)] 171 | 172 | # 重新缩放边界框,为画图做准备 173 | if len(x) > 0: 174 | # Bounding boxes format change: cxcywh -> xyxy 175 | x[..., [0, 1]] -= x[..., [2, 3]] / 2 176 | x[..., [2, 3]] += x[..., [0, 1]] 177 | 178 | # Rescales bounding boxes from model shape(model_height, model_width) to the shape of original image 179 | x[..., :4] -= [pad_w, pad_h, pad_w, pad_h] 180 | x[..., :4] /= min(ratio) 181 | 182 | # Bounding boxes boundary clamp 183 | x[..., [0, 2]] = x[:, [0, 2]].clip(0, im0.shape[1]) 184 | x[..., [1, 3]] = x[:, [1, 3]].clip(0, im0.shape[0]) 185 | 186 | # 与bbox区别:增加masks处理 187 | # Process masks 188 | masks = self.process_mask(protos[0], x[:, 6:], x[:, :4], im0.shape) 189 | # Masks -> Segments(contours) 190 | segments = self.masks2segments(masks) 191 | 192 | return x[..., :6], segments, masks # boxes, segments, masks 193 | else: 194 | return [], [], [] 195 | 196 | @staticmethod 197 | def masks2segments(masks): 198 | """ 199 | It takes a list of masks(n,h,w) and returns a list of segments(n,xy) (Borrowed from 200 | https://github.com/ultralytics/ultralytics/blob/465df3024f44fa97d4fad9986530d5a13cdabdca/ultralytics/utils/ops.py#L750) 201 | 202 | Args: 203 | masks (numpy.ndarray): the output of the model, which is a tensor of shape (batch_size, 160, 160). 204 | 205 | Returns: 206 | segments (List): list of segment masks. 207 | """ 208 | segments = [] 209 | for x in masks.astype('uint8'): 210 | c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[0] # CHAIN_APPROX_SIMPLE 该函数用于查找二值图像中的轮廓。 211 | if c: 212 | # 这段代码的目的是找到图像x中的最外层轮廓,并从中选择最长的轮廓,然后将其转换为NumPy数组的形式。 213 | c = np.array(c[np.array([len(x) for x in c]).argmax()]).reshape(-1, 2) 214 | else: 215 | c = np.zeros((0, 2)) # no segments found 216 | segments.append(c.astype('float32')) 217 | return segments 218 | 219 | 220 | def process_mask(self, protos, masks_in, bboxes, im0_shape): 221 | """ 222 | Takes the output of the mask head, and applies the mask to the bounding boxes. This produces masks of higher quality 223 | but is slower. (Borrowed from https://github.com/ultralytics/ultralytics/blob/465df3024f44fa97d4fad9986530d5a13cdabdca/ultralytics/utils/ops.py#L618) 224 | 225 | Args: 226 | protos (numpy.ndarray): [mask_dim, mask_h, mask_w]. 227 | masks_in (numpy.ndarray): [n, mask_dim], n is number of masks after nms. 228 | bboxes (numpy.ndarray): bboxes re-scaled to original image shape. 229 | im0_shape (tuple): the size of the input image (h,w,c). 230 | 231 | Returns: 232 | (numpy.ndarray): The upsampled masks. 
233 | """ 234 | c, mh, mw = protos.shape 235 | masks = np.matmul(masks_in, protos.reshape((c, -1))).reshape((-1, mh, mw)).transpose(1, 2, 0) # HWN 236 | masks = np.ascontiguousarray(masks) 237 | masks = self.scale_mask(masks, im0_shape) # re-scale mask from P3 shape to original input image shape 238 | masks = np.einsum('HWN -> NHW', masks) # HWN -> NHW 239 | masks = self.crop_mask(masks, bboxes) 240 | return np.greater(masks, 0.5) 241 | 242 | @staticmethod 243 | def scale_mask(masks, im0_shape, ratio_pad=None): 244 | """ 245 | Takes a mask, and resizes it to the original image size. (Borrowed from 246 | https://github.com/ultralytics/ultralytics/blob/465df3024f44fa97d4fad9986530d5a13cdabdca/ultralytics/utils/ops.py#L305) 247 | 248 | Args: 249 | masks (np.ndarray): resized and padded masks/images, [h, w, num]/[h, w, 3]. 250 | im0_shape (tuple): the original image shape. 251 | ratio_pad (tuple): the ratio of the padding to the original image. 252 | 253 | Returns: 254 | masks (np.ndarray): The masks that are being returned. 255 | """ 256 | im1_shape = masks.shape[:2] 257 | if ratio_pad is None: # calculate from im0_shape 258 | gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1]) # gain = old / new 259 | pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2 # wh padding 260 | else: 261 | pad = ratio_pad[1] 262 | 263 | # Calculate tlbr of mask 264 | top, left = int(round(pad[1] - 0.1)), int(round(pad[0] - 0.1)) # y, x 265 | bottom, right = int(round(im1_shape[0] - pad[1] + 0.1)), int(round(im1_shape[1] - pad[0] + 0.1)) 266 | if len(masks.shape) < 2: 267 | raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}') 268 | masks = masks[top:bottom, left:right] 269 | masks = cv2.resize(masks, (im0_shape[1], im0_shape[0]), 270 | interpolation=cv2.INTER_LINEAR) # INTER_CUBIC would be better 271 | if len(masks.shape) == 2: 272 | masks = masks[:, :, None] 273 | return masks 274 | 275 | @staticmethod 276 | def crop_mask(masks, boxes): 277 | """ 278 | It takes a mask and a bounding box, and returns a mask that is cropped to the bounding box. (Borrowed from 279 | https://github.com/ultralytics/ultralytics/blob/465df3024f44fa97d4fad9986530d5a13cdabdca/ultralytics/utils/ops.py#L599) 280 | 281 | Args: 282 | masks (Numpy.ndarray): [n, h, w] tensor of masks. 283 | boxes (Numpy.ndarray): [n, 4] tensor of bbox coordinates in relative point form. 284 | 285 | Returns: 286 | (Numpy.ndarray): The masks are being cropped to the bounding box. 287 | """ 288 | n, h, w = masks.shape 289 | x1, y1, x2, y2 = np.split(boxes[:, :, None], 4, 1) 290 | r = np.arange(w, dtype=x1.dtype)[None, None, :] 291 | c = np.arange(h, dtype=x1.dtype)[None, :, None] 292 | return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2)) 293 | 294 | # 绘框,与bbox区别:增加masks可视化 295 | def draw_and_visualize(self, im, bboxes, segments, vis=False, save=True): 296 | """ 297 | Draw and visualize results. 298 | 299 | Args: 300 | im (np.ndarray): original image, shape [h, w, c]. 301 | bboxes (numpy.ndarray): [n, 6], n is number of bboxes. 302 | segments (List): list of segment masks. 303 | vis (bool): imshow using OpenCV. 304 | save (bool): save image annotated. 
305 | 306 | Returns: 307 | None 308 | """ 309 | # Draw rectangles and polygons 310 | im_canvas = im.copy() 311 | # Draw rectangles 312 | for (*box, conf, cls_), segment in zip(bboxes, segments): 313 | # draw contour and fill mask 314 | cv2.polylines(im, np.int32([segment]), True, (255, 255, 255), 2) # white borderline 315 | cv2.fillPoly(im_canvas, np.int32([segment]), (255, 0, 0)) 316 | 317 | # draw bbox rectangle 318 | cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), 319 | self.color_palette[int(cls_)], 1, cv2.LINE_AA) 320 | cv2.putText(im, f'{self.classes[int(cls_)]}: {conf:.3f}', (int(box[0]), int(box[1] - 9)), 321 | cv2.FONT_HERSHEY_SIMPLEX, 0.7, self.color_palette[int(cls_)], 2, cv2.LINE_AA) 322 | 323 | # Mix image 324 | im = cv2.addWeighted(im_canvas, 0.3, im, 0.7, 0) 325 | 326 | # Show image 327 | if vis: 328 | cv2.imshow('demo', im) 329 | cv2.waitKey(0) 330 | cv2.destroyAllWindows() 331 | 332 | # Save image 333 | if save: 334 | cv2.imwrite('demo.jpg', im) 335 | 336 | 337 | if __name__ == '__main__': 338 | # Create an argument parser to handle command-line arguments 339 | parser = argparse.ArgumentParser() 340 | parser.add_argument('--model', type=str, default='weights\\yolov5s-seg.onnx', help='Path to ONNX model') 341 | parser.add_argument('--source', type=str, default=str('bus.jpg'), help='Path to input image') 342 | parser.add_argument('--imgsz', type=tuple, default=(640, 640), help='Image input size') 343 | parser.add_argument('--conf', type=float, default=0.25, help='Confidence threshold') 344 | parser.add_argument('--iou', type=float, default=0.45, help='NMS IoU threshold') 345 | parser.add_argument('--infer_tool', type=str, default='openvino', choices=("openvino", "onnxruntime"), help='选择推理引擎') 346 | args = parser.parse_args() 347 | 348 | # Build model 349 | model = YOLOv5_seg(args.model, args.imgsz, args.infer_tool) 350 | 351 | # Read image by OpenCV 352 | img = cv2.imread(args.source) 353 | 354 | # Inference 355 | boxes, segments, _ = model(img, conf_threshold=args.conf, iou_threshold=args.iou) 356 | 357 | # Visualize, Draw bboxes and polygons 358 | if len(boxes) > 0: 359 | model.draw_and_visualize(img, boxes, segments, vis=False, save=True) 360 | 361 | -------------------------------------------------------------------------------- /YOLOv5_bytetrack.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | import cv2 4 | import numpy as np 5 | from openvino.runtime import Core # pip install openvino -i https://pypi.tuna.tsinghua.edu.cn/simple 6 | import onnxruntime as ort # 使用onnxruntime推理用上,pip install onnxruntime,默认安装CPU 7 | 8 | import copy 9 | from bytetrack.byte_tracker import BYTETracker 10 | 11 | # COCO默认的80类 12 | CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 13 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 14 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 15 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 16 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 17 | 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 18 | 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 
'microwave', 'oven', 19 | 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] 20 | 21 | 22 | class OpenvinoInference(object): 23 | def __init__(self, onnx_path): 24 | self.onnx_path = onnx_path 25 | ie = Core() 26 | self.model_onnx = ie.read_model(model=self.onnx_path) 27 | self.compiled_model_onnx = ie.compile_model(model=self.model_onnx, device_name="CPU") 28 | self.output_layer_onnx = self.compiled_model_onnx.output(0) 29 | 30 | def predict(self, datas): 31 | predict_data = self.compiled_model_onnx([datas])[self.output_layer_onnx] 32 | return predict_data 33 | 34 | 35 | class YOLOv5: 36 | """YOLOv5 object detection model class for handling inference and visualization.""" 37 | 38 | def __init__(self, onnx_model, imgsz=(640, 640), infer_tool='openvino'): 39 | """ 40 | Initialization. 41 | 42 | Args: 43 | onnx_model (str): Path to the ONNX model. 44 | """ 45 | self.infer_tool = infer_tool 46 | if self.infer_tool == 'openvino': 47 | # 构建openvino推理引擎 48 | self.openvino = OpenvinoInference(onnx_model) 49 | self.ndtype = np.single 50 | else: 51 | # 构建onnxruntime推理引擎 52 | self.ort_session = ort.InferenceSession(onnx_model, 53 | providers=['CUDAExecutionProvider', 'CPUExecutionProvider'] 54 | if ort.get_device() == 'GPU' else ['CPUExecutionProvider']) 55 | 56 | # Numpy dtype: support both FP32 and FP16 onnx model 57 | self.ndtype = np.half if self.ort_session.get_inputs()[0].type == 'tensor(float16)' else np.single 58 | 59 | self.classes = CLASSES # 加载模型类别 60 | self.model_height, self.model_width = imgsz[0], imgsz[1] # 图像resize大小 61 | self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3)) # 为每个类别生成调色板 62 | 63 | def __call__(self, im0, conf_threshold=0.4, iou_threshold=0.45): 64 | """ 65 | The whole pipeline: pre-process -> inference -> post-process. 66 | 67 | Args: 68 | im0 (Numpy.ndarray): original input image. 69 | conf_threshold (float): confidence threshold for filtering predictions. 70 | iou_threshold (float): iou threshold for NMS. 71 | 72 | Returns: 73 | boxes (List): list of bounding boxes. 74 | """ 75 | # 前处理Pre-process 76 | t1 = time.time() 77 | im, ratio, (pad_w, pad_h) = self.preprocess(im0) 78 | print('预处理时间:{:.3f}s'.format(time.time() - t1)) 79 | 80 | # 推理 inference 81 | t2 = time.time() 82 | if self.infer_tool == 'openvino': 83 | preds = self.openvino.predict(im) 84 | else: 85 | preds = self.ort_session.run(None, {self.ort_session.get_inputs()[0].name: im})[0] 86 | print('推理时间:{:.3f}s'.format(time.time() - t2)) 87 | 88 | # 后处理Post-process 89 | t3 = time.time() 90 | boxes = self.postprocess(preds, 91 | im0=im0, 92 | ratio=ratio, 93 | pad_w=pad_w, 94 | pad_h=pad_h, 95 | conf_threshold=conf_threshold, 96 | iou_threshold=iou_threshold, 97 | ) 98 | print('后处理时间:{:.3f}s'.format(time.time() - t3)) 99 | 100 | return boxes 101 | 102 | # 前处理,包括:resize, pad, HWC to CHW,BGR to RGB,归一化,增加维度CHW -> BCHW 103 | def preprocess(self, img): 104 | """ 105 | Pre-processes the input image. 106 | 107 | Args: 108 | img (Numpy.ndarray): image about to be processed. 109 | 110 | Returns: 111 | img_process (Numpy.ndarray): image preprocessed for inference. 112 | ratio (tuple): width, height ratios in letterbox. 113 | pad_w (float): width padding in letterbox. 114 | pad_h (float): height padding in letterbox. 
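        Worked example (hypothetical 1920x1080 frame with a 640x640 model):
        r = min(640/1080, 640/1920) = 1/3, so new_unpad = (640, 360),
        pad_w = 0 and pad_h = 140, i.e. the frame is resized to 640x360 and
        padded top and bottom with 140 gray (114, 114, 114) rows each.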
115 | """ 116 | # Resize and pad input image using letterbox() (Borrowed from Ultralytics) 117 | shape = img.shape[:2] # original image shape 118 | new_shape = (self.model_height, self.model_width) 119 | r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) 120 | ratio = r, r 121 | new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) 122 | pad_w, pad_h = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2 # wh padding 123 | if shape[::-1] != new_unpad: # resize 124 | img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) 125 | top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1)) 126 | left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1)) 127 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)) # 填充 128 | 129 | # Transforms: HWC to CHW -> BGR to RGB -> div(255) -> contiguous -> add axis(optional) 130 | img = np.ascontiguousarray(np.einsum('HWC->CHW', img)[::-1], dtype=self.ndtype) / 255.0 131 | img_process = img[None] if len(img.shape) == 3 else img 132 | return img_process, ratio, (pad_w, pad_h) 133 | 134 | # 后处理,包括:阈值过滤与NMS 135 | def postprocess(self, preds, im0, ratio, pad_w, pad_h, conf_threshold, iou_threshold): 136 | """ 137 | Post-process the prediction. 138 | 139 | Args: 140 | preds (Numpy.ndarray): predictions come from ort.session.run(). 141 | im0 (Numpy.ndarray): [h, w, c] original input image. 142 | ratio (tuple): width, height ratios in letterbox. 143 | pad_w (float): width padding in letterbox. 144 | pad_h (float): height padding in letterbox. 145 | conf_threshold (float): conf threshold. 146 | iou_threshold (float): iou threshold. 147 | 148 | Returns: 149 | boxes (List): list of bounding boxes. 150 | """ 151 | # (Batch_size, Num_anchors, xywh_score_conf_cls), v5和v6_1.0的[..., 4]是置信度分数,v8v9采用类别里面最大的概率作为置信度score 152 | x = preds # outputs: predictions (1, 8400*3, 85) 153 | 154 | # Predictions filtering by conf-threshold 155 | x = x[x[..., 4] > conf_threshold] 156 | 157 | # Create a new matrix which merge these(box, score, cls) into one 158 | # For more details about `numpy.c_()`: https://numpy.org/doc/1.26/reference/generated/numpy.c_.html 159 | x = np.c_[x[..., :4], x[..., 4], np.argmax(x[..., 5:], axis=-1)] 160 | 161 | # NMS filtering 162 | # 经过NMS后的值, np.array([[x, y, w, h, conf, cls], ...]), shape=(-1, 4 + 1 + 1) 163 | x = x[cv2.dnn.NMSBoxes(x[:, :4], x[:, 4], conf_threshold, iou_threshold)] 164 | 165 | # 重新缩放边界框,为画图做准备 166 | if len(x) > 0: 167 | # Bounding boxes format change: cxcywh -> xyxy 168 | x[..., [0, 1]] -= x[..., [2, 3]] / 2 169 | x[..., [2, 3]] += x[..., [0, 1]] 170 | 171 | # Rescales bounding boxes from model shape(model_height, model_width) to the shape of original image 172 | x[..., :4] -= [pad_w, pad_h, pad_w, pad_h] 173 | x[..., :4] /= min(ratio) 174 | 175 | # Bounding boxes boundary clamp 176 | x[..., [0, 2]] = x[:, [0, 2]].clip(0, im0.shape[1]) 177 | x[..., [1, 3]] = x[:, [1, 3]].clip(0, im0.shape[0]) 178 | 179 | return x[..., :6] # boxes 180 | else: 181 | return [] 182 | 183 | # 绘框 184 | def draw_and_visualize(self, im, bboxes, video_writer, vis=False, save=False, is_track=False): 185 | """ 186 | Draw and visualize results. 187 | 188 | Args: 189 | im (np.ndarray): original image, shape [h, w, c]. 190 | bboxes (numpy.ndarray): [n, 6], n is number of bboxes. 191 | vis (bool): imshow using OpenCV. 192 | save (bool): save image annotated. 
193 | 194 | Returns: 195 | None 196 | """ 197 | # Draw rectangles 198 | if not is_track: 199 | for (*box, conf, cls_) in bboxes: 200 | # draw bbox rectangle 201 | cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), 202 | self.color_palette[int(cls_)], 1, cv2.LINE_AA) 203 | cv2.putText(im, f'{self.classes[int(cls_)]}: {conf:.3f}', (int(box[0]), int(box[1] - 9)), 204 | cv2.FONT_HERSHEY_SIMPLEX, 0.7, self.color_palette[int(cls_)], 2, cv2.LINE_AA) 205 | else: 206 | for (*box, conf, id_) in bboxes: 207 | # draw bbox rectangle 208 | cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), 209 | (0, 0, 255), 1, cv2.LINE_AA) 210 | cv2.putText(im, f'{id_}: {conf:.3f}', (int(box[0]), int(box[1] - 9)), 211 | cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2, cv2.LINE_AA) 212 | 213 | # Show image 214 | if vis: 215 | cv2.imshow('demo', im) 216 | cv2.waitKey(1) 217 | 218 | # Save video 219 | if save: 220 | video_writer.write(im) 221 | 222 | 223 | 224 | class ByteTrackerONNX(object): 225 | def __init__(self, args): 226 | self.args = args 227 | self.tracker = BYTETracker(args, frame_rate=30) 228 | 229 | def _tracker_update(self, dets, image): 230 | online_targets = [] 231 | if dets is not None: 232 | online_targets = self.tracker.update( 233 | dets[:, :5], 234 | [image.shape[0], image.shape[1]], 235 | [image.shape[0], image.shape[1]], 236 | ) 237 | 238 | online_tlwhs = [] 239 | online_ids = [] 240 | online_scores = [] 241 | for online_target in online_targets: 242 | tlwh = online_target.tlwh 243 | track_id = online_target.track_id 244 | vertical = tlwh[2] / tlwh[3] > 1.6 245 | if tlwh[2] * tlwh[3] > self.args.min_box_area and not vertical: 246 | online_tlwhs.append(tlwh) 247 | online_ids.append(track_id) 248 | online_scores.append(online_target.score) 249 | 250 | return online_tlwhs, online_ids, online_scores 251 | 252 | 253 | def inference(self, image, dets): 254 | """ 255 | Args: dets: 检测结果, [x1, y1, x2, y2, conf, cls] 256 | Returns: np.array([[x1, y1, x2, y2, conf, ids], ...]) 257 | """ 258 | bboxes, ids, scores = self._tracker_update(dets, image) 259 | if len(bboxes) == 0: 260 | return [] 261 | # Bounding boxes format change: tlwh -> xyxy 262 | bboxes = np.array(bboxes) 263 | bboxes[..., [2, 3]] += bboxes[..., [0, 1]] 264 | bboxes = np.c_[bboxes, np.array(scores), np.array(ids)] 265 | return bboxes 266 | 267 | 268 | if __name__ == '__main__': 269 | # Create an argument parser to handle command-line arguments 270 | parser = argparse.ArgumentParser() 271 | parser.add_argument('--model', type=str, default='yolov5s.onnx', help='Path to ONNX model') 272 | parser.add_argument('--source', type=str, default=str('test.mp4'), help='Path to input image') 273 | parser.add_argument('--imgsz', type=tuple, default=(640, 640), help='Image input size') 274 | parser.add_argument('--conf', type=float, default=0.25, help='Confidence threshold') 275 | parser.add_argument('--iou', type=float, default=0.45, help='NMS IoU threshold') 276 | parser.add_argument('--infer_tool', type=str, default='openvino', choices=("openvino", "onnxruntime"), help='选择推理引擎') 277 | 278 | parser.add_argument('--is_track', type=bool, default=True, help='是否启用跟踪') 279 | parser.add_argument('--track_thresh', type=float, default=0.5, help='tracking confidence threshold') 280 | parser.add_argument('--track_buffer', type=int, default=30, help='the frames for keep lost tracks, usually as same with FPS') 281 | parser.add_argument('--match_thresh', type=float, default=0.8, help='matching threshold for tracking') 282 | 
parser.add_argument('--min_box_area', type=float, default=10, help='filter out tiny boxes',) 283 | parser.add_argument('--mot20', dest='mot20', default=False, action='store_true', help='test mot20.',) 284 | args = parser.parse_args() 285 | 286 | # Build model 287 | model = YOLOv5(args.model, args.imgsz, args.infer_tool) 288 | 289 | bytetrack = ByteTrackerONNX(args) 290 | 291 | # 读取视频,解析帧数宽高,保存视频 292 | cap = cv2.VideoCapture(args.source) 293 | width = cap.get(cv2.CAP_PROP_FRAME_WIDTH) 294 | height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) 295 | fps = cap.get(cv2.CAP_PROP_FPS) 296 | frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT) 297 | video_writer = cv2.VideoWriter('demo.mp4', cv2.VideoWriter_fourcc(*"mp4v"), fps, (int(width), int(height))) 298 | frame_id = 1 299 | 300 | while True: 301 | start_time = time.time() 302 | ret, img = cap.read() 303 | if not ret: 304 | break 305 | 306 | # Inference 307 | boxes = model(img, conf_threshold=args.conf, iou_threshold=args.iou) 308 | 309 | # track 310 | if args.is_track: 311 | boxes = bytetrack.inference(img, boxes) 312 | 313 | # Visualize 314 | if len(boxes) > 0: 315 | model.draw_and_visualize(copy.deepcopy(img), boxes, video_writer, vis=False, save=True, is_track=args.is_track) 316 | 317 | end_time = time.time() - start_time 318 | print('frame {}/{} (Total time: {:.2f} ms)'.format(frame_id, int(frame_count), end_time * 1000)) 319 | frame_id += 1 320 | 321 | 322 | 323 | -------------------------------------------------------------------------------- /YOLOv5_openvino_onnx.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | import cv2 4 | import numpy as np 5 | from openvino.runtime import Core # pip install openvino -i https://pypi.tuna.tsinghua.edu.cn/simple 6 | import onnxruntime as ort # 使用onnxruntime推理用上,pip install onnxruntime,默认安装CPU 7 | 8 | 9 | # COCO默认的80类 10 | CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 11 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 12 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 13 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 14 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 15 | 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 16 | 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 17 | 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] 18 | 19 | 20 | class OpenvinoInference(object): 21 | def __init__(self, onnx_path): 22 | self.onnx_path = onnx_path 23 | ie = Core() 24 | self.model_onnx = ie.read_model(model=self.onnx_path) 25 | self.compiled_model_onnx = ie.compile_model(model=self.model_onnx, device_name="CPU") 26 | self.output_layer_onnx = self.compiled_model_onnx.output(0) 27 | 28 | def predict(self, datas): 29 | predict_data = self.compiled_model_onnx([datas])[self.output_layer_onnx] 30 | return predict_data 31 | 32 | 33 | class YOLOv5: 34 | """YOLOv5 object detection model class for handling inference and visualization.""" 35 | 36 | def __init__(self, onnx_model, imgsz=(640, 640), infer_tool='openvino'): 37 | """ 38 | Initialization. 
39 | 40 | Args: 41 | onnx_model (str): Path to the ONNX model. 42 | """ 43 | self.infer_tool = infer_tool 44 | if self.infer_tool == 'openvino': 45 | # 构建openvino推理引擎 46 | self.openvino = OpenvinoInference(onnx_model) 47 | self.ndtype = np.single 48 | else: 49 | # 构建onnxruntime推理引擎 50 | self.ort_session = ort.InferenceSession(onnx_model, 51 | providers=['CUDAExecutionProvider', 'CPUExecutionProvider'] 52 | if ort.get_device() == 'GPU' else ['CPUExecutionProvider']) 53 | 54 | # Numpy dtype: support both FP32 and FP16 onnx model 55 | self.ndtype = np.half if self.ort_session.get_inputs()[0].type == 'tensor(float16)' else np.single 56 | 57 | self.classes = CLASSES # 加载模型类别 58 | self.model_height, self.model_width = imgsz[0], imgsz[1] # 图像resize大小 59 | self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3)) # 为每个类别生成调色板 60 | 61 | def __call__(self, im0, conf_threshold=0.4, iou_threshold=0.45): 62 | """ 63 | The whole pipeline: pre-process -> inference -> post-process. 64 | 65 | Args: 66 | im0 (Numpy.ndarray): original input image. 67 | conf_threshold (float): confidence threshold for filtering predictions. 68 | iou_threshold (float): iou threshold for NMS. 69 | 70 | Returns: 71 | boxes (List): list of bounding boxes. 72 | """ 73 | # 前处理Pre-process 74 | t1 = time.time() 75 | im, ratio, (pad_w, pad_h) = self.preprocess(im0) 76 | print('预处理时间:{:.3f}s'.format(time.time() - t1)) 77 | 78 | # 推理 inference 79 | t2 = time.time() 80 | if self.infer_tool == 'openvino': 81 | preds = self.openvino.predict(im) 82 | else: 83 | preds = self.ort_session.run(None, {self.ort_session.get_inputs()[0].name: im})[0] 84 | print('推理时间:{:.2f}s'.format(time.time() - t2)) 85 | 86 | # 后处理Post-process 87 | t3 = time.time() 88 | boxes = self.postprocess(preds, 89 | im0=im0, 90 | ratio=ratio, 91 | pad_w=pad_w, 92 | pad_h=pad_h, 93 | conf_threshold=conf_threshold, 94 | iou_threshold=iou_threshold, 95 | ) 96 | print('后处理时间:{:.3f}s'.format(time.time() - t3)) 97 | 98 | return boxes 99 | 100 | # 前处理,包括:resize, pad, HWC to CHW,BGR to RGB,归一化,增加维度CHW -> BCHW 101 | def preprocess(self, img): 102 | """ 103 | Pre-processes the input image. 104 | 105 | Args: 106 | img (Numpy.ndarray): image about to be processed. 107 | 108 | Returns: 109 | img_process (Numpy.ndarray): image preprocessed for inference. 110 | ratio (tuple): width, height ratios in letterbox. 111 | pad_w (float): width padding in letterbox. 112 | pad_h (float): height padding in letterbox. 
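        Note on the transform step below: np.einsum('HWC->CHW', img) moves the
        channel axis to the front, so the following [::-1] reverses the channel
        axis (BGR -> RGB); division by 255.0 scales pixels to [0, 1], and
        img[None] prepends the batch axis, yielding a (1, 3, 640, 640) blob
        (float32 for an FP32 model) at the default 640x640 input size.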
113 | """ 114 | # Resize and pad input image using letterbox() (Borrowed from Ultralytics) 115 | shape = img.shape[:2] # original image shape 116 | new_shape = (self.model_height, self.model_width) 117 | r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) 118 | ratio = r, r 119 | new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) 120 | pad_w, pad_h = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2 # wh padding 121 | if shape[::-1] != new_unpad: # resize 122 | img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) 123 | top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1)) 124 | left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1)) 125 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)) # 填充 126 | 127 | # Transforms: HWC to CHW -> BGR to RGB -> div(255) -> contiguous -> add axis(optional) 128 | img = np.ascontiguousarray(np.einsum('HWC->CHW', img)[::-1], dtype=self.ndtype) / 255.0 129 | img_process = img[None] if len(img.shape) == 3 else img 130 | return img_process, ratio, (pad_w, pad_h) 131 | 132 | # 后处理,包括:阈值过滤与NMS 133 | def postprocess(self, preds, im0, ratio, pad_w, pad_h, conf_threshold, iou_threshold): 134 | """ 135 | Post-process the prediction. 136 | 137 | Args: 138 | preds (Numpy.ndarray): predictions come from ort.session.run(). 139 | im0 (Numpy.ndarray): [h, w, c] original input image. 140 | ratio (tuple): width, height ratios in letterbox. 141 | pad_w (float): width padding in letterbox. 142 | pad_h (float): height padding in letterbox. 143 | conf_threshold (float): conf threshold. 144 | iou_threshold (float): iou threshold. 145 | 146 | Returns: 147 | boxes (List): list of bounding boxes. 148 | """ 149 | # (Batch_size, Num_anchors, xywh_score_conf_cls), v5和v6_1.0的[..., 4]是置信度分数,v8v9采用类别里面最大的概率作为置信度score 150 | x = preds # outputs: predictions (1, 8400*3, 85) 151 | 152 | # Predictions filtering by conf-threshold 153 | x = x[x[..., 4] > conf_threshold] 154 | 155 | # Create a new matrix which merge these(box, score, cls) into one 156 | # For more details about `numpy.c_()`: https://numpy.org/doc/1.26/reference/generated/numpy.c_.html 157 | x = np.c_[x[..., :4], x[..., 4], np.argmax(x[..., 5:], axis=-1)] 158 | 159 | # NMS filtering 160 | # 经过NMS后的值, np.array([[x, y, w, h, conf, cls], ...]), shape=(-1, 4 + 1 + 1) 161 | x = x[cv2.dnn.NMSBoxes(x[:, :4], x[:, 4], conf_threshold, iou_threshold)] 162 | 163 | # 重新缩放边界框,为画图做准备 164 | if len(x) > 0: 165 | # Bounding boxes format change: cxcywh -> xyxy 166 | x[..., [0, 1]] -= x[..., [2, 3]] / 2 167 | x[..., [2, 3]] += x[..., [0, 1]] 168 | 169 | # Rescales bounding boxes from model shape(model_height, model_width) to the shape of original image 170 | x[..., :4] -= [pad_w, pad_h, pad_w, pad_h] 171 | x[..., :4] /= min(ratio) 172 | 173 | # Bounding boxes boundary clamp 174 | x[..., [0, 2]] = x[:, [0, 2]].clip(0, im0.shape[1]) 175 | x[..., [1, 3]] = x[:, [1, 3]].clip(0, im0.shape[0]) 176 | 177 | return x[..., :6] # boxes 178 | else: 179 | return [] 180 | 181 | # 绘框 182 | def draw_and_visualize(self, im, bboxes, vis=False, save=True): 183 | """ 184 | Draw and visualize results. 185 | 186 | Args: 187 | im (np.ndarray): original image, shape [h, w, c]. 188 | bboxes (numpy.ndarray): [n, 6], n is number of bboxes. 189 | vis (bool): imshow using OpenCV. 190 | save (bool): save image annotated. 
191 | 
192 |         Returns:
193 |             None
194 |         """
195 |         # Draw rectangles
196 |         for (*box, conf, cls_) in bboxes:
197 |             # draw bbox rectangle
198 |             cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])),
199 |                           self.color_palette[int(cls_)], 1, cv2.LINE_AA)
200 |             cv2.putText(im, f'{self.classes[int(cls_)]}: {conf:.3f}', (int(box[0]), int(box[1] - 9)),
201 |                         cv2.FONT_HERSHEY_SIMPLEX, 0.7, self.color_palette[int(cls_)], 2, cv2.LINE_AA)
202 | 
203 |         # Show image
204 |         if vis:
205 |             cv2.imshow('demo', im)
206 |             cv2.waitKey(0)
207 |             cv2.destroyAllWindows()
208 | 
209 |         # Save image
210 |         if save:
211 |             cv2.imwrite('demo.jpg', im)
212 | 
213 | 
214 | if __name__ == '__main__':
215 |     # Create an argument parser to handle command-line arguments
216 |     parser = argparse.ArgumentParser()
217 |     parser.add_argument('--model', type=str, default='yolov5s.onnx', help='Path to ONNX model')
218 |     parser.add_argument('--source', type=str, default=str('bus.jpg'), help='Path to input image')
219 |     parser.add_argument('--imgsz', type=tuple, default=(640, 640), help='Image input size')
220 |     parser.add_argument('--conf', type=float, default=0.25, help='Confidence threshold')
221 |     parser.add_argument('--iou', type=float, default=0.45, help='NMS IoU threshold')
222 |     parser.add_argument('--infer_tool', type=str, default='openvino', choices=("openvino", "onnxruntime"), help='Inference engine to use')
223 |     args = parser.parse_args()
224 | 
225 |     # Build model
226 |     model = YOLOv5(args.model, args.imgsz, args.infer_tool)
227 | 
228 |     # Read image by OpenCV
229 |     img = cv2.imread(args.source)
230 | 
231 |     # Inference
232 |     boxes = model(img, conf_threshold=args.conf, iou_threshold=args.iou)
233 | 
234 |     # Visualize
235 |     if len(boxes) > 0:
236 |         model.draw_and_visualize(img, boxes, vis=False, save=True)
237 | 
238 | 
239 | 
--------------------------------------------------------------------------------
/YOLOv6_openvino_onnx.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import time
3 | import cv2
4 | import numpy as np
5 | from openvino.runtime import Core  # pip install openvino -i https://pypi.tuna.tsinghua.edu.cn/simple
6 | import onnxruntime as ort  # used for ONNXRuntime inference; pip install onnxruntime (installs the CPU build by default)
7 | 
8 | 
9 | # The 80 default COCO classes
10 | CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
11 |            'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
12 |            'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
13 |            'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
14 |            'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
15 |            'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
16 |            'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
17 |            'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
18 | 
19 | 
20 | class OpenvinoInference(object):
21 |     def __init__(self, onnx_path):
22 |         self.onnx_path = onnx_path
23 |         ie = Core()
24 |         self.model_onnx = ie.read_model(model=self.onnx_path)
25 |         self.compiled_model_onnx = ie.compile_model(model=self.model_onnx, device_name="CPU")
26 |         self.output_layer_onnx = self.compiled_model_onnx.output(0)
27 | 
28 | 
def predict(self, datas): 29 | predict_data = self.compiled_model_onnx([datas])[self.output_layer_onnx] 30 | return predict_data 31 | 32 | 33 | class YOLOv6: 34 | """YOLOv6 object detection model class for handling inference and visualization.""" 35 | 36 | def __init__(self, onnx_model, imgsz=(640, 640), infer_tool='openvino'): 37 | """ 38 | Initialization. 39 | 40 | Args: 41 | onnx_model (str): Path to the ONNX model. 42 | """ 43 | self.infer_tool = infer_tool 44 | if self.infer_tool == 'openvino': 45 | # 构建openvino推理引擎 46 | self.openvino = OpenvinoInference(onnx_model) 47 | self.ndtype = np.single 48 | else: 49 | # 构建onnxruntime推理引擎 50 | self.ort_session = ort.InferenceSession(onnx_model, 51 | providers=['CUDAExecutionProvider', 'CPUExecutionProvider'] 52 | if ort.get_device() == 'GPU' else ['CPUExecutionProvider']) 53 | 54 | # Numpy dtype: support both FP32 and FP16 onnx model 55 | self.ndtype = np.half if self.ort_session.get_inputs()[0].type == 'tensor(float16)' else np.single 56 | 57 | self.classes = CLASSES # 加载模型类别 58 | self.model_height, self.model_width = imgsz[0], imgsz[1] # 图像resize大小 59 | self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3)) # 为每个类别生成调色板 60 | 61 | def __call__(self, im0, conf_threshold=0.4, iou_threshold=0.45): 62 | """ 63 | The whole pipeline: pre-process -> inference -> post-process. 64 | 65 | Args: 66 | im0 (Numpy.ndarray): original input image. 67 | conf_threshold (float): confidence threshold for filtering predictions. 68 | iou_threshold (float): iou threshold for NMS. 69 | 70 | Returns: 71 | boxes (List): list of bounding boxes. 72 | """ 73 | # 前处理Pre-process 74 | t1 = time.time() 75 | im, ratio, (pad_w, pad_h) = self.preprocess(im0) 76 | print('预处理时间:{:.3f}s'.format(time.time() - t1)) 77 | 78 | # 推理 inference 79 | t2 = time.time() 80 | if self.infer_tool == 'openvino': 81 | preds = self.openvino.predict(im) 82 | else: 83 | preds = self.ort_session.run(None, {self.ort_session.get_inputs()[0].name: im})[0] 84 | print('推理时间:{:.2f}s'.format(time.time() - t2)) 85 | 86 | # 后处理Post-process 87 | t3 = time.time() 88 | boxes = self.postprocess(preds, 89 | im0=im0, 90 | ratio=ratio, 91 | pad_w=pad_w, 92 | pad_h=pad_h, 93 | conf_threshold=conf_threshold, 94 | iou_threshold=iou_threshold, 95 | ) 96 | print('后处理时间:{:.3f}s'.format(time.time() - t3)) 97 | 98 | return boxes 99 | 100 | # 前处理,包括:resize, pad, HWC to CHW,BGR to RGB,归一化,增加维度CHW -> BCHW 101 | def preprocess(self, img): 102 | """ 103 | Pre-processes the input image. 104 | 105 | Args: 106 | img (Numpy.ndarray): image about to be processed. 107 | 108 | Returns: 109 | img_process (Numpy.ndarray): image preprocessed for inference. 110 | ratio (tuple): width, height ratios in letterbox. 111 | pad_w (float): width padding in letterbox. 112 | pad_h (float): height padding in letterbox. 
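        Note on the rounding below (worked example): the +/-0.1 offsets split an
        odd padding deterministically, e.g. pad_h = 12.5 gives top = round(12.4)
        = 12 and bottom = round(12.6) = 13, so the two sides always sum to the
        exact padding required.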
113 | """ 114 | # Resize and pad input image using letterbox() (Borrowed from Ultralytics) 115 | shape = img.shape[:2] # original image shape 116 | new_shape = (self.model_height, self.model_width) 117 | r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) 118 | ratio = r, r 119 | new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) 120 | pad_w, pad_h = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2 # wh padding 121 | if shape[::-1] != new_unpad: # resize 122 | img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) 123 | top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1)) 124 | left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1)) 125 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)) # 填充 126 | 127 | # Transforms: HWC to CHW -> BGR to RGB -> div(255) -> contiguous -> add axis(optional) 128 | img = np.ascontiguousarray(np.einsum('HWC->CHW', img)[::-1], dtype=self.ndtype) / 255.0 129 | img_process = img[None] if len(img.shape) == 3 else img 130 | return img_process, ratio, (pad_w, pad_h) 131 | 132 | # 后处理,包括:阈值过滤与NMS 133 | def postprocess(self, preds, im0, ratio, pad_w, pad_h, conf_threshold, iou_threshold): 134 | """ 135 | Post-process the prediction. 136 | 137 | Args: 138 | preds (Numpy.ndarray): predictions come from ort.session.run(). 139 | im0 (Numpy.ndarray): [h, w, c] original input image. 140 | ratio (tuple): width, height ratios in letterbox. 141 | pad_w (float): width padding in letterbox. 142 | pad_h (float): height padding in letterbox. 143 | conf_threshold (float): conf threshold. 144 | iou_threshold (float): iou threshold. 145 | 146 | Returns: 147 | boxes (List): list of bounding boxes. 148 | """ 149 | # (Batch_size, Num_anchors, xywh_score_conf_cls), v5和v6_1.0的[..., 4]是置信度分数,v8v9采用类别里面最大的概率作为置信度score 150 | x = preds # outputs: predictions (1, 8400, 85) 151 | 152 | # Predictions filtering by conf-threshold 153 | x = x[x[..., 4] > conf_threshold] 154 | 155 | # Create a new matrix which merge these(box, score, cls) into one 156 | # For more details about `numpy.c_()`: https://numpy.org/doc/1.26/reference/generated/numpy.c_.html 157 | x = np.c_[x[..., :4], x[..., 4], np.argmax(x[..., 5:], axis=-1)] 158 | 159 | # NMS filtering 160 | # 经过NMS后的值, np.array([[x, y, w, h, conf, cls], ...]), shape=(-1, 4 + 1 + 1) 161 | x = x[cv2.dnn.NMSBoxes(x[:, :4], x[:, 4], conf_threshold, iou_threshold)] 162 | 163 | # 重新缩放边界框,为画图做准备 164 | if len(x) > 0: 165 | # Bounding boxes format change: cxcywh -> xyxy 166 | x[..., [0, 1]] -= x[..., [2, 3]] / 2 167 | x[..., [2, 3]] += x[..., [0, 1]] 168 | 169 | # Rescales bounding boxes from model shape(model_height, model_width) to the shape of original image 170 | x[..., :4] -= [pad_w, pad_h, pad_w, pad_h] 171 | x[..., :4] /= min(ratio) 172 | 173 | # Bounding boxes boundary clamp 174 | x[..., [0, 2]] = x[:, [0, 2]].clip(0, im0.shape[1]) 175 | x[..., [1, 3]] = x[:, [1, 3]].clip(0, im0.shape[0]) 176 | 177 | return x[..., :6] # boxes 178 | else: 179 | return [] 180 | 181 | # 绘框 182 | def draw_and_visualize(self, im, bboxes, vis=False, save=True): 183 | """ 184 | Draw and visualize results. 185 | 186 | Args: 187 | im (np.ndarray): original image, shape [h, w, c]. 188 | bboxes (numpy.ndarray): [n, 6], n is number of bboxes. 189 | vis (bool): imshow using OpenCV. 190 | save (bool): save image annotated. 
191 | 192 | Returns: 193 | None 194 | """ 195 | # Draw rectangles 196 | for (*box, conf, cls_) in bboxes: 197 | # draw bbox rectangle 198 | cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), 199 | self.color_palette[int(cls_)], 1, cv2.LINE_AA) 200 | cv2.putText(im, f'{self.classes[int(cls_)]}: {conf:.3f}', (int(box[0]), int(box[1] - 9)), 201 | cv2.FONT_HERSHEY_SIMPLEX, 0.7, self.color_palette[int(cls_)], 2, cv2.LINE_AA) 202 | 203 | # Show image 204 | if vis: 205 | cv2.imshow('demo', im) 206 | cv2.waitKey(0) 207 | cv2.destroyAllWindows() 208 | 209 | # Save image 210 | if save: 211 | cv2.imwrite('demo.jpg', im) 212 | 213 | 214 | if __name__ == '__main__': 215 | # Create an argument parser to handle command-line arguments 216 | parser = argparse.ArgumentParser() 217 | parser.add_argument('--model', type=str, default='yolov6s_1.0.onnx', help='Path to ONNX model') 218 | parser.add_argument('--source', type=str, default=str('bus.jpg'), help='Path to input image') 219 | parser.add_argument('--imgsz', type=tuple, default=(640, 640), help='Image input size') 220 | parser.add_argument('--conf', type=float, default=0.25, help='Confidence threshold') 221 | parser.add_argument('--iou', type=float, default=0.45, help='NMS IoU threshold') 222 | parser.add_argument('--infer_tool', type=str, default='openvino', choices=("openvino", "onnxruntime"), help='选择推理引擎') 223 | args = parser.parse_args() 224 | 225 | # Build model 226 | model = YOLOv6(args.model, args.imgsz, args.infer_tool) 227 | 228 | # Read image by OpenCV 229 | img = cv2.imread(args.source) 230 | 231 | # Inference 232 | boxes = model(img, conf_threshold=args.conf, iou_threshold=args.iou) 233 | 234 | # Visualize 235 | if len(boxes) > 0: 236 | model.draw_and_visualize(img, boxes, vis=False, save=True) 237 | 238 | -------------------------------------------------------------------------------- /YOLOv7-pose_openvino_onnx.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | import cv2 4 | import numpy as np 5 | from openvino.runtime import Core # pip install openvino -i https://pypi.tuna.tsinghua.edu.cn/simple 6 | import onnxruntime as ort # 使用onnxruntime推理用上,pip install onnxruntime,默认安装CPU 7 | 8 | 9 | # Pose默认的person类 10 | CLASSES = ['person'] 11 | 12 | class OpenvinoInference(object): 13 | def __init__(self, onnx_path): 14 | self.onnx_path = onnx_path 15 | ie = Core() 16 | self.model_onnx = ie.read_model(model=self.onnx_path) 17 | self.compiled_model_onnx = ie.compile_model(model=self.model_onnx, device_name="CPU") 18 | self.output_layer_onnx = self.compiled_model_onnx.output(0) 19 | 20 | def predict(self, datas): 21 | predict_data = self.compiled_model_onnx([datas])[self.output_layer_onnx] 22 | return predict_data 23 | 24 | 25 | class KeyPoint_draw(object): 26 | def __init__(self): 27 | # 定义一个调色板数组,其中每个元素是一个包含RGB值的列表,用于表示不同的颜色 28 | self.palette = np.array([[255, 128, 0], [255, 153, 51], [255, 178, 102], 29 | [230, 230, 0], [255, 153, 255], [153, 204, 255], 30 | [255, 102, 255], [255, 51, 255], [102, 178, 255], 31 | [51, 153, 255], [255, 153, 153], [255, 102, 102], 32 | [255, 51, 51], [153, 255, 153], [102, 255, 102], 33 | [51, 255, 51], [0, 255, 0], [0, 0, 255], [255, 0, 0], 34 | [255, 255, 255]]) 35 | # 定义人体17个关键点的连接顺序,每个子列表包含两个数字,代表要连接的关键点的索引, 1鼻子 2左眼 3右眼 4左耳 5右耳 6左肩 7右肩 36 | # 8左肘 9右肘 10左手腕 11右手腕 12左髋 13右髋 14左膝 15右膝 16左踝 17右踝 37 | self.skeleton = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], 38 | [7, 13], [6, 7], [6, 8], [7, 9], [8, 10], 
[9, 11], [2, 3], 39 | [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7]] 40 | # 通过索引从调色板中选择颜色,用于绘制人体骨架的线条,每个索引对应一种颜色 41 | self.pose_limb_color = self.palette[[9, 9, 9, 9, 7, 7, 7, 0, 0, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16]] 42 | # 通过索引从调色板中选择颜色,用于绘制人体的关键点,每个索引对应一种颜色 43 | self.pose_kpt_color = self.palette[[16, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9]] 44 | 45 | def plot_skeleton_kpts(self, im, kpts, steps=3): 46 | num_kpts = len(kpts) // steps # 51 / 3 =17 47 | # 画点 48 | for kid in range(num_kpts): 49 | r, g, b = self.pose_kpt_color[kid] 50 | x_coord, y_coord = kpts[steps * kid], kpts[steps * kid + 1] 51 | conf = kpts[steps * kid + 2] 52 | if conf > 0.5: # 关键点的置信度必须大于 0.5 53 | cv2.circle(im, (int(x_coord), int(y_coord)), 10, (int(r), int(g), int(b)), -1) 54 | # 画骨架 55 | for sk_id, sk in enumerate(self.skeleton): 56 | r, g, b = self.pose_limb_color[sk_id] 57 | pos1 = (int(kpts[(sk[0] - 1) * steps]), int(kpts[(sk[0] - 1) * steps + 1])) 58 | pos2 = (int(kpts[(sk[1] - 1) * steps]), int(kpts[(sk[1] - 1) * steps + 1])) 59 | conf1 = kpts[(sk[0] - 1) * steps + 2] 60 | conf2 = kpts[(sk[1] - 1) * steps + 2] 61 | if conf1 > 0.5 and conf2 > 0.5: # 对于肢体,相连的两个关键点置信度 必须同时大于 0.5 62 | cv2.line(im, pos1, pos2, (int(r), int(g), int(b)), thickness=2) 63 | 64 | 65 | class YOLOv7_pose: 66 | """YOLOv7_pose detection model class for handling inference and visualization.""" 67 | 68 | def __init__(self, onnx_model, imgsz=(640, 640), infer_tool='openvino'): 69 | """ 70 | Initialization. 71 | 72 | Args: 73 | onnx_model (str): Path to the ONNX model. 74 | """ 75 | self.infer_tool = infer_tool 76 | if self.infer_tool == 'openvino': 77 | # 构建openvino推理引擎 78 | self.openvino = OpenvinoInference(onnx_model) 79 | self.ndtype = np.single 80 | else: 81 | # 构建onnxruntime推理引擎 82 | self.ort_session = ort.InferenceSession(onnx_model, 83 | providers=['CUDAExecutionProvider', 'CPUExecutionProvider'] 84 | if ort.get_device() == 'GPU' else ['CPUExecutionProvider']) 85 | 86 | # Numpy dtype: support both FP32 and FP16 onnx model 87 | self.ndtype = np.half if self.ort_session.get_inputs()[0].type == 'tensor(float16)' else np.single 88 | 89 | self.classes = CLASSES # 加载模型类别 90 | self.model_height, self.model_width = imgsz[0], imgsz[1] # 图像resize大小 91 | self.color = (0, 0, 255) # 为类别生成调色板 92 | 93 | def __call__(self, im0, conf_threshold=0.4, iou_threshold=0.45): 94 | """ 95 | The whole pipeline: pre-process -> inference -> post-process. 96 | 97 | Args: 98 | im0 (Numpy.ndarray): original input image. 99 | conf_threshold (float): confidence threshold for filtering predictions. 100 | iou_threshold (float): iou threshold for NMS. 101 | 102 | Returns: 103 | boxes (List): list of bounding boxes. 
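        Illustrative row layout (assuming the 17-keypoint COCO pose model): each
        returned row holds 4 + 1 + 17*3 = 56 values, i.e. an xyxy box, one
        confidence score, and 17 (x, y, conf) keypoint triplets.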
104 | """ 105 | # 前处理Pre-process 106 | t1 = time.time() 107 | im, ratio, (pad_w, pad_h) = self.preprocess(im0) 108 | print('预处理时间:{:.3f}s'.format(time.time() - t1)) 109 | 110 | # 推理 inference 111 | t2 = time.time() 112 | if self.infer_tool == 'openvino': 113 | preds = self.openvino.predict(im) 114 | else: 115 | preds = self.ort_session.run(None, {self.ort_session.get_inputs()[0].name: im})[0] 116 | print('推理时间:{:.2f}s'.format(time.time() - t2)) 117 | 118 | # 后处理Post-process 119 | t3 = time.time() 120 | boxes = self.postprocess(preds, 121 | im0=im0, 122 | ratio=ratio, 123 | pad_w=pad_w, 124 | pad_h=pad_h, 125 | conf_threshold=conf_threshold, 126 | iou_threshold=iou_threshold, 127 | ) 128 | print('后处理时间:{:.3f}s'.format(time.time() - t3)) 129 | 130 | return boxes 131 | 132 | # 前处理,包括:resize, pad, HWC to CHW,BGR to RGB,归一化,增加维度CHW -> BCHW 133 | def preprocess(self, img): 134 | """ 135 | Pre-processes the input image. 136 | 137 | Args: 138 | img (Numpy.ndarray): image about to be processed. 139 | 140 | Returns: 141 | img_process (Numpy.ndarray): image preprocessed for inference. 142 | ratio (tuple): width, height ratios in letterbox. 143 | pad_w (float): width padding in letterbox. 144 | pad_h (float): height padding in letterbox. 145 | """ 146 | # Resize and pad input image using letterbox() (Borrowed from Ultralytics) 147 | shape = img.shape[:2] # original image shape 148 | new_shape = (self.model_height, self.model_width) 149 | r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) 150 | ratio = r, r 151 | new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) 152 | pad_w, pad_h = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2 # wh padding 153 | if shape[::-1] != new_unpad: # resize 154 | img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) 155 | top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1)) 156 | left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1)) 157 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)) # 填充 158 | 159 | # Transforms: HWC to CHW -> BGR to RGB -> div(255) -> contiguous -> add axis(optional) 160 | img = np.ascontiguousarray(np.einsum('HWC->CHW', img)[::-1], dtype=self.ndtype) / 255.0 161 | img_process = img[None] if len(img.shape) == 3 else img 162 | return img_process, ratio, (pad_w, pad_h) 163 | 164 | # 后处理,包括:阈值过滤与NMS 165 | def postprocess(self, preds, im0, ratio, pad_w, pad_h, conf_threshold, iou_threshold): 166 | """ 167 | Post-process the prediction. 168 | 169 | Args: 170 | preds (Numpy.ndarray): predictions come from ort.session.run(). 171 | im0 (Numpy.ndarray): [h, w, c] original input image. 172 | ratio (tuple): width, height ratios in letterbox. 173 | pad_w (float): width padding in letterbox. 174 | pad_h (float): height padding in letterbox. 175 | conf_threshold (float): conf threshold. 176 | iou_threshold (float): iou threshold. 177 | 178 | Returns: 179 | boxes (List): list of bounding boxes. 
180 | """ 181 | x = preds # outputs: predictions (1, num_anchors, 57),其中57=4+1+1+17*3,即box(4)+obj_conf(1)+cls_conf(1)+17个关键点(x,y,conf) 182 | 183 | # Predictions filtering by conf-threshold 184 | x = x[x[..., 4] > conf_threshold] 185 | 186 | # Create a new matrix which merge these(box, score, pose) into one 187 | # For more details about `numpy.c_()`: https://numpy.org/doc/1.26/reference/generated/numpy.c_.html 188 | x = np.c_[x[..., :4], x[..., 4], x[..., 6:]] 189 | 190 | # NMS filtering 191 | # 经过NMS后的值, np.array([[x, y, w, h, conf, pose], ...]), shape=(-1, 4 + 1 + 17*3) 192 | x = x[cv2.dnn.NMSBoxes(x[:, :4], x[:, 4], conf_threshold, iou_threshold)] 193 | 194 | # 重新缩放边界框,为画图做准备 195 | if len(x) > 0: 196 | # Bounding boxes format change: cxcywh -> xyxy 197 | x[..., [0, 1]] -= x[..., [2, 3]] / 2 198 | x[..., [2, 3]] += x[..., [0, 1]] 199 | 200 | # Rescales bounding boxes from model shape(model_height, model_width) to the shape of original image 201 | x[..., :4] -= [pad_w, pad_h, pad_w, pad_h] 202 | x[..., :4] /= min(ratio) 203 | 204 | # Bounding boxes boundary clamp 205 | x[..., [0, 2]] = x[:, [0, 2]].clip(0, im0.shape[1]) # clip避免边界框超出图像边界 206 | x[..., [1, 3]] = x[:, [1, 3]].clip(0, im0.shape[0]) 207 | 208 | # 关键点坐标映射到原图上,从[:, 5:]开始算 209 | num_kpts = x.shape[1] // 3 # 56 // 3 = 18,循环从kid=2起,恰好覆盖17个关键点的x、y列 210 | for kid in range(2, num_kpts + 1): 211 | x[:, kid * 3 - 1] = (x[:, kid * 3 - 1] - pad_w) / min(ratio) 212 | x[:, kid * 3] = (x[:, kid * 3] - pad_h) / min(ratio) 213 | 214 | return x 215 | else: 216 | return [] 217 | 218 | # 绘框 219 | def draw_and_visualize(self, im, bboxes, keypoint_draw, vis=False, save=True): 220 | """ 221 | Draw and visualize results. 222 | 223 | Args: 224 | im (np.ndarray): original image, shape [h, w, c]. 225 | bboxes (numpy.ndarray): [n, 56], n is number of bboxes. 226 | vis (bool): imshow using OpenCV. 227 | save (bool): save image annotated.
228 | 229 | Returns: 230 | None 231 | """ 232 | 233 | # Draw rectangles 234 | for bbox in bboxes: 235 | box, conf, kpts = bbox[:4], bbox[4], bbox[5:] 236 | # draw bbox rectangle 237 | cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), 238 | self.color, 1, cv2.LINE_AA) 239 | cv2.putText(im, f'{self.classes[0]}: {conf:.3f}', (int(box[0]), int(box[1] - 9)), 240 | cv2.FONT_HERSHEY_SIMPLEX, 0.7, self.color, 2, cv2.LINE_AA) 241 | 242 | # 画关键点,连线 243 | keypoint_draw.plot_skeleton_kpts(im, kpts) 244 | 245 | # Show image 246 | if vis: 247 | cv2.imshow('demo', im) 248 | cv2.waitKey(0) 249 | cv2.destroyAllWindows() 250 | 251 | # Save image 252 | if save: 253 | cv2.imwrite('demo.jpg', im) 254 | 255 | 256 | if __name__ == '__main__': 257 | # Create an argument parser to handle command-line arguments 258 | parser = argparse.ArgumentParser() 259 | parser.add_argument('--model', type=str, default='weights/yolov7-w6-pose.onnx', help='Path to ONNX model') 260 | parser.add_argument('--source', type=str, default=str('bus.jpg'), help='Path to input image') 261 | parser.add_argument('--imgsz', type=tuple, default=(640, 640), help='Image input size') 262 | parser.add_argument('--conf', type=float, default=0.25, help='Confidence threshold') 263 | parser.add_argument('--iou', type=float, default=0.45, help='NMS IoU threshold') 264 | parser.add_argument('--infer_tool', type=str, default='openvino', choices=("openvino", "onnxruntime"), help='选择推理引擎') 265 | args = parser.parse_args() 266 | 267 | # Build model 268 | model = YOLOv7_pose(args.model, args.imgsz, args.infer_tool) 269 | keypoint_draw = KeyPoint_draw() # 可视化关键点 270 | 271 | # Read image by OpenCV 272 | img = cv2.imread(args.source) 273 | 274 | # Inference 275 | boxes = model(img, conf_threshold=args.conf, iou_threshold=args.iou) 276 | 277 | # Visualize 278 | if len(boxes) > 0: 279 | model.draw_and_visualize(img, boxes, keypoint_draw, vis=False, save=True) 280 | 281 | -------------------------------------------------------------------------------- /YOLOv7_bytetrack.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | import cv2 4 | import numpy as np 5 | from openvino.runtime import Core # pip install openvino -i https://pypi.tuna.tsinghua.edu.cn/simple 6 | import onnxruntime as ort # 使用onnxruntime推理用上,pip install onnxruntime,默认安装CPU 7 | 8 | import copy 9 | from bytetrack.byte_tracker import BYTETracker 10 | 11 | # COCO默认的80类 12 | CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 13 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 14 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 15 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 16 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 17 | 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 18 | 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 19 | 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] 20 | 21 | 22 | class OpenvinoInference(object): 23 | def __init__(self, onnx_path): 24 | self.onnx_path = onnx_path 25 | ie = Core() 26 | 
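# OpenVINO reads the ONNX graph directly: read_model() parses it into an openvino
# Model object, and compile_model() lowers it for the chosen device ("CPU" here).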
self.model_onnx = ie.read_model(model=self.onnx_path) 27 | self.compiled_model_onnx = ie.compile_model(model=self.model_onnx, device_name="CPU") 28 | self.output_layer_onnx = self.compiled_model_onnx.output(0) 29 | 30 | def predict(self, datas): 31 | predict_data = self.compiled_model_onnx([datas])[self.output_layer_onnx] 32 | return predict_data 33 | 34 | 35 | class YOLOv7: 36 | """YOLOv7 object detection model class for handling inference and visualization.""" 37 | 38 | def __init__(self, onnx_model, imgsz=(640, 640), infer_tool='openvino'): 39 | """ 40 | Initialization. 41 | 42 | Args: 43 | onnx_model (str): Path to the ONNX model. 44 | """ 45 | self.infer_tool = infer_tool 46 | if self.infer_tool == 'openvino': 47 | # 构建openvino推理引擎 48 | self.openvino = OpenvinoInference(onnx_model) 49 | self.ndtype = np.single 50 | else: 51 | # 构建onnxruntime推理引擎 52 | self.ort_session = ort.InferenceSession(onnx_model, 53 | providers=['CUDAExecutionProvider', 'CPUExecutionProvider'] 54 | if ort.get_device() == 'GPU' else ['CPUExecutionProvider']) 55 | 56 | # Numpy dtype: support both FP32 and FP16 onnx model 57 | self.ndtype = np.half if self.ort_session.get_inputs()[0].type == 'tensor(float16)' else np.single 58 | 59 | self.classes = CLASSES # 加载模型类别 60 | self.model_height, self.model_width = imgsz[0], imgsz[1] # 图像resize大小 61 | self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3)) # 为每个类别生成调色板 62 | 63 | def __call__(self, im0, conf_threshold=0.4, iou_threshold=0.45): 64 | """ 65 | The whole pipeline: pre-process -> inference -> post-process. 66 | 67 | Args: 68 | im0 (Numpy.ndarray): original input image. 69 | conf_threshold (float): confidence threshold for filtering predictions. 70 | iou_threshold (float): iou threshold for NMS. 71 | 72 | Returns: 73 | boxes (List): list of bounding boxes. 74 | """ 75 | # 前处理Pre-process 76 | t1 = time.time() 77 | im, ratio, (pad_w, pad_h) = self.preprocess(im0) 78 | print('预处理时间:{:.3f}s'.format(time.time() - t1)) 79 | 80 | # 推理 inference 81 | t2 = time.time() 82 | if self.infer_tool == 'openvino': 83 | preds = self.openvino.predict(im) 84 | else: 85 | preds = self.ort_session.run(None, {self.ort_session.get_inputs()[0].name: im})[0] 86 | print('推理时间:{:.3f}s'.format(time.time() - t2)) 87 | 88 | # 后处理Post-process 89 | t3 = time.time() 90 | boxes = self.postprocess(preds, 91 | im0=im0, 92 | ratio=ratio, 93 | pad_w=pad_w, 94 | pad_h=pad_h, 95 | conf_threshold=conf_threshold, 96 | iou_threshold=iou_threshold, 97 | ) 98 | print('后处理时间:{:.3f}s'.format(time.time() - t3)) 99 | 100 | return boxes 101 | 102 | # 前处理,包括:resize, pad, HWC to CHW,BGR to RGB,归一化,增加维度CHW -> BCHW 103 | def preprocess(self, img): 104 | """ 105 | Pre-processes the input image. 106 | 107 | Args: 108 | img (Numpy.ndarray): image about to be processed. 109 | 110 | Returns: 111 | img_process (Numpy.ndarray): image preprocessed for inference. 112 | ratio (tuple): width, height ratios in letterbox. 113 | pad_w (float): width padding in letterbox. 114 | pad_h (float): height padding in letterbox. 
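        Example (worked letterbox numbers; a 1080x810 frame and the 640x640 model
        input are assumed purely for illustration):
            r = min(640 / 1080, 640 / 810)       # ~0.5926, uniform scale
            new_unpad = (480, 640)               # (w, h) after cv2.resize
            pad_w, pad_h = 80.0, 0.0             # (640 - 480) / 2 and (640 - 640) / 2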
115 | """ 116 | # Resize and pad input image using letterbox() (Borrowed from Ultralytics) 117 | shape = img.shape[:2] # original image shape 118 | new_shape = (self.model_height, self.model_width) 119 | r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) 120 | ratio = r, r 121 | new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) 122 | pad_w, pad_h = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2 # wh padding 123 | if shape[::-1] != new_unpad: # resize 124 | img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) 125 | top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1)) 126 | left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1)) 127 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)) # 填充 128 | 129 | # Transforms: HWC to CHW -> BGR to RGB -> div(255) -> contiguous -> add axis(optional) 130 | img = np.ascontiguousarray(np.einsum('HWC->CHW', img)[::-1], dtype=self.ndtype) / 255.0 131 | img_process = img[None] if len(img.shape) == 3 else img 132 | return img_process, ratio, (pad_w, pad_h) 133 | 134 | # 后处理,包括:阈值过滤与NMS 135 | def postprocess(self, preds, im0, ratio, pad_w, pad_h, conf_threshold, iou_threshold): 136 | """ 137 | Post-process the prediction. 138 | 139 | Args: 140 | preds (Numpy.ndarray): predictions come from ort.session.run(). 141 | im0 (Numpy.ndarray): [h, w, c] original input image. 142 | ratio (tuple): width, height ratios in letterbox. 143 | pad_w (float): width padding in letterbox. 144 | pad_h (float): height padding in letterbox. 145 | conf_threshold (float): conf threshold. 146 | iou_threshold (float): iou threshold. 147 | 148 | Returns: 149 | boxes (List): list of bounding boxes. 150 | """ 151 | # (Batch_size, Num_anchors, xywh_score_conf_cls), v5和v6_1.0的[..., 4]是置信度分数,v8v9采用类别里面最大的概率作为置信度score 152 | x = preds # outputs: predictions (1, 8400*3, 85) 153 | 154 | # Predictions filtering by conf-threshold 155 | x = x[x[..., 4] > conf_threshold] 156 | 157 | # Create a new matrix which merge these(box, score, cls) into one 158 | # For more details about `numpy.c_()`: https://numpy.org/doc/1.26/reference/generated/numpy.c_.html 159 | x = np.c_[x[..., :4], x[..., 4], np.argmax(x[..., 5:], axis=-1)] 160 | 161 | # NMS filtering 162 | # 经过NMS后的值, np.array([[x, y, w, h, conf, cls], ...]), shape=(-1, 4 + 1 + 1) 163 | x = x[cv2.dnn.NMSBoxes(x[:, :4], x[:, 4], conf_threshold, iou_threshold)] 164 | 165 | # 重新缩放边界框,为画图做准备 166 | if len(x) > 0: 167 | # Bounding boxes format change: cxcywh -> xyxy 168 | x[..., [0, 1]] -= x[..., [2, 3]] / 2 169 | x[..., [2, 3]] += x[..., [0, 1]] 170 | 171 | # Rescales bounding boxes from model shape(model_height, model_width) to the shape of original image 172 | x[..., :4] -= [pad_w, pad_h, pad_w, pad_h] 173 | x[..., :4] /= min(ratio) 174 | 175 | # Bounding boxes boundary clamp 176 | x[..., [0, 2]] = x[:, [0, 2]].clip(0, im0.shape[1]) 177 | x[..., [1, 3]] = x[:, [1, 3]].clip(0, im0.shape[0]) 178 | 179 | return x[..., :6] # boxes 180 | else: 181 | return [] 182 | 183 | # 绘框 184 | def draw_and_visualize(self, im, bboxes, video_writer, vis=False, save=False, is_track=False): 185 | """ 186 | Draw and visualize results. 187 | 188 | Args: 189 | im (np.ndarray): original image, shape [h, w, c]. 190 | bboxes (numpy.ndarray): [n, 6], n is number of bboxes. 191 | vis (bool): imshow using OpenCV. 192 | save (bool): save image annotated. 
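            Note: with is_track=False each row is [x1, y1, x2, y2, conf, cls]; with
            is_track=True the last column is the ByteTrack id returned by
            ByteTrackerONNX.inference() instead of the class index.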
193 | 194 | Returns: 195 | None 196 | """ 197 | # Draw rectangles 198 | if not is_track: 199 | for (*box, conf, cls_) in bboxes: 200 | # draw bbox rectangle 201 | cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), 202 | self.color_palette[int(cls_)], 1, cv2.LINE_AA) 203 | cv2.putText(im, f'{self.classes[int(cls_)]}: {conf:.3f}', (int(box[0]), int(box[1] - 9)), 204 | cv2.FONT_HERSHEY_SIMPLEX, 0.7, self.color_palette[int(cls_)], 2, cv2.LINE_AA) 205 | else: 206 | for (*box, conf, id_) in bboxes: 207 | # draw bbox rectangle 208 | cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), 209 | (0, 0, 255), 1, cv2.LINE_AA) 210 | cv2.putText(im, f'{id_}: {conf:.3f}', (int(box[0]), int(box[1] - 9)), 211 | cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2, cv2.LINE_AA) 212 | 213 | # Show image 214 | if vis: 215 | cv2.imshow('demo', im) 216 | cv2.waitKey(1) 217 | 218 | # Save video 219 | if save: 220 | video_writer.write(im) 221 | 222 | 223 | class ByteTrackerONNX(object): 224 | def __init__(self, args): 225 | self.args = args 226 | self.tracker = BYTETracker(args, frame_rate=30) 227 | 228 | def _tracker_update(self, dets, image): 229 | online_targets = [] 230 | if dets is not None: 231 | online_targets = self.tracker.update( 232 | dets[:, :5], 233 | [image.shape[0], image.shape[1]], 234 | [image.shape[0], image.shape[1]], 235 | ) 236 | 237 | online_tlwhs = [] 238 | online_ids = [] 239 | online_scores = [] 240 | for online_target in online_targets: 241 | tlwh = online_target.tlwh 242 | track_id = online_target.track_id 243 | vertical = tlwh[2] / tlwh[3] > 1.6 244 | if tlwh[2] * tlwh[3] > self.args.min_box_area and not vertical: 245 | online_tlwhs.append(tlwh) 246 | online_ids.append(track_id) 247 | online_scores.append(online_target.score) 248 | 249 | return online_tlwhs, online_ids, online_scores 250 | 251 | 252 | def inference(self, image, dets): 253 | """ 254 | Args: dets: 检测结果, [x1, y1, x2, y2, conf, cls] 255 | Returns: np.array([[x1, y1, x2, y2, conf, ids], ...]) 256 | """ 257 | bboxes, ids, scores = self._tracker_update(dets, image) 258 | if len(bboxes) == 0: 259 | return [] 260 | # Bounding boxes format change: tlwh -> xyxy 261 | bboxes = np.array(bboxes) 262 | bboxes[..., [2, 3]] += bboxes[..., [0, 1]] 263 | bboxes = np.c_[bboxes, np.array(scores), np.array(ids)] 264 | return bboxes 265 | 266 | 267 | if __name__ == '__main__': 268 | # Create an argument parser to handle command-line arguments 269 | parser = argparse.ArgumentParser() 270 | parser.add_argument('--model', type=str, default='weights/yolov7.onnx', help='Path to ONNX model') 271 | parser.add_argument('--source', type=str, default=str('test.mp4'), help='Path to input image') 272 | parser.add_argument('--imgsz', type=tuple, default=(640, 640), help='Image input size') 273 | parser.add_argument('--conf', type=float, default=0.25, help='Confidence threshold') 274 | parser.add_argument('--iou', type=float, default=0.45, help='NMS IoU threshold') 275 | parser.add_argument('--infer_tool', type=str, default='openvino', choices=("openvino", "onnxruntime"), help='选择推理引擎') 276 | 277 | parser.add_argument('--is_track', type=bool, default=True, help='是否启用跟踪') 278 | parser.add_argument('--track_thresh', type=float, default=0.5, help='tracking confidence threshold') 279 | parser.add_argument('--track_buffer', type=int, default=30, help='the frames for keep lost tracks, usually as same with FPS') 280 | parser.add_argument('--match_thresh', type=float, default=0.8, help='matching threshold for tracking') 281 | 
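    # Note: argparse's type=bool treats any non-empty string as True, so passing
    # --is_track False on the command line still yields True; an
    # action='store_true' style flag is the usual fix.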
parser.add_argument('--min_box_area', type=float, default=10, help='filter out tiny boxes',) 282 | parser.add_argument('--mot20', dest='mot20', default=False, action='store_true', help='test mot20.',) 283 | args = parser.parse_args() 284 | 285 | # Build model 286 | model = YOLOv7(args.model, args.imgsz, args.infer_tool) 287 | 288 | bytetrack = ByteTrackerONNX(args) 289 | 290 | # 读取视频,解析帧数宽高,保存视频 291 | cap = cv2.VideoCapture(args.source) 292 | width = cap.get(cv2.CAP_PROP_FRAME_WIDTH) 293 | height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) 294 | fps = cap.get(cv2.CAP_PROP_FPS) 295 | frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT) 296 | video_writer = cv2.VideoWriter('demo.mp4', cv2.VideoWriter_fourcc(*"mp4v"), fps, (int(width), int(height))) 297 | frame_id = 1 298 | 299 | while True: 300 | start_time = time.time() 301 | ret, img = cap.read() 302 | if not ret: 303 | break 304 | 305 | # Inference 306 | boxes = model(img, conf_threshold=args.conf, iou_threshold=args.iou) 307 | 308 | # track 309 | if args.is_track: 310 | boxes = bytetrack.inference(img, boxes) 311 | 312 | # Visualize 313 | if len(boxes) > 0: 314 | model.draw_and_visualize(copy.deepcopy(img), boxes, video_writer, vis=True, save=False, is_track=args.is_track) 315 | 316 | end_time = time.time() - start_time 317 | print('frame {}/{} (Total time: {:.2f} ms)'.format(frame_id, int(frame_count), end_time * 1000)) 318 | frame_id += 1 319 | 320 | 321 | 322 | -------------------------------------------------------------------------------- /YOLOv7_openvino_onnx.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | import cv2 4 | import numpy as np 5 | from openvino.runtime import Core # pip install openvino -i https://pypi.tuna.tsinghua.edu.cn/simple 6 | import onnxruntime as ort # 使用onnxruntime推理用上,pip install onnxruntime,默认安装CPU 7 | 8 | 9 | # COCO默认的80类 10 | CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 11 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 12 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 13 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 14 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 15 | 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 16 | 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 17 | 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] 18 | 19 | 20 | class OpenvinoInference(object): 21 | def __init__(self, onnx_path): 22 | self.onnx_path = onnx_path 23 | ie = Core() 24 | self.model_onnx = ie.read_model(model=self.onnx_path) 25 | self.compiled_model_onnx = ie.compile_model(model=self.model_onnx, device_name="CPU") 26 | self.output_layer_onnx = self.compiled_model_onnx.output(0) 27 | 28 | def predict(self, datas): 29 | predict_data = self.compiled_model_onnx([datas])[self.output_layer_onnx] 30 | return predict_data 31 | 32 | 33 | class YOLOv7: 34 | """YOLOv7 object detection model class for handling inference and visualization.""" 35 | 36 | def __init__(self, onnx_model, imgsz=(640, 640), infer_tool='openvino'): 37 | """ 38 | Initialization. 
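        Note: with infer_tool='onnxruntime', CUDAExecutionProvider is only requested
        when ort.get_device() reports 'GPU' (i.e. the onnxruntime-gpu build is
        installed); otherwise inference stays on CPUExecutionProvider, and the numpy
        dtype follows the model input (FP16 -> np.half, FP32 -> np.single).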
39 | 40 | Args: 41 | onnx_model (str): Path to the ONNX model. 42 | """ 43 | self.infer_tool = infer_tool 44 | if self.infer_tool == 'openvino': 45 | # 构建openvino推理引擎 46 | self.openvino = OpenvinoInference(onnx_model) 47 | self.ndtype = np.single 48 | else: 49 | # 构建onnxruntime推理引擎 50 | self.ort_session = ort.InferenceSession(onnx_model, 51 | providers=['CUDAExecutionProvider', 'CPUExecutionProvider'] 52 | if ort.get_device() == 'GPU' else ['CPUExecutionProvider']) 53 | 54 | # Numpy dtype: support both FP32 and FP16 onnx model 55 | self.ndtype = np.half if self.ort_session.get_inputs()[0].type == 'tensor(float16)' else np.single 56 | 57 | self.classes = CLASSES # 加载模型类别 58 | self.model_height, self.model_width = imgsz[0], imgsz[1] # 图像resize大小 59 | self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3)) # 为每个类别生成调色板 60 | 61 | def __call__(self, im0, conf_threshold=0.4, iou_threshold=0.45): 62 | """ 63 | The whole pipeline: pre-process -> inference -> post-process. 64 | 65 | Args: 66 | im0 (Numpy.ndarray): original input image. 67 | conf_threshold (float): confidence threshold for filtering predictions. 68 | iou_threshold (float): iou threshold for NMS. 69 | 70 | Returns: 71 | boxes (List): list of bounding boxes. 72 | """ 73 | # 前处理Pre-process 74 | t1 = time.time() 75 | im, ratio, (pad_w, pad_h) = self.preprocess(im0) 76 | print('预处理时间:{:.3f}s'.format(time.time() - t1)) 77 | 78 | # 推理 inference 79 | t2 = time.time() 80 | if self.infer_tool == 'openvino': 81 | preds = self.openvino.predict(im) 82 | else: 83 | preds = self.ort_session.run(None, {self.ort_session.get_inputs()[0].name: im})[0] 84 | print('推理时间:{:.2f}s'.format(time.time() - t2)) 85 | 86 | # 后处理Post-process 87 | t3 = time.time() 88 | boxes = self.postprocess(preds, 89 | im0=im0, 90 | ratio=ratio, 91 | pad_w=pad_w, 92 | pad_h=pad_h, 93 | conf_threshold=conf_threshold, 94 | iou_threshold=iou_threshold, 95 | ) 96 | print('后处理时间:{:.3f}s'.format(time.time() - t3)) 97 | 98 | return boxes 99 | 100 | # 前处理,包括:resize, pad, HWC to CHW,BGR to RGB,归一化,增加维度CHW -> BCHW 101 | def preprocess(self, img): 102 | """ 103 | Pre-processes the input image. 104 | 105 | Args: 106 | img (Numpy.ndarray): image about to be processed. 107 | 108 | Returns: 109 | img_process (Numpy.ndarray): image preprocessed for inference. 110 | ratio (tuple): width, height ratios in letterbox. 111 | pad_w (float): width padding in letterbox. 112 | pad_h (float): height padding in letterbox. 
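        Note: np.einsum('HWC->CHW', img) moves the channel axis first, and the [::-1]
        applied afterwards reverses that channel axis, i.e. OpenCV's BGR order becomes
        the RGB order the model expects, before the / 255.0 normalization.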
113 | """ 114 | # Resize and pad input image using letterbox() (Borrowed from Ultralytics) 115 | shape = img.shape[:2] # original image shape 116 | new_shape = (self.model_height, self.model_width) 117 | r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) 118 | ratio = r, r 119 | new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) 120 | pad_w, pad_h = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2 # wh padding 121 | if shape[::-1] != new_unpad: # resize 122 | img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) 123 | top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1)) 124 | left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1)) 125 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)) # 填充 126 | 127 | # Transforms: HWC to CHW -> BGR to RGB -> div(255) -> contiguous -> add axis(optional) 128 | img = np.ascontiguousarray(np.einsum('HWC->CHW', img)[::-1], dtype=self.ndtype) / 255.0 129 | img_process = img[None] if len(img.shape) == 3 else img 130 | return img_process, ratio, (pad_w, pad_h) 131 | 132 | # 后处理,包括:阈值过滤与NMS 133 | def postprocess(self, preds, im0, ratio, pad_w, pad_h, conf_threshold, iou_threshold): 134 | """ 135 | Post-process the prediction. 136 | 137 | Args: 138 | preds (Numpy.ndarray): predictions come from ort.session.run(). 139 | im0 (Numpy.ndarray): [h, w, c] original input image. 140 | ratio (tuple): width, height ratios in letterbox. 141 | pad_w (float): width padding in letterbox. 142 | pad_h (float): height padding in letterbox. 143 | conf_threshold (float): conf threshold. 144 | iou_threshold (float): iou threshold. 145 | 146 | Returns: 147 | boxes (List): list of bounding boxes. 148 | """ 149 | # (Batch_size, Num_anchors, xywh_score_conf_cls), v5和v6_1.0的[..., 4]是置信度分数,v8v9采用类别里面最大的概率作为置信度score 150 | x = preds # outputs: predictions (1, 8400*3, 85) 151 | 152 | # Predictions filtering by conf-threshold 153 | x = x[x[..., 4] > conf_threshold] 154 | 155 | # Create a new matrix which merge these(box, score, cls) into one 156 | # For more details about `numpy.c_()`: https://numpy.org/doc/1.26/reference/generated/numpy.c_.html 157 | x = np.c_[x[..., :4], x[..., 4], np.argmax(x[..., 5:], axis=-1)] 158 | 159 | # NMS filtering 160 | # 经过NMS后的值, np.array([[x, y, w, h, conf, cls], ...]), shape=(-1, 4 + 1 + 1) 161 | x = x[cv2.dnn.NMSBoxes(x[:, :4], x[:, 4], conf_threshold, iou_threshold)] 162 | 163 | # 重新缩放边界框,为画图做准备 164 | if len(x) > 0: 165 | # Bounding boxes format change: cxcywh -> xyxy 166 | x[..., [0, 1]] -= x[..., [2, 3]] / 2 167 | x[..., [2, 3]] += x[..., [0, 1]] 168 | 169 | # Rescales bounding boxes from model shape(model_height, model_width) to the shape of original image 170 | x[..., :4] -= [pad_w, pad_h, pad_w, pad_h] 171 | x[..., :4] /= min(ratio) 172 | 173 | # Bounding boxes boundary clamp 174 | x[..., [0, 2]] = x[:, [0, 2]].clip(0, im0.shape[1]) 175 | x[..., [1, 3]] = x[:, [1, 3]].clip(0, im0.shape[0]) 176 | 177 | return x[..., :6] # boxes 178 | else: 179 | return [] 180 | 181 | # 绘框 182 | def draw_and_visualize(self, im, bboxes, vis=False, save=True): 183 | """ 184 | Draw and visualize results. 185 | 186 | Args: 187 | im (np.ndarray): original image, shape [h, w, c]. 188 | bboxes (numpy.ndarray): [n, 6], n is number of bboxes. 189 | vis (bool): imshow using OpenCV. 190 | save (bool): save image annotated. 
191 | 192 | Returns: 193 | None 194 | """ 195 | # Draw rectangles 196 | for (*box, conf, cls_) in bboxes: 197 | # draw bbox rectangle 198 | cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), 199 | self.color_palette[int(cls_)], 1, cv2.LINE_AA) 200 | cv2.putText(im, f'{self.classes[int(cls_)]}: {conf:.3f}', (int(box[0]), int(box[1] - 9)), 201 | cv2.FONT_HERSHEY_SIMPLEX, 0.7, self.color_palette[int(cls_)], 2, cv2.LINE_AA) 202 | 203 | # Show image 204 | if vis: 205 | cv2.imshow('demo', im) 206 | cv2.waitKey(0) 207 | cv2.destroyAllWindows() 208 | 209 | # Save image 210 | if save: 211 | cv2.imwrite('demo.jpg', im) 212 | 213 | 214 | if __name__ == '__main__': 215 | # Create an argument parser to handle command-line arguments 216 | parser = argparse.ArgumentParser() 217 | parser.add_argument('--model', type=str, default='weights/yolov7.onnx', help='Path to ONNX model') 218 | parser.add_argument('--source', type=str, default=str('bus.jpg'), help='Path to input image') 219 | parser.add_argument('--imgsz', type=tuple, default=(640, 640), help='Image input size') 220 | parser.add_argument('--conf', type=float, default=0.25, help='Confidence threshold') 221 | parser.add_argument('--iou', type=float, default=0.45, help='NMS IoU threshold') 222 | parser.add_argument('--infer_tool', type=str, default='openvino', choices=("openvino", "onnxruntime"), help='选择推理引擎') 223 | args = parser.parse_args() 224 | 225 | # Build model 226 | model = YOLOv7(args.model, args.imgsz, args.infer_tool) 227 | 228 | # Read image by OpenCV 229 | img = cv2.imread(args.source) 230 | 231 | # Inference 232 | boxes = model(img, conf_threshold=args.conf, iou_threshold=args.iou) 233 | 234 | # Visualize 235 | if len(boxes) > 0: 236 | model.draw_and_visualize(img, boxes, vis=False, save=True) 237 | 238 | 239 | -------------------------------------------------------------------------------- /YOLOv8-pose_openvino_onnx.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | import cv2 4 | import numpy as np 5 | from openvino.runtime import Core # pip install openvino -i https://pypi.tuna.tsinghua.edu.cn/simple 6 | import onnxruntime as ort # 使用onnxruntime推理用上,pip install onnxruntime,默认安装CPU 7 | 8 | 9 | # Pose默认的person类 10 | CLASSES = ['person'] 11 | 12 | class OpenvinoInference(object): 13 | def __init__(self, onnx_path): 14 | self.onnx_path = onnx_path 15 | ie = Core() 16 | self.model_onnx = ie.read_model(model=self.onnx_path) 17 | self.compiled_model_onnx = ie.compile_model(model=self.model_onnx, device_name="CPU") 18 | self.output_layer_onnx = self.compiled_model_onnx.output(0) 19 | 20 | def predict(self, datas): 21 | predict_data = self.compiled_model_onnx([datas])[self.output_layer_onnx] 22 | return predict_data 23 | 24 | 25 | class KeyPoint_draw(object): 26 | def __init__(self): 27 | # 定义一个调色板数组,其中每个元素是一个包含RGB值的列表,用于表示不同的颜色 28 | self.palette = np.array([[255, 128, 0], [255, 153, 51], [255, 178, 102], 29 | [230, 230, 0], [255, 153, 255], [153, 204, 255], 30 | [255, 102, 255], [255, 51, 255], [102, 178, 255], 31 | [51, 153, 255], [255, 153, 153], [255, 102, 102], 32 | [255, 51, 51], [153, 255, 153], [102, 255, 102], 33 | [51, 255, 51], [0, 255, 0], [0, 0, 255], [255, 0, 0], 34 | [255, 255, 255]]) 35 | # 定义人体17个关键点的连接顺序,每个子列表包含两个数字,代表要连接的关键点的索引, 1鼻子 2左眼 3右眼 4左耳 5右耳 6左肩 7右肩 36 | # 8左肘 9右肘 10左手腕 11右手腕 12左髋 13右髋 14左膝 15右膝 16左踝 17右踝 37 | self.skeleton = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], 38 | [7, 13], [6, 7], [6, 8], [7, 9],
[8, 10], [9, 11], [2, 3], 39 | [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7]] 40 | # 通过索引从调色板中选择颜色,用于绘制人体骨架的线条,每个索引对应一种颜色 41 | self.pose_limb_color = self.palette[[9, 9, 9, 9, 7, 7, 7, 0, 0, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16]] 42 | # 通过索引从调色板中选择颜色,用于绘制人体的关键点,每个索引对应一种颜色 43 | self.pose_kpt_color = self.palette[[16, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9]] 44 | 45 | def plot_skeleton_kpts(self, im, kpts, steps=3): 46 | num_kpts = len(kpts) // steps # 51 / 3 =17 47 | # 画点 48 | for kid in range(num_kpts): 49 | r, g, b = self.pose_kpt_color[kid] 50 | x_coord, y_coord = kpts[steps * kid], kpts[steps * kid + 1] 51 | conf = kpts[steps * kid + 2] 52 | if conf > 0.5: # 关键点的置信度必须大于 0.5 53 | cv2.circle(im, (int(x_coord), int(y_coord)), 10, (int(r), int(g), int(b)), -1) 54 | # 画骨架 55 | for sk_id, sk in enumerate(self.skeleton): 56 | r, g, b = self.pose_limb_color[sk_id] 57 | pos1 = (int(kpts[(sk[0] - 1) * steps]), int(kpts[(sk[0] - 1) * steps + 1])) 58 | pos2 = (int(kpts[(sk[1] - 1) * steps]), int(kpts[(sk[1] - 1) * steps + 1])) 59 | conf1 = kpts[(sk[0] - 1) * steps + 2] 60 | conf2 = kpts[(sk[1] - 1) * steps + 2] 61 | if conf1 > 0.5 and conf2 > 0.5: # 对于肢体,相连的两个关键点置信度 必须同时大于 0.5 62 | cv2.line(im, pos1, pos2, (int(r), int(g), int(b)), thickness=2) 63 | 64 | 65 | class YOLOv8_pose: 66 | """YOLOv8_pose detection model class for handling inference and visualization.""" 67 | 68 | def __init__(self, onnx_model, imgsz=(640, 640), infer_tool='openvino'): 69 | """ 70 | Initialization. 71 | 72 | Args: 73 | onnx_model (str): Path to the ONNX model. 74 | """ 75 | self.infer_tool = infer_tool 76 | if self.infer_tool == 'openvino': 77 | # 构建openvino推理引擎 78 | self.openvino = OpenvinoInference(onnx_model) 79 | self.ndtype = np.single 80 | else: 81 | # 构建onnxruntime推理引擎 82 | self.ort_session = ort.InferenceSession(onnx_model, 83 | providers=['CUDAExecutionProvider', 'CPUExecutionProvider'] 84 | if ort.get_device() == 'GPU' else ['CPUExecutionProvider']) 85 | 86 | # Numpy dtype: support both FP32 and FP16 onnx model 87 | self.ndtype = np.half if self.ort_session.get_inputs()[0].type == 'tensor(float16)' else np.single 88 | 89 | self.classes = CLASSES # 加载模型类别 90 | self.model_height, self.model_width = imgsz[0], imgsz[1] # 图像resize大小 91 | self.color = (0, 0, 255) # 为类别生成调色板 92 | 93 | def __call__(self, im0, conf_threshold=0.4, iou_threshold=0.45): 94 | """ 95 | The whole pipeline: pre-process -> inference -> post-process. 96 | 97 | Args: 98 | im0 (Numpy.ndarray): original input image. 99 | conf_threshold (float): confidence threshold for filtering predictions. 100 | iou_threshold (float): iou threshold for NMS. 101 | 102 | Returns: 103 | boxes (List): list of bounding boxes. 
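            Note: unlike YOLOv7-pose there is no separate objectness column here;
            the raw YOLOv8-pose output is (1, 56, 8400) with 56 = 4 box + 1 person
            score + 17*3 keypoints, which postprocess() transposes to (1, 8400, 56)
            before filtering.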
104 | """ 105 | # 前处理Pre-process 106 | t1 = time.time() 107 | im, ratio, (pad_w, pad_h) = self.preprocess(im0) 108 | print('预处理时间:{:.3f}s'.format(time.time() - t1)) 109 | 110 | # 推理 inference 111 | t2 = time.time() 112 | if self.infer_tool == 'openvino': 113 | preds = self.openvino.predict(im) 114 | else: 115 | preds = self.ort_session.run(None, {self.ort_session.get_inputs()[0].name: im})[0] 116 | print('推理时间:{:.2f}s'.format(time.time() - t2)) 117 | 118 | # 后处理Post-process 119 | t3 = time.time() 120 | boxes = self.postprocess(preds, 121 | im0=im0, 122 | ratio=ratio, 123 | pad_w=pad_w, 124 | pad_h=pad_h, 125 | conf_threshold=conf_threshold, 126 | iou_threshold=iou_threshold, 127 | ) 128 | print('后处理时间:{:.3f}s'.format(time.time() - t3)) 129 | 130 | return boxes 131 | 132 | # 前处理,包括:resize, pad, HWC to CHW,BGR to RGB,归一化,增加维度CHW -> BCHW 133 | def preprocess(self, img): 134 | """ 135 | Pre-processes the input image. 136 | 137 | Args: 138 | img (Numpy.ndarray): image about to be processed. 139 | 140 | Returns: 141 | img_process (Numpy.ndarray): image preprocessed for inference. 142 | ratio (tuple): width, height ratios in letterbox. 143 | pad_w (float): width padding in letterbox. 144 | pad_h (float): height padding in letterbox. 145 | """ 146 | # Resize and pad input image using letterbox() (Borrowed from Ultralytics) 147 | shape = img.shape[:2] # original image shape 148 | new_shape = (self.model_height, self.model_width) 149 | r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) 150 | ratio = r, r 151 | new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) 152 | pad_w, pad_h = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2 # wh padding 153 | if shape[::-1] != new_unpad: # resize 154 | img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) 155 | top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1)) 156 | left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1)) 157 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)) # 填充 158 | 159 | # Transforms: HWC to CHW -> BGR to RGB -> div(255) -> contiguous -> add axis(optional) 160 | img = np.ascontiguousarray(np.einsum('HWC->CHW', img)[::-1], dtype=self.ndtype) / 255.0 161 | img_process = img[None] if len(img.shape) == 3 else img 162 | return img_process, ratio, (pad_w, pad_h) 163 | 164 | # 后处理,包括:阈值过滤与NMS 165 | def postprocess(self, preds, im0, ratio, pad_w, pad_h, conf_threshold, iou_threshold): 166 | """ 167 | Post-process the prediction. 168 | 169 | Args: 170 | preds (Numpy.ndarray): predictions come from ort.session.run(). 171 | im0 (Numpy.ndarray): [h, w, c] original input image. 172 | ratio (tuple): width, height ratios in letterbox. 173 | pad_w (float): width padding in letterbox. 174 | pad_h (float): height padding in letterbox. 175 | conf_threshold (float): conf threshold. 176 | iou_threshold (float): iou threshold. 177 | 178 | Returns: 179 | boxes (List): list of bounding boxes. 
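            Example (how the keypoint rescale loop walks the columns): a row
            after NMS is [x, y, w, h, conf, k1x, k1y, k1c, ..., k17x, k17y, k17c],
            so keypoint j (1-based) has its x at column 3*j + 2 and y at 3*j + 3;
            `for kid in range(2, 19)` touches columns kid*3 - 1 and kid*3, i.e.
            exactly the 17 x- and y-columns, leaving each keypoint's confidence
            untouched.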
180 | """ 181 | x = preds # outputs: predictions (1, 56, 8400),其中56=4+1+17*3,17个关键点(x,y,visibility) 182 | # Transpose the first output: (Batch_size, xywh_conf_pose, Num_anchors) -> (Batch_size, Num_anchors, xywh_conf_pose) 183 | x = np.einsum('bcn->bnc', x) # (1, 8400, 56) 184 | 185 | # Predictions filtering by conf-threshold 186 | x = x[x[..., 4] > conf_threshold] 187 | 188 | # Create a new matrix which merge these(box, score, pose) into one 189 | # For more details about `numpy.c_()`: https://numpy.org/doc/1.26/reference/generated/numpy.c_.html 190 | x = np.c_[x[..., :4], x[..., 4], x[..., 5:]] 191 | 192 | # NMS filtering 193 | # 经过NMS后的值, np.array([[x, y, w, h, conf, pose], ...]), shape=(-1, 4 + 1 + 17*3) 194 | x = x[cv2.dnn.NMSBoxes(x[:, :4], x[:, 4], conf_threshold, iou_threshold)] 195 | 196 | # 重新缩放边界框,为画图做准备 197 | if len(x) > 0: 198 | # Bounding boxes format change: cxcywh -> xyxy 199 | x[..., [0, 1]] -= x[..., [2, 3]] / 2 200 | x[..., [2, 3]] += x[..., [0, 1]] 201 | 202 | # Rescales bounding boxes from model shape(model_height, model_width) to the shape of original image 203 | x[..., :4] -= [pad_w, pad_h, pad_w, pad_h] 204 | x[..., :4] /= min(ratio) 205 | 206 | # Bounding boxes boundary clamp 207 | x[..., [0, 2]] = x[:, [0, 2]].clip(0, im0.shape[1]) # clip避免边界框超出图像边界 208 | x[..., [1, 3]] = x[:, [1, 3]].clip(0, im0.shape[0]) 209 | 210 | # 关键点坐标映射到原图上,从[:, 5:]开始算 211 | num_kpts = x.shape[1] // 3 # 56 // 3 = 18 212 | for kid in range(2, num_kpts + 1): 213 | x[:, kid * 3 - 1] = (x[:, kid * 3 - 1] - pad_w) / min(ratio) 214 | x[:, kid * 3] = (x[:, kid * 3] - pad_h) / min(ratio) 215 | 216 | return x 217 | else: 218 | return [] 219 | 220 | # 绘框 221 | def draw_and_visualize(self, im, bboxes, keypoint_draw, vis=False, save=True): 222 | """ 223 | Draw and visualize results. 224 | 225 | Args: 226 | im (np.ndarray): original image, shape [h, w, c]. 227 | bboxes (numpy.ndarray): [n, 56], n is number of bboxes. 228 | vis (bool): imshow using OpenCV. 229 | save (bool): save image annotated. 
230 | 231 | Returns: 232 | None 233 | """ 234 | 235 | # Draw rectangles 236 | for bbox in bboxes: 237 | box, conf, kpts = bbox[:4], bbox[4], bbox[5:] 238 | # draw bbox rectangle 239 | cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), 240 | self.color, 1, cv2.LINE_AA) 241 | cv2.putText(im, f'{self.classes[0]}: {conf:.3f}', (int(box[0]), int(box[1] - 9)), 242 | cv2.FONT_HERSHEY_SIMPLEX, 0.7, self.color, 2, cv2.LINE_AA) 243 | 244 | # 画关键点,连线 245 | keypoint_draw.plot_skeleton_kpts(im, kpts) 246 | 247 | # Show image 248 | if vis: 249 | cv2.imshow('demo', im) 250 | cv2.waitKey(0) 251 | cv2.destroyAllWindows() 252 | 253 | # Save image 254 | if save: 255 | cv2.imwrite('demo.jpg', im) 256 | 257 | 258 | if __name__ == '__main__': 259 | # Create an argument parser to handle command-line arguments 260 | parser = argparse.ArgumentParser() 261 | parser.add_argument('--model', type=str, default='weights/yolov8s-pose.onnx', help='Path to ONNX model') 262 | parser.add_argument('--source', type=str, default=str('bus.jpg'), help='Path to input image') 263 | parser.add_argument('--imgsz', type=tuple, default=(640, 640), help='Image input size') 264 | parser.add_argument('--conf', type=float, default=0.25, help='Confidence threshold') 265 | parser.add_argument('--iou', type=float, default=0.45, help='NMS IoU threshold') 266 | parser.add_argument('--infer_tool', type=str, default='openvino', choices=("openvino", "onnxruntime"), help='选择推理引擎') 267 | args = parser.parse_args() 268 | 269 | # Build model 270 | model = YOLOv8_pose(args.model, args.imgsz, args.infer_tool) 271 | keypoint_draw = KeyPoint_draw() # 可视化关键点 272 | 273 | # Read image by OpenCV 274 | img = cv2.imread(args.source) 275 | 276 | # Inference 277 | boxes = model(img, conf_threshold=args.conf, iou_threshold=args.iou) 278 | 279 | # Visualize 280 | if len(boxes) > 0: 281 | model.draw_and_visualize(img, boxes, keypoint_draw, vis=False, save=True) 282 | 283 | -------------------------------------------------------------------------------- /YOLOv8-seg_openvino_onnx.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | import cv2 4 | import numpy as np 5 | from openvino.runtime import Core # pip install openvino -i https://pypi.tuna.tsinghua.edu.cn/simple 6 | import onnxruntime as ort # 使用onnxruntime推理用上,pip install onnxruntime,默认安装CPU 7 | 8 | 9 | # COCO默认的80类 10 | CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 11 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 12 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 13 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 14 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 15 | 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 16 | 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 17 | 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] 18 | 19 | 20 | class OpenvinoInference(object): 21 | def __init__(self, onnx_path): 22 | self.onnx_path = onnx_path 23 | ie = Core() 24 | self.model_onnx = ie.read_model(model=self.onnx_path) 25 | 
self.compiled_model_onnx = ie.compile_model(model=self.model_onnx, device_name="CPU") 26 | 27 | def predict(self, datas): 28 | # 注:self.compiled_model_onnx([datas])是一个字典,self.compiled_model_onnx.output(0)是字典键,第一种读取所有值方法(0.11s) 比 第二种按键取值的方法(0.20s) 耗时减半 29 | predict_data = list(self.compiled_model_onnx([datas]).values()) 30 | # predict_data = [self.compiled_model_onnx([datas])[self.compiled_model_onnx.output(0)], 31 | # self.compiled_model_onnx([datas])[self.compiled_model_onnx.output(1)]] 32 | return predict_data 33 | 34 | 35 | class YOLOv8_seg: 36 | """YOLOv8 segmentation model class for handling inference and visualization.""" 37 | 38 | def __init__(self, onnx_model, imgsz=(640, 640), infer_tool='openvino'): 39 | """ 40 | Initialization. 41 | 42 | Args: 43 | onnx_model (str): Path to the ONNX model. 44 | """ 45 | self.infer_tool = infer_tool 46 | if self.infer_tool == 'openvino': 47 | # 构建openvino推理引擎 48 | self.openvino = OpenvinoInference(onnx_model) 49 | self.ndtype = np.single 50 | else: 51 | # 构建onnxruntime推理引擎 52 | self.ort_session = ort.InferenceSession(onnx_model, 53 | providers=['CUDAExecutionProvider', 'CPUExecutionProvider'] 54 | if ort.get_device() == 'GPU' else ['CPUExecutionProvider']) 55 | 56 | # Numpy dtype: support both FP32 and FP16 onnx model 57 | self.ndtype = np.half if self.ort_session.get_inputs()[0].type == 'tensor(float16)' else np.single 58 | 59 | self.classes = CLASSES # 加载模型类别 60 | self.model_height, self.model_width = imgsz[0], imgsz[1] # 图像resize大小 61 | self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3)) # 为每个类别生成调色板 62 | 63 | def __call__(self, im0, conf_threshold=0.4, iou_threshold=0.45, nm=32): 64 | """ 65 | The whole pipeline: pre-process -> inference -> post-process. 66 | 67 | Args: 68 | im0 (Numpy.ndarray): original input image. 69 | conf_threshold (float): confidence threshold for filtering predictions. 70 | iou_threshold (float): iou threshold for NMS. 71 | nm (int): the number of masks. 72 | 73 | Returns: 74 | boxes (List): list of bounding boxes. 75 | segments (List): list of segments. 76 | masks (np.ndarray): [N, H, W], output masks. 77 | """ 78 | # 前处理Pre-process 79 | t1 = time.time() 80 | im, ratio, (pad_w, pad_h) = self.preprocess(im0) 81 | print('预处理时间:{:.3f}s'.format(time.time() - t1)) 82 | 83 | # 推理 inference 84 | t2 = time.time() 85 | if self.infer_tool == 'openvino': 86 | preds = self.openvino.predict(im) 87 | else: 88 | preds = self.ort_session.run(None, {self.ort_session.get_inputs()[0].name: im}) # 与bbox区别,输出是个列表,[检测头的输出(1, 116, 8400), 分割头的输出(1, 32, 160, 160)] 89 | print('推理时间:{:.2f}s'.format(time.time() - t2)) 90 | 91 | # 后处理Post-process 92 | t3 = time.time() 93 | boxes, segments, masks = self.postprocess(preds, 94 | im0=im0, 95 | ratio=ratio, 96 | pad_w=pad_w, 97 | pad_h=pad_h, 98 | conf_threshold=conf_threshold, 99 | iou_threshold=iou_threshold, 100 | nm=nm 101 | ) 102 | print('后处理时间:{:.3f}s'.format(time.time() - t3)) 103 | 104 | return boxes, segments, masks 105 | 106 | # 前处理,包括:resize, pad, HWC to CHW,BGR to RGB,归一化,增加维度CHW -> BCHW 107 | def preprocess(self, img): 108 | """ 109 | Pre-processes the input image. 110 | 111 | Args: 112 | img (Numpy.ndarray): image about to be processed. 113 | 114 | Returns: 115 | img_process (Numpy.ndarray): image preprocessed for inference. 116 | ratio (tuple): width, height ratios in letterbox. 117 | pad_w (float): width padding in letterbox. 118 | pad_h (float): height padding in letterbox. 
119 | """ 120 | # Resize and pad input image using letterbox() (Borrowed from Ultralytics) 121 | shape = img.shape[:2] # original image shape 122 | new_shape = (self.model_height, self.model_width) 123 | r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) 124 | ratio = r, r 125 | new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) 126 | pad_w, pad_h = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2 # wh padding 127 | if shape[::-1] != new_unpad: # resize 128 | img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) 129 | top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1)) 130 | left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1)) 131 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)) # 填充 132 | 133 | # Transforms: HWC to CHW -> BGR to RGB -> div(255) -> contiguous -> add axis(optional) 134 | img = np.ascontiguousarray(np.einsum('HWC->CHW', img)[::-1], dtype=self.ndtype) / 255.0 135 | img_process = img[None] if len(img.shape) == 3 else img 136 | return img_process, ratio, (pad_w, pad_h) 137 | 138 | # 后处理,包括:阈值过滤+NMS+masks处理 139 | def postprocess(self, preds, im0, ratio, pad_w, pad_h, conf_threshold, iou_threshold, nm=32): 140 | """ 141 | Post-process the prediction. 142 | 143 | Args: 144 | preds (Numpy.ndarray): predictions come from ort.session.run(). 145 | im0 (Numpy.ndarray): [h, w, c] original input image. 146 | ratio (tuple): width, height ratios in letterbox. 147 | pad_w (float): width padding in letterbox. 148 | pad_h (float): height padding in letterbox. 149 | conf_threshold (float): conf threshold. 150 | iou_threshold (float): iou threshold. 151 | nm (int): the number of masks. 152 | 153 | Returns: 154 | boxes (List): list of bounding boxes. 155 | segments (List): list of segments. 156 | masks (np.ndarray): [N, H, W], output masks. 
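            Note: each of the 8400 candidates carries 116 values: 4 box + 80 class
            scores + nm=32 mask coefficients. The confidence filter and argmax below
            deliberately exclude the trailing nm columns; those coefficients are only
            consumed later by process_mask() against the 32 prototype masks.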
157 | """ 158 | x, protos = preds[0], preds[1] # 与bbox区别:Two outputs: 检测头的输出(1, 116, 8400), 分割头的输出(1, 32, 160, 160) 159 | 160 | # Transpose the first output: (Batch_size, xywh_conf_cls_nm, Num_anchors) -> (Batch_size, Num_anchors, xywh_conf_cls_nm) 161 | x = np.einsum('bcn->bnc', x) # (1, 8400, 116) 162 | 163 | # Predictions filtering by conf-threshold,不包括后32维的向量(32维的向量可以看作是与每个检测框关联的分割 mask 的系数或权重) 164 | x = x[np.amax(x[..., 4:-nm], axis=-1) > conf_threshold] 165 | 166 | # Create a new matrix which merge these(box, score, cls, nm) into one 167 | # For more details about `numpy.c_()`: https://numpy.org/doc/1.26/reference/generated/numpy.c_.html 168 | x = np.c_[x[..., :4], np.amax(x[..., 4:-nm], axis=-1), np.argmax(x[..., 4:-nm], axis=-1), x[..., -nm:]] 169 | 170 | # NMS filtering 171 | # 经过NMS后的值, np.array([[x, y, w, h, conf, cls, nm], ...]), shape=(-1, 4 + 1 + 1 + 32) 172 | x = x[cv2.dnn.NMSBoxes(x[:, :4], x[:, 4], conf_threshold, iou_threshold)] 173 | 174 | # 重新缩放边界框,为画图做准备 175 | if len(x) > 0: 176 | # Bounding boxes format change: cxcywh -> xyxy 177 | x[..., [0, 1]] -= x[..., [2, 3]] / 2 178 | x[..., [2, 3]] += x[..., [0, 1]] 179 | 180 | # Rescales bounding boxes from model shape(model_height, model_width) to the shape of original image 181 | x[..., :4] -= [pad_w, pad_h, pad_w, pad_h] 182 | x[..., :4] /= min(ratio) 183 | 184 | # Bounding boxes boundary clamp 185 | x[..., [0, 2]] = x[:, [0, 2]].clip(0, im0.shape[1]) 186 | x[..., [1, 3]] = x[:, [1, 3]].clip(0, im0.shape[0]) 187 | 188 | # 与bbox区别:增加masks处理 189 | # Process masks 190 | masks = self.process_mask(protos[0], x[:, 6:], x[:, :4], im0.shape) 191 | # Masks -> Segments(contours) 192 | segments = self.masks2segments(masks) 193 | 194 | return x[..., :6], segments, masks # boxes, segments, masks 195 | else: 196 | return [], [], [] 197 | 198 | @staticmethod 199 | def masks2segments(masks): 200 | """ 201 | It takes a list of masks(n,h,w) and returns a list of segments(n,xy) (Borrowed from 202 | https://github.com/ultralytics/ultralytics/blob/465df3024f44fa97d4fad9986530d5a13cdabdca/ultralytics/utils/ops.py#L750) 203 | 204 | Args: 205 | masks (numpy.ndarray): the output of the model, which is a tensor of shape (batch_size, 160, 160). 206 | 207 | Returns: 208 | segments (List): list of segment masks. 209 | """ 210 | segments = [] 211 | for x in masks.astype('uint8'): 212 | c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[0] # CHAIN_APPROX_SIMPLE 该函数用于查找二值图像中的轮廓。 213 | if c: 214 | # 这段代码的目的是找到图像x中的最外层轮廓,并从中选择最长的轮廓,然后将其转换为NumPy数组的形式。 215 | c = np.array(c[np.array([len(x) for x in c]).argmax()]).reshape(-1, 2) 216 | else: 217 | c = np.zeros((0, 2)) # no segments found 218 | segments.append(c.astype('float32')) 219 | return segments 220 | 221 | 222 | def process_mask(self, protos, masks_in, bboxes, im0_shape): 223 | """ 224 | Takes the output of the mask head, and applies the mask to the bounding boxes. This produces masks of higher quality 225 | but is slower. (Borrowed from https://github.com/ultralytics/ultralytics/blob/465df3024f44fa97d4fad9986530d5a13cdabdca/ultralytics/utils/ops.py#L618) 226 | 227 | Args: 228 | protos (numpy.ndarray): [mask_dim, mask_h, mask_w]. 229 | masks_in (numpy.ndarray): [n, mask_dim], n is number of masks after nms. 230 | bboxes (numpy.ndarray): bboxes re-scaled to original image shape. 231 | im0_shape (tuple): the size of the input image (h,w,c). 232 | 233 | Returns: 234 | (numpy.ndarray): The upsampled masks. 
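            Example (shape walk-through; n detections and the usual 32 prototypes
            at 160x160 are assumed):
                protos.reshape((32, -1))        # (32, 25600)
                np.matmul(masks_in, ...)        # (n, 32) @ (32, 25600) -> (n, 25600)
                .reshape((-1, 160, 160))        # (n, 160, 160); then transpose to HWN,
                                                # rescale to im0, crop to boxes, > 0.5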
235 | """ 236 | c, mh, mw = protos.shape 237 | masks = np.matmul(masks_in, protos.reshape((c, -1))).reshape((-1, mh, mw)).transpose(1, 2, 0) # HWN 238 | masks = np.ascontiguousarray(masks) 239 | masks = self.scale_mask(masks, im0_shape) # re-scale mask from P3 shape to original input image shape 240 | masks = np.einsum('HWN -> NHW', masks) # HWN -> NHW 241 | masks = self.crop_mask(masks, bboxes) 242 | return np.greater(masks, 0.5) 243 | 244 | @staticmethod 245 | def scale_mask(masks, im0_shape, ratio_pad=None): 246 | """ 247 | Takes a mask, and resizes it to the original image size. (Borrowed from 248 | https://github.com/ultralytics/ultralytics/blob/465df3024f44fa97d4fad9986530d5a13cdabdca/ultralytics/utils/ops.py#L305) 249 | 250 | Args: 251 | masks (np.ndarray): resized and padded masks/images, [h, w, num]/[h, w, 3]. 252 | im0_shape (tuple): the original image shape. 253 | ratio_pad (tuple): the ratio of the padding to the original image. 254 | 255 | Returns: 256 | masks (np.ndarray): The masks that are being returned. 257 | """ 258 | im1_shape = masks.shape[:2] 259 | if ratio_pad is None: # calculate from im0_shape 260 | gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1]) # gain = old / new 261 | pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2 # wh padding 262 | else: 263 | pad = ratio_pad[1] 264 | 265 | # Calculate tlbr of mask 266 | top, left = int(round(pad[1] - 0.1)), int(round(pad[0] - 0.1)) # y, x 267 | bottom, right = int(round(im1_shape[0] - pad[1] + 0.1)), int(round(im1_shape[1] - pad[0] + 0.1)) 268 | if len(masks.shape) < 2: 269 | raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}') 270 | masks = masks[top:bottom, left:right] 271 | masks = cv2.resize(masks, (im0_shape[1], im0_shape[0]), 272 | interpolation=cv2.INTER_LINEAR) # INTER_CUBIC would be better 273 | if len(masks.shape) == 2: 274 | masks = masks[:, :, None] 275 | return masks 276 | 277 | @staticmethod 278 | def crop_mask(masks, boxes): 279 | """ 280 | It takes a mask and a bounding box, and returns a mask that is cropped to the bounding box. (Borrowed from 281 | https://github.com/ultralytics/ultralytics/blob/465df3024f44fa97d4fad9986530d5a13cdabdca/ultralytics/utils/ops.py#L599) 282 | 283 | Args: 284 | masks (Numpy.ndarray): [n, h, w] tensor of masks. 285 | boxes (Numpy.ndarray): [n, 4] tensor of bbox coordinates in relative point form. 286 | 287 | Returns: 288 | (Numpy.ndarray): The masks are being cropped to the bounding box. 289 | """ 290 | n, h, w = masks.shape 291 | x1, y1, x2, y2 = np.split(boxes[:, :, None], 4, 1) 292 | r = np.arange(w, dtype=x1.dtype)[None, None, :] 293 | c = np.arange(h, dtype=x1.dtype)[None, :, None] 294 | return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2)) 295 | 296 | # 绘框,与bbox区别:增加masks可视化 297 | def draw_and_visualize(self, im, bboxes, segments, vis=False, save=True): 298 | """ 299 | Draw and visualize results. 300 | 301 | Args: 302 | im (np.ndarray): original image, shape [h, w, c]. 303 | bboxes (numpy.ndarray): [n, 6], n is number of bboxes. 304 | segments (List): list of segment masks. 305 | vis (bool): imshow using OpenCV. 306 | save (bool): save image annotated. 
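            Note: the polygons are filled on a copy (im_canvas) and blended back
            with cv2.addWeighted(im_canvas, 0.3, im, 0.7, 0), so masks render at
            roughly 30% opacity over the original pixels.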
307 | 308 | Returns: 309 | None 310 | """ 311 | # Draw rectangles and polygons 312 | im_canvas = im.copy() 313 | # Draw rectangles 314 | for (*box, conf, cls_), segment in zip(bboxes, segments): 315 | # draw contour and fill mask 316 | cv2.polylines(im, np.int32([segment]), True, (255, 255, 255), 2) # white borderline 317 | cv2.fillPoly(im_canvas, np.int32([segment]), (255, 0, 0)) 318 | 319 | # draw bbox rectangle 320 | cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), 321 | self.color_palette[int(cls_)], 1, cv2.LINE_AA) 322 | cv2.putText(im, f'{self.classes[int(cls_)]}: {conf:.3f}', (int(box[0]), int(box[1] - 9)), 323 | cv2.FONT_HERSHEY_SIMPLEX, 0.7, self.color_palette[int(cls_)], 2, cv2.LINE_AA) 324 | 325 | # Mix image 326 | im = cv2.addWeighted(im_canvas, 0.3, im, 0.7, 0) 327 | 328 | # Show image 329 | if vis: 330 | cv2.imshow('demo', im) 331 | cv2.waitKey(0) 332 | cv2.destroyAllWindows() 333 | 334 | # Save image 335 | if save: 336 | cv2.imwrite('demo.jpg', im) 337 | 338 | 339 | if __name__ == '__main__': 340 | # Create an argument parser to handle command-line arguments 341 | parser = argparse.ArgumentParser() 342 | parser.add_argument('--model', type=str, default='weights\\yolov8s-seg.onnx', help='Path to ONNX model') 343 | parser.add_argument('--source', type=str, default=str('bus.jpg'), help='Path to input image') 344 | parser.add_argument('--imgsz', type=tuple, default=(640, 640), help='Image input size') 345 | parser.add_argument('--conf', type=float, default=0.25, help='Confidence threshold') 346 | parser.add_argument('--iou', type=float, default=0.45, help='NMS IoU threshold') 347 | parser.add_argument('--infer_tool', type=str, default='openvino', choices=("openvino", "onnxruntime"), help='选择推理引擎') 348 | args = parser.parse_args() 349 | 350 | # Build model 351 | model = YOLOv8_seg(args.model, args.imgsz, args.infer_tool) 352 | 353 | # Read image by OpenCV 354 | img = cv2.imread(args.source) 355 | 356 | # Inference 357 | boxes, segments, _ = model(img, conf_threshold=args.conf, iou_threshold=args.iou) 358 | 359 | # Visualize, Draw bboxes and polygons 360 | if len(boxes) > 0: 361 | model.draw_and_visualize(img, boxes, segments, vis=False, save=True) 362 | 363 | -------------------------------------------------------------------------------- /YOLOv8_bytetrack.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | import cv2 4 | import numpy as np 5 | from openvino.runtime import Core # pip install openvino -i https://pypi.tuna.tsinghua.edu.cn/simple 6 | import onnxruntime as ort # 使用onnxruntime推理用上,pip install onnxruntime,默认安装CPU 7 | 8 | import copy 9 | from bytetrack.byte_tracker import BYTETracker 10 | 11 | # COCO默认的80类 12 | CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 13 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 14 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 15 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 16 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 17 | 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 18 | 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 
19 |            'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
20 | 
21 | 
22 | class OpenvinoInference(object):
23 |     def __init__(self, onnx_path):
24 |         self.onnx_path = onnx_path
25 |         ie = Core()
26 |         self.model_onnx = ie.read_model(model=self.onnx_path)
27 |         self.compiled_model_onnx = ie.compile_model(model=self.model_onnx, device_name="CPU")
28 |         self.output_layer_onnx = self.compiled_model_onnx.output(0)
29 | 
30 |     def predict(self, datas):
31 |         predict_data = self.compiled_model_onnx([datas])[self.output_layer_onnx]
32 |         return predict_data
33 | 
34 | 
35 | class YOLOv8:
36 |     """YOLOv8 object detection model class for handling inference and visualization."""
37 | 
38 |     def __init__(self, onnx_model, imgsz=(640, 640), infer_tool='openvino'):
39 |         """
40 |         Initialization.
41 | 
42 |         Args:
43 |             onnx_model (str): Path to the ONNX model.
44 |         """
45 |         self.infer_tool = infer_tool
46 |         if self.infer_tool == 'openvino':
47 |             # Build the OpenVINO inference engine
48 |             self.openvino = OpenvinoInference(onnx_model)
49 |             self.ndtype = np.single
50 |         else:
51 |             # Build the ONNXRuntime inference engine
52 |             self.ort_session = ort.InferenceSession(onnx_model,
53 |                                                     providers=['CUDAExecutionProvider', 'CPUExecutionProvider']
54 |                                                     if ort.get_device() == 'GPU' else ['CPUExecutionProvider'])
55 | 
56 |             # Numpy dtype: support both FP32 and FP16 onnx model
57 |             self.ndtype = np.half if self.ort_session.get_inputs()[0].type == 'tensor(float16)' else np.single
58 | 
59 |         self.classes = CLASSES  # model class names
60 |         self.model_height, self.model_width = imgsz[0], imgsz[1]  # image resize size (model input size)
61 |         self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3))  # one color per class
62 | 
63 |     def __call__(self, im0, conf_threshold=0.4, iou_threshold=0.45):
64 |         """
65 |         The whole pipeline: pre-process -> inference -> post-process.
66 | 
67 |         Args:
68 |             im0 (Numpy.ndarray): original input image.
69 |             conf_threshold (float): confidence threshold for filtering predictions.
70 |             iou_threshold (float): iou threshold for NMS.
71 | 
72 |         Returns:
73 |             boxes (List): list of bounding boxes.
74 |         """
75 |         # Pre-process
76 |         t1 = time.time()
77 |         im, ratio, (pad_w, pad_h) = self.preprocess(im0)
78 |         print('Pre-process time: {:.3f}s'.format(time.time() - t1))
79 | 
80 |         # Inference
81 |         t2 = time.time()
82 |         if self.infer_tool == 'openvino':
83 |             preds = self.openvino.predict(im)
84 |         else:
85 |             preds = self.ort_session.run(None, {self.ort_session.get_inputs()[0].name: im})[0]
86 |         print('Inference time: {:.2f}s'.format(time.time() - t2))
87 | 
88 |         # Post-process
89 |         t3 = time.time()
90 |         boxes = self.postprocess(preds,
91 |                                  im0=im0,
92 |                                  ratio=ratio,
93 |                                  pad_w=pad_w,
94 |                                  pad_h=pad_h,
95 |                                  conf_threshold=conf_threshold,
96 |                                  iou_threshold=iou_threshold,
97 |                                  )
98 |         print('Post-process time: {:.3f}s'.format(time.time() - t3))
99 | 
100 |         return boxes
101 | 
102 |     # Pre-processing: resize, pad, HWC to CHW, BGR to RGB, normalize, add batch dim CHW -> BCHW
103 |     def preprocess(self, img):
104 |         """
105 |         Pre-processes the input image.
106 | 
107 |         Args:
108 |             img (Numpy.ndarray): image about to be processed.
109 | 
110 |         Returns:
111 |             img_process (Numpy.ndarray): image preprocessed for inference.
112 |             ratio (tuple): width, height ratios in letterbox.
113 |             pad_w (float): width padding in letterbox.
114 |             pad_h (float): height padding in letterbox.
115 |         """
116 |         # Resize and pad input image using letterbox() (Borrowed from Ultralytics)
117 |         shape = img.shape[:2]  # original image shape
118 |         new_shape = (self.model_height, self.model_width)
119 |         r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
120 |         ratio = r, r
121 |         new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
122 |         pad_w, pad_h = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2  # wh padding
123 |         if shape[::-1] != new_unpad:  # resize
124 |             img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
125 |         top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1))
126 |         left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1))
127 |         img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114))  # pad
128 | 
129 |         # Transforms: HWC to CHW -> BGR to RGB -> div(255) -> contiguous -> add axis(optional)
130 |         img = np.ascontiguousarray(np.einsum('HWC->CHW', img)[::-1], dtype=self.ndtype) / 255.0
131 |         img_process = img[None] if len(img.shape) == 3 else img
132 |         return img_process, ratio, (pad_w, pad_h)
133 | 
134 |     # Post-processing: confidence-threshold filtering and NMS
135 |     def postprocess(self, preds, im0, ratio, pad_w, pad_h, conf_threshold, iou_threshold):
136 |         """
137 |         Post-process the prediction.
138 | 
139 |         Args:
140 |             preds (Numpy.ndarray): predictions come from ort.session.run().
141 |             im0 (Numpy.ndarray): [h, w, c] original input image.
142 |             ratio (tuple): width, height ratios in letterbox.
143 |             pad_w (float): width padding in letterbox.
144 |             pad_h (float): height padding in letterbox.
145 |             conf_threshold (float): conf threshold.
146 |             iou_threshold (float): iou threshold.
147 | 
148 |         Returns:
149 |             boxes (List): list of bounding boxes.
150 |         """
151 |         x = preds  # outputs: predictions (1, 84, 8400)
152 |         # Transpose the first output: (Batch_size, xywh_conf_cls, Num_anchors) -> (Batch_size, Num_anchors, xywh_conf_cls)
153 |         x = np.einsum('bcn->bnc', x)  # (1, 8400, 84)
154 | 
155 |         # Predictions filtering by conf-threshold
156 |         x = x[np.amax(x[..., 4:], axis=-1) > conf_threshold]
157 | 
158 |         # Create a new matrix which merge these(box, score, cls) into one
159 |         # For more details about `numpy.c_()`: https://numpy.org/doc/1.26/reference/generated/numpy.c_.html
160 |         x = np.c_[x[..., :4], np.amax(x[..., 4:], axis=-1), np.argmax(x[..., 4:], axis=-1)]
161 | 
162 |         # NMS filtering
163 |         # Values kept after NMS: np.array([[x, y, w, h, conf, cls], ...]), shape=(-1, 4 + 1 + 1)
164 |         x = x[cv2.dnn.NMSBoxes(x[:, :4], x[:, 4], conf_threshold, iou_threshold)]
165 | 
166 |         # Rescale bounding boxes back to the original image, ready for drawing
167 |         if len(x) > 0:
168 |             # Bounding boxes format change: cxcywh -> xyxy
169 |             x[..., [0, 1]] -= x[..., [2, 3]] / 2
170 |             x[..., [2, 3]] += x[..., [0, 1]]
171 | 
172 |             # Rescales bounding boxes from model shape(model_height, model_width) to the shape of original image
173 |             x[..., :4] -= [pad_w, pad_h, pad_w, pad_h]
174 |             x[..., :4] /= min(ratio)
175 | 
176 |             # Bounding boxes boundary clamp
177 |             x[..., [0, 2]] = x[:, [0, 2]].clip(0, im0.shape[1])
178 |             x[..., [1, 3]] = x[:, [1, 3]].clip(0, im0.shape[0])
179 | 
180 |             return x[..., :6]  # boxes
181 |         else:
182 |             return []
183 | 
184 |     # Draw boxes
185 |     def draw_and_visualize(self, im, bboxes, video_writer, vis=False, save=False, is_track=False):
186 |         """
187 |         Draw and visualize results.
188 | 
189 |         Args:
190 |             im (np.ndarray): original image, shape [h, w, c].
191 |             bboxes (numpy.ndarray): [n, 6], n is number of bboxes.
192 |             vis (bool): imshow using OpenCV.
193 |             save (bool): save image annotated.
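            video_writer (cv2.VideoWriter): writer that receives each annotated frame when save is True.
            is_track (bool): if True, the last column of bboxes holds a track id instead of a class id.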
194 | 
195 |         Returns:
196 |             None
197 |         """
198 |         # Draw rectangles
199 |         if not is_track:
200 |             for (*box, conf, cls_) in bboxes:
201 |                 # draw bbox rectangle
202 |                 cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])),
203 |                               self.color_palette[int(cls_)], 1, cv2.LINE_AA)
204 |                 cv2.putText(im, f'{self.classes[int(cls_)]}: {conf:.3f}', (int(box[0]), int(box[1] - 9)),
205 |                             cv2.FONT_HERSHEY_SIMPLEX, 0.7, self.color_palette[int(cls_)], 2, cv2.LINE_AA)
206 |         else:
207 |             for (*box, conf, id_) in bboxes:
208 |                 # draw bbox rectangle
209 |                 cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])),
210 |                               (0, 0, 255), 1, cv2.LINE_AA)
211 |                 cv2.putText(im, f'{id_}: {conf:.3f}', (int(box[0]), int(box[1] - 9)),
212 |                             cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2, cv2.LINE_AA)
213 | 
214 |         # Show image
215 |         if vis:
216 |             cv2.imshow('demo', im)
217 |             cv2.waitKey(1)
218 | 
219 |         # Save video
220 |         if save:
221 |             video_writer.write(im)
222 | 
223 | 
224 | 
225 | class ByteTrackerONNX(object):
226 |     def __init__(self, args):
227 |         self.args = args
228 |         self.tracker = BYTETracker(args, frame_rate=30)
229 | 
230 |     def _tracker_update(self, dets, image):
231 |         online_targets = []
232 |         if dets is not None:
233 |             online_targets = self.tracker.update(
234 |                 dets[:, :5],
235 |                 [image.shape[0], image.shape[1]],
236 |                 [image.shape[0], image.shape[1]],
237 |             )
238 | 
239 |         online_tlwhs = []
240 |         online_ids = []
241 |         online_scores = []
242 |         for online_target in online_targets:
243 |             tlwh = online_target.tlwh
244 |             track_id = online_target.track_id
245 |             vertical = tlwh[2] / tlwh[3] > 1.6
246 |             if tlwh[2] * tlwh[3] > self.args.min_box_area and not vertical:
247 |                 online_tlwhs.append(tlwh)
248 |                 online_ids.append(track_id)
249 |                 online_scores.append(online_target.score)
250 | 
251 |         return online_tlwhs, online_ids, online_scores
252 | 
253 | 
254 |     def inference(self, image, dets):
255 |         """
256 |         Args: dets: detection results, [x1, y1, x2, y2, conf, cls]
257 |         Returns: np.array([[x1, y1, x2, y2, conf, ids], ...])
258 |         """
259 |         bboxes, ids, scores = self._tracker_update(dets, image)
260 |         if len(bboxes) == 0:
261 |             return []
262 |         # Bounding boxes format change: tlwh -> xyxy
263 |         bboxes = np.array(bboxes)
264 |         bboxes[..., [2, 3]] += bboxes[..., [0, 1]]
265 |         bboxes = np.c_[bboxes, np.array(scores), np.array(ids)]
266 |         return bboxes
267 | 
268 | 
269 | if __name__ == '__main__':
270 |     # Create an argument parser to handle command-line arguments
271 |     parser = argparse.ArgumentParser()
272 |     parser.add_argument('--model', type=str, default='yolov8s.onnx', help='Path to ONNX model')
273 |     parser.add_argument('--source', type=str, default=str('test.mp4'), help='Path to input video')
274 |     parser.add_argument('--imgsz', type=tuple, default=(640, 640), help='Image input size')
275 |     parser.add_argument('--conf', type=float, default=0.25, help='Confidence threshold')
276 |     parser.add_argument('--iou', type=float, default=0.45, help='NMS IoU threshold')
277 |     parser.add_argument('--infer_tool', type=str, default='openvino', choices=("openvino", "onnxruntime"), help='inference engine to use')
278 | 
279 |     parser.add_argument('--is_track', type=lambda x: str(x).lower() == 'true', default=True, help='enable tracking')
280 |     parser.add_argument('--track_thresh', type=float, default=0.5, help='tracking confidence threshold')
281 |     parser.add_argument('--track_buffer', type=int, default=30, help='the frames for keep lost tracks, usually as same with FPS')
282 |     parser.add_argument('--match_thresh', type=float, default=0.8, help='matching threshold for tracking')
283 |     parser.add_argument('--min_box_area', type=float, default=10, help='filter out tiny boxes',)
284 |     parser.add_argument('--mot20', dest='mot20', default=False, action='store_true', help='test mot20.',)
285 |     args = parser.parse_args()
286 | 
287 |     # Build model
288 |     model = YOLOv8(args.model, args.imgsz, args.infer_tool)
289 | 
290 |     bytetrack = ByteTrackerONNX(args)
291 | 
292 |     # Open the video, read frame count / width / height / FPS, and set up the output writer
293 |     cap = cv2.VideoCapture(args.source)
294 |     width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
295 |     height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
296 |     fps = cap.get(cv2.CAP_PROP_FPS)
297 |     frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
298 |     video_writer = cv2.VideoWriter('demo.mp4', cv2.VideoWriter_fourcc(*"mp4v"), fps, (int(width), int(height)))
299 |     frame_id = 1
300 | 
301 |     while True:
302 |         start_time = time.time()
303 |         ret, img = cap.read()
304 |         if not ret:
305 |             break
306 | 
307 |         # Inference
308 |         boxes = model(img, conf_threshold=args.conf, iou_threshold=args.iou)
309 | 
310 |         # track
311 |         if args.is_track:
312 |             boxes = bytetrack.inference(img, boxes)
313 | 
314 |         # Visualize
315 |         if len(boxes) > 0:
316 |             model.draw_and_visualize(copy.deepcopy(img), boxes, video_writer, vis=False, save=True, is_track=args.is_track)
317 | 
318 |         end_time = time.time() - start_time
319 |         print('frame {}/{} (Total time: {:.2f} ms)'.format(frame_id, int(frame_count), end_time * 1000))
320 |         frame_id += 1
321 | 
322 | 
--------------------------------------------------------------------------------
/YOLOv8_openvino_onnx.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import time
 3 | import cv2
 4 | import numpy as np
 5 | from openvino.runtime import Core  # pip install openvino -i https://pypi.tuna.tsinghua.edu.cn/simple
 6 | import onnxruntime as ort  # used for ONNXRuntime inference; pip install onnxruntime installs the CPU build by default
 7 | 
 8 | 
 9 | # The 80 default COCO classes
10 | CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
11 |            'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
12 |            'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
13 |            'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
14 |            'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
15 |            'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
16 |            'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
17 |            'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
18 | 
19 | 
20 | class OpenvinoInference(object):
21 |     def __init__(self, onnx_path):
22 |         self.onnx_path = onnx_path
23 |         ie = Core()
24 |         self.model_onnx = ie.read_model(model=self.onnx_path)
25 |         self.compiled_model_onnx = ie.compile_model(model=self.model_onnx, device_name="CPU")
26 |         self.output_layer_onnx = self.compiled_model_onnx.output(0)
27 | 
28 |     def predict(self, datas):
29 |         predict_data = self.compiled_model_onnx([datas])[self.output_layer_onnx]
30 |         return predict_data
31 | 
32 | 
33 | class YOLOv8:
34 |     """YOLOv8 object detection model class for handling inference and visualization."""
35 | 
36 |     def __init__(self, onnx_model, imgsz=(640, 640), infer_tool='openvino'):
37 |         """
38 |         Initialization.
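        Note: with infer_tool='openvino' the model is compiled for CPU and FP32
        input is assumed; with 'onnxruntime' the input dtype follows the model
        itself (np.half for FP16 models, np.single for FP32 ones).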
39 | 40 | Args: 41 | onnx_model (str): Path to the ONNX model. 42 | """ 43 | self.infer_tool = infer_tool 44 | if self.infer_tool == 'openvino': 45 | # 构建openvino推理引擎 46 | self.openvino = OpenvinoInference(onnx_model) 47 | self.ndtype = np.single 48 | else: 49 | # 构建onnxruntime推理引擎 50 | self.ort_session = ort.InferenceSession(onnx_model, 51 | providers=['CUDAExecutionProvider', 'CPUExecutionProvider'] 52 | if ort.get_device() == 'GPU' else ['CPUExecutionProvider']) 53 | 54 | # Numpy dtype: support both FP32 and FP16 onnx model 55 | self.ndtype = np.half if self.ort_session.get_inputs()[0].type == 'tensor(float16)' else np.single 56 | 57 | self.classes = CLASSES # 加载模型类别 58 | self.model_height, self.model_width = imgsz[0], imgsz[1] # 图像resize大小 59 | self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3)) # 为每个类别生成调色板 60 | 61 | def __call__(self, im0, conf_threshold=0.4, iou_threshold=0.45): 62 | """ 63 | The whole pipeline: pre-process -> inference -> post-process. 64 | 65 | Args: 66 | im0 (Numpy.ndarray): original input image. 67 | conf_threshold (float): confidence threshold for filtering predictions. 68 | iou_threshold (float): iou threshold for NMS. 69 | 70 | Returns: 71 | boxes (List): list of bounding boxes. 72 | """ 73 | # 前处理Pre-process 74 | t1 = time.time() 75 | im, ratio, (pad_w, pad_h) = self.preprocess(im0) 76 | print('预处理时间:{:.3f}s'.format(time.time() - t1)) 77 | 78 | # 推理 inference 79 | t2 = time.time() 80 | if self.infer_tool == 'openvino': 81 | preds = self.openvino.predict(im) 82 | else: 83 | preds = self.ort_session.run(None, {self.ort_session.get_inputs()[0].name: im})[0] 84 | print('推理时间:{:.2f}s'.format(time.time() - t2)) 85 | 86 | # 后处理Post-process 87 | t3 = time.time() 88 | boxes = self.postprocess(preds, 89 | im0=im0, 90 | ratio=ratio, 91 | pad_w=pad_w, 92 | pad_h=pad_h, 93 | conf_threshold=conf_threshold, 94 | iou_threshold=iou_threshold, 95 | ) 96 | print('后处理时间:{:.3f}s'.format(time.time() - t3)) 97 | 98 | return boxes 99 | 100 | # 前处理,包括:resize, pad, HWC to CHW,BGR to RGB,归一化,增加维度CHW -> BCHW 101 | def preprocess(self, img): 102 | """ 103 | Pre-processes the input image. 104 | 105 | Args: 106 | img (Numpy.ndarray): image about to be processed. 107 | 108 | Returns: 109 | img_process (Numpy.ndarray): image preprocessed for inference. 110 | ratio (tuple): width, height ratios in letterbox. 111 | pad_w (float): width padding in letterbox. 112 | pad_h (float): height padding in letterbox. 
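
        Example (illustrative values, assuming a 1920x1080 frame and a 640x640 model input):
            r = min(640 / 1080, 640 / 1920) = 1/3, so new_unpad = (640, 360),
            pad_w = (640 - 640) / 2 = 0 and pad_h = (640 - 360) / 2 = 140,
            i.e. 140 px of gray (114, 114, 114) padding above and below the resized image.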
113 | """ 114 | # Resize and pad input image using letterbox() (Borrowed from Ultralytics) 115 | shape = img.shape[:2] # original image shape 116 | new_shape = (self.model_height, self.model_width) 117 | r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) 118 | ratio = r, r 119 | new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) 120 | pad_w, pad_h = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2 # wh padding 121 | if shape[::-1] != new_unpad: # resize 122 | img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) 123 | top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1)) 124 | left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1)) 125 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)) # 填充 126 | 127 | # Transforms: HWC to CHW -> BGR to RGB -> div(255) -> contiguous -> add axis(optional) 128 | img = np.ascontiguousarray(np.einsum('HWC->CHW', img)[::-1], dtype=self.ndtype) / 255.0 129 | img_process = img[None] if len(img.shape) == 3 else img 130 | return img_process, ratio, (pad_w, pad_h) 131 | 132 | # 后处理,包括:阈值过滤与NMS 133 | def postprocess(self, preds, im0, ratio, pad_w, pad_h, conf_threshold, iou_threshold): 134 | """ 135 | Post-process the prediction. 136 | 137 | Args: 138 | preds (Numpy.ndarray): predictions come from ort.session.run(). 139 | im0 (Numpy.ndarray): [h, w, c] original input image. 140 | ratio (tuple): width, height ratios in letterbox. 141 | pad_w (float): width padding in letterbox. 142 | pad_h (float): height padding in letterbox. 143 | conf_threshold (float): conf threshold. 144 | iou_threshold (float): iou threshold. 145 | 146 | Returns: 147 | boxes (List): list of bounding boxes. 148 | """ 149 | x = preds # outputs: predictions (1, 84, 8400) 150 | # Transpose the first output: (Batch_size, xywh_conf_cls, Num_anchors) -> (Batch_size, Num_anchors, xywh_conf_cls) 151 | x = np.einsum('bcn->bnc', x) # (1, 8400, 84) 152 | 153 | # Predictions filtering by conf-threshold 154 | x = x[np.amax(x[..., 4:], axis=-1) > conf_threshold] 155 | 156 | # Create a new matrix which merge these(box, score, cls) into one 157 | # For more details about `numpy.c_()`: https://numpy.org/doc/1.26/reference/generated/numpy.c_.html 158 | x = np.c_[x[..., :4], np.amax(x[..., 4:], axis=-1), np.argmax(x[..., 4:], axis=-1)] 159 | 160 | # NMS filtering 161 | # 经过NMS后的值, np.array([[x, y, w, h, conf, cls], ...]), shape=(-1, 4 + 1 + 1) 162 | x = x[cv2.dnn.NMSBoxes(x[:, :4], x[:, 4], conf_threshold, iou_threshold)] 163 | 164 | # 重新缩放边界框,为画图做准备 165 | if len(x) > 0: 166 | # Bounding boxes format change: cxcywh -> xyxy 167 | x[..., [0, 1]] -= x[..., [2, 3]] / 2 168 | x[..., [2, 3]] += x[..., [0, 1]] 169 | 170 | # Rescales bounding boxes from model shape(model_height, model_width) to the shape of original image 171 | x[..., :4] -= [pad_w, pad_h, pad_w, pad_h] 172 | x[..., :4] /= min(ratio) 173 | 174 | # Bounding boxes boundary clamp 175 | x[..., [0, 2]] = x[:, [0, 2]].clip(0, im0.shape[1]) 176 | x[..., [1, 3]] = x[:, [1, 3]].clip(0, im0.shape[0]) 177 | 178 | return x[..., :6] # boxes 179 | else: 180 | return [] 181 | 182 | # 绘框 183 | def draw_and_visualize(self, im, bboxes, vis=False, save=True): 184 | """ 185 | Draw and visualize results. 186 | 187 | Args: 188 | im (np.ndarray): original image, shape [h, w, c]. 189 | bboxes (numpy.ndarray): [n, 6], n is number of bboxes. 190 | vis (bool): imshow using OpenCV. 191 | save (bool): save image annotated. 
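
        Example (typical call, mirroring the __main__ block below):
            >>> model.draw_and_visualize(img, boxes, vis=False, save=True)  # writes demo.jpg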
192 | 193 | Returns: 194 | None 195 | """ 196 | # Draw rectangles 197 | for (*box, conf, cls_) in bboxes: 198 | # draw bbox rectangle 199 | cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), 200 | self.color_palette[int(cls_)], 1, cv2.LINE_AA) 201 | cv2.putText(im, f'{self.classes[int(cls_)]}: {conf:.3f}', (int(box[0]), int(box[1] - 9)), 202 | cv2.FONT_HERSHEY_SIMPLEX, 0.7, self.color_palette[int(cls_)], 2, cv2.LINE_AA) 203 | 204 | # Show image 205 | if vis: 206 | cv2.imshow('demo', im) 207 | cv2.waitKey(0) 208 | cv2.destroyAllWindows() 209 | 210 | # Save image 211 | if save: 212 | cv2.imwrite('demo.jpg', im) 213 | 214 | 215 | if __name__ == '__main__': 216 | # Create an argument parser to handle command-line arguments 217 | parser = argparse.ArgumentParser() 218 | parser.add_argument('--model', type=str, default='yolov8s.onnx', help='Path to ONNX model') 219 | parser.add_argument('--source', type=str, default=str('bus.jpg'), help='Path to input image') 220 | parser.add_argument('--imgsz', type=tuple, default=(640, 640), help='Image input size') 221 | parser.add_argument('--conf', type=float, default=0.25, help='Confidence threshold') 222 | parser.add_argument('--iou', type=float, default=0.45, help='NMS IoU threshold') 223 | parser.add_argument('--infer_tool', type=str, default='openvino', choices=("openvino", "onnxruntime"), help='选择推理引擎') 224 | args = parser.parse_args() 225 | 226 | # Build model 227 | model = YOLOv8(args.model, args.imgsz, args.infer_tool) 228 | 229 | # Read image by OpenCV 230 | img = cv2.imread(args.source) 231 | 232 | # Inference 233 | boxes = model(img, conf_threshold=args.conf, iou_threshold=args.iou) 234 | 235 | # Visualize 236 | if len(boxes) > 0: 237 | model.draw_and_visualize(img, boxes, vis=False, save=True) 238 | 239 | -------------------------------------------------------------------------------- /YOLOv9_openvino_onnx.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | import cv2 4 | import numpy as np 5 | from openvino.runtime import Core # pip install openvino -i https://pypi.tuna.tsinghua.edu.cn/simple 6 | import onnxruntime as ort # 使用onnxruntime推理用上,pip install onnxruntime,默认安装CPU 7 | 8 | 9 | # COCO默认的80类 10 | CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 11 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 12 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 13 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 14 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 15 | 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 16 | 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 17 | 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] 18 | 19 | 20 | class OpenvinoInference(object): 21 | def __init__(self, onnx_path): 22 | self.onnx_path = onnx_path 23 | ie = Core() 24 | self.model_onnx = ie.read_model(model=self.onnx_path) 25 | self.compiled_model_onnx = ie.compile_model(model=self.model_onnx, device_name="CPU") 26 | self.output_layer_onnx = self.compiled_model_onnx.output(0) 27 | 28 | def 
predict(self, datas): 29 | predict_data = self.compiled_model_onnx([datas])[self.output_layer_onnx] 30 | return predict_data 31 | 32 | 33 | class YOLOv9: 34 | """YOLOv9 object detection model class for handling inference and visualization.""" 35 | 36 | def __init__(self, onnx_model, imgsz=(640, 640), infer_tool='openvino'): 37 | """ 38 | Initialization. 39 | 40 | Args: 41 | onnx_model (str): Path to the ONNX model. 42 | """ 43 | self.infer_tool = infer_tool 44 | if self.infer_tool == 'openvino': 45 | # 构建openvino推理引擎 46 | self.openvino = OpenvinoInference(onnx_model) 47 | self.ndtype = np.single 48 | else: 49 | # 构建onnxruntime推理引擎 50 | self.ort_session = ort.InferenceSession(onnx_model, 51 | providers=['CUDAExecutionProvider', 'CPUExecutionProvider'] 52 | if ort.get_device() == 'GPU' else ['CPUExecutionProvider']) 53 | 54 | # Numpy dtype: support both FP32 and FP16 onnx model 55 | self.ndtype = np.half if self.ort_session.get_inputs()[0].type == 'tensor(float16)' else np.single 56 | 57 | self.classes = CLASSES # 加载模型类别 58 | self.model_height, self.model_width = imgsz[0], imgsz[1] # 图像resize大小 59 | self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3)) # 为每个类别生成调色板 60 | 61 | def __call__(self, im0, conf_threshold=0.4, iou_threshold=0.45): 62 | """ 63 | The whole pipeline: pre-process -> inference -> post-process. 64 | 65 | Args: 66 | im0 (Numpy.ndarray): original input image. 67 | conf_threshold (float): confidence threshold for filtering predictions. 68 | iou_threshold (float): iou threshold for NMS. 69 | 70 | Returns: 71 | boxes (List): list of bounding boxes. 72 | """ 73 | # 前处理Pre-process 74 | t1 = time.time() 75 | im, ratio, (pad_w, pad_h) = self.preprocess(im0) 76 | print('预处理时间:{:.3f}s'.format(time.time() - t1)) 77 | 78 | # 推理 inference 79 | t2 = time.time() 80 | if self.infer_tool == 'openvino': 81 | preds = self.openvino.predict(im) 82 | else: 83 | preds = self.ort_session.run(None, {self.ort_session.get_inputs()[0].name: im})[0] 84 | print('推理时间:{:.2f}s'.format(time.time() - t2)) 85 | 86 | # 后处理Post-process 87 | t3 = time.time() 88 | boxes = self.postprocess(preds, 89 | im0=im0, 90 | ratio=ratio, 91 | pad_w=pad_w, 92 | pad_h=pad_h, 93 | conf_threshold=conf_threshold, 94 | iou_threshold=iou_threshold, 95 | ) 96 | print('后处理时间:{:.3f}s'.format(time.time() - t3)) 97 | 98 | return boxes 99 | 100 | # 前处理,包括:resize, pad, HWC to CHW,BGR to RGB,归一化,增加维度CHW -> BCHW 101 | def preprocess(self, img): 102 | """ 103 | Pre-processes the input image. 104 | 105 | Args: 106 | img (Numpy.ndarray): image about to be processed. 107 | 108 | Returns: 109 | img_process (Numpy.ndarray): image preprocessed for inference. 110 | ratio (tuple): width, height ratios in letterbox. 111 | pad_w (float): width padding in letterbox. 112 | pad_h (float): height padding in letterbox. 
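
        Note: the np.einsum('HWC->CHW', img)[::-1] line below is equivalent to
        img.transpose(2, 0, 1)[::-1], i.e. HWC -> CHW followed by a BGR -> RGB
        channel flip, before pixel values are scaled to [0, 1].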
113 | """ 114 | # Resize and pad input image using letterbox() (Borrowed from Ultralytics) 115 | shape = img.shape[:2] # original image shape 116 | new_shape = (self.model_height, self.model_width) 117 | r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) 118 | ratio = r, r 119 | new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) 120 | pad_w, pad_h = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2 # wh padding 121 | if shape[::-1] != new_unpad: # resize 122 | img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) 123 | top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1)) 124 | left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1)) 125 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)) # 填充 126 | 127 | # Transforms: HWC to CHW -> BGR to RGB -> div(255) -> contiguous -> add axis(optional) 128 | img = np.ascontiguousarray(np.einsum('HWC->CHW', img)[::-1], dtype=self.ndtype) / 255.0 129 | img_process = img[None] if len(img.shape) == 3 else img 130 | return img_process, ratio, (pad_w, pad_h) 131 | 132 | # 后处理,包括:阈值过滤与NMS 133 | def postprocess(self, preds, im0, ratio, pad_w, pad_h, conf_threshold, iou_threshold): 134 | """ 135 | Post-process the prediction. 136 | 137 | Args: 138 | preds (Numpy.ndarray): predictions come from ort.session.run(). 139 | im0 (Numpy.ndarray): [h, w, c] original input image. 140 | ratio (tuple): width, height ratios in letterbox. 141 | pad_w (float): width padding in letterbox. 142 | pad_h (float): height padding in letterbox. 143 | conf_threshold (float): conf threshold. 144 | iou_threshold (float): iou threshold. 145 | 146 | Returns: 147 | boxes (List): list of bounding boxes. 148 | """ 149 | x = preds # outputs: predictions (1, 84, 8400) 150 | # Transpose the first output: (Batch_size, xywh_conf_cls, Num_anchors) -> (Batch_size, Num_anchors, xywh_conf_cls) 151 | x = np.einsum('bcn->bnc', x) # (1, 8400, 84) 152 | 153 | # Predictions filtering by conf-threshold 154 | x = x[np.amax(x[..., 4:], axis=-1) > conf_threshold] 155 | 156 | # Create a new matrix which merge these(box, score, cls) into one 157 | # For more details about `numpy.c_()`: https://numpy.org/doc/1.26/reference/generated/numpy.c_.html 158 | x = np.c_[x[..., :4], np.amax(x[..., 4:], axis=-1), np.argmax(x[..., 4:], axis=-1)] 159 | 160 | # NMS filtering 161 | # 经过NMS后的值, np.array([[x, y, w, h, conf, cls], ...]), shape=(-1, 4 + 1 + 1) 162 | x = x[cv2.dnn.NMSBoxes(x[:, :4], x[:, 4], conf_threshold, iou_threshold)] 163 | 164 | # 重新缩放边界框,为画图做准备 165 | if len(x) > 0: 166 | # Bounding boxes format change: cxcywh -> xyxy 167 | x[..., [0, 1]] -= x[..., [2, 3]] / 2 168 | x[..., [2, 3]] += x[..., [0, 1]] 169 | 170 | # Rescales bounding boxes from model shape(model_height, model_width) to the shape of original image 171 | x[..., :4] -= [pad_w, pad_h, pad_w, pad_h] 172 | x[..., :4] /= min(ratio) 173 | 174 | # Bounding boxes boundary clamp 175 | x[..., [0, 2]] = x[:, [0, 2]].clip(0, im0.shape[1]) 176 | x[..., [1, 3]] = x[:, [1, 3]].clip(0, im0.shape[0]) 177 | 178 | return x[..., :6] # boxes 179 | else: 180 | return [] 181 | 182 | # 绘框 183 | def draw_and_visualize(self, im, bboxes, vis=False, save=True): 184 | """ 185 | Draw and visualize results. 186 | 187 | Args: 188 | im (np.ndarray): original image, shape [h, w, c]. 189 | bboxes (numpy.ndarray): [n, 6], n is number of bboxes. 190 | vis (bool): imshow using OpenCV. 191 | save (bool): save image annotated. 
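
        Note: bboxes is the (n, 6) array returned by postprocess(), one row per
        box in [x1, y1, x2, y2, conf, cls] order, already rescaled to the
        original image.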
192 | 193 | Returns: 194 | None 195 | """ 196 | # Draw rectangles 197 | for (*box, conf, cls_) in bboxes: 198 | # draw bbox rectangle 199 | cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), 200 | self.color_palette[int(cls_)], 1, cv2.LINE_AA) 201 | cv2.putText(im, f'{self.classes[int(cls_)]}: {conf:.3f}', (int(box[0]), int(box[1] - 9)), 202 | cv2.FONT_HERSHEY_SIMPLEX, 0.7, self.color_palette[int(cls_)], 2, cv2.LINE_AA) 203 | 204 | # Show image 205 | if vis: 206 | cv2.imshow('demo', im) 207 | cv2.waitKey(0) 208 | cv2.destroyAllWindows() 209 | 210 | # Save image 211 | if save: 212 | cv2.imwrite('demo.jpg', im) 213 | 214 | 215 | if __name__ == '__main__': 216 | # Create an argument parser to handle command-line arguments 217 | parser = argparse.ArgumentParser() 218 | parser.add_argument('--model', type=str, default='yolov9c.onnx', help='Path to ONNX model') 219 | parser.add_argument('--source', type=str, default=str('bus.jpg'), help='Path to input image') 220 | parser.add_argument('--imgsz', type=tuple, default=(640,640), help='Image input size') 221 | parser.add_argument('--conf', type=float, default=0.25, help='Confidence threshold') 222 | parser.add_argument('--iou', type=float, default=0.45, help='NMS IoU threshold') 223 | parser.add_argument('--infer_tool', type=str, default='openvino', choices=("openvino", "onnxruntime"), help='选择推理引擎') 224 | args = parser.parse_args() 225 | 226 | # Build model 227 | model = YOLOv9(args.model, args.imgsz, args.infer_tool) 228 | 229 | # Read image by OpenCV 230 | img = cv2.imread(args.source) 231 | 232 | # Inference 233 | boxes = model(img, conf_threshold=args.conf, iou_threshold=args.iou) 234 | 235 | # Visualize 236 | if len(boxes) > 0: 237 | model.draw_and_visualize(img, boxes, vis=False, save=True) 238 | 239 | -------------------------------------------------------------------------------- /bytetrack/__pycache__/basetrack.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bigtuo/YOLOv8_Openvino/61fd58feb40d288905a2e7ee78fbdece094d10f6/bytetrack/__pycache__/basetrack.cpython-36.pyc -------------------------------------------------------------------------------- /bytetrack/__pycache__/basetrack.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bigtuo/YOLOv8_Openvino/61fd58feb40d288905a2e7ee78fbdece094d10f6/bytetrack/__pycache__/basetrack.cpython-38.pyc -------------------------------------------------------------------------------- /bytetrack/__pycache__/byte_tracker.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bigtuo/YOLOv8_Openvino/61fd58feb40d288905a2e7ee78fbdece094d10f6/bytetrack/__pycache__/byte_tracker.cpython-36.pyc -------------------------------------------------------------------------------- /bytetrack/__pycache__/byte_tracker.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bigtuo/YOLOv8_Openvino/61fd58feb40d288905a2e7ee78fbdece094d10f6/bytetrack/__pycache__/byte_tracker.cpython-38.pyc -------------------------------------------------------------------------------- /bytetrack/__pycache__/kalman_filter.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Bigtuo/YOLOv8_Openvino/61fd58feb40d288905a2e7ee78fbdece094d10f6/bytetrack/__pycache__/kalman_filter.cpython-36.pyc
--------------------------------------------------------------------------------
/bytetrack/__pycache__/kalman_filter.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bigtuo/YOLOv8_Openvino/61fd58feb40d288905a2e7ee78fbdece094d10f6/bytetrack/__pycache__/kalman_filter.cpython-38.pyc
--------------------------------------------------------------------------------
/bytetrack/__pycache__/matching.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bigtuo/YOLOv8_Openvino/61fd58feb40d288905a2e7ee78fbdece094d10f6/bytetrack/__pycache__/matching.cpython-36.pyc
--------------------------------------------------------------------------------
/bytetrack/__pycache__/matching.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bigtuo/YOLOv8_Openvino/61fd58feb40d288905a2e7ee78fbdece094d10f6/bytetrack/__pycache__/matching.cpython-38.pyc
--------------------------------------------------------------------------------
/bytetrack/basetrack.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from collections import OrderedDict
 3 | 
 4 | 
 5 | class TrackState(object):
 6 |     New = 0
 7 |     Tracked = 1
 8 |     Lost = 2
 9 |     Removed = 3
10 | 
11 | 
12 | class BaseTrack(object):
13 |     _count = 0
14 | 
15 |     track_id = 0
16 |     is_activated = False
17 |     state = TrackState.New
18 | 
19 |     history = OrderedDict()
20 |     features = []
21 |     curr_feature = None
22 |     score = 0
23 |     start_frame = 0
24 |     frame_id = 0
25 |     time_since_update = 0
26 | 
27 |     # multi-camera
28 |     location = (np.inf, np.inf)
29 | 
30 |     @property
31 |     def end_frame(self):
32 |         return self.frame_id
33 | 
34 |     @staticmethod
35 |     def next_id():
36 |         BaseTrack._count += 1
37 |         return BaseTrack._count
38 | 
39 |     def activate(self, *args):
40 |         raise NotImplementedError
41 | 
42 |     def predict(self):
43 |         raise NotImplementedError
44 | 
45 |     def update(self, *args, **kwargs):
46 |         raise NotImplementedError
47 | 
48 |     def mark_lost(self):
49 |         self.state = TrackState.Lost
50 | 
51 |     def mark_removed(self):
52 |         self.state = TrackState.Removed
--------------------------------------------------------------------------------
/bytetrack/byte_tracker.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from collections import deque
 3 | import os
 4 | import os.path as osp
 5 | import copy
 6 | 
 7 | from .kalman_filter import KalmanFilter
 8 | from bytetrack import matching
 9 | from .basetrack import BaseTrack, TrackState
10 | 
11 | class STrack(BaseTrack):
12 |     shared_kalman = KalmanFilter()
13 |     def __init__(self, tlwh, score):
14 | 
15 |         # wait activate
16 |         self._tlwh = np.asarray(tlwh, dtype=float)  # builtin float: np.float is removed in NumPy >= 1.24
17 |         self.kalman_filter = None
18 |         self.mean, self.covariance = None, None
19 |         self.is_activated = False
20 | 
21 |         self.score = score
22 |         self.tracklet_len = 0
23 | 
24 |     def predict(self):
25 |         mean_state = self.mean.copy()
26 |         if self.state != TrackState.Tracked:
27 |             mean_state[7] = 0
28 |         self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance)
29 | 
30 |     @staticmethod
31 |     def multi_predict(stracks):
32 |         if len(stracks) > 0:
33 |             multi_mean = 
np.asarray([st.mean.copy() for st in stracks]) 34 | multi_covariance = np.asarray([st.covariance for st in stracks]) 35 | for i, st in enumerate(stracks): 36 | if st.state != TrackState.Tracked: 37 | multi_mean[i][7] = 0 38 | multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance) 39 | for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)): 40 | stracks[i].mean = mean 41 | stracks[i].covariance = cov 42 | 43 | def activate(self, kalman_filter, frame_id): 44 | """Start a new tracklet""" 45 | self.kalman_filter = kalman_filter 46 | self.track_id = self.next_id() 47 | self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(self._tlwh)) 48 | 49 | self.tracklet_len = 0 50 | self.state = TrackState.Tracked 51 | if frame_id == 1: 52 | self.is_activated = True 53 | # self.is_activated = True 54 | self.frame_id = frame_id 55 | self.start_frame = frame_id 56 | 57 | def re_activate(self, new_track, frame_id, new_id=False): 58 | self.mean, self.covariance = self.kalman_filter.update( 59 | self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh) 60 | ) 61 | self.tracklet_len = 0 62 | self.state = TrackState.Tracked 63 | self.is_activated = True 64 | self.frame_id = frame_id 65 | if new_id: 66 | self.track_id = self.next_id() 67 | self.score = new_track.score 68 | 69 | def update(self, new_track, frame_id): 70 | """ 71 | Update a matched track 72 | :type new_track: STrack 73 | :type frame_id: int 74 | :type update_feature: bool 75 | :return: 76 | """ 77 | self.frame_id = frame_id 78 | self.tracklet_len += 1 79 | 80 | new_tlwh = new_track.tlwh 81 | self.mean, self.covariance = self.kalman_filter.update( 82 | self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh)) 83 | self.state = TrackState.Tracked 84 | self.is_activated = True 85 | 86 | self.score = new_track.score 87 | 88 | @property 89 | # @jit(nopython=True) 90 | def tlwh(self): 91 | """Get current position in bounding box format `(top left x, top left y, 92 | width, height)`. 93 | """ 94 | if self.mean is None: 95 | return self._tlwh.copy() 96 | ret = self.mean[:4].copy() 97 | ret[2] *= ret[3] 98 | ret[:2] -= ret[2:] / 2 99 | return ret 100 | 101 | @property 102 | # @jit(nopython=True) 103 | def tlbr(self): 104 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., 105 | `(top left, bottom right)`. 106 | """ 107 | ret = self.tlwh.copy() 108 | ret[2:] += ret[:2] 109 | return ret 110 | 111 | @staticmethod 112 | # @jit(nopython=True) 113 | def tlwh_to_xyah(tlwh): 114 | """Convert bounding box to format `(center x, center y, aspect ratio, 115 | height)`, where the aspect ratio is `width / height`. 
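
        Example (illustrative): tlwh (10, 20, 50, 100) -> xyah (35, 70, 0.5, 100),
        since the center is (10 + 50/2, 20 + 100/2) and the aspect ratio is 50/100.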
116 | """ 117 | ret = np.asarray(tlwh).copy() 118 | ret[:2] += ret[2:] / 2 119 | ret[2] /= ret[3] 120 | return ret 121 | 122 | def to_xyah(self): 123 | return self.tlwh_to_xyah(self.tlwh) 124 | 125 | @staticmethod 126 | # @jit(nopython=True) 127 | def tlbr_to_tlwh(tlbr): 128 | ret = np.asarray(tlbr).copy() 129 | ret[2:] -= ret[:2] 130 | return ret 131 | 132 | @staticmethod 133 | # @jit(nopython=True) 134 | def tlwh_to_tlbr(tlwh): 135 | ret = np.asarray(tlwh).copy() 136 | ret[2:] += ret[:2] 137 | return ret 138 | 139 | def __repr__(self): 140 | return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame) 141 | 142 | 143 | class BYTETracker(object): 144 | def __init__(self, args, frame_rate=30): 145 | self.tracked_stracks = [] # type: list[STrack] 146 | self.lost_stracks = [] # type: list[STrack] 147 | self.removed_stracks = [] # type: list[STrack] 148 | 149 | self.frame_id = 0 150 | self.args = args 151 | #self.det_thresh = args.track_thresh 152 | self.det_thresh = args.track_thresh + 0.1 153 | self.buffer_size = int(frame_rate / 30.0 * args.track_buffer) 154 | self.max_time_lost = self.buffer_size 155 | self.kalman_filter = KalmanFilter() 156 | 157 | def update(self, output_results, img_info, img_size): 158 | self.frame_id += 1 159 | activated_starcks = [] 160 | refind_stracks = [] 161 | lost_stracks = [] 162 | removed_stracks = [] 163 | 164 | if output_results.shape[1] == 5: 165 | scores = output_results[:, 4] 166 | bboxes = output_results[:, :4] 167 | else: 168 | output_results = output_results.cpu().numpy() 169 | scores = output_results[:, 4] * output_results[:, 5] 170 | bboxes = output_results[:, :4] # x1y1x2y2 171 | img_h, img_w = img_info[0], img_info[1] 172 | scale = min(img_size[0] / float(img_h), img_size[1] / float(img_w)) 173 | bboxes /= scale 174 | 175 | remain_inds = scores > self.args.track_thresh 176 | inds_low = scores > 0.1 177 | inds_high = scores < self.args.track_thresh 178 | 179 | inds_second = np.logical_and(inds_low, inds_high) 180 | dets_second = bboxes[inds_second] 181 | dets = bboxes[remain_inds] 182 | scores_keep = scores[remain_inds] 183 | scores_second = scores[inds_second] 184 | 185 | if len(dets) > 0: 186 | '''Detections''' 187 | detections = [STrack(STrack.tlbr_to_tlwh(tlbr), s) for 188 | (tlbr, s) in zip(dets, scores_keep)] 189 | else: 190 | detections = [] 191 | 192 | ''' Add newly detected tracklets to tracked_stracks''' 193 | unconfirmed = [] 194 | tracked_stracks = [] # type: list[STrack] 195 | for track in self.tracked_stracks: 196 | if not track.is_activated: 197 | unconfirmed.append(track) 198 | else: 199 | tracked_stracks.append(track) 200 | 201 | ''' Step 2: First association, with high score detection boxes''' 202 | strack_pool = joint_stracks(tracked_stracks, self.lost_stracks) 203 | # Predict the current location with KF 204 | STrack.multi_predict(strack_pool) 205 | dists = matching.iou_distance(strack_pool, detections) 206 | if not self.args.mot20: 207 | dists = matching.fuse_score(dists, detections) 208 | matches, u_track, u_detection = matching.linear_assignment(dists, thresh=self.args.match_thresh) 209 | 210 | for itracked, idet in matches: 211 | track = strack_pool[itracked] 212 | det = detections[idet] 213 | if track.state == TrackState.Tracked: 214 | track.update(detections[idet], self.frame_id) 215 | activated_starcks.append(track) 216 | else: 217 | track.re_activate(det, self.frame_id, new_id=False) 218 | refind_stracks.append(track) 219 | 220 | ''' Step 3: Second association, with low score detection boxes''' 221 | # 
association the untrack to the low score detections 222 | if len(dets_second) > 0: 223 | '''Detections''' 224 | detections_second = [STrack(STrack.tlbr_to_tlwh(tlbr), s) for 225 | (tlbr, s) in zip(dets_second, scores_second)] 226 | else: 227 | detections_second = [] 228 | r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked] 229 | dists = matching.iou_distance(r_tracked_stracks, detections_second) 230 | matches, u_track, u_detection_second = matching.linear_assignment(dists, thresh=0.5) 231 | for itracked, idet in matches: 232 | track = r_tracked_stracks[itracked] 233 | det = detections_second[idet] 234 | if track.state == TrackState.Tracked: 235 | track.update(det, self.frame_id) 236 | activated_starcks.append(track) 237 | else: 238 | track.re_activate(det, self.frame_id, new_id=False) 239 | refind_stracks.append(track) 240 | 241 | for it in u_track: 242 | track = r_tracked_stracks[it] 243 | if not track.state == TrackState.Lost: 244 | track.mark_lost() 245 | lost_stracks.append(track) 246 | 247 | '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' 248 | detections = [detections[i] for i in u_detection] 249 | dists = matching.iou_distance(unconfirmed, detections) 250 | if not self.args.mot20: 251 | dists = matching.fuse_score(dists, detections) 252 | matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7) 253 | for itracked, idet in matches: 254 | unconfirmed[itracked].update(detections[idet], self.frame_id) 255 | activated_starcks.append(unconfirmed[itracked]) 256 | for it in u_unconfirmed: 257 | track = unconfirmed[it] 258 | track.mark_removed() 259 | removed_stracks.append(track) 260 | 261 | """ Step 4: Init new stracks""" 262 | for inew in u_detection: 263 | track = detections[inew] 264 | if track.score < self.det_thresh: 265 | continue 266 | track.activate(self.kalman_filter, self.frame_id) 267 | activated_starcks.append(track) 268 | """ Step 5: Update state""" 269 | for track in self.lost_stracks: 270 | if self.frame_id - track.end_frame > self.max_time_lost: 271 | track.mark_removed() 272 | removed_stracks.append(track) 273 | 274 | # print('Ramained match {} s'.format(t4-t3)) 275 | 276 | self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked] 277 | self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks) 278 | self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks) 279 | self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks) 280 | self.lost_stracks.extend(lost_stracks) 281 | self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks) 282 | self.removed_stracks.extend(removed_stracks) 283 | self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks) 284 | # get scores of lost tracks 285 | output_stracks = [track for track in self.tracked_stracks if track.is_activated] 286 | 287 | return output_stracks 288 | 289 | 290 | def joint_stracks(tlista, tlistb): 291 | exists = {} 292 | res = [] 293 | for t in tlista: 294 | exists[t.track_id] = 1 295 | res.append(t) 296 | for t in tlistb: 297 | tid = t.track_id 298 | if not exists.get(tid, 0): 299 | exists[tid] = 1 300 | res.append(t) 301 | return res 302 | 303 | 304 | def sub_stracks(tlista, tlistb): 305 | stracks = {} 306 | for t in tlista: 307 | stracks[t.track_id] = t 308 | for t in tlistb: 309 | tid = t.track_id 310 | if stracks.get(tid, 0): 311 | del stracks[tid] 312 | return 
list(stracks.values()) 313 | 314 | 315 | def remove_duplicate_stracks(stracksa, stracksb): 316 | pdist = matching.iou_distance(stracksa, stracksb) 317 | pairs = np.where(pdist < 0.15) 318 | dupa, dupb = list(), list() 319 | for p, q in zip(*pairs): 320 | timep = stracksa[p].frame_id - stracksa[p].start_frame 321 | timeq = stracksb[q].frame_id - stracksb[q].start_frame 322 | if timep > timeq: 323 | dupb.append(q) 324 | else: 325 | dupa.append(p) 326 | resa = [t for i, t in enumerate(stracksa) if not i in dupa] 327 | resb = [t for i, t in enumerate(stracksb) if not i in dupb] 328 | return resa, resb 329 | -------------------------------------------------------------------------------- /bytetrack/kalman_filter.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import scipy.linalg 4 | 5 | 6 | """ 7 | Table for the 0.95 quantile of the chi-square distribution with N degrees of 8 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv 9 | function and used as Mahalanobis gating threshold. 10 | """ 11 | chi2inv95 = { 12 | 1: 3.8415, 13 | 2: 5.9915, 14 | 3: 7.8147, 15 | 4: 9.4877, 16 | 5: 11.070, 17 | 6: 12.592, 18 | 7: 14.067, 19 | 8: 15.507, 20 | 9: 16.919} 21 | 22 | 23 | class KalmanFilter(object): 24 | """ 25 | A simple Kalman filter for tracking bounding boxes in image space. 26 | 27 | The 8-dimensional state space 28 | 29 | x, y, a, h, vx, vy, va, vh 30 | 31 | contains the bounding box center position (x, y), aspect ratio a, height h, 32 | and their respective velocities. 33 | 34 | Object motion follows a constant velocity model. The bounding box location 35 | (x, y, a, h) is taken as direct observation of the state space (linear 36 | observation model). 37 | 38 | """ 39 | 40 | def __init__(self): 41 | ndim, dt = 4, 1. 42 | 43 | # Create Kalman filter model matrices. 44 | self._motion_mat = np.eye(2 * ndim, 2 * ndim) 45 | for i in range(ndim): 46 | self._motion_mat[i, ndim + i] = dt 47 | self._update_mat = np.eye(ndim, 2 * ndim) 48 | 49 | # Motion and observation uncertainty are chosen relative to the current 50 | # state estimate. These weights control the amount of uncertainty in 51 | # the model. This is a bit hacky. 52 | self._std_weight_position = 1. / 20 53 | self._std_weight_velocity = 1. / 160 54 | 55 | def initiate(self, measurement): 56 | """Create track from unassociated measurement. 57 | 58 | Parameters 59 | ---------- 60 | measurement : ndarray 61 | Bounding box coordinates (x, y, a, h) with center position (x, y), 62 | aspect ratio a, and height h. 63 | 64 | Returns 65 | ------- 66 | (ndarray, ndarray) 67 | Returns the mean vector (8 dimensional) and covariance matrix (8x8 68 | dimensional) of the new track. Unobserved velocities are initialized 69 | to 0 mean. 70 | 71 | """ 72 | mean_pos = measurement 73 | mean_vel = np.zeros_like(mean_pos) 74 | mean = np.r_[mean_pos, mean_vel] 75 | 76 | std = [ 77 | 2 * self._std_weight_position * measurement[3], 78 | 2 * self._std_weight_position * measurement[3], 79 | 1e-2, 80 | 2 * self._std_weight_position * measurement[3], 81 | 10 * self._std_weight_velocity * measurement[3], 82 | 10 * self._std_weight_velocity * measurement[3], 83 | 1e-5, 84 | 10 * self._std_weight_velocity * measurement[3]] 85 | covariance = np.diag(np.square(std)) 86 | return mean, covariance 87 | 88 | def predict(self, mean, covariance): 89 | """Run Kalman filter prediction step. 
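
        In matrix form this computes x' = F x and P' = F P F^T + Q, where F is the
        constant-velocity motion matrix (self._motion_mat) and the process noise Q
        (motion_cov below) scales with the box height.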
90 | 91 | Parameters 92 | ---------- 93 | mean : ndarray 94 | The 8 dimensional mean vector of the object state at the previous 95 | time step. 96 | covariance : ndarray 97 | The 8x8 dimensional covariance matrix of the object state at the 98 | previous time step. 99 | 100 | Returns 101 | ------- 102 | (ndarray, ndarray) 103 | Returns the mean vector and covariance matrix of the predicted 104 | state. Unobserved velocities are initialized to 0 mean. 105 | 106 | """ 107 | std_pos = [ 108 | self._std_weight_position * mean[3], 109 | self._std_weight_position * mean[3], 110 | 1e-2, 111 | self._std_weight_position * mean[3]] 112 | std_vel = [ 113 | self._std_weight_velocity * mean[3], 114 | self._std_weight_velocity * mean[3], 115 | 1e-5, 116 | self._std_weight_velocity * mean[3]] 117 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) 118 | 119 | #mean = np.dot(self._motion_mat, mean) 120 | mean = np.dot(mean, self._motion_mat.T) 121 | covariance = np.linalg.multi_dot(( 122 | self._motion_mat, covariance, self._motion_mat.T)) + motion_cov 123 | 124 | return mean, covariance 125 | 126 | def project(self, mean, covariance): 127 | """Project state distribution to measurement space. 128 | 129 | Parameters 130 | ---------- 131 | mean : ndarray 132 | The state's mean vector (8 dimensional array). 133 | covariance : ndarray 134 | The state's covariance matrix (8x8 dimensional). 135 | 136 | Returns 137 | ------- 138 | (ndarray, ndarray) 139 | Returns the projected mean and covariance matrix of the given state 140 | estimate. 141 | 142 | """ 143 | std = [ 144 | self._std_weight_position * mean[3], 145 | self._std_weight_position * mean[3], 146 | 1e-1, 147 | self._std_weight_position * mean[3]] 148 | innovation_cov = np.diag(np.square(std)) 149 | 150 | mean = np.dot(self._update_mat, mean) 151 | covariance = np.linalg.multi_dot(( 152 | self._update_mat, covariance, self._update_mat.T)) 153 | return mean, covariance + innovation_cov 154 | 155 | def multi_predict(self, mean, covariance): 156 | """Run Kalman filter prediction step (Vectorized version). 157 | Parameters 158 | ---------- 159 | mean : ndarray 160 | The Nx8 dimensional mean matrix of the object states at the previous 161 | time step. 162 | covariance : ndarray 163 | The Nx8x8 dimensional covariance matrics of the object states at the 164 | previous time step. 165 | Returns 166 | ------- 167 | (ndarray, ndarray) 168 | Returns the mean vector and covariance matrix of the predicted 169 | state. Unobserved velocities are initialized to 0 mean. 170 | """ 171 | std_pos = [ 172 | self._std_weight_position * mean[:, 3], 173 | self._std_weight_position * mean[:, 3], 174 | 1e-2 * np.ones_like(mean[:, 3]), 175 | self._std_weight_position * mean[:, 3]] 176 | std_vel = [ 177 | self._std_weight_velocity * mean[:, 3], 178 | self._std_weight_velocity * mean[:, 3], 179 | 1e-5 * np.ones_like(mean[:, 3]), 180 | self._std_weight_velocity * mean[:, 3]] 181 | sqr = np.square(np.r_[std_pos, std_vel]).T 182 | 183 | motion_cov = [] 184 | for i in range(len(mean)): 185 | motion_cov.append(np.diag(sqr[i])) 186 | motion_cov = np.asarray(motion_cov) 187 | 188 | mean = np.dot(mean, self._motion_mat.T) 189 | left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2)) 190 | covariance = np.dot(left, self._motion_mat.T) + motion_cov 191 | 192 | return mean, covariance 193 | 194 | def update(self, mean, covariance, measurement): 195 | """Run Kalman filter correction step. 
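
        Implements the standard Kalman update: with projected mean H x and innovation
        covariance S = H P H^T + R (from project()), the gain K = P H^T S^{-1} is
        obtained via a Cholesky solve, then x' = x + K (z - H x) and P' = P - K S K^T.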
196 | 197 | Parameters 198 | ---------- 199 | mean : ndarray 200 | The predicted state's mean vector (8 dimensional). 201 | covariance : ndarray 202 | The state's covariance matrix (8x8 dimensional). 203 | measurement : ndarray 204 | The 4 dimensional measurement vector (x, y, a, h), where (x, y) 205 | is the center position, a the aspect ratio, and h the height of the 206 | bounding box. 207 | 208 | Returns 209 | ------- 210 | (ndarray, ndarray) 211 | Returns the measurement-corrected state distribution. 212 | 213 | """ 214 | projected_mean, projected_cov = self.project(mean, covariance) 215 | 216 | chol_factor, lower = scipy.linalg.cho_factor( 217 | projected_cov, lower=True, check_finite=False) 218 | kalman_gain = scipy.linalg.cho_solve( 219 | (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, 220 | check_finite=False).T 221 | innovation = measurement - projected_mean 222 | 223 | new_mean = mean + np.dot(innovation, kalman_gain.T) 224 | new_covariance = covariance - np.linalg.multi_dot(( 225 | kalman_gain, projected_cov, kalman_gain.T)) 226 | return new_mean, new_covariance 227 | 228 | def gating_distance(self, mean, covariance, measurements, 229 | only_position=False, metric='maha'): 230 | """Compute gating distance between state distribution and measurements. 231 | A suitable distance threshold can be obtained from `chi2inv95`. If 232 | `only_position` is False, the chi-square distribution has 4 degrees of 233 | freedom, otherwise 2. 234 | Parameters 235 | ---------- 236 | mean : ndarray 237 | Mean vector over the state distribution (8 dimensional). 238 | covariance : ndarray 239 | Covariance of the state distribution (8x8 dimensional). 240 | measurements : ndarray 241 | An Nx4 dimensional matrix of N measurements, each in 242 | format (x, y, a, h) where (x, y) is the bounding box center 243 | position, a the aspect ratio, and h the height. 244 | only_position : Optional[bool] 245 | If True, distance computation is done with respect to the bounding 246 | box center position only. 247 | Returns 248 | ------- 249 | ndarray 250 | Returns an array of length N, where the i-th element contains the 251 | squared Mahalanobis distance between (mean, covariance) and 252 | `measurements[i]`. 
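
        For metric='maha' this is d^2 = (z - mu)^T S^{-1} (z - mu), evaluated by
        solving the Cholesky factor of S against the residuals; metric='gaussian'
        falls back to the plain squared Euclidean distance.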
253 |         """
254 |         mean, covariance = self.project(mean, covariance)
255 |         if only_position:
256 |             mean, covariance = mean[:2], covariance[:2, :2]
257 |             measurements = measurements[:, :2]
258 | 
259 |         d = measurements - mean
260 |         if metric == 'gaussian':
261 |             return np.sum(d * d, axis=1)
262 |         elif metric == 'maha':
263 |             cholesky_factor = np.linalg.cholesky(covariance)
264 |             z = scipy.linalg.solve_triangular(
265 |                 cholesky_factor, d.T, lower=True, check_finite=False,
266 |                 overwrite_b=True)
267 |             squared_maha = np.sum(z * z, axis=0)
268 |             return squared_maha
269 |         else:
270 |             raise ValueError('invalid distance metric')
--------------------------------------------------------------------------------
/bytetrack/matching.py:
--------------------------------------------------------------------------------
  1 | import cv2
  2 | import numpy as np
  3 | import scipy
  4 | import lap
  5 | from scipy.spatial.distance import cdist
  6 | 
  7 | from cython_bbox import bbox_overlaps as bbox_ious
  8 | from bytetrack import kalman_filter
  9 | import time
 10 | 
 11 | def merge_matches(m1, m2, shape):
 12 |     O,P,Q = shape
 13 |     m1 = np.asarray(m1)
 14 |     m2 = np.asarray(m2)
 15 | 
 16 |     M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P))
 17 |     M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q))
 18 | 
 19 |     mask = M1*M2
 20 |     match = mask.nonzero()
 21 |     match = list(zip(match[0], match[1]))
 22 |     unmatched_O = tuple(set(range(O)) - set([i for i, j in match]))
 23 |     unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match]))
 24 | 
 25 |     return match, unmatched_O, unmatched_Q
 26 | 
 27 | 
 28 | def _indices_to_matches(cost_matrix, indices, thresh):
 29 |     matched_cost = cost_matrix[tuple(zip(*indices))]
 30 |     matched_mask = (matched_cost <= thresh)
 31 | 
 32 |     matches = indices[matched_mask]
 33 |     unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0]))
 34 |     unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1]))
 35 | 
 36 |     return matches, unmatched_a, unmatched_b
 37 | 
 38 | 
 39 | def linear_assignment(cost_matrix, thresh):
 40 |     if cost_matrix.size == 0:
 41 |         return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1]))
 42 |     matches, unmatched_a, unmatched_b = [], [], []
 43 |     cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
 44 |     for ix, mx in enumerate(x):
 45 |         if mx >= 0:
 46 |             matches.append([ix, mx])
 47 |     unmatched_a = np.where(x < 0)[0]
 48 |     unmatched_b = np.where(y < 0)[0]
 49 |     matches = np.asarray(matches)
 50 |     return matches, unmatched_a, unmatched_b
 51 | 
 52 | 
 53 | def ious(atlbrs, btlbrs):
 54 |     """
 55 |     Compute cost based on IoU
 56 |     :type atlbrs: list[tlbr] | np.ndarray
 57 |     :type btlbrs: list[tlbr] | np.ndarray
 58 | 
 59 |     :rtype ious np.ndarray
 60 |     """
 61 |     ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=float)
 62 |     if ious.size == 0:
 63 |         return ious
 64 | 
 65 |     ious = bbox_ious(
 66 |         np.ascontiguousarray(atlbrs, dtype=float),
 67 |         np.ascontiguousarray(btlbrs, dtype=float)
 68 |     )
 69 | 
 70 |     return ious
 71 | 
 72 | 
 73 | def iou_distance(atracks, btracks):
 74 |     """
 75 |     Compute cost based on IoU
 76 |     :type atracks: list[STrack]
 77 |     :type btracks: list[STrack]
 78 | 
 79 |     :rtype cost_matrix np.ndarray
 80 |     """
 81 | 
 82 |     if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
 83 |         atlbrs = atracks
 84 |         btlbrs = btracks
 85 |     else:
 86 |         atlbrs = [track.tlbr for track in atracks]
 87 |         btlbrs = [track.tlbr for track in btracks]
 88 |     _ious = ious(atlbrs, btlbrs)
 89 |     cost_matrix = 1 - _ious
 90 | 
 91 |     return cost_matrix
 92 | 
 93 | def v_iou_distance(atracks, btracks):
 94 |     """
 95 |     Compute cost based on IoU
 96 |     :type atracks: list[STrack]
 97 |     :type btracks: list[STrack]
 98 | 
 99 |     :rtype cost_matrix np.ndarray
100 |     """
101 | 
102 |     if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
103 |         atlbrs = atracks
104 |         btlbrs = btracks
105 |     else:
106 |         atlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in atracks]
107 |         btlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in btracks]
108 |     _ious = ious(atlbrs, btlbrs)
109 |     cost_matrix = 1 - _ious
110 | 
111 |     return cost_matrix
112 | 
113 | def embedding_distance(tracks, detections, metric='cosine'):
114 |     """
115 |     :param tracks: list[STrack]
116 |     :param detections: list[BaseTrack]
117 |     :param metric:
118 |     :return: cost_matrix np.ndarray
119 |     """
120 | 
121 |     cost_matrix = np.zeros((len(tracks), len(detections)), dtype=float)
122 |     if cost_matrix.size == 0:
123 |         return cost_matrix
124 |     det_features = np.asarray([track.curr_feat for track in detections], dtype=float)
125 |     #for i, track in enumerate(tracks):
126 |         #cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric))
127 |     track_features = np.asarray([track.smooth_feat for track in tracks], dtype=float)
128 |     cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric))  # Normalized features
129 |     return cost_matrix
130 | 
131 | 
132 | def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False):
133 |     if cost_matrix.size == 0:
134 |         return cost_matrix
135 |     gating_dim = 2 if only_position else 4
136 |     gating_threshold = kalman_filter.chi2inv95[gating_dim]
137 |     measurements = np.asarray([det.to_xyah() for det in detections])
138 |     for row, track in enumerate(tracks):
139 |         gating_distance = kf.gating_distance(
140 |             track.mean, track.covariance, measurements, only_position)
141 |         cost_matrix[row, gating_distance > gating_threshold] = np.inf
142 |     return cost_matrix
143 | 
144 | 
145 | def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98):
146 |     if cost_matrix.size == 0:
147 |         return cost_matrix
148 |     gating_dim = 2 if only_position else 4
149 |     gating_threshold = kalman_filter.chi2inv95[gating_dim]
150 |     measurements = np.asarray([det.to_xyah() for det in detections])
151 |     for row, track in enumerate(tracks):
152 |         gating_distance = kf.gating_distance(
153 |             track.mean, track.covariance, measurements, only_position, metric='maha')
154 |         cost_matrix[row, gating_distance > gating_threshold] = np.inf
155 |         cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance
156 |     return cost_matrix
157 | 
158 | 
159 | def fuse_iou(cost_matrix, tracks, detections):
160 |     if cost_matrix.size == 0:
161 |         return cost_matrix
162 |     reid_sim = 1 - cost_matrix
163 |     iou_dist = iou_distance(tracks, detections)
164 |     iou_sim = 1 - iou_dist
165 |     fuse_sim = reid_sim * (1 + iou_sim) / 2
166 |     det_scores = np.array([det.score for det in detections])
167 |     det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
168 |     #fuse_sim = fuse_sim * (1 + det_scores) / 2
169 |     fuse_cost = 1 - fuse_sim
170 |     return fuse_cost
171 | 
172 | 
173 | def fuse_score(cost_matrix, detections):
174 |     if cost_matrix.size == 0:
175 |         return cost_matrix
176 |     iou_sim = 1 - cost_matrix
177 |     det_scores = 
np.array([det.score for det in detections]) 178 | det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0) 179 | fuse_sim = iou_sim * det_scores 180 | fuse_cost = 1 - fuse_sim 181 | return fuse_cost -------------------------------------------------------------------------------- /test.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bigtuo/YOLOv8_Openvino/61fd58feb40d288905a2e7ee78fbdece094d10f6/test.mp4 --------------------------------------------------------------------------------