├── README.md
├── coco.names
├── images
│   ├── bus.jpg
│   ├── dog.jpg
│   ├── person.jpg
│   └── zidane.jpg
├── main.cpp
└── main.py

/README.md:
--------------------------------------------------------------------------------
# yolox-opencv-dnn
Deploy YOLOX with OpenCV. Five model structures are supported: YOLOX-S, YOLOX-M, YOLOX-L, YOLOX-X, and YOLOX-Darknet53. Both C++ and Python versions of the program are included.

The onnx files are hosted on Baidu Netdisk. Download link: https://pan.baidu.com/s/11UAVSPWbDKY_LmmoHlUQXw
Extraction code: 147w

After downloading, place the files in the same directory as the code files and the program is ready to run. If reading the onnx file fails, your OpenCV version is most likely too old; upgrade to 4.5 or later.

On October 20th I checked the official code at https://github.com/Megvii-BaseDetection/YOLOX.
At inference time, the new version's preprocessing no longer performs the BGR2RGB conversion, the division by 255.0, or the mean subtraction and division by the standard deviation.
Therefore, if you train with the latest code, export an onnx file, and run inference with the programs in this repository, you need to comment out the "BGR2RGB, divide by 255.0, subtract the mean and divide by the std" steps.
--------------------------------------------------------------------------------
/coco.names:
--------------------------------------------------------------------------------
person
bicycle
car
motorbike
aeroplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
sofa
pottedplant
bed
diningtable
toilet
tvmonitor
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
--------------------------------------------------------------------------------
/images/bus.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/yolox-opencv-dnn/d2c45b3f56b50bba955db8e8c32d4c364c0a0724/images/bus.jpg
--------------------------------------------------------------------------------
/images/dog.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/yolox-opencv-dnn/d2c45b3f56b50bba955db8e8c32d4c364c0a0724/images/dog.jpg
--------------------------------------------------------------------------------
/images/person.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/yolox-opencv-dnn/d2c45b3f56b50bba955db8e8c32d4c364c0a0724/images/person.jpg
--------------------------------------------------------------------------------
/images/zidane.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/yolox-opencv-dnn/d2c45b3f56b50bba955db8e8c32d4c364c0a0724/images/zidane.jpg
--------------------------------------------------------------------------------
/main.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/yolox-opencv-dnn/d2c45b3f56b50bba955db8e8c32d4c364c0a0724/main.cpp
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
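# NOTE (see the README above): models exported from the current official YOLOX
# repo (https://github.com/Megvii-BaseDetection/YOLOX) no longer expect BGR2RGB,
# division by 255.0, or mean/std normalization during preprocessing. For such
# models, comment out these four lines in preprocess() below:
#
#     image = image[:, :, ::-1]  # BGR -> RGB
#     image /= 255.0
#     image -= self.mean
#     image /= self.std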
import argparse

import cv2
import numpy as np


class YOLOX():
    def __init__(self, model, p6=False, confThreshold=0.5, nmsThreshold=0.5, objThreshold=0.5):
        with open('coco.names', 'rt') as f:
            self.class_names = f.read().rstrip('\n').split('\n')
        self.net = cv2.dnn.readNet(model)
        self.input_size = (640, 640)
        self.mean = (0.485, 0.456, 0.406)
        self.std = (0.229, 0.224, 0.225)
        # P6 models add an extra detection head at stride 64
        if not p6:
            self.strides = [8, 16, 32]
        else:
            self.strides = [8, 16, 32, 64]
        self.confThreshold = confThreshold
        self.nmsThreshold = nmsThreshold
        self.objThreshold = objThreshold

    def preprocess(self, image):
        """Letterbox-resize the image to the network input size, padding with gray (114)."""
        if len(image.shape) == 3:
            padded_img = np.ones((self.input_size[0], self.input_size[1], 3)) * 114.0
        else:
            padded_img = np.ones(self.input_size) * 114.0
        img = np.array(image)
        r = min(self.input_size[0] / img.shape[0], self.input_size[1] / img.shape[1])
        resized_img = cv2.resize(
            img, (int(img.shape[1] * r), int(img.shape[0] * r)), interpolation=cv2.INTER_LINEAR
        ).astype(np.float32)
        padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img
        image = padded_img

        # For models exported from the newer official YOLOX code, comment out
        # the four normalization lines below (see the note at the top of this file).
        image = image.astype(np.float32)
        image = image[:, :, ::-1]  # BGR -> RGB
        image /= 255.0
        image -= self.mean
        image /= self.std
        return image, r

    def demo_postprocess(self, outputs):
        """Decode raw network outputs into boxes in input-image coordinates."""
        grids = []
        expanded_strides = []
        hsizes = [self.input_size[0] // stride for stride in self.strides]
        wsizes = [self.input_size[1] // stride for stride in self.strides]

        for hsize, wsize, stride in zip(hsizes, wsizes, self.strides):
            # np.meshgrid takes x (width) first; with a square input this is
            # equivalent either way, but this order also handles non-square sizes
            xv, yv = np.meshgrid(np.arange(wsize), np.arange(hsize))
            grid = np.stack((xv, yv), 2).reshape(1, -1, 2)
            grids.append(grid)
            shape = grid.shape[:2]
            expanded_strides.append(np.full((*shape, 1), stride))

        grids = np.concatenate(grids, 1)
        expanded_strides = np.concatenate(expanded_strides, 1)
        # box centers are offsets from the grid cells, box sizes are log-encoded
        outputs[..., :2] = (outputs[..., :2] + grids) * expanded_strides
        outputs[..., 2:4] = np.exp(outputs[..., 2:4]) * expanded_strides
        return outputs

    def nms(self, boxes, scores):
        """Single class NMS implemented in Numpy."""
        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = boxes[:, 2]
        y2 = boxes[:, 3]

        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        order = scores.argsort()[::-1]

        keep = []
        while order.size > 0:
            i = order[0]
            keep.append(i)
            xx1 = np.maximum(x1[i], x1[order[1:]])
            yy1 = np.maximum(y1[i], y1[order[1:]])
            xx2 = np.minimum(x2[i], x2[order[1:]])
            yy2 = np.minimum(y2[i], y2[order[1:]])

            w = np.maximum(0.0, xx2 - xx1 + 1)
            h = np.maximum(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (areas[i] + areas[order[1:]] - inter)

            inds = np.where(ovr <= self.nmsThreshold)[0]
            order = order[inds + 1]

        return keep

    def multiclass_nms(self, boxes, scores):
        """Multiclass NMS implemented in Numpy."""
        final_dets = []
        num_classes = scores.shape[1]
        for cls_ind in range(num_classes):
            cls_scores = scores[:, cls_ind]
            valid_score_mask = cls_scores > self.confThreshold
            if valid_score_mask.sum() == 0:
                continue
            else:
                valid_scores = cls_scores[valid_score_mask]
                valid_boxes = boxes[valid_score_mask]
                keep = self.nms(valid_boxes, valid_scores)
                if len(keep) > 0:
                    cls_inds = np.ones((len(keep), 1)) * cls_ind
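                    # each detection row is [x1, y1, x2, y2, score, class_id]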
                    dets = np.concatenate([valid_boxes[keep], valid_scores[keep, None], cls_inds], 1)
                    final_dets.append(dets)
        if len(final_dets) == 0:
            return None
        return np.concatenate(final_dets, 0)

    def vis(self, img, boxes, scores, cls_ids):
        """Draw the detection boxes and class labels on the image."""
        for i in range(len(boxes)):
            box = boxes[i]
            cls_id = int(cls_ids[i])
            score = scores[i]
            if score < self.confThreshold:
                continue
            x0 = int(box[0])
            y0 = int(box[1])
            x1 = int(box[2])
            y1 = int(box[3])

            text = '{}:{:.1f}%'.format(self.class_names[cls_id], score * 100)
            font = cv2.FONT_HERSHEY_SIMPLEX
            txt_size = cv2.getTextSize(text, font, 0.4, 1)[0]
            cv2.rectangle(img, (x0, y0), (x1, y1), (0, 0, 255), 2)
            cv2.rectangle(img, (x0, y0 + 1), (x0 + txt_size[0] + 1, y0 + int(1.5 * txt_size[1])), (255, 255, 255), -1)
            cv2.putText(img, text, (x0, y0 + txt_size[1]), font, 0.4, (0, 255, 0), thickness=1)
        return img

    def detect(self, srcimg):
        img, ratio = self.preprocess(srcimg)
        blob = cv2.dnn.blobFromImage(img)
        self.net.setInput(blob)
        outs = self.net.forward(self.net.getUnconnectedOutLayersNames())
        predictions = self.demo_postprocess(outs[0])[0]

        boxes = predictions[:, :4]
        # class confidence = objectness score * per-class score
        scores = predictions[:, 4:5] * predictions[:, 5:]

        # convert (cx, cy, w, h) to (x1, y1, x2, y2) and undo the letterbox scaling
        boxes_xyxy = np.ones_like(boxes)
        boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2] / 2.
        boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3] / 2.
        boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2] / 2.
        boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3] / 2.
        boxes_xyxy /= ratio
        dets = self.multiclass_nms(boxes_xyxy, scores)
        if dets is not None:
            final_boxes, final_scores, final_cls_inds = dets[:, :4], dets[:, 4], dets[:, 5]
            srcimg = self.vis(srcimg, final_boxes, final_scores, final_cls_inds)
        return srcimg


if __name__ == '__main__':
    parser = argparse.ArgumentParser("opencv inference sample")
    parser.add_argument("--model", type=str, default="yolox_s.onnx", help="Input your onnx model.")
    parser.add_argument("--image_path", type=str, default='images/dog.jpg', help="Path to your input image.")
    parser.add_argument("--score_thr", type=float, default=0.3, help="Score threshold to filter the results.")
    parser.add_argument("--with_p6", action="store_true", help="Whether your model uses p6 in FPN/PAN.")
    args = parser.parse_args()

    net = YOLOX(args.model, p6=args.with_p6, confThreshold=args.score_thr)
    srcimg = cv2.imread(args.image_path)
    srcimg = net.detect(srcimg)

    winName = 'Deep learning object detection in OpenCV'
    cv2.namedWindow(winName, cv2.WINDOW_NORMAL)
    cv2.imshow(winName, srcimg)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
--------------------------------------------------------------------------------
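Usage: a minimal example, assuming yolox_s.onnx has been downloaded from the Netdisk link in the README and placed in the repository root:

    python main.py --model yolox_s.onnx --image_path images/dog.jpg --score_thr 0.3

Add --with_p6 for models exported with the extra stride-64 P6 head. The same onnx files are also used by the C++ version in main.cpp.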