├── README.md ├── coco.names ├── convert_onnx ├── detector.py └── pytorch2onnx.py ├── img ├── 000004.jpg ├── 000139.jpg ├── 000148.jpg ├── 000181.jpg └── 000230.jpg ├── main.cpp ├── main.py └── model.onnx /README.md: -------------------------------------------------------------------------------- 1 | # yolo-fastestv2-opencv 2 | 使用OpenCV部署Yolo-FastestV2,包含C++和Python两种版本的程序 3 | 4 | 根据运行体验,这套程序的运行速度真的很快,而且模型文件也很小,可以直接上传到仓库里, 5 | 不用再从百度云盘下载的。 6 | -------------------------------------------------------------------------------- /coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /convert_onnx/detector.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from model.fpn import * 5 | from model.backbone.shufflenetv2 import * 6 | 7 | class Detector(nn.Module): 8 | def __init__(self, classes, anchor_num, load_param): 9 | super(Detector, self).__init__() 10 | out_depth = 72 11 | stage_out_channels = [-1, 24, 48, 96, 192] 12 | 13 | self.backbone = ShuffleNetV2(stage_out_channels, load_param) 14 | self.fpn = LightFPN(stage_out_channels[-2] + stage_out_channels[-1], stage_out_channels[-1], out_depth) 15 | 16 | self.output_reg_layers = nn.Conv2d(out_depth, 4 * anchor_num, 1, 1, 0, bias=True) 17 | self.output_obj_layers = nn.Conv2d(out_depth, anchor_num, 1, 1, 0, bias=True) 18 | self.output_cls_layers = nn.Conv2d(out_depth, classes, 1, 1, 0, bias=True) 19 | 20 | def forward(self, x): 21 | C2, C3 = self.backbone(x) 22 | cls_2, obj_2, reg_2, cls_3, obj_3, reg_3 = self.fpn(C2, C3) 23 | 24 | out_reg_2 = self.output_reg_layers(reg_2) 25 | out_obj_2 = self.output_obj_layers(obj_2) 26 | out_cls_2 = self.output_cls_layers(cls_2) 27 | 28 | out_reg_3 = self.output_reg_layers(reg_3) 29 | out_obj_3 = self.output_obj_layers(obj_3) 30 | out_cls_3 = self.output_cls_layers(cls_3) 31 | if not torch.onnx.is_in_onnx_export(): 32 | return out_reg_2, out_obj_2, out_cls_2, out_reg_3, out_obj_3, out_cls_3 33 | else: 34 | # for out in (out_reg_2, out_obj_2, out_cls_2, out_reg_3, out_obj_3, out_cls_3): 35 | # print(out.shape) 36 | 37 | c = out_reg_2.shape[1] 38 | out_reg_2 = out_reg_2.permute(0, 2, 3, 1).view(-1, c) 39 | c = out_obj_2.shape[1] 40 | out_obj_2 = out_obj_2.permute(0, 2, 3, 1).view(-1, c) 41 | c = out_cls_2.shape[1] 42 | out_cls_2 = out_cls_2.permute(0, 2, 3, 1).view(-1, c) 43 | out_reg_2 = torch.sigmoid(out_reg_2) 44 | out_obj_2 = torch.sigmoid(out_obj_2) 45 | out_cls_2 = F.softmax(out_cls_2, dim=1) 46 | out2 = torch.cat((out_reg_2, out_obj_2, out_cls_2), dim=1) 47 | 48 | c = out_reg_3.shape[1] 49 | out_reg_3 = out_reg_3.permute(0, 2, 3, 1).view(-1, c) 50 | c = out_obj_3.shape[1] 51 | out_obj_3 = out_obj_3.permute(0, 2, 3, 1).view(-1, c) 52 | c = out_cls_3.shape[1] 53 | out_cls_3 = out_cls_3.permute(0, 2, 3, 1).view(-1, c) 54 | out_reg_3 = torch.sigmoid(out_reg_3) 55 | out_obj_3 = torch.sigmoid(out_obj_3) 56 | out_cls_3 = F.softmax(out_cls_3, dim=1) 57 | out3 = torch.cat((out_reg_3, out_obj_3, out_cls_3), dim=1) 58 | return torch.cat((out2, out3), dim=0) 59 | 60 | if __name__ == "__main__": 61 | model = Detector(80, 3, False) 62 | test_data = torch.rand(1, 3, 352, 352) 63 | torch.onnx.export(model, #model being run 64 | test_data, # model input (or a tuple for multiple inputs) 65 | "test.onnx", # where to save the model (can be a file or file-like object) 66 | export_params=True, # store the trained parameter weights inside the model file 67 | opset_version=11, # the ONNX version to export the model to 68 | do_constant_folding=True) # whether to execute constant folding for optimization 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /convert_onnx/pytorch2onnx.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | import model.detector 4 | import utils.utils 5 | 6 | if __name__ == '__main__': 7 | #指定训练配置文件 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument('--data', type=str, default='', 10 | help='Specify training profile *.data') 11 | parser.add_argument('--weights', type=str, default='', 12 | help='The path of the .pth model to be transformed') 13 | 14 | parser.add_argument('--output', type=str, default='./model.onnx', 15 | help='The path where the onnx model is saved') 16 | 17 | opt = parser.parse_args() 18 | cfg = utils.utils.load_datafile(opt.data) 19 | 20 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 21 | model = model.detector.Detector(cfg["classes"], cfg["anchor_num"], True) 22 | model.load_state_dict(torch.load(opt.weights, map_location=device)) 23 | #sets the module in eval node 24 | model.eval() 25 | 26 | test_data = torch.rand(1, 3, cfg["height"], cfg["width"]) 27 | torch.onnx.export(model, #model being run 28 | test_data, # model input (or a tuple for multiple inputs) 29 | opt.output, # where to save the model (can be a file or file-like object) 30 | export_params=True, # store the trained parameter weights inside the model file 31 | opset_version=11, # the ONNX version to export the model to 32 | do_constant_folding=True) # whether to execute constant folding for optimization 33 | # torch.onnx.export(model, # model being run 34 | # test_data, # model input (or a tuple for multiple inputs) 35 | # opt.output, # where to save the model (can be a file or file-like object) 36 | # opset_version=11, # the ONNX version to export the model to 37 | # input_names = ['images'], output_names = ['out']) 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /img/000004.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/yolo-fastestv2-opencv/f865b7c18780c5e5f699e5fcb66074fad0ba3497/img/000004.jpg -------------------------------------------------------------------------------- /img/000139.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/yolo-fastestv2-opencv/f865b7c18780c5e5f699e5fcb66074fad0ba3497/img/000139.jpg -------------------------------------------------------------------------------- /img/000148.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/yolo-fastestv2-opencv/f865b7c18780c5e5f699e5fcb66074fad0ba3497/img/000148.jpg -------------------------------------------------------------------------------- /img/000181.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/yolo-fastestv2-opencv/f865b7c18780c5e5f699e5fcb66074fad0ba3497/img/000181.jpg -------------------------------------------------------------------------------- /img/000230.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/yolo-fastestv2-opencv/f865b7c18780c5e5f699e5fcb66074fad0ba3497/img/000230.jpg -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace cv; 9 | using namespace dnn; 10 | using namespace std; 11 | 12 | class yolo_fast 13 | { 14 | public: 15 | yolo_fast(string modelpath, float objThreshold, float confThreshold, float nmsThreshold); 16 | void detect(Mat& srcimg); 17 | 18 | private: 19 | private: 20 | const float anchors[2][6] = { {12.64,19.39, 37.88,51.48, 55.71,138.31}, {126.91,78.23, 131.57,214.55, 279.92,258.87} }; 21 | const float stride[3] = { 16.0, 32.0 }; 22 | const int inpWidth = 352; 23 | const int inpHeight = 352; 24 | const int num_stage = 2; 25 | const int anchor_num = 3; 26 | float objThreshold; 27 | float confThreshold; 28 | float nmsThreshold; 29 | vector classes; 30 | const string classesFile = "coco.names"; 31 | int num_class; 32 | Net net; 33 | void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame); 34 | }; 35 | 36 | yolo_fast::yolo_fast(string modelpath, float obj_Threshold, float conf_Threshold, float nms_Threshold) 37 | { 38 | this->objThreshold = obj_Threshold; 39 | this->confThreshold = conf_Threshold; 40 | this->nmsThreshold = nms_Threshold; 41 | 42 | ifstream ifs(this->classesFile.c_str()); 43 | string line; 44 | while (getline(ifs, line)) this->classes.push_back(line); 45 | this->num_class = this->classes.size(); 46 | this->net = readNet(modelpath); 47 | } 48 | 49 | void yolo_fast::drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame) // Draw the predicted bounding box 50 | { 51 | //Draw a rectangle displaying the bounding box 52 | rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 0, 255), 2); 53 | 54 | //Get the label for the class name and its confidence 55 | string label = format("%.2f", conf); 56 | label = this->classes[classId] + ":" + label; 57 | 58 | //Display the label at the top of the bounding box 59 | int baseLine; 60 | Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); 61 | top = max(top, labelSize.height); 62 | //rectangle(frame, Point(left, top - int(1.5 * labelSize.height)), Point(left + int(1.5 * labelSize.width), top + baseLine), Scalar(0, 255, 0), FILLED); 63 | putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 1, Scalar(0, 255, 0), 1); 64 | } 65 | 66 | void yolo_fast::detect(Mat& frame) 67 | { 68 | Mat blob; 69 | blobFromImage(frame, blob, 1 / 255.0, Size(this->inpWidth, this->inpHeight)); 70 | this->net.setInput(blob); 71 | vector outs; 72 | this->net.forward(outs, this->net.getUnconnectedOutLayersNames()); 73 | 74 | /////generate proposals 75 | vector classIds; 76 | vector confidences; 77 | vector boxes; 78 | float ratioh = (float)frame.rows / this->inpHeight, ratiow = (float)frame.cols / this->inpWidth; 79 | int n = 0, q = 0, i = 0, j = 0, nout = this->anchor_num * 5 + this->classes.size(), row_ind = 0; 80 | float* pdata = (float*)outs[0].data; 81 | for (n = 0; n < this->num_stage; n++) ///stage 82 | { 83 | int num_grid_x = (int)(this->inpWidth / this->stride[n]); 84 | int num_grid_y = (int)(this->inpHeight / this->stride[n]); 85 | for (i = 0; i < num_grid_y; i++) 86 | { 87 | for (j = 0; j < num_grid_x; j++) 88 | { 89 | Mat scores = outs[0].row(row_ind).colRange(this->anchor_num * 5, outs[0].cols); 90 | Point classIdPoint; 91 | double max_class_socre; 92 | // Get the value and location of the maximum score 93 | minMaxLoc(scores, 0, &max_class_socre, 0, &classIdPoint); 94 | for (q = 0; q < this->anchor_num; q++) ///anchor 95 | { 96 | const float anchor_w = this->anchors[n][q * 2]; 97 | const float anchor_h = this->anchors[n][q * 2 + 1]; 98 | float box_score = pdata[4 * this->anchor_num + q]; 99 | if (box_score > this->objThreshold && max_class_socre > this->confThreshold) 100 | { 101 | float cx = (pdata[4 * q] * 2.f - 0.5f + j) * this->stride[n]; ///cx 102 | float cy = (pdata[4 * q+ 1] * 2.f - 0.5f + i) * this->stride[n]; ///cy 103 | float w = powf(pdata[4 * q + 2] * 2.f, 2.f) * anchor_w; ///w 104 | float h = powf(pdata[4 * q + 3] * 2.f, 2.f) * anchor_h; ///h 105 | 106 | int left = (cx - 0.5*w)*ratiow; 107 | int top = (cy - 0.5*h)*ratioh; ///���껹ԭ��ԭͼ�� 108 | 109 | classIds.push_back(classIdPoint.x); 110 | confidences.push_back(box_score * max_class_socre); 111 | boxes.push_back(Rect(left, top, (int)(w*ratiow), (int)(h*ratioh))); 112 | } 113 | } 114 | row_ind++; 115 | pdata += nout; 116 | } 117 | } 118 | } 119 | 120 | // Perform non maximum suppression to eliminate redundant overlapping boxes with 121 | // lower confidences 122 | vector indices; 123 | NMSBoxes(boxes, confidences, this->confThreshold, this->nmsThreshold, indices); 124 | for (size_t i = 0; i < indices.size(); ++i) 125 | { 126 | int idx = indices[i]; 127 | Rect box = boxes[idx]; 128 | this->drawPred(classIds[idx], confidences[idx], box.x, box.y, 129 | box.x + box.width, box.y + box.height, frame); 130 | } 131 | } 132 | 133 | int main() 134 | { 135 | yolo_fast yolo_model("model.onnx", 0.3, 0.3, 0.4); 136 | string imgpath = "img/000148.jpg"; 137 | Mat srcimg = imread(imgpath); 138 | yolo_model.detect(srcimg); 139 | 140 | static const string kWinName = "Deep learning object detection in OpenCV"; 141 | namedWindow(kWinName, WINDOW_NORMAL); 142 | imshow(kWinName, srcimg); 143 | waitKey(0); 144 | destroyAllWindows(); 145 | } 146 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import argparse 4 | 5 | class yolo_fast_v2(): 6 | def __init__(self, objThreshold=0.3, confThreshold=0.3, nmsThreshold=0.4): 7 | with open('coco.names', 'rt') as f: 8 | self.classes = f.read().rstrip('\n').split('\n') ###这个是在coco数据集上训练的模型做opencv部署的,如果你在自己的数据集上训练出的模型做opencv部署,那么需要修改self.classes 9 | self.stride = [16, 32] 10 | self.anchor_num = 3 11 | self.anchors = np.array([12.64, 19.39, 37.88, 51.48, 55.71, 138.31, 126.91, 78.23, 131.57, 214.55, 279.92, 258.87], 12 | dtype=np.float32).reshape(len(self.stride), self.anchor_num, 2) 13 | self.inpWidth = 352 14 | self.inpHeight = 352 15 | self.net = cv2.dnn.readNet('model.onnx') 16 | self.confThreshold = confThreshold 17 | self.nmsThreshold = nmsThreshold 18 | self.objThreshold = objThreshold 19 | def _make_grid(self, nx=20, ny=20): 20 | xv, yv = np.meshgrid(np.arange(ny), np.arange(nx)) 21 | return np.stack((xv, yv), 2).reshape((-1, 2)).astype(np.float32) 22 | 23 | def postprocess(self, frame, outs): 24 | frameHeight = frame.shape[0] 25 | frameWidth = frame.shape[1] 26 | ratioh, ratiow = frameHeight / self.inpHeight, frameWidth / self.inpWidth 27 | # Scan through all the bounding boxes output from the network and keep only the 28 | # ones with high confidence scores. Assign the box's class label as the class with the highest score. 29 | classIds = [] 30 | confidences = [] 31 | boxes = [] 32 | for detection in outs: 33 | scores = detection[5:] 34 | classId = np.argmax(scores) 35 | confidence = scores[classId] 36 | if confidence > self.confThreshold and detection[4] > self.objThreshold: 37 | center_x = int(detection[0] * ratiow) 38 | center_y = int(detection[1] * ratioh) 39 | width = int(detection[2] * ratiow) 40 | height = int(detection[3] * ratioh) 41 | left = int(center_x - width / 2) 42 | top = int(center_y - height / 2) 43 | classIds.append(classId) 44 | # confidences.append(float(confidence)) 45 | confidences.append(float(confidence*detection[4])) 46 | boxes.append([left, top, width, height]) 47 | 48 | # Perform non maximum suppression to eliminate redundant overlapping boxes with 49 | # lower confidences. 50 | indices = cv2.dnn.NMSBoxes(boxes, confidences, self.confThreshold, self.nmsThreshold) 51 | for i in indices: 52 | i = i[0] 53 | box = boxes[i] 54 | left = box[0] 55 | top = box[1] 56 | width = box[2] 57 | height = box[3] 58 | frame = self.drawPred(frame, classIds[i], confidences[i], left, top, left + width, top + height) 59 | return frame 60 | 61 | def drawPred(self, frame, classId, conf, left, top, right, bottom): 62 | # Draw a bounding box. 63 | cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), thickness=2) 64 | 65 | label = '%.2f' % conf 66 | label = '%s:%s' % (self.classes[classId], label) 67 | 68 | # Display the label at the top of the bounding box 69 | labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1) 70 | top = max(top, labelSize[1]) 71 | # cv.rectangle(frame, (left, top - round(1.5 * labelSize[1])), (left + round(1.5 * labelSize[0]), top + baseLine), (255,255,255), cv.FILLED) 72 | cv2.putText(frame, label, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), thickness=1) 73 | return frame 74 | def detect(self, srcimg): 75 | blob = cv2.dnn.blobFromImage(srcimg, 1 / 255.0, (self.inpWidth, self.inpHeight)) 76 | self.net.setInput(blob) 77 | outs = self.net.forward(self.net.getUnconnectedOutLayersNames())[0] 78 | 79 | outputs = np.zeros((outs.shape[0]*self.anchor_num, 5+len(self.classes))) 80 | row_ind = 0 81 | for i in range(len(self.stride)): 82 | h, w = int(self.inpHeight / self.stride[i]), int(self.inpWidth / self.stride[i]) 83 | length = int(h * w) 84 | grid = self._make_grid(w, h) 85 | for j in range(self.anchor_num): 86 | top = row_ind+j*length 87 | left = 4*j 88 | outputs[top:top + length, 0:2] = (outs[row_ind:row_ind + length, left:left+2] * 2. - 0.5 + grid) * int(self.stride[i]) 89 | outputs[top:top + length, 2:4] = (outs[row_ind:row_ind + length, left+2:left+4] * 2) ** 2 * np.repeat(self.anchors[i, j, :].reshape(1,-1), h * w, axis=0) 90 | outputs[top:top + length, 4] = outs[row_ind:row_ind + length, 4*self.anchor_num+j] 91 | outputs[top:top + length, 5:] = outs[row_ind:row_ind + length, 5*self.anchor_num:] 92 | row_ind += length 93 | return outputs 94 | 95 | if __name__ == '__main__': 96 | parser = argparse.ArgumentParser() 97 | parser.add_argument('--imgpath', type=str, default='img/000139.jpg', help="image path") 98 | parser.add_argument('--objThreshold', default=0.3, type=float, help='object confidence') 99 | parser.add_argument('--confThreshold', default=0.3, type=float, help='class confidence') 100 | parser.add_argument('--nmsThreshold', default=0.4, type=float, help='nms iou thresh') 101 | args = parser.parse_args() 102 | 103 | srcimg = cv2.imread(args.imgpath) 104 | model = yolo_fast_v2(objThreshold=args.objThreshold, confThreshold=args.confThreshold, nmsThreshold=args.nmsThreshold) 105 | outputs = model.detect(srcimg) 106 | srcimg = model.postprocess(srcimg, outputs) 107 | 108 | winName = 'Deep learning object detection in OpenCV' 109 | cv2.namedWindow(winName, 0) 110 | cv2.imshow(winName, srcimg) 111 | cv2.waitKey(0) 112 | cv2.destroyAllWindows() -------------------------------------------------------------------------------- /model.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/yolo-fastestv2-opencv/f865b7c18780c5e5f699e5fcb66074fad0ba3497/model.onnx --------------------------------------------------------------------------------