├── README.md
├── coco.names
├── convert_onnx
    ├── detector.py
    └── pytorch2onnx.py
├── img
    ├── 000004.jpg
    ├── 000139.jpg
    ├── 000148.jpg
    ├── 000181.jpg
    └── 000230.jpg
├── main.cpp
├── main.py
└── model.onnx


/README.md:
--------------------------------------------------------------------------------
1 | # yolo-fastestv2-opencv
2 | 使用OpenCV部署Yolo-FastestV2，包含C++和Python两种版本的程序
3 | 
4 | 根据运行体验，这套程序的运行速度真的很快，而且模型文件也很小，可以直接上传到仓库里，
5 | 不用再从百度云盘下载的。
6 | 


--------------------------------------------------------------------------------
/coco.names:
--------------------------------------------------------------------------------
 1 | person
 2 | bicycle
 3 | car
 4 | motorbike
 5 | aeroplane
 6 | bus
 7 | train
 8 | truck
 9 | boat
10 | traffic light
11 | fire hydrant
12 | stop sign
13 | parking meter
14 | bench
15 | bird
16 | cat
17 | dog
18 | horse
19 | sheep
20 | cow
21 | elephant
22 | bear
23 | zebra
24 | giraffe
25 | backpack
26 | umbrella
27 | handbag
28 | tie
29 | suitcase
30 | frisbee
31 | skis
32 | snowboard
33 | sports ball
34 | kite
35 | baseball bat
36 | baseball glove
37 | skateboard
38 | surfboard
39 | tennis racket
40 | bottle
41 | wine glass
42 | cup
43 | fork
44 | knife
45 | spoon
46 | bowl
47 | banana
48 | apple
49 | sandwich
50 | orange
51 | broccoli
52 | carrot
53 | hot dog
54 | pizza
55 | donut
56 | cake
57 | chair
58 | sofa
59 | pottedplant
60 | bed
61 | diningtable
62 | toilet
63 | tvmonitor
64 | laptop
65 | mouse
66 | remote
67 | keyboard
68 | cell phone
69 | microwave
70 | oven
71 | toaster
72 | sink
73 | refrigerator
74 | book
75 | clock
76 | vase
77 | scissors
78 | teddy bear
79 | hair drier
80 | toothbrush
81 | 


--------------------------------------------------------------------------------
/convert_onnx/detector.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | 
 4 | from model.fpn import *
 5 | from model.backbone.shufflenetv2 import *
 6 | 
 7 | class Detector(nn.Module):
 8 |     def __init__(self, classes, anchor_num, load_param):
 9 |         super(Detector, self).__init__()
10 |         out_depth = 72
11 |         stage_out_channels = [-1, 24, 48, 96, 192]
12 | 
13 |         self.backbone = ShuffleNetV2(stage_out_channels, load_param)
14 |         self.fpn = LightFPN(stage_out_channels[-2] + stage_out_channels[-1], stage_out_channels[-1], out_depth)
15 | 
16 |         self.output_reg_layers = nn.Conv2d(out_depth, 4 * anchor_num, 1, 1, 0, bias=True)
17 |         self.output_obj_layers = nn.Conv2d(out_depth, anchor_num, 1, 1, 0, bias=True)
18 |         self.output_cls_layers = nn.Conv2d(out_depth, classes, 1, 1, 0, bias=True)
19 | 
20 |     def forward(self, x):
21 |         C2, C3 = self.backbone(x)
22 |         cls_2, obj_2, reg_2, cls_3, obj_3, reg_3 = self.fpn(C2, C3)
23 |         
24 |         out_reg_2 = self.output_reg_layers(reg_2)
25 |         out_obj_2 = self.output_obj_layers(obj_2)
26 |         out_cls_2 = self.output_cls_layers(cls_2)
27 | 
28 |         out_reg_3 = self.output_reg_layers(reg_3)
29 |         out_obj_3 = self.output_obj_layers(obj_3)
30 |         out_cls_3 = self.output_cls_layers(cls_3)
31 |         if not torch.onnx.is_in_onnx_export():
32 |             return out_reg_2, out_obj_2, out_cls_2, out_reg_3, out_obj_3, out_cls_3
33 |         else:
34 |             # for out in (out_reg_2, out_obj_2, out_cls_2, out_reg_3, out_obj_3, out_cls_3):
35 |             #     print(out.shape)
36 | 
37 |             c = out_reg_2.shape[1]
38 |             out_reg_2 = out_reg_2.permute(0, 2, 3, 1).view(-1, c)
39 |             c = out_obj_2.shape[1]
40 |             out_obj_2 = out_obj_2.permute(0, 2, 3, 1).view(-1, c)
41 |             c = out_cls_2.shape[1]
42 |             out_cls_2 = out_cls_2.permute(0, 2, 3, 1).view(-1, c)
43 |             out_reg_2 = torch.sigmoid(out_reg_2)
44 |             out_obj_2 = torch.sigmoid(out_obj_2)
45 |             out_cls_2 = F.softmax(out_cls_2, dim=1)
46 |             out2 = torch.cat((out_reg_2, out_obj_2, out_cls_2), dim=1)
47 | 
48 |             c = out_reg_3.shape[1]
49 |             out_reg_3 = out_reg_3.permute(0, 2, 3, 1).view(-1, c)
50 |             c = out_obj_3.shape[1]
51 |             out_obj_3 = out_obj_3.permute(0, 2, 3, 1).view(-1, c)
52 |             c = out_cls_3.shape[1]
53 |             out_cls_3 = out_cls_3.permute(0, 2, 3, 1).view(-1, c)
54 |             out_reg_3 = torch.sigmoid(out_reg_3)
55 |             out_obj_3 = torch.sigmoid(out_obj_3)
56 |             out_cls_3 = F.softmax(out_cls_3, dim=1)
57 |             out3 = torch.cat((out_reg_3, out_obj_3, out_cls_3), dim=1)
58 |             return torch.cat((out2, out3), dim=0)
59 | 
60 | if __name__ == "__main__":
61 |     model = Detector(80, 3, False)
62 |     test_data = torch.rand(1, 3, 352, 352)
63 |     torch.onnx.export(model,                    #model being run
64 |                      test_data,                 # model input (or a tuple for multiple inputs)
65 |                      "test.onnx",               # where to save the model (can be a file or file-like object)
66 |                      export_params=True,        # store the trained parameter weights inside the model file
67 |                      opset_version=11,          # the ONNX version to export the model to
68 |                      do_constant_folding=True)  # whether to execute constant folding for optimization
69 |     
70 | 
71 | 
72 | 


--------------------------------------------------------------------------------
/convert_onnx/pytorch2onnx.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import torch
 3 | import model.detector
 4 | import utils.utils
 5 | 
 6 | if __name__ == '__main__':
 7 |     #指定训练配置文件
 8 |     parser = argparse.ArgumentParser()
 9 |     parser.add_argument('--data', type=str, default='', 
10 |                         help='Specify training profile *.data')
11 |     parser.add_argument('--weights', type=str, default='', 
12 |                         help='The path of the .pth model to be transformed')
13 | 
14 |     parser.add_argument('--output', type=str, default='./model.onnx', 
15 |                         help='The path where the onnx model is saved')
16 | 
17 |     opt = parser.parse_args()
18 |     cfg = utils.utils.load_datafile(opt.data)
19 | 
20 |     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
21 |     model = model.detector.Detector(cfg["classes"], cfg["anchor_num"], True)
22 |     model.load_state_dict(torch.load(opt.weights, map_location=device))
23 |     #sets the module in eval node
24 |     model.eval()
25 | 
26 |     test_data = torch.rand(1, 3, cfg["height"], cfg["width"])
27 |     torch.onnx.export(model,                    #model being run
28 |                      test_data,                 # model input (or a tuple for multiple inputs)
29 |                      opt.output,               # where to save the model (can be a file or file-like object)
30 |                      export_params=True,        # store the trained parameter weights inside the model file
31 |                      opset_version=11,          # the ONNX version to export the model to
32 |                      do_constant_folding=True)  # whether to execute constant folding for optimization
33 |     # torch.onnx.export(model,  # model being run
34 |     #                   test_data,  # model input (or a tuple for multiple inputs)
35 |     #                   opt.output,  # where to save the model (can be a file or file-like object)
36 |     #                   opset_version=11,  # the ONNX version to export the model to
37 |     #                   input_names = ['images'], output_names = ['out'])
38 | 
39 |     
40 | 
41 | 


--------------------------------------------------------------------------------
/img/000004.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/yolo-fastestv2-opencv/f865b7c18780c5e5f699e5fcb66074fad0ba3497/img/000004.jpg


--------------------------------------------------------------------------------
/img/000139.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/yolo-fastestv2-opencv/f865b7c18780c5e5f699e5fcb66074fad0ba3497/img/000139.jpg


--------------------------------------------------------------------------------
/img/000148.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/yolo-fastestv2-opencv/f865b7c18780c5e5f699e5fcb66074fad0ba3497/img/000148.jpg


--------------------------------------------------------------------------------
/img/000181.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/yolo-fastestv2-opencv/f865b7c18780c5e5f699e5fcb66074fad0ba3497/img/000181.jpg


--------------------------------------------------------------------------------
/img/000230.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/yolo-fastestv2-opencv/f865b7c18780c5e5f699e5fcb66074fad0ba3497/img/000230.jpg


--------------------------------------------------------------------------------
/main.cpp:
--------------------------------------------------------------------------------
  1 | ﻿#include <fstream>
  2 | #include <sstream>
  3 | #include <iostream>
  4 | #include <opencv2/dnn.hpp>
  5 | #include <opencv2/imgproc.hpp>
  6 | #include <opencv2/highgui.hpp>
  7 | 
  8 | using namespace cv;
  9 | using namespace dnn;
 10 | using namespace std;
 11 | 
 12 | class yolo_fast
 13 | {
 14 | public:
 15 | 	yolo_fast(string modelpath, float objThreshold, float confThreshold, float nmsThreshold);
 16 | 	void detect(Mat& srcimg);
 17 | 
 18 | private:
 19 | private:
 20 | 	const float anchors[2][6] = { {12.64,19.39, 37.88,51.48, 55.71,138.31}, {126.91,78.23, 131.57,214.55, 279.92,258.87} };
 21 | 	const float stride[3] = { 16.0, 32.0 };
 22 | 	const int inpWidth = 352;
 23 | 	const int inpHeight = 352;
 24 | 	const int num_stage = 2;
 25 | 	const int anchor_num = 3;
 26 | 	float objThreshold;
 27 | 	float confThreshold;
 28 | 	float nmsThreshold;
 29 | 	vector<string> classes;
 30 | 	const string classesFile = "coco.names";
 31 | 	int num_class;
 32 | 	Net net;
 33 | 	void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame);
 34 | };
 35 | 
 36 | yolo_fast::yolo_fast(string modelpath, float obj_Threshold, float conf_Threshold, float nms_Threshold)
 37 | {
 38 | 	this->objThreshold = obj_Threshold;
 39 | 	this->confThreshold = conf_Threshold;
 40 | 	this->nmsThreshold = nms_Threshold;
 41 | 
 42 | 	ifstream ifs(this->classesFile.c_str());
 43 | 	string line;
 44 | 	while (getline(ifs, line)) this->classes.push_back(line);
 45 | 	this->num_class = this->classes.size();
 46 | 	this->net = readNet(modelpath);
 47 | }
 48 | 
 49 | void yolo_fast::drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame)   // Draw the predicted bounding box
 50 | {
 51 | 	//Draw a rectangle displaying the bounding box
 52 | 	rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 0, 255), 2);
 53 | 
 54 | 	//Get the label for the class name and its confidence
 55 | 	string label = format("%.2f", conf);
 56 | 	label = this->classes[classId] + ":" + label;
 57 | 
 58 | 	//Display the label at the top of the bounding box
 59 | 	int baseLine;
 60 | 	Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
 61 | 	top = max(top, labelSize.height);
 62 | 	//rectangle(frame, Point(left, top - int(1.5 * labelSize.height)), Point(left + int(1.5 * labelSize.width), top + baseLine), Scalar(0, 255, 0), FILLED);
 63 | 	putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 1, Scalar(0, 255, 0), 1);
 64 | }
 65 | 
 66 | void yolo_fast::detect(Mat& frame)
 67 | {
 68 | 	Mat blob;
 69 | 	blobFromImage(frame, blob, 1 / 255.0, Size(this->inpWidth, this->inpHeight));
 70 | 	this->net.setInput(blob);
 71 | 	vector<Mat> outs;
 72 | 	this->net.forward(outs, this->net.getUnconnectedOutLayersNames());
 73 | 
 74 | 	/////generate proposals
 75 | 	vector<int> classIds;
 76 | 	vector<float> confidences;
 77 | 	vector<Rect> boxes;
 78 | 	float ratioh = (float)frame.rows / this->inpHeight, ratiow = (float)frame.cols / this->inpWidth;
 79 | 	int n = 0, q = 0, i = 0, j = 0, nout = this->anchor_num * 5 + this->classes.size(), row_ind = 0;
 80 | 	float* pdata = (float*)outs[0].data;
 81 | 	for (n = 0; n < this->num_stage; n++)   ///stage
 82 | 	{
 83 | 		int num_grid_x = (int)(this->inpWidth / this->stride[n]);
 84 | 		int num_grid_y = (int)(this->inpHeight / this->stride[n]);
 85 | 		for (i = 0; i < num_grid_y; i++)
 86 | 		{
 87 | 			for (j = 0; j < num_grid_x; j++)
 88 | 			{
 89 | 				Mat scores = outs[0].row(row_ind).colRange(this->anchor_num * 5, outs[0].cols);
 90 | 				Point classIdPoint;
 91 | 				double max_class_socre;
 92 | 				// Get the value and location of the maximum score
 93 | 				minMaxLoc(scores, 0, &max_class_socre, 0, &classIdPoint);
 94 | 				for (q = 0; q < this->anchor_num; q++)    ///anchor
 95 | 				{
 96 | 					const float anchor_w = this->anchors[n][q * 2];
 97 | 					const float anchor_h = this->anchors[n][q * 2 + 1];
 98 | 					float box_score = pdata[4 * this->anchor_num + q];
 99 | 					if (box_score > this->objThreshold && max_class_socre > this->confThreshold)
100 | 					{
101 | 						float cx = (pdata[4 * q] * 2.f - 0.5f + j) * this->stride[n];  ///cx
102 | 						float cy = (pdata[4 * q+ 1] * 2.f - 0.5f + i) * this->stride[n];   ///cy
103 | 						float w = powf(pdata[4 * q + 2] * 2.f, 2.f) * anchor_w;   ///w
104 | 						float h = powf(pdata[4 * q + 3] * 2.f, 2.f) * anchor_h;  ///h
105 | 
106 | 						int left = (cx - 0.5*w)*ratiow;
107 | 						int top = (cy - 0.5*h)*ratioh;   ///���껹ԭ��ԭͼ��
108 | 
109 | 						classIds.push_back(classIdPoint.x);
110 | 						confidences.push_back(box_score * max_class_socre);
111 | 						boxes.push_back(Rect(left, top, (int)(w*ratiow), (int)(h*ratioh)));
112 | 					}
113 | 				}
114 | 				row_ind++;
115 | 				pdata += nout;
116 | 			}
117 | 		}
118 | 	}
119 | 
120 | 	// Perform non maximum suppression to eliminate redundant overlapping boxes with
121 | 	// lower confidences
122 | 	vector<int> indices;
123 | 	NMSBoxes(boxes, confidences, this->confThreshold, this->nmsThreshold, indices);
124 | 	for (size_t i = 0; i < indices.size(); ++i)
125 | 	{
126 | 		int idx = indices[i];
127 | 		Rect box = boxes[idx];
128 | 		this->drawPred(classIds[idx], confidences[idx], box.x, box.y,
129 | 			box.x + box.width, box.y + box.height, frame);
130 | 	}
131 | }
132 | 
133 | int main()
134 | {
135 | 	yolo_fast yolo_model("model.onnx", 0.3, 0.3, 0.4);
136 | 	string imgpath = "img/000148.jpg";
137 | 	Mat srcimg = imread(imgpath);
138 | 	yolo_model.detect(srcimg);
139 | 
140 | 	static const string kWinName = "Deep learning object detection in OpenCV";
141 | 	namedWindow(kWinName, WINDOW_NORMAL);
142 | 	imshow(kWinName, srcimg);
143 | 	waitKey(0);
144 | 	destroyAllWindows();
145 | }
146 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | import cv2
  2 | import numpy as np
  3 | import argparse
  4 | 
  5 | class yolo_fast_v2():
  6 |     def __init__(self, objThreshold=0.3, confThreshold=0.3, nmsThreshold=0.4):
  7 |         with open('coco.names', 'rt') as f:
  8 |             self.classes = f.read().rstrip('\n').split('\n')   ###这个是在coco数据集上训练的模型做opencv部署的，如果你在自己的数据集上训练出的模型做opencv部署，那么需要修改self.classes
  9 |         self.stride = [16, 32]
 10 |         self.anchor_num = 3
 11 |         self.anchors = np.array([12.64, 19.39, 37.88, 51.48, 55.71, 138.31, 126.91, 78.23, 131.57, 214.55, 279.92, 258.87],
 12 |                            dtype=np.float32).reshape(len(self.stride), self.anchor_num, 2)
 13 |         self.inpWidth = 352
 14 |         self.inpHeight = 352
 15 |         self.net = cv2.dnn.readNet('model.onnx')
 16 |         self.confThreshold = confThreshold
 17 |         self.nmsThreshold = nmsThreshold
 18 |         self.objThreshold = objThreshold
 19 |     def _make_grid(self, nx=20, ny=20):
 20 |         xv, yv = np.meshgrid(np.arange(ny), np.arange(nx))
 21 |         return np.stack((xv, yv), 2).reshape((-1, 2)).astype(np.float32)
 22 | 
 23 |     def postprocess(self, frame, outs):
 24 |         frameHeight = frame.shape[0]
 25 |         frameWidth = frame.shape[1]
 26 |         ratioh, ratiow = frameHeight / self.inpHeight, frameWidth / self.inpWidth
 27 |         # Scan through all the bounding boxes output from the network and keep only the
 28 |         # ones with high confidence scores. Assign the box's class label as the class with the highest score.
 29 |         classIds = []
 30 |         confidences = []
 31 |         boxes = []
 32 |         for detection in outs:
 33 |             scores = detection[5:]
 34 |             classId = np.argmax(scores)
 35 |             confidence = scores[classId]
 36 |             if confidence > self.confThreshold and detection[4] > self.objThreshold:
 37 |                 center_x = int(detection[0] * ratiow)
 38 |                 center_y = int(detection[1] * ratioh)
 39 |                 width = int(detection[2] * ratiow)
 40 |                 height = int(detection[3] * ratioh)
 41 |                 left = int(center_x - width / 2)
 42 |                 top = int(center_y - height / 2)
 43 |                 classIds.append(classId)
 44 |                 # confidences.append(float(confidence))
 45 |                 confidences.append(float(confidence*detection[4]))
 46 |                 boxes.append([left, top, width, height])
 47 | 
 48 |         # Perform non maximum suppression to eliminate redundant overlapping boxes with
 49 |         # lower confidences.
 50 |         indices = cv2.dnn.NMSBoxes(boxes, confidences, self.confThreshold, self.nmsThreshold)
 51 |         for i in indices:
 52 |             i = i[0]
 53 |             box = boxes[i]
 54 |             left = box[0]
 55 |             top = box[1]
 56 |             width = box[2]
 57 |             height = box[3]
 58 |             frame = self.drawPred(frame, classIds[i], confidences[i], left, top, left + width, top + height)
 59 |         return frame
 60 | 
 61 |     def drawPred(self, frame, classId, conf, left, top, right, bottom):
 62 |         # Draw a bounding box.
 63 |         cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), thickness=2)
 64 | 
 65 |         label = '%.2f' % conf
 66 |         label = '%s:%s' % (self.classes[classId], label)
 67 | 
 68 |         # Display the label at the top of the bounding box
 69 |         labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
 70 |         top = max(top, labelSize[1])
 71 |         # cv.rectangle(frame, (left, top - round(1.5 * labelSize[1])), (left + round(1.5 * labelSize[0]), top + baseLine), (255,255,255), cv.FILLED)
 72 |         cv2.putText(frame, label, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), thickness=1)
 73 |         return frame
 74 |     def detect(self, srcimg):
 75 |         blob = cv2.dnn.blobFromImage(srcimg, 1 / 255.0, (self.inpWidth, self.inpHeight))
 76 |         self.net.setInput(blob)
 77 |         outs = self.net.forward(self.net.getUnconnectedOutLayersNames())[0]
 78 | 
 79 |         outputs = np.zeros((outs.shape[0]*self.anchor_num, 5+len(self.classes)))
 80 |         row_ind = 0
 81 |         for i in range(len(self.stride)):
 82 |             h, w = int(self.inpHeight / self.stride[i]), int(self.inpWidth / self.stride[i])
 83 |             length = int(h * w)
 84 |             grid = self._make_grid(w, h)
 85 |             for j in range(self.anchor_num):
 86 |                 top = row_ind+j*length
 87 |                 left = 4*j
 88 |                 outputs[top:top + length, 0:2] = (outs[row_ind:row_ind + length, left:left+2] * 2. - 0.5 + grid) * int(self.stride[i])
 89 |                 outputs[top:top + length, 2:4] = (outs[row_ind:row_ind + length, left+2:left+4] * 2) ** 2 * np.repeat(self.anchors[i, j, :].reshape(1,-1), h * w, axis=0)
 90 |                 outputs[top:top + length, 4] = outs[row_ind:row_ind + length, 4*self.anchor_num+j]
 91 |                 outputs[top:top + length, 5:] = outs[row_ind:row_ind + length, 5*self.anchor_num:]
 92 |             row_ind += length
 93 |         return outputs
 94 | 
 95 | if __name__ == '__main__':
 96 |     parser = argparse.ArgumentParser()
 97 |     parser.add_argument('--imgpath', type=str, default='img/000139.jpg', help="image path")
 98 |     parser.add_argument('--objThreshold', default=0.3, type=float, help='object confidence')
 99 |     parser.add_argument('--confThreshold', default=0.3, type=float, help='class confidence')
100 |     parser.add_argument('--nmsThreshold', default=0.4, type=float, help='nms iou thresh')
101 |     args = parser.parse_args()
102 | 
103 |     srcimg = cv2.imread(args.imgpath)
104 |     model = yolo_fast_v2(objThreshold=args.objThreshold, confThreshold=args.confThreshold, nmsThreshold=args.nmsThreshold)
105 |     outputs = model.detect(srcimg)
106 |     srcimg = model.postprocess(srcimg, outputs)
107 | 
108 |     winName = 'Deep learning object detection in OpenCV'
109 |     cv2.namedWindow(winName, 0)
110 |     cv2.imshow(winName, srcimg)
111 |     cv2.waitKey(0)
112 |     cv2.destroyAllWindows()


--------------------------------------------------------------------------------
/model.onnx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/yolo-fastestv2-opencv/f865b7c18780c5e5f699e5fcb66074fad0ba3497/model.onnx


--------------------------------------------------------------------------------