├── README.md
├── bus.jpg
├── coco.names
├── dog.jpg
├── main_yolo.cpp
├── main_yolo.py
├── person.jpg
├── yolo.h
└── zidane.jpg

/README.md:
--------------------------------------------------------------------------------
# C++ and Python implementations of four YOLO object detectors

This program implements four YOLO object detectors: the classic YOLOv3, YOLOv4, Yolo-Fastest and YOLObile.
The .cfg and .weights files for these four models can be downloaded from Baidu Netdisk:

Link: https://pan.baidu.com/s/1Kcw-VhuDTRzCtVkaNEDOBg
Extraction code: imgu

After downloading, copy the four resulting folders into the same directory as main_yolo.cpp.
With OpenCV 4.4.0 or later installed, main_yolo.cpp can be compiled and run on both Windows and Linux.

In addition, a swapRB parameter can be added to the Net_config settings to control whether the R and B channels of the input image are swapped. The reason for adding this parameter is that some YOLO models do not expect their input images to be channel-swapped to RGB.
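
A minimal sketch of that idea (it is not in the released code): add a swapRB field to the Net_config struct in yolo.h and pass it through to blobFromImage in YOLO::detect, which currently hard-codes the swap to true. The field name and the wiring shown in the comments are assumptions about how this could look, not code from the repository.

```cpp
#include <string>
using std::string;

// Hypothetical extension of the Net_config struct from yolo.h.
struct Net_config
{
	float confThreshold;        // Confidence threshold
	float nmsThreshold;         // Non-maximum suppression threshold
	int inpWidth;               // Width of the network's input image
	int inpHeight;              // Height of the network's input image
	string classesFile;
	string modelConfiguration;
	string modelWeights;
	string netname;
	bool swapRB;                // New: swap the R and B channels of the input image?
};

// In YOLO::YOLO the flag would be copied once (this->swapRB = config.swapRB;),
// and YOLO::detect would pass it instead of the hard-coded 'true':
//   blobFromImage(frame, blob, 1 / 255.0, Size(this->inpWidth, this->inpHeight),
//                 Scalar(0, 0, 0), this->swapRB, false);
```

Each entry of yolo_nets (and of the Python Net_config list) could then declare whether its weights expect RGB or BGR input.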
--------------------------------------------------------------------------------
/bus.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/yolov34-cpp-opencv-dnn/26deaf897f2121df0f3dc254f96768efc8581819/bus.jpg
--------------------------------------------------------------------------------
/coco.names:
--------------------------------------------------------------------------------
person
bicycle
car
motorbike
aeroplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
sofa
pottedplant
bed
diningtable
toilet
tvmonitor
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
--------------------------------------------------------------------------------
/dog.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/yolov34-cpp-opencv-dnn/26deaf897f2121df0f3dc254f96768efc8581819/dog.jpg
--------------------------------------------------------------------------------
/main_yolo.cpp:
--------------------------------------------------------------------------------
#include <cstring>
#include "yolo.h"

YOLO::YOLO(Net_config config)
{
	cout << "Net use " << config.netname << endl;
	this->confThreshold = config.confThreshold;
	this->nmsThreshold = config.nmsThreshold;
	this->inpWidth = config.inpWidth;
	this->inpHeight = config.inpHeight;
	// Copy the model name into the fixed-size buffer portably (strcpy_s is MSVC-only).
	strncpy(this->netname, config.netname.c_str(), sizeof(this->netname) - 1);
	this->netname[sizeof(this->netname) - 1] = '\0';

	ifstream ifs(config.classesFile.c_str());
	string line;
	while (getline(ifs, line)) this->classes.push_back(line);

	this->net = readNetFromDarknet(config.modelConfiguration, config.modelWeights);
	this->net.setPreferableBackend(DNN_BACKEND_OPENCV);
	this->net.setPreferableTarget(DNN_TARGET_CPU);
}

void YOLO::postprocess(Mat& frame, const vector<Mat>& outs)   // Remove the bounding boxes with low confidence using non-maxima suppression
{
	vector<int> classIds;
	vector<float> confidences;
	vector<Rect> boxes;

	for (size_t i = 0; i < outs.size(); ++i)
	{
		// Scan through all the bounding boxes output from the network and keep only the
		// ones with high confidence scores. Assign the box's class label as the class
		// with the highest score for the box.
		float* data = (float*)outs[i].data;
		for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols)
		{
			// Each output row is [center_x, center_y, width, height, objectness, class scores...].
			Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
			Point classIdPoint;
			double confidence;
			// Get the value and location of the maximum score
			minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
			if (confidence > this->confThreshold)
			{
				int centerX = (int)(data[0] * frame.cols);
				int centerY = (int)(data[1] * frame.rows);
				int width = (int)(data[2] * frame.cols);
				int height = (int)(data[3] * frame.rows);
				int left = centerX - width / 2;
				int top = centerY - height / 2;

				classIds.push_back(classIdPoint.x);
				confidences.push_back((float)confidence);
				boxes.push_back(Rect(left, top, width, height));
			}
		}
	}

	// Perform non maximum suppression to eliminate redundant overlapping boxes with
	// lower confidences
	vector<int> indices;
	NMSBoxes(boxes, confidences, this->confThreshold, this->nmsThreshold, indices);
	for (size_t i = 0; i < indices.size(); ++i)
	{
		int idx = indices[i];
		Rect box = boxes[idx];
		this->drawPred(classIds[idx], confidences[idx], box.x, box.y,
			box.x + box.width, box.y + box.height, frame);
	}
}

void YOLO::drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame)   // Draw the predicted bounding box
{
	// Draw a rectangle displaying the bounding box
	rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 0, 255), 3);

	// Get the label for the class name and its confidence
	string label = format("%.2f", conf);
	if (!this->classes.empty())
	{
		CV_Assert(classId < (int)this->classes.size());
		label = this->classes[classId] + ":" + label;
	}

	// Display the label at the top of the bounding box
	int baseLine;
	Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
	top = max(top, labelSize.height);
	//rectangle(frame, Point(left, top - int(1.5 * labelSize.height)), Point(left + int(1.5 * labelSize.width), top + baseLine), Scalar(0, 255, 0), FILLED);
	putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 255, 0), 1);
}

void YOLO::detect(Mat& frame)
{
	Mat blob;
	blobFromImage(frame, blob, 1 / 255.0, Size(this->inpWidth, this->inpHeight), Scalar(0, 0, 0), true, false);
	this->net.setInput(blob);
	vector<Mat> outs;
	this->net.forward(outs, this->net.getUnconnectedOutLayersNames());
	this->postprocess(frame, outs);

	vector<double> layersTimes;
	double freq = getTickFrequency() / 1000;
	double t = net.getPerfProfile(layersTimes) / freq;
	string label = format("%s Inference time : %.2f ms", this->netname, t);
	putText(frame, label, Point(0, 30), FONT_HERSHEY_SIMPLEX, 1, Scalar(0, 0, 255), 2);
	//imwrite(format("%s_out.jpg", this->netname), frame);
}

int main()
{
	YOLO yolo_model(yolo_nets[2]);
	string imgpath = "person.jpg";
	Mat srcimg = imread(imgpath);
	yolo_model.detect(srcimg);

	static const string kWinName = "Deep learning object detection in OpenCV";
	namedWindow(kWinName, WINDOW_NORMAL);
	imshow(kWinName, srcimg);
	waitKey(0);
	destroyAllWindows();
}
--------------------------------------------------------------------------------
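
A usage note, not a file in the repository: main_yolo.cpp hard-codes yolo_nets[2] and "person.jpg", while main_yolo.py selects the model and image with --net_type and --imgpath. The sketch below is a hypothetical alternative main() that mirrors the Python flags; it assumes it is compiled together with the YOLO implementation above (with that file's own main() removed), and the argument handling is illustrative only.

```cpp
#include <cstdlib>
#include "yolo.h"

int main(int argc, char** argv)
{
	// argv[1]: index into yolo_nets (0..3), argv[2]: image path; defaults match main_yolo.py.
	int net_type = (argc > 1) ? atoi(argv[1]) : 0;
	string imgpath = (argc > 2) ? argv[2] : "bus.jpg";
	if (net_type < 0 || net_type > 3)
	{
		cout << "net_type must be 0, 1, 2 or 3" << endl;
		return 1;
	}

	YOLO yolo_model(yolo_nets[net_type]);
	Mat srcimg = imread(imgpath);
	if (srcimg.empty())
	{
		cout << "Could not read image: " << imgpath << endl;
		return 1;
	}
	yolo_model.detect(srcimg);

	static const string kWinName = "Deep learning object detection in OpenCV";
	namedWindow(kWinName, WINDOW_NORMAL);
	imshow(kWinName, srcimg);
	waitKey(0);
	destroyAllWindows();
	return 0;
}
```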
/main_yolo.py:
--------------------------------------------------------------------------------
import cv2
import argparse
import numpy as np

class yolo():
    def __init__(self, config):
        print('Net use', config['netname'])
        self.confThreshold = config['confThreshold']
        self.nmsThreshold = config['nmsThreshold']
        self.inpWidth = config['inpWidth']
        self.inpHeight = config['inpHeight']
        with open(config['classesFile'], 'rt') as f:
            self.classes = f.read().rstrip('\n').split('\n')
        self.colors = [np.random.randint(0, 255, size=3).tolist() for _ in range(len(self.classes))]
        self.net = cv2.dnn.readNet(config['modelConfiguration'], config['modelWeights'])

    def drawPred(self, frame, classId, conf, left, top, right, bottom):
        # Draw a bounding box.
        cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), thickness=4)

        label = '%.2f' % conf
        label = '%s:%s' % (self.classes[classId], label)

        # Display the label at the top of the bounding box
        labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        top = max(top, labelSize[1])
        # cv2.rectangle(frame, (left, top - round(1.5 * labelSize[1])), (left + round(1.5 * labelSize[0]), top + baseLine), (255,255,255), cv2.FILLED)
        cv2.putText(frame, label, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), thickness=2)
        return frame

    # Remove the bounding boxes with low confidence using non-maxima suppression
    def postprocess(self, frame, outs):
        frameHeight = frame.shape[0]
        frameWidth = frame.shape[1]

        # Scan through all the bounding boxes output from the network and keep only the
        # ones with high confidence scores. Assign the box's class label as the class with the highest score.
        classIds = []
        confidences = []
        boxes = []
        for out in outs:
            for detection in out:
                scores = detection[5:]
                classId = np.argmax(scores)
                confidence = scores[classId]
                if confidence > self.confThreshold:
                    center_x = int(detection[0] * frameWidth)
                    center_y = int(detection[1] * frameHeight)
                    width = int(detection[2] * frameWidth)
                    height = int(detection[3] * frameHeight)
                    left = int(center_x - width / 2)
                    top = int(center_y - height / 2)
                    classIds.append(classId)
                    confidences.append(float(confidence))
                    boxes.append([left, top, width, height])

        # Perform non maximum suppression to eliminate redundant overlapping boxes with
        # lower confidences.
        indices = cv2.dnn.NMSBoxes(boxes, confidences, self.confThreshold, self.nmsThreshold)
        # NMSBoxes returns an Nx1 array in older OpenCV releases and a flat array in newer ones;
        # flattening handles both layouts.
        for i in np.array(indices).flatten():
            box = boxes[i]
            left = box[0]
            top = box[1]
            width = box[2]
            height = box[3]
            self.drawPred(frame, classIds[i], confidences[i], left, top, left + width, top + height)

    def detect(self, srcimg):
        blob = cv2.dnn.blobFromImage(srcimg, 1/255.0, (self.inpWidth, self.inpHeight), [0, 0, 0], swapRB=True, crop=False)
        # Sets the input to the network
        self.net.setInput(blob)

        # Runs the forward pass to get output of the output layers
        outs = self.net.forward(self.net.getUnconnectedOutLayersNames())
        self.postprocess(srcimg, outs)
        return srcimg

Net_config = [{'confThreshold':0.5, 'nmsThreshold':0.4, 'inpWidth':416, 'inpHeight':416, 'classesFile':'coco.names', 'modelConfiguration':'yolov3/yolov3.cfg', 'modelWeights':'yolov3/yolov3.weights', 'netname':'yolov3'},
              {'confThreshold':0.5, 'nmsThreshold':0.4, 'inpWidth':608, 'inpHeight':608, 'classesFile':'coco.names', 'modelConfiguration':'yolov4/yolov4.cfg', 'modelWeights':'yolov4/yolov4.weights', 'netname':'yolov4'},
              {'confThreshold':0.5, 'nmsThreshold':0.4, 'inpWidth':320, 'inpHeight':320, 'classesFile':'coco.names', 'modelConfiguration':'yolo-fastest/yolo-fastest-xl.cfg', 'modelWeights':'yolo-fastest/yolo-fastest-xl.weights', 'netname':'yolo-fastest'},
              {'confThreshold':0.5, 'nmsThreshold':0.4, 'inpWidth':320, 'inpHeight':320, 'classesFile':'coco.names', 'modelConfiguration':'yolobile/csdarknet53s-panet-spp.cfg', 'modelWeights':'yolobile/yolobile.weights', 'netname':'yolobile'}]

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--imgpath', type=str, default='bus.jpg', help='image path')
    parser.add_argument('--net_type', default=0, type=int, choices=[0, 1, 2, 3])
    args = parser.parse_args()

    yolonet = yolo(Net_config[args.net_type])
    srcimg = cv2.imread(args.imgpath)
    srcimg = yolonet.detect(srcimg)

    winName = 'Deep learning object detection in OpenCV'
    cv2.namedWindow(winName, 0)
    cv2.imshow(winName, srcimg)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
--------------------------------------------------------------------------------
/person.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/yolov34-cpp-opencv-dnn/26deaf897f2121df0f3dc254f96768efc8581819/person.jpg
--------------------------------------------------------------------------------
/yolo.h:
--------------------------------------------------------------------------------
#include <fstream>
#include <sstream>
#include <iostream>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/dnn.hpp>

using namespace cv;
using namespace dnn;
using namespace std;

struct Net_config
{
	float confThreshold; // Confidence threshold
	float nmsThreshold;  // Non-maximum suppression threshold
	int inpWidth;        // Width of network's input image
	int inpHeight;       // Height of network's input image
	string classesFile;
	string modelConfiguration;
	string modelWeights;
	string netname;
};

class YOLO
{
public:
	YOLO(Net_config config);
	void detect(Mat& frame);
private:
	float confThreshold;
	float nmsThreshold;
	int inpWidth;
	int inpHeight;
	char netname[20];
	vector<string> classes;
	Net net;
	void postprocess(Mat& frame, const vector<Mat>& outs);
	void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame);
};

Net_config yolo_nets[4] = {
	{0.5, 0.4, 416, 416, "coco.names", "yolov3/yolov3.cfg", "yolov3/yolov3.weights", "yolov3"},
	{0.5, 0.4, 608, 608, "coco.names", "yolov4/yolov4.cfg", "yolov4/yolov4.weights", "yolov4"},
	{0.5, 0.4, 320, 320, "coco.names", "yolo-fastest/yolo-fastest-xl.cfg", "yolo-fastest/yolo-fastest-xl.weights", "yolo-fastest"},
	{0.5, 0.4, 320, 320, "coco.names", "yolobile/csdarknet53s-panet-spp.cfg", "yolobile/yolobile.weights", "yolobile"}
};
--------------------------------------------------------------------------------
/zidane.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/yolov34-cpp-opencv-dnn/26deaf897f2121df0f3dc254f96768efc8581819/zidane.jpg
--------------------------------------------------------------------------------