├── README.md
├── bus.jpg
├── coco.names
├── dog.jpg
├── main_yolo.cpp
├── main_yolo.py
├── person.jpg
├── yolo.h
└── zidane.jpg

/README.md:
--------------------------------------------------------------------------------
# C++ and Python implementations of four YOLO object detectors

This program implements four YOLO object detectors: the classic YOLOv3, YOLOv4, Yolo-Fastest and YOLObile.
The .cfg and .weights files for these four models can be downloaded from Baidu Netdisk:

Link: https://pan.baidu.com/s/1Kcw-VhuDTRzCtVkaNEDOBg
Extraction code: imgu

After downloading, copy the four resulting folders into the same directory as main_yolo.cpp.
With OpenCV 4.4.0 or later installed, main_yolo.cpp can be compiled and run on both Windows and Linux.

In addition, a swapRB parameter can be added to the Net_config settings to control whether the R and B channels of the input image are swapped. The reason for adding this parameter is that some YOLO models do not expect their input images to be channel-swapped to RGB.
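
A minimal sketch of that idea (it is not in the released code): add a swapRB field to the Net_config struct in yolo.h and pass it through to blobFromImage in YOLO::detect, which currently hard-codes the swap to true. The field name and the wiring shown in the comments are assumptions about how this could look, not code from the repository.

```cpp
#include <string>
using std::string;

// Hypothetical extension of the Net_config struct from yolo.h.
struct Net_config
{
	float confThreshold;        // Confidence threshold
	float nmsThreshold;         // Non-maximum suppression threshold
	int inpWidth;               // Width of the network's input image
	int inpHeight;              // Height of the network's input image
	string classesFile;
	string modelConfiguration;
	string modelWeights;
	string netname;
	bool swapRB;                // New: swap the R and B channels of the input image?
};

// In YOLO::YOLO the flag would be copied once (this->swapRB = config.swapRB;),
// and YOLO::detect would pass it instead of the hard-coded 'true':
//   blobFromImage(frame, blob, 1 / 255.0, Size(this->inpWidth, this->inpHeight),
//                 Scalar(0, 0, 0), this->swapRB, false);
```

Each entry of yolo_nets (and of the Python Net_config list) could then declare whether its weights expect RGB or BGR input.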
--------------------------------------------------------------------------------
/bus.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/yolov34-cpp-opencv-dnn/26deaf897f2121df0f3dc254f96768efc8581819/bus.jpg
--------------------------------------------------------------------------------
/coco.names:
--------------------------------------------------------------------------------
person
bicycle
car
motorbike
aeroplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
sofa
pottedplant
bed
diningtable
toilet
tvmonitor
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
--------------------------------------------------------------------------------
/dog.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/yolov34-cpp-opencv-dnn/26deaf897f2121df0f3dc254f96768efc8581819/dog.jpg
--------------------------------------------------------------------------------
/main_yolo.cpp:
--------------------------------------------------------------------------------
#include <cstring>
#include "yolo.h"

YOLO::YOLO(Net_config config)
{
	cout << "Net use " << config.netname << endl;
	this->confThreshold = config.confThreshold;
	this->nmsThreshold = config.nmsThreshold;
	this->inpWidth = config.inpWidth;
	this->inpHeight = config.inpHeight;
	// Copy the model name into the fixed-size buffer portably (strcpy_s is MSVC-only).
	strncpy(this->netname, config.netname.c_str(), sizeof(this->netname) - 1);
	this->netname[sizeof(this->netname) - 1] = '\0';

	ifstream ifs(config.classesFile.c_str());
	string line;
	while (getline(ifs, line)) this->classes.push_back(line);

	this->net = readNetFromDarknet(config.modelConfiguration, config.modelWeights);
	this->net.setPreferableBackend(DNN_BACKEND_OPENCV);
	this->net.setPreferableTarget(DNN_TARGET_CPU);
}

void YOLO::postprocess(Mat& frame, const vector<Mat>& outs)   // Remove the bounding boxes with low confidence using non-maxima suppression
{
	vector<int> classIds;
	vector<float> confidences;
	vector<Rect> boxes;

	for (size_t i = 0; i < outs.size(); ++i)
	{
		// Scan through all the bounding boxes output from the network and keep only the
		// ones with high confidence scores. Assign the box's class label as the class
		// with the highest score for the box.
		float* data = (float*)outs[i].data;
		for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols)
		{
			// Each output row is [center_x, center_y, width, height, objectness, class scores...].
			Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
			Point classIdPoint;
			double confidence;
			// Get the value and location of the maximum score
			minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
			if (confidence > this->confThreshold)
			{
				int centerX = (int)(data[0] * frame.cols);
				int centerY = (int)(data[1] * frame.rows);
				int width = (int)(data[2] * frame.cols);
				int height = (int)(data[3] * frame.rows);
				int left = centerX - width / 2;
				int top = centerY - height / 2;

				classIds.push_back(classIdPoint.x);
				confidences.push_back((float)confidence);
				boxes.push_back(Rect(left, top, width, height));
			}
		}
	}

	// Perform non maximum suppression to eliminate redundant overlapping boxes with
	// lower confidences
	vector<int> indices;
	NMSBoxes(boxes, confidences, this->confThreshold, this->nmsThreshold, indices);
	for (size_t i = 0; i < indices.size(); ++i)
	{
		int idx = indices[i];
		Rect box = boxes[idx];
		this->drawPred(classIds[idx], confidences[idx], box.x, box.y,
			box.x + box.width, box.y + box.height, frame);
	}
}

void YOLO::drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame)   // Draw the predicted bounding box
{
	// Draw a rectangle displaying the bounding box
	rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 0, 255), 3);

	// Get the label for the class name and its confidence
	string label = format("%.2f", conf);
	if (!this->classes.empty())
	{
		CV_Assert(classId < (int)this->classes.size());
		label = this->classes[classId] + ":" + label;
	}

	// Display the label at the top of the bounding box
	int baseLine;
	Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
	top = max(top, labelSize.height);
	//rectangle(frame, Point(left, top - int(1.5 * labelSize.height)), Point(left + int(1.5 * labelSize.width), top + baseLine), Scalar(0, 255, 0), FILLED);
	putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 255, 0), 1);
}

void YOLO::detect(Mat& frame)
{
	Mat blob;
	blobFromImage(frame, blob, 1 / 255.0, Size(this->inpWidth, this->inpHeight), Scalar(0, 0, 0), true, false);
	this->net.setInput(blob);
	vector<Mat> outs;
	this->net.forward(outs, this->net.getUnconnectedOutLayersNames());
	this->postprocess(frame, outs);

	vector<double> layersTimes;
	double freq = getTickFrequency() / 1000;
	double t = net.getPerfProfile(layersTimes) / freq;
	string label = format("%s Inference time : %.2f ms", this->netname, t);
	putText(frame, label, Point(0, 30), FONT_HERSHEY_SIMPLEX, 1, Scalar(0, 0, 255), 2);
	//imwrite(format("%s_out.jpg", this->netname), frame);
}

int main()
{
	YOLO yolo_model(yolo_nets[2]);
	string imgpath = "person.jpg";
	Mat srcimg = imread(imgpath);
	yolo_model.detect(srcimg);

	static const string kWinName = "Deep learning object detection in OpenCV";
	namedWindow(kWinName, WINDOW_NORMAL);
	imshow(kWinName, srcimg);
	waitKey(0);
	destroyAllWindows();
}
--------------------------------------------------------------------------------
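
A usage note, not a file in the repository: main_yolo.cpp hard-codes yolo_nets[2] and "person.jpg", while main_yolo.py selects the model and image with --net_type and --imgpath. The sketch below is a hypothetical alternative main() that mirrors the Python flags; it assumes it is compiled together with the YOLO implementation above (with that file's own main() removed), and the argument handling is illustrative only.

```cpp
#include <cstdlib>
#include "yolo.h"

int main(int argc, char** argv)
{
	// argv[1]: index into yolo_nets (0..3), argv[2]: image path; defaults match main_yolo.py.
	int net_type = (argc > 1) ? atoi(argv[1]) : 0;
	string imgpath = (argc > 2) ? argv[2] : "bus.jpg";
	if (net_type < 0 || net_type > 3)
	{
		cout << "net_type must be 0, 1, 2 or 3" << endl;
		return 1;
	}

	YOLO yolo_model(yolo_nets[net_type]);
	Mat srcimg = imread(imgpath);
	if (srcimg.empty())
	{
		cout << "Could not read image: " << imgpath << endl;
		return 1;
	}
	yolo_model.detect(srcimg);

	static const string kWinName = "Deep learning object detection in OpenCV";
	namedWindow(kWinName, WINDOW_NORMAL);
	imshow(kWinName, srcimg);
	waitKey(0);
	destroyAllWindows();
	return 0;
}
```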
/main_yolo.py:
--------------------------------------------------------------------------------
import cv2
import argparse
import numpy as np

class yolo():
    def __init__(self, config):
        print('Net use', config['netname'])
        self.confThreshold = config['confThreshold']
        self.nmsThreshold = config['nmsThreshold']
        self.inpWidth = config['inpWidth']
        self.inpHeight = config['inpHeight']
        with open(config['classesFile'], 'rt') as f:
            self.classes = f.read().rstrip('\n').split('\n')
        self.colors = [np.random.randint(0, 255, size=3).tolist() for _ in range(len(self.classes))]
        self.net = cv2.dnn.readNet(config['modelConfiguration'], config['modelWeights'])

    def drawPred(self, frame, classId, conf, left, top, right, bottom):
        # Draw a bounding box.
        cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), thickness=4)

        label = '%.2f' % conf
        label = '%s:%s' % (self.classes[classId], label)

        # Display the label at the top of the bounding box
        labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        top = max(top, labelSize[1])
        # cv2.rectangle(frame, (left, top - round(1.5 * labelSize[1])), (left + round(1.5 * labelSize[0]), top + baseLine), (255,255,255), cv2.FILLED)
        cv2.putText(frame, label, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), thickness=2)
        return frame

    # Remove the bounding boxes with low confidence using non-maxima suppression
    def postprocess(self, frame, outs):
        frameHeight = frame.shape[0]
        frameWidth = frame.shape[1]

        # Scan through all the bounding boxes output from the network and keep only the
        # ones with high confidence scores. Assign the box's class label as the class with the highest score.
        classIds = []
        confidences = []
        boxes = []
        for out in outs:
            for detection in out:
                scores = detection[5:]
                classId = np.argmax(scores)
                confidence = scores[classId]
                if confidence > self.confThreshold:
                    center_x = int(detection[0] * frameWidth)
                    center_y = int(detection[1] * frameHeight)
                    width = int(detection[2] * frameWidth)
                    height = int(detection[3] * frameHeight)
                    left = int(center_x - width / 2)
                    top = int(center_y - height / 2)
                    classIds.append(classId)
                    confidences.append(float(confidence))
                    boxes.append([left, top, width, height])

        # Perform non maximum suppression to eliminate redundant overlapping boxes with
        # lower confidences.
        indices = cv2.dnn.NMSBoxes(boxes, confidences, self.confThreshold, self.nmsThreshold)
        # NMSBoxes returns an Nx1 array in older OpenCV releases and a flat array in newer ones;
        # flattening handles both layouts.
        for i in np.array(indices).flatten():
            box = boxes[i]
            left = box[0]
            top = box[1]
            width = box[2]
            height = box[3]
            self.drawPred(frame, classIds[i], confidences[i], left, top, left + width, top + height)

    def detect(self, srcimg):
        blob = cv2.dnn.blobFromImage(srcimg, 1/255.0, (self.inpWidth, self.inpHeight), [0, 0, 0], swapRB=True, crop=False)
        # Sets the input to the network
        self.net.setInput(blob)

        # Runs the forward pass to get output of the output layers
        outs = self.net.forward(self.net.getUnconnectedOutLayersNames())
        self.postprocess(srcimg, outs)
        return srcimg

Net_config = [{'confThreshold':0.5, 'nmsThreshold':0.4, 'inpWidth':416, 'inpHeight':416, 'classesFile':'coco.names', 'modelConfiguration':'yolov3/yolov3.cfg', 'modelWeights':'yolov3/yolov3.weights', 'netname':'yolov3'},
              {'confThreshold':0.5, 'nmsThreshold':0.4, 'inpWidth':608, 'inpHeight':608, 'classesFile':'coco.names', 'modelConfiguration':'yolov4/yolov4.cfg', 'modelWeights':'yolov4/yolov4.weights', 'netname':'yolov4'},
              {'confThreshold':0.5, 'nmsThreshold':0.4, 'inpWidth':320, 'inpHeight':320, 'classesFile':'coco.names', 'modelConfiguration':'yolo-fastest/yolo-fastest-xl.cfg', 'modelWeights':'yolo-fastest/yolo-fastest-xl.weights', 'netname':'yolo-fastest'},
              {'confThreshold':0.5, 'nmsThreshold':0.4, 'inpWidth':320, 'inpHeight':320, 'classesFile':'coco.names', 'modelConfiguration':'yolobile/csdarknet53s-panet-spp.cfg', 'modelWeights':'yolobile/yolobile.weights', 'netname':'yolobile'}]

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--imgpath', type=str, default='bus.jpg', help='image path')
    parser.add_argument('--net_type', default=0, type=int, choices=[0, 1, 2, 3])
    args = parser.parse_args()

    yolonet = yolo(Net_config[args.net_type])
    srcimg = cv2.imread(args.imgpath)
    srcimg = yolonet.detect(srcimg)

    winName = 'Deep learning object detection in OpenCV'
    cv2.namedWindow(winName, 0)
    cv2.imshow(winName, srcimg)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
--------------------------------------------------------------------------------
/person.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/yolov34-cpp-opencv-dnn/26deaf897f2121df0f3dc254f96768efc8581819/person.jpg
--------------------------------------------------------------------------------
/yolo.h:
--------------------------------------------------------------------------------
#include <fstream>
#include <sstream>
#include <iostream>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/dnn.hpp>

using namespace cv;
using namespace dnn;
using namespace std;

struct Net_config
{
	float confThreshold; // Confidence threshold
	float nmsThreshold;  // Non-maximum suppression threshold
	int inpWidth;        // Width of network's input image
	int inpHeight;       // Height of network's input image
	string classesFile;
	string modelConfiguration;
	string modelWeights;
	string netname;
};

class YOLO
{
public:
	YOLO(Net_config config);
	void detect(Mat& frame);
private:
	float confThreshold;
	float nmsThreshold;
	int inpWidth;
	int inpHeight;
	char netname[20];
	vector<string> classes;
	Net net;
	void postprocess(Mat& frame, const vector<Mat>& outs);
	void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame);
};

Net_config yolo_nets[4] = {
	{0.5, 0.4, 416, 416, "coco.names", "yolov3/yolov3.cfg", "yolov3/yolov3.weights", "yolov3"},
	{0.5, 0.4, 608, 608, "coco.names", "yolov4/yolov4.cfg", "yolov4/yolov4.weights", "yolov4"},
	{0.5, 0.4, 320, 320, "coco.names", "yolo-fastest/yolo-fastest-xl.cfg", "yolo-fastest/yolo-fastest-xl.weights", "yolo-fastest"},
	{0.5, 0.4, 320, 320, "coco.names", "yolobile/csdarknet53s-panet-spp.cfg", "yolobile/yolobile.weights", "yolobile"}
};
--------------------------------------------------------------------------------
/zidane.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/yolov34-cpp-opencv-dnn/26deaf897f2121df0f3dc254f96768efc8581819/zidane.jpg
--------------------------------------------------------------------------------