├── DegreeToNum ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── templateclass1.cpython-36.pyc │ └── templateclass2.cpython-36.pyc ├── templateclass1.py └── templateclass2.py ├── Identify_picture.py ├── LICENSE ├── MeterAutoReader.py ├── MeterReader.py ├── README.md ├── VOCdevkit └── VOC2007 │ ├── ImageSets │ └── Main │ │ ├── test.txt │ │ ├── train.txt │ │ ├── trainval.txt │ │ └── val.txt │ └── test.py ├── __init__.py ├── __pycache__ ├── MeterReader.cpython-36.pyc ├── ammeter.cpython-36.pyc ├── img_match.cpython-36.pyc └── yolo.cpython-36.pyc ├── ammeter.py ├── coco_annotation.py ├── contours.py ├── convert.py ├── darknet53.cfg ├── img_match.py ├── kmeans.py ├── kuangxuan.py ├── mAPgetpridict.py ├── mAPgettxt.py ├── mAPmain.py ├── model_data ├── coco_classes.txt ├── tiny_yolo_anchors.txt └── voc_classes.txt ├── origin.py ├── read_num.py ├── test.txt ├── testimages ├── 1.jpg ├── 2.jpg ├── 3.jpg ├── 4.jpg └── 5.jpg ├── train.py ├── train.txt ├── train_bottleneck.py ├── val.txt ├── voc_annotation.py ├── xyh.py ├── yolo.py ├── yolo3 ├── model.py ├── test_yolo.py └── utils.py ├── yolo_video.py └── yolov3.cfg /DegreeToNum/__init__.py: -------------------------------------------------------------------------------- 1 | # Detect the pointer and compute the degree, then map the degree to instrument number -------------------------------------------------------------------------------- /DegreeToNum/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonJerome/Pointer-meter-reading-algorithm-by-Python/48ce521723ac8dd405d3280028f9820b78ac9d94/DegreeToNum/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /DegreeToNum/__pycache__/templateclass1.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonJerome/Pointer-meter-reading-algorithm-by-Python/48ce521723ac8dd405d3280028f9820b78ac9d94/DegreeToNum/__pycache__/templateclass1.cpython-36.pyc -------------------------------------------------------------------------------- /DegreeToNum/__pycache__/templateclass2.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonJerome/Pointer-meter-reading-algorithm-by-Python/48ce521723ac8dd405d3280028f9820b78ac9d94/DegreeToNum/__pycache__/templateclass2.cpython-36.pyc -------------------------------------------------------------------------------- /DegreeToNum/templateclass1.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from PIL import Image 4 | 5 | def degree2num(corrected_img_path): 6 | """get the class1 pointer degree and map to the number 7 | 8 | :param corrected_img_path: the corrected image path; eg: "./img_test_corrected/test1.png" 9 | :return: Instrument number 10 | """ 11 | # read the image and convert to gray image 12 | gray = cv2.imread(corrected_img_path, 0) 13 | 14 | # Image edge detection 15 | edges = cv2.Canny(gray, 50, 150, apertureSize=3) 16 | 17 | # downsample the image for saving calculating time 18 | edges_img = Image.fromarray(edges) 19 | w, h = edges_img.size 20 | edges_img_resized = edges_img.resize((w // 3, h // 3)) 21 | edges_img_resized_array = np.array(edges_img_resized) 22 | 23 | # use Hough Circle Transform to detect the dashboard of reduced images 24 | circles = 
cv2.HoughCircles(edges_img_resized_array, cv2.HOUGH_GRADIENT, 1, 100, 25 | param1=150, param2=100, minRadius=0, maxRadius=0) 26 | circles_int = np.uint16(np.around(circles)) # for visualizing 27 | x, y, _ = circles[0][0] # assumed to be the largest circle !!!!!!!! 28 | x, y = x * 3, y * 3 # map the circle center back to the source image 29 | 30 | # detect the lines 31 | minLineLength = 120 32 | maxLineGap = 10 33 | lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 100, minLineLength, maxLineGap).squeeze(1) 34 | 35 | """Detect the pointer line using prior conditions: 36 | 1. the line passes through the circle center; 37 | 2. the pointer is the longest line segment 38 | """ 39 | current_lines = [] 40 | for x1, y1, x2, y2 in lines: 41 | # must pass through the circle center 42 | error = np.abs((y2 - y) * (x1 - x) - (y1 - y) * (x2 - x)) 43 | if error < 1000: # can change the threshold !!!!!! 44 | current_lines.append((x1, y1, x2, y2)) 45 | # for visualizing 46 | # cv2.line(img, (x1, y1), (x2, y2), (0, 255, 0), 2) 47 | 48 | # find the longest line 49 | pointer_line = () 50 | pointer_length = 0 51 | for x1, y1, x2, y2 in current_lines: 52 | length = (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) 53 | if length > pointer_length: 54 | pointer_length = length 55 | pointer_line = (x1, y1, x2, y2) 56 | 57 | # for visualizing 58 | x1, y1, x2, y2 = pointer_line 59 | cv2.line(gray, (x1, y1), (x2, y2), (0, 255, 0), 2) 60 | 61 | # compute the pointer angle in degrees 62 | pointer_grad = np.abs(x2 - x1) / np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2) 63 | pointer_degree = np.arccos(pointer_grad) / np.pi * 180 64 | 65 | # Compare against the circle center to determine 66 | # the quadrant of the pointer and obtain the real pointer angle 67 | if x1 > x and y1 < y: # In the first quadrant 68 | pointer_degree = pointer_degree 69 | elif x1 < x and y1 < y: # In the second quadrant 70 | pointer_degree = 180 - pointer_degree 71 | elif x1 < x and y1 > y: # In the third quadrant 72 | pointer_degree = 180 + pointer_degree 73 | else: # In the fourth quadrant 74 | pointer_degree = 360 - pointer_degree 75 | 76 | # map the degree to num 77 | num = 0.56 # from the map (pointer_degree to num) 78 | 79 | # for visualizing 80 | for i in circles_int[0, :]: 81 | # draw the outer circle 82 | cv2.circle(edges_img_resized_array, (i[0], i[1]), i[2], (255, 255, 0), 2) 83 | # draw the center of the circle 84 | cv2.circle(edges_img_resized_array, (i[0], i[1]), 2, (255, 0, 0), 3) 85 | 86 | # show the result 87 | cv2.imshow("edges", edges) 88 | cv2.imshow("img", gray) 89 | cv2.imshow("edges_resized", edges_img_resized_array) 90 | cv2.waitKey(0) 91 | 92 | return num 93 | 94 | if __name__ == "__main__": 95 | corrected_img_path = "../img_test_corrected/test1.png" 96 | degree = degree2num(corrected_img_path) 97 | print(degree) -------------------------------------------------------------------------------- /DegreeToNum/templateclass2.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import imutils 4 | from PIL import Image 5 | def degree2num(corrected_img_path): 6 | """Get the reading of the second type of pointer meter 7 | 8 | :param corrected_img_path: the path of the test image 9 | :return: the num or None 10 | """ 11 | """ to detect the rectangle """ 12 | # load the image and resize it to a smaller factor so that 13 | # the shapes can be approximated better 14 | img = cv2.imread(corrected_img_path, 0) 15 | resized_gray = imutils.resize(img, width=300) 16 | ratio = img.shape[0] / 
float(resized_gray.shape[0]) 17 | 18 | # blur it slightly, and threshold it 19 | blurred = cv2.bilateralFilter(resized_gray, 11, 17, 17) 20 | _, thresh = cv2.threshold(blurred, 127, 255, cv2.THRESH_BINARY_INV) 21 | kernel = np.ones((2, 2), np.uint8) 22 | thresh = cv2.dilate(thresh, kernel, iterations=2) 23 | 24 | # find contours in the thresholded image 25 | cants = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) 26 | cants = imutils.grab_contours(cants) 27 | 28 | # detect the rectangle: 1. it has four corner points 2. its area is the biggest 29 | rectangle = None 30 | biggest_area = 0 31 | for cant in cants: 32 | peri = cv2.arcLength(cant, True) 33 | approx = cv2.approxPolyDP(cant, 0.03 * peri, True) 34 | if len(approx) == 4: 35 | area = cv2.contourArea(cant) 36 | if area > biggest_area: 37 | rectangle = {"cant": [cant], "approx": approx} 38 | biggest_area = area 39 | 40 | """ detect the pointer and compute the degree, then use the map (degree to num) to find the num """ 41 | 42 | # crop the rectangle 43 | points = rectangle["approx"] 44 | y1, y2, x1, x2 = int(points[0][0][0] * ratio), int(points[2][0][0] * ratio), int(points[0][0][1] * ratio), int(points[2][0][1] * ratio) 45 | img_rectangle_cut = img[x1:x2, y1:y2] 46 | img_rectangle_cut_blurred = cv2.bilateralFilter(img_rectangle_cut, 11, 17, 17) 47 | 48 | # Image edge detection 49 | edges = cv2.Canny(img_rectangle_cut_blurred, 100, 150, apertureSize=3) 50 | # detect the lines 51 | minLineLength = 300 52 | maxLineGap = 25 53 | lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 30, minLineLength, maxLineGap).squeeze(1) 54 | 55 | # collect candidate pointer lines 56 | current_lines = [] 57 | for x1, y1, x2, y2 in lines: 58 | # remove the surrounding lines 59 | if y2 - y1 > 10: 60 | if x1 > 10 and x1 < img_rectangle_cut.shape[0] - 10: # !!!!! can change 61 | current_lines.append((x1, y1, x2, y2)) 62 | # for show 63 | cv2.line(img_rectangle_cut_blurred, (x1, y1), (x2, y2), (0, 0, 255), 2) 64 | 65 | # compute the pointer angle in degrees 66 | x1, y1, x2, y2 = current_lines[0] 67 | pointer_grad = np.abs(x2 - x1) / np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2) 68 | pointer_degree = np.arccos(pointer_grad) / np.pi * 180 69 | 70 | if y2 > y1: 71 | pointer_degree = 180 - pointer_degree 72 | 73 | # map the degree to num 74 | num = 0.33 # from the map (pointer_degree to num) 75 | 76 | # for show 77 | for key in range(len(rectangle["cant"])): 78 | cv2.drawContours(resized_gray, rectangle["cant"], key, (0, 255, 0), 3) 79 | 80 | #cv2.imshow("gray0", resized_gray) 81 | #cv2.imshow("gray1", img) 82 | #cv2.imshow("gray2", edges) 83 | cv2.imshow("gray3", img_rectangle_cut_blurred) 84 | cv2.waitKey(0) 85 | return num 86 | 87 | if __name__ == "__main__": 88 | degree2num("../template/class2.png") -------------------------------------------------------------------------------- /Identify_picture.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author:CherryXuan 3 | Email:shenzexuan1994@foxmail.com 4 | Wechat:cherry19940614 5 | 6 | File:Identify_picture.py 7 | Name: 8 | Version:v0.0.1 9 | Date:2019/6/14 13:29 10 | ''' 11 | from MeterReader import METER 12 | 13 | if __name__ == '__main__': 14 | input_path = input('Please enter the image path: ') 15 | METER(input_path).iden_pic() 16 | 17 | 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 qqwweee 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MeterReader.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author:CherryXuan 3 | Email:shenzexuan1994@foxmail.com 4 | Wechat:cherry19940614 5 | 6 | File:MeterReader.py 7 | Name:仪表识别类 8 | Version:v0.0.1 9 | Date:2019/6/14 12:30 10 | ''' 11 | import os 12 | import cv2 13 | import numpy as np 14 | import matplotlib.pyplot as plt 15 | 16 | 17 | 18 | class METER(object): 19 | def __init__(self,Imagepath): 20 | self.Imagepath = Imagepath 21 | 22 | # 读取图片 23 | def readData(self): 24 | imgs_path = [] 25 | for filename in os.listdir(self.Imagepath): 26 | if filename.endswith('.jpg'): 27 | filename = self.path + '/' + filename 28 | # 图片归一化处理 29 | #res = cv2.resize(img, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_LINEAR) # 按照比例缩放,如x,y轴均缩小一倍 30 | imgs_path.append(filename) 31 | return imgs_path 32 | 33 | # 图片归一化处理 34 | def normalized_picture(self): 35 | img = cv2.imread(self.Imagepath) 36 | nor = cv2.resize(img, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_LINEAR) # 按照比例缩放,如x,y轴均缩小一倍 37 | cv2.imshow('Normalized picture',nor) 38 | return nor 39 | 40 | # 颜色空间转换:灰度化 41 | def color_conversion(self,img): 42 | img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # 转换为灰度图 43 | cv2.imshow('Graying pictures', img_gray) 44 | return img_gray 45 | 46 | # 中值滤波去噪 47 | def median_filter(self,img): 48 | median = cv2.medianBlur(img, 1) # 中值滤波 49 | cv2.imshow('Median filter', median) 50 | return median 51 | 52 | # 双边滤波去噪 53 | def bilateral_filter(self,img): 54 | bilateral = cv2.bilateralFilter(img, 9, 50, 50) 55 | cv2.imshow('Bilateral filter', bilateral) 56 | return bilateral 57 | 58 | # 高斯滤波去噪 59 | def gaussian_filter(self,img): 60 | gaussian = cv2.GaussianBlur(img, (3, 3), 0) 61 | cv2.imshow('Gaussian filter', gaussian) 62 | return gaussian 63 | 64 | # 图像二值化 65 | def binary_image(self,img): 66 | # 应用5种不同的阈值方法 67 | # ret, th1 = cv2.threshold(img_gray, 127, 255, cv2.THRESH_BINARY) 68 | # ret, th2 = cv2.threshold(img_gray, 127, 255, cv2.THRESH_BINARY_INV) 69 | # ret, th3 = cv2.threshold(img_gray, 127, 255, cv2.THRESH_TRUNC) 70 | # ret, th4 = cv2.threshold(img_gray, 127, 255, cv2.THRESH_TOZERO) 71 | # ret, th5 = cv2.threshold(img_gray, 127, 255, cv2.THRESH_TOZERO_INV) 72 | # titles = ['Gray', 'BINARY', 'BINARY_INV', 'TRUNC', 'TOZERO', 'TOZERO_INV'] 73 | # images = [img_gray, th1, th2, th3, th4, th5] 74 | # 使用Matplotlib显示 75 | # for i in range(6): 76 | # plt.subplot(2, 3, i + 1) 77 | # plt.imshow(images[i], 'gray') 78 | # plt.title(titles[i], fontsize=8) 79 | # plt.xticks([]), plt.yticks([]) # 隐藏坐标轴 80 | # plt.show() 81 | 82 | # Otsu阈值 83 | _, th = cv2.threshold(img, 0, 255, cv2.THRESH_TOZERO + cv2.THRESH_OTSU) 84 | cv2.imshow('Binary image', th) 85 | return th 86 | 87 | # 边缘检测 88 | def candy_image(self,img): 89 | edges = cv2.Canny(img, 60, 143, apertureSize=3) 90 | cv2.imshow('canny', edges) 91 | return edges 92 | 93 | # 开运算:先腐蚀后膨胀 94 | def open_operation(self,img): 95 | # 定义结构元素 96 | kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5)) # 矩形结构 97 | # kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5)) # 椭圆结构 98 | # kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (5, 5)) # 十字形结构 99 | opening = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel) # 开运算 100 | cv2.imshow('Open operation', opening) 101 | return opening 102 | 103 | # 霍夫圆变换:检测表盘 104 | def detect_circles(self,gray,img): 105 | circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, 1, 100, param2=150, minRadius=160) 
106 | circles = np.uint16(np.around(circles)) # 把circles包含的圆心和半径的值变成整数 107 | cir = img.copy() 108 | 109 | for i in circles[0, :]: 110 | cv2.circle(cir, (i[0], i[1]), i[2], (0, 255, 0), 2, cv2.LINE_AA) # 画圆 111 | cv2.circle(cir, (i[0], i[1]), 2, (0, 255, 0), 2, cv2.LINE_AA) # 画圆心 112 | cv2.imshow("circles", cir) 113 | return cir 114 | 115 | # 霍夫直线变换:检测指针 116 | def detect_pointer(self,cir): 117 | 118 | img = cv2.GaussianBlur(cir, (3, 3), 0) 119 | edges = cv2.Canny(img, 50, 150, apertureSize=3) 120 | lines = cv2.HoughLines(edges, 1, np.pi / 180, 118) # 这里对最后一个参数使用了经验型的值 121 | result = cir.copy() 122 | 123 | for line in lines[0]: 124 | rho = line[0] # 第一个元素是距离rho 125 | theta = line[1] # 第二个元素是角度theta 126 | rtheta = theta * (180 / np.pi) 127 | print('θ1:', rtheta) 128 | if (theta < (np.pi / 4.)) or (theta > (3. * np.pi / 4.0)): # 垂直直线 129 | # 该直线与第一行的交点 130 | pt1 = (int(rho / np.cos(theta)), 0) 131 | # 该直线与最后一行的焦点 132 | pt2 = (int((rho - result.shape[0] * np.sin(theta)) / np.cos(theta)), result.shape[0]) 133 | a = int( 134 | int(int(rho / np.cos(theta)) + int((rho - result.shape[0] * np.sin(theta)) / np.cos(theta))) / 2) 135 | b = int(result.shape[0] / 2) 136 | pt3 = (a, b) 137 | pt4 = (int(int(int(rho / np.cos(theta)) + a) / 2), int(b / 2)) 138 | # 绘制一条白线 139 | cv2.putText(result, 'theta1={}'.format(int(rtheta)), pt4, cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255), 1) 140 | cv2.line(result, pt3, pt4, (0, 0, 255), 2, cv2.LINE_AA) 141 | else: # 水平直线 142 | # 该直线与第一列的交点 143 | pt1 = (0, int(rho / np.sin(theta))) 144 | # 该直线与最后一列的交点 145 | pt2 = (result.shape[1], int((rho - result.shape[1] * np.cos(theta)) / np.sin(theta))) 146 | a = int( 147 | int(int(rho / np.cos(theta)) + int((rho - result.shape[0] * np.sin(theta)) / np.cos(theta))) / 2) 148 | b = int(result.shape[0] / 2) 149 | pt3 = (a, b) 150 | pt4 = (int(int(int(rho / np.cos(theta)) + a) / 2), int(b / 2)) 151 | # 绘制一条直线 152 | cv2.line(result, pt3, pt4, (0, 0, 255), 2, cv2.LINE_AA) 153 | 154 | for line in lines[2]: 155 | rho = line[0] # 第一个元素是距离rho 156 | theta = line[1] # 第二个元素是角度theta 157 | rtheta = theta * (180 / np.pi) 158 | print('θ2:', - rtheta - 90) 159 | if (theta < (np.pi / 4.)) or (theta > (3. 
* np.pi / 4.0)): # 垂直直线 160 | # 该直线与第一行的交点 161 | pt1 = (int(rho / np.cos(theta)), 0) 162 | # 该直线与最后一行的焦点 163 | pt2 = (int((rho - result.shape[0] * np.sin(theta)) / np.cos(theta)), result.shape[0]) 164 | a = int( 165 | int(int(rho / np.cos(theta)) + int((rho - result.shape[0] * np.sin(theta)) / np.cos(theta))) / 2) 166 | b = int(result.shape[0] / 2) 167 | pt3 = (a, b) 168 | pt4 = (int(int(int((rho - result.shape[0] * np.sin(theta)) / np.cos(theta)) + a) / 2), 169 | int(int(int(b + result.shape[0]) / 2))) 170 | # 绘制一条白线 171 | cv2.putText(result, 'theta2={}'.format(int(- rtheta - 90)), pt4, cv2.FONT_HERSHEY_COMPLEX, 0.5, 172 | (0, 0, 255), 1) 173 | cv2.line(result, pt3, pt4, (255, 0, 0), 2, cv2.LINE_AA) 174 | else: # 水平直线 175 | # 该直线与第一列的交点 176 | pt1 = (0, int(rho / np.sin(theta))) 177 | # 该直线与最后一列的交点 178 | pt2 = (result.shape[1], int((rho - result.shape[1] * np.cos(theta)) / np.sin(theta))) 179 | a = int( 180 | int(int(rho / np.cos(theta)) + int((rho - result.shape[0] * np.sin(theta)) / np.cos(theta))) / 2) 181 | b = int(result.shape[0] / 2) 182 | pt3 = (a, b) 183 | pt4 = (int(int(int((rho - result.shape[0] * np.sin(theta)) / np.cos(theta)) + a) / 2), 184 | int(int(int(b + result.shape[0]) / 2))) 185 | # 绘制一条直线 186 | cv2.line(result, pt3, pt4, (255, 0, 0), 2, cv2.LINE_AA) 187 | 188 | # cv2.imshow('Canny', edges) 189 | cv2.imshow('Result', result) 190 | 191 | return result 192 | def iden_pic(self): 193 | 194 | image = METER(self.Imagepath) 195 | nor = image.normalized_picture() 196 | gray = image.color_conversion(nor) 197 | binary = image.binary_image(gray) 198 | median = image.median_filter(binary) 199 | #bilateral = image.bilateral_filter(median) 200 | gaussian = image.gaussian_filter(median) 201 | candy = image.candy_image(median) 202 | cir = image.detect_circles(gray,nor) 203 | pointer = image.detect_pointer(cir) 204 | 205 | nor = cv2.cvtColor(nor, cv2.COLOR_BGR2RGB) 206 | gray = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB) 207 | gaussian = cv2.cvtColor(gaussian, cv2.COLOR_BGR2RGB) 208 | median = cv2.cvtColor(median, cv2.COLOR_BGR2RGB) 209 | binary = cv2.cvtColor(binary, cv2.COLOR_BGR2RGB) 210 | candy = cv2.cvtColor(candy, cv2.COLOR_BGR2RGB) 211 | cir = cv2.cvtColor(cir, cv2.COLOR_BGR2RGB) 212 | pointer = cv2.cvtColor(pointer, cv2.COLOR_BGR2RGB) 213 | 214 | titles = ['Original', 'Gray', 'Gaussian', 'Mdian', 'Binary', 'Candy', 'Circle', 'Pointer'] 215 | images = [nor, gray, gaussian, median, binary, candy, cir, pointer] 216 | # 使用Matplotlib显示 217 | for i in range(8): 218 | plt.subplot(2, 4, i + 1) 219 | plt.imshow(images[i]) 220 | plt.title(titles[i], fontsize=8) 221 | plt.xticks([]), plt.yticks([]) # 隐藏坐标轴 222 | plt.show() 223 | 224 | cv2.waitKey(0) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pointer-meter-reading-algorithm-by-Python 2 | Using YoloV3 to detect pointer instruments and reading the number by Hough Transform 3 | 本小白上传该项目的初衷只是为了记录一下已经结束的本科求学生涯,上传的项目质量可能并没有那么高,还望大家多多担待! 4 | 下面对于一些小问题进行Q&A 5 | 1)Q:数据集可否进行分享? 6 | A:非常抱歉,由于毕设项目属于校企合作,所以很多数据无法进行透露与分享。 7 | 2)Q:相关思路或者流程上存在一些不明确的地方,想要进一步了解? 8 | A:我将我自己的毕设论文进行相应的整理,并上传至CSDN上,如果有兴趣的话,可以移步至以下CSDN网址进行阅读:https://blog.csdn.net/qq_45006390/category_11238344.html?spm=1001.2014.3001.5482 9 | 3)Q:平台搭建或训练相关的问题? 
10 | A:小白的记性已经逐渐退化,对于相关的硬核知识已经遗忘的差不多了,但是我和之前同组的同学曾整理过一个文档(主要是围绕平台搭建与模型训练,在此感谢我的好大儿xuyihao同学提供的支持),现已上传至百度网盘(永久有效,欢迎自取): 11 | 链接:https://pan.baidu.com/s/1R0hB1hcW0Ha4uIwAF4jFGA 12 | 提取码:e22d 13 | 4)Q:在文件夹里有好多py,是都有用处吗? 14 | A:并不是,除去模型训练用到的一些官方py,其实有很多都是我为了测试编写的,最后使用的py是MeterAutoReader这个,这个py里面实现了yolo检测提取指针式仪表图片和霍夫变换检测直线读取角度(这部分做的并不是很好,因为霍夫检要做很多调参,且检测出的直线很多,结果并不稳定),关于读取角度这部分我推荐可以将起始刻度和结束刻度也纳入yolo检测提取的范畴,通过计算其boudingbox之间的角度来进行计算(可能会比较复杂,但会比较稳定)。 15 | -------------------------------------------------------------------------------- /VOCdevkit/VOC2007/ImageSets/Main/test.txt: -------------------------------------------------------------------------------- 1 | 10008 2 | 10009 3 | 10021 4 | 10024 5 | 1004 6 | 10044 7 | 10046 8 | 10048 9 | 10052 10 | 10056 11 | 10057 12 | 1006 13 | 10079 14 | 10087 15 | 10096 16 | 10099 17 | 10113 18 | 10117 19 | 10127 20 | 10131 21 | 10136 22 | 10140 23 | 1016 24 | 10161 25 | 10169 26 | 1017 27 | 10174 28 | 10199 29 | 10204 30 | 10209 31 | 10216 32 | 1022 33 | 10222 34 | 10230 35 | 10233 36 | 10273 37 | 10276 38 | 10282 39 | 10284 40 | 10286 41 | 10294 42 | 1030 43 | 1049 44 | 1070 45 | 1073 46 | 1083 47 | 109 48 | 1091 49 | 1097 50 | 1106 51 | 1108 52 | 1118 53 | 1138 54 | 1139 55 | 1152 56 | 1170 57 | 1175 58 | 1183 59 | 1222 60 | 1224 61 | 1245 62 | 1248 63 | 1254 64 | 1266 65 | 1273 66 | 1304 67 | 1315 68 | 132 69 | 1380 70 | 1381 71 | 1383 72 | 1388 73 | 1393 74 | 1411 75 | 1417 76 | 1418 77 | 1423 78 | 1444 79 | 1446 80 | 1459 81 | 1468 82 | 1495 83 | 1496 84 | 150 85 | 1505 86 | 151 87 | 1519 88 | 152 89 | 1527 90 | 1533 91 | 154 92 | 1544 93 | 1549 94 | 1554 95 | 1555 96 | 1570 97 | 1593 98 | 1597 99 | 1598 100 | 1599 101 | 1608 102 | 1627 103 | 1635 104 | 1647 105 | 1655 106 | 1670 107 | 1671 108 | 1673 109 | 1688 110 | 1692 111 | 1696 112 | 170 113 | 1703 114 | 1706 115 | 1713 116 | 1730 117 | 1732 118 | 1749 119 | 1759 120 | 1777 121 | 1798 122 | 1824 123 | 1828 124 | 1831 125 | 1836 126 | 187 127 | 1880 128 | 1886 129 | 1892 130 | 1914 131 | 1921 132 | 1946 133 | 1949 134 | 1954 135 | 1959 136 | 196 137 | 1974 138 | 1984 139 | 1985 140 | 1989 141 | 2008 142 | 2019 143 | 2026 144 | 2028 145 | 2033 146 | 2045 147 | 2069 148 | 207 149 | 2073 150 | 2079 151 | 21 152 | 210 153 | 211 154 | 212 155 | 213 156 | 2133 157 | 2145 158 | 2156 159 | 2162 160 | 2164 161 | 2176 162 | 218 163 | 2184 164 | 2186 165 | 2189 166 | 219 167 | 2195 168 | 2206 169 | 2215 170 | 2220 171 | 2221 172 | 2229 173 | 2235 174 | 2240 175 | 2257 176 | 2277 177 | 2285 178 | 2287 179 | 2300 180 | 2308 181 | 2316 182 | 2340 183 | 2347 184 | 2363 185 | 2367 186 | 2377 187 | 2379 188 | 240 189 | 242 190 | 245 191 | 2451 192 | 2463 193 | 2470 194 | 25 195 | 250 196 | 2501 197 | 2515 198 | 2516 199 | 2520 200 | 2543 201 | 2555 202 | 2562 203 | 2599 204 | 2608 205 | 2610 206 | 2617 207 | 2632 208 | 2654 209 | 2655 210 | 2662 211 | 2674 212 | 2682 213 | 2683 214 | 2689 215 | 2697 216 | 271 217 | 2713 218 | 2718 219 | 2747 220 | 2757 221 | 2764 222 | 277 223 | 2781 224 | 281 225 | 2860 226 | 2865 227 | 2878 228 | 2885 229 | 2893 230 | 2905 231 | 292 232 | 2931 233 | 2938 234 | 295 235 | 2964 236 | 2965 237 | 2975 238 | 2996 239 | 300 240 | 3005 241 | 3013 242 | 3015 243 | 3017 244 | 307 245 | 3072 246 | 3075 247 | 3081 248 | 3106 249 | 3107 250 | 3114 251 | 312 252 | 3121 253 | 3122 254 | 3125 255 | 3133 256 | 3139 257 | 3148 258 | 315 259 | 3151 260 | 3152 261 | 3170 262 | 3178 263 | 3193 264 | 3202 265 | 3216 266 | 3220 267 | 3225 268 | 3238 269 | 326 270 | 3291 271 | 3294 272 | 3296 273 | 
3298 274 | 3299 275 | 3300 276 | 3305 277 | 3310 278 | 3313 279 | 3330 280 | 3352 281 | 3361 282 | 3404 283 | 3439 284 | 3465 285 | 3471 286 | 3472 287 | 3553 288 | 3555 289 | 3570 290 | 3572 291 | 3579 292 | 3587 293 | 360 294 | 3602 295 | 3607 296 | 3609 297 | 3611 298 | 3614 299 | 3649 300 | 3671 301 | 3678 302 | 369 303 | 3690 304 | 3699 305 | 3709 306 | 3718 307 | 3741 308 | 3753 309 | 3764 310 | 3767 311 | 377 312 | 3786 313 | 3792 314 | 3795 315 | 3824 316 | 3830 317 | 3841 318 | 385 319 | 3870 320 | 3896 321 | 390 322 | 391 323 | 3923 324 | 3924 325 | 394 326 | 3941 327 | 3954 328 | 3965 329 | 3976 330 | 3995 331 | 3999 332 | 4005 333 | 4010 334 | 4013 335 | 4018 336 | 4039 337 | 405 338 | 4055 339 | 4058 340 | 4068 341 | 4073 342 | 4077 343 | 4079 344 | 4081 345 | 4087 346 | 411 347 | 4125 348 | 4144 349 | 4152 350 | 4157 351 | 4184 352 | 4191 353 | 4196 354 | 4210 355 | 4228 356 | 4251 357 | 4253 358 | 4299 359 | 4307 360 | 4323 361 | 435 362 | 4354 363 | 4380 364 | 4390 365 | 4392 366 | 4434 367 | 4435 368 | 4443 369 | 4444 370 | 4445 371 | 4459 372 | 4469 373 | 4472 374 | 4476 375 | 4481 376 | 4486 377 | 4491 378 | 4499 379 | 450 380 | 4506 381 | 4510 382 | 4514 383 | 4515 384 | 4519 385 | 4553 386 | 4567 387 | 457 388 | 4589 389 | 460 390 | 4610 391 | 4623 392 | 4634 393 | 4656 394 | 4662 395 | 4663 396 | 4664 397 | 4675 398 | 4687 399 | 4690 400 | 4699 401 | 4713 402 | 4744 403 | 4762 404 | 4765 405 | 4774 406 | 4778 407 | 4781 408 | 4784 409 | 479 410 | 4827 411 | 4838 412 | 4839 413 | 4846 414 | 4848 415 | 485 416 | 4859 417 | 4864 418 | 4865 419 | 4880 420 | 4892 421 | 4903 422 | 4962 423 | 498 424 | 4980 425 | 4990 426 | 4999 427 | 5000 428 | 501 429 | 5023 430 | 5024 431 | 5035 432 | 5045 433 | 5064 434 | 5085 435 | 5090 436 | 5094 437 | 5096 438 | 5099 439 | 51 440 | 511 441 | 5116 442 | 5122 443 | 5128 444 | 5131 445 | 5133 446 | 5141 447 | 5142 448 | 5159 449 | 5160 450 | 5166 451 | 5177 452 | 5184 453 | 5199 454 | 5209 455 | 5230 456 | 5238 457 | 5245 458 | 5246 459 | 5250 460 | 5251 461 | 5259 462 | 5272 463 | 5276 464 | 5281 465 | 5282 466 | 5284 467 | 5301 468 | 5308 469 | 5321 470 | 5324 471 | 5333 472 | 5348 473 | 5358 474 | 5362 475 | 5373 476 | 5381 477 | 5393 478 | 54 479 | 540 480 | 5413 481 | 5426 482 | 544 483 | 5462 484 | 548 485 | 5480 486 | 5484 487 | 5509 488 | 5513 489 | 5520 490 | 553 491 | 5530 492 | 5532 493 | 5569 494 | 5573 495 | 5579 496 | 5585 497 | 5588 498 | 5599 499 | 5616 500 | 5617 501 | 5621 502 | 5648 503 | 5658 504 | 5670 505 | 5684 506 | 569 507 | 5690 508 | 5710 509 | 5712 510 | 5725 511 | 575 512 | 5756 513 | 5776 514 | 580 515 | 5842 516 | 5843 517 | 5860 518 | 5869 519 | 5870 520 | 5924 521 | 593 522 | 5965 523 | 5966 524 | 5969 525 | 6005 526 | 6018 527 | 6043 528 | 6046 529 | 6056 530 | 6060 531 | 6068 532 | 6074 533 | 6093 534 | 6094 535 | 6103 536 | 6115 537 | 612 538 | 6124 539 | 6133 540 | 6144 541 | 6172 542 | 6183 543 | 619 544 | 6193 545 | 6195 546 | 6196 547 | 6212 548 | 6223 549 | 6224 550 | 6237 551 | 6261 552 | 6263 553 | 6288 554 | 6314 555 | 633 556 | 6337 557 | 6340 558 | 6365 559 | 6367 560 | 6372 561 | 6380 562 | 6381 563 | 6388 564 | 6390 565 | 6400 566 | 6403 567 | 6407 568 | 6414 569 | 6416 570 | 6421 571 | 6433 572 | 644 573 | 6443 574 | 6445 575 | 647 576 | 6492 577 | 650 578 | 6509 579 | 6523 580 | 6527 581 | 653 582 | 6537 583 | 6547 584 | 6548 585 | 6554 586 | 6562 587 | 6571 588 | 6615 589 | 6630 590 | 6641 591 | 6647 592 | 6650 593 | 6655 594 | 6668 595 | 6671 596 | 6686 597 | 6700 598 | 6702 599 | 6717 
600 | 6721 601 | 6740 602 | 6769 603 | 677 604 | 6782 605 | 6783 606 | 679 607 | 6824 608 | 6833 609 | 6835 610 | 6843 611 | 6870 612 | 6880 613 | 6907 614 | 6909 615 | 6927 616 | 6946 617 | 6953 618 | 6959 619 | 6964 620 | 6968 621 | 6986 622 | 7000 623 | 7013 624 | 7018 625 | 7019 626 | 702 627 | 7021 628 | 7036 629 | 704 630 | 7045 631 | 7061 632 | 7066 633 | 7070 634 | 7072 635 | 7073 636 | 7075 637 | 7077 638 | 7079 639 | 7087 640 | 7092 641 | 7109 642 | 7113 643 | 7114 644 | 712 645 | 7130 646 | 7139 647 | 7140 648 | 7151 649 | 7153 650 | 7157 651 | 7166 652 | 7171 653 | 7179 654 | 7187 655 | 7190 656 | 7250 657 | 7285 658 | 7305 659 | 7309 660 | 7329 661 | 733 662 | 7350 663 | 7361 664 | 7367 665 | 7373 666 | 7388 667 | 7390 668 | 7391 669 | 7403 670 | 7405 671 | 741 672 | 7439 673 | 7454 674 | 746 675 | 7467 676 | 7483 677 | 7491 678 | 7518 679 | 7526 680 | 7527 681 | 7534 682 | 754 683 | 758 684 | 7584 685 | 7606 686 | 7615 687 | 7644 688 | 7647 689 | 765 690 | 766 691 | 767 692 | 7677 693 | 7679 694 | 769 695 | 7691 696 | 7694 697 | 7712 698 | 7713 699 | 7721 700 | 7729 701 | 774 702 | 7745 703 | 7764 704 | 7783 705 | 781 706 | 7829 707 | 7834 708 | 7845 709 | 7863 710 | 7868 711 | 7873 712 | 7878 713 | 7890 714 | 7900 715 | 7919 716 | 7922 717 | 7924 718 | 7934 719 | 7935 720 | 7937 721 | 7943 722 | 7945 723 | 7958 724 | 7977 725 | 7981 726 | 8001 727 | 8020 728 | 8031 729 | 8037 730 | 8055 731 | 8078 732 | 8083 733 | 8093 734 | 8095 735 | 8098 736 | 8109 737 | 8124 738 | 8126 739 | 8134 740 | 8136 741 | 8141 742 | 8159 743 | 816 744 | 8166 745 | 8168 746 | 8175 747 | 8176 748 | 8182 749 | 8194 750 | 8199 751 | 821 752 | 8212 753 | 8216 754 | 822 755 | 8230 756 | 8232 757 | 8233 758 | 8235 759 | 8239 760 | 8244 761 | 8264 762 | 8267 763 | 8291 764 | 8336 765 | 834 766 | 8342 767 | 8345 768 | 8348 769 | 8364 770 | 837 771 | 8386 772 | 8398 773 | 8400 774 | 8406 775 | 8411 776 | 8420 777 | 8425 778 | 8430 779 | 844 780 | 8441 781 | 8445 782 | 8450 783 | 8451 784 | 8455 785 | 8461 786 | 8468 787 | 8474 788 | 8489 789 | 8497 790 | 8520 791 | 8522 792 | 8527 793 | 8537 794 | 8560 795 | 8567 796 | 8577 797 | 8599 798 | 86 799 | 8624 800 | 8641 801 | 8656 802 | 8657 803 | 8658 804 | 8661 805 | 8664 806 | 868 807 | 8682 808 | 8686 809 | 8709 810 | 8721 811 | 8738 812 | 8750 813 | 8755 814 | 8768 815 | 877 816 | 8778 817 | 8798 818 | 8807 819 | 8810 820 | 8818 821 | 8829 822 | 883 823 | 8830 824 | 8859 825 | 8871 826 | 8892 827 | 8904 828 | 8920 829 | 8924 830 | 8939 831 | 8963 832 | 8976 833 | 8981 834 | 8988 835 | 9015 836 | 9023 837 | 9033 838 | 9044 839 | 9065 840 | 9069 841 | 9093 842 | 9106 843 | 9123 844 | 9124 845 | 9138 846 | 9140 847 | 9164 848 | 9180 849 | 919 850 | 9201 851 | 9217 852 | 9222 853 | 924 854 | 9243 855 | 9260 856 | 9263 857 | 9277 858 | 9280 859 | 9289 860 | 9310 861 | 9313 862 | 9318 863 | 9320 864 | 9353 865 | 9363 866 | 937 867 | 9373 868 | 9380 869 | 9393 870 | 9395 871 | 9415 872 | 9423 873 | 9444 874 | 9456 875 | 9473 876 | 9500 877 | 9519 878 | 9562 879 | 9567 880 | 9569 881 | 9575 882 | 9577 883 | 9582 884 | 9598 885 | 96 886 | 9608 887 | 963 888 | 9635 889 | 9651 890 | 9666 891 | 9691 892 | 9702 893 | 9721 894 | 9723 895 | 9725 896 | 9746 897 | 9760 898 | 9763 899 | 9765 900 | 9771 901 | 9781 902 | 9789 903 | 9792 904 | 9793 905 | 9797 906 | 9798 907 | 9804 908 | 9808 909 | 9810 910 | 9813 911 | 982 912 | 9822 913 | 9835 914 | 984 915 | 9840 916 | 9841 917 | 9848 918 | 9867 919 | 988 920 | 9894 921 | 9929 922 | 9931 923 | 9964 924 | 9970 925 | 9975 926 
| 9994 927 | 9996 928 | -------------------------------------------------------------------------------- /VOCdevkit/VOC2007/ImageSets/Main/trainval.txt: -------------------------------------------------------------------------------- 1 | 10008 2 | 10009 3 | 10021 4 | 10024 5 | 1004 6 | 10044 7 | 10046 8 | 10048 9 | 10052 10 | 10056 11 | 10057 12 | 1006 13 | 10079 14 | 10087 15 | 10096 16 | 10099 17 | 10113 18 | 10117 19 | 10127 20 | 10131 21 | 10136 22 | 10140 23 | 1016 24 | 10161 25 | 10169 26 | 1017 27 | 10174 28 | 10178 29 | 10199 30 | 10204 31 | 10209 32 | 10216 33 | 10219 34 | 1022 35 | 10222 36 | 10230 37 | 10233 38 | 10273 39 | 10276 40 | 10282 41 | 10284 42 | 10286 43 | 10294 44 | 1030 45 | 1049 46 | 1070 47 | 1073 48 | 1083 49 | 109 50 | 1091 51 | 1097 52 | 1106 53 | 1108 54 | 1118 55 | 1138 56 | 1139 57 | 1152 58 | 1170 59 | 1175 60 | 1183 61 | 1194 62 | 1211 63 | 1222 64 | 1224 65 | 1245 66 | 1248 67 | 1254 68 | 1266 69 | 1273 70 | 1276 71 | 1304 72 | 1315 73 | 132 74 | 1361 75 | 1380 76 | 1381 77 | 1383 78 | 1388 79 | 1393 80 | 1411 81 | 1417 82 | 1418 83 | 1423 84 | 1444 85 | 1446 86 | 1456 87 | 1459 88 | 146 89 | 1467 90 | 1468 91 | 1469 92 | 1495 93 | 1496 94 | 150 95 | 1505 96 | 151 97 | 1519 98 | 152 99 | 1527 100 | 1533 101 | 154 102 | 1544 103 | 1549 104 | 1554 105 | 1555 106 | 1570 107 | 1583 108 | 1593 109 | 1597 110 | 1598 111 | 1599 112 | 1608 113 | 1615 114 | 1627 115 | 1635 116 | 1647 117 | 1655 118 | 1670 119 | 1671 120 | 1673 121 | 1688 122 | 1692 123 | 1696 124 | 170 125 | 1703 126 | 1706 127 | 1713 128 | 1730 129 | 1732 130 | 1743 131 | 1749 132 | 1759 133 | 1761 134 | 1777 135 | 1798 136 | 1803 137 | 1811 138 | 1821 139 | 1824 140 | 1828 141 | 1831 142 | 1836 143 | 1848 144 | 1865 145 | 187 146 | 1880 147 | 1886 148 | 1892 149 | 1914 150 | 1921 151 | 1946 152 | 1949 153 | 1954 154 | 1959 155 | 196 156 | 1974 157 | 1984 158 | 1985 159 | 1989 160 | 2008 161 | 2019 162 | 2026 163 | 2028 164 | 2031 165 | 2033 166 | 2045 167 | 2069 168 | 207 169 | 2073 170 | 2079 171 | 21 172 | 210 173 | 211 174 | 212 175 | 213 176 | 2133 177 | 2145 178 | 2148 179 | 2156 180 | 2162 181 | 2164 182 | 2176 183 | 218 184 | 2184 185 | 2186 186 | 2189 187 | 219 188 | 2195 189 | 2206 190 | 2215 191 | 2220 192 | 2221 193 | 2229 194 | 2235 195 | 2240 196 | 2257 197 | 2277 198 | 2285 199 | 2287 200 | 2300 201 | 2308 202 | 2316 203 | 2340 204 | 2347 205 | 2363 206 | 2367 207 | 2377 208 | 2379 209 | 240 210 | 242 211 | 245 212 | 2451 213 | 2463 214 | 2469 215 | 2470 216 | 25 217 | 250 218 | 2501 219 | 2515 220 | 2516 221 | 2520 222 | 2543 223 | 2549 224 | 2555 225 | 2562 226 | 2599 227 | 2608 228 | 2610 229 | 2617 230 | 2632 231 | 2654 232 | 2655 233 | 2662 234 | 2674 235 | 2682 236 | 2683 237 | 2689 238 | 2697 239 | 271 240 | 2713 241 | 2717 242 | 2718 243 | 2734 244 | 2747 245 | 2757 246 | 2764 247 | 277 248 | 2781 249 | 281 250 | 2855 251 | 2860 252 | 2865 253 | 2878 254 | 2885 255 | 2893 256 | 2905 257 | 292 258 | 2931 259 | 2938 260 | 294 261 | 295 262 | 2964 263 | 2965 264 | 2975 265 | 2996 266 | 300 267 | 3005 268 | 3013 269 | 3015 270 | 3017 271 | 307 272 | 3072 273 | 3075 274 | 3081 275 | 31 276 | 3106 277 | 3107 278 | 3114 279 | 312 280 | 3121 281 | 3122 282 | 3125 283 | 3133 284 | 3139 285 | 3148 286 | 315 287 | 3151 288 | 3152 289 | 3170 290 | 3178 291 | 3193 292 | 3202 293 | 3216 294 | 3220 295 | 3225 296 | 3238 297 | 326 298 | 3288 299 | 3291 300 | 3294 301 | 3296 302 | 3298 303 | 3299 304 | 3300 305 | 3305 306 | 3310 307 | 3313 308 | 3330 309 | 3352 310 | 3361 311 | 3391 312 | 
3404 313 | 3422 314 | 3439 315 | 345 316 | 3465 317 | 3471 318 | 3472 319 | 3482 320 | 3553 321 | 3555 322 | 3570 323 | 3572 324 | 3579 325 | 3587 326 | 359 327 | 360 328 | 3602 329 | 3607 330 | 3609 331 | 3611 332 | 3614 333 | 363 334 | 3645 335 | 3649 336 | 3671 337 | 3674 338 | 3678 339 | 369 340 | 3690 341 | 3699 342 | 3709 343 | 3714 344 | 3718 345 | 373 346 | 3741 347 | 3753 348 | 3762 349 | 3764 350 | 3767 351 | 377 352 | 3778 353 | 3786 354 | 3792 355 | 3795 356 | 3804 357 | 3824 358 | 3830 359 | 3841 360 | 385 361 | 3870 362 | 3896 363 | 390 364 | 391 365 | 3923 366 | 3924 367 | 393 368 | 394 369 | 3941 370 | 3954 371 | 3965 372 | 3976 373 | 3995 374 | 3999 375 | 4005 376 | 4010 377 | 4013 378 | 4018 379 | 4039 380 | 405 381 | 4055 382 | 4058 383 | 4068 384 | 4073 385 | 4077 386 | 4079 387 | 4081 388 | 4087 389 | 411 390 | 4125 391 | 4140 392 | 4144 393 | 4152 394 | 4157 395 | 4184 396 | 4191 397 | 4196 398 | 4210 399 | 4228 400 | 4251 401 | 4253 402 | 4299 403 | 430 404 | 4307 405 | 4323 406 | 435 407 | 4354 408 | 4380 409 | 4390 410 | 4392 411 | 4434 412 | 4435 413 | 4443 414 | 4444 415 | 4445 416 | 4459 417 | 4469 418 | 4472 419 | 4476 420 | 4481 421 | 4486 422 | 4491 423 | 4499 424 | 450 425 | 4506 426 | 4510 427 | 4514 428 | 4515 429 | 4519 430 | 4551 431 | 4553 432 | 4567 433 | 457 434 | 4589 435 | 460 436 | 4610 437 | 4623 438 | 4634 439 | 4656 440 | 4662 441 | 4663 442 | 4664 443 | 4675 444 | 4681 445 | 4687 446 | 4690 447 | 4699 448 | 4713 449 | 4744 450 | 4762 451 | 4765 452 | 4774 453 | 4778 454 | 4781 455 | 4784 456 | 479 457 | 4827 458 | 4838 459 | 4839 460 | 4846 461 | 4848 462 | 485 463 | 4859 464 | 486 465 | 4864 466 | 4865 467 | 4880 468 | 4892 469 | 4903 470 | 4962 471 | 498 472 | 4980 473 | 4990 474 | 4999 475 | 5000 476 | 501 477 | 5023 478 | 5024 479 | 5035 480 | 5045 481 | 5064 482 | 5085 483 | 5090 484 | 5094 485 | 5096 486 | 5099 487 | 51 488 | 5108 489 | 511 490 | 5116 491 | 5122 492 | 5128 493 | 5131 494 | 5133 495 | 5141 496 | 5142 497 | 5159 498 | 5160 499 | 5166 500 | 5177 501 | 5184 502 | 5199 503 | 5209 504 | 5230 505 | 5238 506 | 5240 507 | 5245 508 | 5246 509 | 5250 510 | 5251 511 | 5259 512 | 5272 513 | 5273 514 | 5276 515 | 5281 516 | 5282 517 | 5284 518 | 5301 519 | 5308 520 | 5321 521 | 5324 522 | 5332 523 | 5333 524 | 5348 525 | 5358 526 | 5362 527 | 537 528 | 5372 529 | 5373 530 | 5378 531 | 5381 532 | 5393 533 | 54 534 | 540 535 | 5413 536 | 5424 537 | 5426 538 | 544 539 | 5462 540 | 548 541 | 5480 542 | 5484 543 | 5509 544 | 5513 545 | 5520 546 | 5526 547 | 553 548 | 5530 549 | 5532 550 | 5555 551 | 5569 552 | 5573 553 | 5579 554 | 5585 555 | 5588 556 | 5599 557 | 5616 558 | 5617 559 | 5621 560 | 5648 561 | 5658 562 | 5670 563 | 5684 564 | 569 565 | 5690 566 | 5695 567 | 5710 568 | 5712 569 | 5725 570 | 575 571 | 5756 572 | 5776 573 | 5782 574 | 580 575 | 5820 576 | 5842 577 | 5843 578 | 5860 579 | 5869 580 | 5870 581 | 5924 582 | 593 583 | 5958 584 | 5965 585 | 5966 586 | 5969 587 | 5993 588 | 6005 589 | 6018 590 | 6043 591 | 6045 592 | 6046 593 | 6056 594 | 6060 595 | 6068 596 | 6074 597 | 6093 598 | 6094 599 | 6103 600 | 6115 601 | 6119 602 | 612 603 | 6124 604 | 6129 605 | 6133 606 | 6144 607 | 6172 608 | 6183 609 | 619 610 | 6193 611 | 6195 612 | 6196 613 | 6212 614 | 6221 615 | 6223 616 | 6224 617 | 6237 618 | 6261 619 | 6263 620 | 6288 621 | 6314 622 | 633 623 | 6337 624 | 6340 625 | 6365 626 | 6367 627 | 6372 628 | 6380 629 | 6381 630 | 6388 631 | 6390 632 | 6400 633 | 6403 634 | 6407 635 | 6414 636 | 6416 637 | 6421 638 | 6433 639 
| 644 640 | 6443 641 | 6445 642 | 6450 643 | 647 644 | 6492 645 | 650 646 | 6509 647 | 6523 648 | 6527 649 | 653 650 | 6537 651 | 6547 652 | 6548 653 | 6554 654 | 6562 655 | 6571 656 | 6599 657 | 6615 658 | 663 659 | 6630 660 | 6641 661 | 6647 662 | 6650 663 | 6655 664 | 6668 665 | 6671 666 | 6686 667 | 6697 668 | 6700 669 | 6702 670 | 6711 671 | 6717 672 | 6721 673 | 6740 674 | 6761 675 | 6769 676 | 677 677 | 6782 678 | 6783 679 | 679 680 | 6824 681 | 6828 682 | 6833 683 | 6835 684 | 6843 685 | 6870 686 | 6880 687 | 6889 688 | 6907 689 | 6909 690 | 6927 691 | 6946 692 | 6953 693 | 6959 694 | 6964 695 | 6968 696 | 6986 697 | 6989 698 | 7000 699 | 7013 700 | 7018 701 | 7019 702 | 702 703 | 7021 704 | 7033 705 | 7036 706 | 704 707 | 7045 708 | 7059 709 | 7061 710 | 7066 711 | 7070 712 | 7072 713 | 7073 714 | 7075 715 | 7077 716 | 7079 717 | 7087 718 | 7092 719 | 7109 720 | 7113 721 | 7114 722 | 712 723 | 7130 724 | 7139 725 | 7140 726 | 7151 727 | 7153 728 | 7157 729 | 7166 730 | 7171 731 | 7179 732 | 7187 733 | 7190 734 | 7197 735 | 7250 736 | 7285 737 | 7305 738 | 7309 739 | 7329 740 | 733 741 | 7350 742 | 7361 743 | 7367 744 | 7373 745 | 7388 746 | 7390 747 | 7391 748 | 7403 749 | 7405 750 | 741 751 | 7439 752 | 7454 753 | 746 754 | 7467 755 | 7483 756 | 7491 757 | 7518 758 | 7526 759 | 7527 760 | 7534 761 | 754 762 | 758 763 | 7584 764 | 7606 765 | 7615 766 | 7644 767 | 7647 768 | 765 769 | 766 770 | 767 771 | 7677 772 | 7679 773 | 769 774 | 7691 775 | 7694 776 | 7710 777 | 7712 778 | 7713 779 | 7721 780 | 7727 781 | 7729 782 | 774 783 | 7745 784 | 7764 785 | 7783 786 | 781 787 | 7819 788 | 7829 789 | 7834 790 | 7845 791 | 7863 792 | 7868 793 | 787 794 | 7873 795 | 7878 796 | 7880 797 | 7882 798 | 7890 799 | 7900 800 | 7919 801 | 7922 802 | 7923 803 | 7924 804 | 7934 805 | 7935 806 | 7937 807 | 7943 808 | 7945 809 | 7958 810 | 7977 811 | 7981 812 | 8001 813 | 8008 814 | 8020 815 | 8031 816 | 8037 817 | 8039 818 | 8055 819 | 8078 820 | 8083 821 | 8093 822 | 8095 823 | 8098 824 | 8109 825 | 8124 826 | 8126 827 | 8134 828 | 8136 829 | 8141 830 | 8159 831 | 816 832 | 8166 833 | 8168 834 | 8175 835 | 8176 836 | 8182 837 | 8194 838 | 8199 839 | 821 840 | 8212 841 | 8216 842 | 822 843 | 8230 844 | 8232 845 | 8233 846 | 8235 847 | 8239 848 | 8244 849 | 8264 850 | 8267 851 | 8284 852 | 8291 853 | 8324 854 | 8336 855 | 834 856 | 8342 857 | 8345 858 | 8348 859 | 8364 860 | 837 861 | 8386 862 | 8398 863 | 8400 864 | 8406 865 | 8411 866 | 8420 867 | 8425 868 | 8430 869 | 844 870 | 8441 871 | 8445 872 | 8450 873 | 8451 874 | 8455 875 | 8461 876 | 8465 877 | 8468 878 | 8474 879 | 8489 880 | 8497 881 | 8520 882 | 8522 883 | 8527 884 | 8537 885 | 8560 886 | 8567 887 | 8577 888 | 8599 889 | 86 890 | 8624 891 | 8641 892 | 8655 893 | 8656 894 | 8657 895 | 8658 896 | 8661 897 | 8664 898 | 868 899 | 8682 900 | 8686 901 | 8709 902 | 8721 903 | 873 904 | 8738 905 | 8750 906 | 8755 907 | 8768 908 | 877 909 | 8778 910 | 8790 911 | 8798 912 | 8807 913 | 8810 914 | 8818 915 | 8829 916 | 883 917 | 8830 918 | 8859 919 | 8871 920 | 8892 921 | 8904 922 | 8905 923 | 8917 924 | 8920 925 | 8924 926 | 8939 927 | 8963 928 | 8976 929 | 8981 930 | 8988 931 | 9015 932 | 9023 933 | 9033 934 | 9044 935 | 9065 936 | 9069 937 | 9093 938 | 9106 939 | 9123 940 | 9124 941 | 9138 942 | 9140 943 | 9150 944 | 9164 945 | 9179 946 | 9180 947 | 919 948 | 9201 949 | 9217 950 | 9222 951 | 924 952 | 9243 953 | 9260 954 | 9263 955 | 9277 956 | 9280 957 | 9289 958 | 9300 959 | 9310 960 | 9313 961 | 9318 962 | 9320 963 | 933 964 | 9353 965 | 
9363 966 | 937 967 | 9373 968 | 9380 969 | 9393 970 | 9395 971 | 9415 972 | 9419 973 | 9423 974 | 9444 975 | 9456 976 | 9457 977 | 9473 978 | 9500 979 | 9519 980 | 9562 981 | 9567 982 | 9569 983 | 9575 984 | 9577 985 | 9582 986 | 9598 987 | 96 988 | 9608 989 | 963 990 | 9635 991 | 9636 992 | 9651 993 | 9666 994 | 9691 995 | 9702 996 | 9721 997 | 9723 998 | 9725 999 | 9746 1000 | 9760 1001 | 9763 1002 | 9765 1003 | 9771 1004 | 9781 1005 | 9789 1006 | 9792 1007 | 9793 1008 | 9797 1009 | 9798 1010 | 9804 1011 | 9808 1012 | 9810 1013 | 9813 1014 | 982 1015 | 9822 1016 | 9835 1017 | 984 1018 | 9840 1019 | 9841 1020 | 9848 1021 | 9867 1022 | 988 1023 | 9894 1024 | 9929 1025 | 9931 1026 | 9964 1027 | 9970 1028 | 9975 1029 | 9994 1030 | 9996 1031 | -------------------------------------------------------------------------------- /VOCdevkit/VOC2007/ImageSets/Main/val.txt: -------------------------------------------------------------------------------- 1 | 10178 2 | 10219 3 | 1194 4 | 1211 5 | 1276 6 | 1361 7 | 1456 8 | 146 9 | 1467 10 | 1469 11 | 1583 12 | 1615 13 | 1743 14 | 1761 15 | 1803 16 | 1811 17 | 1821 18 | 1848 19 | 1865 20 | 2031 21 | 2148 22 | 2469 23 | 2549 24 | 2717 25 | 2734 26 | 2855 27 | 294 28 | 31 29 | 3288 30 | 3391 31 | 3422 32 | 345 33 | 3482 34 | 359 35 | 363 36 | 3645 37 | 3674 38 | 3714 39 | 373 40 | 3762 41 | 3778 42 | 3804 43 | 393 44 | 4140 45 | 430 46 | 4551 47 | 4681 48 | 486 49 | 5108 50 | 5240 51 | 5273 52 | 5332 53 | 537 54 | 5372 55 | 5378 56 | 5424 57 | 5526 58 | 5555 59 | 5695 60 | 5782 61 | 5820 62 | 5958 63 | 5993 64 | 6045 65 | 6119 66 | 6129 67 | 6221 68 | 6450 69 | 6599 70 | 663 71 | 6697 72 | 6711 73 | 6761 74 | 6828 75 | 6889 76 | 6989 77 | 7033 78 | 7059 79 | 7197 80 | 7710 81 | 7727 82 | 7819 83 | 787 84 | 7880 85 | 7882 86 | 7923 87 | 8008 88 | 8039 89 | 8284 90 | 8324 91 | 8465 92 | 8655 93 | 873 94 | 8790 95 | 8905 96 | 8917 97 | 9150 98 | 9179 99 | 9300 100 | 933 101 | 9419 102 | 9457 103 | 9636 104 | -------------------------------------------------------------------------------- /VOCdevkit/VOC2007/test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | 4 | trainval_percent = 0.1 5 | train_percent = 0.9 6 | xmlfilepath = 'Annotations' 7 | txtsavepath = 'ImageSets\Main' 8 | total_xml = os.listdir(xmlfilepath) 9 | 10 | num = len(total_xml) 11 | list = range(num) 12 | tv = int(num * trainval_percent) 13 | tr = int(tv * train_percent) 14 | trainval = random.sample(list, tv) 15 | train = random.sample(trainval, tr) 16 | 17 | ftrainval = open('ImageSets/Main/trainval.txt', 'w') 18 | ftest = open('ImageSets/Main/test.txt', 'w') 19 | ftrain = open('ImageSets/Main/train.txt', 'w') 20 | fval = open('ImageSets/Main/val.txt', 'w') 21 | 22 | for i in list: 23 | name = total_xml[i][:-4] + '\n' 24 | if i in trainval: 25 | ftrainval.write(name) 26 | if i in train: 27 | ftest.write(name) 28 | else: 29 | fval.write(name) 30 | else: 31 | ftrain.write(name) 32 | 33 | ftrainval.close() 34 | ftrain.close() 35 | fval.close() 36 | ftest.close() 37 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from ammeter import C_ammerter 3 | 4 | 5 | if __name__ == "__main__": 6 | # 加载模板 7 | template = cv2.imread('images/10.JPG',1) 8 | # 初始化 9 | am = C_ammerter(template) 10 | # 运行 11 | am.am_run() 12 | # 结束 13 | am.close() 14 | 15 | 16 | 
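Note: the detection scripts in this repository stop at the pointer angle, and the DegreeToNum templates leave the final step as a placeholder (num = 0.56 / num = 0.33, "from the map (pointer_degree to num)"). Turning an angle into a meter reading still needs a dial-specific calibration. Below is a minimal sketch of such a mapping by linear interpolation; the start/end angles and the value range are illustrative assumptions, not values taken from this project (the README suggests deriving them from the detected bounding boxes of the first and last scale marks).

def degree_to_value(pointer_degree, start_degree=225.0, end_degree=-45.0,
                    min_value=0.0, max_value=1.6):
    """Map a pointer angle (in degrees) to a meter reading by linear interpolation.

    start_degree / end_degree are the angles of the first and last scale marks,
    min_value / max_value the readings printed at those marks. All four numbers
    here are illustrative assumptions and must be calibrated for a real dial.
    """
    fraction = (pointer_degree - start_degree) / (end_degree - start_degree)
    fraction = min(max(fraction, 0.0), 1.0)  # clamp readings to the scale range
    return min_value + fraction * (max_value - min_value)

if __name__ == "__main__":
    # with the sample constants above, a pointer at 140 degrees reads roughly 0.50
    print(degree_to_value(140.0))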
-------------------------------------------------------------------------------- /__pycache__/MeterReader.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonJerome/Pointer-meter-reading-algorithm-by-Python/48ce521723ac8dd405d3280028f9820b78ac9d94/__pycache__/MeterReader.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/ammeter.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonJerome/Pointer-meter-reading-algorithm-by-Python/48ce521723ac8dd405d3280028f9820b78ac9d94/__pycache__/ammeter.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/img_match.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonJerome/Pointer-meter-reading-algorithm-by-Python/48ce521723ac8dd405d3280028f9820b78ac9d94/__pycache__/img_match.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/yolo.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonJerome/Pointer-meter-reading-algorithm-by-Python/48ce521723ac8dd405d3280028f9820b78ac9d94/__pycache__/yolo.cpython-36.pyc -------------------------------------------------------------------------------- /ammeter.py: -------------------------------------------------------------------------------- 1 | """ 2 | created by:maogu123@126.com 3 | data:2021-01-04 4 | 5 | 功能:在视频中寻找匹配的仪表,并识别指针的角度 6 | """ 7 | import numpy as np 8 | import cv2 9 | import time 10 | from matplotlib import pyplot as plt 11 | 12 | 13 | class C_ammerter: 14 | def __init__(self,temp): 15 | # 获取模板样本 16 | self.template = temp 17 | # 基于视频流 18 | self.cap = cv2.VideoCapture(0, cv2.CAP_DSHOW) 19 | 20 | # 获取模板的尺寸 21 | self.w = self.template.shape[0] 22 | self.h = self.template.shape[1] 23 | 24 | methods = ['cv2.TM_CCOEFF', 'cv2.TM_CCOEFF_NORMED', 'cv2.TM_CCORR', 'cv2.TM_CCORR_NORMED', 'cv2.TM_SQDIFF', 25 | 'cv2.TM_SQDIFF_NORMED'] 26 | # 平方差 SQDIFF 27 | # 相关匹配 CCORR 28 | # 相关系数法 CCOEFF 29 | self.method = cv2.TM_CCORR 30 | 31 | 32 | def close(self): 33 | self.cap.release() 34 | cv2.destroyAllWindows() 35 | 36 | 37 | 38 | 39 | 40 | 41 | # 获取匹配的图片位置 42 | def get_match(self,img): 43 | res = cv2.matchTemplate(img, self.template, self.method) 44 | 45 | min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res) 46 | top_left = max_loc 47 | print("----------------------------" ) 48 | print("min_val", min_val) 49 | print("max_val", max_val) 50 | print("min_loc", min_loc) 51 | print("max_loc", max_loc) 52 | print("----------------------------" ) 53 | bottom_right = (top_left[0] + self.w, top_left[1] + self.h) 54 | cv2.rectangle(img, top_left, bottom_right, 255, 2) 55 | c_x, c_y = ((np.array(top_left) + np.array(bottom_right)) / 2).astype(np.int) 56 | # print(c_x, c_y) 57 | return max_val,top_left, bottom_right 58 | 59 | # return img[top_left[1]:top_left[1] + h, top_left[0]:top_left[0] + w] 60 | 61 | 62 | def am_run(self): 63 | while True: 64 | ret, frame = self.cap.read() 65 | if frame is None: 66 | print('video picture is none --continue ') 67 | continue 68 | 69 | gray = frame.copy() 70 | # cv2.imshow('origin', gray) 71 | 72 | # 匹配模板 框出匹配区域 73 | image = gray.copy() 74 | maxval,t_left, b_right = self.get_match(gray) 75 | if maxval < 
16000000000: # 对匹配程度做判断 76 | print("---------------------------------------") 77 | print('matchTemplate is not enough --continue') 78 | print("---------------------------------------") 79 | result =frame 80 | image=frame 81 | else: 82 | 83 | cv2.rectangle(image, t_left, b_right, 255, 2) 84 | 85 | 86 | 87 | # 高斯除噪 88 | kernel = np.ones((6,6), np.float32) / 36 89 | gray_cut_filter2D = cv2.filter2D(image[t_left[1]:t_left[1] + self.h, t_left[0]:t_left[0] + self.w], -1, kernel) 90 | 91 | # 灰度图 二值化 92 | gray_img = cv2.cvtColor(gray_cut_filter2D, cv2.COLOR_BGR2GRAY) 93 | ret, thresh1 = cv2.threshold(gray_img, 130, 255, cv2.THRESH_BINARY) 94 | 95 | # 二值化后 分割主要区域 减小干扰 模板图尺寸371*369 96 | tm = thresh1.copy() 97 | test_main = tm[50:500, 50:500] 98 | 99 | # 边缘化检测 100 | edges = cv2.Canny(test_main, 50, 150, apertureSize=3) 101 | 102 | # 霍夫直线 103 | lines = cv2.HoughLines(edges, 1, np.pi / 180, 60) 104 | if lines is None: 105 | continue 106 | result = edges.copy() 107 | 108 | for line in lines[0]: 109 | rho = line[0] # 第一个元素是距离rho 110 | theta = line[1] # 第二个元素是角度theta 111 | print('distance:' + str(rho), 'theta:' + str(((theta / np.pi) * 180))) 112 | lbael_text = 'distance:' + str(round(rho))+ 'theta:' + str(round((theta / np.pi) * 180-90,2)) 113 | cv2.putText(image, lbael_text,(t_left[0],t_left[1]-12),cv2.FONT_HERSHEY_SIMPLEX,1,(0,255,0),2) 114 | if (theta > 3 * (np.pi / 3)) or (theta < (np.pi / 2)): # 垂直直线 115 | # 该直线与第一行的交点 116 | pt1 = (int(rho / np.cos(theta)), 0) 117 | # 该直线与最后一行的焦点 118 | pt2 = (int((rho - result.shape[0] * np.sin(theta)) / np.cos(theta)), result.shape[0]) 119 | # 绘制一条白线 120 | cv2.line(result, pt1, pt2,255, 1) 121 | # print('theat >180 theta<90') 122 | 123 | else: # 水平直线 124 | # 该直线与第一列的交点 125 | pt1 = (0, int(rho / np.sin(theta))) 126 | # 该直线与最后一列的交点 127 | pt2 = (result.shape[1], int((rho - result.shape[1] * np.cos(theta)) / np.sin(theta))) 128 | # 绘制一条直线 129 | cv2.line(result, pt1, pt2, 255, 1) 130 | # print('theat <180 theta > 90') 131 | 132 | # 直线拟合 133 | cv2.imshow('result', result) 134 | cv2.imshow('rectangle', image) 135 | if cv2.waitKey(1) & 0XFF == ord('q'): 136 | break 137 | 138 | 139 | # Terminal进入当前文件 命令生成exe: pyinstaller --console --onefile ammeter.py 140 | -------------------------------------------------------------------------------- /coco_annotation.py: -------------------------------------------------------------------------------- 1 | import json 2 | from collections import defaultdict 3 | 4 | name_box_id = defaultdict(list) 5 | id_name = dict() 6 | f = open( 7 | "mscoco2017/annotations/instances_train2017.json", 8 | encoding='utf-8') 9 | data = json.load(f) 10 | 11 | annotations = data['annotations'] 12 | for ant in annotations: 13 | id = ant['image_id'] 14 | name = 'mscoco2017/train2017/%012d.jpg' % id 15 | cat = ant['category_id'] 16 | 17 | if cat >= 1 and cat <= 11: 18 | cat = cat - 1 19 | elif cat >= 13 and cat <= 25: 20 | cat = cat - 2 21 | elif cat >= 27 and cat <= 28: 22 | cat = cat - 3 23 | elif cat >= 31 and cat <= 44: 24 | cat = cat - 5 25 | elif cat >= 46 and cat <= 65: 26 | cat = cat - 6 27 | elif cat == 67: 28 | cat = cat - 7 29 | elif cat == 70: 30 | cat = cat - 9 31 | elif cat >= 72 and cat <= 82: 32 | cat = cat - 10 33 | elif cat >= 84 and cat <= 90: 34 | cat = cat - 11 35 | 36 | name_box_id[name].append([ant['bbox'], cat]) 37 | 38 | f = open('train.txt', 'w') 39 | for key in name_box_id.keys(): 40 | f.write(key) 41 | box_infos = name_box_id[key] 42 | for info in box_infos: 43 | x_min = int(info[0][0]) 44 | y_min = int(info[0][1]) 45 | x_max = x_min + 
int(info[0][2]) 46 | y_max = y_min + int(info[0][3]) 47 | 48 | box_info = " %d,%d,%d,%d,%d" % ( 49 | x_min, y_min, x_max, y_max, int(info[1])) 50 | f.write(box_info) 51 | f.write('\n') 52 | f.close() 53 | -------------------------------------------------------------------------------- /contours.py: -------------------------------------------------------------------------------- 1 | """ 2 | created by:maogu123@126.com 3 | 4 | data:2021-01-06 5 | 6 | 功能:在视频中,抓取目标仪表的轮廓,按轮廓切割 ,然后霍夫直线得到指针的角度 7 | 8 | """ 9 | from typing import Optional, Any 10 | 11 | import cv2 12 | import numpy as np 13 | import time 14 | from matplotlib import pyplot as plt 15 | 16 | 17 | # 获取面积最大的轮廓数据 18 | def max_contours(contour): 19 | area = map(cv2.contourArea, contour) 20 | area_list = list(area) 21 | get_area_max = max(area_list) 22 | get_post = area_list.index(get_area_max) 23 | return get_post, get_area_max 24 | 25 | 26 | # 将直线延长与边界相交, 在图形中画出 27 | def get_HImg(H_image, Lines): 28 | for Line in Lines[0]: 29 | Rho = Line[0] # 第一个元素是距离rho 30 | Theta = Line[1] # 第二个元素是角度theta 31 | 32 | print('theta:' + str(((Theta / np.pi) * 180))) 33 | if (Theta > 3 * (np.pi / 3)) or (Theta < (np.pi / 2)): # 垂直直线 34 | # 该直线与第一行的交点 35 | Pt1 = (int(Rho / np.cos(Theta)), 0) 36 | # 该直线与最后一行的焦点 37 | Pt2 = (int((Rho - H_image.shape[0] * np.sin(Theta)) / np.cos(Theta)), H_image.shape[0]) 38 | # 绘制一条线 39 | cv2.line(H_image, Pt1, Pt2, 255, 1) 40 | 41 | else: # 水平直线 42 | # 该直线与第一列的交点 43 | Pt1 = (0, int(Rho / np.sin(Theta))) 44 | # 该直线与最后一列的交点 45 | Pt2 = (H_image.shape[1], int((Rho - H_image.shape[1] * np.cos(Theta)) / np.sin(Theta))) 46 | # 绘制一条直线 47 | cv2.line(H_image, Pt1, Pt2, 255, 1) 48 | 49 | return H_image 50 | 51 | 52 | 53 | cap = cv2.VideoCapture(0, cv2.CAP_DSHOW) 54 | while True: 55 | ret, frame = cap.read() 56 | if frame is None: 57 | print('frame is none --continue') 58 | continue 59 | # 保存原图片 60 | origin = frame 61 | 62 | # 高斯除噪 63 | kernel = np.ones((5, 5), np.float32) / 25 64 | gray_cut_filter2D = cv2.filter2D(frame, -1, kernel) 65 | 66 | # 转为灰度图。再二值化 67 | img_gray = cv2.cvtColor(gray_cut_filter2D, cv2.COLOR_BGR2GRAY) 68 | ret, thresh = cv2.threshold(img_gray, 127, 255, cv2.THRESH_BINARY) # THRESH_BINARY_INV 69 | # cv2.imshow('thresh', thresh) 70 | 71 | # 查找轮廓 72 | contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, 73 | cv2.CHAIN_APPROX_SIMPLE) # cv2.CHAIN_APPROX_NONE CHAIN_APPROX_SIMPLE 74 | 75 | # 查找面积最大的轮廓 76 | post, area_max = max_contours(contours) 77 | print('area:' + str(area_max)) 78 | 79 | # 过滤面积小的区域 80 | if area_max < 145000: 81 | print('area_max is not enough --continue') 82 | continue 83 | else: 84 | # 在原图上画出轮廓 85 | C_img = cv2.drawContours(frame, contours, post, (0, 255, 0), 1) 86 | # cv2.imshow('C_img', C_img) 87 | 88 | # 新建空白图像,放入轮廓内的图像 89 | cimg = np.zeros_like(frame) 90 | cimg[:, :, :] = 255 91 | cv2.drawContours(cimg, contours, post, (0, 0, 0), -1) 92 | 93 | # 抓取后的图像 94 | final = cv2.bitwise_or(frame, cimg) 95 | 96 | # 高斯除噪 灰度图 二值化 边缘化检测 97 | final_filter2D = cv2.filter2D(final, -1, kernel) 98 | final_gray = cv2.cvtColor(final_filter2D, cv2.COLOR_BGR2GRAY) 99 | ret, thresh1 = cv2.threshold(final_gray, 80, 255, cv2.THRESH_BINARY) 100 | edges = cv2.Canny(thresh1, 50, 150, apertureSize=3) 101 | lines = cv2.HoughLines(edges, 1, np.pi / 180, 60) 102 | 103 | if lines is None or len(lines) < 1: 104 | continue 105 | 106 | Line = lines[0][0] 107 | Rho = Line[0] # 第一个元素是距离rho 108 | Theta = Line[1] # 第二个元素是角度theta 109 | print('------------------------') 110 | print('Line:', Line) 111 | 
print('------------------------') 112 | 113 | lbael_text = 'distance:' + str(round(Rho)) + 'theta:' + str(round((Theta / np.pi) * 180 - 90, 2)) 114 | cv2.putText(frame, lbael_text, (0,25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2) 115 | result = get_HImg(frame, lines) # edges 116 | 117 | 118 | cv2.imshow('result', result) 119 | 120 | gray_result = get_HImg(edges, lines) 121 | cv2.imshow('gray_result', gray_result) 122 | 123 | if cv2.waitKey(1) & 0xFF == ord('q'): 124 | break 125 | # Terminal进入当前文件 命令生成exe: pyinstaller --console --onefile ammeter.py 126 | -------------------------------------------------------------------------------- /convert.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | """ 3 | Reads Darknet config and weights and creates Keras model with TF backend. 4 | 5 | """ 6 | 7 | import argparse 8 | import configparser 9 | import io 10 | import os 11 | from collections import defaultdict 12 | 13 | import numpy as np 14 | from keras import backend as K 15 | from keras.layers import (Conv2D, Input, ZeroPadding2D, Add, 16 | UpSampling2D, MaxPooling2D, Concatenate) 17 | from keras.layers.advanced_activations import LeakyReLU 18 | from keras.layers.normalization import BatchNormalization 19 | from keras.models import Model 20 | from keras.regularizers import l2 21 | from keras.utils.vis_utils import plot_model as plot 22 | 23 | 24 | parser = argparse.ArgumentParser(description='Darknet To Keras Converter.') 25 | parser.add_argument('config_path', help='Path to Darknet cfg file.') 26 | parser.add_argument('weights_path', help='Path to Darknet weights file.') 27 | parser.add_argument('output_path', help='Path to output Keras model file.') 28 | parser.add_argument( 29 | '-p', 30 | '--plot_model', 31 | help='Plot generated Keras model and save as image.', 32 | action='store_true') 33 | parser.add_argument( 34 | '-w', 35 | '--weights_only', 36 | help='Save as Keras weights file instead of model file.', 37 | action='store_true') 38 | 39 | def unique_config_sections(config_file): 40 | """Convert all config sections to have unique names. 41 | 42 | Adds unique suffixes to config sections for compability with configparser. 43 | """ 44 | section_counters = defaultdict(int) 45 | output_stream = io.StringIO() 46 | with open(config_file) as fin: 47 | for line in fin: 48 | if line.startswith('['): 49 | section = line.strip().strip('[]') 50 | _section = section + '_' + str(section_counters[section]) 51 | section_counters[section] += 1 52 | line = line.replace(section, _section) 53 | output_stream.write(line) 54 | output_stream.seek(0) 55 | return output_stream 56 | 57 | # %% 58 | def _main(args): 59 | config_path = os.path.expanduser(args.config_path) 60 | weights_path = os.path.expanduser(args.weights_path) 61 | assert config_path.endswith('.cfg'), '{} is not a .cfg file'.format( 62 | config_path) 63 | assert weights_path.endswith( 64 | '.weights'), '{} is not a .weights file'.format(weights_path) 65 | 66 | output_path = os.path.expanduser(args.output_path) 67 | assert output_path.endswith( 68 | '.h5'), 'output path {} is not a .h5 file'.format(output_path) 69 | output_root = os.path.splitext(output_path)[0] 70 | 71 | # Load weights and config. 
72 | print('Loading weights.') 73 | weights_file = open(weights_path, 'rb') 74 | major, minor, revision = np.ndarray( 75 | shape=(3, ), dtype='int32', buffer=weights_file.read(12)) 76 | if (major*10+minor)>=2 and major<1000 and minor<1000: 77 | seen = np.ndarray(shape=(1,), dtype='int64', buffer=weights_file.read(8)) 78 | else: 79 | seen = np.ndarray(shape=(1,), dtype='int32', buffer=weights_file.read(4)) 80 | print('Weights Header: ', major, minor, revision, seen) 81 | 82 | print('Parsing Darknet config.') 83 | unique_config_file = unique_config_sections(config_path) 84 | cfg_parser = configparser.ConfigParser() 85 | cfg_parser.read_file(unique_config_file) 86 | 87 | print('Creating Keras model.') 88 | input_layer = Input(shape=(None, None, 3)) 89 | prev_layer = input_layer 90 | all_layers = [] 91 | 92 | weight_decay = float(cfg_parser['net_0']['decay'] 93 | ) if 'net_0' in cfg_parser.sections() else 5e-4 94 | count = 0 95 | out_index = [] 96 | for section in cfg_parser.sections(): 97 | print('Parsing section {}'.format(section)) 98 | if section.startswith('convolutional'): 99 | filters = int(cfg_parser[section]['filters']) 100 | size = int(cfg_parser[section]['size']) 101 | stride = int(cfg_parser[section]['stride']) 102 | pad = int(cfg_parser[section]['pad']) 103 | activation = cfg_parser[section]['activation'] 104 | batch_normalize = 'batch_normalize' in cfg_parser[section] 105 | 106 | padding = 'same' if pad == 1 and stride == 1 else 'valid' 107 | 108 | # Setting weights. 109 | # Darknet serializes convolutional weights as: 110 | # [bias/beta, [gamma, mean, variance], conv_weights] 111 | prev_layer_shape = K.int_shape(prev_layer) 112 | 113 | weights_shape = (size, size, prev_layer_shape[-1], filters) 114 | darknet_w_shape = (filters, weights_shape[2], size, size) 115 | weights_size = np.product(weights_shape) 116 | 117 | print('conv2d', 'bn' 118 | if batch_normalize else ' ', activation, weights_shape) 119 | 120 | conv_bias = np.ndarray( 121 | shape=(filters, ), 122 | dtype='float32', 123 | buffer=weights_file.read(filters * 4)) 124 | count += filters 125 | 126 | if batch_normalize: 127 | bn_weights = np.ndarray( 128 | shape=(3, filters), 129 | dtype='float32', 130 | buffer=weights_file.read(filters * 12)) 131 | count += 3 * filters 132 | 133 | bn_weight_list = [ 134 | bn_weights[0], # scale gamma 135 | conv_bias, # shift beta 136 | bn_weights[1], # running mean 137 | bn_weights[2] # running var 138 | ] 139 | 140 | conv_weights = np.ndarray( 141 | shape=darknet_w_shape, 142 | dtype='float32', 143 | buffer=weights_file.read(weights_size * 4)) 144 | count += weights_size 145 | 146 | # DarkNet conv_weights are serialized Caffe-style: 147 | # (out_dim, in_dim, height, width) 148 | # We would like to set these to Tensorflow order: 149 | # (height, width, in_dim, out_dim) 150 | conv_weights = np.transpose(conv_weights, [2, 3, 1, 0]) 151 | conv_weights = [conv_weights] if batch_normalize else [ 152 | conv_weights, conv_bias 153 | ] 154 | 155 | # Handle activation. 156 | act_fn = None 157 | if activation == 'leaky': 158 | pass # Add advanced activation later. 
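# Added note: 'leaky' is deliberately not passed to Conv2D as an activation; it is
# applied further down as a separate LeakyReLU(alpha=0.1) layer after the optional
# BatchNormalization, matching Darknet's conv -> batch-norm -> activation ordering.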
159 | elif activation != 'linear': 160 | raise ValueError( 161 | 'Unknown activation function `{}` in section {}'.format( 162 | activation, section)) 163 | 164 | # Create Conv2D layer 165 | if stride>1: 166 | # Darknet uses left and top padding instead of 'same' mode 167 | prev_layer = ZeroPadding2D(((1,0),(1,0)))(prev_layer) 168 | conv_layer = (Conv2D( 169 | filters, (size, size), 170 | strides=(stride, stride), 171 | kernel_regularizer=l2(weight_decay), 172 | use_bias=not batch_normalize, 173 | weights=conv_weights, 174 | activation=act_fn, 175 | padding=padding))(prev_layer) 176 | 177 | if batch_normalize: 178 | conv_layer = (BatchNormalization( 179 | weights=bn_weight_list))(conv_layer) 180 | prev_layer = conv_layer 181 | 182 | if activation == 'linear': 183 | all_layers.append(prev_layer) 184 | elif activation == 'leaky': 185 | act_layer = LeakyReLU(alpha=0.1)(prev_layer) 186 | prev_layer = act_layer 187 | all_layers.append(act_layer) 188 | 189 | elif section.startswith('route'): 190 | ids = [int(i) for i in cfg_parser[section]['layers'].split(',')] 191 | layers = [all_layers[i] for i in ids] 192 | if len(layers) > 1: 193 | print('Concatenating route layers:', layers) 194 | concatenate_layer = Concatenate()(layers) 195 | all_layers.append(concatenate_layer) 196 | prev_layer = concatenate_layer 197 | else: 198 | skip_layer = layers[0] # only one layer to route 199 | all_layers.append(skip_layer) 200 | prev_layer = skip_layer 201 | 202 | elif section.startswith('maxpool'): 203 | size = int(cfg_parser[section]['size']) 204 | stride = int(cfg_parser[section]['stride']) 205 | all_layers.append( 206 | MaxPooling2D( 207 | pool_size=(size, size), 208 | strides=(stride, stride), 209 | padding='same')(prev_layer)) 210 | prev_layer = all_layers[-1] 211 | 212 | elif section.startswith('shortcut'): 213 | index = int(cfg_parser[section]['from']) 214 | activation = cfg_parser[section]['activation'] 215 | assert activation == 'linear', 'Only linear activation supported.' 216 | all_layers.append(Add()([all_layers[index], prev_layer])) 217 | prev_layer = all_layers[-1] 218 | 219 | elif section.startswith('upsample'): 220 | stride = int(cfg_parser[section]['stride']) 221 | assert stride == 2, 'Only stride=2 supported.' 222 | all_layers.append(UpSampling2D(stride)(prev_layer)) 223 | prev_layer = all_layers[-1] 224 | 225 | elif section.startswith('yolo'): 226 | out_index.append(len(all_layers)-1) 227 | all_layers.append(None) 228 | prev_layer = all_layers[-1] 229 | 230 | elif section.startswith('net'): 231 | pass 232 | 233 | else: 234 | raise ValueError( 235 | 'Unsupported section header type: {}'.format(section)) 236 | 237 | # Create and save model. 238 | if len(out_index)==0: out_index.append(len(all_layers)-1) 239 | model = Model(inputs=input_layer, outputs=[all_layers[i] for i in out_index]) 240 | print(model.summary()) 241 | if args.weights_only: 242 | model.save_weights('{}'.format(output_path)) 243 | print('Saved Keras weights to {}'.format(output_path)) 244 | else: 245 | model.save('{}'.format(output_path)) 246 | print('Saved Keras model to {}'.format(output_path)) 247 | 248 | # Check to see if all weights have been read. 
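# Added note: "count" tracks how many float32 values were consumed while building the
# layers, so any bytes still left in the file (divided by 4) appear as
# remaining_weights below; a non-zero value usually means the .cfg and .weights files
# do not describe the same network.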
249 | remaining_weights = len(weights_file.read()) / 4 250 | weights_file.close() 251 | print('Read {} of {} from Darknet weights.'.format(count, count + 252 | remaining_weights)) 253 | if remaining_weights > 0: 254 | print('Warning: {} unused weights'.format(remaining_weights)) 255 | 256 | if args.plot_model: 257 | plot(model, to_file='{}.png'.format(output_root), show_shapes=True) 258 | print('Saved model plot to {}.png'.format(output_root)) 259 | 260 | 261 | if __name__ == '__main__': 262 | _main(parser.parse_args()) 263 | -------------------------------------------------------------------------------- /darknet53.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=16 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | 
from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | 
filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | -------------------------------------------------------------------------------- /img_match.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import os 4 | 5 | 6 | def CorrectImage(queryImagePath, templateImgDir, outImg, val_num=100, threshold=90): 7 | """ 8 | Find the template class for the query image and to correct the query image 9 | :param queryImagePath: the path of the query image eg. "./img_test/test3.png" 10 | :param templateImgDir: the dir of the template eg. "./template/" 11 | :param outImg: the out put dir of corrected image eg. "./img_test_corrected/" 12 | :param val_num: the number of samples for validating the Homography matrix eg. val_num=100 13 | :param threshold: the error threshold of the Homography mapping 14 | from A to B using Homography_matrix. 
Suggest: 15 | 30<= threshold <=100 (Note that the results we got after the experiment 16 | by Statistically matching template mean) 17 | :return the class of template or None 18 | """ 19 | 20 | # queryImage 21 | queryImage = cv2.imread(queryImagePath, 0) 22 | 23 | # Initiate SIFT detector 24 | sift = cv2.xfeatures2d.SIFT_create() 25 | 26 | # find the keypoints and descriptors of queryImage with SIFT 27 | queryImageKP, queryImageDES = sift.detectAndCompute(queryImage, None) 28 | 29 | # template images 30 | result = [] # [{"template_class": 1, "template_filename": "class1.png", 31 | # "homography_matrix": narray() }] 32 | for templateImgName in os.listdir(templateImgDir): 33 | # get the keypoints and descriptors of templateImage with SIFT 34 | templateImgPath = templateImgDir + templateImgName 35 | templateImg = cv2.imread(templateImgPath, 0) 36 | templateImgKP, templateImgDES = sift.detectAndCompute(templateImg, None) 37 | 38 | # match the keypoints 39 | bfMatcher = cv2.BFMatcher(crossCheck=True) 40 | matches = bfMatcher.match(queryImageDES, templateImgDES) 41 | matchesSorted = sorted(matches, key=lambda x: x.distance) 42 | 43 | """ 44 | choose the first four matches to compute the Homography matrix 45 | and other 100 keypoints to validate the Homography matrix. 46 | """ 47 | matchesForHMatrix = matchesSorted[:4] 48 | matchesForValidateH = matchesSorted[4:4 + val_num] 49 | 50 | # get the Homography matrix 51 | src_points = [] 52 | target_points = [] 53 | for match in matchesForHMatrix: 54 | query_index = match.queryIdx 55 | src_points.append(queryImageKP[query_index].pt) 56 | template_index = match.trainIdx 57 | target_points.append(templateImgKP[template_index].pt) 58 | hMatrix, s = cv2.findHomography(np.float32(src_points), np.float32(target_points), cv2.RANSAC, 10) 59 | 60 | # statistical the val set to find matching points to compute 61 | # the ratio of suitability 62 | error_total = 0 63 | for valMatche in matchesForValidateH: 64 | valsrc_index = valMatche.queryIdx 65 | valsrc_point = queryImageKP[valsrc_index].pt 66 | valsrc_point = valsrc_point + (1,) 67 | valtarget_index = valMatche.trainIdx 68 | valtarget_point = templateImgKP[valtarget_index].pt 69 | valtarget_point = valtarget_point + (1,) 70 | valsrc_point = np.array(valsrc_point) 71 | valtarget_point = np.array(valtarget_point) 72 | 73 | # b = H * aT 74 | error = np.sum(np.abs(valtarget_point - np.matmul(hMatrix, valsrc_point))) 75 | error_total = error_total + error 76 | 77 | if error_total / val_num < threshold: # maybe change the threshold 78 | # finded the right template 79 | template_finded = {"template_class": int(templateImgName.split(".")[0][5:]), 80 | "template_filename": templateImgName, 81 | "homography_matrix": hMatrix} 82 | result.append(template_finded) 83 | # Draw first 10 matches. 
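# Added note (a sketch, not the original behaviour): a homography maps homogeneous
# points only up to scale, so a stricter version of the validation above would
# normalise the mapped point by its last coordinate before comparing, e.g.
#   mapped = np.matmul(hMatrix, valsrc_point)
#   error = np.sum(np.abs(valtarget_point - mapped / mapped[2]))
# The unnormalised comparison can still separate templates because the threshold was
# chosen empirically against it (see the docstring), but the two error scales differ.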
84 | # imgShow = cv2.drawMatches(queryImage, queryImageKP, templateImg, 85 | # templateImgKP, matchesSorted[:10], None, flags=2) 86 | # plt.imshow(imgShow), plt.show() 87 | # cv2.findHomography() 88 | 89 | if len(result) == 0: 90 | print("no find the correct template") 91 | return None 92 | if len(result) > 1: 93 | print("warring: there are two templates that match the query image and we just return one") 94 | 95 | # template class 96 | result_tamplate_class = result[0]["template_class"] 97 | 98 | # correct the query img 99 | corrected_img = cv2.warpPerspective(queryImage, result[0]["homography_matrix"], queryImage.shape) 100 | cv2.imwrite(outImg + queryImagePath.split("/")[-1], corrected_img) 101 | 102 | return result_tamplate_class 103 | if __name__ == "__main__": 104 | queryImagePath = "./img_test/test1.png" # the image to be corrected 105 | templateImgDir = "./template/" # the tamplate dir 106 | outImg = "./img_test_corrected/" 107 | 108 | # find the corresponding template and correct the img 109 | matchedTemplateClass = CorrectImage(queryImagePath, templateImgDir, outImg) -------------------------------------------------------------------------------- /kmeans.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class YOLO_Kmeans: 5 | 6 | def __init__(self, cluster_number, filename): 7 | self.cluster_number = cluster_number 8 | self.filename = "2012_train.txt" 9 | 10 | def iou(self, boxes, clusters): # 1 box -> k clusters 11 | n = boxes.shape[0] 12 | k = self.cluster_number 13 | 14 | box_area = boxes[:, 0] * boxes[:, 1] 15 | box_area = box_area.repeat(k) 16 | box_area = np.reshape(box_area, (n, k)) 17 | 18 | cluster_area = clusters[:, 0] * clusters[:, 1] 19 | cluster_area = np.tile(cluster_area, [1, n]) 20 | cluster_area = np.reshape(cluster_area, (n, k)) 21 | 22 | box_w_matrix = np.reshape(boxes[:, 0].repeat(k), (n, k)) 23 | cluster_w_matrix = np.reshape(np.tile(clusters[:, 0], (1, n)), (n, k)) 24 | min_w_matrix = np.minimum(cluster_w_matrix, box_w_matrix) 25 | 26 | box_h_matrix = np.reshape(boxes[:, 1].repeat(k), (n, k)) 27 | cluster_h_matrix = np.reshape(np.tile(clusters[:, 1], (1, n)), (n, k)) 28 | min_h_matrix = np.minimum(cluster_h_matrix, box_h_matrix) 29 | inter_area = np.multiply(min_w_matrix, min_h_matrix) 30 | 31 | result = inter_area / (box_area + cluster_area - inter_area) 32 | return result 33 | 34 | def avg_iou(self, boxes, clusters): 35 | accuracy = np.mean([np.max(self.iou(boxes, clusters), axis=1)]) 36 | return accuracy 37 | 38 | def kmeans(self, boxes, k, dist=np.median): 39 | box_number = boxes.shape[0] 40 | distances = np.empty((box_number, k)) 41 | last_nearest = np.zeros((box_number,)) 42 | np.random.seed() 43 | clusters = boxes[np.random.choice( 44 | box_number, k, replace=False)] # init k clusters 45 | while True: 46 | 47 | distances = 1 - self.iou(boxes, clusters) 48 | 49 | current_nearest = np.argmin(distances, axis=1) 50 | if (last_nearest == current_nearest).all(): 51 | break # clusters won't change 52 | for cluster in range(k): 53 | clusters[cluster] = dist( # update clusters 54 | boxes[current_nearest == cluster], axis=0) 55 | 56 | last_nearest = current_nearest 57 | 58 | return clusters 59 | 60 | def result2txt(self, data): 61 | f = open("yolo_anchors.txt", 'w') 62 | row = np.shape(data)[0] 63 | for i in range(row): 64 | if i == 0: 65 | x_y = "%d,%d" % (data[i][0], data[i][1]) 66 | else: 67 | x_y = ", %d,%d" % (data[i][0], data[i][1]) 68 | f.write(x_y) 69 | f.close() 70 | 71 | def 
txt2boxes(self): 72 | f = open(self.filename, 'r') 73 | dataSet = [] 74 | for line in f: 75 | infos = line.split(" ") 76 | length = len(infos) 77 | for i in range(1, length): 78 | width = int(infos[i].split(",")[2]) - \ 79 | int(infos[i].split(",")[0]) 80 | height = int(infos[i].split(",")[3]) - \ 81 | int(infos[i].split(",")[1]) 82 | dataSet.append([width, height]) 83 | result = np.array(dataSet) 84 | f.close() 85 | return result 86 | 87 | def txt2clusters(self): 88 | all_boxes = self.txt2boxes() 89 | result = self.kmeans(all_boxes, k=self.cluster_number) 90 | result = result[np.lexsort(result.T[0, None])] 91 | self.result2txt(result) 92 | print("K anchors:\n {}".format(result)) 93 | print("Accuracy: {:.2f}%".format( 94 | self.avg_iou(all_boxes, result) * 100)) 95 | 96 | 97 | if __name__ == "__main__": 98 | cluster_number = 9 99 | filename = "2012_train.txt" 100 | kmeans = YOLO_Kmeans(cluster_number, filename) 101 | kmeans.txt2clusters() 102 | -------------------------------------------------------------------------------- /kuangxuan.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import cv2 4 | import cv2 as cv 5 | import argparse 6 | 7 | weightsPath = "D:/git/work/keras-yolo3/yolov3.weights" 8 | configPath = "D:/git/work/keras-yolo3/yolov3.cfg" 9 | labelsPath = "D:/git/work/keras-yolo3/model_data/coco_classes.txt" 10 | rootdir = "D:/git/work/keras-yolo3/images" # 图像读取地址 11 | savepath = "D:/git/work/keras-yolo3/kuangxuanimages" # 图像保存地址 12 | 13 | # 初始化一些参数 14 | LABELS = open(labelsPath).read().strip().split("\n") # 物体类别 15 | #print(LABELS) 16 | COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8") # 颜色 17 | #print(COLORS) 18 | 19 | filelist = os.listdir(rootdir) # 打开对应的文件夹 20 | total_num = len(filelist) # 得到文件夹中图像的个数 21 | #print(total_num) 22 | # 如果输出的文件夹不存在,创建即可 23 | if not os.path.isdir(savepath): 24 | os.makedirs(savepath) 25 | 26 | for (dirpath, dirnames, filenames) in os.walk(rootdir): 27 | for filename in filenames: 28 | # 必须将boxes在遍历新的图片后初始化 29 | boxes = [] 30 | confidences = [] 31 | classIDs = [] 32 | net = cv2.dnn.readNetFromDarknet(configPath, weightsPath) 33 | path = os.path.join(dirpath, filename) 34 | image = cv.imread(path) 35 | (H, W) = image.shape[:2] 36 | # 得到 YOLO需要的输出层 37 | #print(H, W) 38 | ln = net.getLayerNames() 39 | ln = [ln[i[0] - 1] for i in net.getUnconnectedOutLayers()] 40 | # 从输入图像构造一个blob,然后通过加载的模型,给我们提供边界框和相关概率 41 | blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (416, 416), swapRB=True, crop=False) 42 | net.setInput(blob) 43 | layerOutputs = net.forward(ln) 44 | # 在每层输出上循环 45 | for output in layerOutputs: 46 | # 对每个检测进行循环 47 | for detection in output: 48 | scores = detection[5:] 49 | classID = np.argmax(scores) 50 | confidence = scores[classID] 51 | # 过滤掉那些置信度较小的检测结果 52 | if confidence > 0.5: 53 | # 框后接框的宽度和高度 54 | box = detection[0:4] * np.array([W, H, W, H]) 55 | (centerX, centerY, width, height) = box.astype("int") 56 | # 边框的左上角 57 | x = int(centerX - (width / 2)) 58 | y = int(centerY - (height / 2)) 59 | # 更新检测出来的框 60 | # 批量检测图片注意此处的boxes在每一次遍历的时候要初始化,否则检测出来的图像框会叠加 61 | boxes.append([x, y, int(width), int(height)]) 62 | confidences.append(float(confidence)) 63 | classIDs.append(classID) 64 | # 极大值抑制 65 | idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.2, 0.3) 66 | #print(confidence) 67 | k = -1 68 | if len(idxs) > 0: 69 | # for k in range(0,len(boxes)): 70 | for i in idxs.flatten(): 71 | (x, y) = (boxes[i][0], boxes[i][1]) 72 | (w, h) = (boxes[i][2], 
boxes[i][3]) 73 | # 在原图上绘制边框和类别 74 | color = [int(c) for c in COLORS[classIDs[i]]] 75 | # image是原图, 左上点坐标, 右下点坐标, 颜色, 画线的宽度 76 | cv2.rectangle(image, (x, y), (x + w, y + h), color, 2) 77 | text = "{}: {:.4f}".format(LABELS[classIDs[i]], confidences[i]) 78 | # 各参数依次是:图片,添加的文字,左上角坐标(整数),字体, 字体大小,颜色,字体粗细 79 | cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2) 80 | # 图像裁剪注意坐标要一一对应 81 | # 图片裁剪 裁剪区域【Ly:Ry,Lx:Rx】 82 | cut = image[y:(y + h), x:(x + w)] 83 | # boxes的长度即为识别出来的车辆个数,利用boxes的长度来定义裁剪后车辆的路径名称 84 | if k < len(boxes): 85 | k = k + 1 86 | # 从字母a开始每次+1 87 | t = chr(ord("a") + k) 88 | # 写入文件夹,这块写入的时候不支持int(我也不知道为啥),所以才用的字母 89 | cv.imwrite(savepath + "/" + filename.split(".")[0] + "_" + t + ".jpg", cut) -------------------------------------------------------------------------------- /mAPgetpridict.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Class definition of YOLO_v3 style detection model on image and video 4 | """ 5 | 6 | import colorsys 7 | import os 8 | import sys 9 | from timeit import default_timer as timer 10 | 11 | import numpy as np 12 | from keras import backend as K 13 | from keras.models import load_model 14 | from keras.layers import Input 15 | from PIL import Image, ImageFont, ImageDraw 16 | 17 | from yolo3.model import yolo_eval, yolo_body, tiny_yolo_body 18 | from yolo3.utils import letterbox_image 19 | import os 20 | from keras.utils import multi_gpu_model 21 | 22 | class YOLO(object): 23 | _defaults = { 24 | "model_path": 'logs/000/trained_weights_final.h5', ##训练好的模型的路径 25 | "anchors_path": 'model_data/yolo_anchors.txt', 26 | "classes_path": 'model_data/voc_classes.txt', 27 | "score" : 0.3, 28 | "iou" : 0.45, 29 | "model_image_size" : (416, 416), 30 | "gpu_num" : 0 31 | } 32 | 33 | @classmethod 34 | def get_defaults(cls, n): 35 | if n in cls._defaults: 36 | return cls._defaults[n] 37 | else: 38 | return "Unrecognized attribute name '" + n + "'" 39 | 40 | def __init__(self, **kwargs): 41 | self.__dict__.update(self._defaults) # set up default values 42 | self.__dict__.update(kwargs) # and update with user overrides 43 | self.class_names = self._get_class() 44 | self.anchors = self._get_anchors() 45 | self.sess = K.get_session() 46 | self.boxes, self.scores, self.classes = self.generate() 47 | 48 | def _get_class(self): 49 | classes_path = os.path.expanduser(self.classes_path) 50 | with open(classes_path) as f: 51 | class_names = f.readlines() 52 | class_names = [c.strip() for c in class_names] 53 | return class_names 54 | 55 | def _get_anchors(self): 56 | anchors_path = os.path.expanduser(self.anchors_path) 57 | with open(anchors_path) as f: 58 | anchors = f.readline() 59 | anchors = [float(x) for x in anchors.split(',')] 60 | return np.array(anchors).reshape(-1, 2) 61 | 62 | def generate(self): 63 | model_path = os.path.expanduser(self.model_path) 64 | assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.' 65 | 66 | # Load model, or construct model and load weights. 
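# Added note: the try/except below first attempts load_model() on a full Keras model
# file; if the .h5 holds only weights, it falls back to rebuilding
# yolo_body / tiny_yolo_body and calling load_weights(). The assert then checks that
# the last layer's channel count equals anchors_per_output * (num_classes + 5),
# i.e. that the given anchors and classes match the loaded model.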
67 | num_anchors = len(self.anchors) 68 | num_classes = len(self.class_names) 69 | is_tiny_version = num_anchors==6 # default setting 70 | try: 71 | self.yolo_model = load_model(model_path, compile=False) 72 | except: 73 | self.yolo_model = tiny_yolo_body(Input(shape=(None,None,3)), num_anchors//2, num_classes) \ 74 | if is_tiny_version else yolo_body(Input(shape=(None,None,3)), num_anchors//3, num_classes) 75 | self.yolo_model.load_weights(self.model_path) # make sure model, anchors and classes match 76 | else: 77 | assert self.yolo_model.layers[-1].output_shape[-1] == \ 78 | num_anchors/len(self.yolo_model.output) * (num_classes + 5), \ 79 | 'Mismatch between model and given anchor and class sizes' 80 | 81 | print('{} model, anchors, and classes loaded.'.format(model_path)) 82 | 83 | # Generate colors for drawing bounding boxes. 84 | hsv_tuples = [(x / len(self.class_names), 1., 1.) 85 | for x in range(len(self.class_names))] 86 | self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples)) 87 | self.colors = list( 88 | map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), 89 | self.colors)) 90 | np.random.seed(10101) # Fixed seed for consistent colors across runs. 91 | np.random.shuffle(self.colors) # Shuffle colors to decorrelate adjacent classes. 92 | np.random.seed(None) # Reset seed to default. 93 | 94 | # Generate output tensor targets for filtered bounding boxes. 95 | self.input_image_shape = K.placeholder(shape=(2, )) 96 | if self.gpu_num>=2: 97 | self.yolo_model = multi_gpu_model(self.yolo_model, gpus=self.gpu_num) 98 | boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors, 99 | len(self.class_names), self.input_image_shape, 100 | score_threshold=self.score, iou_threshold=self.iou) 101 | return boxes, scores, classes 102 | 103 | def detect_image(self, image): 104 | start = timer() 105 | 106 | if self.model_image_size != (None, None): 107 | assert self.model_image_size[0]%32 == 0, 'Multiples of 32 required' 108 | assert self.model_image_size[1]%32 == 0, 'Multiples of 32 required' 109 | boxed_image = letterbox_image(image, tuple(reversed(self.model_image_size))) 110 | else: 111 | new_image_size = (image.width - (image.width % 32), 112 | image.height - (image.height % 32)) 113 | boxed_image = letterbox_image(image, new_image_size) 114 | image_data = np.array(boxed_image, dtype='float32') 115 | 116 | print(image_data.shape) 117 | image_data /= 255. 118 | image_data = np.expand_dims(image_data, 0) # Add batch dimension. 
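# Added note: letterbox_image (imported from yolo3.utils) resizes the input while
# keeping its aspect ratio and pads the rest of the model_image_size canvas
# (416x416 by default) with grey, roughly:
#   scale = min(416 / w, 416 / h)
#   resized = image.resize((int(w * scale), int(h * scale)), Image.BICUBIC)
#   boxed = Image.new('RGB', (416, 416), (128, 128, 128))
#   boxed.paste(resized, ((416 - resized.width) // 2, (416 - resized.height) // 2))
# The pixels are then scaled to [0, 1] and a batch dimension is added before the
# tensor is fed to the TensorFlow session below.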
119 | 120 | out_boxes, out_scores, out_classes = self.sess.run( 121 | [self.boxes, self.scores, self.classes], 122 | feed_dict={ 123 | self.yolo_model.input: image_data, 124 | self.input_image_shape: [image.size[1], image.size[0]], 125 | K.learning_phase(): 0 126 | }) 127 | 128 | print('Found {} boxes for {}'.format(len(out_boxes), 'img')) 129 | 130 | font = ImageFont.truetype(font='font/FiraMono-Medium.otf', 131 | size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32')) 132 | thickness = (image.size[0] + image.size[1]) // 300 133 | 134 | for i, c in reversed(list(enumerate(out_classes))): 135 | predicted_class = self.class_names[c] 136 | box = out_boxes[i] 137 | score = out_scores[i] 138 | 139 | label = '{} {:.2f}'.format(predicted_class, score) 140 | draw = ImageDraw.Draw(image) 141 | label_size = draw.textsize(label, font) 142 | 143 | top, left, bottom, right = box 144 | top = max(0, np.floor(top + 0.5).astype('int32')) 145 | left = max(0, np.floor(left + 0.5).astype('int32')) 146 | bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32')) 147 | right = min(image.size[0], np.floor(right + 0.5).astype('int32')) 148 | print(label, (left, top), (right, bottom)) 149 | #new_f=open("/home/shan/xws/pro/keras-yolo3/detection-results/"+tmp_file.replace(".jpg", ".txt"), "a") 150 | new_f.write("%s %s %s %s %s\n" % (label, left, top, right, bottom)) 151 | if top - label_size[1] >= 0: 152 | text_origin = np.array([left, top - label_size[1]]) 153 | else: 154 | text_origin = np.array([left, top + 1]) 155 | 156 | # My kingdom for a good redistributable image drawing library. 157 | for i in range(thickness): 158 | draw.rectangle( 159 | [left + i, top + i, right - i, bottom - i], 160 | outline=self.colors[c]) 161 | draw.rectangle( 162 | [tuple(text_origin), tuple(text_origin + label_size)], 163 | fill=self.colors[c]) 164 | draw.text(text_origin, label, fill=(0, 0, 0), font=font) 165 | del draw 166 | 167 | end = timer() 168 | print(end - start) 169 | return image 170 | 171 | def close_session(self): 172 | self.sess.close() 173 | 174 | if __name__ == '__main__': 175 | yolo=YOLO() 176 | # path = '1.jpg' 177 | # try: 178 | # image = Image.open(path) 179 | # except: 180 | # print('Open Error! 
Try again!') 181 | # else: 182 | # r_image = yolo.detect_image(image) 183 | # r_image.show() 184 | # yolo.close_session() 185 | #strat1=timer() 186 | dirname="input\images-optional/" ##该目录为测试照片的存储路径,每次测试照片的数量可以自己设定 187 | #dirname = "input/images-optional" 188 | path=os.path.join(dirname) 189 | pic_list=os.listdir(path) 190 | count=0 191 | for filename in pic_list: 192 | tmp_file=pic_list[count] 193 | new_f=open("input\detection-results/"+tmp_file.replace(".jpg", ".txt"), "a") #预测坐标生成txt文件保存的路径 194 | #new_f = open("input/detection-results" + tmp_file.replace(".jpg", ".txt"), "a") # 预测坐标生成txt文件保存的路径 195 | abs_path=path+pic_list[count] 196 | image = Image.open(abs_path) 197 | r_image = yolo.detect_image(image) 198 | count=count+1 199 | # yolo = YOLO() 200 | #end1=timer() 201 | print(count) 202 | yolo.close_session() 203 | -------------------------------------------------------------------------------- /mAPgettxt.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import glob 4 | import xml.etree.ElementTree as ET 5 | 6 | # make sure that the cwd() in the beginning is the location of the python script (so that every path makes sense) 7 | os.chdir(os.path.dirname(os.path.abspath(__file__))) 8 | 9 | # change directory to the one with the files to be changed 10 | parent_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir)) 11 | parent_path = os.path.abspath(os.path.join(parent_path, os.pardir)) 12 | GT_PATH = os.path.join(parent_path, 'input','ground-truth') 13 | #print(GT_PATH) 14 | os.chdir(GT_PATH) 15 | 16 | # old files (xml format) will be moved to a "backup" folder 17 | ## create the backup dir if it doesn't exist already 18 | if not os.path.exists("backup"): 19 | os.makedirs("backup") 20 | 21 | # create VOC format files 22 | xml_list = glob.glob('*.xml') 23 | if len(xml_list) == 0: 24 | print("Error: no .xml files found in ground-truth") 25 | sys.exit() 26 | for tmp_file in xml_list: 27 | #print(tmp_file) 28 | # 1. create new file (VOC format) 29 | with open(tmp_file.replace(".xml", ".txt"), "a") as new_f: 30 | root = ET.parse(tmp_file).getroot() 31 | for obj in root.findall('object'): 32 | obj_name = obj.find('name').text 33 | bndbox = obj.find('bndbox') 34 | left = bndbox.find('xmin').text 35 | top = bndbox.find('ymin').text 36 | right = bndbox.find('xmax').text 37 | bottom = bndbox.find('ymax').text 38 | new_f.write("%s %s %s %s %s\n" % (obj_name, left, top, right, bottom)) 39 | # 2. 
move old file (xml format) to backup 40 | os.rename(tmp_file, os.path.join("backup", tmp_file)) 41 | print("Conversion completed!") 42 | -------------------------------------------------------------------------------- /model_data/coco_classes.txt: -------------------------------------------------------------------------------- 1 | meter 2 | vameter -------------------------------------------------------------------------------- /model_data/tiny_yolo_anchors.txt: -------------------------------------------------------------------------------- 1 | 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 2 | -------------------------------------------------------------------------------- /model_data/voc_classes.txt: -------------------------------------------------------------------------------- 1 | meter 2 | vameter -------------------------------------------------------------------------------- /origin.py: -------------------------------------------------------------------------------- 1 | #定义相关的引用 2 | import colorsys 3 | import random 4 | import os 5 | import numpy as np 6 | from yolo import YOLO 7 | from PIL import Image 8 | import cv2 9 | 10 | #定义相关的图片及视频的路径 11 | video_path = "D:/test.mp4" 12 | output_path = "D:/0.mp4" 13 | ImageDir = os.listdir("D:/test/testimages") 14 | #用于矩形框的绘制 15 | RecDraw = [] 16 | 17 | 18 | # 用来存储矩形框 19 | # 此段代码为开源项目白嫖代码,用于对每一类产生相应的颜色与之对应 20 | # 很明显开源项目的这段代码也是从yolov3代码白嫖来的 21 | def colors_classes(num_classes): 22 | if (hasattr(colors_classes, "colors") and 23 | len(colors_classes.colors) == num_classes): 24 | return colors_classes.colors 25 | 26 | hsv_tuples = [(x / num_classes, 1., 1.) for x in range(num_classes)] 27 | colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples)) 28 | colors = list( 29 | map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), 30 | colors)) 31 | random.seed(10101) # Fixed seed for consistent colors across runs. 32 | random.shuffle(colors) # Shuffle colors to decorrelate adjacent classes. 33 | random.seed(None) # Reset seed to default. 34 | colors_classes.colors = colors # Save colors for future calls. 
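# Added note: caching the palette on the function object means repeated calls return
# the same colours, so each class keeps a stable colour across frames.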
35 | return colors 36 | 37 | #这段代码主要是为了完成两个目标框之间的交并比iou的计算 38 | def compute_iou(box1, box2): 39 | ymin1, xmin1, ymax1, xmax1 = box1 40 | ymin2, xmin2, ymax2, xmax2 = box2 41 | # 获取矩形框交集对应的左上角和右下角的坐标(intersection) 42 | xx1 = np.max([xmin1, xmin2]) 43 | yy1 = np.max([ymin1, ymin2]) 44 | xx2 = np.min([xmax1, xmax2]) 45 | yy2 = np.min([ymax1, ymax2]) 46 | # 计算两个矩形框面积 47 | area1 = (xmax1 - xmin1) * (ymax1 - ymin1) 48 | area2 = (xmax2 - xmin2) * (ymax2 - ymin2) 49 | inter_area = (np.max([0, xx2 - xx1])) * (np.max([0, yy2 - yy1])) 50 | # 计算交集面积 51 | iou = inter_area / (area1 + area2 - inter_area + 1e-6) 52 | # 计算交并比 53 | return iou 54 | 55 | 56 | def doiou(boxFilterPerson, boxFilterHat, numPeople, numHat): 57 | perhat = np.zeros(shape=(numPeople, numHat)) 58 | for perindex in range(numPeople): 59 | for hatindex in range(numHat): 60 | perhat[perindex][hatindex] = compute_iou(boxFilterPerson[perindex], boxFilterHat[hatindex]) 61 | return perhat 62 | 63 | # track_obj被用作类结构体变量,表征的是被监测物体本身的属性(定义的可以更加简单化一些) 64 | class track_obj(object): 65 | #def __init__(self,newname):self.name=newname,通过访问self.name的形式给实例中增加了name变量,并给name赋了初值newname 66 | def __init__(self): 67 | self.last_rstate = 0 68 | self.last_cov = 0 69 | self.frames = 0 70 | self.trace_id = 0 71 | self.color = 0 72 | #该段代码主要是用于计算两个目标框之间的距离 73 | def calDistance(centersPerson, centersHat): 74 | xAxisDis = np.zeros(shape=(len(centersPerson), len(centersHat))) 75 | for perindex in range(len(centersPerson)): 76 | for hatindex in range(len(centersHat)): 77 | xAxisDis[perindex][hatindex] = centersPerson[perindex][0][0] - centersHat[hatindex][0][0] 78 | # print(np.fabs(xAxisDis).min(1)) 79 | return(xAxisDis) 80 | 81 | # 计算被检测到物体的中点,可以理解为是传感器,能够检测到实际的值 82 | def cal_centre(out_boxes, out_classes, out_scores, score_thres): 83 | print(out_boxes, out_classes, out_scores) 84 | dict = {} 85 | for key in out_classes: 86 | dict[key] = dict.get(key, 0) + 1 87 | print(len(dict)) 88 | if len(dict) == 0: 89 | return 000, 000 90 | if len(dict) == 1: 91 | for PersonIndex in range(len(out_boxes)): 92 | RecDraw.append(out_boxes[PersonIndex]) 93 | return 666, 666 94 | # elif dict[0] > dict[1]: 95 | # return 666, 666 # 简单版本的就是人比帽子多直接警告,复杂一点就直接pass交给后面的任务 96 | boxFilterPerson = [] 97 | boxFilterHat = [] 98 | centersPerson = [] 99 | centersHat = [] 100 | for div_box, div_class, div_score in zip(out_boxes, out_classes, out_scores): 101 | if (div_score >= score_thres) and (div_class == 0): 102 | boxFilterPerson.append(div_box) 103 | centre = np.array([[(div_box[1] + div_box[3]) // 2], [(div_box[0] + div_box[2]) // 2]]) 104 | centersPerson.append(centre) 105 | if (div_score >= score_thres) and (div_class == 1): 106 | boxFilterHat.append(div_box) 107 | centre_hat = np.array([[(div_box[1] + div_box[3]) // 2], [(div_box[0] + div_box[2]) // 2]]) 108 | centersHat.append(centre_hat) 109 | numPeople = len(centersPerson) 110 | numHat = len(centersHat) 111 | perhat = doiou(boxFilterPerson, boxFilterHat, numPeople, numHat) 112 | m = perhat.sum(axis=1) 113 | for index in range(len(m)): 114 | if m[index] == 0: 115 | RecDraw.append(boxFilterPerson[index]) 116 | return 666, 666 117 | xAxisDis = calDistance(centersPerson, centersHat) 118 | dis = np.fabs(xAxisDis).min(1) 119 | for dis_index in range(len(dis)): 120 | if m[dis_index] > 20: 121 | RecDraw.append(boxFilterPerson[index]) 122 | return 666, 666 123 | return centersPerson, numPeople 124 | 125 | 126 | class dokalman(): 127 | def __init__(self): 128 | self.count = 0 129 | self.dotracking = [] 130 | self.str_location = [] 131 | 
self.distance_max = 200 132 | 133 | # """注意矩阵的行列此处并没有进行修改""" 134 | def tracking(self, centers, num, image, mode): # 先尝试着单目标的追踪 135 | font = cv2.FONT_HERSHEY_SIMPLEX 136 | if mode == 0: 137 | if centers == 000 and num == 000: 138 | cv2.putText(image, "need check", (11, 33), font, 1, [230, 0, 0], 2) 139 | if centers == 666 and num == 666: 140 | cv2.putText(image, "detecting", (11, 11 + 22), font, 1, [230, 0, 0], 2) 141 | for index in range(len(RecDraw)): 142 | cv2.rectangle(image, (RecDraw[index][1], RecDraw[index][0]), (RecDraw[index][3], RecDraw[index][2]), (230, 0, 0), 2) 143 | elif centers != 666 or num != 666: 144 | cv2.putText(image, "need check", (11, 33), font, 1, [230, 0, 0], 2) 145 | elif mode == 1: 146 | if centers == 000 and num == 000: 147 | cv2.putText(image, "need check", (11, 33), font, 1, [0, 0, 230], 2) 148 | if centers == 666 and num == 666: 149 | cv2.putText(image, "detecting", (11, 11 + 22), font, 1, [0, 0, 230], 2) 150 | for index in range(len(RecDraw)): 151 | cv2.rectangle(image, (RecDraw[index][1], RecDraw[index][0]), (RecDraw[index][3], RecDraw[index][2]), 152 | (0, 0, 230), 2) 153 | elif centers != 666 or num != 666: 154 | cv2.putText(image, "need check", (11, 33), font, 1, [0, 0, 230], 2) 155 | 156 | 157 | yolov3_args = { 158 | "model_path": 'logs/000/trained_weights_final.h5', 159 | "anchors_path": 'model_data/yolo_anchors.txt', 160 | "classes_path": 'model_data/coco_classes.txt', 161 | "score": 0.50, 162 | "iou": 0.3, 163 | "model_image_size": (416, 416), 164 | "gpu_num": 1, 165 | } 166 | 167 | 168 | def image(pic_path): 169 | mode = 0 170 | RecDraw.clear() 171 | if pic_path == 0: 172 | yolov3 = YOLO(**yolov3_args) 173 | for i in range(len(ImageDir)): 174 | RecDraw.clear() 175 | ImagePath = "D:/test/testimages/" + ImageDir[i] 176 | ImageName = "D:/test/image/" + str(i) + ".jpg" 177 | img = Image.open(ImagePath) 178 | image, boxes, scores, classes = yolov3.detect_image_mul(img) 179 | centers, num = cal_centre(boxes, classes, scores, 0.05) 180 | result = np.asarray(image) 181 | tracker = dokalman() 182 | tracker.tracking(centers, num, result, mode) 183 | image_bgr = cv2.cvtColor(np.asarray(result), cv2.COLOR_RGB2BGR) 184 | cv2.imwrite(ImageName, image_bgr) 185 | elif pic_path != 0: 186 | yolov3 = YOLO(**yolov3_args) 187 | img = Image.open(pic_path) 188 | image, boxes, scores, classes = yolov3.detect_image_mul(img) 189 | centers, num = cal_centre(boxes, classes, scores, 0.05) 190 | result = np.asarray(image) 191 | tracker = dokalman() 192 | tracker.tracking(centers, num, result, mode) 193 | # print("look there!", RecDraw) 194 | image_bgr = cv2.cvtColor(np.asarray(result), cv2.COLOR_RGB2BGR) 195 | cv2.imwrite("D:/test/pp30.jpg", image_bgr) 196 | cv2.imshow("re", image_bgr) 197 | cv2.waitKey(0) 198 | cv2.destroyAllWindows() 199 | 200 | 201 | def video(): 202 | mode = 1 203 | yolov3 = YOLO(**yolov3_args) 204 | video_cap = cv2.VideoCapture(video_path) 205 | if not video_cap.isOpened(): 206 | raise IOError 207 | video_FourCC = int(video_cap.get(cv2.CAP_PROP_FOURCC)) 208 | video_fps = video_cap.get(cv2.CAP_PROP_FPS) 209 | video_size = (int(video_cap.get(cv2.CAP_PROP_FRAME_WIDTH)), 210 | int(video_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))) 211 | # video_size = (int(video_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), int(video_cap.get(cv2.CAP_PROP_FRAME_WIDTH))) 212 | isOutput = True if output_path != "" else False 213 | if isOutput: 214 | out = cv2.VideoWriter(output_path, video_FourCC, video_fps, video_size) 215 | tracker = dokalman() 216 | 217 | frame_index = 0 218 | name = 11632 219 | 
while True: 220 | RecDraw.clear() 221 | return_value, frame = video_cap.read() 222 | frame_index = frame_index + 1 223 | if frame is None: 224 | break 225 | if frame_index % 2 == 1: 226 | x, y = frame.shape[0:2] 227 | new_image = cv2.resize(frame, (int(y / 2), int(x / 2))) 228 | name += 1 229 | strname = "D:/test/" + str(name) + ".jpg" 230 | cv2.imwrite(strname, new_image) 231 | # transposedImage = cv2.transpose(frame) 232 | # flipedImageX = cv2.flip(transposedImage, 0) 233 | # image_new = Image.fromarray(flipedImageX) 234 | image_new = Image.fromarray(frame) 235 | image, boxes, scores, classes = yolov3.detect_image_mul(image_new) 236 | centers, num = cal_centre(boxes, classes, scores, 0.05) 237 | result = np.asarray(image) 238 | tracker.tracking(centers, num, result, mode) 239 | cv2.namedWindow("result", cv2.WINDOW_NORMAL) 240 | cv2.imshow("result", result) 241 | if isOutput: 242 | out.write(result) 243 | if cv2.waitKey(1) & 0xFF == ord('q'): 244 | break 245 | 246 | 247 | if __name__ == '__main__': 248 | # print("please input the type of your want to identify") 249 | # m = input("pic or video? Answer: ") 250 | # if m == "video": 251 | video() 252 | # elif m == "pic": 253 | # pic_path = input("please input image path : ") 254 | # image(pic_path) 255 | # image("D:/git/work/keras-yolo3/images/1.jpg") 256 | # image("D:/r.jpg") 257 | # image(0) 258 | -------------------------------------------------------------------------------- /read_num.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import img_match 3 | import importlib 4 | 5 | def find_templateclass_using_name(class_num): 6 | 7 | templateclass_name = "DegreeToNum.templateclass" + str(class_num) 8 | templateclass = importlib.import_module(templateclass_name) 9 | 10 | if templateclass is None: 11 | raise NotImplementedError("In DegreeToNum package, the model %s not find." 
% (templateclass_name)) 12 | 13 | return templateclass 14 | 15 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 16 | parser.add_argument("--img_path", type=str, default="./img_test/test2.png", 17 | help='the path of the test image') 18 | parser.add_argument("--template_dir", type=str, default="./template/", 19 | help='the dir of template images') 20 | parser.add_argument("--siftedimg_dir", type=str, default="./img_test_corrected/", 21 | help='the dir of sifted images') 22 | 23 | opt, _ = parser.parse_known_args() 24 | 25 | # find the right template and correct the image 26 | queryImagePath = opt.img_path 27 | templateImgDir = opt.template_dir 28 | outImg = opt.siftedimg_dir 29 | matchedTemplateClass = img_match.CorrectImage(queryImagePath, templateImgDir, outImg) 30 | 31 | # check the pointer position and compute the num according to the degree of pointer 32 | if matchedTemplateClass == None: 33 | raise ValueError("no find the right template class") 34 | 35 | corrected_img_path = outImg + queryImagePath.split("/")[-1] 36 | templateclass = find_templateclass_using_name(matchedTemplateClass) 37 | num = templateclass.degree2num(corrected_img_path) 38 | print(num) 39 | -------------------------------------------------------------------------------- /testimages/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonJerome/Pointer-meter-reading-algorithm-by-Python/48ce521723ac8dd405d3280028f9820b78ac9d94/testimages/1.jpg -------------------------------------------------------------------------------- /testimages/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonJerome/Pointer-meter-reading-algorithm-by-Python/48ce521723ac8dd405d3280028f9820b78ac9d94/testimages/2.jpg -------------------------------------------------------------------------------- /testimages/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonJerome/Pointer-meter-reading-algorithm-by-Python/48ce521723ac8dd405d3280028f9820b78ac9d94/testimages/3.jpg -------------------------------------------------------------------------------- /testimages/4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonJerome/Pointer-meter-reading-algorithm-by-Python/48ce521723ac8dd405d3280028f9820b78ac9d94/testimages/4.jpg -------------------------------------------------------------------------------- /testimages/5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonJerome/Pointer-meter-reading-algorithm-by-Python/48ce521723ac8dd405d3280028f9820b78ac9d94/testimages/5.jpg -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | """ 2 | Retrain the YOLO model for your own dataset. 
3 | """ 4 | 5 | import numpy as np 6 | import keras.backend as K 7 | from keras.layers import Input, Lambda 8 | from keras.models import Model 9 | from keras.optimizers import Adam 10 | from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping 11 | 12 | from yolo3.model import preprocess_true_boxes, yolo_body, tiny_yolo_body, yolo_loss 13 | from yolo3.utils import get_random_data 14 | 15 | 16 | def _main(): 17 | annotation_path = 'train.txt' 18 | log_dir = 'logs/000/' 19 | classes_path = 'model_data/voc_classes.txt' 20 | anchors_path = 'model_data/yolo_anchors.txt' 21 | class_names = get_classes(classes_path) 22 | num_classes = len(class_names) 23 | anchors = get_anchors(anchors_path) 24 | 25 | input_shape = (416,416) # multiple of 32, hw 26 | 27 | is_tiny_version = len(anchors)==6 # default setting 28 | if is_tiny_version: 29 | model = create_tiny_model(input_shape, anchors, num_classes, 30 | freeze_body=2, weights_path='logs/000/trained_weights_final.h5') 31 | else: 32 | model = create_model(input_shape, anchors, num_classes, 33 | freeze_body=2, weights_path='logs/000/trained_weights_final.h5') # make sure you know what you freeze 34 | 35 | logging = TensorBoard(log_dir=log_dir) 36 | checkpoint = ModelCheckpoint(log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5', 37 | monitor='val_loss', save_weights_only=True, save_best_only=True, period=3) 38 | reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1) 39 | early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1) 40 | 41 | val_split = 0.1 42 | with open(annotation_path) as f: 43 | lines = f.readlines() 44 | np.random.seed(10101) 45 | np.random.shuffle(lines) 46 | np.random.seed(None) 47 | num_val = int(len(lines)*val_split) 48 | num_train = len(lines) - num_val 49 | 50 | # Train with frozen layers first, to get a stable loss. 51 | # Adjust num epochs to your dataset. This step is enough to obtain a not bad model. 52 | if True: 53 | model.compile(optimizer=Adam(lr=1e-6), loss={ 54 | # use custom yolo_loss Lambda layer. 55 | 'yolo_loss': lambda y_true, y_pred: y_pred}) 56 | 57 | batch_size = 4 58 | print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size)) 59 | model.fit_generator(data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes), 60 | steps_per_epoch=max(1, num_train//batch_size), 61 | validation_data=data_generator_wrapper(lines[num_train:], batch_size, input_shape, anchors, num_classes), 62 | validation_steps=max(1, num_val//batch_size), 63 | epochs=0, 64 | initial_epoch=0, 65 | callbacks=[logging, checkpoint]) 66 | model.save(log_dir + 'trained_weights_stage_1.h5') 67 | 68 | # Unfreeze and continue training, to fine-tune. 69 | # Train longer if the result is not good. 
70 | if True: 71 | for i in range(len(model.layers)): 72 | model.layers[i].trainable = True 73 | model.compile(optimizer=Adam(lr=1e-6), loss={'yolo_loss': lambda y_true, y_pred: y_pred}) # recompile to apply the change 74 | print('Unfreeze all of the layers.') 75 | 76 | batch_size = 4 # note that more GPU memory is required after unfreezing the body 77 | print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size)) 78 | model.fit_generator(data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes), 79 | steps_per_epoch=max(1, num_train//batch_size), 80 | validation_data=data_generator_wrapper(lines[num_train:], batch_size, input_shape, anchors, num_classes), 81 | validation_steps=max(1, num_val//batch_size), 82 | epochs=10, 83 | initial_epoch=0, 84 | callbacks=[logging, checkpoint, reduce_lr, early_stopping]) 85 | model.save(log_dir + 'trained_weights_final.h5') 86 | 87 | # Further training if needed. 88 | 89 | def get_classes(classes_path): 90 | '''loads the classes''' 91 | with open(classes_path) as f: 92 | class_names = f.readlines() 93 | class_names = [c.strip() for c in class_names] 94 | return class_names 95 | 96 | def get_anchors(anchors_path): 97 | '''loads the anchors from a file''' 98 | with open(anchors_path) as f: 99 | anchors = f.readline() 100 | anchors = [float(x) for x in anchors.split(',')] 101 | return np.array(anchors).reshape(-1, 2) 102 | 103 | 104 | def create_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=2, 105 | weights_path='logs/000/trained_weights_final.h5'): 106 | '''create the training model''' 107 | K.clear_session() # get a new session 108 | image_input = Input(shape=(None, None, 3)) 109 | h, w = input_shape 110 | num_anchors = len(anchors) 111 | 112 | y_true = [Input(shape=(h//{0:32, 1:16, 2:8}[l], w//{0:32, 1:16, 2:8}[l], \ 113 | num_anchors//3, num_classes+5)) for l in range(3)] 114 | 115 | model_body = yolo_body(image_input, num_anchors//3, num_classes) 116 | print('Create YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes)) 117 | 118 | if load_pretrained: 119 | model_body.load_weights(weights_path, by_name=True, skip_mismatch=True) 120 | print('Load weights {}.'.format(weights_path)) 121 | if freeze_body in [1, 2]: 122 | # Freeze darknet53 body or freeze all but 3 output layers. 
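# Added note: freeze_body=1 freezes the first 185 layers (the Darknet-53 backbone of
# yolo_body), while freeze_body=2 freezes everything except the three final output
# convolutions, so only the detection heads are trained and the pretrained feature
# extractor stays fixed.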
123 | num = (185, len(model_body.layers)-3)[freeze_body-1] 124 | for i in range(num): model_body.layers[i].trainable = False 125 | print('Freeze the first {} layers of total {} layers.'.format(num, len(model_body.layers))) 126 | 127 | model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss', 128 | arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5})( 129 | [*model_body.output, *y_true]) 130 | model = Model([model_body.input, *y_true], model_loss) 131 | 132 | return model 133 | 134 | def create_tiny_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=2, 135 | weights_path='logs/000/trained_weights_final.h5'): 136 | '''create the training model, for Tiny YOLOv3''' 137 | K.clear_session() # get a new session 138 | image_input = Input(shape=(None, None, 3)) 139 | h, w = input_shape 140 | num_anchors = len(anchors) 141 | 142 | y_true = [Input(shape=(h//{0:32, 1:16}[l], w//{0:32, 1:16}[l], \ 143 | num_anchors//2, num_classes+5)) for l in range(2)] 144 | 145 | model_body = tiny_yolo_body(image_input, num_anchors//2, num_classes) 146 | print('Create Tiny YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes)) 147 | 148 | if load_pretrained: 149 | model_body.load_weights(weights_path, by_name=True, skip_mismatch=True) 150 | print('Load weights {}.'.format(weights_path)) 151 | if freeze_body in [1, 2]: 152 | # Freeze the darknet body or freeze all but 2 output layers. 153 | num = (20, len(model_body.layers)-2)[freeze_body-1] 154 | for i in range(num): model_body.layers[i].trainable = False 155 | print('Freeze the first {} layers of total {} layers.'.format(num, len(model_body.layers))) 156 | 157 | model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss', 158 | arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.7})( 159 | [*model_body.output, *y_true]) 160 | model = Model([model_body.input, *y_true], model_loss) 161 | 162 | return model 163 | 164 | def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes): 165 | '''data generator for fit_generator''' 166 | n = len(annotation_lines) 167 | i = 0 168 | while True: 169 | image_data = [] 170 | box_data = [] 171 | for b in range(batch_size): 172 | if i==0: 173 | np.random.shuffle(annotation_lines) 174 | image, box = get_random_data(annotation_lines[i], input_shape, random=True) 175 | image_data.append(image) 176 | box_data.append(box) 177 | i = (i+1) % n 178 | image_data = np.array(image_data) 179 | box_data = np.array(box_data) 180 | y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes) 181 | yield [image_data, *y_true], np.zeros(batch_size) 182 | 183 | def data_generator_wrapper(annotation_lines, batch_size, input_shape, anchors, num_classes): 184 | n = len(annotation_lines) 185 | if n==0 or batch_size<=0: return None 186 | return data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes) 187 | 188 | if __name__ == '__main__': 189 | _main() 190 | -------------------------------------------------------------------------------- /train_bottleneck.py: -------------------------------------------------------------------------------- 1 | """ 2 | Retrain the YOLO model for your own dataset. 
3 | """ 4 | import os 5 | import numpy as np 6 | import keras.backend as K 7 | from keras.layers import Input, Lambda 8 | from keras.models import Model 9 | from keras.optimizers import Adam 10 | from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping 11 | 12 | from yolo3.model import preprocess_true_boxes, yolo_body, tiny_yolo_body, yolo_loss 13 | from yolo3.utils import get_random_data 14 | 15 | 16 | def _main(): 17 | annotation_path = 'train.txt' 18 | log_dir = 'logs/000/' 19 | classes_path = 'model_data/coco_classes.txt' 20 | anchors_path = 'model_data/yolo_anchors.txt' 21 | class_names = get_classes(classes_path) 22 | num_classes = len(class_names) 23 | anchors = get_anchors(anchors_path) 24 | 25 | input_shape = (416,416) # multiple of 32, hw 26 | 27 | model, bottleneck_model, last_layer_model = create_model(input_shape, anchors, num_classes, 28 | freeze_body=2, weights_path='model_data/yolo_weights.h5') # make sure you know what you freeze 29 | 30 | logging = TensorBoard(log_dir=log_dir) 31 | checkpoint = ModelCheckpoint(log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5', 32 | monitor='val_loss', save_weights_only=True, save_best_only=True, period=3) 33 | reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1) 34 | early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1) 35 | 36 | val_split = 0.1 37 | with open(annotation_path) as f: 38 | lines = f.readlines() 39 | np.random.seed(10101) 40 | np.random.shuffle(lines) 41 | np.random.seed(None) 42 | num_val = int(len(lines)*val_split) 43 | num_train = len(lines) - num_val 44 | 45 | # Train with frozen layers first, to get a stable loss. 46 | # Adjust num epochs to your dataset. This step is enough to obtain a not bad model. 
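# Bottleneck training, in brief: while the backbone is frozen and augmentation is
# disabled, its feature maps for each training image never change, so they are
# computed once with bottleneck_model, cached in bottlenecks.npz, and the detection
# head (last_layer_model) is then trained directly on those cached features. This
# avoids re-running the full DarkNet-53 forward pass on every epoch.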
47 | if True: 48 | # perform bottleneck training 49 | if not os.path.isfile("bottlenecks.npz"): 50 | print("calculating bottlenecks") 51 | batch_size=8 52 | bottlenecks=bottleneck_model.predict_generator(data_generator_wrapper(lines, batch_size, input_shape, anchors, num_classes, random=False, verbose=True), 53 | steps=(len(lines)//batch_size)+1, max_queue_size=1) 54 | np.savez("bottlenecks.npz", bot0=bottlenecks[0], bot1=bottlenecks[1], bot2=bottlenecks[2]) 55 | 56 | # load bottleneck features from file 57 | dict_bot=np.load("bottlenecks.npz") 58 | bottlenecks_train=[dict_bot["bot0"][:num_train], dict_bot["bot1"][:num_train], dict_bot["bot2"][:num_train]] 59 | bottlenecks_val=[dict_bot["bot0"][num_train:], dict_bot["bot1"][num_train:], dict_bot["bot2"][num_train:]] 60 | 61 | # train last layers with fixed bottleneck features 62 | batch_size=8 63 | print("Training last layers with bottleneck features") 64 | print('with {} samples, val on {} samples and batch size {}.'.format(num_train, num_val, batch_size)) 65 | last_layer_model.compile(optimizer='adam', loss={'yolo_loss': lambda y_true, y_pred: y_pred}) 66 | last_layer_model.fit_generator(bottleneck_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, bottlenecks_train), 67 | steps_per_epoch=max(1, num_train//batch_size), 68 | validation_data=bottleneck_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes, bottlenecks_val), 69 | validation_steps=max(1, num_val//batch_size), 70 | epochs=30, 71 | initial_epoch=0, max_queue_size=1) 72 | model.save_weights(log_dir + 'trained_weights_stage_0.h5') 73 | 74 | # train last layers with random augmented data 75 | model.compile(optimizer=Adam(lr=1e-3), loss={ 76 | # use custom yolo_loss Lambda layer. 77 | 'yolo_loss': lambda y_true, y_pred: y_pred}) 78 | batch_size = 16 79 | print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size)) 80 | model.fit_generator(data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes), 81 | steps_per_epoch=max(1, num_train//batch_size), 82 | validation_data=data_generator_wrapper(lines[num_train:], batch_size, input_shape, anchors, num_classes), 83 | validation_steps=max(1, num_val//batch_size), 84 | epochs=50, 85 | initial_epoch=0, 86 | callbacks=[logging, checkpoint]) 87 | model.save_weights(log_dir + 'trained_weights_stage_1.h5') 88 | 89 | # Unfreeze and continue training, to fine-tune. 90 | # Train longer if the result is not good. 91 | if True: 92 | for i in range(len(model.layers)): 93 | model.layers[i].trainable = True 94 | model.compile(optimizer=Adam(lr=1e-4), loss={'yolo_loss': lambda y_true, y_pred: y_pred}) # recompile to apply the change 95 | print('Unfreeze all of the layers.') 96 | 97 | batch_size = 4 # note that more GPU memory is required after unfreezing the body 98 | print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size)) 99 | model.fit_generator(data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes), 100 | steps_per_epoch=max(1, num_train//batch_size), 101 | validation_data=data_generator_wrapper(lines[num_train:], batch_size, input_shape, anchors, num_classes), 102 | validation_steps=max(1, num_val//batch_size), 103 | epochs=100, 104 | initial_epoch=50, 105 | callbacks=[logging, checkpoint, reduce_lr, early_stopping]) 106 | model.save_weights(log_dir + 'trained_weights_final.h5') 107 | 108 | # Further training if needed. 
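# A note on the loss wiring used above: the training model's only output is the
# yolo_loss Lambda layer, so Keras is compiled with a loss that simply returns
# y_pred (the precomputed loss tensor) and is fed dummy zero targets whose values
# are never used. A minimal sketch of the same pattern on a generic model (the
# names `inputs`, `loss_tensor`, and `x` below are illustrative, not part of this repo):
#
#   model = Model(inputs, loss_tensor)                    # the model's output is the loss itself
#   model.compile(optimizer='adam',
#                 loss=lambda y_true, y_pred: y_pred)     # ignore y_true, pass the loss through
#   model.fit(x, np.zeros(len(x)), batch_size=8)          # dummy targets, only the shape matters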
109 | 110 | 111 | def get_classes(classes_path): 112 | '''loads the classes''' 113 | with open(classes_path) as f: 114 | class_names = f.readlines() 115 | class_names = [c.strip() for c in class_names] 116 | return class_names 117 | 118 | def get_anchors(anchors_path): 119 | '''loads the anchors from a file''' 120 | with open(anchors_path) as f: 121 | anchors = f.readline() 122 | anchors = [float(x) for x in anchors.split(',')] 123 | return np.array(anchors).reshape(-1, 2) 124 | 125 | 126 | def create_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=2, 127 | weights_path='model_data/yolo_weights.h5'): 128 | '''create the training model''' 129 | K.clear_session() # get a new session 130 | image_input = Input(shape=(None, None, 3)) 131 | h, w = input_shape 132 | num_anchors = len(anchors) 133 | 134 | y_true = [Input(shape=(h//{0:32, 1:16, 2:8}[l], w//{0:32, 1:16, 2:8}[l], \ 135 | num_anchors//3, num_classes+5)) for l in range(3)] 136 | 137 | model_body = yolo_body(image_input, num_anchors//3, num_classes) 138 | print('Create YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes)) 139 | 140 | if load_pretrained: 141 | model_body.load_weights(weights_path, by_name=True, skip_mismatch=True) 142 | print('Load weights {}.'.format(weights_path)) 143 | if freeze_body in [1, 2]: 144 | # Freeze darknet53 body or freeze all but 3 output layers. 145 | num = (185, len(model_body.layers)-3)[freeze_body-1] 146 | for i in range(num): model_body.layers[i].trainable = False 147 | print('Freeze the first {} layers of total {} layers.'.format(num, len(model_body.layers))) 148 | 149 | # get output of second last layers and create bottleneck model of it 150 | out1=model_body.layers[246].output 151 | out2=model_body.layers[247].output 152 | out3=model_body.layers[248].output 153 | bottleneck_model = Model([model_body.input, *y_true], [out1, out2, out3]) 154 | 155 | # create last layer model of last layers from yolo model 156 | in0 = Input(shape=bottleneck_model.output[0].shape[1:].as_list()) 157 | in1 = Input(shape=bottleneck_model.output[1].shape[1:].as_list()) 158 | in2 = Input(shape=bottleneck_model.output[2].shape[1:].as_list()) 159 | last_out0=model_body.layers[249](in0) 160 | last_out1=model_body.layers[250](in1) 161 | last_out2=model_body.layers[251](in2) 162 | model_last=Model(inputs=[in0, in1, in2], outputs=[last_out0, last_out1, last_out2]) 163 | model_loss_last =Lambda(yolo_loss, output_shape=(1,), name='yolo_loss', 164 | arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5})( 165 | [*model_last.output, *y_true]) 166 | last_layer_model = Model([in0,in1,in2, *y_true], model_loss_last) 167 | 168 | 169 | model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss', 170 | arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5})( 171 | [*model_body.output, *y_true]) 172 | model = Model([model_body.input, *y_true], model_loss) 173 | 174 | return model, bottleneck_model, last_layer_model 175 | 176 | def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes, random=True, verbose=False): 177 | '''data generator for fit_generator''' 178 | n = len(annotation_lines) 179 | i = 0 180 | while True: 181 | image_data = [] 182 | box_data = [] 183 | for b in range(batch_size): 184 | if i==0 and random: 185 | np.random.shuffle(annotation_lines) 186 | image, box = get_random_data(annotation_lines[i], input_shape, random=random) 187 | image_data.append(image) 188 | box_data.append(box) 189 | i 
= (i+1) % n 190 | image_data = np.array(image_data) 191 | if verbose: 192 | print("Progress: ",i,"/",n) 193 | box_data = np.array(box_data) 194 | y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes) 195 | yield [image_data, *y_true], np.zeros(batch_size) 196 | 197 | def data_generator_wrapper(annotation_lines, batch_size, input_shape, anchors, num_classes, random=True, verbose=False): 198 | n = len(annotation_lines) 199 | if n==0 or batch_size<=0: return None 200 | return data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes, random, verbose) 201 | 202 | def bottleneck_generator(annotation_lines, batch_size, input_shape, anchors, num_classes, bottlenecks): 203 | n = len(annotation_lines) 204 | i = 0 205 | while True: 206 | box_data = [] 207 | b0=np.zeros((batch_size,bottlenecks[0].shape[1],bottlenecks[0].shape[2],bottlenecks[0].shape[3])) 208 | b1=np.zeros((batch_size,bottlenecks[1].shape[1],bottlenecks[1].shape[2],bottlenecks[1].shape[3])) 209 | b2=np.zeros((batch_size,bottlenecks[2].shape[1],bottlenecks[2].shape[2],bottlenecks[2].shape[3])) 210 | for b in range(batch_size): 211 | _, box = get_random_data(annotation_lines[i], input_shape, random=False, proc_img=False) 212 | box_data.append(box) 213 | b0[b]=bottlenecks[0][i] 214 | b1[b]=bottlenecks[1][i] 215 | b2[b]=bottlenecks[2][i] 216 | i = (i+1) % n 217 | box_data = np.array(box_data) 218 | y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes) 219 | yield [b0, b1, b2, *y_true], np.zeros(batch_size) 220 | 221 | if __name__ == '__main__': 222 | _main() 223 | -------------------------------------------------------------------------------- /val.txt: -------------------------------------------------------------------------------- 1 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/10178.jpg 347,120,578,357,0 2 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/10219.jpg 294,154,523,378,0 3 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/1194.jpg 19,96,410,500,0 440,74,927,501,0 4 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/1211.jpg 37,130,408,498,0 465,82,874,493,0 5 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/1276.jpg 85,117,448,513,0 517,88,951,540,0 6 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/1361.jpg 242,201,567,540,0 626,195,945,479,0 7 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/1456.jpg 211,87,662,530,1 8 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/146.jpg 236,9,747,506,0 9 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/1467.jpg 221,85,641,518,1 10 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/1469.jpg 199,85,628,524,1 11 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/1583.jpg 13,180,278,430,1 416,174,686,425,1 12 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/1615.jpg 41,194,247,410,1 393,175,616,402,1 708,186,936,395,1 13 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/1743.jpg 8,123,328,458,1 545,135,899,466,1 14 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/1761.jpg 41,167,311,431,1 445,173,710,432,1 15 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/1803.jpg 252,156,526,444,1 713,175,960,430,1 16 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/1811.jpg 123,101,548,514,1 17 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/1821.jpg 139,60,589,530,1 18 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/1848.jpg 192,33,704,540,1 19 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/1865.jpg 191,39,692,539,1 20 | 
D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/2031.jpg 66,74,488,481,1 21 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/2148.jpg 161,140,508,463,1 673,146,960,436,1 22 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/2469.jpg 42,109,374,457,1 591,106,951,443,1 23 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/2549.jpg 59,193,269,393,1 403,183,598,383,1 726,192,950,400,1 24 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/2717.jpg 28,180,266,428,1 410,188,656,419,1 25 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/2734.jpg 7,192,207,415,1 360,193,578,415,1 712,215,928,416,1 26 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/2855.jpg 46,112,394,487,1 27 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/294.jpg 97,89,453,446,0 519,52,947,470,0 28 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/31.jpg 241,16,704,493,0 29 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/3288.jpg 208,62,688,533,1 30 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/3391.jpg 468,158,786,477,1 10,169,325,469,1 31 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/3422.jpg 496,150,827,476,1 74,161,395,482,1 32 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/345.jpg 297,125,510,345,0 554,130,789,352,0 33 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/3482.jpg 495,153,833,455,1 111,174,413,451,1 34 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/359.jpg 283,86,520,331,0 554,84,809,341,0 35 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/363.jpg 284,73,528,314,0 562,88,815,337,0 36 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/3645.jpg 58,109,351,417,1 460,110,772,429,1 37 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/3674.jpg 260,169,522,424,1 614,153,888,427,1 38 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/3714.jpg 466,57,903,498,1 39 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/373.jpg 249,93,519,336,0 534,108,792,351,0 40 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/3762.jpg 462,98,826,448,1 41 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/3778.jpg 123,119,474,451,1 583,136,930,450,1 42 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/3804.jpg 124,116,502,470,1 595,115,945,460,1 43 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/393.jpg 246,72,502,317,0 541,69,802,334,0 44 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/4140.jpg 244,10,785,531,1 45 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/430.jpg 250,137,460,362,0 490,85,752,339,0 46 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/4551.jpg 248,149,463,372,0 502,148,757,378,0 47 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/4681.jpg 317,64,619,347,0 647,48,960,371,0 48 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/486.jpg 365,30,831,482,0 49 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/5108.jpg 176,4,744,540,0 50 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/5240.jpg 177,3,753,535,0 51 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/5273.jpg 181,2,747,538,0 52 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/5332.jpg 177,9,753,540,0 53 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/537.jpg 257,3,813,530,0 54 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/5372.jpg 181,1,743,540,0 55 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/5378.jpg 183,4,753,540,0 56 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/5424.jpg 194,7,747,540,0 57 | 
D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/5526.jpg 193,8,758,540,0 58 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/5555.jpg 165,2,747,540,0 59 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/5695.jpg 159,2,753,536,0 60 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/5782.jpg 176,4,732,540,0 61 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/5820.jpg 179,4,747,540,0 62 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/5958.jpg 194,3,751,540,0 63 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/5993.jpg 185,6,755,540,0 64 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/6045.jpg 334,108,538,305,0 65 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/6119.jpg 348,144,558,361,0 66 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/6129.jpg 349,174,534,353,0 67 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/6221.jpg 359,259,528,369,0 68 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/6450.jpg 289,159,468,311,0 69 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/6599.jpg 323,165,535,363,0 70 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/663.jpg 415,15,833,413,0 71 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/6697.jpg 357,257,543,371,0 72 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/6711.jpg 369,248,589,396,0 73 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/6761.jpg 344,188,556,383,0 74 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/6828.jpg 352,238,553,453,0 75 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/6889.jpg 318,202,505,373,0 76 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/6989.jpg 254,91,604,425,0 77 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/7033.jpg 277,144,616,441,0 78 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/7059.jpg 355,139,710,482,0 79 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/7197.jpg 1,120,331,515,0 80 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/7710.jpg 279,100,644,463,0 81 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/7727.jpg 308,129,614,454,0 82 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/7819.jpg 270,117,627,465,0 83 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/787.jpg 150,103,493,448,0 84 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/7880.jpg 275,69,712,493,0 85 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/7882.jpg 279,67,719,506,0 86 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/7923.jpg 248,83,670,412,0 87 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/8008.jpg 144,218,727,540,0 88 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/8039.jpg 215,83,735,502,0 89 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/8284.jpg 141,30,660,536,0 90 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/8324.jpg 255,43,707,437,0 91 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/8465.jpg 207,180,720,540,0 92 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/8655.jpg 239,20,719,488,0 93 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/873.jpg 164,237,432,513,0 477,214,791,512,0 94 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/8790.jpg 215,40,712,457,0 95 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/8905.jpg 241,94,694,465,0 96 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/8917.jpg 196,75,739,526,0 97 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/9150.jpg 194,25,698,358,0 98 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/9179.jpg 281,88,817,539,0 99 | 
D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/9300.jpg 252,171,470,437,0 100 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/933.jpg 197,21,759,540,0 101 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/9419.jpg 388,217,557,421,0 102 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/9457.jpg 402,138,644,378,0 103 | D:\git\work\keras-yolo3/VOCdevkit/VOC2007/JPEGImages/9636.jpg 347,119,570,341,0 104 | -------------------------------------------------------------------------------- /voc_annotation.py: -------------------------------------------------------------------------------- 1 | import xml.etree.ElementTree as ET 2 | from os import getcwd 3 | 4 | sets=[('2007', 'train'), ('2007', 'val'), ('2007', 'test')] 5 | 6 | classes = ["meter" , "vameter"] 7 | 8 | 9 | def convert_annotation(year, image_id, list_file): 10 | in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml'%(year, image_id),encoding='UTF-8') 11 | tree=ET.parse(in_file) 12 | root = tree.getroot() 13 | 14 | for obj in root.iter('object'): 15 | difficult = obj.find('difficult').text 16 | cls = obj.find('name').text 17 | if cls not in classes or int(difficult)==1: 18 | continue 19 | cls_id = classes.index(cls) 20 | xmlbox = obj.find('bndbox') 21 | b = (int(xmlbox.find('xmin').text), int(xmlbox.find('ymin').text), int(xmlbox.find('xmax').text), int(xmlbox.find('ymax').text)) 22 | list_file.write(" " + ",".join([str(a) for a in b]) + ',' + str(cls_id)) 23 | 24 | wd = getcwd() 25 | 26 | for year, image_set in sets: 27 | image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, image_set),encoding='UTF-8').read().strip().split() 28 | list_file = open('%s_%s.txt'%(year, image_set), 'w') 29 | for image_id in image_ids: 30 | list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg'%(wd, year, image_id)) 31 | convert_annotation(year, image_id, list_file) 32 | list_file.write('\n') 33 | list_file.close() 34 | 35 | -------------------------------------------------------------------------------- /xyh.py: -------------------------------------------------------------------------------- 1 | import cv2 as cv 2 | import numpy as np 3 | 4 | m=[0,0,1,0,0,1,1,1] 5 | A=np.array(m).reshape(4,2).astype(np.float32) 6 | m=cv.minAreaRect(A) 7 | print(m) 8 | print(cv.__version__) -------------------------------------------------------------------------------- /yolo.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Class definition of YOLO_v3 style detection model on image and video 4 | """ 5 | 6 | import colorsys 7 | import os 8 | from timeit import default_timer as timer 9 | 10 | import numpy as np 11 | from keras import backend as K 12 | from keras.models import load_model 13 | from keras.layers import Input 14 | from PIL import Image, ImageFont, ImageDraw 15 | 16 | from yolo3.model import yolo_eval, yolo_body, tiny_yolo_body 17 | from yolo3.utils import letterbox_image 18 | import os 19 | from keras.utils import multi_gpu_model 20 | 21 | class YOLO(object): 22 | _defaults = { 23 | "model_path": 'logs/000/trained_weights_final.h5', 24 | "anchors_path": 'model_data/yolo_anchors.txt', 25 | "classes_path": 'model_data/coco_classes.txt', 26 | "score" : 0.05, 27 | "iou" : 0.45, 28 | "model_image_size" : (416, 416), 29 | "gpu_num" : 1, 30 | } 31 | 32 | @classmethod 33 | def get_defaults(cls, n): 34 | if n in cls._defaults: 35 | return cls._defaults[n] 36 | else: 37 | return "Unrecognized attribute name '" + n + "'" 38 | 39 | def __init__(self, **kwargs): 40 | 
self.__dict__.update(self._defaults) # set up default values 41 | self.__dict__.update(kwargs) # and update with user overrides 42 | self.class_names = self._get_class() 43 | self.anchors = self._get_anchors() 44 | self.sess = K.get_session() 45 | self.boxes, self.scores, self.classes = self.generate() 46 | 47 | def _get_class(self): 48 | classes_path = os.path.expanduser(self.classes_path) 49 | with open(classes_path) as f: 50 | class_names = f.readlines() 51 | class_names = [c.strip() for c in class_names] 52 | return class_names 53 | 54 | def _get_anchors(self): 55 | anchors_path = os.path.expanduser(self.anchors_path) 56 | with open(anchors_path) as f: 57 | anchors = f.readline() 58 | anchors = [float(x) for x in anchors.split(',')] 59 | return np.array(anchors).reshape(-1, 2) 60 | 61 | def generate(self): 62 | model_path = os.path.expanduser(self.model_path) 63 | assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.' 64 | 65 | # Load model, or construct model and load weights. 66 | num_anchors = len(self.anchors) 67 | num_classes = len(self.class_names) 68 | is_tiny_version = num_anchors==6 # default setting 69 | try: 70 | self.yolo_model = load_model(model_path, compile=False) 71 | except: 72 | self.yolo_model = tiny_yolo_body(Input(shape=(None,None,3)), num_anchors//2, num_classes) \ 73 | if is_tiny_version else yolo_body(Input(shape=(None,None,3)), num_anchors//3, num_classes) 74 | self.yolo_model.load_weights(self.model_path) # make sure model, anchors and classes match 75 | else: 76 | assert self.yolo_model.layers[-1].output_shape[-1] == \ 77 | num_anchors/len(self.yolo_model.output) * (num_classes + 5), \ 78 | 'Mismatch between model and given anchor and class sizes' 79 | 80 | print('{} model, anchors, and classes loaded.'.format(model_path)) 81 | 82 | # Generate colors for drawing bounding boxes. 83 | hsv_tuples = [(x / len(self.class_names), 1., 1.) 84 | for x in range(len(self.class_names))] 85 | self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples)) 86 | self.colors = list( 87 | map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), 88 | self.colors)) 89 | np.random.seed(10101) # Fixed seed for consistent colors across runs. 90 | np.random.shuffle(self.colors) # Shuffle colors to decorrelate adjacent classes. 91 | np.random.seed(None) # Reset seed to default. 92 | 93 | # Generate output tensor targets for filtered bounding boxes. 94 | self.input_image_shape = K.placeholder(shape=(2, )) 95 | if self.gpu_num>=2: 96 | self.yolo_model = multi_gpu_model(self.yolo_model, gpus=self.gpu_num) 97 | boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors, 98 | len(self.class_names), self.input_image_shape, 99 | score_threshold=self.score, iou_threshold=self.iou) 100 | return boxes, scores, classes 101 | 102 | def detect_image(self, image): 103 | start = timer() 104 | 105 | if self.model_image_size != (None, None): 106 | assert self.model_image_size[0]%32 == 0, 'Multiples of 32 required' 107 | assert self.model_image_size[1]%32 == 0, 'Multiples of 32 required' 108 | boxed_image = letterbox_image(image, tuple(reversed(self.model_image_size))) 109 | else: 110 | new_image_size = (image.width - (image.width % 32), 111 | image.height - (image.height % 32)) 112 | boxed_image = letterbox_image(image, new_image_size) 113 | image_data = np.array(boxed_image, dtype='float32') 114 | 115 | print(image_data.shape) 116 | image_data /= 255. 117 | image_data = np.expand_dims(image_data, 0) # Add batch dimension. 
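# At this point image_data holds one letterboxed frame: the input is resized to fit
# inside model_image_size (416x416 by default) with its aspect ratio preserved,
# padded with gray (128, 128, 128), scaled to [0, 1], and given a leading batch
# dimension, so its shape is (1, 416, 416, 3) with the default settings.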
118 | 119 | out_boxes, out_scores, out_classes = self.sess.run( 120 | [self.boxes, self.scores, self.classes], 121 | feed_dict={ 122 | self.yolo_model.input: image_data, 123 | self.input_image_shape: [image.size[1], image.size[0]], 124 | K.learning_phase(): 0 125 | }) 126 | 127 | print('Found {} boxes for {}'.format(len(out_boxes), 'img')) 128 | 129 | font = ImageFont.truetype(font='font/FiraMono-Medium.otf', 130 | size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32')) 131 | thickness = (image.size[0] + image.size[1]) // 300 132 | 133 | for i, c in reversed(list(enumerate(out_classes))): 134 | predicted_class = self.class_names[c] 135 | box = out_boxes[i] 136 | score = out_scores[i] 137 | 138 | label = '{} {:.2f}'.format(predicted_class, score) 139 | draw = ImageDraw.Draw(image) 140 | label_size = draw.textsize(label, font) 141 | 142 | top, left, bottom, right = box 143 | top = max(0, np.floor(top + 0.5).astype('int32')) 144 | left = max(0, np.floor(left + 0.5).astype('int32')) 145 | bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32')) 146 | right = min(image.size[0], np.floor(right + 0.5).astype('int32')) 147 | print(label, (left, top), (right, bottom)) 148 | 149 | if top - label_size[1] >= 0: 150 | text_origin = np.array([left, top - label_size[1]]) 151 | else: 152 | text_origin = np.array([left, top + 1]) 153 | 154 | # My kingdom for a good redistributable image drawing library. 155 | for i in range(thickness): 156 | draw.rectangle( 157 | [left + i, top + i, right - i, bottom - i], 158 | outline=self.colors[c]) 159 | draw.rectangle( 160 | [tuple(text_origin), tuple(text_origin + label_size)], 161 | fill=self.colors[c]) 162 | draw.text(text_origin, label, fill=(0, 0, 0), font=font) 163 | del draw 164 | 165 | end = timer() 166 | print(end - start) 167 | return image 168 | 169 | def detect_image_mul(self, image): 170 | start = timer() 171 | 172 | if self.model_image_size != (None, None): 173 | assert self.model_image_size[0]%32 == 0, 'Multiples of 32 required' 174 | assert self.model_image_size[1]%32 == 0, 'Multiples of 32 required' 175 | boxed_image = letterbox_image(image, tuple(reversed(self.model_image_size))) 176 | else: 177 | new_image_size = (image.width - (image.width % 32), 178 | image.height - (image.height % 32)) 179 | boxed_image = letterbox_image(image, new_image_size) 180 | image_data = np.array(boxed_image, dtype='float32') 181 | 182 | print(image_data.shape) 183 | image_data /= 255. 184 | image_data = np.expand_dims(image_data, 0) # Add batch dimension. 
185 | 186 | out_boxes, out_scores, out_classes = self.sess.run( 187 | [self.boxes, self.scores, self.classes], 188 | feed_dict={ 189 | self.yolo_model.input: image_data, 190 | self.input_image_shape: [image.size[1], image.size[0]], 191 | K.learning_phase(): 0 192 | }) 193 | 194 | print('Found {} boxes for {}'.format(len(out_boxes), 'img')) 195 | 196 | font = ImageFont.truetype(font='font/FiraMono-Medium.otf', 197 | size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32')) 198 | thickness = (image.size[0] + image.size[1]) // 300 199 | 200 | for i, c in reversed(list(enumerate(out_classes))): 201 | predicted_class = self.class_names[c] 202 | box = out_boxes[i] 203 | score = out_scores[i] 204 | 205 | label = '{} {:.2f}'.format(predicted_class, score) 206 | draw = ImageDraw.Draw(image) 207 | label_size = draw.textsize(label, font) 208 | 209 | top, left, bottom, right = box 210 | top = max(0, np.floor(top + 0.5).astype('int32')) 211 | left = max(0, np.floor(left + 0.5).astype('int32')) 212 | bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32')) 213 | right = min(image.size[0], np.floor(right + 0.5).astype('int32')) 214 | print(label, (left, top), (right, bottom)) 215 | if top - label_size[1] >= 0: 216 | text_origin = np.array([left, top - label_size[1]]) 217 | else: 218 | text_origin = np.array([left, top + 1]) 219 | 220 | # My kingdom for a good redistributable image drawing library. 221 | # for i in range(thickness): 222 | # draw.rectangle( 223 | # [left + i, top + i, right - i, bottom - i], 224 | # outline=self.colors[c]) 225 | draw.rectangle( 226 | [tuple(text_origin), tuple(text_origin + label_size)], 227 | fill=self.colors[c]) 228 | draw.text(text_origin, label, fill=(0, 0, 0), font=font) 229 | del draw 230 | 231 | end = timer() 232 | print(end - start) 233 | return image, out_boxes, out_scores, out_classes 234 | 235 | def close_session(self): 236 | self.sess.close() 237 | 238 | def detect_video(yolo, video_path="D:/0.mp4", output_path="D:/3.avi"): 239 | import cv2 240 | vid = cv2.VideoCapture('D:/0.mp4') 241 | if not vid.isOpened(): 242 | raise IOError("Couldn't open webcam or video") 243 | # video_FourCC = int(vid.get(cv2.CAP_PROP_FOURCC)) 244 | video_fps = vid.get(cv2.CAP_PROP_FPS) 245 | video_size = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)), 246 | int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))) 247 | isOutput = True if output_path != "" else False 248 | if isOutput: 249 | # print("!!! TYPE:", type(output_path), type(video_FourCC), type(video_fps), type(video_size)) 250 | out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), video_fps, video_size) 251 | accum_time = 0 252 | curr_fps = 0 253 | fps = "FPS: ??" 
254 | prev_time = timer()
255 | while True:
256 | return_value, frame = vid.read()
257 | if (return_value == False):
258 | # check whether a frame was actually read, i.e. whether the end of the video has been reached
259 | print("Reached the end of the video.")
260 | break # exit the while loop
261 | image = Image.fromarray(frame)
262 | image = yolo.detect_image(image)
263 | result = np.asarray(image)
264 | curr_time = timer()
265 | exec_time = curr_time - prev_time
266 | prev_time = curr_time
267 | accum_time = accum_time + exec_time
268 | curr_fps = curr_fps + 1
269 | if accum_time > 1:
270 | accum_time = accum_time - 1
271 | fps = "FPS: " + str(curr_fps)
272 | curr_fps = 0
273 | # result = np.array(result)
274 | cv2.putText(result, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
275 | fontScale=0.50, color=(120, 120, 0), thickness=2)
276 | cv2.namedWindow("result", cv2.WINDOW_NORMAL)
277 | cv2.imshow("result", result)
278 | if isOutput:
279 | out.write(result)
280 | if cv2.waitKey(1) & 0xFF == ord('q'):
281 | break
282 | yolo.close_session()
--------------------------------------------------------------------------------
/yolo3/model.py:
--------------------------------------------------------------------------------
1 | """YOLO_v3 Model Defined in Keras."""
2 | 
3 | from functools import wraps
4 | 
5 | import numpy as np
6 | import tensorflow as tf
7 | from keras import backend as K
8 | from keras.layers import Conv2D, Add, ZeroPadding2D, UpSampling2D, Concatenate, MaxPooling2D
9 | from keras.layers.advanced_activations import LeakyReLU
10 | from keras.layers.normalization import BatchNormalization
11 | from keras.models import Model
12 | from keras.regularizers import l2
13 | 
14 | from yolo3.utils import compose
15 | 
16 | 
17 | @wraps(Conv2D)
18 | def DarknetConv2D(*args, **kwargs):
19 | """Wrapper to set Darknet parameters for Convolution2D."""
20 | darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)}
21 | darknet_conv_kwargs['padding'] = 'valid' if kwargs.get('strides')==(2,2) else 'same'
22 | darknet_conv_kwargs.update(kwargs)
23 | return Conv2D(*args, **darknet_conv_kwargs)
24 | 
25 | def DarknetConv2D_BN_Leaky(*args, **kwargs):
26 | """Darknet Convolution2D followed by BatchNormalization and LeakyReLU."""
27 | no_bias_kwargs = {'use_bias': False}
28 | no_bias_kwargs.update(kwargs)
29 | return compose(
30 | DarknetConv2D(*args, **no_bias_kwargs),
31 | BatchNormalization(),
32 | LeakyReLU(alpha=0.1))
33 | 
34 | def resblock_body(x, num_filters, num_blocks):
35 | '''A series of resblocks starting with a downsampling Convolution2D'''
36 | # Darknet uses left and top padding instead of 'same' mode
37 | x = ZeroPadding2D(((1,0),(1,0)))(x)
38 | x = DarknetConv2D_BN_Leaky(num_filters, (3,3), strides=(2,2))(x)
39 | for i in range(num_blocks):
40 | y = compose(
41 | DarknetConv2D_BN_Leaky(num_filters//2, (1,1)),
42 | DarknetConv2D_BN_Leaky(num_filters, (3,3)))(x)
43 | x = Add()([x,y])
44 | return x
45 | 
46 | def darknet_body(x):
47 | '''Darknet body having 52 Convolution2D layers'''
48 | x = DarknetConv2D_BN_Leaky(32, (3,3))(x)
49 | x = resblock_body(x, 64, 1)
50 | x = resblock_body(x, 128, 2)
51 | x = resblock_body(x, 256, 8)
52 | x = resblock_body(x, 512, 8)
53 | x = resblock_body(x, 1024, 4)
54 | return x
55 | 
56 | def make_last_layers(x, num_filters, out_filters):
57 | '''6 Conv2D_BN_Leaky layers followed by a Conv2D_linear layer'''
58 | x = compose(
59 | DarknetConv2D_BN_Leaky(num_filters, (1,1)),
60 | DarknetConv2D_BN_Leaky(num_filters*2, (3,3)),
61 | DarknetConv2D_BN_Leaky(num_filters, (1,1)),
62 | DarknetConv2D_BN_Leaky(num_filters*2, (3,3)),
63 |
DarknetConv2D_BN_Leaky(num_filters, (1,1)))(x) 64 | y = compose( 65 | DarknetConv2D_BN_Leaky(num_filters*2, (3,3)), 66 | DarknetConv2D(out_filters, (1,1)))(x) 67 | return x, y 68 | 69 | 70 | def yolo_body(inputs, num_anchors, num_classes): 71 | """Create YOLO_V3 model CNN body in Keras.""" 72 | darknet = Model(inputs, darknet_body(inputs)) 73 | x, y1 = make_last_layers(darknet.output, 512, num_anchors*(num_classes+5)) 74 | 75 | x = compose( 76 | DarknetConv2D_BN_Leaky(256, (1,1)), 77 | UpSampling2D(2))(x) 78 | x = Concatenate()([x,darknet.layers[152].output]) 79 | x, y2 = make_last_layers(x, 256, num_anchors*(num_classes+5)) 80 | 81 | x = compose( 82 | DarknetConv2D_BN_Leaky(128, (1,1)), 83 | UpSampling2D(2))(x) 84 | x = Concatenate()([x,darknet.layers[92].output]) 85 | x, y3 = make_last_layers(x, 128, num_anchors*(num_classes+5)) 86 | 87 | return Model(inputs, [y1,y2,y3]) 88 | 89 | def tiny_yolo_body(inputs, num_anchors, num_classes): 90 | '''Create Tiny YOLO_v3 model CNN body in keras.''' 91 | x1 = compose( 92 | DarknetConv2D_BN_Leaky(16, (3,3)), 93 | MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'), 94 | DarknetConv2D_BN_Leaky(32, (3,3)), 95 | MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'), 96 | DarknetConv2D_BN_Leaky(64, (3,3)), 97 | MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'), 98 | DarknetConv2D_BN_Leaky(128, (3,3)), 99 | MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'), 100 | DarknetConv2D_BN_Leaky(256, (3,3)))(inputs) 101 | x2 = compose( 102 | MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'), 103 | DarknetConv2D_BN_Leaky(512, (3,3)), 104 | MaxPooling2D(pool_size=(2,2), strides=(1,1), padding='same'), 105 | DarknetConv2D_BN_Leaky(1024, (3,3)), 106 | DarknetConv2D_BN_Leaky(256, (1,1)))(x1) 107 | y1 = compose( 108 | DarknetConv2D_BN_Leaky(512, (3,3)), 109 | DarknetConv2D(num_anchors*(num_classes+5), (1,1)))(x2) 110 | 111 | x2 = compose( 112 | DarknetConv2D_BN_Leaky(128, (1,1)), 113 | UpSampling2D(2))(x2) 114 | y2 = compose( 115 | Concatenate(), 116 | DarknetConv2D_BN_Leaky(256, (3,3)), 117 | DarknetConv2D(num_anchors*(num_classes+5), (1,1)))([x2,x1]) 118 | 119 | return Model(inputs, [y1,y2]) 120 | 121 | 122 | def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False): 123 | """Convert final layer features to bounding box parameters.""" 124 | num_anchors = len(anchors) 125 | # Reshape to batch, height, width, num_anchors, box_params. 126 | anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2]) 127 | 128 | grid_shape = K.shape(feats)[1:3] # height, width 129 | grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]), 130 | [1, grid_shape[1], 1, 1]) 131 | grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]), 132 | [grid_shape[0], 1, 1, 1]) 133 | grid = K.concatenate([grid_x, grid_y]) 134 | grid = K.cast(grid, K.dtype(feats)) 135 | 136 | feats = K.reshape( 137 | feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5]) 138 | 139 | # Adjust preditions to each spatial grid point and anchor size. 
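# Box decoding, as in the original YOLOv3 formulation: for raw outputs
# (tx, ty, tw, th) at grid cell (cx, cy) with anchor (pw, ph),
#   bx = (sigmoid(tx) + cx) / grid_w      by = (sigmoid(ty) + cy) / grid_h
#   bw = pw * exp(tw) / input_w           bh = ph * exp(th) / input_h
# so box_xy and box_wh below are expressed as fractions of the input image size.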
140 | box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(feats)) 141 | box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats)) 142 | box_confidence = K.sigmoid(feats[..., 4:5]) 143 | box_class_probs = K.sigmoid(feats[..., 5:]) 144 | 145 | if calc_loss == True: 146 | return grid, feats, box_xy, box_wh 147 | return box_xy, box_wh, box_confidence, box_class_probs 148 | 149 | 150 | def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape): 151 | '''Get corrected boxes''' 152 | box_yx = box_xy[..., ::-1] 153 | box_hw = box_wh[..., ::-1] 154 | input_shape = K.cast(input_shape, K.dtype(box_yx)) 155 | image_shape = K.cast(image_shape, K.dtype(box_yx)) 156 | new_shape = K.round(image_shape * K.min(input_shape/image_shape)) 157 | offset = (input_shape-new_shape)/2./input_shape 158 | scale = input_shape/new_shape 159 | box_yx = (box_yx - offset) * scale 160 | box_hw *= scale 161 | 162 | box_mins = box_yx - (box_hw / 2.) 163 | box_maxes = box_yx + (box_hw / 2.) 164 | boxes = K.concatenate([ 165 | box_mins[..., 0:1], # y_min 166 | box_mins[..., 1:2], # x_min 167 | box_maxes[..., 0:1], # y_max 168 | box_maxes[..., 1:2] # x_max 169 | ]) 170 | 171 | # Scale boxes back to original image shape. 172 | boxes *= K.concatenate([image_shape, image_shape]) 173 | return boxes 174 | 175 | 176 | def yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape): 177 | '''Process Conv layer output''' 178 | box_xy, box_wh, box_confidence, box_class_probs = yolo_head(feats, 179 | anchors, num_classes, input_shape) 180 | boxes = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape) 181 | boxes = K.reshape(boxes, [-1, 4]) 182 | box_scores = box_confidence * box_class_probs 183 | box_scores = K.reshape(box_scores, [-1, num_classes]) 184 | return boxes, box_scores 185 | 186 | 187 | def yolo_eval(yolo_outputs, 188 | anchors, 189 | num_classes, 190 | image_shape, 191 | max_boxes=20, 192 | score_threshold=.6, 193 | iou_threshold=.5): 194 | """Evaluate YOLO model on given input and return filtered boxes.""" 195 | num_layers = len(yolo_outputs) 196 | anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]] # default setting 197 | input_shape = K.shape(yolo_outputs[0])[1:3] * 32 198 | boxes = [] 199 | box_scores = [] 200 | for l in range(num_layers): 201 | _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l], 202 | anchors[anchor_mask[l]], num_classes, input_shape, image_shape) 203 | boxes.append(_boxes) 204 | box_scores.append(_box_scores) 205 | boxes = K.concatenate(boxes, axis=0) 206 | box_scores = K.concatenate(box_scores, axis=0) 207 | 208 | mask = box_scores >= score_threshold 209 | max_boxes_tensor = K.constant(max_boxes, dtype='int32') 210 | boxes_ = [] 211 | scores_ = [] 212 | classes_ = [] 213 | for c in range(num_classes): 214 | # TODO: use keras backend instead of tf. 
215 | class_boxes = tf.boolean_mask(boxes, mask[:, c])
216 | class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
217 | nms_index = tf.image.non_max_suppression(
218 | class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold)
219 | class_boxes = K.gather(class_boxes, nms_index)
220 | class_box_scores = K.gather(class_box_scores, nms_index)
221 | classes = K.ones_like(class_box_scores, 'int32') * c
222 | boxes_.append(class_boxes)
223 | scores_.append(class_box_scores)
224 | classes_.append(classes)
225 | boxes_ = K.concatenate(boxes_, axis=0)
226 | scores_ = K.concatenate(scores_, axis=0)
227 | classes_ = K.concatenate(classes_, axis=0)
228 | 
229 | return boxes_, scores_, classes_
230 | 
231 | 
232 | def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):
233 | '''Preprocess true boxes to training input format
234 | 
235 | Parameters
236 | ----------
237 | true_boxes: array, shape=(m, T, 5)
238 | Absolute x_min, y_min, x_max, y_max, class_id relative to input_shape.
239 | input_shape: array-like, hw, multiples of 32
240 | anchors: array, shape=(N, 2), wh
241 | num_classes: integer
242 | 
243 | Returns
244 | -------
245 | y_true: list of array, shape like yolo_outputs, xywh are relative values
246 | 
247 | '''
248 | assert (true_boxes[..., 4]<num_classes).all(), 'class id must be less than num_classes'
249 | num_layers = len(anchors)//3 # default setting
250 | anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]]
251 | 
252 | true_boxes = np.array(true_boxes, dtype='float32')
253 | input_shape = np.array(input_shape, dtype='int32')
254 | boxes_xy = (true_boxes[..., 0:2] + true_boxes[..., 2:4]) // 2
255 | boxes_wh = true_boxes[..., 2:4] - true_boxes[..., 0:2]
256 | true_boxes[..., 0:2] = boxes_xy/input_shape[::-1]
257 | true_boxes[..., 2:4] = boxes_wh/input_shape[::-1]
258 | 
259 | m = true_boxes.shape[0]
260 | grid_shapes = [input_shape//{0:32, 1:16, 2:8}[l] for l in range(num_layers)]
261 | y_true = [np.zeros((m,grid_shapes[l][0],grid_shapes[l][1],len(anchor_mask[l]),5+num_classes),
262 | dtype='float32') for l in range(num_layers)]
263 | 
264 | # Expand dim to apply broadcasting.
265 | anchors = np.expand_dims(anchors, 0)
266 | anchor_maxes = anchors / 2.
267 | anchor_mins = -anchor_maxes
268 | valid_mask = boxes_wh[..., 0]>0
269 | 
270 | for b in range(m):
271 | # Discard zero rows.
272 | wh = boxes_wh[b, valid_mask[b]]
273 | if len(wh)==0: continue
274 | # Expand dim to apply broadcasting.
275 | wh = np.expand_dims(wh, -2)
276 | box_maxes = wh / 2.
277 | box_mins = -box_maxes
278 | 
279 | intersect_mins = np.maximum(box_mins, anchor_mins)
280 | intersect_maxes = np.minimum(box_maxes, anchor_maxes)
281 | intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
282 | intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
283 | box_area = wh[..., 0] * wh[..., 1]
284 | anchor_area = anchors[..., 0] * anchors[..., 1]
285 | iou = intersect_area / (box_area + anchor_area - intersect_area)
286 | 
287 | # Find best anchor for each true box
288 | best_anchor = np.argmax(iou, axis=-1)
289 | 
290 | for t, n in enumerate(best_anchor):
291 | for l in range(num_layers):
292 | if n in anchor_mask[l]:
293 | i = np.floor(true_boxes[b,t,0]*grid_shapes[l][1]).astype('int32')
294 | j = np.floor(true_boxes[b,t,1]*grid_shapes[l][0]).astype('int32')
295 | k = anchor_mask[l].index(n)
296 | c = true_boxes[b,t, 4].astype('int32')
297 | y_true[l][b, j, i, k, 0:4] = true_boxes[b,t, 0:4]
298 | y_true[l][b, j, i, k, 4] = 1
299 | y_true[l][b, j, i, k, 5+c] = 1
300 | 
301 | return y_true
302 | 
303 | 
304 | def box_iou(b1, b2):
305 | '''Return iou tensor
306 | 
307 | Parameters
308 | ----------
309 | b1: tensor, shape=(i1,...,iN, 4), xywh
310 | b2: tensor, shape=(j, 4), xywh
311 | 
312 | Returns
313 | -------
314 | iou: tensor, shape=(i1,...,iN, j)
315 | 
316 | '''
317 | 
318 | # Expand dim to apply broadcasting.
319 | b1 = K.expand_dims(b1, -2)
320 | b1_xy = b1[..., :2]
321 | b1_wh = b1[..., 2:4]
322 | b1_wh_half = b1_wh/2.
323 | b1_mins = b1_xy - b1_wh_half
324 | b1_maxes = b1_xy + b1_wh_half
325 | 
326 | # Expand dim to apply broadcasting.
327 | b2 = K.expand_dims(b2, 0)
328 | b2_xy = b2[..., :2]
329 | b2_wh = b2[..., 2:4]
330 | b2_wh_half = b2_wh/2.
331 | b2_mins = b2_xy - b2_wh_half
332 | b2_maxes = b2_xy + b2_wh_half
333 | 
334 | intersect_mins = K.maximum(b1_mins, b2_mins)
335 | intersect_maxes = K.minimum(b1_maxes, b2_maxes)
336 | intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
337 | intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1] 338 | b1_area = b1_wh[..., 0] * b1_wh[..., 1] 339 | b2_area = b2_wh[..., 0] * b2_wh[..., 1] 340 | iou = intersect_area / (b1_area + b2_area - intersect_area) 341 | 342 | return iou 343 | 344 | 345 | def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False): 346 | '''Return yolo_loss tensor 347 | 348 | Parameters 349 | ---------- 350 | yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body 351 | y_true: list of array, the output of preprocess_true_boxes 352 | anchors: array, shape=(N, 2), wh 353 | num_classes: integer 354 | ignore_thresh: float, the iou threshold whether to ignore object confidence loss 355 | 356 | Returns 357 | ------- 358 | loss: tensor, shape=(1,) 359 | 360 | ''' 361 | num_layers = len(anchors)//3 # default setting 362 | yolo_outputs = args[:num_layers] 363 | y_true = args[num_layers:] 364 | anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]] 365 | input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0])) 366 | grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)] 367 | loss = 0 368 | m = K.shape(yolo_outputs[0])[0] # batch size, tensor 369 | mf = K.cast(m, K.dtype(yolo_outputs[0])) 370 | 371 | for l in range(num_layers): 372 | object_mask = y_true[l][..., 4:5] 373 | true_class_probs = y_true[l][..., 5:] 374 | 375 | grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l], 376 | anchors[anchor_mask[l]], num_classes, input_shape, calc_loss=True) 377 | pred_box = K.concatenate([pred_xy, pred_wh]) 378 | 379 | # Darknet raw box to calculate loss. 380 | raw_true_xy = y_true[l][..., :2]*grid_shapes[l][::-1] - grid 381 | raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1]) 382 | raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh)) # avoid log(0)=-inf 383 | box_loss_scale = 2 - y_true[l][...,2:3]*y_true[l][...,3:4] 384 | 385 | # Find ignore mask, iterate over each of batch. 386 | ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True) 387 | object_mask_bool = K.cast(object_mask, 'bool') 388 | def loop_body(b, ignore_mask): 389 | true_box = tf.boolean_mask(y_true[l][b,...,0:4], object_mask_bool[b,...,0]) 390 | iou = box_iou(pred_box[b], true_box) 391 | best_iou = K.max(iou, axis=-1) 392 | ignore_mask = ignore_mask.write(b, K.cast(best_iou=2: 114 | self.yolo_model = multi_gpu_model(self.yolo_model, gpus=self.gpu_num) 115 | boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors, 116 | len(self.class_names), self.input_image_shape, 117 | score_threshold=self.score, iou_threshold=self.iou) 118 | return boxes, scores, classes 119 | 120 | def detect_image(self, image): 121 | start = timer() 122 | 123 | if self.model_image_size != (None, None): 124 | assert self.model_image_size[0]%32 == 0, 'Multiples of 32 required' 125 | assert self.model_image_size[1]%32 == 0, 'Multiples of 32 required' 126 | boxed_image = letterbox_image(image, tuple(reversed(self.model_image_size))) 127 | else: 128 | new_image_size = (image.width - (image.width % 32), 129 | image.height - (image.height % 32)) 130 | boxed_image = letterbox_image(image, new_image_size) 131 | image_data = np.array(boxed_image, dtype='float32') 132 | 133 | print(image_data.shape) 134 | image_data /= 255. 135 | image_data = np.expand_dims(image_data, 0) # Add batch dimension. 
136 | 
137 | out_boxes, out_scores, out_classes = self.sess.run(
138 | [self.boxes, self.scores, self.classes],
139 | feed_dict={
140 | self.yolo_model.input: image_data,
141 | self.input_image_shape: [image.size[1], image.size[0]],
142 | K.learning_phase(): 0
143 | })
144 | 
145 | print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
146 | 
147 | font = ImageFont.truetype(font='font/FiraMono-Medium.otf',
148 | size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
149 | thickness = (image.size[0] + image.size[1]) // 300
150 | 
151 | # # Save the number of detected boxes (added)
152 | # file.write('find ' + str(len(out_boxes)) + ' target(s) \n')
153 | 
154 | for i, c in reversed(list(enumerate(out_classes))):
155 | predicted_class = self.class_names[c]
156 | box = out_boxes[i]
157 | score = out_scores[i]
158 | 
159 | label = '{} {:.2f}'.format(predicted_class, score)
160 | draw = ImageDraw.Draw(image)
161 | label_size = draw.textsize(label, font)
162 | 
163 | top, left, bottom, right = box
164 | top = max(0, np.floor(top + 0.5).astype('int32'))
165 | left = max(0, np.floor(left + 0.5).astype('int32'))
166 | bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
167 | right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
168 | 
169 | # # Write the detected class, score and box position (added)
170 | # file.write(
171 | # predicted_class + ' score: ' + str(score) + ' \nlocation: top: ' + str(top) + '、 bottom: ' + str(
172 | # bottom) + '、 left: ' + str(left) + '、 right: ' + str(right) + '\n')
173 | 
174 | file.write(predicted_class + ' ' + str(score) + ' ' + str(left) + ' ' + str(top) + ' ' + str(right) + ' ' + str(bottom) + ';')
175 | 
176 | print(label, (left, top), (right, bottom))
177 | 
178 | if top - label_size[1] >= 0:
179 | text_origin = np.array([left, top - label_size[1]])
180 | else:
181 | text_origin = np.array([left, top + 1])
182 | 
183 | # My kingdom for a good redistributable image drawing library.
184 | for i in range(thickness):
185 | draw.rectangle(
186 | [left + i, top + i, right - i, bottom - i],
187 | outline=self.colors[c])
188 | draw.rectangle(
189 | [tuple(text_origin), tuple(text_origin + label_size)],
190 | fill=self.colors[c])
191 | draw.text(text_origin, label, fill=(0, 0, 0), font=font)
192 | del draw
193 | end = timer()
194 | print(end - start)
195 | return image
196 | 
197 | def close_session(self):
198 | self.sess.close()
199 | 
200 | 
201 | 
202 | def detect_video(yolo, video_path, output_path=""):
203 | import cv2
204 | vid = cv2.VideoCapture(video_path)
205 | if not vid.isOpened():
206 | raise IOError("Couldn't open webcam or video")
207 | video_FourCC = int(vid.get(cv2.CAP_PROP_FOURCC)) # get the video codec (MPEG4/H264)
208 | video_fps = vid.get(cv2.CAP_PROP_FPS)
209 | video_size = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)),
210 | int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
211 | isOutput = True if output_path != "" else False
212 | if isOutput:
213 | print("!!! TYPE:", type(output_path), type(video_FourCC), type(video_fps), type(video_size))
214 | out = cv2.VideoWriter(output_path, video_FourCC, video_fps, video_size)
215 | accum_time = 0
216 | curr_fps = 0
217 | fps = "FPS: ??"
218 | prev_time = timer()
219 | while True:
220 | return_value, frame = vid.read()
221 | image = Image.fromarray(frame) # convert the numpy array to a PIL Image
222 | image = yolo.detect_image(image)
223 | result = np.asarray(image)
224 | curr_time = timer()
225 | exec_time = curr_time - prev_time
226 | prev_time = curr_time
227 | accum_time = accum_time + exec_time
228 | curr_fps = curr_fps + 1
229 | if accum_time > 1:
230 | accum_time = accum_time - 1
231 | fps = "FPS: " + str(curr_fps)
232 | curr_fps = 0
233 | cv2.putText(result, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
234 | fontScale=0.50, color=(255, 0, 0), thickness=2)
235 | cv2.namedWindow("result", cv2.WINDOW_NORMAL)
236 | cv2.imshow("result", result)
237 | if isOutput:
238 | out.write(result)
239 | if cv2.waitKey(1) & 0xFF == ord('q'):
240 | break
241 | yolo.close_session()
242 | 
243 | 
244 | # Batch-process the test images
245 | if __name__ == '__main__':
246 | # read the test image list
247 | with open("A05_helmet/ImageSets/Main/test.txt", 'r') as f: # open the file
248 | test_list = f.readlines() # read all lines
249 | test_list = [x.strip() for x in test_list if x.strip() != ''] # strip '\n' and drop empty lines
250 | # print(test_list)
251 | 
252 | t1 = time.time()
253 | yolo = YOLO()
254 | 
255 | for filename in test_list:
256 | image_path = 'D:/test/testimages/'+filename+'.jpg'
257 | portion = os.path.split(image_path)
258 | # file.write(portion[1]+' detect_result:\n')
259 | file.write(image_path + ' ')
260 | image = Image.open(image_path)
261 | image_mAP_save_path = 'D:/mAP/mAP-master/input/images-optional/'
262 | image.save(image_mAP_save_path + filename + '.jpg')
263 | r_image = yolo.detect_image(image)
264 | file.write('\n')
265 | #r_image.show() # show the detection result
266 | image_save_path = './result/result_'+portion[1]
267 | print('detection result saved to: '+image_save_path)
268 | r_image.save(image_save_path)
269 | 
270 | time_sum = time.time() - t1
271 | # file.write('time sum: '+str(time_sum)+'s')
272 | print('time sum:',time_sum)
273 | file.close()
274 | yolo.close_session()
275 | 
276 | 
--------------------------------------------------------------------------------
/yolo3/utils.py:
--------------------------------------------------------------------------------
1 | """Miscellaneous utility functions."""
2 | 
3 | from functools import reduce
4 | 
5 | from PIL import Image
6 | import numpy as np
7 | from matplotlib.colors import rgb_to_hsv, hsv_to_rgb
8 | 
9 | def compose(*funcs):
10 | """Compose arbitrarily many functions, evaluated left to right.
11 | 12 | Reference: https://mathieularose.com/function-composition-in-python/ 13 | """ 14 | # return lambda x: reduce(lambda v, f: f(v), funcs, x) 15 | if funcs: 16 | return reduce(lambda f, g: lambda *a, **kw: g(f(*a, **kw)), funcs) 17 | else: 18 | raise ValueError('Composition of empty sequence not supported.') 19 | 20 | def letterbox_image(image, size): 21 | '''resize image with unchanged aspect ratio using padding''' 22 | iw, ih = image.size 23 | w, h = size 24 | scale = min(w/iw, h/ih) 25 | nw = int(iw*scale) 26 | nh = int(ih*scale) 27 | 28 | image = image.resize((nw,nh), Image.BICUBIC) 29 | new_image = Image.new('RGB', size, (128,128,128)) 30 | new_image.paste(image, ((w-nw)//2, (h-nh)//2)) 31 | return new_image 32 | 33 | def rand(a=0, b=1): 34 | return np.random.rand()*(b-a) + a 35 | 36 | def get_random_data(annotation_line, input_shape, random=True, max_boxes=20, jitter=.3, hue=.1, sat=1.5, val=1.5, proc_img=True): 37 | '''random preprocessing for real-time data augmentation''' 38 | line = annotation_line.split() 39 | image = Image.open(line[0]) 40 | iw, ih = image.size 41 | h, w = input_shape 42 | box = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]]) 43 | 44 | if not random: 45 | # resize image 46 | scale = min(w/iw, h/ih) 47 | nw = int(iw*scale) 48 | nh = int(ih*scale) 49 | dx = (w-nw)//2 50 | dy = (h-nh)//2 51 | image_data=0 52 | if proc_img: 53 | image = image.resize((nw,nh), Image.BICUBIC) 54 | new_image = Image.new('RGB', (w,h), (128,128,128)) 55 | new_image.paste(image, (dx, dy)) 56 | image_data = np.array(new_image)/255. 57 | 58 | # correct boxes 59 | box_data = np.zeros((max_boxes,5)) 60 | if len(box)>0: 61 | np.random.shuffle(box) 62 | if len(box)>max_boxes: box = box[:max_boxes] 63 | box[:, [0,2]] = box[:, [0,2]]*scale + dx 64 | box[:, [1,3]] = box[:, [1,3]]*scale + dy 65 | box_data[:len(box)] = box 66 | 67 | return image_data, box_data 68 | 69 | # resize image 70 | new_ar = w/h * rand(1-jitter,1+jitter)/rand(1-jitter,1+jitter) 71 | scale = rand(.25, 2) 72 | if new_ar < 1: 73 | nh = int(scale*h) 74 | nw = int(nh*new_ar) 75 | else: 76 | nw = int(scale*w) 77 | nh = int(nw/new_ar) 78 | image = image.resize((nw,nh), Image.BICUBIC) 79 | 80 | # place image 81 | dx = int(rand(0, w-nw)) 82 | dy = int(rand(0, h-nh)) 83 | new_image = Image.new('RGB', (w,h), (128,128,128)) 84 | new_image.paste(image, (dx, dy)) 85 | image = new_image 86 | 87 | # flip image or not 88 | flip = rand()<.5 89 | if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT) 90 | 91 | # distort image 92 | hue = rand(-hue, hue) 93 | sat = rand(1, sat) if rand()<.5 else 1/rand(1, sat) 94 | val = rand(1, val) if rand()<.5 else 1/rand(1, val) 95 | x = rgb_to_hsv(np.array(image)/255.) 
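# HSV color jitter: the hue channel is shifted by a random offset in [-hue, hue]
# (wrapping values back into [0, 1]), saturation and value are each scaled by a
# random factor drawn from [1/sat, sat] and [1/val, val], and the result is
# clipped to [0, 1] before converting back to RGB.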
96 | x[..., 0] += hue 97 | x[..., 0][x[..., 0]>1] -= 1 98 | x[..., 0][x[..., 0]<0] += 1 99 | x[..., 1] *= sat 100 | x[..., 2] *= val 101 | x[x>1] = 1 102 | x[x<0] = 0 103 | image_data = hsv_to_rgb(x) # numpy array, 0 to 1 104 | 105 | # correct boxes 106 | box_data = np.zeros((max_boxes,5)) 107 | if len(box)>0: 108 | np.random.shuffle(box) 109 | box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx 110 | box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy 111 | if flip: box[:, [0,2]] = w - box[:, [2,0]] 112 | box[:, 0:2][box[:, 0:2]<0] = 0 113 | box[:, 2][box[:, 2]>w] = w 114 | box[:, 3][box[:, 3]>h] = h 115 | box_w = box[:, 2] - box[:, 0] 116 | box_h = box[:, 3] - box[:, 1] 117 | box = box[np.logical_and(box_w>1, box_h>1)] # discard invalid box 118 | if len(box)>max_boxes: box = box[:max_boxes] 119 | box_data[:len(box)] = box 120 | 121 | return image_data, box_data 122 | -------------------------------------------------------------------------------- /yolo_video.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import argparse 3 | from yolo import YOLO, detect_video 4 | from PIL import Image 5 | 6 | def detect_img(yolo): 7 | while True: 8 | img = input('Input image filename:') 9 | try: 10 | image = Image.open(img) 11 | except: 12 | print('Open Error! Try again!') 13 | continue 14 | else: 15 | r_image = yolo.detect_image(image) 16 | r_image.show() 17 | yolo.close_session() 18 | 19 | FLAGS = None 20 | 21 | if __name__ == '__main__': 22 | # class YOLO defines the default value, so suppress any default here 23 | parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS) 24 | ''' 25 | Command line options 26 | ''' 27 | parser.add_argument( 28 | '--model', type=str, 29 | help='path to model weight file, default ' + YOLO.get_defaults("model_path") 30 | ) 31 | 32 | parser.add_argument( 33 | '--anchors', type=str, 34 | help='path to anchor definitions, default ' + YOLO.get_defaults("anchors_path") 35 | ) 36 | 37 | parser.add_argument( 38 | '--classes', type=str, 39 | help='path to class definitions, default ' + YOLO.get_defaults("classes_path") 40 | ) 41 | 42 | parser.add_argument( 43 | '--gpu_num', type=int, 44 | help='Number of GPU to use, default ' + str(YOLO.get_defaults("gpu_num")) 45 | ) 46 | 47 | parser.add_argument( 48 | '--image', default=False, action="store_true", 49 | help='Image detection mode, will ignore all positional arguments' 50 | ) 51 | ''' 52 | Command line positional arguments -- for video detection mode 53 | ''' 54 | parser.add_argument( 55 | "--input", nargs='?', type=str,required=False,default='./path2your_video', 56 | help = "Video input path" 57 | ) 58 | 59 | parser.add_argument( 60 | "--output", nargs='?', type=str, default="", 61 | help = "[Optional] Video output path" 62 | ) 63 | 64 | FLAGS = parser.parse_args() 65 | 66 | if FLAGS.image: 67 | """ 68 | Image detection mode, disregard any remaining command line arguments 69 | """ 70 | print("Image detection mode") 71 | if "input" in FLAGS: 72 | print(" Ignoring remaining command line arguments: " + FLAGS.input + "," + FLAGS.output) 73 | detect_img(YOLO(**vars(FLAGS))) 74 | elif "input" in FLAGS: 75 | detect_video(YOLO(**vars(FLAGS)), FLAGS.input, FLAGS.output) 76 | else: 77 | print("Must specify at least video_input_path. 
See usage with --help.") 78 | -------------------------------------------------------------------------------- /yolov3.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=16 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 
212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 
| pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | size=3 578 | stride=1 579 | pad=1 580 | filters=1024 581 | activation=leaky 582 | 583 | [convolutional] 584 | batch_normalize=1 585 | filters=512 586 | size=1 587 | stride=1 588 | pad=1 589 | activation=leaky 590 | 591 | [convolutional] 592 | batch_normalize=1 593 | size=3 594 | stride=1 595 | pad=1 596 | filters=1024 597 | activation=leaky 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=21 604 | activation=linear 605 | 606 | 607 | [yolo] 608 | mask = 6,7,8 609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 610 | classes=2 611 | num=9 612 | jitter=.3 613 | ignore_thresh = .5 614 | truth_thresh = 1 615 | random=0 616 | 617 | 618 | [route] 619 | layers = -4 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | activation=leaky 628 | 629 | [upsample] 630 | stride=2 631 | 632 | [route] 633 | layers = -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 646 | batch_normalize=1 647 | size=3 648 | stride=1 
649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | stride=1 681 | pad=1 682 | filters=512 683 | activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | pad=1 689 | filters=21 690 | activation=linear 691 | 692 | 693 | [yolo] 694 | mask = 3,4,5 695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 696 | classes=2 697 | num=9 698 | jitter=.3 699 | ignore_thresh = .5 700 | truth_thresh = 1 701 | random=0 702 | 703 | 704 | 705 | [route] 706 | layers = -4 707 | 708 | [convolutional] 709 | batch_normalize=1 710 | filters=128 711 | size=1 712 | stride=1 713 | pad=1 714 | activation=leaky 715 | 716 | [upsample] 717 | stride=2 718 | 719 | [route] 720 | layers = -1, 36 721 | 722 | 723 | 724 | [convolutional] 725 | batch_normalize=1 726 | filters=128 727 | size=1 728 | stride=1 729 | pad=1 730 | activation=leaky 731 | 732 | [convolutional] 733 | batch_normalize=1 734 | size=3 735 | stride=1 736 | pad=1 737 | filters=256 738 | activation=leaky 739 | 740 | [convolutional] 741 | batch_normalize=1 742 | filters=128 743 | size=1 744 | stride=1 745 | pad=1 746 | activation=leaky 747 | 748 | [convolutional] 749 | batch_normalize=1 750 | size=3 751 | stride=1 752 | pad=1 753 | filters=256 754 | activation=leaky 755 | 756 | [convolutional] 757 | batch_normalize=1 758 | filters=128 759 | size=1 760 | stride=1 761 | pad=1 762 | activation=leaky 763 | 764 | [convolutional] 765 | batch_normalize=1 766 | size=3 767 | stride=1 768 | pad=1 769 | filters=256 770 | activation=leaky 771 | 772 | [convolutional] 773 | size=1 774 | stride=1 775 | pad=1 776 | filters=21 777 | activation=linear 778 | 779 | 780 | [yolo] 781 | mask = 0,1,2 782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 783 | classes=2 784 | num=9 785 | jitter=.3 786 | ignore_thresh = .5 787 | truth_thresh = 1 788 | random=0 789 | 790 | --------------------------------------------------------------------------------
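A minimal, hypothetical usage sketch (not part of the repository) tying together the pieces dumped above: the data helpers in yolo3/utils.py and the detection-head sizing in yolov3.cfg. The annotation-line format is inferred from how get_random_data parses its input (image path, then comma-separated x_min,y_min,x_max,y_max,class_id boxes); the sample image name comes from testimages/ in the tree, but the box coordinates are made up for illustration.

# Hypothetical usage sketch; assumes the repo root is on PYTHONPATH and that
# testimages/1.jpg exists (the box values below are invented for illustration).
import numpy as np
from PIL import Image
from yolo3.utils import letterbox_image, get_random_data

annotation_line = "testimages/1.jpg 120,80,260,210,0"

# Deterministic letterbox resize, as used at inference time.
image = Image.open(annotation_line.split()[0])
boxed = letterbox_image(image, (416, 416))
print(np.array(boxed).shape)             # (416, 416, 3)

# Randomly augmented training sample: image scaled to [0, 1],
# boxes zero-padded to max_boxes rows of (x1, y1, x2, y2, class).
image_data, box_data = get_random_data(annotation_line, input_shape=(416, 416), random=True)
print(image_data.shape, box_data.shape)  # (416, 416, 3) (20, 5)

# Head sizing used in yolov3.cfg: each [yolo] layer predicts 3 anchors, and
# each anchor needs 4 box offsets + 1 objectness score + num_classes scores.
num_classes = 2
filters = 3 * (num_classes + 5)
print(filters)                           # 21, matching filters=21 before each [yolo] layer

The filters=21 value explains why every convolutional layer immediately preceding a [yolo] block in yolov3.cfg differs from the stock COCO config (which uses 255 for 80 classes): this project detects 2 classes, so 3 * (2 + 5) = 21.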