├── Auto_driving_car_yolo.py ├── README.md ├── coco_classes.txt ├── images的网盘地址.txt ├── keras_yolo.py ├── object_classes.txt ├── yolo_utils.py ├── yolo_自动驾驶_车辆识别介绍.docx └── yolo_自动驾驶_车辆识别介绍_20190124175811.pdf /Auto_driving_car_yolo.py: -------------------------------------------------------------------------------- 1 | # -*-coding:utf-8 -*- 2 | """ 3 | @project:untitled3 4 | @author:Kun_J 5 | @file:.py 6 | @ide:untitled3 7 | @time:2019-01-22 18:12:32 8 | @month:一月 9 | """ 10 | import argparse 11 | import os 12 | import matplotlib.pyplot as plt 13 | import scipy.io 14 | import scipy.misc 15 | import numpy as np 16 | import pandas as pd 17 | import PIL 18 | import tensorflow as tf 19 | from keras import backend as K 20 | from keras.layers import Input,Lambda,Conv2D 21 | from keras.models import load_model,Model 22 | from yolo_utils import read_classes, read_anchors, generate_colors, preprocess_image, draw_boxes, scale_boxes 23 | from keras_yolo import yolo_head, yolo_boxes_to_corners, preprocess_true_boxes, yolo_loss, yolo_body 24 | 25 | 26 | def yolo_filter_boxes(box_confidence, boxes, box_class_probs, thresthod = .6): 27 | """ 28 | Filters YOLO boxes by thresholding on object and class confidence. 29 | :param box_confidence: --tensor of shape (19, 19, 5,1) 30 | :param boxes: -- tensor of shape (19, 19, 5, 4) (后面用到的是边角corner coordinate) 31 | :param box_class_probs: -- tensor of shape (19, 19, 5, 80) 32 | :param thresthod: -- real value, if [highest class probability score < threshold],then get rid of the corresponding box] 33 | :return: 34 | scores -- tensor of shape(None, ),containing the class probability score for selected boxes 35 | boxes -- tensor of shape(None, 4),containing(b_x, b_y, b_h, b_w) coordinates of selected boxes 36 | classes -- tensor of shape(None, ),containing the index of the class detected by the selected boxes 37 | """ 38 | ## First step:计算锚框的得分 39 | box_scores = box_confidence * box_class_probs 40 | ## Second step:找到最大值的锚框索引以及对应的最大值的锚框 41 | box_classes = K.argmax(box_scores,axis=-1) 42 | box_class_scores = K.max(box_scores,axis=-1) 43 | ## Third step:根据阈值创建掩码 44 | filtering_mask = (box_class_scores>=thresthod) 45 | ## 对scores, boxes 以及classes使用掩码 46 | scores = tf.boolean_mask(box_class_scores,filtering_mask) 47 | boxes = tf.boolean_mask(boxes, filtering_mask) 48 | classes = tf.boolean_mask(box_classes, filtering_mask) 49 | 50 | return scores, boxes, classes 51 | # def yolo_filter_boxes_test(): 52 | # with tf.Session() as test_a: 53 | # box_confidence = tf.random_normal([19, 19, 5, 1], mean=1, stddev=4, seed=1) 54 | # boxes = tf.random_normal([19, 19, 5, 4], mean=1, stddev=4, seed=1) 55 | # box_class_probs = tf.random_normal([19, 19, 5, 80], mean=1, stddev=4, seed=1) 56 | # scores, boxes, classes = yolo_filter_boxes(box_confidence, boxes, box_class_probs, thresthod=0.5) 57 | # print("scores[2] = " + str(scores[2].eval())) 58 | # print("boxes[2] = " + str(boxes[2].eval())) 59 | # print("classes[2] = " + str(classes[2].eval())) 60 | # print("scores.shape = " + str(scores.shape)) 61 | # print("boxes.shape = " + str(boxes.shape)) 62 | # print("classes.shape = " + str(classes.shape)) 63 | # test_a.close() 64 | ##yolo_filter_boxes_test() 65 | 66 | def iou(box1, box2): 67 | """ 68 | 实现两个锚框的交并比的计算 69 | :param box1: 第一个锚框,shape(x1,y1,x2,y2) 70 | :param box2: 第二个锚框,shape(x1,y1,x2,y2) 71 | :return: 72 | iou:实数,交并比 73 | """ 74 | # 计算相交的区域的面积 75 | xi1 = np.maximum(box1[0], box2[0]) 76 | yi1 = np.maximum(box1[1], box2[1]) 77 | xi2 = np.minimum(box1[2], box2[2]) 78 | yi2 = np.minimum(box1[3], box2[3]) 79 | inter_area = (xi1 - xi2) * (yi1 - yi2) 80 | 81 | # 计算并集 Union(A,B) = A + B - Inter(A, B) 82 | box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1]) 83 | box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1]) 84 | union_area = box1_area + box2_area - inter_area 85 | 86 | # 计算交并比 87 | iou = inter_area / union_area 88 | 89 | return iou 90 | def iou_test(): 91 | box1 = (2,1,4,3) 92 | box2 = (1,2,3,4) 93 | print("iou = " + str(iou(box1, box2))) 94 | ##iou_test() 95 | 96 | """实现非最大值抑制函数: 97 | 1:选择分值高度额锚框 98 | 2:计算与其他框的重叠部分,并删除与该锚框交叠较大的网格 99 | 3:返回第一步,直到不再有比当前选中的框得分更低的框 100 | Note:这将删除与选定框有较大重叠的其他所有锚框,只有得分最高的锚框仍然存在""" 101 | 102 | def yolo_non_max_suppression(scores, boxes, classes, max_boxes = 10, iou_threshold = 0.5): 103 | """ 104 | Applies Non-max suppression (NMS) to set of boxes 105 | Implement yolo_non_max_suppression using Tensorflow 106 | :param scores: tensor类型,(None, ),yolo_filter_boxes()的输出 107 | :param boxes: tensor类型,(None,4),yolo_filter_boxes()的输出 108 | :param classes: tensor类型,(None, ),yolo_filter_boxes()的输出 109 | :param max_boxes: Integer,预测锚框数量的最大值 110 | :param iou_threshold: real value,交并比阈值 111 | :return: 112 | scores: tensor,( ,None),predicted score for each box 113 | boxes: tensor,(4,None),predicted box coordinates 114 | classes: tensor,( ,None),predicted class for each box 115 | Note:The "None" dimension of the output tensors has obviously to be less than max_boxes. Note also that this 116 | function will transpose the shapes of scores, boxes, classes. This is made for convenience. 117 | """ 118 | # 用于te.image.non_max_suppression() 119 | max_boxes_tensor = K.variable(max_boxes, dtype="int32") 120 | # 初始化变量max_boxes_tensor 121 | K.get_session().run(tf.variables_initializer([max_boxes_tensor])) 122 | # 使用tf.image.non_max_suppression()来获取我们保留框对应的索引列表 123 | nms_indices = tf.image.non_max_suppression(boxes, scores, max_boxes,iou_threshold) 124 | 125 | # 使用K.gather()来选择保留的锚框 126 | scores = K.gather(scores, nms_indices) 127 | boxes = K.gather(boxes, nms_indices) 128 | classes = K.gather(classes, nms_indices) 129 | 130 | return scores, boxes, classes 131 | def yolo_non_max_suppression_test(): 132 | with tf.Session() as test_b: 133 | scores = tf.random_normal([54, ], mean=1, stddev=4, seed=1) 134 | boxes = tf.random_normal([54,4],mean=1, stddev=4, seed=1) 135 | classes = tf.random_normal([54, ], mean=1, stddev=4, seed=1) 136 | scores, boxes, classes = yolo_non_max_suppression(scores, boxes, classes) 137 | print("scores[2] = " + str(scores[2].eval())) 138 | print("boxes[2] = " + str(boxes[2].eval())) 139 | print("classes[2] = " + str(classes[2].eval())) 140 | print("scores.shape = " + str(scores.eval().shape)) 141 | print("boxes.shape = " + str(boxes.eval().shape)) 142 | print("classes.shape = " + str(classes.eval().shape)) 143 | #yolo_non_max_suppression_test() 144 | def yolo_eval(yolo_outputs, image_shape=(720.,1280.), max_boxes=10, score_threshold=0.6, iou_threshold=0.5): 145 | """ 146 | 将YOLO编码的输出(很多框)转换为预测框以及他们的分数、框坐标和类 147 | :param yolo_outputs: 编码模型的输出(对于维度为608*608*3的图片),包含4个tensor类型的变量: 148 | box_confidence:tensor类型,shape of (None,19,19,5,1) 149 | box_xy:tensor类型,shape of (None,19,19,5,2) 150 | box_wh:tensor类型,shape of (None,19,19,5,2) 151 | box_class_probs:tensor类型, shape of (None,19,19,5,80) 152 | :param image_shape:tensor类型,shape of (2, ),包含了输入的图像的维度,这里是(608, 608) 153 | :param max_boxes:integer,预测的锚框数量的最大值 154 | :param score_threshold:real value,可能的阈值 155 | :param iou_threshold:real value,交并比阈值 156 | :return: 157 | scores:tensor类型,shape of (None, ),每个锚框的预测的可能值 158 | boxes:tensor类型,shape of (None,4),预测锚框的坐标 159 | classes:tensor类型,shape of (None, ),每个锚框的预测的分类 160 | """ 161 | # 获取YOLO模型的输出 162 | box_confidence, box_xy, box_wh, box_class_probs = yolo_outputs 163 | 164 | # 中心点转换为边角 165 | boxes = yolo_boxes_to_corners(box_xy, box_wh) 166 | 167 | # score过滤,第一个过滤器 168 | scores, boxes, classes = yolo_filter_boxes(box_confidence,boxes,box_class_probs, score_threshold) 169 | 170 | # 缩放锚框,以适应原始图像 171 | boxes = scale_boxes(boxes, image_shape) 172 | 173 | # 使用非最大值抑制,第二个过滤器 174 | scores, boxes, classes = yolo_non_max_suppression(scores, boxes, classes, max_boxes, iou_threshold) 175 | return scores, boxes, classes 176 | def yolo_eval_test(): 177 | with tf.Session() as sess: 178 | yolo_outputs = (tf.random_normal([19,19,5,1],mean=1,stddev=4,seed=1), 179 | tf.random_normal([19,19,5,2],mean=1,stddev=4,seed=1), 180 | tf.random_normal([19,19,5,2],mean=1,stddev=4,seed=1), 181 | tf.random_normal([19,19,5,80],mean=1,stddev=4,seed=1)) 182 | scores, boxes, classes = yolo_eval(yolo_outputs) 183 | print("scores[2] = " + str(scores[2].eval())) 184 | print("boxes[2] = " + str(boxes[2].eval())) 185 | print("classes[2] = " + str(classes[2].eval())) 186 | print("scores.shape = " + str(scores.eval().shape)) 187 | print("boxes.shape = " + str(boxes.eval().shape)) 188 | print("classes.shape = " + str(classes.eval().shape)) 189 | #yolo_eval_test() 190 | """ 191 | 对YOLO的总结: 192 | 1、输入图像为(608, 608) 193 | 2、输入的图像先要经过一个CNN模型,返回一个(19, 19, 5, 85)的输出 194 | 3、再对最后的两维降维,输出变成(19, 19, 5, 425): 195 | ·每个19*19的单元格拥有425个数字 196 | ·425=5*85,即每个单元格拥有5个锚框,每个锚框由5个基本信息+80个分类预测构成 197 | ·85=5+80,其中5个基本信息是(Pc,Px,Py,Ph,Pw),剩下的80个就是80个分类预测 198 | 4、然后我们会根据一下规则选择锚框: 199 | ·预测分数阈值:丢弃分数低于阈值的分类的锚框 200 | ·非最大值抑制:计算交并比,并避免选择重叠的框 201 | 5、最后给出YOLO的输出 202 | """ 203 | sess = K.get_session() 204 | class_names = read_classes('F:\\吴恩达DL作业\课后作业\\代码作业\\第四课第三周编程作业\\Car detection for Autonomous Driving\\model_data\\coco_classes.txt') 205 | anchors = read_anchors('F:\\吴恩达DL作业\课后作业\\代码作业\\第四课第三周编程作业\\Car detection for Autonomous Driving\\model_data\\yolo_anchors.txt') 206 | image_shape = (720., 1280.) 207 | yolo_model = load_model('F:\\吴恩达DL作业\课后作业\\代码作业\\第四课第三周编程作业\\Car detection for Autonomous Driving\\model_data\\yolo.h5') 208 | yolo_model.summary() 209 | 210 | yolo_outputs = yolo_head(yolo_model.output, anchors, len(class_names)) 211 | scores, boxes, classes = yolo_eval(yolo_outputs, image_shape) 212 | 213 | def predict(sess, image_file, is_show_info=True, is_plot=True): 214 | """ 215 | 运行存储在sess的计算图以预测image_file的边界框,打印出预测图与信息 216 | :param sess: 包含了YOLO计算图的TensorFlow/keras的会话 217 | :param imagefile: 存储images文件下的图片名称 218 | :param is_show_info: 219 | :param is_plot: 220 | :return: 221 | out_scores:tensor, (None, ),锚框的预测的可能值 222 | out_boxes:tensor, (None,4),包含了锚框位置信息 223 | out_classes:tensor, (None, ),锚框的预测的分类索引 224 | """ 225 | image, image_data = preprocess_image(image_file, model_image_size =(608, 608))###预处理图像 226 | out_scores, out_boxes, out_classes = sess.run([scores,boxes,classes],feed_dict={yolo_model.input:image_data, K.learning_phase():0}) 227 | if is_show_info: 228 | print("在" + str(image_file)+"中找到"+str(len(out_boxes))+"个锚框。") 229 | colors = generate_colors(class_names) 230 | draw_boxes(image, out_scores, out_boxes, out_classes, class_names, colors) 231 | image.save(os.path.join('C:\\Users\\korey\\Desktop\\car',image_file), quality=90) 232 | if is_plot: 233 | out_image = plt.imread(os.path.join('C:\\Users\\korey\\Desktop\\car',image_file)) 234 | plt.imshow(out_image) 235 | plt.show() 236 | return out_scores, out_boxes, out_classes 237 | 238 | #out_scores, out_boxes, out_classes = predict(sess,'test.jpg') 239 | # image_test = plt.imread('test.jpg') 240 | # plt.imshow(image_test) 241 | # plt.show() 242 | rootdir = 'F:\\吴恩达DL作业\\课后作业\\代码作业\\第四课第三周编程作业\\Car detection for Autonomous Driving\\images' 243 | for parent,dirnames,filenames in os.walk(rootdir):#1.父目录 2.所有文件夹名字(不含路径) 3.所有文件名字 244 | for filename in filenames: 245 | print('当前图片:'+str( os.path.join(parent, filename))) 246 | out_scores, out_boxes, out_classes = predict(sess, os.path.join(parent, filename)) 247 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Auto_driving_recognize 2 | 自动驾驶,汽车识别 3 | 主要看Auto_driving_car_yolo.py 4 | yolo_utils.py 5 | keras.py三个代码文件 6 | -------------------------------------------------------------------------------- /coco_classes.txt: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /images的网盘地址.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hierarchyJK/Auto_driving_recognize/fa7825099a27d9dceb6cf6e88d892824618f6350/images的网盘地址.txt -------------------------------------------------------------------------------- /keras_yolo.py: -------------------------------------------------------------------------------- 1 | """YOLO_v2 Model Defined in Keras.""" 2 | import sys 3 | 4 | import numpy as np 5 | import tensorflow as tf 6 | from keras import backend as K 7 | from keras.layers import Lambda 8 | from keras.layers.merge import concatenate 9 | from keras.models import Model 10 | 11 | from utils import compose 12 | from keras_darknet19 import (DarknetConv2D, DarknetConv2D_BN_Leaky, darknet_body) 13 | 14 | sys.path.append('..') 15 | 16 | voc_anchors = np.array( 17 | [[1.08, 1.19], [3.42, 4.41], [6.63, 11.38], [9.42, 5.11], [16.62, 10.52]]) 18 | 19 | voc_classes = [ 20 | "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", 21 | "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", 22 | "pottedplant", "sheep", "sofa", "train", "tvmonitor" 23 | ] 24 | 25 | 26 | def space_to_depth_x2(x): 27 | """Thin wrapper for Tensorflow space_to_depth with block_size=2.""" 28 | # Import currently required to make Lambda work. 29 | # See: https://github.com/fchollet/keras/issues/5088#issuecomment-273851273 30 | import tensorflow as tf 31 | return tf.space_to_depth(x, block_size=2) 32 | 33 | 34 | def space_to_depth_x2_output_shape(input_shape): 35 | """Determine space_to_depth output shape for block_size=2. 36 | 37 | Note: For Lambda with TensorFlow backend, output shape may not be needed. 38 | """ 39 | return (input_shape[0], input_shape[1] // 2, input_shape[2] // 2, 4 * 40 | input_shape[3]) if input_shape[1] else (input_shape[0], None, None, 41 | 4 * input_shape[3]) 42 | 43 | 44 | def yolo_body(inputs, num_anchors, num_classes): 45 | """Create YOLO_V2 model CNN body in Keras.""" 46 | darknet = Model(inputs, darknet_body()(inputs)) 47 | conv20 = compose( 48 | DarknetConv2D_BN_Leaky(1024, (3, 3)), 49 | DarknetConv2D_BN_Leaky(1024, (3, 3)))(darknet.output) 50 | 51 | conv13 = darknet.layers[43].output 52 | conv21 = DarknetConv2D_BN_Leaky(64, (1, 1))(conv13) 53 | # TODO: Allow Keras Lambda to use func arguments for output_shape? 54 | conv21_reshaped = Lambda( 55 | space_to_depth_x2, 56 | output_shape=space_to_depth_x2_output_shape, 57 | name='space_to_depth')(conv21) 58 | 59 | x = concatenate([conv21_reshaped, conv20]) 60 | x = DarknetConv2D_BN_Leaky(1024, (3, 3))(x) 61 | x = DarknetConv2D(num_anchors * (num_classes + 5), (1, 1))(x) 62 | return Model(inputs, x) 63 | 64 | 65 | def yolo_head(feats: object, anchors: object, num_classes: object) -> object: 66 | """Convert final layer features to bounding box parameters. 67 | 68 | Parameters 69 | ---------- 70 | feats : tensor 71 | Final convolutional layer features. 72 | anchors : array-like 73 | Anchor box widths and heights. 74 | num_classes : int 75 | Number of target classes. 76 | 77 | Returns 78 | ------- 79 | box_xy : tensor 80 | x, y box predictions adjusted by spatial location in conv layer. 81 | box_wh : tensor 82 | w, h box predictions adjusted by anchors and conv spatial resolution. 83 | box_conf : tensor 84 | Probability estimate for whether each box contains any object. 85 | box_class_pred : tensor 86 | Probability distribution estimate for each box over class labels. 87 | """ 88 | num_anchors = len(anchors) 89 | # Reshape to batch, height, width, num_anchors, box_params. 90 | anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2]) 91 | # Static implementation for fixed models. 92 | # TODO: Remove or add option for static implementation. 93 | # _, conv_height, conv_width, _ = K.int_shape(feats) 94 | # conv_dims = K.variable([conv_width, conv_height]) 95 | 96 | # Dynamic implementation of conv dims for fully convolutional model. 97 | conv_dims = K.shape(feats)[1:3] # assuming channels last 98 | # In YOLO the height index is the inner most iteration. 99 | conv_height_index = K.arange(0, stop=conv_dims[0]) 100 | conv_width_index = K.arange(0, stop=conv_dims[1]) 101 | conv_height_index = K.tile(conv_height_index, [conv_dims[1]]) 102 | 103 | # TODO: Repeat_elements and tf.split doesn't support dynamic splits. 104 | # conv_width_index = K.repeat_elements(conv_width_index, conv_dims[1], axis=0) 105 | conv_width_index = K.tile(K.expand_dims(conv_width_index, 0), [conv_dims[0], 1]) 106 | conv_width_index = K.flatten(K.transpose(conv_width_index)) 107 | conv_index = K.transpose(K.stack([conv_height_index, conv_width_index])) 108 | conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2]) 109 | conv_index = K.cast(conv_index, K.dtype(feats)) 110 | 111 | feats = K.reshape(feats, [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5]) 112 | conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats)) 113 | 114 | # Static generation of conv_index: 115 | # conv_index = np.array([_ for _ in np.ndindex(conv_width, conv_height)]) 116 | # conv_index = conv_index[:, [1, 0]] # swap columns for YOLO ordering. 117 | # conv_index = K.variable( 118 | # conv_index.reshape(1, conv_height, conv_width, 1, 2)) 119 | # feats = Reshape( 120 | # (conv_dims[0], conv_dims[1], num_anchors, num_classes + 5))(feats) 121 | 122 | box_confidence = K.sigmoid(feats[..., 4:5]) 123 | box_xy = K.sigmoid(feats[..., :2]) 124 | box_wh = K.exp(feats[..., 2:4]) 125 | box_class_probs = K.softmax(feats[..., 5:]) 126 | 127 | # Adjust preditions to each spatial grid point and anchor size. 128 | # Note: YOLO iterates over height index before width index. 129 | box_xy = (box_xy + conv_index) / conv_dims 130 | box_wh = box_wh * anchors_tensor / conv_dims 131 | 132 | return box_confidence, box_xy, box_wh, box_class_probs 133 | 134 | 135 | def yolo_boxes_to_corners(box_xy, box_wh): 136 | """Convert YOLO box predictions to bounding box corners.""" 137 | box_mins = box_xy - (box_wh / 2.) 138 | box_maxes = box_xy + (box_wh / 2.) 139 | 140 | return K.concatenate([ 141 | box_mins[..., 1:2], # y_min 142 | box_mins[..., 0:1], # x_min 143 | box_maxes[..., 1:2], # y_max 144 | box_maxes[..., 0:1] # x_max 145 | ]) 146 | 147 | 148 | def yolo_loss(args, 149 | anchors, 150 | num_classes, 151 | rescore_confidence=False, 152 | print_loss=False): 153 | """YOLO localization loss function. 154 | 155 | Parameters 156 | ---------- 157 | yolo_output : tensor 158 | Final convolutional layer features. 159 | 160 | true_boxes : tensor 161 | Ground truth boxes tensor with shape [batch, num_true_boxes, 5] 162 | containing box x_center, y_center, width, height, and class. 163 | 164 | detectors_mask : array 165 | 0/1 mask for detector positions where there is a matching ground truth. 166 | 167 | matching_true_boxes : array 168 | Corresponding ground truth boxes for positive detector positions. 169 | Already adjusted for conv height and width. 170 | 171 | anchors : tensor 172 | Anchor boxes for model. 173 | 174 | num_classes : int 175 | Number of object classes. 176 | 177 | rescore_confidence : bool, default=False 178 | If true then set confidence target to IOU of best predicted box with 179 | the closest matching ground truth box. 180 | 181 | print_loss : bool, default=False 182 | If True then use a tf.Print() to print the loss components. 183 | 184 | Returns 185 | ------- 186 | mean_loss : float 187 | mean localization loss across minibatch 188 | """ 189 | (yolo_output, true_boxes, detectors_mask, matching_true_boxes) = args 190 | num_anchors = len(anchors) 191 | object_scale = 5 192 | no_object_scale = 1 193 | class_scale = 1 194 | coordinates_scale = 1 195 | pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo_head( 196 | yolo_output, anchors, num_classes) 197 | 198 | # Unadjusted box predictions for loss. 199 | # TODO: Remove extra computation shared with yolo_head. 200 | yolo_output_shape = K.shape(yolo_output) 201 | feats = K.reshape(yolo_output, [ 202 | -1, yolo_output_shape[1], yolo_output_shape[2], num_anchors, 203 | num_classes + 5 204 | ]) 205 | pred_boxes = K.concatenate( 206 | (K.sigmoid(feats[..., 0:2]), feats[..., 2:4]), axis=-1) 207 | 208 | # TODO: Adjust predictions by image width/height for non-square images? 209 | # IOUs may be off due to different aspect ratio. 210 | 211 | # Expand pred x,y,w,h to allow comparison with ground truth. 212 | # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params 213 | pred_xy = K.expand_dims(pred_xy, 4) 214 | pred_wh = K.expand_dims(pred_wh, 4) 215 | 216 | pred_wh_half = pred_wh / 2. 217 | pred_mins = pred_xy - pred_wh_half 218 | pred_maxes = pred_xy + pred_wh_half 219 | 220 | true_boxes_shape = K.shape(true_boxes) 221 | 222 | # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params 223 | true_boxes = K.reshape(true_boxes, [ 224 | true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1], true_boxes_shape[2] 225 | ]) 226 | true_xy = true_boxes[..., 0:2] 227 | true_wh = true_boxes[..., 2:4] 228 | 229 | # Find IOU of each predicted box with each ground truth box. 230 | true_wh_half = true_wh / 2. 231 | true_mins = true_xy - true_wh_half 232 | true_maxes = true_xy + true_wh_half 233 | 234 | intersect_mins = K.maximum(pred_mins, true_mins) 235 | intersect_maxes = K.minimum(pred_maxes, true_maxes) 236 | intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.) 237 | intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1] 238 | 239 | pred_areas = pred_wh[..., 0] * pred_wh[..., 1] 240 | true_areas = true_wh[..., 0] * true_wh[..., 1] 241 | 242 | union_areas = pred_areas + true_areas - intersect_areas 243 | iou_scores = intersect_areas / union_areas 244 | 245 | # Best IOUs for each location. 246 | best_ious = K.max(iou_scores, axis=4) # Best IOU scores. 247 | best_ious = K.expand_dims(best_ious) 248 | 249 | # A detector has found an object if IOU > thresh for some true box. 250 | object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious)) 251 | 252 | # TODO: Darknet region training includes extra coordinate loss for early 253 | # training steps to encourage predictions to match anchor priors. 254 | 255 | # Determine confidence weights from object and no_object weights. 256 | # NOTE: YOLO does not use binary cross-entropy here. 257 | no_object_weights = (no_object_scale * (1 - object_detections) * 258 | (1 - detectors_mask)) 259 | no_objects_loss = no_object_weights * K.square(-pred_confidence) 260 | 261 | if rescore_confidence: 262 | objects_loss = (object_scale * detectors_mask * 263 | K.square(best_ious - pred_confidence)) 264 | else: 265 | objects_loss = (object_scale * detectors_mask * 266 | K.square(1 - pred_confidence)) 267 | confidence_loss = objects_loss + no_objects_loss 268 | 269 | # Classification loss for matching detections. 270 | # NOTE: YOLO does not use categorical cross-entropy loss here. 271 | matching_classes = K.cast(matching_true_boxes[..., 4], 'int32') 272 | matching_classes = K.one_hot(matching_classes, num_classes) 273 | classification_loss = (class_scale * detectors_mask * 274 | K.square(matching_classes - pred_class_prob)) 275 | 276 | # Coordinate loss for matching detection boxes. 277 | matching_boxes = matching_true_boxes[..., 0:4] 278 | coordinates_loss = (coordinates_scale * detectors_mask * 279 | K.square(matching_boxes - pred_boxes)) 280 | 281 | confidence_loss_sum = K.sum(confidence_loss) 282 | classification_loss_sum = K.sum(classification_loss) 283 | coordinates_loss_sum = K.sum(coordinates_loss) 284 | total_loss = 0.5 * ( 285 | confidence_loss_sum + classification_loss_sum + coordinates_loss_sum) 286 | if print_loss: 287 | total_loss = tf.Print( 288 | total_loss, [ 289 | total_loss, confidence_loss_sum, classification_loss_sum, 290 | coordinates_loss_sum 291 | ], 292 | message='yolo_loss, conf_loss, class_loss, box_coord_loss:') 293 | 294 | return total_loss 295 | 296 | 297 | def yolo(inputs, anchors, num_classes): 298 | """Generate a complete YOLO_v2 localization model.""" 299 | num_anchors = len(anchors) 300 | body = yolo_body(inputs, num_anchors, num_classes) 301 | outputs = yolo_head(body.output, anchors, num_classes) 302 | return outputs 303 | 304 | 305 | def yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold=.6): 306 | """Filter YOLO boxes based on object and class confidence.""" 307 | 308 | box_scores = box_confidence * box_class_probs 309 | box_classes = K.argmax(box_scores, axis=-1) 310 | box_class_scores = K.max(box_scores, axis=-1) 311 | prediction_mask = box_class_scores >= threshold 312 | 313 | # TODO: Expose tf.boolean_mask to Keras backend? 314 | boxes = tf.boolean_mask(boxes, prediction_mask) 315 | scores = tf.boolean_mask(box_class_scores, prediction_mask) 316 | classes = tf.boolean_mask(box_classes, prediction_mask) 317 | 318 | return boxes, scores, classes 319 | 320 | 321 | def yolo_eval(yolo_outputs, 322 | image_shape, 323 | max_boxes=10, 324 | score_threshold=.6, 325 | iou_threshold=.5): 326 | """Evaluate YOLO model on given input batch and return filtered boxes.""" 327 | box_confidence, box_xy, box_wh, box_class_probs = yolo_outputs 328 | boxes = yolo_boxes_to_corners(box_xy, box_wh) 329 | boxes, scores, classes = yolo_filter_boxes( 330 | box_confidence, boxes, box_class_probs, threshold=score_threshold) 331 | 332 | # Scale boxes back to original image shape. 333 | height = image_shape[0] 334 | width = image_shape[1] 335 | image_dims = K.stack([height, width, height, width]) 336 | image_dims = K.reshape(image_dims, [1, 4]) 337 | boxes = boxes * image_dims 338 | 339 | # TODO: Something must be done about this ugly hack! 340 | max_boxes_tensor = K.variable(max_boxes, dtype='int32') 341 | K.get_session().run(tf.variables_initializer([max_boxes_tensor])) 342 | nms_index = tf.image.non_max_suppression( 343 | boxes, scores, max_boxes_tensor, iou_threshold=iou_threshold) 344 | boxes = K.gather(boxes, nms_index) 345 | scores = K.gather(scores, nms_index) 346 | classes = K.gather(classes, nms_index) 347 | 348 | return boxes, scores, classes 349 | 350 | 351 | def preprocess_true_boxes(true_boxes, anchors, image_size): 352 | """Find detector in YOLO where ground truth box should appear. 353 | 354 | Parameters 355 | ---------- 356 | true_boxes : array 357 | List of ground truth boxes in form of relative x, y, w, h, class. 358 | Relative coordinates are in the range [0, 1] indicating a percentage 359 | of the original image dimensions. 360 | anchors : array 361 | List of anchors in form of w, h. 362 | Anchors are assumed to be in the range [0, conv_size] where conv_size 363 | is the spatial dimension of the final convolutional features. 364 | image_size : array-like 365 | List of image dimensions in form of h, w in pixels. 366 | 367 | Returns 368 | ------- 369 | detectors_mask : array 370 | 0/1 mask for detectors in [conv_height, conv_width, num_anchors, 1] 371 | that should be compared with a matching ground truth box. 372 | matching_true_boxes: array 373 | Same shape as detectors_mask with the corresponding ground truth box 374 | adjusted for comparison with predicted parameters at training time. 375 | """ 376 | height, width = image_size 377 | num_anchors = len(anchors) 378 | # Downsampling factor of 5x 2-stride max_pools == 32. 379 | # TODO: Remove hardcoding of downscaling calculations. 380 | assert height % 32 == 0, 'Image sizes in YOLO_v2 must be multiples of 32.' 381 | assert width % 32 == 0, 'Image sizes in YOLO_v2 must be multiples of 32.' 382 | conv_height = height // 32 383 | conv_width = width // 32 384 | num_box_params = true_boxes.shape[1] 385 | detectors_mask = np.zeros( 386 | (conv_height, conv_width, num_anchors, 1), dtype=np.float32) 387 | matching_true_boxes = np.zeros( 388 | (conv_height, conv_width, num_anchors, num_box_params), 389 | dtype=np.float32) 390 | 391 | for box in true_boxes: 392 | # scale box to convolutional feature spatial dimensions 393 | box_class = box[4:5] 394 | box = box[0:4] * np.array( 395 | [conv_width, conv_height, conv_width, conv_height]) 396 | i = np.floor(box[1]).astype('int') 397 | j = min(np.floor(box[0]).astype('int'),1) 398 | best_iou = 0 399 | best_anchor = 0 400 | 401 | for k, anchor in enumerate(anchors): 402 | # Find IOU between box shifted to origin and anchor box. 403 | box_maxes = box[2:4] / 2. 404 | box_mins = -box_maxes 405 | anchor_maxes = (anchor / 2.) 406 | anchor_mins = -anchor_maxes 407 | 408 | intersect_mins = np.maximum(box_mins, anchor_mins) 409 | intersect_maxes = np.minimum(box_maxes, anchor_maxes) 410 | intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.) 411 | intersect_area = intersect_wh[0] * intersect_wh[1] 412 | box_area = box[2] * box[3] 413 | anchor_area = anchor[0] * anchor[1] 414 | iou = intersect_area / (box_area + anchor_area - intersect_area) 415 | if iou > best_iou: 416 | best_iou = iou 417 | best_anchor = k 418 | 419 | if best_iou > 0: 420 | detectors_mask[i, j, best_anchor] = 1 421 | adjusted_box = np.array( 422 | [ 423 | box[0] - j, box[1] - i, 424 | np.log(box[2] / anchors[best_anchor][0]), 425 | np.log(box[3] / anchors[best_anchor][1]), box_class 426 | ], 427 | dtype=np.float32) 428 | matching_true_boxes[i, j, best_anchor] = adjusted_box 429 | return detectors_mask, matching_true_boxes 430 | -------------------------------------------------------------------------------- /object_classes.txt: -------------------------------------------------------------------------------- 1 | car -------------------------------------------------------------------------------- /yolo_utils.py: -------------------------------------------------------------------------------- 1 | import colorsys 2 | import imghdr 3 | import os 4 | import random 5 | from keras import backend as K 6 | 7 | import numpy as np 8 | from PIL import Image, ImageDraw, ImageFont 9 | 10 | def read_classes(classes_path): 11 | with open(classes_path) as f: 12 | class_names = f.readlines() 13 | class_names = [c.strip() for c in class_names] 14 | return class_names 15 | 16 | def read_anchors(anchors_path): 17 | with open(anchors_path) as f: 18 | anchors = f.readline() 19 | anchors = [float(x) for x in anchors.split(',')] 20 | anchors = np.array(anchors).reshape(-1, 2) 21 | return anchors 22 | 23 | def generate_colors(class_names): 24 | hsv_tuples = [(x / len(class_names), 1., 1.) for x in range(len(class_names))] 25 | colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples)) 26 | colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors)) 27 | random.seed(10101) # Fixed seed for consistent colors across runs. 28 | random.shuffle(colors) # Shuffle colors to decorrelate adjacent classes. 29 | random.seed(None) # Reset seed to default. 30 | return colors 31 | 32 | def scale_boxes(boxes, image_shape): 33 | """ Scales the predicted boxes in order to be drawable on the image""" 34 | height = image_shape[0] 35 | width = image_shape[1] 36 | image_dims = K.stack([height, width, height, width]) 37 | image_dims = K.reshape(image_dims, [1, 4]) 38 | boxes = boxes * image_dims 39 | return boxes 40 | 41 | def preprocess_image(img_path, model_image_size): 42 | image_type = imghdr.what(img_path) 43 | image = Image.open(img_path) 44 | resized_image = image.resize(tuple(reversed(model_image_size)), Image.BICUBIC) 45 | image_data = np.array(resized_image, dtype='float32') 46 | image_data /= 255. 47 | image_data = np.expand_dims(image_data, 0) # Add batch dimension. 48 | return image, image_data 49 | 50 | def draw_boxes(image, out_scores, out_boxes, out_classes, class_names, colors): 51 | 52 | font = ImageFont.truetype(font='font/FiraMono-Medium.otf',size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32')) 53 | thickness = (image.size[0] + image.size[1])//300 54 | 55 | for i, c in reversed(list(enumerate(out_classes))): 56 | predicted_class = class_names[c] 57 | box = out_boxes[i] 58 | score = out_scores[i] 59 | 60 | label = '{} {:.2f}'.format(predicted_class, score) 61 | 62 | draw = ImageDraw.Draw(image) 63 | label_size = draw.textsize(label, font) 64 | 65 | top, left, bottom, right = box 66 | top = max(0, np.floor(top + 0.5).astype('int32')) 67 | left = max(0, np.floor(left + 0.5).astype('int32')) 68 | bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32')) 69 | right = min(image.size[0], np.floor(right + 0.5).astype('int32')) 70 | print(label, (left, top), (right, bottom)) 71 | 72 | if top - label_size[1] >= 0: 73 | text_origin = np.array([left, top - label_size[1]]) 74 | else: 75 | text_origin = np.array([left, top + 1]) 76 | 77 | # My kingdom for a good redistributable image drawing library. 78 | for i in range(thickness): 79 | draw.rectangle([left + i, top + i, right - i, bottom - i], outline=colors[c]) 80 | draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)], fill=colors[c]) 81 | draw.text(text_origin, label, fill=(0, 0, 0), font=font) 82 | del draw 83 | -------------------------------------------------------------------------------- /yolo_自动驾驶_车辆识别介绍.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hierarchyJK/Auto_driving_recognize/fa7825099a27d9dceb6cf6e88d892824618f6350/yolo_自动驾驶_车辆识别介绍.docx -------------------------------------------------------------------------------- /yolo_自动驾驶_车辆识别介绍_20190124175811.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hierarchyJK/Auto_driving_recognize/fa7825099a27d9dceb6cf6e88d892824618f6350/yolo_自动驾驶_车辆识别介绍_20190124175811.pdf --------------------------------------------------------------------------------