├── Auto_driving_car_yolo.py
├── README.md
├── coco_classes.txt
├── images的网盘地址.txt
├── keras_yolo.py
├── object_classes.txt
├── yolo_utils.py
├── yolo_自动驾驶_车辆识别介绍.docx
└── yolo_自动驾驶_车辆识别介绍_20190124175811.pdf


/Auto_driving_car_yolo.py:
--------------------------------------------------------------------------------
  1 | # -*-coding:utf-8 -*-
  2 | """
  3 | @project:untitled3
  4 | @author:Kun_J
  5 | @file:.py
  6 | @ide:untitled3
  7 | @time:2019-01-22 18:12:32
  8 | @month:一月
  9 | """
 10 | import argparse
 11 | import os
 12 | import matplotlib.pyplot as plt
 13 | import scipy.io
 14 | import scipy.misc
 15 | import numpy as np
 16 | import pandas as pd
 17 | import PIL
 18 | import tensorflow as tf
 19 | from keras import backend as K
 20 | from keras.layers import Input,Lambda,Conv2D
 21 | from keras.models import load_model,Model
 22 | from yolo_utils import read_classes, read_anchors, generate_colors, preprocess_image, draw_boxes, scale_boxes
 23 | from keras_yolo import yolo_head, yolo_boxes_to_corners, preprocess_true_boxes, yolo_loss, yolo_body
 24 | 
 25 | 
 26 | def yolo_filter_boxes(box_confidence, boxes, box_class_probs, thresthod = .6):
 27 |     """
 28 |     Filters YOLO boxes by thresholding on object and class confidence.
 29 |     :param box_confidence: --tensor of shape (19, 19, 5,1)
 30 |     :param boxes: -- tensor of shape (19, 19, 5, 4) (后面用到的是边角corner coordinate)
 31 |     :param box_class_probs: -- tensor of shape (19, 19, 5, 80)
 32 |     :param thresthod: -- real value, if [highest class probability score < threshold],then get rid of the corresponding box]
 33 |     :return:
 34 |      scores -- tensor of shape(None, ),containing the class probability score for selected boxes
 35 |      boxes -- tensor of shape(None, 4),containing(b_x, b_y, b_h, b_w) coordinates of selected boxes
 36 |      classes -- tensor of shape(None, ),containing the index of the class detected by the selected boxes
 37 |     """
 38 |     ## First step：计算锚框的得分
 39 |     box_scores = box_confidence * box_class_probs
 40 |     ## Second step：找到最大值的锚框索引以及对应的最大值的锚框
 41 |     box_classes = K.argmax(box_scores,axis=-1)
 42 |     box_class_scores = K.max(box_scores,axis=-1)
 43 |     ## Third step：根据阈值创建掩码
 44 |     filtering_mask = (box_class_scores>=thresthod)
 45 |     ## 对scores， boxes 以及classes使用掩码
 46 |     scores = tf.boolean_mask(box_class_scores,filtering_mask)
 47 |     boxes = tf.boolean_mask(boxes, filtering_mask)
 48 |     classes = tf.boolean_mask(box_classes, filtering_mask)
 49 | 
 50 |     return scores, boxes, classes
 51 | # def yolo_filter_boxes_test():
 52 | #     with tf.Session() as test_a:
 53 | #         box_confidence = tf.random_normal([19, 19, 5, 1], mean=1, stddev=4, seed=1)
 54 | #         boxes = tf.random_normal([19, 19, 5, 4], mean=1, stddev=4, seed=1)
 55 | #         box_class_probs = tf.random_normal([19, 19, 5, 80], mean=1, stddev=4, seed=1)
 56 | #         scores, boxes, classes = yolo_filter_boxes(box_confidence, boxes, box_class_probs, thresthod=0.5)
 57 | #         print("scores[2] = " + str(scores[2].eval()))
 58 | #         print("boxes[2] = " + str(boxes[2].eval()))
 59 | #         print("classes[2] = " + str(classes[2].eval()))
 60 | #         print("scores.shape = " + str(scores.shape))
 61 | #         print("boxes.shape = " + str(boxes.shape))
 62 | #         print("classes.shape = " + str(classes.shape))
 63 | #         test_a.close()
 64 | ##yolo_filter_boxes_test()
 65 | 
 66 | def iou(box1, box2):
 67 |     """
 68 |     实现两个锚框的交并比的计算
 69 |     :param box1: 第一个锚框，shape(x1,y1,x2,y2)
 70 |     :param box2: 第二个锚框，shape(x1,y1,x2,y2)
 71 |     :return:
 72 |     iou:实数，交并比
 73 |     """
 74 |     # 计算相交的区域的面积
 75 |     xi1 = np.maximum(box1[0], box2[0])
 76 |     yi1 = np.maximum(box1[1], box2[1])
 77 |     xi2 = np.minimum(box1[2], box2[2])
 78 |     yi2 = np.minimum(box1[3], box2[3])
 79 |     inter_area = (xi1 - xi2) * (yi1 - yi2)
 80 | 
 81 |     # 计算并集 Union(A,B) = A + B - Inter(A, B)
 82 |     box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
 83 |     box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
 84 |     union_area = box1_area + box2_area - inter_area
 85 | 
 86 |     # 计算交并比
 87 |     iou = inter_area / union_area
 88 | 
 89 |     return iou
 90 | def iou_test():
 91 |     box1 = (2,1,4,3)
 92 |     box2 = (1,2,3,4)
 93 |     print("iou = " + str(iou(box1, box2)))
 94 | ##iou_test()
 95 | 
 96 | """实现非最大值抑制函数：
1：选择分值最高的锚框
2：计算它与其他锚框的重叠部分，并删除与该锚框交叠较大的锚框
 99 | 3：返回第一步，直到不再有比当前选中的框得分更低的框
100 | Note：这将删除与选定框有较大重叠的其他所有锚框，只有得分最高的锚框仍然存在"""
101 | 
def yolo_non_max_suppression(scores, boxes, classes, max_boxes = 10, iou_threshold = 0.5):
    """
    Apply non-max suppression (NMS) to a set of boxes using TensorFlow.

    :param scores: tensor, output of yolo_filter_boxes()
    :param boxes: tensor, output of yolo_filter_boxes()
    :param classes: tensor, output of yolo_filter_boxes()
    :param max_boxes: integer, maximum number of boxes to keep
    :param iou_threshold: real value, IoU threshold passed to
        tf.image.non_max_suppression
    :return:
        scores: tensor, score of each kept box
        boxes: tensor, coordinates of each kept box
        classes: tensor, class of each kept box
    Note: the three outputs are gathered with the same indices, so they stay
    aligned with each other; no transposition is performed.
    """
    # Integer tensor holding the box budget for tf.image.non_max_suppression().
    max_boxes_tensor = K.variable(max_boxes, dtype="int32")
    # The variable must be initialised in the Keras session before use.
    K.get_session().run(tf.variables_initializer([max_boxes_tensor]))

    # Indices of the boxes that survive suppression. The tensor created above
    # is what gets passed here (it was previously created but never used).
    nms_indices = tf.image.non_max_suppression(
        boxes, scores, max_boxes_tensor, iou_threshold=iou_threshold)

    # Select the surviving entries from each of the three tensors.
    scores = K.gather(scores, nms_indices)
    boxes = K.gather(boxes, nms_indices)
    classes = K.gather(classes, nms_indices)

    return scores, boxes, classes
def yolo_non_max_suppression_test():
    """Run yolo_non_max_suppression on seeded random tensors and print results."""
    def _noise(shape):
        # Seeded normal noise so repeated runs print the same numbers.
        return tf.random_normal(shape, mean=1, stddev=4, seed=1)

    with tf.Session():
        scores, boxes, classes = yolo_non_max_suppression(
            _noise([54, ]), _noise([54, 4]), _noise([54, ]))

        third_score = scores[2].eval()
        print("scores[2] = " + str(third_score))
        third_box = boxes[2].eval()
        print("boxes[2] = " + str(third_box))
        third_class = classes[2].eval()
        print("classes[2] = " + str(third_class))

        scores_shape = scores.eval().shape
        print("scores.shape = " + str(scores_shape))
        boxes_shape = boxes.eval().shape
        print("boxes.shape = " + str(boxes_shape))
        classes_shape = classes.eval().shape
        print("classes.shape = " + str(classes_shape))
143 | #yolo_non_max_suppression_test()
def yolo_eval(yolo_outputs, image_shape=(720.,1280.), max_boxes=10, score_threshold=0.6, iou_threshold=0.5):
    """
    Turn the YOLO head outputs into final detections (scores, boxes, classes).

    :param yolo_outputs: 4-tuple unpacked as
        (box_confidence, box_xy, box_wh, box_class_probs)
    :param image_shape: shape handed to scale_boxes() to rescale the boxes;
        defaults to (720., 1280.)
    :param max_boxes: integer, maximum number of boxes kept by NMS
    :param score_threshold: real value, minimum class score for a box to
        survive the first filter
    :param iou_threshold: real value, IoU threshold used by NMS
    :return:
        scores: tensor, score of each kept box
        boxes: tensor, coordinates of each kept box
        classes: tensor, class index of each kept box
    """
    confidence, centers, sizes, class_probs = yolo_outputs

    # Centre/size representation -> corner representation.
    corner_boxes = yolo_boxes_to_corners(centers, sizes)

    # First filter: drop boxes whose best class score is below the threshold.
    kept_scores, kept_boxes, kept_classes = yolo_filter_boxes(
        confidence, corner_boxes, class_probs, thresthod=score_threshold)

    # Rescale the surviving boxes to the requested image shape.
    kept_boxes = scale_boxes(kept_boxes, image_shape)

    # Second filter: non-max suppression.
    return yolo_non_max_suppression(
        kept_scores, kept_boxes, kept_classes,
        max_boxes=max_boxes, iou_threshold=iou_threshold)
def yolo_eval_test():
    """Run yolo_eval on seeded random head outputs and print the results."""
    def _noise(last_dim):
        # Seeded normal noise shaped like one head output with `last_dim` channels.
        return tf.random_normal([19, 19, 5, last_dim], mean=1, stddev=4, seed=1)

    with tf.Session():
        yolo_outputs = (_noise(1), _noise(2), _noise(2), _noise(80))
        scores, boxes, classes = yolo_eval(yolo_outputs)

        third_score = scores[2].eval()
        print("scores[2] = " + str(third_score))
        third_box = boxes[2].eval()
        print("boxes[2] = " + str(third_box))
        third_class = classes[2].eval()
        print("classes[2] = " + str(third_class))

        scores_shape = scores.eval().shape
        print("scores.shape = " + str(scores_shape))
        boxes_shape = boxes.eval().shape
        print("boxes.shape = " + str(boxes_shape))
        classes_shape = classes.eval().shape
        print("classes.shape = " + str(classes_shape))
189 | #yolo_eval_test()
190 | """
191 | 对YOLO的总结：
192 | 1、输入图像为(608, 608)
193 | 2、输入的图像先要经过一个CNN模型，返回一个(19, 19, 5, 85)的输出
3、再将最后两个维度(5, 85)展平，输出变成(19, 19, 425):
195 |     ·每个19*19的单元格拥有425个数字
196 |     ·425=5*85，即每个单元格拥有5个锚框，每个锚框由5个基本信息+80个分类预测构成
197 |     ·85=5+80，其中5个基本信息是(Pc,Px,Py,Ph,Pw)，剩下的80个就是80个分类预测
4、然后我们会根据以下规则选择锚框：
199 |     ·预测分数阈值：丢弃分数低于阈值的分类的锚框
200 |     ·非最大值抑制：计算交并比，并避免选择重叠的框
201 | 5、最后给出YOLO的输出
202 | """
# Directory that holds the class list, the anchors and the pretrained weights.
# Every backslash is escaped: the previous literals contained "\课", which is
# an invalid escape sequence and only worked by accident.
MODEL_DATA_DIR = 'F:\\吴恩达DL作业\\课后作业\\代码作业\\第四课第三周编程作业\\Car detection for Autonomous Driving\\model_data'

# Keras session reused later by predict() to run the detection graph.
sess = K.get_session()
class_names = read_classes(MODEL_DATA_DIR + '\\coco_classes.txt')
anchors = read_anchors(MODEL_DATA_DIR + '\\yolo_anchors.txt')
# Shape passed to yolo_eval() for rescaling the predicted boxes.
image_shape = (720., 1280.)
yolo_model = load_model(MODEL_DATA_DIR + '\\yolo.h5')
yolo_model.summary()

# Build the post-processing graph once; predict() evaluates these tensors.
yolo_outputs = yolo_head(yolo_model.output, anchors, len(class_names))
scores, boxes, classes = yolo_eval(yolo_outputs, image_shape)
212 | 
def predict(sess, image_file, is_show_info=True, is_plot=True,
            output_dir='C:\\Users\\korey\\Desktop\\car'):
    """
    Run the detection graph on one image, draw the boxes and save the result.

    :param sess: TensorFlow/Keras session that holds the YOLO graph
    :param image_file: path of the image to process; it is passed unchanged
        to preprocess_image()
    :param is_show_info: when True, print how many boxes were found
    :param is_plot: when True, display the saved annotated image
    :param output_dir: directory that receives the annotated image; the file
        keeps the base name of image_file
    :return:
            out_scores: scores of the detected boxes, as returned by sess.run
            out_boxes: coordinates of the detected boxes
            out_classes: class indices of the detected boxes
    """
    # Load the image and build the network input.
    image, image_data = preprocess_image(image_file, model_image_size=(608, 608))

    # Evaluate the module-level result tensors; learning phase 0 is fed
    # explicitly, as the original code did.
    out_scores, out_boxes, out_classes = sess.run(
        [scores, boxes, classes],
        feed_dict={yolo_model.input: image_data, K.learning_phase(): 0})

    if is_show_info:
        print("在" + str(image_file)+"中找到"+str(len(out_boxes))+"个锚框。")

    colors = generate_colors(class_names)
    draw_boxes(image, out_scores, out_boxes, out_classes, class_names, colors)

    # Use only the base name: joining output_dir with an absolute image_file
    # discards output_dir, so the annotated image used to overwrite the input.
    output_path = os.path.join(output_dir, os.path.basename(image_file))
    image.save(output_path, quality=90)

    if is_plot:
        out_image = plt.imread(output_path)
        plt.imshow(out_image)
        plt.show()
    return out_scores, out_boxes, out_classes
237 | 
238 | #out_scores, out_boxes, out_classes = predict(sess,'test.jpg')
239 | # image_test = plt.imread('test.jpg')
240 | # plt.imshow(image_test)
241 | # plt.show()
# Folder whose files are all fed through predict().
rootdir = 'F:\\吴恩达DL作业\\课后作业\\代码作业\\第四课第三周编程作业\\Car detection for Autonomous Driving\\images'
# os.walk yields (current directory, sub-directory names, file names).
for parent, dirnames, filenames in os.walk(rootdir):
    for filename in filenames:
        image_path = os.path.join(parent, filename)
        print('当前图片：'+str(image_path))
        out_scores, out_boxes, out_classes = predict(sess, image_path)
247 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Auto_driving_recognize
2 | 自动驾驶，汽车识别
3 | 主要看Auto_driving_car_yolo.py
4 |       yolo_utils.py
      keras_yolo.py三个代码文件
6 | 


--------------------------------------------------------------------------------
/coco_classes.txt:
--------------------------------------------------------------------------------
 1 | person
 2 | bicycle
 3 | car
 4 | motorbike
 5 | aeroplane
 6 | bus
 7 | train
 8 | truck
 9 | boat
10 | traffic light
11 | fire hydrant
12 | stop sign
13 | parking meter
14 | bench
15 | bird
16 | cat
17 | dog
18 | horse
19 | sheep
20 | cow
21 | elephant
22 | bear
23 | zebra
24 | giraffe
25 | backpack
26 | umbrella
27 | handbag
28 | tie
29 | suitcase
30 | frisbee
31 | skis
32 | snowboard
33 | sports ball
34 | kite
35 | baseball bat
36 | baseball glove
37 | skateboard
38 | surfboard
39 | tennis racket
40 | bottle
41 | wine glass
42 | cup
43 | fork
44 | knife
45 | spoon
46 | bowl
47 | banana
48 | apple
49 | sandwich
50 | orange
51 | broccoli
52 | carrot
53 | hot dog
54 | pizza
55 | donut
56 | cake
57 | chair
58 | sofa
59 | pottedplant
60 | bed
61 | diningtable
62 | toilet
63 | tvmonitor
64 | laptop
65 | mouse
66 | remote
67 | keyboard
68 | cell phone
69 | microwave
70 | oven
71 | toaster
72 | sink
73 | refrigerator
74 | book
75 | clock
76 | vase
77 | scissors
78 | teddy bear
79 | hair drier
80 | toothbrush
81 | 


--------------------------------------------------------------------------------
/images的网盘地址.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hierarchyJK/Auto_driving_recognize/fa7825099a27d9dceb6cf6e88d892824618f6350/images的网盘地址.txt


--------------------------------------------------------------------------------
/keras_yolo.py:
--------------------------------------------------------------------------------
  1 | """YOLO_v2 Model Defined in Keras."""
  2 | import sys
  3 | 
  4 | import numpy as np
  5 | import tensorflow as tf
  6 | from keras import backend as K
  7 | from keras.layers import Lambda
  8 | from keras.layers.merge import concatenate
  9 | from keras.models import Model
 10 | 
 11 | from utils import compose
 12 | from keras_darknet19 import (DarknetConv2D, DarknetConv2D_BN_Leaky, darknet_body)
 13 | 
 14 | sys.path.append('..')
 15 | 
 16 | voc_anchors = np.array(
 17 |     [[1.08, 1.19], [3.42, 4.41], [6.63, 11.38], [9.42, 5.11], [16.62, 10.52]])
 18 | 
 19 | voc_classes = [
 20 |     "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat",
 21 |     "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person",
 22 |     "pottedplant", "sheep", "sofa", "train", "tvmonitor"
 23 | ]
 24 | 
 25 | 
 26 | def space_to_depth_x2(x):
 27 |     """Thin wrapper for Tensorflow space_to_depth with block_size=2."""
 28 |     # Import currently required to make Lambda work.
 29 |     # See: https://github.com/fchollet/keras/issues/5088#issuecomment-273851273
 30 |     import tensorflow as tf
 31 |     return tf.space_to_depth(x, block_size=2)
 32 | 
 33 | 
 34 | def space_to_depth_x2_output_shape(input_shape):
 35 |     """Determine space_to_depth output shape for block_size=2.
 36 | 
 37 |     Note: For Lambda with TensorFlow backend, output shape may not be needed.
 38 |     """
 39 |     return (input_shape[0], input_shape[1] // 2, input_shape[2] // 2, 4 *
 40 |             input_shape[3]) if input_shape[1] else (input_shape[0], None, None,
 41 |                                                     4 * input_shape[3])
 42 | 
 43 | 
 44 | def yolo_body(inputs, num_anchors, num_classes):
 45 |     """Create YOLO_V2 model CNN body in Keras."""
 46 |     darknet = Model(inputs, darknet_body()(inputs))
 47 |     conv20 = compose(
 48 |         DarknetConv2D_BN_Leaky(1024, (3, 3)),
 49 |         DarknetConv2D_BN_Leaky(1024, (3, 3)))(darknet.output)
 50 | 
 51 |     conv13 = darknet.layers[43].output
 52 |     conv21 = DarknetConv2D_BN_Leaky(64, (1, 1))(conv13)
 53 |     # TODO: Allow Keras Lambda to use func arguments for output_shape?
 54 |     conv21_reshaped = Lambda(
 55 |         space_to_depth_x2,
 56 |         output_shape=space_to_depth_x2_output_shape,
 57 |         name='space_to_depth')(conv21)
 58 | 
 59 |     x = concatenate([conv21_reshaped, conv20])
 60 |     x = DarknetConv2D_BN_Leaky(1024, (3, 3))(x)
 61 |     x = DarknetConv2D(num_anchors * (num_classes + 5), (1, 1))(x)
 62 |     return Model(inputs, x)
 63 | 
 64 | 
def yolo_head(feats: object, anchors: object, num_classes: object) -> object:
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tensor
        Final convolutional layer features.
    anchors : array-like
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.

    Returns
    -------
    box_confidence : tensor
        Sigmoid of channel 4 of each box: estimate for whether the box
        contains any object.
    box_xy : tensor
        x, y box predictions adjusted by spatial location in conv layer.
    box_wh : tensor
        w, h box predictions adjusted by anchors and conv spatial resolution.
    box_class_probs : tensor
        Softmax over channels 5 and up: distribution over class labels.

    Notes
    -----
    The return order is (confidence, xy, wh, class probabilities); callers
    must unpack the tuple in exactly this order.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])
    # Static implementation for fixed models.
    # TODO: Remove or add option for static implementation.
    # _, conv_height, conv_width, _ = K.int_shape(feats)
    # conv_dims = K.variable([conv_width, conv_height])

    # Dynamic implementation of conv dims for fully convolutional model.
    conv_dims = K.shape(feats)[1:3]  # assuming channels last
    # In YOLO the height index is the inner most iteration.
    conv_height_index = K.arange(0, stop=conv_dims[0])
    conv_width_index = K.arange(0, stop=conv_dims[1])
    conv_height_index = K.tile(conv_height_index, [conv_dims[1]])

    # TODO: Repeat_elements and tf.split doesn't support dynamic splits.
    # conv_width_index = K.repeat_elements(conv_width_index, conv_dims[1], axis=0)
    conv_width_index = K.tile(K.expand_dims(conv_width_index, 0), [conv_dims[0], 1])
    conv_width_index = K.flatten(K.transpose(conv_width_index))
    # Pair the two index vectors into one grid of per-cell offsets.
    conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))
    conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2])
    conv_index = K.cast(conv_index, K.dtype(feats))

    # Split the channel axis into (anchor, box parameter) axes.
    feats = K.reshape(feats, [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5])
    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    # Static generation of conv_index:
    # conv_index = np.array([_ for _ in np.ndindex(conv_width, conv_height)])
    # conv_index = conv_index[:, [1, 0]]  # swap columns for YOLO ordering.
    # conv_index = K.variable(
    #     conv_index.reshape(1, conv_height, conv_width, 1, 2))
    # feats = Reshape(
    #     (conv_dims[0], conv_dims[1], num_anchors, num_classes + 5))(feats)

    # Per-box layout on the last axis: 0-1 xy, 2-3 wh, 4 confidence, 5+ classes.
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust preditions to each spatial grid point and anchor size.
    # Note: YOLO iterates over height index before width index.
    box_xy = (box_xy + conv_index) / conv_dims
    box_wh = box_wh * anchors_tensor / conv_dims

    return box_confidence, box_xy, box_wh, box_class_probs
133 | 
134 | 
def yolo_boxes_to_corners(box_xy, box_wh):
    """Convert centre/size box predictions to corner form.

    The result is concatenated in the order (y_min, x_min, y_max, x_max).
    """
    half_extent = box_wh / 2.
    lower = box_xy - half_extent
    upper = box_xy + half_extent

    # Slices keep the last axis so the pieces can be concatenated on it.
    y_min, x_min = lower[..., 1:2], lower[..., 0:1]
    y_max, x_max = upper[..., 1:2], upper[..., 0:1]
    return K.concatenate([y_min, x_min, y_max, x_max])
146 | 
147 | 
def yolo_loss(args,
              anchors,
              num_classes,
              rescore_confidence=False,
              print_loss=False):
    """YOLO localization loss function.

    Parameters
    ----------
    args : tuple
        ``(yolo_output, true_boxes, detectors_mask, matching_true_boxes)``:

        yolo_output : tensor
            Final convolutional layer features.

        true_boxes : tensor
            Ground truth boxes tensor with shape [batch, num_true_boxes, 5]
            containing box x_center, y_center, width, height, and class.

        detectors_mask : array
            0/1 mask for detector positions where there is a matching ground
            truth.

        matching_true_boxes : array
            Corresponding ground truth boxes for positive detector positions.
            Already adjusted for conv height and width.

    anchors : tensor
        Anchor boxes for model.

    num_classes : int
        Number of object classes.

    rescore_confidence : bool, default=False
        If true then set confidence target to IOU of best predicted box with
        the closest matching ground truth box.

    print_loss : bool, default=False
        If True then use a tf.Print() to print the loss components.

    Returns
    -------
    total_loss : tensor
        Half of the sum of the confidence, classification and coordinate
        squared-error terms.
    """
    (yolo_output, true_boxes, detectors_mask, matching_true_boxes) = args
    num_anchors = len(anchors)
    object_scale = 5
    no_object_scale = 1
    class_scale = 1
    coordinates_scale = 1
    # yolo_head returns (confidence, xy, wh, class_probs) in that order; the
    # previous unpacking assumed (xy, wh, confidence, class_probs) and so fed
    # the wrong tensor into every loss term below.
    pred_confidence, pred_xy, pred_wh, pred_class_prob = yolo_head(
        yolo_output, anchors, num_classes)

    # Unadjusted box predictions for loss.
    # TODO: Remove extra computation shared with yolo_head.
    yolo_output_shape = K.shape(yolo_output)
    feats = K.reshape(yolo_output, [
        -1, yolo_output_shape[1], yolo_output_shape[2], num_anchors,
        num_classes + 5
    ])
    pred_boxes = K.concatenate(
        (K.sigmoid(feats[..., 0:2]), feats[..., 2:4]), axis=-1)

    # TODO: Adjust predictions by image width/height for non-square images?
    # IOUs may be off due to different aspect ratio.

    # Expand pred x,y,w,h to allow comparison with ground truth.
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    pred_xy = K.expand_dims(pred_xy, 4)
    pred_wh = K.expand_dims(pred_wh, 4)

    pred_wh_half = pred_wh / 2.
    pred_mins = pred_xy - pred_wh_half
    pred_maxes = pred_xy + pred_wh_half

    true_boxes_shape = K.shape(true_boxes)

    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    true_boxes = K.reshape(true_boxes, [
        true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1], true_boxes_shape[2]
    ])
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]

    # Find IOU of each predicted box with each ground truth box.
    true_wh_half = true_wh / 2.
    true_mins = true_xy - true_wh_half
    true_maxes = true_xy + true_wh_half

    intersect_mins = K.maximum(pred_mins, true_mins)
    intersect_maxes = K.minimum(pred_maxes, true_maxes)
    # Clamp at zero so disjoint boxes contribute no intersection area.
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = intersect_areas / union_areas

    # Best IOUs for each location.
    best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
    best_ious = K.expand_dims(best_ious)

    # A detector has found an object if IOU > thresh for some true box.
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

    # TODO: Darknet region training includes extra coordinate loss for early
    # training steps to encourage predictions to match anchor priors.

    # Determine confidence weights from object and no_object weights.
    # NOTE: YOLO does not use binary cross-entropy here.
    no_object_weights = (no_object_scale * (1 - object_detections) *
                         (1 - detectors_mask))
    no_objects_loss = no_object_weights * K.square(-pred_confidence)

    if rescore_confidence:
        objects_loss = (object_scale * detectors_mask *
                        K.square(best_ious - pred_confidence))
    else:
        objects_loss = (object_scale * detectors_mask *
                        K.square(1 - pred_confidence))
    confidence_loss = objects_loss + no_objects_loss

    # Classification loss for matching detections.
    # NOTE: YOLO does not use categorical cross-entropy loss here.
    matching_classes = K.cast(matching_true_boxes[..., 4], 'int32')
    matching_classes = K.one_hot(matching_classes, num_classes)
    classification_loss = (class_scale * detectors_mask *
                           K.square(matching_classes - pred_class_prob))

    # Coordinate loss for matching detection boxes.
    matching_boxes = matching_true_boxes[..., 0:4]
    coordinates_loss = (coordinates_scale * detectors_mask *
                        K.square(matching_boxes - pred_boxes))

    confidence_loss_sum = K.sum(confidence_loss)
    classification_loss_sum = K.sum(classification_loss)
    coordinates_loss_sum = K.sum(coordinates_loss)
    total_loss = 0.5 * (
        confidence_loss_sum + classification_loss_sum + coordinates_loss_sum)
    if print_loss:
        total_loss = tf.Print(
            total_loss, [
                total_loss, confidence_loss_sum, classification_loss_sum,
                coordinates_loss_sum
            ],
            message='yolo_loss, conf_loss, class_loss, box_coord_loss:')

    return total_loss
295 | 
296 | 
def yolo(inputs, anchors, num_classes):
    """Build the YOLO_v2 body and return the head outputs computed from it.

    The result is whatever ``yolo_head`` returns for the body's output.
    """
    body = yolo_body(inputs, len(anchors), num_classes)
    return yolo_head(body.output, anchors, num_classes)
303 | 
304 | 
def yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold=.6):
    """Keep the boxes whose best class score is at least ``threshold``.

    Returns the kept ``(boxes, scores, classes)`` in that order.
    """
    class_scores = box_confidence * box_class_probs
    top_class = K.argmax(class_scores, axis=-1)
    top_score = K.max(class_scores, axis=-1)
    keep = top_score >= threshold

    # TODO: Expose tf.boolean_mask to Keras backend?
    def _select(tensor):
        return tf.boolean_mask(tensor, keep)

    return _select(boxes), _select(top_score), _select(top_class)
319 | 
320 | 
def yolo_eval(yolo_outputs,
              image_shape,
              max_boxes=10,
              score_threshold=.6,
              iou_threshold=.5):
    """Filter the YOLO head outputs down to the final detections.

    ``yolo_outputs`` is unpacked as (confidence, xy, wh, class probabilities).
    Returns ``(boxes, scores, classes)`` after score filtering, rescaling by
    ``image_shape`` and non-max suppression.
    """
    confidence, centers, sizes, class_probs = yolo_outputs
    corner_boxes = yolo_boxes_to_corners(centers, sizes)
    kept_boxes, kept_scores, kept_classes = yolo_filter_boxes(
        confidence, corner_boxes, class_probs, threshold=score_threshold)

    # Scale boxes back to original image shape.
    img_height, img_width = image_shape[0], image_shape[1]
    scale = K.reshape(
        K.stack([img_height, img_width, img_height, img_width]), [1, 4])
    kept_boxes = kept_boxes * scale

    # TODO: Something must be done about this ugly hack!
    box_budget = K.variable(max_boxes, dtype='int32')
    K.get_session().run(tf.variables_initializer([box_budget]))
    selected = tf.image.non_max_suppression(
        kept_boxes, kept_scores, box_budget, iou_threshold=iou_threshold)

    final_boxes = K.gather(kept_boxes, selected)
    final_scores = K.gather(kept_scores, selected)
    final_classes = K.gather(kept_classes, selected)
    return final_boxes, final_scores, final_classes
349 | 
350 | 
def preprocess_true_boxes(true_boxes, anchors, image_size):
    """Find detector in YOLO where ground truth box should appear.

    Parameters
    ----------
    true_boxes : array
        List of ground truth boxes in form of relative x, y, w, h, class.
        Relative coordinates are in the range [0, 1] indicating a percentage
        of the original image dimensions.
    anchors : array
        List of anchors in form of w, h.
        Anchors are assumed to be in the range [0, conv_size] where conv_size
        is the spatial dimension of the final convolutional features.
    image_size : array-like
        List of image dimensions in form of h, w in pixels.

    Returns
    -------
    detectors_mask : array
        0/1 mask for detectors in [conv_height, conv_width, num_anchors, 1]
        that should be compared with a matching ground truth box.
    matching_true_boxes: array
        Array of shape [conv_height, conv_width, num_anchors, num_box_params]
        with the corresponding ground truth box adjusted for comparison with
        predicted parameters at training time.
    """
    height, width = image_size
    # Accept plain lists as well as arrays; rows are divided by 2 below.
    anchors = np.asarray(anchors, dtype=np.float64)
    num_anchors = len(anchors)
    # Downsampling factor of 5x 2-stride max_pools == 32.
    # TODO: Remove hardcoding of downscaling calculations.
    assert height % 32 == 0, 'Image sizes in YOLO_v2 must be multiples of 32.'
    assert width % 32 == 0, 'Image sizes in YOLO_v2 must be multiples of 32.'
    conv_height = height // 32
    conv_width = width // 32
    num_box_params = true_boxes.shape[1]
    detectors_mask = np.zeros(
        (conv_height, conv_width, num_anchors, 1), dtype=np.float32)
    matching_true_boxes = np.zeros(
        (conv_height, conv_width, num_anchors, num_box_params),
        dtype=np.float32)

    grid_scale = np.array([conv_width, conv_height, conv_width, conv_height])

    for box in true_boxes:
        # Scalar class id (a 1-element slice would make the array built
        # below ragged).
        box_class = box[4]
        # scale box to convolutional feature spatial dimensions
        box = box[0:4] * grid_scale
        # Grid cell containing the box centre. Clamp to the valid index
        # range so a coordinate of exactly 1.0 still lands in the last cell.
        # (The column index used to be capped at 1, which put every box in
        # one of the first two columns.)
        i = int(np.clip(np.floor(box[1]), 0, conv_height - 1))
        j = int(np.clip(np.floor(box[0]), 0, conv_width - 1))
        best_iou = 0
        best_anchor = 0

        # Box shifted to the origin; independent of the anchor being tested.
        box_maxes = box[2:4] / 2.
        box_mins = -box_maxes
        box_area = box[2] * box[3]

        for k, anchor in enumerate(anchors):
            # Find IOU between box shifted to origin and anchor box.
            anchor_maxes = anchor / 2.
            anchor_mins = -anchor_maxes

            intersect_mins = np.maximum(box_mins, anchor_mins)
            intersect_maxes = np.minimum(box_maxes, anchor_maxes)
            intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
            intersect_area = intersect_wh[0] * intersect_wh[1]
            anchor_area = anchor[0] * anchor[1]
            iou = intersect_area / (box_area + anchor_area - intersect_area)
            if iou > best_iou:
                best_iou = iou
                best_anchor = k

        if best_iou > 0:
            detectors_mask[i, j, best_anchor] = 1
            # Offsets inside the cell, log size ratio to the anchor, class.
            adjusted_box = np.array(
                [
                    box[0] - j, box[1] - i,
                    np.log(box[2] / anchors[best_anchor][0]),
                    np.log(box[3] / anchors[best_anchor][1]), box_class
                ],
                dtype=np.float32)
            matching_true_boxes[i, j, best_anchor] = adjusted_box
    return detectors_mask, matching_true_boxes
430 | 


--------------------------------------------------------------------------------
/object_classes.txt:
--------------------------------------------------------------------------------
1 | car


--------------------------------------------------------------------------------
/yolo_utils.py:
--------------------------------------------------------------------------------
 1 | import colorsys
 2 | import imghdr
 3 | import os
 4 | import random
 5 | from keras import backend as K
 6 | 
 7 | import numpy as np
 8 | from PIL import Image, ImageDraw, ImageFont
 9 | 
def read_classes(classes_path):
    """Return the class names stored one per line in ``classes_path``.

    Leading and trailing whitespace (including the newline) is stripped from
    every line; blank lines are kept as empty strings.
    """
    with open(classes_path) as classes_file:
        return [line.strip() for line in classes_file]
15 | 
def read_anchors(anchors_path):
    """Parse the anchors on the first line of ``anchors_path``.

    The first line must be a comma-separated list of numbers; any further
    lines are ignored. The values are returned as an array with two columns,
    one row per consecutive pair of numbers.
    """
    with open(anchors_path) as anchors_file:
        first_line = anchors_file.readline()
    values = np.array(list(map(float, first_line.split(','))))
    return values.reshape(-1, 2)
22 | 
def generate_colors(class_names):
    """Return one RGB colour tuple (0-255 ints) per entry in ``class_names``.

    Hues are spread evenly around the colour wheel at full saturation and
    value, then shuffled with a fixed seed so the result is the same on
    every run. The global ``random`` generator is re-seeded afterwards.
    """
    count = len(class_names)
    colors = []
    for index in range(count):
        red, green, blue = colorsys.hsv_to_rgb(index / count, 1., 1.)
        colors.append((int(red * 255), int(green * 255), int(blue * 255)))

    random.seed(10101)  # Fixed seed for consistent colors across runs.
    random.shuffle(colors)  # Shuffle colors to decorrelate adjacent classes.
    random.seed(None)  # Reset seed to default.
    return colors
31 | 
def scale_boxes(boxes, image_shape):
    """Scale the predicted boxes so they can be drawn on the image.

    Every box is multiplied element-wise by
    (height, width, height, width), taken from the first two entries of
    ``image_shape``.
    """
    img_height, img_width = image_shape[0], image_shape[1]
    # Shape (1, 4) so the factors broadcast across all boxes.
    scale = K.reshape(
        K.stack([img_height, img_width, img_height, img_width]), [1, 4])
    return boxes * scale
40 | 
def preprocess_image(img_path, model_image_size):
    """Load an image and build the normalised batch array for the model.

    Parameters
    ----------
    img_path : str
        Path of the image file to open.
    model_image_size : sequence
        Target size of the resized copy. It is reversed before being handed
        to ``Image.resize``, so it is presumably given as (height, width).

    Returns
    -------
    image : PIL.Image.Image
        The image as opened, at its original size.
    image_data : numpy.ndarray
        The resized image as float32 values divided by 255, with a leading
        batch dimension of size 1.
    """
    # The former ``imghdr.what`` probe was dropped: its result was never
    # used, and the module is deprecated and removed in Python 3.13.
    image = Image.open(img_path)
    resized_image = image.resize(tuple(reversed(model_image_size)), Image.BICUBIC)
    image_data = np.array(resized_image, dtype='float32')
    image_data /= 255.
    image_data = np.expand_dims(image_data, 0)  # Add batch dimension.
    return image, image_data
49 | 
def draw_boxes(image, out_scores, out_boxes, out_classes, class_names, colors):
    """Draw labelled detection boxes onto ``image`` in place.

    Detections are processed from last to first. For each one, the label
    ``"<class name> <score>"`` and the clipped box corners are printed, the
    box outline is drawn in the colour of its class, and the label is written
    on a filled rectangle of the same colour.

    :param image: PIL image to draw on (modified in place)
    :param out_scores: score of each detection
    :param out_boxes: boxes as (top, left, bottom, right), one per detection
    :param out_classes: class index of each detection
    :param class_names: class names, indexed by class index
    :param colors: colours, indexed by class index
    """
    img_width, img_height = image.size[0], image.size[1]
    font_size = np.floor(3e-2 * img_height + 0.5).astype('int32')
    font = ImageFont.truetype(font='font/FiraMono-Medium.otf', size=font_size)
    thickness = (img_width + img_height) // 300

    def to_pixel(value):
        # Round half up to an int32 pixel coordinate.
        return np.floor(value + 0.5).astype('int32')

    detections = list(enumerate(out_classes))
    for idx, cls in detections[::-1]:
        label = '{} {:.2f}'.format(class_names[cls], out_scores[idx])

        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(label, font)

        # Clip the rounded corners to the image bounds.
        top, left, bottom, right = out_boxes[idx]
        top = max(0, to_pixel(top))
        left = max(0, to_pixel(left))
        bottom = min(img_height, to_pixel(bottom))
        right = min(img_width, to_pixel(right))
        print(label, (left, top), (right, bottom))

        # Put the label above the box when it fits, otherwise just inside
        # the top edge.
        label_top = top - label_size[1]
        if label_top < 0:
            label_top = top + 1
        text_origin = np.array([left, label_top])

        box_color = colors[cls]
        # A thick outline is built from nested one-pixel rectangles.
        for inset in range(thickness):
            draw.rectangle(
                [left + inset, top + inset, right - inset, bottom - inset],
                outline=box_color)
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=box_color)
        draw.text(text_origin, label, fill=(0, 0, 0), font=font)
        del draw
83 | 


--------------------------------------------------------------------------------
/yolo_自动驾驶_车辆识别介绍.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hierarchyJK/Auto_driving_recognize/fa7825099a27d9dceb6cf6e88d892824618f6350/yolo_自动驾驶_车辆识别介绍.docx


--------------------------------------------------------------------------------
/yolo_自动驾驶_车辆识别介绍_20190124175811.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hierarchyJK/Auto_driving_recognize/fa7825099a27d9dceb6cf6e88d892824618f6350/yolo_自动驾驶_车辆识别介绍_20190124175811.pdf


--------------------------------------------------------------------------------