├── yolo2_data
├── car.jpg
├── detection.jpg
└── coco_classes.txt
├── config.py
├── README.md
├── decode.py
├── Main.py
├── Loss.py
├── model_darknet19.py
└── utils.py
/yolo2_data/car.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KOD-Chen/YOLOv2-Tensorflow/HEAD/yolo2_data/car.jpg
--------------------------------------------------------------------------------
/yolo2_data/detection.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KOD-Chen/YOLOv2-Tensorflow/HEAD/yolo2_data/detection.jpg
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # --------------------------------------
3 | # @Time : 2018/5/16$ 17:12$
4 | # @Author : KOD Chen
5 | # @Email : 821237536@qq.com
6 | # @File : configs$.py
7 | # Description :anchor尺寸、coco数据集的80个classes类别名称
8 | # --------------------------------------
9 |
10 | anchors = [[0.57273, 0.677385],
11 | [1.87446, 2.06253],
12 | [3.33843, 5.47434],
13 | [7.88282, 3.52778],
14 | [9.77052, 9.16828]]
15 |
16 | def read_coco_labels():
17 | f = open("./yolo2_data/coco_classes.txt")
18 | class_names = []
19 | for l in f.readlines():
20 | l = l.strip() # 去掉回车'\n'
21 | class_names.append(l)
22 | return class_names
23 |
24 | class_names = read_coco_labels()
25 |
--------------------------------------------------------------------------------
/yolo2_data/coco_classes.txt:
--------------------------------------------------------------------------------
1 | person
2 | bicycle
3 | car
4 | motorbike
5 | aeroplane
6 | bus
7 | train
8 | truck
9 | boat
10 | traffic light
11 | fire hydrant
12 | stop sign
13 | parking meter
14 | bench
15 | bird
16 | cat
17 | dog
18 | horse
19 | sheep
20 | cow
21 | elephant
22 | bear
23 | zebra
24 | giraffe
25 | backpack
26 | umbrella
27 | handbag
28 | tie
29 | suitcase
30 | frisbee
31 | skis
32 | snowboard
33 | sports ball
34 | kite
35 | baseball bat
36 | baseball glove
37 | skateboard
38 | surfboard
39 | tennis racket
40 | bottle
41 | wine glass
42 | cup
43 | fork
44 | knife
45 | spoon
46 | bowl
47 | banana
48 | apple
49 | sandwich
50 | orange
51 | broccoli
52 | carrot
53 | hot dog
54 | pizza
55 | donut
56 | cake
57 | chair
58 | sofa
59 | pottedplant
60 | bed
61 | diningtable
62 | toilet
63 | tvmonitor
64 | laptop
65 | mouse
66 | remote
67 | keyboard
68 | cell phone
69 | microwave
70 | oven
71 | toaster
72 | sink
73 | refrigerator
74 | book
75 | clock
76 | vase
77 | scissors
78 | teddy bear
79 | hair drier
80 | toothbrush
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # YOLOv2-Tensorflow
2 | ## 声明:
3 | 更详细的代码解读[Tensorflow实现YOLO2](https://zhuanlan.zhihu.com/p/36902889).
4 | 欢迎关注[我的知乎](https://www.zhihu.com/people/chensicheng/posts).
5 |
6 | ## 运行环境:
7 | Python3 + Tensorflow1.5 + OpenCV-python3.3.1 + Numpy1.13
8 | windows和ubuntu环境都可以
9 |
10 | ## 准备工作:
11 | 请在[yolo2检测模型](https://pan.baidu.com/s/1ZeT5HerjQxyUZ_L9d3X52w)下载模型,并放到yolo2_model文件夹下
12 |
13 | ## 文件说明:
14 | 1、model_darknet19.py:yolo2网络模型——darknet19
15 | 2、decode.py:解码darknet19网络得到的参数
16 | 3、utils.py:功能函数,包含:预处理输入图片、筛选边界框NMS、绘制筛选后的边界框
17 | 4、config.py:配置文件,包含anchor尺寸、coco数据集的80个classes类别名称
18 | 5、Main.py:YOLO_v2主函数,对应程序有三个步骤:
19 | (1)输入图片进入darknet19网络得到特征图,并进行解码得到:xmin xmax表示的边界框、置信度、类别概率
20 | (2)筛选解码后的回归边界框——NMS
21 | (3)绘制筛选后的边界框
22 | 6、Loss.py:Yolo_v2 Loss损失函数(train时候用,预测时候没有调用此程序)
23 | (1)IOU值最大的那个anchor与ground truth匹配,对应的预测框用来预测这个ground truth:计算xywh、置信度c(目标值为1)、类别概率p误差。
24 | (2)IOU小于某阈值的anchor对应的预测框:只计算置信度c(目标值为0)误差。
25 | (3)剩下IOU大于某阈值但不是max的anchor对应的预测框:丢弃,不计算任何误差。
26 | 7、yolo2_data文件夹:包含待检测输入图片car.jpg、检测后的输出图片detection.jpg、coco数据集80个类别名称coco_classes.txt
27 |
28 | ## 运行Main.py即可得到效果图:
29 | 1、car.jpg:输入的待检测图片
30 | 
31 | 2、detected.jpg:检测结果可视化
32 | 
33 |
--------------------------------------------------------------------------------
/decode.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # --------------------------------------
3 | # @Time : 2018/5/15$ 17:01$
4 | # @Author : KOD Chen
5 | # @Email : 821237536@qq.com
6 | # @File : decode$.py
7 | # Description :解码darknet19网络得到的参数.
8 | # --------------------------------------
9 |
10 | import tensorflow as tf
11 | import numpy as np
12 |
13 | def decode(model_output,output_sizes=(13,13),num_class=80,anchors=None):
14 | '''
15 | model_output:darknet19网络输出的特征图
16 | output_sizes:darknet19网络输出的特征图大小,默认是13*13(默认输入416*416,下采样32)
17 | '''
18 | H, W = output_sizes
19 | num_anchors = len(anchors) # 这里的anchor是在configs文件中设置的
20 | anchors = tf.constant(anchors, dtype=tf.float32) # 将传入的anchors转变成tf格式的常量列表
21 |
22 | # 13*13*num_anchors*(num_class+5),第一个维度自适应batchsize
23 | detection_result = tf.reshape(model_output,[-1,H*W,num_anchors,num_class+5])
24 |
25 | # darknet19网络输出转化——偏移量、置信度、类别概率
26 | xy_offset = tf.nn.sigmoid(detection_result[:,:,:,0:2]) # 中心坐标相对于该cell左上角的偏移量,sigmoid函数归一化到0-1
27 | wh_offset = tf.exp(detection_result[:,:,:,2:4]) #相对于anchor的wh比例,通过e指数解码
28 | obj_probs = tf.nn.sigmoid(detection_result[:,:,:,4]) # 置信度,sigmoid函数归一化到0-1
29 | class_probs = tf.nn.softmax(detection_result[:,:,:,5:]) # 网络回归的是'得分',用softmax转变成类别概率
30 |
31 | # 构建特征图每个cell的左上角的xy坐标
32 | height_index = tf.range(H,dtype=tf.float32) # range(0,13)
33 | width_index = tf.range(W,dtype=tf.float32) # range(0,13)
34 | # 变成x_cell=[[0,1,...,12],...,[0,1,...,12]]和y_cell=[[0,0,...,0],[1,...,1]...,[12,...,12]]
35 | x_cell,y_cell = tf.meshgrid(height_index,width_index)
36 | x_cell = tf.reshape(x_cell,[1,-1,1]) # 和上面[H*W,num_anchors,num_class+5]对应
37 | y_cell = tf.reshape(y_cell,[1,-1,1])
38 |
39 | # decode
40 | bbox_x = (x_cell + xy_offset[:,:,:,0]) / W
41 | bbox_y = (y_cell + xy_offset[:,:,:,1]) / H
42 | bbox_w = (anchors[:,0] * wh_offset[:,:,:,0]) / W
43 | bbox_h = (anchors[:,1] * wh_offset[:,:,:,1]) / H
44 | # 中心坐标+宽高box(x,y,w,h) -> xmin=x-w/2 -> 左上+右下box(xmin,ymin,xmax,ymax)
45 | bboxes = tf.stack([bbox_x-bbox_w/2, bbox_y-bbox_h/2,
46 | bbox_x+bbox_w/2, bbox_y+bbox_h/2], axis=3)
47 |
48 | return bboxes, obj_probs, class_probs
49 |
--------------------------------------------------------------------------------
/Main.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # --------------------------------------
3 | # @Time : 2018/5/16$ 17:17$
4 | # @Author : KOD Chen
5 | # @Email : 821237536@qq.com
6 | # @File : Main$.py
7 | # Description :YOLO_v2主函数.
8 | # --------------------------------------
9 |
10 | import numpy as np
11 | import tensorflow as tf
12 | import cv2
13 | from PIL import Image
14 |
15 | from YOLO_v2.model_darknet19 import darknet
16 | from YOLO_v2.decode import decode
17 | from YOLO_v2.utils import preprocess_image, postprocess, draw_detection
18 | from YOLO_v2.config import anchors, class_names
19 |
20 | def main():
21 | input_size = (416,416)
22 | image_file = './yolo2_data/car.jpg'
23 | image = cv2.imread(image_file)
24 | image_shape = image.shape[:2] #只取wh,channel=3不取
25 |
26 | # copy、resize416*416、归一化、在第0维增加存放batchsize维度
27 | image_cp = preprocess_image(image,input_size)
28 |
29 | # 【1】输入图片进入darknet19网络得到特征图,并进行解码得到:xmin xmax表示的边界框、置信度、类别概率
30 | tf_image = tf.placeholder(tf.float32,[1,input_size[0],input_size[1],3])
31 | model_output = darknet(tf_image) # darknet19网络输出的特征图
32 | output_sizes = input_size[0]//32, input_size[1]//32 # 特征图尺寸是图片下采样32倍
33 | output_decoded = decode(model_output=model_output,output_sizes=output_sizes,
34 | num_class=len(class_names),anchors=anchors) # 解码
35 |
36 | model_path = "./yolo2_model/yolo2_coco.ckpt"
37 | saver = tf.train.Saver()
38 | with tf.Session() as sess:
39 | saver.restore(sess,model_path)
40 | bboxes,obj_probs,class_probs = sess.run(output_decoded,feed_dict={tf_image:image_cp})
41 |
42 | # 【2】筛选解码后的回归边界框——NMS(post process后期处理)
43 | bboxes,scores,class_max_index = postprocess(bboxes,obj_probs,class_probs,image_shape=image_shape)
44 |
45 | # 【3】绘制筛选后的边界框
46 | img_detection = draw_detection(image, bboxes, scores, class_max_index, class_names)
47 | cv2.imwrite("./yolo2_data/detection.jpg", img_detection)
48 | print('YOLO_v2 detection has done!')
49 | cv2.imshow("detection_results", img_detection)
50 | cv2.waitKey(0)
51 |
52 | if __name__ == '__main__':
53 | main()
54 |
--------------------------------------------------------------------------------
/Loss.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # --------------------------------------
3 | # @Time : 2018/5/12$ 17:49$
4 | # @Author : KOD Chen
5 | # @Email : 821237536@qq.com
6 | # @File : Loss$.py
7 | # Description :Yolo_v2 Loss损失函数.
8 | # --------------------------------------
9 |
10 | import numpy as np
11 | import tensorflow as tf
12 |
13 | def compute_loss(predictions,targets,anchors,scales,num_classes=20,output_size=(13,13)):
14 | W,H = output_size
15 | C = num_classes
16 | B = len(anchors)
17 | anchors = tf.constant(anchors,dtype=tf.float32)
18 | anchors = tf.reshape(anchors,[1,1,B,2]) # 存放输入的anchors的wh
19 |
20 | # 【1】ground truth:期望值、真实值
21 | sprob,sconf,snoob,scoor = scales # loss不同部分的前面系数
22 | _coords = targets["coords"] # ground truth [-1, H*W, B, 4],真实坐标xywh
23 | _probs = targets["probs"] # class probability [-1, H*W, B, C] ,类别概率——one hot形式,C维
24 | _confs = targets["confs"] # 1 for object, 0 for background, [-1, H*W, B],置信度,每个边界框一个
25 | # ground truth计算IOU-->_up_left, _down_right
26 | _wh = tf.pow(_coords[:, :, :, 2:4], 2) * np.reshape([W, H], [1, 1, 1, 2])
27 | _areas = _wh[:, :, :, 0] * _wh[:, :, :, 1]
28 | _centers = _coords[:, :, :, 0:2]
29 | _up_left, _down_right = _centers - (_wh * 0.5), _centers + (_wh * 0.5)
30 | # ground truth汇总
31 | truths = tf.concat([_coords, tf.expand_dims(_confs, -1), _probs], 3)
32 |
33 | # 【2】decode the net prediction:预测值、网络输出值
34 | predictions = tf.reshape(predictions,[-1,H,W,B,(5+C)])
35 | # t_x, t_y, t_w, t_h
36 | coords = tf.reshape(predictions[:,:,:,:,0:4],[-1,H*W,B,4])
37 | coords_xy = tf.nn.sigmoid(coords[:,:,:,0:2]) # 0-1,xy是相对于cell左上角的偏移量
38 | coords_wh = tf.sqrt(tf.exp(coords[:,:,:,2:4])*anchors/np.reshape([W,H],[1,1,1,2])) # 0-1,除以特征图的尺寸13,解码成相对于整张图片的wh
39 | coords = tf.concat([coords_xy,coords_wh],axis=3) # [batch_size, H*W, B, 4]
40 | # 置信度
41 | confs = tf.nn.sigmoid(predictions[:,:,:,:,4])
42 | confs = tf.reshape(confs,[-1,H*W,B,1]) # 每个边界框一个置信度,每个cell有B个边界框
43 | # 类别概率
44 | probs = tf.nn.softmax(predictions[:,:,:,:,5:]) # 网络最后输出是"得分",通过softmax变成概率
45 | probs = tf.reshape(probs,[-1,H*W,B,C])
46 | # prediction汇总
47 | preds = tf.concat([coords,confs,probs],axis=3) # [-1, H*W, B, (4+1+C)]
48 | # prediction计算IOU-->up_left, down_right
49 | wh = tf.pow(coords[:, :, :, 2:4], 2) * np.reshape([W, H], [1, 1, 1, 2])
50 | areas = wh[:, :, :, 0] * wh[:, :, :, 1]
51 | centers = coords[:, :, :, 0:2]
52 | up_left, down_right = centers - (wh * 0.5), centers + (wh * 0.5)
53 |
54 |
55 | # ※※※【3】计算ground truth和anchor的IOU:※※※
56 | # 计算IOU只考虑形状,先将anchor与ground truth的中心点都偏移到同一位置(cell左上角),然后计算出对应的IOU值。
57 | # ①IOU值最大的那个anchor与ground truth匹配,对应的预测框用来预测这个ground truth:计算xywh、置信度c(目标值为1)、类别概率p误差。
58 | # ②IOU小于某阈值的anchor对应的预测框:只计算置信度c(目标值为0)误差。
59 | # ③剩下IOU大于某阈值但不是max的anchor对应的预测框:丢弃,不计算任何误差。
60 | inter_upleft = tf.maximum(up_left, _up_left)
61 | inter_downright = tf.minimum(down_right, _down_right)
62 | inter_wh = tf.maximum(inter_downright - inter_upleft, 0.0)
63 | intersects = inter_wh[:, :, :, 0] * inter_wh[:, :, :, 1]
64 | ious = tf.truediv(intersects, areas + _areas - intersects)
65 |
66 | best_iou_mask = tf.equal(ious, tf.reduce_max(ious, axis=2, keep_dims=True))
67 | best_iou_mask = tf.cast(best_iou_mask, tf.float32)
68 | mask = best_iou_mask * _confs # [-1, H*W, B]
69 | mask = tf.expand_dims(mask, -1) # [-1, H*W, B, 1]
70 |
71 | # 【4】计算各项损失所占的比例权重weight
72 | confs_w = snoob * (1 - mask) + sconf * mask
73 | coords_w = scoor * mask
74 | probs_w = sprob * mask
75 | weights = tf.concat([coords_w, confs_w, probs_w], axis=3)
76 |
77 | # 【5】计算loss:ground truth汇总和prediction汇总均方差损失函数,再乘以相应的比例权重
78 | loss = tf.pow(preds - truths, 2) * weights
79 | loss = tf.reduce_sum(loss, axis=[1, 2, 3])
80 | loss = 0.5 * tf.reduce_mean(loss)
81 |
82 | return loss
83 |
--------------------------------------------------------------------------------
/model_darknet19.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # --------------------------------------
3 | # @Time : 2018/5/15$ 12:12$
4 | # @Author : KOD Chen
5 | # @Email : 821237536@qq.com
6 | # @File : model_darknet19$.py
7 | # Description :yolo2网络模型——darknet19.
8 | # --------------------------------------
9 |
10 | import os
11 | import tensorflow as tf
12 | import numpy as np
13 |
14 | ################# 基础层:conv/pool/reorg(带passthrough的重组层) #############################################
15 | # 激活函数
16 | def leaky_relu(x):
17 | return tf.nn.leaky_relu(x,alpha=0.1,name='leaky_relu') # 或者tf.maximum(0.1*x,x)
18 |
19 | # Conv+BN:yolo2中每个卷积层后面都有一个BN层
20 | def conv2d(x,filters_num,filters_size,pad_size=0,stride=1,batch_normalize=True,
21 | activation=leaky_relu,use_bias=False,name='conv2d'):
22 | # padding,注意: 不用padding="SAME",否则可能会导致坐标计算错误
23 | if pad_size > 0:
24 | x = tf.pad(x,[[0,0],[pad_size,pad_size],[pad_size,pad_size],[0,0]])
25 | # 有BN层,所以后面有BN层的conv就不用偏置bias,并先不经过激活函数activation
26 | out = tf.layers.conv2d(x,filters=filters_num,kernel_size=filters_size,strides=stride,
27 | padding='VALID',activation=None,use_bias=use_bias,name=name)
28 | # BN,如果有,应该在卷积层conv和激活函数activation之间
29 | if batch_normalize:
30 | out = tf.layers.batch_normalization(out,axis=-1,momentum=0.9,training=False,name=name+'_bn')
31 | if activation:
32 | out = activation(out)
33 | return out
34 |
35 | # max_pool
36 | def maxpool(x,size=2,stride=2,name='maxpool'):
37 | return tf.layers.max_pooling2d(x,pool_size=size,strides=stride)
38 |
39 | # reorg layer(带passthrough的重组层)
40 | def reorg(x,stride):
41 | return tf.space_to_depth(x,block_size=stride)
42 | # 或者return tf.extract_image_patches(x,ksizes=[1,stride,stride,1],strides=[1,stride,stride,1],
43 | # rates=[1,1,1,1],padding='VALID')
44 | #########################################################################################################
45 |
46 | ################################### Darknet19 ###########################################################
47 | # 默认是coco数据集,最后一层维度是anchor_num*(class_num+5)=5*(80+5)=425
48 | def darknet(images,n_last_channels=425):
49 | net = conv2d(images, filters_num=32, filters_size=3, pad_size=1, name='conv1')
50 | net = maxpool(net, size=2, stride=2, name='pool1')
51 |
52 | net = conv2d(net, 64, 3, 1, name='conv2')
53 | net = maxpool(net, 2, 2, name='pool2')
54 |
55 | net = conv2d(net, 128, 3, 1, name='conv3_1')
56 | net = conv2d(net, 64, 1, 0, name='conv3_2')
57 | net = conv2d(net, 128, 3, 1, name='conv3_3')
58 | net = maxpool(net, 2, 2, name='pool3')
59 |
60 | net = conv2d(net, 256, 3, 1, name='conv4_1')
61 | net = conv2d(net, 128, 1, 0, name='conv4_2')
62 | net = conv2d(net, 256, 3, 1, name='conv4_3')
63 | net = maxpool(net, 2, 2, name='pool4')
64 |
65 | net = conv2d(net, 512, 3, 1, name='conv5_1')
66 | net = conv2d(net, 256, 1, 0,name='conv5_2')
67 | net = conv2d(net,512, 3, 1, name='conv5_3')
68 | net = conv2d(net, 256, 1, 0, name='conv5_4')
69 | net = conv2d(net, 512, 3, 1, name='conv5_5')
70 | shortcut = net # 存储这一层特征图,以便后面passthrough层
71 | net = maxpool(net, 2, 2, name='pool5')
72 |
73 | net = conv2d(net, 1024, 3, 1, name='conv6_1')
74 | net = conv2d(net, 512, 1, 0, name='conv6_2')
75 | net = conv2d(net, 1024, 3, 1, name='conv6_3')
76 | net = conv2d(net, 512, 1, 0, name='conv6_4')
77 | net = conv2d(net, 1024, 3, 1, name='conv6_5')
78 |
79 | net = conv2d(net, 1024, 3, 1, name='conv7_1')
80 | net = conv2d(net, 1024, 3, 1, name='conv7_2')
81 | # shortcut增加了一个中间卷积层,先采用64个1*1卷积核进行卷积,然后再进行passthrough处理
82 | # 这样26*26*512 -> 26*26*64 -> 13*13*256的特征图
83 | shortcut = conv2d(shortcut, 64, 1, 0, name='conv_shortcut')
84 | shortcut = reorg(shortcut, 2)
85 | net = tf.concat([shortcut, net], axis=-1) # channel整合到一起
86 | net = conv2d(net, 1024, 3, 1, name='conv8')
87 |
88 | # detection layer:最后用一个1*1卷积去调整channel,该层没有BN层和激活函数
89 | output = conv2d(net, filters_num=n_last_channels, filters_size=1, batch_normalize=False,
90 | activation=None, use_bias=True, name='conv_dec')
91 |
92 | return output
93 | #########################################################################################################
94 |
95 | if __name__ == '__main__':
96 | x = tf.random_normal([1, 416, 416, 3])
97 | model_output = darknet(x)
98 |
99 | saver = tf.train.Saver()
100 | with tf.Session() as sess:
101 | # 必须先restore模型才能打印shape;导入模型时,上面每层网络的name不能修改,否则找不到
102 | saver.restore(sess, "./yolo2_model/yolo2_coco.ckpt")
103 | print(sess.run(model_output).shape) # (1,13,13,425)
104 |
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # --------------------------------------
3 | # @Time : 2018/5/16$ 14:48$
4 | # @Author : KOD Chen
5 | # @Email : 821237536@qq.com
6 | # @File : utils$.py
7 | # Description :功能函数,包含:预处理输入图片、筛选边界框NMS、绘制筛选后的边界框。
8 | # --------------------------------------
9 |
10 | import random
11 | import colorsys
12 | import cv2
13 | import numpy as np
14 |
15 | # 【1】图像预处理(pre process前期处理)
16 | def preprocess_image(image,image_size=(416,416)):
17 | # 复制原图像
18 | image_cp = np.copy(image).astype(np.float32)
19 |
20 | # resize image
21 | image_rgb = cv2.cvtColor(image_cp,cv2.COLOR_BGR2RGB)
22 | image_resized = cv2.resize(image_rgb,image_size)
23 |
24 | # normalize归一化
25 | image_normalized = image_resized.astype(np.float32) / 225.0
26 |
27 | # 增加一个维度在第0维——batch_size
28 | image_expanded = np.expand_dims(image_normalized,axis=0)
29 |
30 | return image_expanded
31 |
32 | # 【2】筛选解码后的回归边界框——NMS(post process后期处理)
33 | def postprocess(bboxes,obj_probs,class_probs,image_shape=(416,416),threshold=0.5):
34 | # bboxes表示为:图片中有多少box就多少行;4列分别是box(xmin,ymin,xmax,ymax)
35 | bboxes = np.reshape(bboxes,[-1,4])
36 | # 将所有box还原成图片中真实的位置
37 | bboxes[:,0:1] *= float(image_shape[1]) # xmin*width
38 | bboxes[:,1:2] *= float(image_shape[0]) # ymin*height
39 | bboxes[:,2:3] *= float(image_shape[1]) # xmax*width
40 | bboxes[:,3:4] *= float(image_shape[0]) # ymax*height
41 | bboxes = bboxes.astype(np.int32)
42 |
43 | # (1)cut the box:将边界框超出整张图片(0,0)—(415,415)的部分cut掉
44 | bbox_min_max = [0,0,image_shape[1]-1,image_shape[0]-1]
45 | bboxes = bboxes_cut(bbox_min_max,bboxes)
46 |
47 | # ※※※置信度*max类别概率=类别置信度scores※※※
48 | obj_probs = np.reshape(obj_probs,[-1])
49 | class_probs = np.reshape(class_probs,[len(obj_probs),-1])
50 | class_max_index = np.argmax(class_probs,axis=1) # 得到max类别概率对应的维度
51 | class_probs = class_probs[np.arange(len(obj_probs)),class_max_index]
52 | scores = obj_probs * class_probs
53 |
54 | # ※※※类别置信度scores>threshold的边界框bboxes留下※※※
55 | keep_index = scores > threshold
56 | class_max_index = class_max_index[keep_index]
57 | scores = scores[keep_index]
58 | bboxes = bboxes[keep_index]
59 |
60 | # (2)排序top_k(默认为400)
61 | class_max_index,scores,bboxes = bboxes_sort(class_max_index,scores,bboxes)
62 | # ※※※(3)NMS※※※
63 | class_max_index,scores,bboxes = bboxes_nms(class_max_index,scores,bboxes)
64 |
65 | return bboxes,scores,class_max_index
66 |
67 | # 【3】绘制筛选后的边界框
68 | def draw_detection(im, bboxes, scores, cls_inds, labels, thr=0.3):
69 | # Generate colors for drawing bounding boxes.
70 | hsv_tuples = [(x/float(len(labels)), 1., 1.) for x in range(len(labels))]
71 | colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
72 | colors = list(
73 | map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),colors))
74 | random.seed(10101) # Fixed seed for consistent colors across runs.
75 | random.shuffle(colors) # Shuffle colors to decorrelate adjacent classes.
76 | random.seed(None) # Reset seed to default.
77 | # draw image
78 | imgcv = np.copy(im)
79 | h, w, _ = imgcv.shape
80 | for i, box in enumerate(bboxes):
81 | if scores[i] < thr:
82 | continue
83 | cls_indx = cls_inds[i]
84 |
85 | thick = int((h + w) / 300)
86 | cv2.rectangle(imgcv,(box[0], box[1]), (box[2], box[3]),colors[cls_indx], thick)
87 | mess = '%s: %.3f' % (labels[cls_indx], scores[i])
88 | if box[1] < 20:
89 | text_loc = (box[0] + 2, box[1] + 15)
90 | else:
91 | text_loc = (box[0], box[1] - 10)
92 | # cv2.rectangle(imgcv, (box[0], box[1]-20), ((box[0]+box[2])//3+120, box[1]-8), (125, 125, 125), -1) # puttext函数的背景
93 | cv2.putText(imgcv, mess, text_loc, cv2.FONT_HERSHEY_SIMPLEX, 1e-3*h, (255,255,255), thick//3)
94 | return imgcv
95 |
96 | ######################## 对应【2】:筛选解码后的回归边界框#########################################
97 | # (1)cut the box:将边界框超出整张图片(0,0)—(415,415)的部分cut掉
98 | def bboxes_cut(bbox_min_max,bboxes):
99 | bboxes = np.copy(bboxes)
100 | bboxes = np.transpose(bboxes)
101 | bbox_min_max = np.transpose(bbox_min_max)
102 | # cut the box
103 | bboxes[0] = np.maximum(bboxes[0],bbox_min_max[0]) # xmin
104 | bboxes[1] = np.maximum(bboxes[1],bbox_min_max[1]) # ymin
105 | bboxes[2] = np.minimum(bboxes[2],bbox_min_max[2]) # xmax
106 | bboxes[3] = np.minimum(bboxes[3],bbox_min_max[3]) # ymax
107 | bboxes = np.transpose(bboxes)
108 | return bboxes
109 |
110 | # (2)按类别置信度scores降序,对边界框进行排序并仅保留top_k
111 | def bboxes_sort(classes,scores,bboxes,top_k=400):
112 | index = np.argsort(-scores)
113 | classes = classes[index][:top_k]
114 | scores = scores[index][:top_k]
115 | bboxes = bboxes[index][:top_k]
116 | return classes,scores,bboxes
117 |
118 | # (3)计算IOU+NMS
119 | # 计算两个box的IOU
120 | def bboxes_iou(bboxes1,bboxes2):
121 | bboxes1 = np.transpose(bboxes1)
122 | bboxes2 = np.transpose(bboxes2)
123 |
124 | # 计算两个box的交集:交集左上角的点取两个box的max,交集右下角的点取两个box的min
125 | int_ymin = np.maximum(bboxes1[0], bboxes2[0])
126 | int_xmin = np.maximum(bboxes1[1], bboxes2[1])
127 | int_ymax = np.minimum(bboxes1[2], bboxes2[2])
128 | int_xmax = np.minimum(bboxes1[3], bboxes2[3])
129 |
130 | # 计算两个box交集的wh:如果两个box没有交集,那么wh为0(按照计算方式wh为负数,跟0比较取最大值)
131 | int_h = np.maximum(int_ymax-int_ymin,0.)
132 | int_w = np.maximum(int_xmax-int_xmin,0.)
133 |
134 | # 计算IOU
135 | int_vol = int_h * int_w # 交集面积
136 | vol1 = (bboxes1[2] - bboxes1[0]) * (bboxes1[3] - bboxes1[1]) # bboxes1面积
137 | vol2 = (bboxes2[2] - bboxes2[0]) * (bboxes2[3] - bboxes2[1]) # bboxes2面积
138 | IOU = int_vol / (vol1 + vol2 - int_vol) # IOU=交集/并集
139 | return IOU
140 | # NMS,或者用tf.image.non_max_suppression(boxes, scores,self.max_output_size, self.iou_threshold)
141 | def bboxes_nms(classes, scores, bboxes, nms_threshold=0.5):
142 | keep_bboxes = np.ones(scores.shape, dtype=np.bool)
143 | for i in range(scores.size-1):
144 | if keep_bboxes[i]:
145 | # Computer overlap with bboxes which are following.
146 | overlap = bboxes_iou(bboxes[i], bboxes[(i+1):])
147 | # Overlap threshold for keeping + checking part of the same class
148 | keep_overlap = np.logical_or(overlap < nms_threshold, classes[(i+1):] != classes[i])
149 | keep_bboxes[(i+1):] = np.logical_and(keep_bboxes[(i+1):], keep_overlap)
150 |
151 | idxes = np.where(keep_bboxes)
152 | return classes[idxes], scores[idxes], bboxes[idxes]
153 | ###################################################################################################
154 |
--------------------------------------------------------------------------------