├── IMG_5707.JPG
├── README.md
├── VOC2007
    ├── Annotations
    │   └── 1
    ├── ImageSets
    │   ├── Layout
    │   │   └── 1
    │   ├── Main
    │   │   └── 1
    │   └── Segmentation
    │   │   └── 1
    ├── Images
    │   └── 1
    ├── JPEGImages
    │   └── 1
    ├── SegmentationClass
    │   └── 1
    ├── SegmentationObject
    │   └── 1
    └── test.py
├── __pycache__
    └── yolo.cpython-35.pyc
├── convert.py
├── font
    ├── FiraMono-Medium.otf
    └── SIL Open Font License.txt
├── logs
    └── 1
├── model_data
    ├── 1
    ├── coco_classes.txt
    ├── tiny_yolo_anchors.txt
    ├── voc_classes.txt
    └── yolo_anchors.txt
├── result
    └── 1
├── test.txt
├── tiny_train.py
├── train.txt
├── train_bottleneck.py
├── val.txt
├── voc_annotation.py
├── yolo.docx
├── yolo.py
├── yolo3
    ├── __init__.py
    ├── __pycache__
    │   ├── model.cpython-35.pyc
    │   ├── tinymodel.cpython-35.pyc
    │   └── utils.cpython-35.pyc
    ├── tinymodel.py
    └── utils.py
└── yolo_test_batch.py


/IMG_5707.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Eatzhy/tiny-yolov3/495e8df17bba289674ea2b11bdb9fe2fa48462f5/IMG_5707.JPG


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # tiny-yolov3
 2 | 使用tiny——yolov3（keras）检测自己的数据集，三类目标
 3 | 
 4 | 程序是根据github上yolov3修改的，所以大面积重复，使用tiny-yolo用法如下：
 5 | 
 6 | 1、下载tiny-yolov3工程，打开yolo.docx文档，按照文档中的教程对自己的
 7 | 图像集做标注，并生成一些必须的图像路径txt文件。
 8 | 
 9 | 2、训练图像使用 tiny_train.py
10 | 训练后的权重文件会保存在logs下
11 | 
12 | 3、对待测图像进行批量测试：
13 | yolo_test_batch.py
14 | 然后会在VOC/SegmentationClass生成检测后的结果
15 | 
16 | 我的程序是在ubantu下跑的，当然改一下路径之类的就可以在windows下测试啦
17 | 
18 | 有问题欢迎讨论，加微信
19 | 
20 | 


--------------------------------------------------------------------------------
/VOC2007/Annotations/1:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/VOC2007/ImageSets/Layout/1:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/VOC2007/ImageSets/Main/1:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/VOC2007/ImageSets/Segmentation/1:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/VOC2007/Images/1:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/VOC2007/JPEGImages/1:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/VOC2007/SegmentationClass/1:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/VOC2007/SegmentationObject/1:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/VOC2007/test.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | Created on Sat Oct 27 12:59:20 2018
 4 | 
 5 | @author: Administrator
 6 | """
 7 | 
 8 | import os
 9 | import random
10 | 
11 | trainval_percent = 0.1
12 | train_percent = 0.9
13 | xmlfilepath = 'Annotations'
14 | txtsavepath = 'ImageSets\Main'
15 | total_xml = os.listdir(xmlfilepath)
16 | 
17 | num = len(total_xml)
18 | list = range(num)
19 | tv = int(num * trainval_percent)
20 | tr = int(tv * train_percent)
21 | trainval = random.sample(list, tv)
22 | train = random.sample(trainval, tr)
23 | 
24 | ftrainval = open('ImageSets/Main/trainval.txt', 'w')
25 | ftest = open('ImageSets/Main/test.txt', 'w')
26 | ftrain = open('ImageSets/Main/train.txt', 'w')
27 | fval = open('ImageSets/Main/val.txt', 'w')
28 | 
29 | for i in list:
30 |     name = total_xml[i][:-4] + '\n'
31 |     if i in trainval:
32 |         ftrainval.write(name)
33 |         if i in train:
34 |             ftest.write(name)
35 |         else:
36 |             fval.write(name)
37 |     else:
38 |         ftrain.write(name)
39 | 
40 | ftrainval.close()
41 | ftrain.close()
42 | fval.close()
43 | ftest.close()
44 | 


--------------------------------------------------------------------------------
/__pycache__/yolo.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Eatzhy/tiny-yolov3/495e8df17bba289674ea2b11bdb9fe2fa48462f5/__pycache__/yolo.cpython-35.pyc


--------------------------------------------------------------------------------
/convert.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/env python
  2 | """
  3 | Reads Darknet config and weights and creates Keras model with TF backend.
  4 | 
  5 | """
  6 | 
  7 | import argparse
  8 | import configparser
  9 | import io
 10 | import os
 11 | from collections import defaultdict
 12 | 
 13 | import numpy as np
 14 | from keras import backend as K
 15 | from keras.layers import (Conv2D, Input, ZeroPadding2D, Add,
 16 |                           UpSampling2D, MaxPooling2D, Concatenate)
 17 | from keras.layers.advanced_activations import LeakyReLU
 18 | from keras.layers.normalization import BatchNormalization
 19 | from keras.models import Model
 20 | from keras.regularizers import l2
 21 | from keras.utils.vis_utils import plot_model as plot
 22 | 
 23 | 
 24 | parser = argparse.ArgumentParser(description='Darknet To Keras Converter.')
 25 | parser.add_argument('config_path', help='Path to Darknet cfg file.')
 26 | parser.add_argument('weights_path', help='Path to Darknet weights file.')
 27 | parser.add_argument('output_path', help='Path to output Keras model file.')
 28 | parser.add_argument(
 29 |     '-p',
 30 |     '--plot_model',
 31 |     help='Plot generated Keras model and save as image.',
 32 |     action='store_true')
 33 | parser.add_argument(
 34 |     '-w',
 35 |     '--weights_only',
 36 |     help='Save as Keras weights file instead of model file.',
 37 |     action='store_true')
 38 | 
 39 | def unique_config_sections(config_file):
 40 |     """Convert all config sections to have unique names.
 41 | 
 42 |     Adds unique suffixes to config sections for compability with configparser.
 43 |     """
 44 |     section_counters = defaultdict(int)
 45 |     output_stream = io.StringIO()
 46 |     with open(config_file) as fin:
 47 |         for line in fin:
 48 |             if line.startswith('['):
 49 |                 section = line.strip().strip('[]')
 50 |                 _section = section + '_' + str(section_counters[section])
 51 |                 section_counters[section] += 1
 52 |                 line = line.replace(section, _section)
 53 |             output_stream.write(line)
 54 |     output_stream.seek(0)
 55 |     return output_stream
 56 | 
 57 | # %%
 58 | def _main(args):
 59 |     config_path = os.path.expanduser(args.config_path)
 60 |     weights_path = os.path.expanduser(args.weights_path)
 61 |     assert config_path.endswith('.cfg'), '{} is not a .cfg file'.format(
 62 |         config_path)
 63 |     assert weights_path.endswith(
 64 |         '.weights'), '{} is not a .weights file'.format(weights_path)
 65 | 
 66 |     output_path = os.path.expanduser(args.output_path)
 67 |     assert output_path.endswith(
 68 |         '.h5'), 'output path {} is not a .h5 file'.format(output_path)
 69 |     output_root = os.path.splitext(output_path)[0]
 70 | 
 71 |     # Load weights and config.
 72 |     print('Loading weights.')
 73 |     weights_file = open(weights_path, 'rb')
 74 |     major, minor, revision = np.ndarray(
 75 |         shape=(3, ), dtype='int32', buffer=weights_file.read(12))
 76 |     if (major*10+minor)>=2 and major<1000 and minor<1000:
 77 |         seen = np.ndarray(shape=(1,), dtype='int64', buffer=weights_file.read(8))
 78 |     else:
 79 |         seen = np.ndarray(shape=(1,), dtype='int32', buffer=weights_file.read(4))
 80 |     print('Weights Header: ', major, minor, revision, seen)
 81 | 
 82 |     print('Parsing Darknet config.')
 83 |     unique_config_file = unique_config_sections(config_path)
 84 |     cfg_parser = configparser.ConfigParser()
 85 |     cfg_parser.read_file(unique_config_file)
 86 | 
 87 |     print('Creating Keras model.')
 88 |     input_layer = Input(shape=(None, None, 3))
 89 |     prev_layer = input_layer
 90 |     all_layers = []
 91 | 
 92 |     weight_decay = float(cfg_parser['net_0']['decay']
 93 |                          ) if 'net_0' in cfg_parser.sections() else 5e-4
 94 |     count = 0
 95 |     out_index = []
 96 |     for section in cfg_parser.sections():
 97 |         print('Parsing section {}'.format(section))
 98 |         if section.startswith('convolutional'):
 99 |             filters = int(cfg_parser[section]['filters'])
100 |             size = int(cfg_parser[section]['size'])
101 |             stride = int(cfg_parser[section]['stride'])
102 |             pad = int(cfg_parser[section]['pad'])
103 |             activation = cfg_parser[section]['activation']
104 |             batch_normalize = 'batch_normalize' in cfg_parser[section]
105 | 
106 |             padding = 'same' if pad == 1 and stride == 1 else 'valid'
107 | 
108 |             # Setting weights.
109 |             # Darknet serializes convolutional weights as:
110 |             # [bias/beta, [gamma, mean, variance], conv_weights]
111 |             prev_layer_shape = K.int_shape(prev_layer)
112 | 
113 |             weights_shape = (size, size, prev_layer_shape[-1], filters)
114 |             darknet_w_shape = (filters, weights_shape[2], size, size)
115 |             weights_size = np.product(weights_shape)
116 | 
117 |             print('conv2d', 'bn'
118 |                   if batch_normalize else '  ', activation, weights_shape)
119 | 
120 |             conv_bias = np.ndarray(
121 |                 shape=(filters, ),
122 |                 dtype='float32',
123 |                 buffer=weights_file.read(filters * 4))
124 |             count += filters
125 | 
126 |             if batch_normalize:
127 |                 bn_weights = np.ndarray(
128 |                     shape=(3, filters),
129 |                     dtype='float32',
130 |                     buffer=weights_file.read(filters * 12))
131 |                 count += 3 * filters
132 | 
133 |                 bn_weight_list = [
134 |                     bn_weights[0],  # scale gamma
135 |                     conv_bias,  # shift beta
136 |                     bn_weights[1],  # running mean
137 |                     bn_weights[2]  # running var
138 |                 ]
139 | 
140 |             conv_weights = np.ndarray(
141 |                 shape=darknet_w_shape,
142 |                 dtype='float32',
143 |                 buffer=weights_file.read(weights_size * 4))
144 |             count += weights_size
145 | 
146 |             # DarkNet conv_weights are serialized Caffe-style:
147 |             # (out_dim, in_dim, height, width)
148 |             # We would like to set these to Tensorflow order:
149 |             # (height, width, in_dim, out_dim)
150 |             conv_weights = np.transpose(conv_weights, [2, 3, 1, 0])
151 |             conv_weights = [conv_weights] if batch_normalize else [
152 |                 conv_weights, conv_bias
153 |             ]
154 | 
155 |             # Handle activation.
156 |             act_fn = None
157 |             if activation == 'leaky':
158 |                 pass  # Add advanced activation later.
159 |             elif activation != 'linear':
160 |                 raise ValueError(
161 |                     'Unknown activation function `{}` in section {}'.format(
162 |                         activation, section))
163 | 
164 |             # Create Conv2D layer
165 |             if stride>1:
166 |                 # Darknet uses left and top padding instead of 'same' mode
167 |                 prev_layer = ZeroPadding2D(((1,0),(1,0)))(prev_layer)
168 |             conv_layer = (Conv2D(
169 |                 filters, (size, size),
170 |                 strides=(stride, stride),
171 |                 kernel_regularizer=l2(weight_decay),
172 |                 use_bias=not batch_normalize,
173 |                 weights=conv_weights,
174 |                 activation=act_fn,
175 |                 padding=padding))(prev_layer)
176 | 
177 |             if batch_normalize:
178 |                 conv_layer = (BatchNormalization(
179 |                     weights=bn_weight_list))(conv_layer)
180 |             prev_layer = conv_layer
181 | 
182 |             if activation == 'linear':
183 |                 all_layers.append(prev_layer)
184 |             elif activation == 'leaky':
185 |                 act_layer = LeakyReLU(alpha=0.1)(prev_layer)
186 |                 prev_layer = act_layer
187 |                 all_layers.append(act_layer)
188 | 
189 |         elif section.startswith('route'):
190 |             ids = [int(i) for i in cfg_parser[section]['layers'].split(',')]
191 |             layers = [all_layers[i] for i in ids]
192 |             if len(layers) > 1:
193 |                 print('Concatenating route layers:', layers)
194 |                 concatenate_layer = Concatenate()(layers)
195 |                 all_layers.append(concatenate_layer)
196 |                 prev_layer = concatenate_layer
197 |             else:
198 |                 skip_layer = layers[0]  # only one layer to route
199 |                 all_layers.append(skip_layer)
200 |                 prev_layer = skip_layer
201 | 
202 |         elif section.startswith('maxpool'):
203 |             size = int(cfg_parser[section]['size'])
204 |             stride = int(cfg_parser[section]['stride'])
205 |             all_layers.append(
206 |                 MaxPooling2D(
207 |                     pool_size=(size, size),
208 |                     strides=(stride, stride),
209 |                     padding='same')(prev_layer))
210 |             prev_layer = all_layers[-1]
211 | 
212 |         elif section.startswith('shortcut'):
213 |             index = int(cfg_parser[section]['from'])
214 |             activation = cfg_parser[section]['activation']
215 |             assert activation == 'linear', 'Only linear activation supported.'
216 |             all_layers.append(Add()([all_layers[index], prev_layer]))
217 |             prev_layer = all_layers[-1]
218 | 
219 |         elif section.startswith('upsample'):
220 |             stride = int(cfg_parser[section]['stride'])
221 |             assert stride == 2, 'Only stride=2 supported.'
222 |             all_layers.append(UpSampling2D(stride)(prev_layer))
223 |             prev_layer = all_layers[-1]
224 | 
225 |         elif section.startswith('yolo'):
226 |             out_index.append(len(all_layers)-1)
227 |             all_layers.append(None)
228 |             prev_layer = all_layers[-1]
229 | 
230 |         elif section.startswith('net'):
231 |             pass
232 | 
233 |         else:
234 |             raise ValueError(
235 |                 'Unsupported section header type: {}'.format(section))
236 | 
237 |     # Create and save model.
238 |     if len(out_index)==0: out_index.append(len(all_layers)-1)
239 |     model = Model(inputs=input_layer, outputs=[all_layers[i] for i in out_index])
240 |     print(model.summary())
241 |     if args.weights_only:
242 |         model.save_weights('{}'.format(output_path))
243 |         print('Saved Keras weights to {}'.format(output_path))
244 |     else:
245 |         model.save('{}'.format(output_path))
246 |         print('Saved Keras model to {}'.format(output_path))
247 | 
248 |     # Check to see if all weights have been read.
249 |     remaining_weights = len(weights_file.read()) / 4
250 |     weights_file.close()
251 |     print('Read {} of {} from Darknet weights.'.format(count, count +
252 |                                                        remaining_weights))
253 |     if remaining_weights > 0:
254 |         print('Warning: {} unused weights'.format(remaining_weights))
255 | 
256 |     if args.plot_model:
257 |         plot(model, to_file='{}.png'.format(output_root), show_shapes=True)
258 |         print('Saved model plot to {}.png'.format(output_root))
259 | 
260 | 
261 | if __name__ == '__main__':
262 |     _main(parser.parse_args())
263 | 


--------------------------------------------------------------------------------
/font/FiraMono-Medium.otf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Eatzhy/tiny-yolov3/495e8df17bba289674ea2b11bdb9fe2fa48462f5/font/FiraMono-Medium.otf


--------------------------------------------------------------------------------
/font/SIL Open Font License.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2014, Mozilla Foundation https://mozilla.org/ with Reserved Font Name Fira Mono.
 2 | 
 3 | Copyright (c) 2014, Telefonica S.A.
 4 | 
 5 | This Font Software is licensed under the SIL Open Font License, Version 1.1.
 6 | This license is copied below, and is also available with a FAQ at: http://scripts.sil.org/OFL
 7 | 
 8 | -----------------------------------------------------------
 9 | SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007
10 | -----------------------------------------------------------
11 | 
12 | PREAMBLE
13 | The goals of the Open Font License (OFL) are to stimulate worldwide development of collaborative font projects, to support the font creation efforts of academic and linguistic communities, and to provide a free and open framework in which fonts may be shared and improved in partnership with others.
14 | 
15 | The OFL allows the licensed fonts to be used, studied, modified and redistributed freely as long as they are not sold by themselves. The fonts, including any derivative works, can be bundled, embedded, redistributed and/or sold with any software provided that any reserved names are not used by derivative works. The fonts and derivatives, however, cannot be released under any other type of license. The requirement for fonts to remain under this license does not apply to any document created using the fonts or their derivatives.
16 | 
17 | DEFINITIONS
18 | "Font Software" refers to the set of files released by the Copyright Holder(s) under this license and clearly marked as such. This may include source files, build scripts and documentation.
19 | 
20 | "Reserved Font Name" refers to any names specified as such after the copyright statement(s).
21 | 
22 | "Original Version" refers to the collection of Font Software components as distributed by the Copyright Holder(s).
23 | 
24 | "Modified Version" refers to any derivative made by adding to, deleting, or substituting -- in part or in whole -- any of the components of the Original Version, by changing formats or by porting the Font Software to a new environment.
25 | 
26 | "Author" refers to any designer, engineer, programmer, technical writer or other person who contributed to the Font Software.
27 | 
28 | PERMISSION & CONDITIONS
29 | Permission is hereby granted, free of charge, to any person obtaining a copy of the Font Software, to use, study, copy, merge, embed, modify, redistribute, and sell modified and unmodified copies of the Font Software, subject to the following conditions:
30 | 
31 | 1) Neither the Font Software nor any of its individual components, in Original or Modified Versions, may be sold by itself.
32 | 
33 | 2) Original or Modified Versions of the Font Software may be bundled, redistributed and/or sold with any software, provided that each copy contains the above copyright notice and this license. These can be included either as stand-alone text files, human-readable headers or in the appropriate machine-readable metadata fields within text or binary files as long as those fields can be easily viewed by the user.
34 | 
35 | 3) No Modified Version of the Font Software may use the Reserved Font Name(s) unless explicit written permission is granted by the corresponding Copyright Holder. This restriction only applies to the primary font name as presented to the users.
36 | 
37 | 4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font Software shall not be used to promote, endorse or advertise any Modified Version, except to acknowledge the contribution(s) of the Copyright Holder(s) and the Author(s) or with their explicit written permission.
38 | 
39 | 5) The Font Software, modified or unmodified, in part or in whole, must be distributed entirely under this license, and must not be distributed under any other license. The requirement for fonts to remain under this license does not apply to any document created using the Font Software.
40 | 
41 | TERMINATION
42 | This license becomes null and void if any of the above conditions are not met.
43 | 
44 | DISCLAIMER
45 | THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM OTHER DEALINGS IN THE FONT SOFTWARE.


--------------------------------------------------------------------------------
/logs/1:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/model_data/1:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/model_data/coco_classes.txt:
--------------------------------------------------------------------------------
1 | class1
2 | class2
3 | class3
4 | 
5 | 


--------------------------------------------------------------------------------
/model_data/tiny_yolo_anchors.txt:
--------------------------------------------------------------------------------
1 | 10,14,  23,27,  37,58,  81,82,  135,169,  344,319
2 | 


--------------------------------------------------------------------------------
/model_data/voc_classes.txt:
--------------------------------------------------------------------------------
1 | class1
2 | class2
3 | class3


--------------------------------------------------------------------------------
/model_data/yolo_anchors.txt:
--------------------------------------------------------------------------------
1 | 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
2 | 


--------------------------------------------------------------------------------
/result/1:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/test.txt:
--------------------------------------------------------------------------------
1 | /home/zhangyang/yolo/VOC2007/JPEGImages/00026.jpg 8,349,62,403,1
2 | /home/zhangyang/yolo/VOC2007/JPEGImages/00045.jpg 108,13,213,106,2
3 | /home/zhangyang/yolo/VOC2007/JPEGImages/00013.jpg 218,406,474,503,0
4 | /home/zhangyang/yolo/VOC2007/JPEGImages/00012.jpg 203,139,508,337,0
5 | /home/zhangyang/yolo/VOC2007/JPEGImages/00053.jpg 277,393,387,508,2
6 | 


--------------------------------------------------------------------------------
/tiny_train.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | 不加载预权重的训练
  4 | 
  5 | @author: Administrator
  6 | """
  7 | import numpy as np
  8 | import keras.backend as K
  9 | from keras.layers import Input, Lambda
 10 | from keras.models import Model
 11 | from keras.callbacks import TensorBoard, ModelCheckpoint
 12 | from yolo3.tinymodel import preprocess_true_boxes, tiny_yolo_body, yolo_loss
 13 | from yolo3.utils import get_random_data
 14 | #import cv2
 15 | 
 16 | #需要执行的内容
 17 | def _main():
 18 |     annotation_path = 'train.txt'
 19 |     log_dir = 'logs/'
 20 |     classes_path = 'model_data/voc_classes.txt'
 21 |     anchors_path = 'model_data/tiny_yolo_anchors.txt'    
 22 |     class_names = get_classes(classes_path)
 23 |     anchors = get_anchors(anchors_path)
 24 |     input_shape = (416,416)  # multiple of 32, hw
 25 |     #input_shape = (224,224)
 26 |     model = create_model(input_shape, anchors, len(class_names) )
 27 |     train(model, annotation_path, input_shape, anchors, len(class_names), log_dir=log_dir)
 28 | 
 29 | #函数定义
 30 | def train(model, annotation_path, input_shape, anchors, num_classes, log_dir='logs/'):
 31 |     model.compile(optimizer='adam', loss={
 32 |         'yolo_loss': lambda y_true, y_pred: y_pred})
 33 |     #记录所有训练过程，每隔一定步数记录最大值
 34 |     tensorboard = TensorBoard(log_dir=log_dir)
 35 |     checkpoint = ModelCheckpoint(log_dir + "best_weights.h5",
 36 |                                  monitor="val_loss",
 37 |                                  mode='min',
 38 |                                  save_weights_only=True,
 39 |                                  save_best_only=True, 
 40 |                                  verbose=1,
 41 |                                  period=1)
 42 | 
 43 |     callback_lists=[tensorboard,checkpoint]
 44 |     batch_size = 16
 45 |     val_split = 0.05
 46 |     with open(annotation_path) as f:
 47 |         lines = f.readlines()
 48 |     np.random.shuffle(lines)
 49 |     
 50 |     num_val = int(len(lines)*val_split)
 51 |     num_train = len(lines) - num_val
 52 |     print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
 53 | 
 54 |     model.fit_generator(data_generator_wrap(lines[:num_train], batch_size, input_shape, anchors, num_classes),
 55 |             steps_per_epoch=max(1, num_train//batch_size),
 56 |             validation_data=data_generator_wrap(lines[num_train:], batch_size, input_shape, anchors, num_classes),
 57 |             validation_steps=max(1, num_val//batch_size),
 58 |             epochs=3000, #迭代的步数
 59 |             initial_epoch=0, callbacks=callback_lists, verbose=1)
 60 |     model.save_weights(log_dir + 'tiny-trained_weights.h5')
 61 | 
 62 | def get_classes(classes_path):
 63 |     with open(classes_path) as f:
 64 |         class_names = f.readlines()
 65 |     class_names = [c.strip() for c in class_names]
 66 |     return class_names
 67 | 
 68 | def get_anchors(anchors_path):
 69 |     with open(anchors_path) as f:
 70 |         anchors = f.readline()
 71 |     anchors = [float(x) for x in anchors.split(',')]
 72 |     return np.array(anchors).reshape(-1, 2)
 73 | 
 74 | def create_model(input_shape, anchors, num_classes, load_pretrained=False, freeze_body=False,
 75 |             weights_path='model_data/yolo_weights.h5'):
 76 |     K.clear_session() # get a new session
 77 |     image_input = Input(shape=(None, None, 3))
 78 |     #image_input = Input(shape=(None, None, 3))
 79 |     h, w = input_shape
 80 |     num_anchors = len(anchors)
 81 |     y_true = [Input(shape=(h//{0:32, 1:16}[l], w//{0:32, 1:16}[l], \
 82 |         num_anchors//3, num_classes+5)) for l in range(2)]
 83 |     
 84 |   
 85 |     model_body = tiny_yolo_body(image_input, num_anchors//3, num_classes)
 86 |     print('Create YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))
 87 | 
 88 |     if load_pretrained:
 89 |         model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)
 90 |         print('Load weights {}.'.format(weights_path))
 91 |         if freeze_body:
 92 |             # Do not freeze 3 output layers.
 93 |             num = len(model_body.layers)-7
 94 |             for i in range(num): model_body.layers[i].trainable = False
 95 |             print('Freeze the first {} layers of total {} layers.'.format(num, len(model_body.layers)))
 96 | 
 97 |     model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
 98 |         arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5})(
 99 |         [*model_body.output, *y_true])
100 |     model = Model([model_body.input, *y_true], model_loss)
101 |     return model
102 | def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes):
103 |     n = len(annotation_lines)
104 |     np.random.shuffle(annotation_lines)
105 |     i = 0
106 |     while True:
107 |         image_data = []
108 |         box_data = []
109 |         for b in range(batch_size):
110 |             i %= n
111 |             image, box = get_random_data(annotation_lines[i], input_shape, random=True)
112 |             #image = cv2.resize(image, (224, 224))
113 |             image_data.append(image)
114 |             box_data.append(box)
115 |             i += 1
116 |         image_data = np.array(image_data)
117 |         box_data = np.array(box_data)
118 |         y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes)
119 |         yield [image_data, *y_true], np.zeros(batch_size)
120 | 
121 | def data_generator_wrap(annotation_lines, batch_size, input_shape, anchors, num_classes):
122 |     n = len(annotation_lines)
123 |     if n==0 or batch_size<=0: return None
124 |     return data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes)
125 | 
126 | if __name__ == '__main__':
127 |     _main()
128 | 


--------------------------------------------------------------------------------
/train.txt:
--------------------------------------------------------------------------------
 1 | /home/zhangyang/yolo/VOC2007/JPEGImages/00040.jpg 64,52,162,106,1
 2 | /home/zhangyang/yolo/VOC2007/JPEGImages/00010.jpg 67,321,500,411,0
 3 | /home/zhangyang/yolo/VOC2007/JPEGImages/00007.jpg 36,342,451,483,0
 4 | /home/zhangyang/yolo/VOC2007/JPEGImages/00006.jpg 223,142,500,424,0
 5 | /home/zhangyang/yolo/VOC2007/JPEGImages/00052.jpg 103,419,287,472,2
 6 | /home/zhangyang/yolo/VOC2007/JPEGImages/00022.jpg 162,185,233,239,1
 7 | /home/zhangyang/yolo/VOC2007/JPEGImages/00036.jpg 228,265,305,365,1
 8 | /home/zhangyang/yolo/VOC2007/JPEGImages/00044.jpg 270,439,395,512,2
 9 | /home/zhangyang/yolo/VOC2007/JPEGImages/00004.jpg 82,108,457,193,0
10 | /home/zhangyang/yolo/VOC2007/JPEGImages/00020.jpg 349,190,485,498,0
11 | /home/zhangyang/yolo/VOC2007/JPEGImages/00008.jpg 292,6,451,326,0
12 | /home/zhangyang/yolo/VOC2007/JPEGImages/00009.jpg 195,6,487,242,0
13 | /home/zhangyang/yolo/VOC2007/JPEGImages/00037.jpg 62,26,162,108,1
14 | /home/zhangyang/yolo/VOC2007/JPEGImages/00030.jpg 74,119,136,190,1
15 | /home/zhangyang/yolo/VOC2007/JPEGImages/00058.jpg 39,419,216,442,2
16 | /home/zhangyang/yolo/VOC2007/JPEGImages/00015.jpg 57,313,510,375,0
17 | /home/zhangyang/yolo/VOC2007/JPEGImages/00046.jpg 151,13,298,108,2
18 | /home/zhangyang/yolo/VOC2007/JPEGImages/00002.jpg 154,3,390,249,0
19 | /home/zhangyang/yolo/VOC2007/JPEGImages/00014.jpg 18,6,218,188,0
20 | /home/zhangyang/yolo/VOC2007/JPEGImages/00033.jpg 241,152,300,239,1
21 | /home/zhangyang/yolo/VOC2007/JPEGImages/00001.jpg 8,352,374,501,0
22 | /home/zhangyang/yolo/VOC2007/JPEGImages/00047.jpg 80,293,169,396,2
23 | /home/zhangyang/yolo/VOC2007/JPEGImages/00005.jpg 82,49,495,193,0
24 | /home/zhangyang/yolo/VOC2007/JPEGImages/00003.jpg 1,6,216,129,0
25 | /home/zhangyang/yolo/VOC2007/JPEGImages/00057.jpg 192,21,346,129,2
26 | /home/zhangyang/yolo/VOC2007/JPEGImages/00028.jpg 244,70,339,129,1
27 | /home/zhangyang/yolo/VOC2007/JPEGImages/00035.jpg 159,131,241,216,1
28 | /home/zhangyang/yolo/VOC2007/JPEGImages/00055.jpg 80,54,116,257,2
29 | /home/zhangyang/yolo/VOC2007/JPEGImages/00031.jpg 216,278,267,337,1
30 | /home/zhangyang/yolo/VOC2007/JPEGImages/00038.jpg 72,239,167,344,1
31 | /home/zhangyang/yolo/VOC2007/JPEGImages/00050.jpg 441,80,512,150,2
32 | /home/zhangyang/yolo/VOC2007/JPEGImages/00029.jpg 21,234,95,313,1
33 | /home/zhangyang/yolo/VOC2007/JPEGImages/00042.jpg 349,344,426,434,2
34 | /home/zhangyang/yolo/VOC2007/JPEGImages/00039.jpg 116,144,167,198,1
35 | /home/zhangyang/yolo/VOC2007/JPEGImages/00049.jpg 44,219,116,372,2
36 | /home/zhangyang/yolo/VOC2007/JPEGImages/00024.jpg 303,288,349,352,1
37 | /home/zhangyang/yolo/VOC2007/JPEGImages/00027.jpg 344,34,413,106,1
38 | /home/zhangyang/yolo/VOC2007/JPEGImages/00018.jpg 18,372,441,416,0
39 | /home/zhangyang/yolo/VOC2007/JPEGImages/00011.jpg 64,13,416,221,0
40 | /home/zhangyang/yolo/VOC2007/JPEGImages/00021.jpg 5,285,72,342,1
41 | /home/zhangyang/yolo/VOC2007/JPEGImages/00016.jpg 136,8,251,416,0
42 | /home/zhangyang/yolo/VOC2007/JPEGImages/00023.jpg 21,188,80,265,1
43 | /home/zhangyang/yolo/VOC2007/JPEGImages/00051.jpg 423,226,451,362,2
44 | /home/zhangyang/yolo/VOC2007/JPEGImages/00034.jpg 51,201,121,306,1
45 | /home/zhangyang/yolo/VOC2007/JPEGImages/00025.jpg 185,119,244,183,1
46 | /home/zhangyang/yolo/VOC2007/JPEGImages/00017.jpg 100,390,498,498,0
47 | /home/zhangyang/yolo/VOC2007/JPEGImages/00048.jpg 108,31,216,142,2
48 | /home/zhangyang/yolo/VOC2007/JPEGImages/00043.jpg 1,274,105,378,2
49 | /home/zhangyang/yolo/VOC2007/JPEGImages/00060.jpg 10,365,274,401,2
50 | /home/zhangyang/yolo/VOC2007/JPEGImages/00041.jpg 310,116,512,188,2
51 | /home/zhangyang/yolo/VOC2007/JPEGImages/00056.jpg 408,178,512,323,2
52 | /home/zhangyang/yolo/VOC2007/JPEGImages/00019.jpg 100,83,508,224,0
53 | /home/zhangyang/yolo/VOC2007/JPEGImages/00054.jpg 8,360,128,472,2
54 | /home/zhangyang/yolo/VOC2007/JPEGImages/00059.jpg 108,39,172,165,2
55 | /home/zhangyang/yolo/VOC2007/JPEGImages/00026.jpg 8,349,62,403,1
56 | /home/zhangyang/yolo/VOC2007/JPEGImages/00045.jpg 108,13,213,106,2
57 | /home/zhangyang/yolo/VOC2007/JPEGImages/00013.jpg 218,406,474,503,0
58 | /home/zhangyang/yolo/VOC2007/JPEGImages/00012.jpg 203,139,508,337,0
59 | /home/zhangyang/yolo/VOC2007/JPEGImages/00053.jpg 277,393,387,508,2
60 | /home/zhangyang/yolo/VOC2007/JPEGImages/00032.jpg 18,152,74,208,1
61 | 


--------------------------------------------------------------------------------
/train_bottleneck.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Retrain the YOLO model for your own dataset.
  3 | """
  4 | import os
  5 | import numpy as np
  6 | import keras.backend as K
  7 | from keras.layers import Input, Lambda
  8 | from keras.models import Model
  9 | from keras.optimizers import Adam
 10 | from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
 11 | 
 12 | from yolo3.model import preprocess_true_boxes, yolo_body, tiny_yolo_body, yolo_loss
 13 | from yolo3.utils import get_random_data
 14 | 
 15 | 
 16 | def _main():
 17 |     annotation_path = 'train.txt'
 18 |     log_dir = 'logs/000/'
 19 |     classes_path = 'model_data/coco_classes.txt'
 20 |     anchors_path = 'model_data/yolo_anchors.txt'
 21 |     class_names = get_classes(classes_path)
 22 |     num_classes = len(class_names)
 23 |     anchors = get_anchors(anchors_path)
 24 | 
 25 |     input_shape = (416,416) # multiple of 32, hw
 26 | 
 27 |     model, bottleneck_model, last_layer_model = create_model(input_shape, anchors, num_classes,
 28 |             freeze_body=2, weights_path='model_data/yolo_weights.h5') # make sure you know what you freeze
 29 | 
 30 |     logging = TensorBoard(log_dir=log_dir)
 31 |     checkpoint = ModelCheckpoint(log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
 32 |         monitor='val_loss', save_weights_only=True, save_best_only=True, period=3)
 33 |     reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1)
 34 |     early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1)
 35 | 
 36 |     val_split = 0.1
 37 |     with open(annotation_path) as f:
 38 |         lines = f.readlines()
 39 |     np.random.seed(10101)
 40 |     np.random.shuffle(lines)
 41 |     np.random.seed(None)
 42 |     num_val = int(len(lines)*val_split)
 43 |     num_train = len(lines) - num_val
 44 | 
 45 |     # Train with frozen layers first, to get a stable loss.
 46 |     # Adjust num epochs to your dataset. This step is enough to obtain a not bad model.
 47 |     if True:
 48 |         # perform bottleneck training
 49 |         if not os.path.isfile("bottlenecks.npz"):
 50 |             print("calculating bottlenecks")
 51 |             batch_size=8
 52 |             bottlenecks=bottleneck_model.predict_generator(data_generator_wrapper(lines, batch_size, input_shape, anchors, num_classes, random=False, verbose=True),
 53 |              steps=(len(lines)//batch_size)+1, max_queue_size=1)
 54 |             np.savez("bottlenecks.npz", bot0=bottlenecks[0], bot1=bottlenecks[1], bot2=bottlenecks[2])
 55 |     
 56 |         # load bottleneck features from file
 57 |         dict_bot=np.load("bottlenecks.npz")
 58 |         bottlenecks_train=[dict_bot["bot0"][:num_train], dict_bot["bot1"][:num_train], dict_bot["bot2"][:num_train]]
 59 |         bottlenecks_val=[dict_bot["bot0"][num_train:], dict_bot["bot1"][num_train:], dict_bot["bot2"][num_train:]]
 60 | 
 61 |         # train last layers with fixed bottleneck features
 62 |         batch_size=8
 63 |         print("Training last layers with bottleneck features")
 64 |         print('with {} samples, val on {} samples and batch size {}.'.format(num_train, num_val, batch_size))
 65 |         last_layer_model.compile(optimizer='adam', loss={'yolo_loss': lambda y_true, y_pred: y_pred})
 66 |         last_layer_model.fit_generator(bottleneck_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, bottlenecks_train),
 67 |                 steps_per_epoch=max(1, num_train//batch_size),
 68 |                 validation_data=bottleneck_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes, bottlenecks_val),
 69 |                 validation_steps=max(1, num_val//batch_size),
 70 |                 epochs=30,
 71 |                 initial_epoch=0, max_queue_size=1)
 72 |         model.save_weights(log_dir + 'trained_weights_stage_0.h5')
 73 |         
 74 |         # train last layers with random augmented data
 75 |         model.compile(optimizer=Adam(lr=1e-3), loss={
 76 |             # use custom yolo_loss Lambda layer.
 77 |             'yolo_loss': lambda y_true, y_pred: y_pred})
 78 |         batch_size = 16
 79 |         print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
 80 |         model.fit_generator(data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes),
 81 |                 steps_per_epoch=max(1, num_train//batch_size),
 82 |                 validation_data=data_generator_wrapper(lines[num_train:], batch_size, input_shape, anchors, num_classes),
 83 |                 validation_steps=max(1, num_val//batch_size),
 84 |                 epochs=50,
 85 |                 initial_epoch=0,
 86 |                 callbacks=[logging, checkpoint])
 87 |         model.save_weights(log_dir + 'trained_weights_stage_1.h5')
 88 | 
 89 |     # Unfreeze and continue training, to fine-tune.
 90 |     # Train longer if the result is not good.
 91 |     if True:
 92 |         for i in range(len(model.layers)):
 93 |             model.layers[i].trainable = True
 94 |         model.compile(optimizer=Adam(lr=1e-4), loss={'yolo_loss': lambda y_true, y_pred: y_pred}) # recompile to apply the change
 95 |         print('Unfreeze all of the layers.')
 96 | 
 97 |         batch_size = 4 # note that more GPU memory is required after unfreezing the body
 98 |         print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
 99 |         model.fit_generator(data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes),
100 |             steps_per_epoch=max(1, num_train//batch_size),
101 |             validation_data=data_generator_wrapper(lines[num_train:], batch_size, input_shape, anchors, num_classes),
102 |             validation_steps=max(1, num_val//batch_size),
103 |             epochs=100,
104 |             initial_epoch=50,
105 |             callbacks=[logging, checkpoint, reduce_lr, early_stopping])
106 |         model.save_weights(log_dir + 'trained_weights_final.h5')
107 | 
108 |     # Further training if needed.
109 | 
110 | 
111 | def get_classes(classes_path):
112 |     '''loads the classes'''
113 |     with open(classes_path) as f:
114 |         class_names = f.readlines()
115 |     class_names = [c.strip() for c in class_names]
116 |     return class_names
117 | 
118 | def get_anchors(anchors_path):
119 |     '''loads the anchors from a file'''
120 |     with open(anchors_path) as f:
121 |         anchors = f.readline()
122 |     anchors = [float(x) for x in anchors.split(',')]
123 |     return np.array(anchors).reshape(-1, 2)
124 | 
125 | 
126 | def create_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=2,
127 |             weights_path='model_data/yolo_weights.h5'):
128 |     '''create the training model'''
129 |     K.clear_session() # get a new session
130 |     image_input = Input(shape=(None, None, 3))
131 |     h, w = input_shape
132 |     num_anchors = len(anchors)
133 | 
134 |     y_true = [Input(shape=(h//{0:32, 1:16, 2:8}[l], w//{0:32, 1:16, 2:8}[l], \
135 |         num_anchors//3, num_classes+5)) for l in range(3)]
136 | 
137 |     model_body = yolo_body(image_input, num_anchors//3, num_classes)
138 |     print('Create YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))
139 | 
140 |     if load_pretrained:
141 |         model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)
142 |         print('Load weights {}.'.format(weights_path))
143 |         if freeze_body in [1, 2]:
144 |             # Freeze darknet53 body or freeze all but 3 output layers.
145 |             num = (185, len(model_body.layers)-3)[freeze_body-1]
146 |             for i in range(num): model_body.layers[i].trainable = False
147 |             print('Freeze the first {} layers of total {} layers.'.format(num, len(model_body.layers)))
148 | 
149 |     # get output of second last layers and create bottleneck model of it
150 |     out1=model_body.layers[246].output
151 |     out2=model_body.layers[247].output
152 |     out3=model_body.layers[248].output
153 |     bottleneck_model = Model([model_body.input, *y_true], [out1, out2, out3])
154 | 
155 |     # create last layer model of last layers from yolo model
156 |     in0 = Input(shape=bottleneck_model.output[0].shape[1:].as_list()) 
157 |     in1 = Input(shape=bottleneck_model.output[1].shape[1:].as_list())
158 |     in2 = Input(shape=bottleneck_model.output[2].shape[1:].as_list())
159 |     last_out0=model_body.layers[249](in0)
160 |     last_out1=model_body.layers[250](in1)
161 |     last_out2=model_body.layers[251](in2)
162 |     model_last=Model(inputs=[in0, in1, in2], outputs=[last_out0, last_out1, last_out2])
163 |     model_loss_last =Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
164 |         arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5})(
165 |         [*model_last.output, *y_true])
166 |     last_layer_model = Model([in0,in1,in2, *y_true], model_loss_last)
167 | 
168 |     
169 |     model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
170 |         arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5})(
171 |         [*model_body.output, *y_true])
172 |     model = Model([model_body.input, *y_true], model_loss)
173 | 
174 |     return model, bottleneck_model, last_layer_model
175 | 
176 | def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes, random=True, verbose=False):
177 |     '''data generator for fit_generator'''
178 |     n = len(annotation_lines)
179 |     i = 0
180 |     while True:
181 |         image_data = []
182 |         box_data = []
183 |         for b in range(batch_size):
184 |             if i==0 and random:
185 |                 np.random.shuffle(annotation_lines)
186 |             image, box = get_random_data(annotation_lines[i], input_shape, random=random)
187 |             image_data.append(image)
188 |             box_data.append(box)
189 |             i = (i+1) % n
190 |         image_data = np.array(image_data)
191 |         if verbose:
192 |             print("Progress: ",i,"/",n)
193 |         box_data = np.array(box_data)
194 |         y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes)
195 |         yield [image_data, *y_true], np.zeros(batch_size)
196 | 
197 | def data_generator_wrapper(annotation_lines, batch_size, input_shape, anchors, num_classes, random=True, verbose=False):
198 |     n = len(annotation_lines)
199 |     if n==0 or batch_size<=0: return None
200 |     return data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes, random, verbose)
201 | 
202 | def bottleneck_generator(annotation_lines, batch_size, input_shape, anchors, num_classes, bottlenecks):
203 |     n = len(annotation_lines)
204 |     i = 0
205 |     while True:
206 |         box_data = []
207 |         b0=np.zeros((batch_size,bottlenecks[0].shape[1],bottlenecks[0].shape[2],bottlenecks[0].shape[3]))
208 |         b1=np.zeros((batch_size,bottlenecks[1].shape[1],bottlenecks[1].shape[2],bottlenecks[1].shape[3]))
209 |         b2=np.zeros((batch_size,bottlenecks[2].shape[1],bottlenecks[2].shape[2],bottlenecks[2].shape[3]))
210 |         for b in range(batch_size):
211 |             _, box = get_random_data(annotation_lines[i], input_shape, random=False, proc_img=False)
212 |             box_data.append(box)
213 |             b0[b]=bottlenecks[0][i]
214 |             b1[b]=bottlenecks[1][i]
215 |             b2[b]=bottlenecks[2][i]
216 |             i = (i+1) % n
217 |         box_data = np.array(box_data)
218 |         y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes)
219 |         yield [b0, b1, b2, *y_true], np.zeros(batch_size)
220 | 
221 | if __name__ == '__main__':
222 |     _main()
223 | 


--------------------------------------------------------------------------------
/val.txt:
--------------------------------------------------------------------------------
1 | /home/zhangyang/yolo/VOC2007/JPEGImages/00032.jpg 18,152,74,208,1
2 | 


--------------------------------------------------------------------------------
/voc_annotation.py:
--------------------------------------------------------------------------------
 1 | import xml.etree.ElementTree as ET
 2 | from os import getcwd
 3 | 
 4 | sets=[('2007', 'train'), ('2007', 'val'), ('2007', 'test')]
 5 | 
 6 | classes = ["class1","class2","class3"]
 7 | 
 8 | 
 9 | def convert_annotation(year, image_id, list_file):
10 |     in_file = open('VOC%s/Annotations/%s.xml'%(year, image_id))
11 |     tree=ET.parse(in_file)
12 |     root = tree.getroot()
13 | 
14 |     for obj in root.iter('object'):
15 |         difficult = obj.find('difficult').text
16 |         cls = obj.find('name').text
17 |         if cls not in classes or int(difficult)==1:
18 |             continue
19 |         cls_id = classes.index(cls)
20 |         xmlbox = obj.find('bndbox')
21 |         b = (int(xmlbox.find('xmin').text), int(xmlbox.find('ymin').text), int(xmlbox.find('xmax').text), int(xmlbox.find('ymax').text))
22 |         list_file.write(" " + ",".join([str(a) for a in b]) + ',' + str(cls_id))
23 | 
24 | wd = getcwd()
25 | 
26 | for year, image_set in sets:
27 |     image_ids = open('VOC%s/ImageSets/Main/%s.txt'%(year, image_set)).read().strip().split()
28 |     list_file = open('%s_%s.txt'%(year, image_set), 'w')
29 |     for image_id in image_ids:
30 |         list_file.write('%s/VOC%s/JPEGImages/%s.jpg'%(wd, year, image_id))
31 |         convert_annotation(year, image_id, list_file)
32 |         list_file.write('\n')
33 |     list_file.close()
34 | 
35 | 


--------------------------------------------------------------------------------
/yolo.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Eatzhy/tiny-yolov3/495e8df17bba289674ea2b11bdb9fe2fa48462f5/yolo.docx


--------------------------------------------------------------------------------
/yolo.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Class definition of YOLO_v3 style detection model on image and video
  4 | """
  5 | 
  6 | import colorsys
  7 | import os
  8 | from timeit import default_timer as timer
  9 | import numpy as np
 10 | from keras import backend as K
 11 | from keras.models import load_model
 12 | from keras.layers import Input
 13 | from PIL import Image, ImageFont, ImageDraw
 14 | from yolo3.tinymodel import yolo_eval, yolo_body, tiny_yolo_body
 15 | from yolo3.utils import letterbox_image
 16 | from keras.utils import multi_gpu_model
 17 | 
 18 | class YOLO(object):
 19 |     _defaults = {
 20 |         "model_path": 'logs/best_weights.h5',
 21 |         "anchors_path": 'model_data/tiny_yolo_anchors.txt',
 22 |         "classes_path": 'model_data/voc_classes.txt',
 23 |         "score" : 0.3,
 24 |         "iou" : 0.25,
 25 |         "model_image_size" : (416, 416),
 26 |         "gpu_num" : 1,
 27 |     }
 28 | 
 29 |     @classmethod
 30 |     def get_defaults(cls, n):
 31 |         if n in cls._defaults:
 32 |             return cls._defaults[n]
 33 |         else:
 34 |             return "Unrecognized attribute name '" + n + "'"
 35 | 
 36 |     def __init__(self, **kwargs):
 37 |         self.__dict__.update(self._defaults) # set up default values
 38 |         self.__dict__.update(kwargs) # and update with user overrides
 39 |         self.class_names = self._get_class()
 40 |         self.anchors = self._get_anchors()
 41 |         self.sess = K.get_session()
 42 |         self.boxes, self.scores, self.classes = self.generate()
 43 | 
 44 |     def _get_class(self):
 45 |         classes_path = os.path.expanduser(self.classes_path)
 46 |         with open(classes_path) as f:
 47 |             class_names = f.readlines()
 48 |         class_names = [c.strip() for c in class_names]
 49 |         return class_names
 50 | 
 51 |     def _get_anchors(self):
 52 |         anchors_path = os.path.expanduser(self.anchors_path)
 53 |         with open(anchors_path) as f:
 54 |             anchors = f.readline()
 55 |         anchors = [float(x) for x in anchors.split(',')]
 56 |         return np.array(anchors).reshape(-1, 2)
 57 | 
 58 |     def generate(self):
 59 |         model_path = os.path.expanduser(self.model_path)
 60 |         assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'
 61 | 
 62 |         # Load model, or construct model and load weights.
 63 |         num_anchors = len(self.anchors)
 64 |         num_classes = len(self.class_names)
 65 |         is_tiny_version = num_anchors==6 # default setting
 66 |         try:
 67 |             self.yolo_model = load_model(model_path, compile=False)
 68 |         except:
 69 |             self.yolo_model = tiny_yolo_body(Input(shape=(None,None,3)), num_anchors//3, num_classes) \
 70 |                 if is_tiny_version else yolo_body(Input(shape=(None,None,3)), num_anchors//3, num_classes)
 71 |             self.yolo_model.load_weights(self.model_path) # make sure model, anchors and classes match
 72 |         else:
 73 |             assert self.yolo_model.layers[-1].output_shape[-1] == \
 74 |                 num_anchors/len(self.yolo_model.output) * (num_classes + 5), \
 75 |                 'Mismatch between model and given anchor and class sizes'
 76 | 
 77 |         print('{} model, anchors, and classes loaded.'.format(model_path))
 78 | 
 79 |         # Generate colors for drawing bounding boxes.
 80 |         hsv_tuples = [(x / len(self.class_names), 1., 1.)
 81 |                       for x in range(len(self.class_names))]
 82 |         self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
 83 |         self.colors = list(
 84 |             map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
 85 |                 self.colors))
 86 |         np.random.seed(10101)  # Fixed seed for consistent colors across runs.
 87 |         np.random.shuffle(self.colors)  # Shuffle colors to decorrelate adjacent classes.
 88 |         np.random.seed(None)  # Reset seed to default.
 89 | 
 90 |         # Generate output tensor targets for filtered bounding boxes.
 91 |         self.input_image_shape = K.placeholder(shape=(2, ))
 92 |         if self.gpu_num>=2:
 93 |             self.yolo_model = multi_gpu_model(self.yolo_model, gpus=self.gpu_num)
 94 |         boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors,
 95 |                 len(self.class_names), self.input_image_shape,
 96 |                 score_threshold=self.score, iou_threshold=self.iou)
 97 |         return boxes, scores, classes
 98 | 
 99 |     def detect_image(self, image):
100 |         start = timer()
101 | 
102 |         if self.model_image_size != (None, None):
103 |             assert self.model_image_size[0]%32 == 0, 'Multiples of 32 required'
104 |             assert self.model_image_size[1]%32 == 0, 'Multiples of 32 required'
105 |             boxed_image = letterbox_image(image, tuple(reversed(self.model_image_size)))
106 |         else:
107 |             new_image_size = (image.width - (image.width % 32),
108 |                               image.height - (image.height % 32))
109 |             boxed_image = letterbox_image(image, new_image_size)
110 |         image_data = np.array(boxed_image, dtype='float32')
111 | 
112 |         print(image_data.shape)
113 |         image_data /= 255.
114 |         image_data = np.expand_dims(image_data, 0)  # Add batch dimension.
115 | 
116 |         out_boxes, out_scores, out_classes = self.sess.run(
117 |             [self.boxes, self.scores, self.classes],
118 |             feed_dict={
119 |                 self.yolo_model.input: image_data,
120 |                 self.input_image_shape: [image.size[1], image.size[0]],
121 |                 K.learning_phase(): 0
122 |             })
123 | 
124 |         print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
125 | 
126 |         font = ImageFont.truetype(font='font/FiraMono-Medium.otf',
127 |                     size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
128 |         thickness = (image.size[0] + image.size[1]) // 300
129 | 
130 |         for i, c in reversed(list(enumerate(out_classes))):
131 |             predicted_class = self.class_names[c]
132 |             box = out_boxes[i]
133 |             score = out_scores[i]
134 | 
135 |             label = '{} {:.2f}'.format(predicted_class, score)
136 |             image = image.convert('RGB') #对图像进行转换维度
137 |             draw = ImageDraw.Draw(image)
138 |             label_size = draw.textsize(label, font)
139 | 
140 |             top, left, bottom, right = box
141 |             top = max(0, np.floor(top + 0.5).astype('int32'))
142 |             left = max(0, np.floor(left + 0.5).astype('int32'))
143 |             bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
144 |             right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
145 |             print(label, (left, top), (right, bottom))
146 | 
147 |             if top - label_size[1] >= 0:
148 |                 text_origin = np.array([left, top - label_size[1]])
149 |             else:
150 |                 text_origin = np.array([left, top + 1])
151 | 
152 |             # My kingdom for a good redistributable image drawing library.
153 |             for i in range(thickness):
154 |                 draw.rectangle(
155 |                     [left + i, top + i, right - i, bottom - i],
156 |                     outline=self.colors[c])
157 |                     #outline=(255))
158 |             draw.rectangle(
159 |                 [tuple(text_origin), tuple(text_origin + label_size)],
160 |                 fill=self.colors[c])
161 |                 #outline=(255))
162 |             #draw.text(text_origin, label, fill=(0, 0, 0), font=font)
163 |             draw.text(text_origin, label, fill=(0), font=font)
164 |             del draw
165 | 
166 |         end = timer()
167 |         print(end - start)
168 |         return image
169 | 
170 |     def close_session(self):
171 |         self.sess.close()
172 | 


--------------------------------------------------------------------------------
/yolo3/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/yolo3/__pycache__/model.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Eatzhy/tiny-yolov3/495e8df17bba289674ea2b11bdb9fe2fa48462f5/yolo3/__pycache__/model.cpython-35.pyc


--------------------------------------------------------------------------------
/yolo3/__pycache__/tinymodel.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Eatzhy/tiny-yolov3/495e8df17bba289674ea2b11bdb9fe2fa48462f5/yolo3/__pycache__/tinymodel.cpython-35.pyc


--------------------------------------------------------------------------------
/yolo3/__pycache__/utils.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Eatzhy/tiny-yolov3/495e8df17bba289674ea2b11bdb9fe2fa48462f5/yolo3/__pycache__/utils.cpython-35.pyc


--------------------------------------------------------------------------------
/yolo3/tinymodel.py:
--------------------------------------------------------------------------------
  1 | """YOLO_v3 Model Defined in Keras."""
  2 | 
  3 | from functools import wraps
  4 | 
  5 | import numpy as np
  6 | import tensorflow as tf
  7 | from keras import backend as K
  8 | from keras.layers import Conv2D, Add, ZeroPadding2D, UpSampling2D, Concatenate, MaxPooling2D
  9 | from keras.layers.advanced_activations import LeakyReLU
 10 | from keras.layers.normalization import BatchNormalization
 11 | from keras.models import Model
 12 | from keras.regularizers import l2
 13 | 
 14 | from yolo3.utils import compose
 15 | 
 16 | 
 17 | @wraps(Conv2D)
 18 | def DarknetConv2D(*args, **kwargs):
 19 |     """Wrapper to set Darknet parameters for Convolution2D."""
 20 |     darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)}
 21 |     darknet_conv_kwargs['padding'] = 'valid' if kwargs.get('strides')==(2,2) else 'same'
 22 |     darknet_conv_kwargs.update(kwargs)
 23 |     return Conv2D(*args, **darknet_conv_kwargs)
 24 | 
 25 | def DarknetConv2D_BN_Leaky(*args, **kwargs):
 26 |     """Darknet Convolution2D followed by BatchNormalization and LeakyReLU."""
 27 |     no_bias_kwargs = {'use_bias': False}
 28 |     no_bias_kwargs.update(kwargs)
 29 |     return compose(
 30 |         DarknetConv2D(*args, **no_bias_kwargs),
 31 |         BatchNormalization(),
 32 |         LeakyReLU(alpha=0.1))
 33 | 
 34 | def resblock_body(x, num_filters, num_blocks):
 35 |     '''A series of resblocks starting with a downsampling Convolution2D'''
 36 |     # Darknet uses left and top padding instead of 'same' mode
 37 |     x = ZeroPadding2D(((1,0),(1,0)))(x)
 38 |     x = DarknetConv2D_BN_Leaky(num_filters, (3,3), strides=(2,2))(x)
 39 |     for i in range(num_blocks):
 40 |         y = compose(
 41 |                 DarknetConv2D_BN_Leaky(num_filters//2, (1,1)),
 42 |                 DarknetConv2D_BN_Leaky(num_filters, (3,3)))(x)
 43 |         x = Add()([x,y])
 44 |     return x
 45 | 
 46 | def darknet_body(x):
 47 |     '''Darknent body having 52 Convolution2D layers'''
 48 |     x = DarknetConv2D_BN_Leaky(32, (3,3))(x)
 49 |     x = resblock_body(x, 64, 1)
 50 |     x = resblock_body(x, 128, 2)
 51 |     x = resblock_body(x, 256, 8)
 52 |     x = resblock_body(x, 512, 8)
 53 |     x = resblock_body(x, 1024, 4)
 54 |     return x
 55 | 
 56 | def make_last_layers(x, num_filters, out_filters):
 57 |     '''6 Conv2D_BN_Leaky layers followed by a Conv2D_linear layer'''
 58 |     x = compose(
 59 |             DarknetConv2D_BN_Leaky(num_filters, (1,1)),
 60 |             DarknetConv2D_BN_Leaky(num_filters*2, (3,3)),
 61 |             DarknetConv2D_BN_Leaky(num_filters, (1,1)),
 62 |             DarknetConv2D_BN_Leaky(num_filters*2, (3,3)),
 63 |             DarknetConv2D_BN_Leaky(num_filters, (1,1)))(x)
 64 |     y = compose(
 65 |             DarknetConv2D_BN_Leaky(num_filters*2, (3,3)),
 66 |             DarknetConv2D(out_filters, (1,1)))(x)
 67 |     return x, y
 68 | 
 69 | def yolo_body(inputs, num_anchors, num_classes):
 70 |     """Create YOLO_V3 model CNN body in Keras."""
 71 |     darknet = Model(inputs, darknet_body(inputs))
 72 |     x, y1 = make_last_layers(darknet.output, 512, num_anchors*(num_classes+5))
 73 | 
 74 |     x = compose(
 75 |             DarknetConv2D_BN_Leaky(256, (1,1)),
 76 |             UpSampling2D(2))(x)
 77 |     x = Concatenate()([x,darknet.layers[152].output])
 78 |     x, y2 = make_last_layers(x, 256, num_anchors*(num_classes+5))
 79 | 
 80 |     x = compose(
 81 |             DarknetConv2D_BN_Leaky(128, (1,1)),
 82 |             UpSampling2D(2))(x)
 83 |     x = Concatenate()([x,darknet.layers[92].output])
 84 |     x, y3 = make_last_layers(x, 128, num_anchors*(num_classes+5))
 85 |     return Model(inputs, [y1,y2,y3])
 86 | 
 87 | def tiny_yolo_body(inputs, num_anchors, num_classes):
 88 |     '''Create Tiny YOLO_v3 model CNN body in keras.
 89 |     输入的尺寸为224x224，池化时对尺寸进行缩小，当y1输出时，池化为：224/2/2/2/2=14
 90 |     y2=7
 91 |     '''
 92 |     x1 = compose(
 93 |             DarknetConv2D_BN_Leaky(16, (3,3)),
 94 |             MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'),
 95 |             DarknetConv2D_BN_Leaky(32, (3,3)),
 96 |             MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'),
 97 |             DarknetConv2D_BN_Leaky(64, (3,3)),
 98 |             MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'),
 99 |             DarknetConv2D_BN_Leaky(128, (3,3)),
100 |             MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'),
101 |             DarknetConv2D_BN_Leaky(256, (3,3)))(inputs)
102 |     x2 = compose(
103 |             MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'),
104 |             DarknetConv2D_BN_Leaky(512, (3,3)),
105 |             MaxPooling2D(pool_size=(2,2), strides=(1,1), padding='same'),
106 |             DarknetConv2D_BN_Leaky(1024, (3,3)),
107 |             DarknetConv2D_BN_Leaky(256, (1,1)))(x1)
108 |     y1 = compose(
109 |             DarknetConv2D_BN_Leaky(512, (3,3)),
110 |             DarknetConv2D(num_anchors*(num_classes+5), (1,1)))(x2)
111 | 
112 |     x2 = compose(
113 |             DarknetConv2D_BN_Leaky(128, (1,1)),
114 |             UpSampling2D(2))(x2)
115 |     y2 = compose(
116 |             Concatenate(),
117 |             DarknetConv2D_BN_Leaky(256, (3,3)),
118 |             DarknetConv2D(num_anchors*(num_classes+5), (1,1)))([x2,x1])
119 | 
120 |     return Model(inputs, [y1,y2])
121 | 
122 | 
123 | def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
124 |     """Convert final layer features to bounding box parameters."""
125 |     num_anchors = len(anchors)
126 |     # Reshape to batch, height, width, num_anchors, box_params.
127 |     anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])
128 | 
129 |     grid_shape = K.shape(feats)[1:3] # height, width
130 |     grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
131 |         [1, grid_shape[1], 1, 1])
132 |     grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
133 |         [grid_shape[0], 1, 1, 1])
134 |     grid = K.concatenate([grid_x, grid_y])
135 |     grid = K.cast(grid, K.dtype(feats))
136 | 
137 |     feats = K.reshape(
138 |         feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])
139 | 
140 |     # Adjust preditions to each spatial grid point and anchor size.
141 |     box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(feats))
142 |     box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats))
143 |     box_confidence = K.sigmoid(feats[..., 4:5])
144 |     box_class_probs = K.sigmoid(feats[..., 5:])
145 | 
146 |     if calc_loss == True:
147 |         return grid, feats, box_xy, box_wh
148 |     return box_xy, box_wh, box_confidence, box_class_probs
149 | 
150 | 
151 | def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):
152 |     '''Get corrected boxes'''
153 |     box_yx = box_xy[..., ::-1]
154 |     box_hw = box_wh[..., ::-1]
155 |     input_shape = K.cast(input_shape, K.dtype(box_yx))
156 |     image_shape = K.cast(image_shape, K.dtype(box_yx))
157 |     new_shape = K.round(image_shape * K.min(input_shape/image_shape))
158 |     offset = (input_shape-new_shape)/2./input_shape
159 |     scale = input_shape/new_shape
160 |     box_yx = (box_yx - offset) * scale
161 |     box_hw *= scale
162 | 
163 |     box_mins = box_yx - (box_hw / 2.)
164 |     box_maxes = box_yx + (box_hw / 2.)
165 |     boxes =  K.concatenate([
166 |         box_mins[..., 0:1],  # y_min
167 |         box_mins[..., 1:2],  # x_min
168 |         box_maxes[..., 0:1],  # y_max
169 |         box_maxes[..., 1:2]  # x_max
170 |     ])
171 | 
172 |     # Scale boxes back to original image shape.
173 |     boxes *= K.concatenate([image_shape, image_shape])
174 |     return boxes
175 | 
176 | 
177 | def yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape):
178 |     '''Process Conv layer output'''
179 |     box_xy, box_wh, box_confidence, box_class_probs = yolo_head(feats,
180 |         anchors, num_classes, input_shape)
181 |     boxes = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape)
182 |     boxes = K.reshape(boxes, [-1, 4])
183 |     box_scores = box_confidence * box_class_probs
184 |     box_scores = K.reshape(box_scores, [-1, num_classes])
185 |     return boxes, box_scores
186 | 
187 | 
188 | def yolo_eval(yolo_outputs,
189 |               anchors,
190 |               num_classes,
191 |               image_shape,
192 |               max_boxes=20,
193 |               score_threshold=.3,
194 |               iou_threshold=.25):
195 |     """Evaluate YOLO model on given input and return filtered boxes."""
196 |     num_layers = len(yolo_outputs)
197 |     #anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [0,1,2]] # default setting
198 |     anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4], [1,2]]
199 |     input_shape = K.shape(yolo_outputs[0])[1:3] * 32
200 |     boxes = []
201 |     box_scores = []
202 |     for l in range(num_layers):
203 |         _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l],
204 |             anchors[anchor_mask[l]], num_classes, input_shape, image_shape)
205 |         boxes.append(_boxes)
206 |         box_scores.append(_box_scores)
207 |     boxes = K.concatenate(boxes, axis=0)
208 |     box_scores = K.concatenate(box_scores, axis=0)
209 | 
210 |     mask = box_scores >= score_threshold
211 |     max_boxes_tensor = K.constant(max_boxes, dtype='int32')
212 |     boxes_ = []
213 |     scores_ = []
214 |     classes_ = []
215 |     for c in range(num_classes):
216 |         # TODO: use keras backend instead of tf.
217 |         class_boxes = tf.boolean_mask(boxes, mask[:, c])
218 |         class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
219 |         nms_index = tf.image.non_max_suppression(
220 |             class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold)
221 |         class_boxes = K.gather(class_boxes, nms_index)
222 |         class_box_scores = K.gather(class_box_scores, nms_index)
223 |         classes = K.ones_like(class_box_scores, 'int32') * c
224 |         boxes_.append(class_boxes)
225 |         scores_.append(class_box_scores)
226 |         classes_.append(classes)
227 |     boxes_ = K.concatenate(boxes_, axis=0)
228 |     scores_ = K.concatenate(scores_, axis=0)
229 |     classes_ = K.concatenate(classes_, axis=0)
230 | 
231 |     return boxes_, scores_, classes_
232 | 
233 | 
234 | def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):
235 |     '''Preprocess true boxes to training input format
236 | 
237 |     Parameters
238 |     ----------
239 |     true_boxes: array, shape=(m, T, 5)
240 |         Absolute x_min, y_min, x_max, y_max, class_id relative to input_shape.
241 |     input_shape: array-like, hw, multiples of 32
242 |     anchors: array, shape=(N, 2), wh
243 |     num_classes: integer
244 | 
245 |     Returns
246 |     -------
247 |     y_true: list of array, shape like yolo_outputs, xywh are reletive value
248 | 
249 |     '''
250 |     assert (true_boxes[..., 4]<num_classes).all(), 'class id must be less than num_classes'
251 |     num_layers = len(anchors)//3 # default setting
252 |     #anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [0,1,2]]
253 |     anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4], [1,2]]
254 | 
255 |     true_boxes = np.array(true_boxes, dtype='float32')
256 |     input_shape = np.array(input_shape, dtype='int32')
257 |     boxes_xy = (true_boxes[..., 0:2] + true_boxes[..., 2:4]) // 2
258 |     boxes_wh = true_boxes[..., 2:4] - true_boxes[..., 0:2]
259 |     true_boxes[..., 0:2] = boxes_xy/input_shape[::-1]
260 |     true_boxes[..., 2:4] = boxes_wh/input_shape[::-1]
261 | 
262 |     m = true_boxes.shape[0]
263 |     #grid_shapes = [input_shape//{0:32, 1:16, 2:8}[l] for l in range(num_layers)]
264 |     grid_shapes = [input_shape//{0:32, 1:16}[l] for l in range(num_layers)]
265 |     y_true = [np.zeros((m,grid_shapes[l][0],grid_shapes[l][1],len(anchor_mask[l]),5+num_classes),
266 |         dtype='float32') for l in range(num_layers)]
267 | 
268 |     # Expand dim to apply broadcasting.
269 |     anchors = np.expand_dims(anchors, 0)
270 |     anchor_maxes = anchors / 2.
271 |     anchor_mins = -anchor_maxes
272 |     valid_mask = boxes_wh[..., 0]>0
273 | 
274 |     for b in range(m):
275 |         # Discard zero rows.
276 |         wh = boxes_wh[b, valid_mask[b]]
277 |         if len(wh)==0: continue
278 |         # Expand dim to apply broadcasting.
279 |         wh = np.expand_dims(wh, -2)
280 |         box_maxes = wh / 2.
281 |         box_mins = -box_maxes
282 | 
283 |         intersect_mins = np.maximum(box_mins, anchor_mins)
284 |         intersect_maxes = np.minimum(box_maxes, anchor_maxes)
285 |         intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
286 |         intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
287 |         box_area = wh[..., 0] * wh[..., 1]
288 |         anchor_area = anchors[..., 0] * anchors[..., 1]
289 |         iou = intersect_area / (box_area + anchor_area - intersect_area)
290 | 
291 |         # Find best anchor for each true box
292 |         best_anchor = np.argmax(iou, axis=-1)
293 | 
294 |         for t, n in enumerate(best_anchor):
295 |             for l in range(num_layers):
296 |                 if n in anchor_mask[l]:
297 |                     i = np.floor(true_boxes[b,t,0]*grid_shapes[l][1]).astype('int32')
298 |                     j = np.floor(true_boxes[b,t,1]*grid_shapes[l][0]).astype('int32')
299 |                     k = anchor_mask[l].index(n)
300 |                     c = true_boxes[b,t, 4].astype('int32')
301 |                     y_true[l][b, j, i, k, 0:4] = true_boxes[b,t, 0:4]
302 |                     y_true[l][b, j, i, k, 4] = 1
303 |                     y_true[l][b, j, i, k, 5+c] = 1
304 | 
305 |     return y_true
306 | 
307 | 
308 | def box_iou(b1, b2):
309 |     '''Return iou tensor
310 | 
311 |     Parameters
312 |     ----------
313 |     b1: tensor, shape=(i1,...,iN, 4), xywh
314 |     b2: tensor, shape=(j, 4), xywh
315 | 
316 |     Returns
317 |     -------
318 |     iou: tensor, shape=(i1,...,iN, j)
319 | 
320 |     '''
321 | 
322 |     # Expand dim to apply broadcasting.
323 |     b1 = K.expand_dims(b1, -2)
324 |     b1_xy = b1[..., :2]
325 |     b1_wh = b1[..., 2:4]
326 |     b1_wh_half = b1_wh/2.
327 |     b1_mins = b1_xy - b1_wh_half
328 |     b1_maxes = b1_xy + b1_wh_half
329 | 
330 |     # Expand dim to apply broadcasting.
331 |     b2 = K.expand_dims(b2, 0)
332 |     b2_xy = b2[..., :2]
333 |     b2_wh = b2[..., 2:4]
334 |     b2_wh_half = b2_wh/2.
335 |     b2_mins = b2_xy - b2_wh_half
336 |     b2_maxes = b2_xy + b2_wh_half
337 | 
338 |     intersect_mins = K.maximum(b1_mins, b2_mins)
339 |     intersect_maxes = K.minimum(b1_maxes, b2_maxes)
340 |     intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
341 |     intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
342 |     b1_area = b1_wh[..., 0] * b1_wh[..., 1]
343 |     b2_area = b2_wh[..., 0] * b2_wh[..., 1]
344 |     iou = intersect_area / (b1_area + b2_area - intersect_area)
345 | 
346 |     return iou
347 | 
348 | 
349 | def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
350 |     '''Return yolo_loss tensor
351 |     
352 |     num_layers：层的数量，是anchors数量的3分之1；
353 |     args:前3个是yolo_outputs预测值，后3个是y_true真值；
354 |     anchor_mask：anchor box的索引数组，3个1组倒序排序，678对应13x13，345对应26x26，123对应52x52；
355 |     即[[6, 7, 8], [3, 4, 5], [0, 1, 2]]；
356 |     input_shape：K.shape(yolo_outputs[0])[1:3]，第1个预测矩阵yolo_outputs[0]的结构（shape）的第1~2位，
357 |     即(?, 13, 13, 18)中的(13, 13)。再x32，就是YOLO网络的输入尺寸，
358 |     即(416, 416)，因为在网络中，含有5个步长为(2, 2)的卷积操作，降维32=5^2倍；
359 |     grid_shapes：与input_shape类似，K.shape(yolo_outputs[l])[1:3]，以列表的形式，选择3个尺寸的预测图维度，
360 |     即[(13, 13), (26, 26), (52, 52)]；
361 |     m：第1个预测图的结构的第1位，即K.shape(yolo_outputs[0])[0]，输入模型的图片总量，即批次数；
362 |     mf：m的float类型，即K.cast(m, K.dtype(yolo_outputs[0]))
363 |     loss：损失值为0；
364 |     
365 |     '''
366 |     num_layers = len(anchors)//3 # default setting
367 |     yolo_outputs = args[:num_layers]
368 |     y_true = args[num_layers:]
369 |     #anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [0,1,2]]
370 |     anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4], [1,2]]
371 |     input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0])) #修改之处1
372 |     grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)]
373 |     loss = 0
374 |     m = K.shape(yolo_outputs[0])[0] # batch size, tensor
375 |     mf = K.cast(m, K.dtype(yolo_outputs[0]))
376 | 
377 |     for l in range(num_layers):
378 |         object_mask = y_true[l][..., 4:5]
379 |         true_class_probs = y_true[l][..., 5:]
380 | 
381 |         grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
382 |              anchors[anchor_mask[l]], num_classes, input_shape, calc_loss=True)
383 |         pred_box = K.concatenate([pred_xy, pred_wh])
384 | 
385 |         # Darknet raw box to calculate loss.
386 |         raw_true_xy = y_true[l][..., :2]*grid_shapes[l][::-1] - grid
387 |         raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])
388 |         raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh)) # avoid log(0)=-inf
389 |         box_loss_scale = 2 - y_true[l][...,2:3]*y_true[l][...,3:4]
390 | 
391 |         # Find ignore mask, iterate over each of batch.
392 |         ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
393 |         object_mask_bool = K.cast(object_mask, 'bool')
394 |         def loop_body(b, ignore_mask):
395 |             true_box = tf.boolean_mask(y_true[l][b,...,0:4], object_mask_bool[b,...,0])
396 |             iou = box_iou(pred_box[b], true_box)
397 |             best_iou = K.max(iou, axis=-1)
398 |             ignore_mask = ignore_mask.write(b, K.cast(best_iou<ignore_thresh, K.dtype(true_box)))
399 |             return b+1, ignore_mask
400 |         _, ignore_mask = K.control_flow_ops.while_loop(lambda b,*args: b<m, loop_body, [0, ignore_mask])
401 |         ignore_mask = ignore_mask.stack()
402 |         ignore_mask = K.expand_dims(ignore_mask, -1)
403 | 
404 |         # K.binary_crossentropy is helpful to avoid exp overflow.
405 |         xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(raw_true_xy, raw_pred[...,0:2], from_logits=True)
406 |         wh_loss = object_mask * box_loss_scale * 0.5 * K.square(raw_true_wh-raw_pred[...,2:4])
407 |         confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True)+ \
408 |             (1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask
409 |         class_loss = object_mask * K.binary_crossentropy(true_class_probs, raw_pred[...,5:], from_logits=True)
410 | 
411 |         xy_loss = K.sum(xy_loss) / mf
412 |         wh_loss = K.sum(wh_loss) / mf
413 |         confidence_loss = K.sum(confidence_loss) / mf
414 |         class_loss = K.sum(class_loss) / mf
415 |         loss += xy_loss + wh_loss + confidence_loss + class_loss
416 |         if print_loss:
417 |             loss = tf.Print(loss, [loss, xy_loss, wh_loss, confidence_loss, class_loss, K.sum(ignore_mask)], message='loss: ')
418 |     return loss
419 | 


--------------------------------------------------------------------------------
/yolo3/utils.py:
--------------------------------------------------------------------------------
  1 | """Miscellaneous utility functions."""
  2 | 
  3 | from functools import reduce
  4 | 
  5 | from PIL import Image
  6 | import numpy as np
  7 | from matplotlib.colors import rgb_to_hsv, hsv_to_rgb
  8 | 
  9 | def compose(*funcs):
 10 |     """Compose arbitrarily many functions, evaluated left to right.
 11 | 
 12 |     Reference: https://mathieularose.com/function-composition-in-python/
 13 |     """
 14 |     # return lambda x: reduce(lambda v, f: f(v), funcs, x)
 15 |     if funcs:
 16 |         return reduce(lambda f, g: lambda *a, **kw: g(f(*a, **kw)), funcs)
 17 |     else:
 18 |         raise ValueError('Composition of empty sequence not supported.')
 19 | 
 20 | def letterbox_image(image, size):
 21 |     '''resize image with unchanged aspect ratio using padding'''
 22 |     iw, ih = image.size
 23 |     w, h = size
 24 |     scale = min(w/iw, h/ih)
 25 |     nw = int(iw*scale)
 26 |     nh = int(ih*scale)
 27 | 
 28 |     image = image.resize((nw,nh), Image.BICUBIC)
 29 |     new_image = Image.new('RGB', size, (128,128,128))
 30 |     new_image.paste(image, ((w-nw)//2, (h-nh)//2))
 31 |     return new_image
 32 | 
 33 | def rand(a=0, b=1):
 34 |     return np.random.rand()*(b-a) + a
 35 | 
 36 | def get_random_data(annotation_line, input_shape, random=True, max_boxes=20, jitter=.3, hue=.1, sat=1.5, val=1.5, proc_img=True):
 37 |     '''random preprocessing for real-time data augmentation'''
 38 |     line = annotation_line.split()
 39 |     image = Image.open(line[0])
 40 |     iw, ih = image.size
 41 |     h, w = input_shape
 42 |     box = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])
 43 | 
 44 |     if not random:
 45 |         # resize image
 46 |         scale = min(w/iw, h/ih)
 47 |         nw = int(iw*scale)
 48 |         nh = int(ih*scale)
 49 |         dx = (w-nw)//2
 50 |         dy = (h-nh)//2
 51 |         image_data=0
 52 |         if proc_img:
 53 |             image = image.resize((nw,nh), Image.BICUBIC)
 54 |             new_image = Image.new('RGB', (w,h), (128,128,128))
 55 |             new_image.paste(image, (dx, dy))
 56 |             image_data = np.array(new_image)/255.
 57 | 
 58 |         # correct boxes
 59 |         box_data = np.zeros((max_boxes,5))
 60 |         if len(box)>0:
 61 |             np.random.shuffle(box)
 62 |             if len(box)>max_boxes: box = box[:max_boxes]
 63 |             box[:, [0,2]] = box[:, [0,2]]*scale + dx
 64 |             box[:, [1,3]] = box[:, [1,3]]*scale + dy
 65 |             box_data[:len(box)] = box
 66 | 
 67 |         return image_data, box_data
 68 | 
 69 |     # resize image
 70 |     new_ar = w/h * rand(1-jitter,1+jitter)/rand(1-jitter,1+jitter)
 71 |     scale = rand(.25, 2)
 72 |     if new_ar < 1:
 73 |         nh = int(scale*h)
 74 |         nw = int(nh*new_ar)
 75 |     else:
 76 |         nw = int(scale*w)
 77 |         nh = int(nw/new_ar)
 78 |     image = image.resize((nw,nh), Image.BICUBIC)
 79 | 
 80 |     # place image
 81 |     dx = int(rand(0, w-nw))
 82 |     dy = int(rand(0, h-nh))
 83 |     new_image = Image.new('RGB', (w,h), (128,128,128))
 84 |     new_image.paste(image, (dx, dy))
 85 |     image = new_image
 86 | 
 87 |     # flip image or not
 88 |     flip = rand()<.5
 89 |     if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)
 90 | 
 91 |     # distort image
 92 |     hue = rand(-hue, hue)
 93 |     sat = rand(1, sat) if rand()<.5 else 1/rand(1, sat)
 94 |     val = rand(1, val) if rand()<.5 else 1/rand(1, val)
 95 |     x = rgb_to_hsv(np.array(image)/255.)
 96 |     x[..., 0] += hue
 97 |     x[..., 0][x[..., 0]>1] -= 1
 98 |     x[..., 0][x[..., 0]<0] += 1
 99 |     x[..., 1] *= sat
100 |     x[..., 2] *= val
101 |     x[x>1] = 1
102 |     x[x<0] = 0
103 |     image_data = hsv_to_rgb(x) # numpy array, 0 to 1
104 | 
105 |     # correct boxes
106 |     box_data = np.zeros((max_boxes,5))
107 |     if len(box)>0:
108 |         np.random.shuffle(box)
109 |         box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
110 |         box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
111 |         if flip: box[:, [0,2]] = w - box[:, [2,0]]
112 |         box[:, 0:2][box[:, 0:2]<0] = 0
113 |         box[:, 2][box[:, 2]>w] = w
114 |         box[:, 3][box[:, 3]>h] = h
115 |         box_w = box[:, 2] - box[:, 0]
116 |         box_h = box[:, 3] - box[:, 1]
117 |         box = box[np.logical_and(box_w>1, box_h>1)] # discard invalid box
118 |         if len(box)>max_boxes: box = box[:max_boxes]
119 |         box_data[:len(box)] = box
120 | 
121 |     return image_data, box_data
122 | 


--------------------------------------------------------------------------------
/yolo_test_batch.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | 批量测试图像
 4 | 测试的结果存放在路径outdir = "VOC2007/SegmentationClass
 5 | """
 6 | import argparse
 7 | import os
 8 | from yolo import YOLO
 9 | from PIL import Image
10 | 
11 | 
12 | import glob
13 | def detect_img(yolo):
14 |     path = "VOC2007/Images/*.jpg"
15 |     outdir = "VOC2007/SegmentationClass"
16 |     for jpgfile in glob.glob(path):
17 |         img = Image.open(jpgfile)
18 |         img = yolo.detect_image(img)
19 |         img.save(os.path.join(outdir, os.path.basename(jpgfile)))
20 |     yolo.close_session()
21 | FLAGS = None
22 | 
23 | if __name__ == '__main__':
24 |     # class YOLO defines the default value, so suppress any default here
25 |     parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS)
26 |     '''
27 |     Command line options
28 |     '''
29 |     parser.add_argument(
30 |         '--model', type=str,
31 |         help='path to model weight file, default ' + YOLO.get_defaults("model_path")
32 |     )
33 | 
34 |     parser.add_argument(
35 |         '--anchors', type=str,
36 |         help='path to anchor definitions, default ' + YOLO.get_defaults("anchors_path")
37 |     )
38 | 
39 |     parser.add_argument(
40 |         '--classes', type=str,
41 |         help='path to class definitions, default ' + YOLO.get_defaults("classes_path")
42 |     )
43 | 
44 |     parser.add_argument(
45 |         '--gpu_num', type=int,
46 |         help='Number of GPU to use, default ' + str(YOLO.get_defaults("gpu_num"))
47 |     )
48 | 
49 |     parser.add_argument(
50 |         '--image', default=False, action="store_true",
51 |         help='Image detection mode, will ignore all positional arguments'
52 |     )
53 |     '''
54 |     Command line positional arguments -- for video detection mode
55 |     '''
56 |     parser.add_argument(
57 |         "--input", nargs='?', type=str,required=False,default='./path2your_video',
58 |         help = "Video input path"
59 |     )
60 | 
61 |     parser.add_argument(
62 |         "--output", nargs='?', type=str, default="",
63 |         help = "[Optional] Video output path"
64 |     )
65 | 
66 |     FLAGS = parser.parse_args()
67 | 
68 |     if FLAGS.image:
69 |         """
70 |         Image detection mode, disregard any remaining command line arguments
71 |         """
72 |         print("Image detection mode")
73 |         if "input" in FLAGS:
74 |              print("error")
75 | #            print(" Ignoring remaining command line arguments: " + FLAGS.input + "," + FLAGS.output)
76 | #        detect_img(YOLO(**vars(FLAGS)))
77 |     elif "input" in FLAGS:
78 | #        print("error")
79 |          print("Image detection mode")
80 |          print(" Ignoring remaining command line arguments: " + FLAGS.input + "," + FLAGS.output)
81 |          detect_img(YOLO(**vars(FLAGS)))
82 | #        detect_video(YOLO(**vars(FLAGS)), FLAGS.input, FLAGS.output)
83 | #    else:
84 | #        print("Must specify at least video_input_path.  See usage with --help.")
85 | 


--------------------------------------------------------------------------------