├── Readme.md
├── create_tf_record.py
├── data
│   ├── coco.names
│   └── dog.jpg
├── demo.py
├── result.jpg
├── run_convert.sh
├── save_model
│   └── tiny
│       ├── checkpoint
│       ├── yolov3-tiny.ckpt.data-00000-of-00001
│       ├── yolov3-tiny.ckpt.index
│       └── yolov3-tiny.ckpt.meta
├── test_pb.py
├── tf_int8.py
├── utils.py
├── yolo_v3.py
└── yolo_v3_tiny.py

/Readme.md:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | Start from a pretrained TF model.
4 | To use int8 quantization in TFLite, we need to convert the `checkpoint` into a frozen `tf.GraphDef`.
5 | 
6 | Note: `tf.lite` only works on TF 1.13 and later, so your original TF code may not run unchanged!
7 | 
8 | 
9 | 
10 | 
11 | 
12 | 
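13 | A minimal sketch of the intended flow, assuming TF >= 1.13. The input/output node names (`Placeholder`, `concat_1`) and the frozen-graph path are the ones used by `demo.py` and `test_pb.py` in this repo; the output filename is illustrative:
14 | 
15 | ```python
16 | import tensorflow as tf
17 | 
18 | # demo.py freezes the checkpoint into this GraphDef (.pb) file first.
19 | pb_path = "save_model/tiny/pb/frozen_model_yolov3-tiny.pb"
20 | 
21 | converter = tf.lite.TFLiteConverter.from_frozen_graph(
22 |     graph_def_file=pb_path,
23 |     input_arrays=["Placeholder"],
24 |     output_arrays=["concat_1"],
25 |     input_shapes={"Placeholder": [1, 416, 416, 3]})
26 | converter.post_training_quantize = True  # quantize weights to int8
27 | tflite_model = converter.convert()
28 | 
29 | with open("save_model/tiny/yolov3-tiny_quant.tflite", "wb") as f:
30 |     f.write(tflite_model)
31 | ```
32 | 
33 | `post_training_quantize` gives weight-only (hybrid) quantization; full int8 activation quantization additionally needs calibration data.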
--------------------------------------------------------------------------------
/create_tf_record.py:
--------------------------------------------------------------------------------
1 | # -*-coding: utf-8 -*-
2 | """
3 | @Project: create_tfrecord
4 | @File : create_tfrecord.py
5 | @Author : panjq
6 | @E-mail : pan_jinquan@163.com
7 | @Date : 2018-07-27 17:19:54
8 | @desc : save image data into a single tfrecord file
9 | """
10 | 
11 | ##########################################################################
12 | 
13 | import tensorflow as tf
14 | import numpy as np
15 | import os
16 | import cv2
17 | import matplotlib.pyplot as plt
18 | import random
19 | from PIL import Image
20 | 
21 | 
22 | ##########################################################################
23 | def _int64_feature(value):
24 |     return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
25 | # generate a bytes (string) feature
26 | def _bytes_feature(value):
27 |     return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
28 | # generate a float feature
29 | def float_list_feature(value):
30 |     return tf.train.Feature(float_list=tf.train.FloatList(value=value))
31 | 
32 | def get_example_nums(tf_records_filenames):
33 |     '''
34 |     Count the number of examples (images) in a tfrecord file
35 |     :param tf_records_filenames: path of the tfrecord file
36 |     :return:
37 |     '''
38 |     nums = 0
39 |     for record in tf.python_io.tf_record_iterator(tf_records_filenames):
40 |         nums += 1
41 |     return nums
42 | 
43 | def show_image(title, image):
44 |     '''
45 |     Display an image
46 |     :param title: image title
47 |     :param image: image data
48 |     :return:
49 |     '''
50 |     # plt.figure("show_image")
51 |     # print(image.dtype)
52 |     plt.imshow(image)
53 |     plt.axis('on')  # set to 'off' to hide the axes
54 |     plt.title(title)  # image title
55 |     plt.show()
56 | 
57 | def load_labels_file(filename, labels_num=1, shuffle=False):
58 |     '''
59 |     Load an image-list txt file; each line describes one image, space-separated: image_path label1 label2, e.g. test_image/1.jpg 0 2
60 |     :param filename:
61 |     :param labels_num: number of labels per image
62 |     :param shuffle: whether to shuffle the order
63 |     :return: images type->list
64 |     :return: labels type->list
65 |     '''
66 |     images = []
67 |     labels = []
68 |     with open(filename) as f:
69 |         lines_list = f.readlines()
70 |         if shuffle:
71 |             random.shuffle(lines_list)
72 | 
73 |         for lines in lines_list:
74 |             line = lines.rstrip().split(' ')
75 |             label = []
76 |             for i in range(labels_num):
77 |                 label.append(int(line[i + 1]))
78 |             images.append(line[0])
79 |             labels.append(label)
80 |     return images, labels
81 | 
82 | def read_image(filename, resize_height, resize_width, normalization=False):
83 |     '''
84 |     Read image data; by default returns uint8 in [0, 255]
85 |     :param filename:
86 |     :param resize_height:
87 |     :param resize_width:
88 |     :param normalization: whether to normalize to [0.0, 1.0]
89 |     :return: the image data
90 |     '''
91 | 
92 |     bgr_image = cv2.imread(filename)
93 |     if len(bgr_image.shape) == 2:  # if grayscale, convert to three channels
94 |         print("Warning:gray image", filename)
95 |         bgr_image = cv2.cvtColor(bgr_image, cv2.COLOR_GRAY2BGR)
96 | 
97 |     rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)  # convert BGR to RGB
98 |     # show_image(filename,rgb_image)
99 |     # rgb_image=Image.open(filename)
100 |     if resize_height > 0 and resize_width > 0:
101 |         rgb_image = cv2.resize(rgb_image, (resize_width, resize_height))
102 |     rgb_image = np.asanyarray(rgb_image)
103 |     if normalization:
104 |         # must not be written as rgb_image = rgb_image / 255 (in Python 2 that is integer division)
105 |         rgb_image = rgb_image / 255.0
106 |     # show_image("src resize image",image)
107 |     return rgb_image
108 | 
109 | 
110 | def get_batch_images(images, labels, batch_size, labels_nums, one_hot=False, shuffle=False, num_threads=1):
111 |     '''
112 |     :param images: images
113 |     :param labels: labels
114 |     :param batch_size:
115 |     :param labels_nums: number of classes (used as the one-hot depth)
116 |     :param one_hot: whether to convert labels to one-hot form
117 |     :param shuffle: whether to shuffle; usually shuffle=True for training and shuffle=False for validation
118 |     :return: batched images and labels
119 |     '''
120 |     min_after_dequeue = 200
121 |     capacity = min_after_dequeue + 3 * batch_size  # capacity must be larger than min_after_dequeue
122 |     if shuffle:
123 |         images_batch, labels_batch = tf.train.shuffle_batch([images, labels],
124 |                                                             batch_size=batch_size,
125 |                                                             capacity=capacity,
126 |                                                             min_after_dequeue=min_after_dequeue,
127 |                                                             num_threads=num_threads)
128 |     else:
129 |         images_batch, labels_batch = tf.train.batch([images, labels],
130 |                                                     batch_size=batch_size,
131 |                                                     capacity=capacity,
132 |                                                     num_threads=num_threads)
133 |     if one_hot:
134 |         labels_batch = tf.one_hot(labels_batch, labels_nums, 1, 0)
135 |     return images_batch, labels_batch
136 | 
137 | def read_records(filename, resize_height, resize_width, type=None):
138 |     '''
139 |     Parse a record file. The stored image data is RGB uint8 in [0, 255]; as training data it usually needs to be normalized to [0, 1]
140 |     :param filename:
141 |     :param resize_height:
142 |     :param resize_width:
143 |     :param type: return type of the image data
144 |          None: by default, convert uint8 [0, 255] to float32 [0, 255]
145 |          normalization: normalize to float32 [0, 1]
146 |          centralization: normalize to float32 [0, 1], then subtract the mean to center
147 |     :return:
148 |     '''
149 |     # create a file queue with no limit on the number of reads
150 |     filename_queue = tf.train.string_input_producer([filename])
151 |     # create a reader from file queue
152 |     reader = tf.TFRecordReader()
153 |     # the reader pulls one serialized example from the file queue
154 |     _, serialized_example = reader.read(filename_queue)
155 |     # get feature from serialized example
156 |     # parse the serialized example
157 |     features = tf.parse_single_example(
158 |         serialized_example,
159 |         features={
160 |             'image_raw': tf.FixedLenFeature([], tf.string),
161 |             'height': tf.FixedLenFeature([], tf.int64),
162 |             'width': tf.FixedLenFeature([], tf.int64),
163 |             'depth': tf.FixedLenFeature([], tf.int64),
164 |             'label': tf.FixedLenFeature([], tf.int64)
165 |         }
166 |     )
167 |     tf_image = tf.decode_raw(features['image_raw'], tf.uint8)  # get the raw image bytes
168 | 
169 |     tf_height = features['height']
170 |     tf_width = features['width']
171 |     tf_depth = features['depth']
172 |     tf_label = tf.cast(features['label'], tf.int32)
173 |     # NB: when restoring the raw image data, the reshape size must match the shape the image was saved with, otherwise this fails
174 |     # tf_image=tf.reshape(tf_image, [-1])  # flatten to a row vector
175 |     tf_image = tf.reshape(tf_image, [resize_height, resize_width, 3])  # set the image dimensions
176 | 
177 |     # only after restoring the data can resize_images be applied: uint8 in -> float32 out
178 |     # tf_image=tf.image.resize_images(tf_image,[224, 224])
179 | 
180 |     # images are stored as uint8, but TensorFlow training data must be tf.float32
181 |     if type is None:
182 |         tf_image = tf.cast(tf_image, tf.float32)
183 |     elif type == 'normalization':  # [1] if normalization is needed, use:
184 |         # convert_image_dtype only rescales to [0, 1] when the input is uint8 in [0, 255]
185 |         # tf_image = tf.image.convert_image_dtype(tf_image, tf.float32)
186 |         tf_image = tf.cast(tf_image, tf.float32) * (1. / 255.0)  # normalize
187 |     elif type == 'centralization':
188 |         # for normalization plus centering, assuming a mean of 0.5, use:
189 |         tf_image = tf.cast(tf_image, tf.float32) * (1. / 255) - 0.5  # center
190 | 
191 |     # only the image and label are returned here
192 |     # return tf_image, tf_height,tf_width,tf_depth,tf_label
193 |     return tf_image, tf_label
194 | 
195 | 
196 | def create_records(image_dir, file, output_record_dir, resize_height, resize_width, shuffle, log=5):
197 |     '''
198 |     Save the raw image data, label, height, width, etc. into a record file
199 |     Note: images are read as uint8 and saved as a TF BytesList string; convert the type as needed when parsing
200 |     :param image_dir: directory of the source images
201 |     :param file: txt file with the image info (image_dir + the path in file gives the full image path)
202 |     :param output_record_dir: path of the output record file
203 |     :param resize_height:
204 |     :param resize_width:
205 |     PS: when resize_height or resize_width is 0, no resize is performed
206 |     :param shuffle: whether to shuffle the order
207 |     :param log: interval between log messages
208 |     '''
209 |     # load the file; take only one label
210 |     images_list, labels_list = load_labels_file(file, 1, shuffle)
211 | 
212 |     writer = tf.python_io.TFRecordWriter(output_record_dir)
213 |     for i, [image_name, labels] in enumerate(zip(images_list, labels_list)):
214 |         image_path = os.path.join(image_dir, images_list[i])
215 |         if not os.path.exists(image_path):
216 |             print('Err:no image', image_path)
217 |             continue
218 |         image = read_image(image_path, resize_height, resize_width)
219 |         image_raw = image.tostring()
220 |         if i % log == 0 or i == len(images_list) - 1:
221 |             print('------------processing:%d-th------------' % (i))
222 |             print('current image_path=%s' % (image_path), 'shape:{}'.format(image.shape), 'labels:{}'.format(labels))
223 |         # only one label is saved here; for multiple labels add more "'label': _int64_feature(label)" entries
224 |         label = labels[0]
225 |         example = tf.train.Example(features=tf.train.Features(feature={
226 |             'image_raw': _bytes_feature(image_raw),
227 |             'height': _int64_feature(image.shape[0]),
228 |             'width': _int64_feature(image.shape[1]),
229 |             'depth': _int64_feature(image.shape[2]),
230 |             'label': _int64_feature(label)
231 |         }))
232 |         writer.write(example.SerializeToString())
233 |     writer.close()
234 | 
235 | def disp_records(record_file, resize_height, resize_width, show_nums=4):
236 |     '''
237 |     Parse the record file and display show_nums images, mainly to verify the record file was generated correctly
238 |     :param record_file: path of the record file
239 |     :return:
240 |     '''
241 |     # read the record
242 |     tf_image, tf_label = read_records(record_file, resize_height, resize_width, type='normalization')
243 |     # display the first show_nums images
244 |     init_op = tf.global_variables_initializer()
245 |     with tf.Session() as sess:
246 |         sess.run(init_op)
247 |         coord = tf.train.Coordinator()
248 |         threads = tf.train.start_queue_runners(sess=sess, coord=coord)
249 |         for i in range(show_nums):
250 |             image, label = sess.run([tf_image, tf_label])  # fetch image and label in the session
251 |             # image = tf_image.eval()
252 |             # an image parsed straight from the record is a flat vector; reshape it before display
253 |             # image = image.reshape([height,width,depth])
254 |             print('shape:{},type:{},labels:{}'.format(image.shape, image.dtype, label))
255 |             # pilimg = Image.fromarray(np.asarray(image_eval_reshape))
256 |             # pilimg.show()
257 |             show_image("image:%d" % (label), image)
258 |         coord.request_stop()
259 |         coord.join(threads)
260 | 
261 | 
262 | def batch_test(record_file, resize_height, resize_width):
263 |     '''
264 |     :param record_file: path of the record file
265 |     :param resize_height:
266 |     :param resize_width:
267 |     :return:
268 |     :PS: image_batch and label_batch are usually used as the network inputs
269 |     '''
270 |     # read the record
271 |     tf_image, tf_label = read_records(record_file, resize_height, resize_width, type='normalization')
272 |     image_batch, label_batch = get_batch_images(tf_image, tf_label, batch_size=4, labels_nums=5, one_hot=False, shuffle=False)
273 | 
274 |     init = tf.global_variables_initializer()
275 |     with tf.Session() as sess:  # start a session
276 |         sess.run(init)
277 |         coord = tf.train.Coordinator()
278 |         threads = tf.train.start_queue_runners(coord=coord)
279 |         for i in range(4):
280 |             # fetch images and labels in the session
281 |             images, labels = sess.run([image_batch, label_batch])
282 |             # only show the first image of each batch
283 |             show_image("image", images[0, :, :, :])
284 |             print('shape:{},type:{},labels:{}'.format(images.shape, images.dtype, labels))
285 | 
286 |         # stop all threads
287 |         coord.request_stop()
288 |         coord.join(threads)
289 | 
290 | 
291 | if __name__ == '__main__':
292 |     # parameter settings
293 | 
294 |     resize_height = 224  # stored image height
295 |     resize_width = 224  # stored image width
296 |     shuffle = True
297 |     log = 5
298 |     # generate the train.tfrecords file
299 |     image_dir = 'dataset/train'
300 |     train_labels = 'dataset/train.txt'  # image list file
301 |     train_record_output = 'dataset/record/train.tfrecords'
302 |     create_records(image_dir, train_labels, train_record_output, resize_height, resize_width, shuffle, log)
303 |     train_nums = get_example_nums(train_record_output)
304 |     print("save train example nums={}".format(train_nums))
305 | 
306 |     # generate the val.tfrecords file
307 |     image_dir = 'dataset/val'
308 |     val_labels = 'dataset/val.txt'  # image list file
309 |     val_record_output = 'dataset/record/val.tfrecords'
310 |     create_records(image_dir, val_labels, val_record_output, resize_height, resize_width, shuffle, log)
311 |     val_nums = get_example_nums(val_record_output)
312 |     print("save val example nums={}".format(val_nums))
313 | 
314 |     # test the display functions
315 |     # disp_records(train_record_output,resize_height, resize_width)
316 |     batch_test(train_record_output, resize_height, resize_width)
317 | 
--------------------------------------------------------------------------------
/data/coco.names:
--------------------------------------------------------------------------------
1 | person
2 | bicycle
3 | car
4 | motorbike
5 | aeroplane
6 | bus
7 | train
8 | truck
9 | boat
10 | traffic light
11 | fire hydrant
12 | stop sign
13 | parking meter
14 | bench
15 | bird
16 | cat
17 | dog
18 | horse
19 | sheep
20 | cow
21 | elephant
22 | bear
23 | zebra
24 | giraffe
25 | backpack
26 | umbrella
27 | handbag
28 | tie
29 | suitcase
30 | frisbee
31 | skis
32 | snowboard
33 | sports ball
34 | kite
35 | baseball bat
36 | baseball glove
37 | skateboard
38 | surfboard
39 | tennis racket
40 | bottle
41 | wine glass
42 | cup
43 | fork
44 | knife
45 | spoon
46 | bowl
47 | banana
48 | apple
49 | sandwich
50 | orange
51 | broccoli
52 | carrot
53 | hot dog
54 | pizza
55 | donut
56 | cake
57 | chair
58 | sofa
59 | pottedplant
60 | bed
61 | diningtable
62 | toilet
63 | tvmonitor
64 | laptop
65 | mouse
66 | remote
67 | keyboard
68 | cell phone
69 | microwave
70 | oven
71 | toaster
72 | sink
73 | refrigerator
74 | book
75 | clock
76 | vase
77 | scissors
78 | teddy bear
79 | hair drier
80 | toothbrush
81 | 
--------------------------------------------------------------------------------
/data/dog.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caslabai/yolov3tiny_tensorflow_int8_quantized/a0b012f3bbaf8bb86bea87a7a657b81838cec371/data/dog.jpg
--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import os
3 | 
4 | import numpy as np
5 | import tensorflow as tf
6 | from PIL import Image, ImageDraw
7 | import time
8 | from tensorflow.python.framework import graph_util
9 | 
10 | # import argparse
11 | 
12 | import yolo_v3
13 | import yolo_v3_tiny
14 | 
15 | from utils import load_coco_names, draw_boxes, convert_to_original_size, \
16 |     load_weights, 
detections_boxes, non_max_suppression 17 | 18 | FLAGS = tf.app.flags.FLAGS 19 | 20 | 21 | # parser = argparse.ArgumentParser() 22 | # parser.add_argument('--tiny', dest='flag', action='store_true') 23 | # args = parser.parse_args() 24 | 25 | 26 | tf.app.flags.DEFINE_string('input_img', '', 'Input image') 27 | tf.app.flags.DEFINE_string('output_img', '', 'Output image') 28 | 29 | tf.app.flags.DEFINE_string( 30 | 'class_names', 'data/coco.names', 'File with class names') 31 | tf.app.flags.DEFINE_string( 32 | 'weights_file', 'yolov3.weights', 'Binary file with detector weights') 33 | tf.app.flags.DEFINE_string( 34 | 'data_format', 'NHWC', 'Data format: NCHW (gpu only) / NHWC') 35 | tf.app.flags.DEFINE_string( 36 | 'ckpt_file', './saved_model/model.ckpt', 'Checkpoint file') 37 | tf.app.flags.DEFINE_bool( 38 | 'tiny', False, 'Use tiny version of YOLOv3') 39 | 40 | tf.app.flags.DEFINE_integer('thread', 1, 'thread number') 41 | 42 | tf.app.flags.DEFINE_integer('size', 416, 'Image size') 43 | 44 | tf.app.flags.DEFINE_float('conf_threshold', 0.5, 'Confidence threshold') 45 | tf.app.flags.DEFINE_float('iou_threshold', 0.4, 'IoU threshold') 46 | 47 | 48 | def yolo_full(): 49 | ''' 50 | if FLAGS.tiny: 51 | model = yolo_v3_tiny.yolo_v3_tiny 52 | ckpt_file = './saved_model/yolov3-tiny.ckpt' 53 | else: 54 | model = yolo_v3.yolo_v3 55 | ckpt_file = './saved_model/yolov3.ckpt' 56 | ''' 57 | model = yolo_v3_tiny.yolo_v3_tiny 58 | ckpt_file = './save_model/tiny/yolov3-tiny.ckpt' 59 | 60 | img = Image.open(FLAGS.input_img) 61 | img_resized = img.resize(size=(FLAGS.size, FLAGS.size)) 62 | 63 | classes = load_coco_names(FLAGS.class_names) 64 | 65 | # placeholder for detector inputs 66 | inputs = tf.placeholder(tf.float32, [1, FLAGS.size, FLAGS.size, 3]) 67 | 68 | with tf.variable_scope('detector'): 69 | detections = model(inputs, len(classes), 70 | data_format=FLAGS.data_format) 71 | 72 | saver = tf.train.Saver(var_list=tf.global_variables(scope='detector')) 73 | 74 | boxes = detections_boxes(detections) 75 | session_conf = tf.ConfigProto(intra_op_parallelism_threads=FLAGS.thread, inter_op_parallelism_threads=FLAGS.thread, \ 76 | allow_soft_placement=True, device_count = {'GPU': 1}) 77 | with tf.Session(config=session_conf) as sess: 78 | 79 | saver.restore(sess, ckpt_file) 80 | tf.summary.FileWriter("TensorBoard/", graph = sess.graph) 81 | print( ">>>>>>>>>>>>>>>>> %d" % len(tf.get_default_graph().as_graph_def().node)) 82 | print('Model restored.') 83 | start = time.time() 84 | detected_boxes = sess.run( 85 | boxes, feed_dict={inputs: [np.array(img_resized, dtype=np.float32)]}) 86 | end = time.time() 87 | print("%2.2f secs"%(end - start)) 88 | ''' 89 | opts = tf.profiler.ProfileOptionBuilder.float_operation() 90 | flops = tf.profiler.profile(tf.get_default_graph() , run_meta=tf.RunMetadata(), cmd='op', options=opts) 91 | if flops is not None: 92 | #print('Flops should be ~',2*25*16*9) 93 | #print('25 x 25 x 9 would be',2*25*25*9) # ignores internal dim, repeats first 94 | print('TF stats gives',flops.total_float_ops) 95 | ''' 96 | #output_node_names = "detector/yolo-v3-tiny/detections" 97 | output_node_names = "concat_1" 98 | output_graph_def = graph_util.convert_variables_to_constants( 99 | sess=sess, 100 | input_graph_def=sess.graph_def, 101 | output_node_names=output_node_names.split( "," )) 102 | 103 | print( ">>>>>>>>>>> %d ops in the final graph." 
% len( output_graph_def.node)) 104 | with tf.gfile.GFile( "save_model/tiny/pb/frozen_model_yolov3-tiny.pb", "wb" ) as f: 105 | f.write( output_graph_def.SerializeToString( )) 106 | #builder = tf.saved_model.builder.SavedModelBuilder('./savemodel') 107 | #builder.add_meta_graph_and_variables(sess, ['cpu_server_1']) 108 | 109 | 110 | 111 | 112 | #builder.save() 113 | print (detected_boxes.shape) 114 | #print (detected_boxes[0,1,1]) 115 | #print (np.array(img_resized, dtype=np.float32)[111,111]) 116 | #print (inputs.shape) 117 | filtered_boxes = non_max_suppression(detected_boxes, 118 | confidence_threshold=FLAGS.conf_threshold, 119 | iou_threshold=FLAGS.iou_threshold) 120 | 121 | draw_boxes(filtered_boxes, img, classes, (FLAGS.size, FLAGS.size)) 122 | 123 | img.save(FLAGS.output_img) 124 | 125 | 126 | 127 | 128 | 129 | def main(argv=None): 130 | 131 | time_yolo = time.process_time() 132 | yolo_full() 133 | time_yolo = time.process_time() - time_yolo 134 | print(time_yolo) 135 | 136 | 137 | 138 | if __name__ == '__main__': 139 | tf.app.run() 140 | -------------------------------------------------------------------------------- /result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caslabai/yolov3tiny_tensorflow_int8_quantized/a0b012f3bbaf8bb86bea87a7a657b81838cec371/result.jpg -------------------------------------------------------------------------------- /run_convert.sh: -------------------------------------------------------------------------------- 1 | #python3 ./convert_weights.py 2 | 3 | python3 ./demo.py --input_img data/dog.jpg --output_img result.jpg --tiny 4 | 5 | 6 | #valgrind --tool=massif --time-unit=B --stacks=no --massif-out-file=massif.out python3 ./demo.py --input_img dog.jpg --output_img result.jpg 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /save_model/tiny/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "yolov3-tiny.ckpt" 2 | all_model_checkpoint_paths: "yolov3-tiny.ckpt" 3 | -------------------------------------------------------------------------------- /save_model/tiny/yolov3-tiny.ckpt.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caslabai/yolov3tiny_tensorflow_int8_quantized/a0b012f3bbaf8bb86bea87a7a657b81838cec371/save_model/tiny/yolov3-tiny.ckpt.data-00000-of-00001 -------------------------------------------------------------------------------- /save_model/tiny/yolov3-tiny.ckpt.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caslabai/yolov3tiny_tensorflow_int8_quantized/a0b012f3bbaf8bb86bea87a7a657b81838cec371/save_model/tiny/yolov3-tiny.ckpt.index -------------------------------------------------------------------------------- /save_model/tiny/yolov3-tiny.ckpt.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caslabai/yolov3tiny_tensorflow_int8_quantized/a0b012f3bbaf8bb86bea87a7a657b81838cec371/save_model/tiny/yolov3-tiny.ckpt.meta -------------------------------------------------------------------------------- /test_pb.py: -------------------------------------------------------------------------------- 1 | #-*- coding: utf-8 -*- 2 | #https://blog.csdn.net/guyuealian/article/details/82218092 3 | import tensorflow as tf 4 | from 
tensorflow.python.framework import graph_util 5 | from utils import load_coco_names, draw_boxes, convert_to_original_size, \ 6 | load_weights, detections_boxes, non_max_suppression 7 | from create_tf_record import * 8 | 9 | import yolo_v3 10 | import yolo_v3_tiny 11 | 12 | FLAGS = tf.app.flags.FLAGS 13 | tf.app.flags.DEFINE_integer('size', 416, 'Image size') 14 | #tf.app.flags.DEFINE_string('output_img', './pb_img_result', 'Output image') 15 | tf.app.flags.DEFINE_string('output_img', '', 'Output image') 16 | tf.app.flags.DEFINE_float('conf_threshold', 0.5, 'Confidence threshold') 17 | tf.app.flags.DEFINE_float('iou_threshold', 0.4, 'IoU threshold') 18 | tf.app.flags.DEFINE_string('class_names', 'data/coco.names', 'File with class names') 19 | 20 | 21 | def freeze_graph( input_checkpoint,output_graph): 22 | ''' 23 | :param input_checkpoint: 24 | :return: 25 | ''' 26 | # checkpoint = tf.train.get_checkpoint_state( model_folder) 27 | # input_checkpoint = checkpoint.model_checkpoint_path 28 | 29 | saver = tf.train.import_meta_graph( input_checkpoint + '.meta', clear_devices=True ) 30 | #saver2 = tf.train.Saver(var_list=tf.global_variables(scope='detector')) 31 | 32 | with tf.Session( ) as sess: 33 | saver.restore( sess, input_checkpoint) 34 | print( "[ckpt] op count >>>>>>>>>>>>>>>>> %d" % len(tf.get_default_graph().as_graph_def().node)) 35 | ''' 36 | from tensorflow.tools.graph_transforms import TransformGraph 37 | transforms = ['add_default_attributes', 38 | 'remove_nodes(op=Identity, op=CheckNumerics)', 39 | 'fold_batch_norms', 'fold_old_batch_norms', 40 | 'strip_unused_nodes', 'sort_by_execution_order'] 41 | transformed_graph_def = TransformGraph(tf.get_default_graph().as_graph_def(),'Placeholder', output_node_names.split(","), transforms) 42 | ''' 43 | 44 | #[ print( n.name) for n in tf.get_default_graph( ).as_graph_def( ).node] 45 | tf.summary.FileWriter("TensorBoard/", graph = sess.graph) 46 | #graph=tf.Graph().as_default() 47 | output_node_names = "detector/yolo-v3-tiny/detections" 48 | output_graph_def = graph_util.convert_variables_to_constants( 49 | sess=sess, 50 | #input_graph_def= graph.as_graph_def() , 51 | input_graph_def=sess.graph_def, 52 | #input_graph_def=transformed_graph_def, 53 | output_node_names=output_node_names.split( "," )) 54 | 55 | print( "[pb] op count >>>>>>>>>>>>>>>>> %d ops in the final graph." 
% len( output_graph_def.node))
56 |         with tf.gfile.GFile( output_graph, "wb" ) as f:
57 |             f.write( output_graph_def.SerializeToString( ))
58 | 
59 | 
60 | #tf.train.ExponentialMovingAverage(0.999)
61 | #variables_to_restore = variable_averages.variables_to_restore()
62 | #saver = tf.train.Saver(variables_to_restore) ,
63 | 
64 | 
65 | 
66 | def freeze_graph_test(pb_path, image_path):
67 |     '''
68 |     :param pb_path: path of the pb file
69 |     :param image_path: path of the test image
70 |     :return:
71 |     '''
72 |     with tf.Graph().as_default():
73 |         output_graph_def = tf.GraphDef()
74 |         with open(pb_path, "rb") as f:
75 |             output_graph_def.ParseFromString(f.read())
76 |             tf.import_graph_def(output_graph_def, name="")
77 | 
78 |         #tf.global_variables_initializer()
79 |         with tf.Session() as sess:
80 |             #saver = tf.train.Saver(var_list=tf.global_variables(scope='detector'))
81 |             #input_checkpoint='./saved_model/yolov3-tiny.ckpt'
82 |             #saver.restore( sess, input_checkpoint)
83 | 
84 |             sess.run(tf.global_variables_initializer())
85 | 
86 |             #sess.run(tf.initialize_all_variables())
87 | 
88 | 
89 | 
90 |             # name of the input tensor, matching the network's input placeholder
91 |             # input:0 is the input image; keep_prob:0 would be the dropout keep probability (1 at test time) and is_training:0 the training flag, if the graph had them
92 |             input_image_tensor = sess.graph.get_tensor_by_name("Placeholder:0")
93 | 
94 |             # name of the output tensor
95 |             output_tensor_name = sess.graph.get_tensor_by_name("concat_1:0")
96 | 
97 |             # read the test image
98 | 
99 |             img = Image.open(image_path)
100 |             img_resized = img.resize(size=(416, 416))
101 | 
102 |             # check that the restored model is correct; note that tensor names (with ':0'), not op names, are passed here
103 |             detected_boxes = sess.run(output_tensor_name,
104 |                                       feed_dict={input_image_tensor: [np.array(img_resized, dtype=np.float32)]})
105 | 
106 |             opts = tf.profiler.ProfileOptionBuilder.float_operation()
107 |             flops = tf.profiler.profile(tf.get_default_graph(), run_meta=tf.RunMetadata(), cmd='op', options=opts)
108 |             if flops is not None:
109 |                 #print('Flops should be ~',2*25*16*9)
110 |                 #print('25 x 25 x 9 would be',2*25*25*9) # ignores internal dim, repeats first
111 |                 print('TF stats gives', flops.total_float_ops)
112 | 
113 |             print('[pb] output tensor shape: ', detected_boxes.shape)
114 |             filtered_boxes = non_max_suppression(detected_boxes,
115 |                                                  confidence_threshold=FLAGS.conf_threshold,
116 |                                                  iou_threshold=FLAGS.iou_threshold)
117 |             classes = load_coco_names(FLAGS.class_names)
118 |             draw_boxes(filtered_boxes, img, classes, (FLAGS.size, FLAGS.size))
119 | 
120 |             writer = tf.summary.FileWriter("TensorBoard/", graph=sess.graph)
121 |             #img.save(FLAGS.output_img)
122 |             img.save("result_pb.jpg")
123 | 
124 | 
125 | image_path = 'data/dog.jpg'
126 | 
127 | input_checkpoint = './saved_model/yolov3-tiny.ckpt'
128 | pb_path = "./save_model/tiny/pb/frozen_model_yolov3-tiny.pb"
129 | #out_pb_path="./saved_model/tiny/frozen_yolov3-tiny.pb"
130 | #out_pb_path="frozen_flowers_model_yolov3-tiny.pb"
131 | 
132 | #freeze_graph( input_checkpoint, out_pb_path)
133 | freeze_graph_test(pb_path, image_path)
134 | 
135 | 
--------------------------------------------------------------------------------
/tf_int8.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import os
3 | 
4 | if sys.version_info.major >= 3:
5 |     import pathlib
6 | else:
7 |     import pathlib2 as pathlib
8 | 
9 | 
10 | 
11 | import tensorflow as tf
12 | lite=tf.contrib.lite
13 | 
14 | saved_models_root="../saved_model/tiny/"
15 | tf.enable_eager_execution()
16 | #help(lite.TFLiteConverter.from_saved_model)
17 | #saved_model_dir = str(sorted(pathlib.Path(saved_models_root).glob("*"))[-1])
18 | converter = 
tf.lite.TocoConverter.from_saved_model(saved_models_root ) 19 | #https://www.tensorflow.org/guide/saved_model 20 | #converter = lite 21 | 22 | 23 | #converter = lite.TFLiteConverter.from_saved_model(saved_models_root) 24 | #tflite_model = converter.convert() 25 | 26 | 27 | ''' 28 | tflite_model = converter.convert() 29 | tflite_models_dir = pathlib.Path(saved_model_dir ) 30 | tflite_models_dir.mkdir(exist_ok=True, parents=True) 31 | tflite_model_file = tflite_models_dir/"yolov3-tiny.tflite" 32 | tflite_model_file.write_bytes(tflite_model) 33 | 34 | tf.logging.set_verbosity(tf.logging.INFO) 35 | converter.post_training_quantize = True 36 | tflite_quant_model = converter.convert() 37 | tflite_model_quant_file = tflite_models_dir/"yolov3-tiny_quant.tflite" 38 | tflite_model_quant_file.write_bytes(tflite_quant_model) 39 | ''' 40 | 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | from PIL import ImageDraw 6 | 7 | 8 | def load_weights(var_list, weights_file): 9 | """ 10 | Loads and converts pre-trained weights. 11 | :param var_list: list of network variables. 12 | :param weights_file: name of the binary file. 13 | :return: list of assign ops 14 | """ 15 | with open(weights_file, "rb") as fp: 16 | _ = np.fromfile(fp, dtype=np.int32, count=5) 17 | 18 | weights = np.fromfile(fp, dtype=np.float32) 19 | 20 | ptr = 0 21 | i = 0 22 | assign_ops = [] 23 | while i < len(var_list) - 1: 24 | var1 = var_list[i] 25 | var2 = var_list[i + 1] 26 | # do something only if we process conv layer 27 | if 'Conv' in var1.name.split('/')[-2]: 28 | # check type of next layer 29 | if 'BatchNorm' in var2.name.split('/')[-2]: 30 | # load batch norm params 31 | gamma, beta, mean, var = var_list[i + 1:i + 5] 32 | batch_norm_vars = [beta, gamma, mean, var] 33 | for var in batch_norm_vars: 34 | shape = var.shape.as_list() 35 | num_params = np.prod(shape) 36 | var_weights = weights[ptr:ptr + num_params].reshape(shape) 37 | ptr += num_params 38 | assign_ops.append( 39 | tf.assign(var, var_weights, validate_shape=True)) 40 | 41 | # we move the pointer by 4, because we loaded 4 variables 42 | i += 4 43 | elif 'Conv' in var2.name.split('/')[-2]: 44 | # load biases 45 | bias = var2 46 | bias_shape = bias.shape.as_list() 47 | bias_params = np.prod(bias_shape) 48 | bias_weights = weights[ptr:ptr + 49 | bias_params].reshape(bias_shape) 50 | ptr += bias_params 51 | assign_ops.append( 52 | tf.assign(bias, bias_weights, validate_shape=True)) 53 | 54 | # we loaded 1 variable 55 | i += 1 56 | # we can load weights of conv layer 57 | shape = var1.shape.as_list() 58 | num_params = np.prod(shape) 59 | 60 | var_weights = weights[ptr:ptr + num_params].reshape( 61 | (shape[3], shape[2], shape[0], shape[1])) 62 | # remember to transpose to column-major 63 | var_weights = np.transpose(var_weights, (2, 3, 1, 0)) 64 | ptr += num_params 65 | assign_ops.append( 66 | tf.assign(var1, var_weights, validate_shape=True)) 67 | i += 1 68 | 69 | return assign_ops 70 | 71 | 72 | def detections_boxes(detections): 73 | """ 74 | Converts center x, center y, width and height values to coordinates of top left and bottom right points. 
75 | 
76 |     :param detections: outputs of YOLO v3 detector of shape (?, 10647, (num_classes + 5))
77 |     :return: converted detections of same shape as input
78 |     """
79 |     center_x, center_y, width, height, attrs = tf.split(
80 |         detections, [1, 1, 1, 1, -1], axis=-1)
81 |     w2 = width / 2
82 |     h2 = height / 2
83 |     x0 = center_x - w2
84 |     y0 = center_y - h2
85 |     x1 = center_x + w2
86 |     y1 = center_y + h2
87 | 
88 |     boxes = tf.concat([x0, y0, x1, y1], axis=-1)
89 |     detections = tf.concat([boxes, attrs], axis=-1)
90 |     return detections
91 | 
92 | 
93 | def _iou(box1, box2):
94 |     """
95 |     Computes Intersection over Union value for 2 bounding boxes
96 | 
97 |     :param box1: array of 4 values (top left and bottom right coords): [x0, y0, x1, y1]
98 |     :param box2: same as box1
99 |     :return: IoU
100 |     """
101 |     b1_x0, b1_y0, b1_x1, b1_y1 = box1
102 |     b2_x0, b2_y0, b2_x1, b2_y1 = box2
103 | 
104 |     int_x0 = max(b1_x0, b2_x0)
105 |     int_y0 = max(b1_y0, b2_y0)
106 |     int_x1 = min(b1_x1, b2_x1)
107 |     int_y1 = min(b1_y1, b2_y1)
108 | 
109 |     int_area = max(int_x1 - int_x0, 0) * max(int_y1 - int_y0, 0)  # clamp at 0 so disjoint boxes get zero intersection
110 | 
111 |     b1_area = (b1_x1 - b1_x0) * (b1_y1 - b1_y0)
112 |     b2_area = (b2_x1 - b2_x0) * (b2_y1 - b2_y0)
113 | 
114 |     # we add small epsilon of 1e-05 to avoid division by 0
115 |     iou = int_area / (b1_area + b2_area - int_area + 1e-05)
116 |     return iou
117 | 
118 | 
119 | def non_max_suppression(predictions_with_boxes, confidence_threshold, iou_threshold=0.4):
120 |     """
121 |     Applies Non-max suppression to prediction boxes.
122 | 
123 |     :param predictions_with_boxes: 3D numpy array, first 4 values in 3rd dimension are bbox attrs, 5th is confidence
124 |     :param confidence_threshold: the threshold for deciding if prediction is valid
125 |     :param iou_threshold: the threshold for deciding if two boxes overlap
126 |     :return: dict: class -> [(box, score)]
127 |     """
128 |     conf_mask = np.expand_dims(
129 |         (predictions_with_boxes[:, :, 4] > confidence_threshold), -1)
130 |     predictions = predictions_with_boxes * conf_mask
131 | 
132 |     result = {}
133 |     #print (predictions.shape)
134 |     for i, image_pred in enumerate(predictions):
135 |         shape = image_pred.shape
136 |         non_zero_idxs = np.nonzero(image_pred)
137 |         image_pred = image_pred[non_zero_idxs]
138 |         #print (image_pred.shape)
139 |         #print (shape[-1])
140 |         image_pred = image_pred.reshape(-1, shape[-1])
141 | 
142 |         bbox_attrs = image_pred[:, :5]
143 |         classes = image_pred[:, 5:]
144 |         classes = np.argmax(classes, axis=-1)
145 | 
146 |         unique_classes = list(set(classes.reshape(-1)))
147 | 
148 |         for cls in unique_classes:
149 |             cls_mask = classes == cls
150 |             cls_boxes = bbox_attrs[np.nonzero(cls_mask)]
151 |             cls_boxes = cls_boxes[cls_boxes[:, -1].argsort()[::-1]]
152 |             cls_scores = cls_boxes[:, -1]
153 |             cls_boxes = cls_boxes[:, :-1]
154 | 
155 |             while len(cls_boxes) > 0:
156 |                 box = cls_boxes[0]
157 |                 score = cls_scores[0]
158 |                 if cls not in result:
159 |                     result[cls] = []
160 |                 result[cls].append((box, score))
161 |                 cls_boxes = cls_boxes[1:]
162 |                 cls_scores = cls_scores[1:]
163 |                 ious = np.array([_iou(box, x) for x in cls_boxes])
164 |                 iou_mask = ious < iou_threshold
165 |                 cls_boxes = cls_boxes[np.nonzero(iou_mask)]
166 |                 cls_scores = cls_scores[np.nonzero(iou_mask)]
167 | 
168 |     return result
169 | 
170 | 
171 | def load_coco_names(file_name):
172 |     names = {}
173 |     with open(file_name) as f:
174 |         for id, name in enumerate(f):
175 |             names[id] = name.strip()  # drop the trailing newline so labels render cleanly
176 |     return names
177 | 
178 | 
179 | def draw_boxes(boxes, img, cls_names, detection_size):
180 |     draw = ImageDraw.Draw(img)
181 | 
182 |     for cls, 
bboxs in boxes.items(): 183 | color = tuple(np.random.randint(0, 256, 3)) 184 | for box, score in bboxs: 185 | box = convert_to_original_size(box, np.array(detection_size), 186 | np.array(img.size)) 187 | draw.rectangle(box, outline=color) 188 | draw.text(box[:2], '{} {:.2f}%'.format( 189 | cls_names[cls], score * 100), fill=color) 190 | 191 | 192 | def convert_to_original_size(box, size, original_size): 193 | ratio = original_size / size 194 | box = box.reshape(2, 2) * ratio 195 | return list(box.reshape(-1)) 196 | -------------------------------------------------------------------------------- /yolo_v3.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | slim = tf.contrib.slim 7 | 8 | _BATCH_NORM_DECAY = 0.9 9 | _BATCH_NORM_EPSILON = 1e-05 10 | _LEAKY_RELU = 0.1 11 | 12 | _ANCHORS = [(10, 13), (16, 30), (33, 23), 13 | (30, 61), (62, 45), (59, 119), 14 | (116, 90), (156, 198), (373, 326)] 15 | 16 | 17 | def darknet53(inputs): 18 | """ 19 | Builds Darknet-53 model. 20 | """ 21 | inputs = _conv2d_fixed_padding(inputs, 32, 3) 22 | inputs = _conv2d_fixed_padding(inputs, 64, 3, strides=2) 23 | inputs = _darknet53_block(inputs, 32) 24 | inputs = _conv2d_fixed_padding(inputs, 128, 3, strides=2) 25 | 26 | for i in range(2): 27 | inputs = _darknet53_block(inputs, 64) 28 | 29 | inputs = _conv2d_fixed_padding(inputs, 256, 3, strides=2) 30 | 31 | for i in range(8): 32 | inputs = _darknet53_block(inputs, 128) 33 | 34 | route_1 = inputs 35 | inputs = _conv2d_fixed_padding(inputs, 512, 3, strides=2) 36 | 37 | for i in range(8): 38 | inputs = _darknet53_block(inputs, 256) 39 | 40 | route_2 = inputs 41 | inputs = _conv2d_fixed_padding(inputs, 1024, 3, strides=2) 42 | 43 | for i in range(4): 44 | inputs = _darknet53_block(inputs, 512) 45 | 46 | return route_1, route_2, inputs 47 | 48 | 49 | def _conv2d_fixed_padding(inputs, filters, kernel_size, strides=1): 50 | if strides > 1: 51 | inputs = _fixed_padding(inputs, kernel_size) 52 | inputs = slim.conv2d(inputs, filters, kernel_size, stride=strides, 53 | padding=('SAME' if strides == 1 else 'VALID')) 54 | return inputs 55 | 56 | 57 | def _darknet53_block(inputs, filters): 58 | shortcut = inputs 59 | inputs = _conv2d_fixed_padding(inputs, filters, 1) 60 | inputs = _conv2d_fixed_padding(inputs, filters * 2, 3) 61 | 62 | inputs = inputs + shortcut 63 | return inputs 64 | 65 | 66 | @tf.contrib.framework.add_arg_scope 67 | def _fixed_padding(inputs, kernel_size, *args, mode='CONSTANT', **kwargs): 68 | """ 69 | Pads the input along the spatial dimensions independently of input size. 70 | 71 | Args: 72 | inputs: A tensor of size [batch, channels, height_in, width_in] or 73 | [batch, height_in, width_in, channels] depending on data_format. 74 | kernel_size: The kernel to be used in the conv2d or max_pool2d operation. 75 | Should be a positive integer. 76 | data_format: The input format ('NHWC' or 'NCHW'). 77 | mode: The mode for tf.pad. 78 | 79 | Returns: 80 | A tensor with the same format as the input with the data either intact 81 | (if kernel_size == 1) or padded (if kernel_size > 1). 
82 | """ 83 | pad_total = kernel_size - 1 84 | pad_beg = pad_total // 2 85 | pad_end = pad_total - pad_beg 86 | 87 | if kwargs['data_format'] == 'NCHW': 88 | padded_inputs = tf.pad(inputs, [[0, 0], [0, 0], 89 | [pad_beg, pad_end], 90 | [pad_beg, pad_end]], 91 | mode=mode) 92 | else: 93 | padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg, pad_end], 94 | [pad_beg, pad_end], [0, 0]], mode=mode) 95 | return padded_inputs 96 | 97 | 98 | def _yolo_block(inputs, filters): 99 | inputs = _conv2d_fixed_padding(inputs, filters, 1) 100 | inputs = _conv2d_fixed_padding(inputs, filters * 2, 3) 101 | inputs = _conv2d_fixed_padding(inputs, filters, 1) 102 | inputs = _conv2d_fixed_padding(inputs, filters * 2, 3) 103 | inputs = _conv2d_fixed_padding(inputs, filters, 1) 104 | route = inputs 105 | inputs = _conv2d_fixed_padding(inputs, filters * 2, 3) 106 | return route, inputs 107 | 108 | 109 | def _get_size(shape, data_format): 110 | if len(shape) == 4: 111 | shape = shape[1:] 112 | return shape[1:3] if data_format == 'NCHW' else shape[0:2] 113 | 114 | 115 | def _detection_layer(inputs, num_classes, anchors, img_size, data_format): 116 | num_anchors = len(anchors) 117 | predictions = slim.conv2d(inputs, num_anchors * (5 + num_classes), 1, 118 | stride=1, normalizer_fn=None, 119 | activation_fn=None, 120 | biases_initializer=tf.zeros_initializer()) 121 | 122 | shape = predictions.get_shape().as_list() 123 | grid_size = _get_size(shape, data_format) 124 | dim = grid_size[0] * grid_size[1] 125 | bbox_attrs = 5 + num_classes 126 | 127 | if data_format == 'NCHW': 128 | predictions = tf.reshape( 129 | predictions, [-1, num_anchors * bbox_attrs, dim]) 130 | predictions = tf.transpose(predictions, [0, 2, 1]) 131 | 132 | predictions = tf.reshape(predictions, [-1, num_anchors * dim, bbox_attrs]) 133 | 134 | stride = (img_size[0] // grid_size[0], img_size[1] // grid_size[1]) 135 | 136 | anchors = [(a[0] / stride[0], a[1] / stride[1]) for a in anchors] 137 | 138 | box_centers, box_sizes, confidence, classes = tf.split( 139 | predictions, [2, 2, 1, num_classes], axis=-1) 140 | 141 | box_centers = tf.nn.sigmoid(box_centers) 142 | confidence = tf.nn.sigmoid(confidence) 143 | 144 | grid_x = tf.range(grid_size[0], dtype=tf.float32) 145 | grid_y = tf.range(grid_size[1], dtype=tf.float32) 146 | a, b = tf.meshgrid(grid_x, grid_y) 147 | 148 | x_offset = tf.reshape(a, (-1, 1)) 149 | y_offset = tf.reshape(b, (-1, 1)) 150 | 151 | x_y_offset = tf.concat([x_offset, y_offset], axis=-1) 152 | x_y_offset = tf.reshape(tf.tile(x_y_offset, [1, num_anchors]), [1, -1, 2]) 153 | 154 | box_centers = box_centers + x_y_offset 155 | box_centers = box_centers * stride 156 | 157 | anchors = tf.tile(anchors, [dim, 1]) 158 | box_sizes = tf.exp(box_sizes) * anchors 159 | box_sizes = box_sizes * stride 160 | 161 | detections = tf.concat([box_centers, box_sizes, confidence], axis=-1) 162 | 163 | classes = tf.nn.sigmoid(classes) 164 | predictions = tf.concat([detections, classes], axis=-1) 165 | return predictions 166 | 167 | 168 | def _upsample(inputs, out_shape, data_format='NCHW'): 169 | # tf.image.resize_nearest_neighbor accepts input in format NHWC 170 | if data_format == 'NCHW': 171 | inputs = tf.transpose(inputs, [0, 2, 3, 1]) 172 | 173 | if data_format == 'NCHW': 174 | new_height = out_shape[3] 175 | new_width = out_shape[2] 176 | else: 177 | new_height = out_shape[2] 178 | new_width = out_shape[1] 179 | 180 | inputs = tf.image.resize_nearest_neighbor(inputs, (new_height, new_width)) 181 | 182 | # back to NCHW if needed 183 | if data_format == 
'NCHW': 184 | inputs = tf.transpose(inputs, [0, 3, 1, 2]) 185 | 186 | inputs = tf.identity(inputs, name='upsampled') 187 | return inputs 188 | 189 | 190 | def yolo_v3(inputs, num_classes, is_training=False, data_format='NCHW', reuse=False): 191 | """ 192 | Creates YOLO v3 model. 193 | 194 | :param inputs: a 4-D tensor of size [batch_size, height, width, channels]. 195 | Dimension batch_size may be undefined. The channel order is RGB. 196 | :param num_classes: number of predicted classes. 197 | :param is_training: whether is training or not. 198 | :param data_format: data format NCHW or NHWC. 199 | :param reuse: whether or not the network and its variables should be reused. 200 | :return: 201 | """ 202 | # it will be needed later on 203 | img_size = inputs.get_shape().as_list()[1:3] 204 | 205 | # transpose the inputs to NCHW 206 | if data_format == 'NCHW': 207 | inputs = tf.transpose(inputs, [0, 3, 1, 2]) 208 | 209 | # normalize values to range [0..1] 210 | inputs = inputs / 255 211 | 212 | # set batch norm params 213 | batch_norm_params = { 214 | 'decay': _BATCH_NORM_DECAY, 215 | 'epsilon': _BATCH_NORM_EPSILON, 216 | 'scale': True, 217 | 'is_training': is_training, 218 | 'fused': None, # Use fused batch norm if possible. 219 | } 220 | 221 | # Set activation_fn and parameters for conv2d, batch_norm. 222 | with slim.arg_scope([slim.conv2d, slim.batch_norm, _fixed_padding], data_format=data_format, reuse=reuse): 223 | with slim.arg_scope([slim.conv2d], normalizer_fn=slim.batch_norm, 224 | normalizer_params=batch_norm_params, 225 | biases_initializer=None, 226 | activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=_LEAKY_RELU)): 227 | with tf.variable_scope('darknet-53'): 228 | route_1, route_2, inputs = darknet53(inputs) 229 | 230 | with tf.variable_scope('yolo-v3'): 231 | route, inputs = _yolo_block(inputs, 512) 232 | detect_1 = _detection_layer( 233 | inputs, num_classes, _ANCHORS[6:9], img_size, data_format) 234 | detect_1 = tf.identity(detect_1, name='detect_1') 235 | 236 | inputs = _conv2d_fixed_padding(route, 256, 1) 237 | upsample_size = route_2.get_shape().as_list() 238 | inputs = _upsample(inputs, upsample_size, data_format) 239 | inputs = tf.concat([inputs, route_2], 240 | axis=1 if data_format == 'NCHW' else 3) 241 | 242 | route, inputs = _yolo_block(inputs, 256) 243 | 244 | detect_2 = _detection_layer( 245 | inputs, num_classes, _ANCHORS[3:6], img_size, data_format) 246 | detect_2 = tf.identity(detect_2, name='detect_2') 247 | 248 | inputs = _conv2d_fixed_padding(route, 128, 1) 249 | upsample_size = route_1.get_shape().as_list() 250 | inputs = _upsample(inputs, upsample_size, data_format) 251 | inputs = tf.concat([inputs, route_1], 252 | axis=1 if data_format == 'NCHW' else 3) 253 | 254 | _, inputs = _yolo_block(inputs, 128) 255 | 256 | detect_3 = _detection_layer( 257 | inputs, num_classes, _ANCHORS[0:3], img_size, data_format) 258 | detect_3 = tf.identity(detect_3, name='detect_3') 259 | 260 | detections = tf.concat([detect_1, detect_2, detect_3], axis=1) 261 | detections = tf.identity(detections, name='detections') 262 | return detections 263 | -------------------------------------------------------------------------------- /yolo_v3_tiny.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | from yolo_v3 import _conv2d_fixed_padding, _fixed_padding, _get_size, \ 6 | _detection_layer, _upsample 7 | 8 | slim = tf.contrib.slim 9 | 10 | _BATCH_NORM_DECAY = 0.9 11 | 
_BATCH_NORM_EPSILON = 1e-05 12 | _LEAKY_RELU = 0.1 13 | 14 | _ANCHORS = [(10, 14), (23, 27), (37, 58), 15 | (81, 82), (135, 169), (344, 319)] 16 | 17 | 18 | def yolo_v3_tiny(inputs, num_classes, is_training=False, data_format='NCHW', reuse=False): 19 | """ 20 | Creates YOLO v3 tiny model. 21 | 22 | :param inputs: a 4-D tensor of size [batch_size, height, width, channels]. 23 | Dimension batch_size may be undefined. The channel order is RGB. 24 | :param num_classes: number of predicted classes. 25 | :param is_training: whether is training or not. 26 | :param data_format: data format NCHW or NHWC. 27 | :param reuse: whether or not the network and its variables should be reused. 28 | :return: 29 | """ 30 | # it will be needed later on 31 | img_size = inputs.get_shape().as_list()[1:3] 32 | 33 | # transpose the inputs to NCHW 34 | if data_format == 'NCHW': 35 | inputs = tf.transpose(inputs, [0, 3, 1, 2]) 36 | 37 | # normalize values to range [0..1] 38 | inputs = inputs / 255 39 | 40 | # set batch norm params 41 | batch_norm_params = { 42 | 'decay': _BATCH_NORM_DECAY, 43 | 'epsilon': _BATCH_NORM_EPSILON, 44 | 'scale': True, 45 | 'is_training': is_training, 46 | 'fused': None, # Use fused batch norm if possible. 47 | } 48 | 49 | # Set activation_fn and parameters for conv2d, batch_norm. 50 | with slim.arg_scope([slim.conv2d, slim.batch_norm, _fixed_padding, slim.max_pool2d], data_format=data_format): 51 | with slim.arg_scope([slim.conv2d, slim.batch_norm, _fixed_padding], reuse=reuse): 52 | with slim.arg_scope([slim.conv2d], 53 | normalizer_fn=slim.batch_norm, 54 | normalizer_params=batch_norm_params, 55 | biases_initializer=None, 56 | activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=_LEAKY_RELU)): 57 | 58 | with tf.variable_scope('yolo-v3-tiny'): 59 | for i in range(6): 60 | inputs = _conv2d_fixed_padding( 61 | inputs, 16 * pow(2, i), 3) 62 | 63 | if i == 4: 64 | route_1 = inputs 65 | 66 | if i == 5: 67 | inputs = slim.max_pool2d( 68 | inputs, [2, 2], stride=1, padding="SAME", scope='pool2') 69 | else: 70 | inputs = slim.max_pool2d( 71 | inputs, [2, 2], scope='pool2') 72 | 73 | inputs = _conv2d_fixed_padding(inputs, 1024, 3) 74 | inputs = _conv2d_fixed_padding(inputs, 256, 1) 75 | route_2 = inputs 76 | 77 | inputs = _conv2d_fixed_padding(inputs, 512, 3) 78 | # inputs = _conv2d_fixed_padding(inputs, 255, 1) 79 | 80 | detect_1 = _detection_layer( 81 | inputs, num_classes, _ANCHORS[3:6], img_size, data_format) 82 | detect_1 = tf.identity(detect_1, name='detect_1') 83 | 84 | inputs = _conv2d_fixed_padding(route_2, 128, 1) 85 | upsample_size = route_1.get_shape().as_list() 86 | inputs = _upsample(inputs, upsample_size, data_format) 87 | 88 | inputs = tf.concat([inputs, route_1], 89 | axis=1 if data_format == 'NCHW' else 3) 90 | 91 | inputs = _conv2d_fixed_padding(inputs, 256, 3) 92 | # inputs = _conv2d_fixed_padding(inputs, 255, 1) 93 | 94 | detect_2 = _detection_layer( 95 | inputs, num_classes, _ANCHORS[0:3], img_size, data_format) 96 | detect_2 = tf.identity(detect_2, name='detect_2') 97 | 98 | detections = tf.concat([detect_1, detect_2], axis=1) 99 | detections = tf.identity(detections, name='detections') 100 | return detections 101 | --------------------------------------------------------------------------------