├── Readme.md
├── create_tf_record.py
├── data
│   ├── coco.names
│   └── dog.jpg
├── demo.py
├── result.jpg
├── run_convert.sh
├── save_model
│   └── tiny
│       ├── checkpoint
│       ├── yolov3-tiny.ckpt.data-00000-of-00001
│       ├── yolov3-tiny.ckpt.index
│       └── yolov3-tiny.ckpt.meta
├── test_pb.py
├── tf_int8.py
├── utils.py
├── yolo_v3.py
└── yolo_v3_tiny.py

/Readme.md:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | Start from a pretrained TF model.
4 | To use int8 quantization in TFLite, we need to convert the `checkpoint` into a frozen `tf.GraphDef`.
5 | 
6 | Note: `tf.lite` only works on TF 1.13 and later, so your original TF code may not run unchanged!
7 | 
8 | 
9 | 
10 | 
11 | 
12 | 
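13 | A minimal sketch of the intended flow, assuming TF >= 1.13. The input/output node names (`Placeholder`, `concat_1`) and the frozen-graph path are the ones used by `demo.py` and `test_pb.py` in this repo; the output filename is illustrative:
14 | 
15 | ```python
16 | import tensorflow as tf
17 | 
18 | # demo.py freezes the checkpoint into this GraphDef (.pb) file first.
19 | pb_path = "save_model/tiny/pb/frozen_model_yolov3-tiny.pb"
20 | 
21 | converter = tf.lite.TFLiteConverter.from_frozen_graph(
22 |     graph_def_file=pb_path,
23 |     input_arrays=["Placeholder"],
24 |     output_arrays=["concat_1"],
25 |     input_shapes={"Placeholder": [1, 416, 416, 3]})
26 | converter.post_training_quantize = True  # quantize weights to int8
27 | tflite_model = converter.convert()
28 | 
29 | with open("save_model/tiny/yolov3-tiny_quant.tflite", "wb") as f:
30 |     f.write(tflite_model)
31 | ```
32 | 
33 | `post_training_quantize` gives weight-only (hybrid) quantization; full int8 activation quantization additionally needs calibration data.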
--------------------------------------------------------------------------------
/create_tf_record.py:
--------------------------------------------------------------------------------
1 | # -*-coding: utf-8 -*-
2 | """
3 | @Project: create_tfrecord
4 | @File : create_tfrecord.py
5 | @Author : panjq
6 | @E-mail : pan_jinquan@163.com
7 | @Date : 2018-07-27 17:19:54
8 | @desc : save image data into a single tfrecord file
9 | """
10 | 
11 | ##########################################################################
12 | 
13 | import tensorflow as tf
14 | import numpy as np
15 | import os
16 | import cv2
17 | import matplotlib.pyplot as plt
18 | import random
19 | from PIL import Image
20 | 
21 | 
22 | ##########################################################################
23 | def _int64_feature(value):
24 |     return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
25 | # generate a bytes (string) feature
26 | def _bytes_feature(value):
27 |     return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
28 | # generate a float feature
29 | def float_list_feature(value):
30 |     return tf.train.Feature(float_list=tf.train.FloatList(value=value))
31 | 
32 | def get_example_nums(tf_records_filenames):
33 |     '''
34 |     Count the number of examples (images) in a tfrecord file
35 |     :param tf_records_filenames: path of the tfrecord file
36 |     :return:
37 |     '''
38 |     nums = 0
39 |     for record in tf.python_io.tf_record_iterator(tf_records_filenames):
40 |         nums += 1
41 |     return nums
42 | 
43 | def show_image(title, image):
44 |     '''
45 |     Display an image
46 |     :param title: image title
47 |     :param image: image data
48 |     :return:
49 |     '''
50 |     # plt.figure("show_image")
51 |     # print(image.dtype)
52 |     plt.imshow(image)
53 |     plt.axis('on')  # set to 'off' to hide the axes
54 |     plt.title(title)  # image title
55 |     plt.show()
56 | 
57 | def load_labels_file(filename, labels_num=1, shuffle=False):
58 |     '''
59 |     Load an image-list txt file; each line describes one image, space-separated: image_path label1 label2, e.g. test_image/1.jpg 0 2
60 |     :param filename:
61 |     :param labels_num: number of labels per image
62 |     :param shuffle: whether to shuffle the order
63 |     :return: images type->list
64 |     :return: labels type->list
65 |     '''
66 |     images = []
67 |     labels = []
68 |     with open(filename) as f:
69 |         lines_list = f.readlines()
70 |         if shuffle:
71 |             random.shuffle(lines_list)
72 | 
73 |         for lines in lines_list:
74 |             line = lines.rstrip().split(' ')
75 |             label = []
76 |             for i in range(labels_num):
77 |                 label.append(int(line[i + 1]))
78 |             images.append(line[0])
79 |             labels.append(label)
80 |     return images, labels
81 | 
82 | def read_image(filename, resize_height, resize_width, normalization=False):
83 |     '''
84 |     Read image data; by default returns uint8 in [0, 255]
85 |     :param filename:
86 |     :param resize_height:
87 |     :param resize_width:
88 |     :param normalization: whether to normalize to [0.0, 1.0]
89 |     :return: the image data
90 |     '''
91 | 
92 |     bgr_image = cv2.imread(filename)
93 |     if len(bgr_image.shape) == 2:  # if grayscale, convert to three channels
94 |         print("Warning:gray image", filename)
95 |         bgr_image = cv2.cvtColor(bgr_image, cv2.COLOR_GRAY2BGR)
96 | 
97 |     rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)  # convert BGR to RGB
98 |     # show_image(filename,rgb_image)
99 |     # rgb_image=Image.open(filename)
100 |     if resize_height > 0 and resize_width > 0:
101 |         rgb_image = cv2.resize(rgb_image, (resize_width, resize_height))
102 |     rgb_image = np.asanyarray(rgb_image)
103 |     if normalization:
104 |         # must not be written as rgb_image = rgb_image / 255 (in Python 2 that is integer division)
105 |         rgb_image = rgb_image / 255.0
106 |     # show_image("src resize image",image)
107 |     return rgb_image
108 | 
109 | 
110 | def get_batch_images(images, labels, batch_size, labels_nums, one_hot=False, shuffle=False, num_threads=1):
111 |     '''
112 |     :param images: images
113 |     :param labels: labels
114 |     :param batch_size:
115 |     :param labels_nums: number of classes (used as the one-hot depth)
116 |     :param one_hot: whether to convert labels to one-hot form
117 |     :param shuffle: whether to shuffle; usually shuffle=True for training and shuffle=False for validation
118 |     :return: batched images and labels
119 |     '''
120 |     min_after_dequeue = 200
121 |     capacity = min_after_dequeue + 3 * batch_size  # capacity must be larger than min_after_dequeue
122 |     if shuffle:
123 |         images_batch, labels_batch = tf.train.shuffle_batch([images, labels],
124 |                                                             batch_size=batch_size,
125 |                                                             capacity=capacity,
126 |                                                             min_after_dequeue=min_after_dequeue,
127 |                                                             num_threads=num_threads)
128 |     else:
129 |         images_batch, labels_batch = tf.train.batch([images, labels],
130 |                                                     batch_size=batch_size,
131 |                                                     capacity=capacity,
132 |                                                     num_threads=num_threads)
133 |     if one_hot:
134 |         labels_batch = tf.one_hot(labels_batch, labels_nums, 1, 0)
135 |     return images_batch, labels_batch
136 | 
137 | def read_records(filename, resize_height, resize_width, type=None):
138 |     '''
139 |     Parse a record file. The stored image data is RGB uint8 in [0, 255]; as training data it usually needs to be normalized to [0, 1]
140 |     :param filename:
141 |     :param resize_height:
142 |     :param resize_width:
143 |     :param type: return type of the image data
144 |          None: by default, convert uint8 [0, 255] to float32 [0, 255]
145 |          normalization: normalize to float32 [0, 1]
146 |          centralization: normalize to float32 [0, 1], then subtract the mean to center
147 |     :return:
148 |     '''
149 |     # create a file queue with no limit on the number of reads
150 |     filename_queue = tf.train.string_input_producer([filename])
151 |     # create a reader from file queue
152 |     reader = tf.TFRecordReader()
153 |     # the reader pulls one serialized example from the file queue
154 |     _, serialized_example = reader.read(filename_queue)
155 |     # get feature from serialized example
156 |     # parse the serialized example
157 |     features = tf.parse_single_example(
158 |         serialized_example,
159 |         features={
160 |             'image_raw': tf.FixedLenFeature([], tf.string),
161 |             'height': tf.FixedLenFeature([], tf.int64),
162 |             'width': tf.FixedLenFeature([], tf.int64),
163 |             'depth': tf.FixedLenFeature([], tf.int64),
164 |             'label': tf.FixedLenFeature([], tf.int64)
165 |         }
166 |     )
167 |     tf_image = tf.decode_raw(features['image_raw'], tf.uint8)  # get the raw image bytes
168 | 
169 |     tf_height = features['height']
170 |     tf_width = features['width']
171 |     tf_depth = features['depth']
172 |     tf_label = tf.cast(features['label'], tf.int32)
173 |     # NB: when restoring the raw image data, the reshape size must match the shape the image was saved with, otherwise this fails
174 |     # tf_image=tf.reshape(tf_image, [-1])  # flatten to a row vector
175 |     tf_image = tf.reshape(tf_image, [resize_height, resize_width, 3])  # set the image dimensions
176 | 
177 |     # only after restoring the data can resize_images be applied: uint8 in -> float32 out
178 |     # tf_image=tf.image.resize_images(tf_image,[224, 224])
179 | 
180 |     # images are stored as uint8, but TensorFlow training data must be tf.float32
181 |     if type is None:
182 |         tf_image = tf.cast(tf_image, tf.float32)
183 |     elif type == 'normalization':  # [1] if normalization is needed, use:
184 |         # convert_image_dtype only rescales to [0, 1] when the input is uint8 in [0, 255]
185 |         # tf_image = tf.image.convert_image_dtype(tf_image, tf.float32)
186 |         tf_image = tf.cast(tf_image, tf.float32) * (1. / 255.0)  # normalize
187 |     elif type == 'centralization':
188 |         # for normalization plus centering, assuming a mean of 0.5, use:
189 |         tf_image = tf.cast(tf_image, tf.float32) * (1. / 255) - 0.5  # center
190 | 
191 |     # only the image and label are returned here
192 |     # return tf_image, tf_height,tf_width,tf_depth,tf_label
193 |     return tf_image, tf_label
194 | 
195 | 
196 | def create_records(image_dir, file, output_record_dir, resize_height, resize_width, shuffle, log=5):
197 |     '''
198 |     Save the raw image data, label, height, width, etc. into a record file
199 |     Note: images are read as uint8 and saved as a TF BytesList string; convert the type as needed when parsing
200 |     :param image_dir: directory of the source images
201 |     :param file: txt file with the image info (image_dir + the path in file gives the full image path)
202 |     :param output_record_dir: path of the output record file
203 |     :param resize_height:
204 |     :param resize_width:
205 |     PS: when resize_height or resize_width is 0, no resize is performed
206 |     :param shuffle: whether to shuffle the order
207 |     :param log: interval between log messages
208 |     '''
209 |     # load the file; take only one label
210 |     images_list, labels_list = load_labels_file(file, 1, shuffle)
211 | 
212 |     writer = tf.python_io.TFRecordWriter(output_record_dir)
213 |     for i, [image_name, labels] in enumerate(zip(images_list, labels_list)):
214 |         image_path = os.path.join(image_dir, images_list[i])
215 |         if not os.path.exists(image_path):
216 |             print('Err:no image', image_path)
217 |             continue
218 |         image = read_image(image_path, resize_height, resize_width)
219 |         image_raw = image.tostring()
220 |         if i % log == 0 or i == len(images_list) - 1:
221 |             print('------------processing:%d-th------------' % (i))
222 |             print('current image_path=%s' % (image_path), 'shape:{}'.format(image.shape), 'labels:{}'.format(labels))
223 |         # only one label is saved here; for multiple labels add more "'label': _int64_feature(label)" entries
224 |         label = labels[0]
225 |         example = tf.train.Example(features=tf.train.Features(feature={
226 |             'image_raw': _bytes_feature(image_raw),
227 |             'height': _int64_feature(image.shape[0]),
228 |             'width': _int64_feature(image.shape[1]),
229 |             'depth': _int64_feature(image.shape[2]),
230 |             'label': _int64_feature(label)
231 |         }))
232 |         writer.write(example.SerializeToString())
233 |     writer.close()
234 | 
235 | def disp_records(record_file, resize_height, resize_width, show_nums=4):
236 |     '''
237 |     Parse the record file and display show_nums images, mainly to verify the record file was generated correctly
238 |     :param record_file: path of the record file
239 |     :return:
240 |     '''
241 |     # read the record
242 |     tf_image, tf_label = read_records(record_file, resize_height, resize_width, type='normalization')
243 |     # display the first show_nums images
244 |     init_op = tf.global_variables_initializer()
245 |     with tf.Session() as sess:
246 |         sess.run(init_op)
247 |         coord = tf.train.Coordinator()
248 |         threads = tf.train.start_queue_runners(sess=sess, coord=coord)
249 |         for i in range(show_nums):
250 |             image, label = sess.run([tf_image, tf_label])  # fetch image and label in the session
251 |             # image = tf_image.eval()
252 |             # an image parsed straight from the record is a flat vector; reshape it before display
253 |             # image = image.reshape([height,width,depth])
254 |             print('shape:{},type:{},labels:{}'.format(image.shape, image.dtype, label))
255 |             # pilimg = Image.fromarray(np.asarray(image_eval_reshape))
256 |             # pilimg.show()
257 |             show_image("image:%d" % (label), image)
258 |         coord.request_stop()
259 |         coord.join(threads)
260 | 
261 | 
262 | def batch_test(record_file, resize_height, resize_width):
263 |     '''
264 |     :param record_file: path of the record file
265 |     :param resize_height:
266 |     :param resize_width:
267 |     :return:
268 |     :PS: image_batch and label_batch are usually used as the network inputs
269 |     '''
270 |     # read the record
271 |     tf_image, tf_label = read_records(record_file, resize_height, resize_width, type='normalization')
272 |     image_batch, label_batch = get_batch_images(tf_image, tf_label, batch_size=4, labels_nums=5, one_hot=False, shuffle=False)
273 | 
274 |     init = tf.global_variables_initializer()
275 |     with tf.Session() as sess:  # start a session
276 |         sess.run(init)
277 |         coord = tf.train.Coordinator()
278 |         threads = tf.train.start_queue_runners(coord=coord)
279 |         for i in range(4):
280 |             # fetch images and labels in the session
281 |             images, labels = sess.run([image_batch, label_batch])
282 |             # only show the first image of each batch
283 |             show_image("image", images[0, :, :, :])
284 |             print('shape:{},type:{},labels:{}'.format(images.shape, images.dtype, labels))
285 | 
286 |         # stop all threads
287 |         coord.request_stop()
288 |         coord.join(threads)
289 | 
290 | 
291 | if __name__ == '__main__':
292 |     # parameter settings
293 | 
294 |     resize_height = 224  # stored image height
295 |     resize_width = 224  # stored image width
296 |     shuffle = True
297 |     log = 5
298 |     # generate the train.tfrecords file
299 |     image_dir = 'dataset/train'
300 |     train_labels = 'dataset/train.txt'  # image list file
301 |     train_record_output = 'dataset/record/train.tfrecords'
302 |     create_records(image_dir, train_labels, train_record_output, resize_height, resize_width, shuffle, log)
303 |     train_nums = get_example_nums(train_record_output)
304 |     print("save train example nums={}".format(train_nums))
305 | 
306 |     # generate the val.tfrecords file
307 |     image_dir = 'dataset/val'
308 |     val_labels = 'dataset/val.txt'  # image list file
309 |     val_record_output = 'dataset/record/val.tfrecords'
310 |     create_records(image_dir, val_labels, val_record_output, resize_height, resize_width, shuffle, log)
311 |     val_nums = get_example_nums(val_record_output)
312 |     print("save val example nums={}".format(val_nums))
313 | 
314 |     # test the display functions
315 |     # disp_records(train_record_output,resize_height, resize_width)
316 |     batch_test(train_record_output, resize_height, resize_width)
317 | 
--------------------------------------------------------------------------------
/data/coco.names:
--------------------------------------------------------------------------------
1 | person
2 | bicycle
3 | car
4 | motorbike
5 | aeroplane
6 | bus
7 | train
8 | truck
9 | boat
10 | traffic light
11 | fire hydrant
12 | stop sign
13 | parking meter
14 | bench
15 | bird
16 | cat
17 | dog
18 | horse
19 | sheep
20 | cow
21 | elephant
22 | bear
23 | zebra
24 | giraffe
25 | backpack
26 | umbrella
27 | handbag
28 | tie
29 | suitcase
30 | frisbee
31 | skis
32 | snowboard
33 | sports ball
34 | kite
35 | baseball bat
36 | baseball glove
37 | skateboard
38 | surfboard
39 | tennis racket
40 | bottle
41 | wine glass
42 | cup
43 | fork
44 | knife
45 | spoon
46 | bowl
47 | banana
48 | apple
49 | sandwich
50 | orange
51 | broccoli
52 | carrot
53 | hot dog
54 | pizza
55 | donut
56 | cake
57 | chair
58 | sofa
59 | pottedplant
60 | bed
61 | diningtable
62 | toilet
63 | tvmonitor
64 | laptop
65 | mouse
66 | remote
67 | keyboard
68 | cell phone
69 | microwave
70 | oven
71 | toaster
72 | sink
73 | refrigerator
74 | book
75 | clock
76 | vase
77 | scissors
78 | teddy bear
79 | hair drier
80 | toothbrush
81 | 
--------------------------------------------------------------------------------
/data/dog.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caslabai/yolov3tiny_tensorflow_int8_quantized/a0b012f3bbaf8bb86bea87a7a657b81838cec371/data/dog.jpg
--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import os
3 | 
4 | import numpy as np
5 | import tensorflow as tf
6 | from PIL import Image, ImageDraw
7 | import time
8 | from tensorflow.python.framework import graph_util
9 | 
10 | # import argparse
11 | 
12 | import yolo_v3
13 | import yolo_v3_tiny
14 | 
15 | from utils import load_coco_names, draw_boxes, convert_to_original_size, \
16 |     load_weights, 
detections_boxes, non_max_suppression 17 | 18 | FLAGS = tf.app.flags.FLAGS 19 | 20 | 21 | # parser = argparse.ArgumentParser() 22 | # parser.add_argument('--tiny', dest='flag', action='store_true') 23 | # args = parser.parse_args() 24 | 25 | 26 | tf.app.flags.DEFINE_string('input_img', '', 'Input image') 27 | tf.app.flags.DEFINE_string('output_img', '', 'Output image') 28 | 29 | tf.app.flags.DEFINE_string( 30 | 'class_names', 'data/coco.names', 'File with class names') 31 | tf.app.flags.DEFINE_string( 32 | 'weights_file', 'yolov3.weights', 'Binary file with detector weights') 33 | tf.app.flags.DEFINE_string( 34 | 'data_format', 'NHWC', 'Data format: NCHW (gpu only) / NHWC') 35 | tf.app.flags.DEFINE_string( 36 | 'ckpt_file', './saved_model/model.ckpt', 'Checkpoint file') 37 | tf.app.flags.DEFINE_bool( 38 | 'tiny', False, 'Use tiny version of YOLOv3') 39 | 40 | tf.app.flags.DEFINE_integer('thread', 1, 'thread number') 41 | 42 | tf.app.flags.DEFINE_integer('size', 416, 'Image size') 43 | 44 | tf.app.flags.DEFINE_float('conf_threshold', 0.5, 'Confidence threshold') 45 | tf.app.flags.DEFINE_float('iou_threshold', 0.4, 'IoU threshold') 46 | 47 | 48 | def yolo_full(): 49 | ''' 50 | if FLAGS.tiny: 51 | model = yolo_v3_tiny.yolo_v3_tiny 52 | ckpt_file = './saved_model/yolov3-tiny.ckpt' 53 | else: 54 | model = yolo_v3.yolo_v3 55 | ckpt_file = './saved_model/yolov3.ckpt' 56 | ''' 57 | model = yolo_v3_tiny.yolo_v3_tiny 58 | ckpt_file = './save_model/tiny/yolov3-tiny.ckpt' 59 | 60 | img = Image.open(FLAGS.input_img) 61 | img_resized = img.resize(size=(FLAGS.size, FLAGS.size)) 62 | 63 | classes = load_coco_names(FLAGS.class_names) 64 | 65 | # placeholder for detector inputs 66 | inputs = tf.placeholder(tf.float32, [1, FLAGS.size, FLAGS.size, 3]) 67 | 68 | with tf.variable_scope('detector'): 69 | detections = model(inputs, len(classes), 70 | data_format=FLAGS.data_format) 71 | 72 | saver = tf.train.Saver(var_list=tf.global_variables(scope='detector')) 73 | 74 | boxes = detections_boxes(detections) 75 | session_conf = tf.ConfigProto(intra_op_parallelism_threads=FLAGS.thread, inter_op_parallelism_threads=FLAGS.thread, \ 76 | allow_soft_placement=True, device_count = {'GPU': 1}) 77 | with tf.Session(config=session_conf) as sess: 78 | 79 | saver.restore(sess, ckpt_file) 80 | tf.summary.FileWriter("TensorBoard/", graph = sess.graph) 81 | print( ">>>>>>>>>>>>>>>>> %d" % len(tf.get_default_graph().as_graph_def().node)) 82 | print('Model restored.') 83 | start = time.time() 84 | detected_boxes = sess.run( 85 | boxes, feed_dict={inputs: [np.array(img_resized, dtype=np.float32)]}) 86 | end = time.time() 87 | print("%2.2f secs"%(end - start)) 88 | ''' 89 | opts = tf.profiler.ProfileOptionBuilder.float_operation() 90 | flops = tf.profiler.profile(tf.get_default_graph() , run_meta=tf.RunMetadata(), cmd='op', options=opts) 91 | if flops is not None: 92 | #print('Flops should be ~',2*25*16*9) 93 | #print('25 x 25 x 9 would be',2*25*25*9) # ignores internal dim, repeats first 94 | print('TF stats gives',flops.total_float_ops) 95 | ''' 96 | #output_node_names = "detector/yolo-v3-tiny/detections" 97 | output_node_names = "concat_1" 98 | output_graph_def = graph_util.convert_variables_to_constants( 99 | sess=sess, 100 | input_graph_def=sess.graph_def, 101 | output_node_names=output_node_names.split( "," )) 102 | 103 | print( ">>>>>>>>>>> %d ops in the final graph." 
% len( output_graph_def.node)) 104 | with tf.gfile.GFile( "save_model/tiny/pb/frozen_model_yolov3-tiny.pb", "wb" ) as f: 105 | f.write( output_graph_def.SerializeToString( )) 106 | #builder = tf.saved_model.builder.SavedModelBuilder('./savemodel') 107 | #builder.add_meta_graph_and_variables(sess, ['cpu_server_1']) 108 | 109 | 110 | 111 | 112 | #builder.save() 113 | print (detected_boxes.shape) 114 | #print (detected_boxes[0,1,1]) 115 | #print (np.array(img_resized, dtype=np.float32)[111,111]) 116 | #print (inputs.shape) 117 | filtered_boxes = non_max_suppression(detected_boxes, 118 | confidence_threshold=FLAGS.conf_threshold, 119 | iou_threshold=FLAGS.iou_threshold) 120 | 121 | draw_boxes(filtered_boxes, img, classes, (FLAGS.size, FLAGS.size)) 122 | 123 | img.save(FLAGS.output_img) 124 | 125 | 126 | 127 | 128 | 129 | def main(argv=None): 130 | 131 | time_yolo = time.process_time() 132 | yolo_full() 133 | time_yolo = time.process_time() - time_yolo 134 | print(time_yolo) 135 | 136 | 137 | 138 | if __name__ == '__main__': 139 | tf.app.run() 140 | -------------------------------------------------------------------------------- /result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caslabai/yolov3tiny_tensorflow_int8_quantized/a0b012f3bbaf8bb86bea87a7a657b81838cec371/result.jpg -------------------------------------------------------------------------------- /run_convert.sh: -------------------------------------------------------------------------------- 1 | #python3 ./convert_weights.py 2 | 3 | python3 ./demo.py --input_img data/dog.jpg --output_img result.jpg --tiny 4 | 5 | 6 | #valgrind --tool=massif --time-unit=B --stacks=no --massif-out-file=massif.out python3 ./demo.py --input_img dog.jpg --output_img result.jpg 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /save_model/tiny/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "yolov3-tiny.ckpt" 2 | all_model_checkpoint_paths: "yolov3-tiny.ckpt" 3 | -------------------------------------------------------------------------------- /save_model/tiny/yolov3-tiny.ckpt.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caslabai/yolov3tiny_tensorflow_int8_quantized/a0b012f3bbaf8bb86bea87a7a657b81838cec371/save_model/tiny/yolov3-tiny.ckpt.data-00000-of-00001 -------------------------------------------------------------------------------- /save_model/tiny/yolov3-tiny.ckpt.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caslabai/yolov3tiny_tensorflow_int8_quantized/a0b012f3bbaf8bb86bea87a7a657b81838cec371/save_model/tiny/yolov3-tiny.ckpt.index -------------------------------------------------------------------------------- /save_model/tiny/yolov3-tiny.ckpt.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caslabai/yolov3tiny_tensorflow_int8_quantized/a0b012f3bbaf8bb86bea87a7a657b81838cec371/save_model/tiny/yolov3-tiny.ckpt.meta -------------------------------------------------------------------------------- /test_pb.py: -------------------------------------------------------------------------------- 1 | #-*- coding: utf-8 -*- 2 | #https://blog.csdn.net/guyuealian/article/details/82218092 3 | import tensorflow as tf 4 | from 
tensorflow.python.framework import graph_util 5 | from utils import load_coco_names, draw_boxes, convert_to_original_size, \ 6 | load_weights, detections_boxes, non_max_suppression 7 | from create_tf_record import * 8 | 9 | import yolo_v3 10 | import yolo_v3_tiny 11 | 12 | FLAGS = tf.app.flags.FLAGS 13 | tf.app.flags.DEFINE_integer('size', 416, 'Image size') 14 | #tf.app.flags.DEFINE_string('output_img', './pb_img_result', 'Output image') 15 | tf.app.flags.DEFINE_string('output_img', '', 'Output image') 16 | tf.app.flags.DEFINE_float('conf_threshold', 0.5, 'Confidence threshold') 17 | tf.app.flags.DEFINE_float('iou_threshold', 0.4, 'IoU threshold') 18 | tf.app.flags.DEFINE_string('class_names', 'data/coco.names', 'File with class names') 19 | 20 | 21 | def freeze_graph( input_checkpoint,output_graph): 22 | ''' 23 | :param input_checkpoint: 24 | :return: 25 | ''' 26 | # checkpoint = tf.train.get_checkpoint_state( model_folder) 27 | # input_checkpoint = checkpoint.model_checkpoint_path 28 | 29 | saver = tf.train.import_meta_graph( input_checkpoint + '.meta', clear_devices=True ) 30 | #saver2 = tf.train.Saver(var_list=tf.global_variables(scope='detector')) 31 | 32 | with tf.Session( ) as sess: 33 | saver.restore( sess, input_checkpoint) 34 | print( "[ckpt] op count >>>>>>>>>>>>>>>>> %d" % len(tf.get_default_graph().as_graph_def().node)) 35 | ''' 36 | from tensorflow.tools.graph_transforms import TransformGraph 37 | transforms = ['add_default_attributes', 38 | 'remove_nodes(op=Identity, op=CheckNumerics)', 39 | 'fold_batch_norms', 'fold_old_batch_norms', 40 | 'strip_unused_nodes', 'sort_by_execution_order'] 41 | transformed_graph_def = TransformGraph(tf.get_default_graph().as_graph_def(),'Placeholder', output_node_names.split(","), transforms) 42 | ''' 43 | 44 | #[ print( n.name) for n in tf.get_default_graph( ).as_graph_def( ).node] 45 | tf.summary.FileWriter("TensorBoard/", graph = sess.graph) 46 | #graph=tf.Graph().as_default() 47 | output_node_names = "detector/yolo-v3-tiny/detections" 48 | output_graph_def = graph_util.convert_variables_to_constants( 49 | sess=sess, 50 | #input_graph_def= graph.as_graph_def() , 51 | input_graph_def=sess.graph_def, 52 | #input_graph_def=transformed_graph_def, 53 | output_node_names=output_node_names.split( "," )) 54 | 55 | print( "[pb] op count >>>>>>>>>>>>>>>>> %d ops in the final graph." 
% len( output_graph_def.node))
56 |         with tf.gfile.GFile( output_graph, "wb" ) as f:
57 |             f.write( output_graph_def.SerializeToString( ))
58 | 
59 | 
60 | #tf.train.ExponentialMovingAverage(0.999)
61 | #variables_to_restore = variable_averages.variables_to_restore()
62 | #saver = tf.train.Saver(variables_to_restore) ,
63 | 
64 | 
65 | 
66 | def freeze_graph_test(pb_path, image_path):
67 |     '''
68 |     :param pb_path: path of the pb file
69 |     :param image_path: path of the test image
70 |     :return:
71 |     '''
72 |     with tf.Graph().as_default():
73 |         output_graph_def = tf.GraphDef()
74 |         with open(pb_path, "rb") as f:
75 |             output_graph_def.ParseFromString(f.read())
76 |             tf.import_graph_def(output_graph_def, name="")
77 | 
78 |         #tf.global_variables_initializer()
79 |         with tf.Session() as sess:
80 |             #saver = tf.train.Saver(var_list=tf.global_variables(scope='detector'))
81 |             #input_checkpoint='./saved_model/yolov3-tiny.ckpt'
82 |             #saver.restore( sess, input_checkpoint)
83 | 
84 |             sess.run(tf.global_variables_initializer())
85 | 
86 |             #sess.run(tf.initialize_all_variables())
87 | 
88 | 
89 | 
90 |             # name of the input tensor, matching the network's input placeholder
91 |             # input:0 is the input image; keep_prob:0 would be the dropout keep probability (1 at test time) and is_training:0 the training flag, if the graph had them
92 |             input_image_tensor = sess.graph.get_tensor_by_name("Placeholder:0")
93 | 
94 |             # name of the output tensor
95 |             output_tensor_name = sess.graph.get_tensor_by_name("concat_1:0")
96 | 
97 |             # read the test image
98 | 
99 |             img = Image.open(image_path)
100 |             img_resized = img.resize(size=(416, 416))
101 | 
102 |             # check that the restored model is correct; note that tensor names (with ':0'), not op names, are passed here
103 |             detected_boxes = sess.run(output_tensor_name,
104 |                                       feed_dict={input_image_tensor: [np.array(img_resized, dtype=np.float32)]})
105 | 
106 |             opts = tf.profiler.ProfileOptionBuilder.float_operation()
107 |             flops = tf.profiler.profile(tf.get_default_graph(), run_meta=tf.RunMetadata(), cmd='op', options=opts)
108 |             if flops is not None:
109 |                 #print('Flops should be ~',2*25*16*9)
110 |                 #print('25 x 25 x 9 would be',2*25*25*9) # ignores internal dim, repeats first
111 |                 print('TF stats gives', flops.total_float_ops)
112 | 
113 |             print('[pb] output tensor shape: ', detected_boxes.shape)
114 |             filtered_boxes = non_max_suppression(detected_boxes,
115 |                                                  confidence_threshold=FLAGS.conf_threshold,
116 |                                                  iou_threshold=FLAGS.iou_threshold)
117 |             classes = load_coco_names(FLAGS.class_names)
118 |             draw_boxes(filtered_boxes, img, classes, (FLAGS.size, FLAGS.size))
119 | 
120 |             writer = tf.summary.FileWriter("TensorBoard/", graph=sess.graph)
121 |             #img.save(FLAGS.output_img)
122 |             img.save("result_pb.jpg")
123 | 
124 | 
125 | image_path = 'data/dog.jpg'
126 | 
127 | input_checkpoint = './saved_model/yolov3-tiny.ckpt'
128 | pb_path = "./save_model/tiny/pb/frozen_model_yolov3-tiny.pb"
129 | #out_pb_path="./saved_model/tiny/frozen_yolov3-tiny.pb"
130 | #out_pb_path="frozen_flowers_model_yolov3-tiny.pb"
131 | 
132 | #freeze_graph( input_checkpoint, out_pb_path)
133 | freeze_graph_test(pb_path, image_path)
134 | 
135 | 
--------------------------------------------------------------------------------
/tf_int8.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import os
3 | 
4 | if sys.version_info.major >= 3:
5 |     import pathlib
6 | else:
7 |     import pathlib2 as pathlib
8 | 
9 | 
10 | 
11 | import tensorflow as tf
12 | lite=tf.contrib.lite
13 | 
14 | saved_models_root="../saved_model/tiny/"
15 | tf.enable_eager_execution()
16 | #help(lite.TFLiteConverter.from_saved_model)
17 | #saved_model_dir = str(sorted(pathlib.Path(saved_models_root).glob("*"))[-1])
18 | converter = 
tf.lite.TocoConverter.from_saved_model(saved_models_root ) 19 | #https://www.tensorflow.org/guide/saved_model 20 | #converter = lite 21 | 22 | 23 | #converter = lite.TFLiteConverter.from_saved_model(saved_models_root) 24 | #tflite_model = converter.convert() 25 | 26 | 27 | ''' 28 | tflite_model = converter.convert() 29 | tflite_models_dir = pathlib.Path(saved_model_dir ) 30 | tflite_models_dir.mkdir(exist_ok=True, parents=True) 31 | tflite_model_file = tflite_models_dir/"yolov3-tiny.tflite" 32 | tflite_model_file.write_bytes(tflite_model) 33 | 34 | tf.logging.set_verbosity(tf.logging.INFO) 35 | converter.post_training_quantize = True 36 | tflite_quant_model = converter.convert() 37 | tflite_model_quant_file = tflite_models_dir/"yolov3-tiny_quant.tflite" 38 | tflite_model_quant_file.write_bytes(tflite_quant_model) 39 | ''' 40 | 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | from PIL import ImageDraw 6 | 7 | 8 | def load_weights(var_list, weights_file): 9 | """ 10 | Loads and converts pre-trained weights. 11 | :param var_list: list of network variables. 12 | :param weights_file: name of the binary file. 13 | :return: list of assign ops 14 | """ 15 | with open(weights_file, "rb") as fp: 16 | _ = np.fromfile(fp, dtype=np.int32, count=5) 17 | 18 | weights = np.fromfile(fp, dtype=np.float32) 19 | 20 | ptr = 0 21 | i = 0 22 | assign_ops = [] 23 | while i < len(var_list) - 1: 24 | var1 = var_list[i] 25 | var2 = var_list[i + 1] 26 | # do something only if we process conv layer 27 | if 'Conv' in var1.name.split('/')[-2]: 28 | # check type of next layer 29 | if 'BatchNorm' in var2.name.split('/')[-2]: 30 | # load batch norm params 31 | gamma, beta, mean, var = var_list[i + 1:i + 5] 32 | batch_norm_vars = [beta, gamma, mean, var] 33 | for var in batch_norm_vars: 34 | shape = var.shape.as_list() 35 | num_params = np.prod(shape) 36 | var_weights = weights[ptr:ptr + num_params].reshape(shape) 37 | ptr += num_params 38 | assign_ops.append( 39 | tf.assign(var, var_weights, validate_shape=True)) 40 | 41 | # we move the pointer by 4, because we loaded 4 variables 42 | i += 4 43 | elif 'Conv' in var2.name.split('/')[-2]: 44 | # load biases 45 | bias = var2 46 | bias_shape = bias.shape.as_list() 47 | bias_params = np.prod(bias_shape) 48 | bias_weights = weights[ptr:ptr + 49 | bias_params].reshape(bias_shape) 50 | ptr += bias_params 51 | assign_ops.append( 52 | tf.assign(bias, bias_weights, validate_shape=True)) 53 | 54 | # we loaded 1 variable 55 | i += 1 56 | # we can load weights of conv layer 57 | shape = var1.shape.as_list() 58 | num_params = np.prod(shape) 59 | 60 | var_weights = weights[ptr:ptr + num_params].reshape( 61 | (shape[3], shape[2], shape[0], shape[1])) 62 | # remember to transpose to column-major 63 | var_weights = np.transpose(var_weights, (2, 3, 1, 0)) 64 | ptr += num_params 65 | assign_ops.append( 66 | tf.assign(var1, var_weights, validate_shape=True)) 67 | i += 1 68 | 69 | return assign_ops 70 | 71 | 72 | def detections_boxes(detections): 73 | """ 74 | Converts center x, center y, width and height values to coordinates of top left and bottom right points. 
75 | 
76 |     :param detections: outputs of YOLO v3 detector of shape (?, 10647, (num_classes + 5))
77 |     :return: converted detections of same shape as input
78 |     """
79 |     center_x, center_y, width, height, attrs = tf.split(
80 |         detections, [1, 1, 1, 1, -1], axis=-1)
81 |     w2 = width / 2
82 |     h2 = height / 2
83 |     x0 = center_x - w2
84 |     y0 = center_y - h2
85 |     x1 = center_x + w2
86 |     y1 = center_y + h2
87 | 
88 |     boxes = tf.concat([x0, y0, x1, y1], axis=-1)
89 |     detections = tf.concat([boxes, attrs], axis=-1)
90 |     return detections
91 | 
92 | 
93 | def _iou(box1, box2):
94 |     """
95 |     Computes Intersection over Union value for 2 bounding boxes
96 | 
97 |     :param box1: array of 4 values (top left and bottom right coords): [x0, y0, x1, y1]
98 |     :param box2: same as box1
99 |     :return: IoU
100 |     """
101 |     b1_x0, b1_y0, b1_x1, b1_y1 = box1
102 |     b2_x0, b2_y0, b2_x1, b2_y1 = box2
103 | 
104 |     int_x0 = max(b1_x0, b2_x0)
105 |     int_y0 = max(b1_y0, b2_y0)
106 |     int_x1 = min(b1_x1, b2_x1)
107 |     int_y1 = min(b1_y1, b2_y1)
108 | 
109 |     int_area = max(int_x1 - int_x0, 0) * max(int_y1 - int_y0, 0)  # clamp at 0 so disjoint boxes get zero intersection
110 | 
111 |     b1_area = (b1_x1 - b1_x0) * (b1_y1 - b1_y0)
112 |     b2_area = (b2_x1 - b2_x0) * (b2_y1 - b2_y0)
113 | 
114 |     # we add small epsilon of 1e-05 to avoid division by 0
115 |     iou = int_area / (b1_area + b2_area - int_area + 1e-05)
116 |     return iou
117 | 
118 | 
119 | def non_max_suppression(predictions_with_boxes, confidence_threshold, iou_threshold=0.4):
120 |     """
121 |     Applies Non-max suppression to prediction boxes.
122 | 
123 |     :param predictions_with_boxes: 3D numpy array, first 4 values in 3rd dimension are bbox attrs, 5th is confidence
124 |     :param confidence_threshold: the threshold for deciding if prediction is valid
125 |     :param iou_threshold: the threshold for deciding if two boxes overlap
126 |     :return: dict: class -> [(box, score)]
127 |     """
128 |     conf_mask = np.expand_dims(
129 |         (predictions_with_boxes[:, :, 4] > confidence_threshold), -1)
130 |     predictions = predictions_with_boxes * conf_mask
131 | 
132 |     result = {}
133 |     #print (predictions.shape)
134 |     for i, image_pred in enumerate(predictions):
135 |         shape = image_pred.shape
136 |         non_zero_idxs = np.nonzero(image_pred)
137 |         image_pred = image_pred[non_zero_idxs]
138 |         #print (image_pred.shape)
139 |         #print (shape[-1])
140 |         image_pred = image_pred.reshape(-1, shape[-1])
141 | 
142 |         bbox_attrs = image_pred[:, :5]
143 |         classes = image_pred[:, 5:]
144 |         classes = np.argmax(classes, axis=-1)
145 | 
146 |         unique_classes = list(set(classes.reshape(-1)))
147 | 
148 |         for cls in unique_classes:
149 |             cls_mask = classes == cls
150 |             cls_boxes = bbox_attrs[np.nonzero(cls_mask)]
151 |             cls_boxes = cls_boxes[cls_boxes[:, -1].argsort()[::-1]]
152 |             cls_scores = cls_boxes[:, -1]
153 |             cls_boxes = cls_boxes[:, :-1]
154 | 
155 |             while len(cls_boxes) > 0:
156 |                 box = cls_boxes[0]
157 |                 score = cls_scores[0]
158 |                 if cls not in result:
159 |                     result[cls] = []
160 |                 result[cls].append((box, score))
161 |                 cls_boxes = cls_boxes[1:]
162 |                 cls_scores = cls_scores[1:]
163 |                 ious = np.array([_iou(box, x) for x in cls_boxes])
164 |                 iou_mask = ious < iou_threshold
165 |                 cls_boxes = cls_boxes[np.nonzero(iou_mask)]
166 |                 cls_scores = cls_scores[np.nonzero(iou_mask)]
167 | 
168 |     return result
169 | 
170 | 
171 | def load_coco_names(file_name):
172 |     names = {}
173 |     with open(file_name) as f:
174 |         for id, name in enumerate(f):
175 |             names[id] = name.strip()  # drop the trailing newline so labels render cleanly
176 |     return names
177 | 
178 | 
179 | def draw_boxes(boxes, img, cls_names, detection_size):
180 |     draw = ImageDraw.Draw(img)
181 | 
182 |     for cls, 
bboxs in boxes.items(): 183 | color = tuple(np.random.randint(0, 256, 3)) 184 | for box, score in bboxs: 185 | box = convert_to_original_size(box, np.array(detection_size), 186 | np.array(img.size)) 187 | draw.rectangle(box, outline=color) 188 | draw.text(box[:2], '{} {:.2f}%'.format( 189 | cls_names[cls], score * 100), fill=color) 190 | 191 | 192 | def convert_to_original_size(box, size, original_size): 193 | ratio = original_size / size 194 | box = box.reshape(2, 2) * ratio 195 | return list(box.reshape(-1)) 196 | -------------------------------------------------------------------------------- /yolo_v3.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | slim = tf.contrib.slim 7 | 8 | _BATCH_NORM_DECAY = 0.9 9 | _BATCH_NORM_EPSILON = 1e-05 10 | _LEAKY_RELU = 0.1 11 | 12 | _ANCHORS = [(10, 13), (16, 30), (33, 23), 13 | (30, 61), (62, 45), (59, 119), 14 | (116, 90), (156, 198), (373, 326)] 15 | 16 | 17 | def darknet53(inputs): 18 | """ 19 | Builds Darknet-53 model. 20 | """ 21 | inputs = _conv2d_fixed_padding(inputs, 32, 3) 22 | inputs = _conv2d_fixed_padding(inputs, 64, 3, strides=2) 23 | inputs = _darknet53_block(inputs, 32) 24 | inputs = _conv2d_fixed_padding(inputs, 128, 3, strides=2) 25 | 26 | for i in range(2): 27 | inputs = _darknet53_block(inputs, 64) 28 | 29 | inputs = _conv2d_fixed_padding(inputs, 256, 3, strides=2) 30 | 31 | for i in range(8): 32 | inputs = _darknet53_block(inputs, 128) 33 | 34 | route_1 = inputs 35 | inputs = _conv2d_fixed_padding(inputs, 512, 3, strides=2) 36 | 37 | for i in range(8): 38 | inputs = _darknet53_block(inputs, 256) 39 | 40 | route_2 = inputs 41 | inputs = _conv2d_fixed_padding(inputs, 1024, 3, strides=2) 42 | 43 | for i in range(4): 44 | inputs = _darknet53_block(inputs, 512) 45 | 46 | return route_1, route_2, inputs 47 | 48 | 49 | def _conv2d_fixed_padding(inputs, filters, kernel_size, strides=1): 50 | if strides > 1: 51 | inputs = _fixed_padding(inputs, kernel_size) 52 | inputs = slim.conv2d(inputs, filters, kernel_size, stride=strides, 53 | padding=('SAME' if strides == 1 else 'VALID')) 54 | return inputs 55 | 56 | 57 | def _darknet53_block(inputs, filters): 58 | shortcut = inputs 59 | inputs = _conv2d_fixed_padding(inputs, filters, 1) 60 | inputs = _conv2d_fixed_padding(inputs, filters * 2, 3) 61 | 62 | inputs = inputs + shortcut 63 | return inputs 64 | 65 | 66 | @tf.contrib.framework.add_arg_scope 67 | def _fixed_padding(inputs, kernel_size, *args, mode='CONSTANT', **kwargs): 68 | """ 69 | Pads the input along the spatial dimensions independently of input size. 70 | 71 | Args: 72 | inputs: A tensor of size [batch, channels, height_in, width_in] or 73 | [batch, height_in, width_in, channels] depending on data_format. 74 | kernel_size: The kernel to be used in the conv2d or max_pool2d operation. 75 | Should be a positive integer. 76 | data_format: The input format ('NHWC' or 'NCHW'). 77 | mode: The mode for tf.pad. 78 | 79 | Returns: 80 | A tensor with the same format as the input with the data either intact 81 | (if kernel_size == 1) or padded (if kernel_size > 1). 
82 | """ 83 | pad_total = kernel_size - 1 84 | pad_beg = pad_total // 2 85 | pad_end = pad_total - pad_beg 86 | 87 | if kwargs['data_format'] == 'NCHW': 88 | padded_inputs = tf.pad(inputs, [[0, 0], [0, 0], 89 | [pad_beg, pad_end], 90 | [pad_beg, pad_end]], 91 | mode=mode) 92 | else: 93 | padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg, pad_end], 94 | [pad_beg, pad_end], [0, 0]], mode=mode) 95 | return padded_inputs 96 | 97 | 98 | def _yolo_block(inputs, filters): 99 | inputs = _conv2d_fixed_padding(inputs, filters, 1) 100 | inputs = _conv2d_fixed_padding(inputs, filters * 2, 3) 101 | inputs = _conv2d_fixed_padding(inputs, filters, 1) 102 | inputs = _conv2d_fixed_padding(inputs, filters * 2, 3) 103 | inputs = _conv2d_fixed_padding(inputs, filters, 1) 104 | route = inputs 105 | inputs = _conv2d_fixed_padding(inputs, filters * 2, 3) 106 | return route, inputs 107 | 108 | 109 | def _get_size(shape, data_format): 110 | if len(shape) == 4: 111 | shape = shape[1:] 112 | return shape[1:3] if data_format == 'NCHW' else shape[0:2] 113 | 114 | 115 | def _detection_layer(inputs, num_classes, anchors, img_size, data_format): 116 | num_anchors = len(anchors) 117 | predictions = slim.conv2d(inputs, num_anchors * (5 + num_classes), 1, 118 | stride=1, normalizer_fn=None, 119 | activation_fn=None, 120 | biases_initializer=tf.zeros_initializer()) 121 | 122 | shape = predictions.get_shape().as_list() 123 | grid_size = _get_size(shape, data_format) 124 | dim = grid_size[0] * grid_size[1] 125 | bbox_attrs = 5 + num_classes 126 | 127 | if data_format == 'NCHW': 128 | predictions = tf.reshape( 129 | predictions, [-1, num_anchors * bbox_attrs, dim]) 130 | predictions = tf.transpose(predictions, [0, 2, 1]) 131 | 132 | predictions = tf.reshape(predictions, [-1, num_anchors * dim, bbox_attrs]) 133 | 134 | stride = (img_size[0] // grid_size[0], img_size[1] // grid_size[1]) 135 | 136 | anchors = [(a[0] / stride[0], a[1] / stride[1]) for a in anchors] 137 | 138 | box_centers, box_sizes, confidence, classes = tf.split( 139 | predictions, [2, 2, 1, num_classes], axis=-1) 140 | 141 | box_centers = tf.nn.sigmoid(box_centers) 142 | confidence = tf.nn.sigmoid(confidence) 143 | 144 | grid_x = tf.range(grid_size[0], dtype=tf.float32) 145 | grid_y = tf.range(grid_size[1], dtype=tf.float32) 146 | a, b = tf.meshgrid(grid_x, grid_y) 147 | 148 | x_offset = tf.reshape(a, (-1, 1)) 149 | y_offset = tf.reshape(b, (-1, 1)) 150 | 151 | x_y_offset = tf.concat([x_offset, y_offset], axis=-1) 152 | x_y_offset = tf.reshape(tf.tile(x_y_offset, [1, num_anchors]), [1, -1, 2]) 153 | 154 | box_centers = box_centers + x_y_offset 155 | box_centers = box_centers * stride 156 | 157 | anchors = tf.tile(anchors, [dim, 1]) 158 | box_sizes = tf.exp(box_sizes) * anchors 159 | box_sizes = box_sizes * stride 160 | 161 | detections = tf.concat([box_centers, box_sizes, confidence], axis=-1) 162 | 163 | classes = tf.nn.sigmoid(classes) 164 | predictions = tf.concat([detections, classes], axis=-1) 165 | return predictions 166 | 167 | 168 | def _upsample(inputs, out_shape, data_format='NCHW'): 169 | # tf.image.resize_nearest_neighbor accepts input in format NHWC 170 | if data_format == 'NCHW': 171 | inputs = tf.transpose(inputs, [0, 2, 3, 1]) 172 | 173 | if data_format == 'NCHW': 174 | new_height = out_shape[3] 175 | new_width = out_shape[2] 176 | else: 177 | new_height = out_shape[2] 178 | new_width = out_shape[1] 179 | 180 | inputs = tf.image.resize_nearest_neighbor(inputs, (new_height, new_width)) 181 | 182 | # back to NCHW if needed 183 | if data_format == 
'NCHW': 184 | inputs = tf.transpose(inputs, [0, 3, 1, 2]) 185 | 186 | inputs = tf.identity(inputs, name='upsampled') 187 | return inputs 188 | 189 | 190 | def yolo_v3(inputs, num_classes, is_training=False, data_format='NCHW', reuse=False): 191 | """ 192 | Creates YOLO v3 model. 193 | 194 | :param inputs: a 4-D tensor of size [batch_size, height, width, channels]. 195 | Dimension batch_size may be undefined. The channel order is RGB. 196 | :param num_classes: number of predicted classes. 197 | :param is_training: whether is training or not. 198 | :param data_format: data format NCHW or NHWC. 199 | :param reuse: whether or not the network and its variables should be reused. 200 | :return: 201 | """ 202 | # it will be needed later on 203 | img_size = inputs.get_shape().as_list()[1:3] 204 | 205 | # transpose the inputs to NCHW 206 | if data_format == 'NCHW': 207 | inputs = tf.transpose(inputs, [0, 3, 1, 2]) 208 | 209 | # normalize values to range [0..1] 210 | inputs = inputs / 255 211 | 212 | # set batch norm params 213 | batch_norm_params = { 214 | 'decay': _BATCH_NORM_DECAY, 215 | 'epsilon': _BATCH_NORM_EPSILON, 216 | 'scale': True, 217 | 'is_training': is_training, 218 | 'fused': None, # Use fused batch norm if possible. 219 | } 220 | 221 | # Set activation_fn and parameters for conv2d, batch_norm. 222 | with slim.arg_scope([slim.conv2d, slim.batch_norm, _fixed_padding], data_format=data_format, reuse=reuse): 223 | with slim.arg_scope([slim.conv2d], normalizer_fn=slim.batch_norm, 224 | normalizer_params=batch_norm_params, 225 | biases_initializer=None, 226 | activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=_LEAKY_RELU)): 227 | with tf.variable_scope('darknet-53'): 228 | route_1, route_2, inputs = darknet53(inputs) 229 | 230 | with tf.variable_scope('yolo-v3'): 231 | route, inputs = _yolo_block(inputs, 512) 232 | detect_1 = _detection_layer( 233 | inputs, num_classes, _ANCHORS[6:9], img_size, data_format) 234 | detect_1 = tf.identity(detect_1, name='detect_1') 235 | 236 | inputs = _conv2d_fixed_padding(route, 256, 1) 237 | upsample_size = route_2.get_shape().as_list() 238 | inputs = _upsample(inputs, upsample_size, data_format) 239 | inputs = tf.concat([inputs, route_2], 240 | axis=1 if data_format == 'NCHW' else 3) 241 | 242 | route, inputs = _yolo_block(inputs, 256) 243 | 244 | detect_2 = _detection_layer( 245 | inputs, num_classes, _ANCHORS[3:6], img_size, data_format) 246 | detect_2 = tf.identity(detect_2, name='detect_2') 247 | 248 | inputs = _conv2d_fixed_padding(route, 128, 1) 249 | upsample_size = route_1.get_shape().as_list() 250 | inputs = _upsample(inputs, upsample_size, data_format) 251 | inputs = tf.concat([inputs, route_1], 252 | axis=1 if data_format == 'NCHW' else 3) 253 | 254 | _, inputs = _yolo_block(inputs, 128) 255 | 256 | detect_3 = _detection_layer( 257 | inputs, num_classes, _ANCHORS[0:3], img_size, data_format) 258 | detect_3 = tf.identity(detect_3, name='detect_3') 259 | 260 | detections = tf.concat([detect_1, detect_2, detect_3], axis=1) 261 | detections = tf.identity(detections, name='detections') 262 | return detections 263 | -------------------------------------------------------------------------------- /yolo_v3_tiny.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | from yolo_v3 import _conv2d_fixed_padding, _fixed_padding, _get_size, \ 6 | _detection_layer, _upsample 7 | 8 | slim = tf.contrib.slim 9 | 10 | _BATCH_NORM_DECAY = 0.9 11 | 
_BATCH_NORM_EPSILON = 1e-05 12 | _LEAKY_RELU = 0.1 13 | 14 | _ANCHORS = [(10, 14), (23, 27), (37, 58), 15 | (81, 82), (135, 169), (344, 319)] 16 | 17 | 18 | def yolo_v3_tiny(inputs, num_classes, is_training=False, data_format='NCHW', reuse=False): 19 | """ 20 | Creates YOLO v3 tiny model. 21 | 22 | :param inputs: a 4-D tensor of size [batch_size, height, width, channels]. 23 | Dimension batch_size may be undefined. The channel order is RGB. 24 | :param num_classes: number of predicted classes. 25 | :param is_training: whether is training or not. 26 | :param data_format: data format NCHW or NHWC. 27 | :param reuse: whether or not the network and its variables should be reused. 28 | :return: 29 | """ 30 | # it will be needed later on 31 | img_size = inputs.get_shape().as_list()[1:3] 32 | 33 | # transpose the inputs to NCHW 34 | if data_format == 'NCHW': 35 | inputs = tf.transpose(inputs, [0, 3, 1, 2]) 36 | 37 | # normalize values to range [0..1] 38 | inputs = inputs / 255 39 | 40 | # set batch norm params 41 | batch_norm_params = { 42 | 'decay': _BATCH_NORM_DECAY, 43 | 'epsilon': _BATCH_NORM_EPSILON, 44 | 'scale': True, 45 | 'is_training': is_training, 46 | 'fused': None, # Use fused batch norm if possible. 47 | } 48 | 49 | # Set activation_fn and parameters for conv2d, batch_norm. 50 | with slim.arg_scope([slim.conv2d, slim.batch_norm, _fixed_padding, slim.max_pool2d], data_format=data_format): 51 | with slim.arg_scope([slim.conv2d, slim.batch_norm, _fixed_padding], reuse=reuse): 52 | with slim.arg_scope([slim.conv2d], 53 | normalizer_fn=slim.batch_norm, 54 | normalizer_params=batch_norm_params, 55 | biases_initializer=None, 56 | activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=_LEAKY_RELU)): 57 | 58 | with tf.variable_scope('yolo-v3-tiny'): 59 | for i in range(6): 60 | inputs = _conv2d_fixed_padding( 61 | inputs, 16 * pow(2, i), 3) 62 | 63 | if i == 4: 64 | route_1 = inputs 65 | 66 | if i == 5: 67 | inputs = slim.max_pool2d( 68 | inputs, [2, 2], stride=1, padding="SAME", scope='pool2') 69 | else: 70 | inputs = slim.max_pool2d( 71 | inputs, [2, 2], scope='pool2') 72 | 73 | inputs = _conv2d_fixed_padding(inputs, 1024, 3) 74 | inputs = _conv2d_fixed_padding(inputs, 256, 1) 75 | route_2 = inputs 76 | 77 | inputs = _conv2d_fixed_padding(inputs, 512, 3) 78 | # inputs = _conv2d_fixed_padding(inputs, 255, 1) 79 | 80 | detect_1 = _detection_layer( 81 | inputs, num_classes, _ANCHORS[3:6], img_size, data_format) 82 | detect_1 = tf.identity(detect_1, name='detect_1') 83 | 84 | inputs = _conv2d_fixed_padding(route_2, 128, 1) 85 | upsample_size = route_1.get_shape().as_list() 86 | inputs = _upsample(inputs, upsample_size, data_format) 87 | 88 | inputs = tf.concat([inputs, route_1], 89 | axis=1 if data_format == 'NCHW' else 3) 90 | 91 | inputs = _conv2d_fixed_padding(inputs, 256, 3) 92 | # inputs = _conv2d_fixed_padding(inputs, 255, 1) 93 | 94 | detect_2 = _detection_layer( 95 | inputs, num_classes, _ANCHORS[0:3], img_size, data_format) 96 | detect_2 = tf.identity(detect_2, name='detect_2') 97 | 98 | detections = tf.concat([detect_1, detect_2], axis=1) 99 | detections = tf.identity(detections, name='detections') 100 | return detections 101 | --------------------------------------------------------------------------------