├── README.md ├── config ├── config.yml └── eval_config.yml ├── eval.py ├── reader.py ├── train.py └── utils ├── IOU.py ├── __init__.py ├── eval_uitls.py ├── extract_labels.py ├── get_loss.py ├── net.py ├── read_config.py └── select_things.py /README.md: -------------------------------------------------------------------------------- 1 | # YOLOv3_tensorflow 2 | YOLOv3 in tensorflow v1.4
# README.md (tail):
#     RTR
#
# /config/config.yml:
#     ## Basic config
#     batch_size: 2
#     learning_rate: 0.001
#     epoch: 1000
#
#     ## reset image size
#     height: 128
#     width: 128
#
#     ## The scale size that the net work output.
#     scale: 1
#
#     ## Path
#     datas_path: './data/VOCtrainval_06-Nov-2007/JPEGImages'
#     labels_path: './data/VOCtrainval_06-Nov-2007/Annotations'
#
# /config/eval_config.yml:
#     # Image dir
#     image_dir: './data/VOCtest_06-Nov-2007/JPEGImages/000001.jpg'
#
#     # Select the scale
#     scale: 1
#
#     # The path of the target image to save
#     save_dir: './generated_images'
#
#     # Out put image attribute
#     image_width: 128
#     image_height: 128

# /eval.py:
import time
import tensorflow as tf
import os
import argparse
import cv2
import matplotlib.pyplot as plt
import numpy as np

import reader
from utils import net, read_config, select_things, eval_uitls


def parse_args():
    """Parse the command line; the only option is the eval config path."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--conf',
                        default='./config/eval_config.yml',
                        help='the path to the eval_config file')
    return parser.parse_args()


def main(FLAGS):
    """Run the detector on one image, draw the predicted boxes and save it.

    :param FLAGS: config object exposing ``image_dir``, ``image_width``,
        ``image_height``, ``scale`` and ``save_dir`` attributes.
    :raises ValueError: if ``FLAGS.scale`` is not 1, 2 or 3.
    """
    if FLAGS.scale not in (1, 2, 3):
        raise ValueError('scale must be 1, 2 or 3, got %r' % (FLAGS.scale,))

    os.makedirs(FLAGS.save_dir, exist_ok=True)

    input_image = reader.get_image(FLAGS.image_dir, FLAGS.image_width, FLAGS.image_height)
    output_image = np.copy(input_image)

    '''--------Create placeholder--------'''
    image = net.create_eval_placeholder(FLAGS.image_width, FLAGS.image_height)

    '''--------net--------'''
    pre_scale1, pre_scale2, pre_scale3 = net.feature_extractor(image, False)
    scale_tensors = net.scales(pre_scale1, pre_scale2, pre_scale3, False)
    # Only the configured output scale is evaluated.
    scale_tensor = scale_tensors[FLAGS.scale - 1]

    with tf.Session() as sess:
        saver = tf.train.Saver()
        save_path = select_things.select_checkpoint(FLAGS.scale)
        last_checkpoint = tf.train.latest_checkpoint(save_path, 'checkpoint')
        if not last_checkpoint:
            # Without a checkpoint the variables are uninitialised, so running
            # the graph would only fail; stop here instead.
            print('Model has not trained')
            return

        saver.restore(sess, last_checkpoint)
        print('Success load model from: ', format(last_checkpoint))

        start_time = time.time()
        scale = sess.run(scale_tensor, feed_dict={image: [input_image]})

    boxes_labels = eval_uitls.label_extractor(scale[0])
    bdboxes = eval_uitls.get_bdboxes(boxes_labels)

    for center_x, center_y, box_width, box_height, class_name in bdboxes:
        # Boxes are (centre x, centre y, width, height); convert to corners.
        top_left = (int(center_x - box_width / 2), int(center_y - box_height / 2))
        bottom_right = (int(center_x + box_width / 2), int(center_y + box_height / 2))
        output_image = cv2.rectangle(output_image, top_left, bottom_right, (200, 0, 0), 1)
        output_image = cv2.putText(output_image,
                                   class_name,
                                   top_left,
                                   cv2.FONT_HERSHEY_SIMPLEX,
                                   0.3,
                                   (0, 255, 0),
                                   1)

    generate_image = FLAGS.save_dir + '/res.jpg'
    # The image is held as RGB in memory; OpenCV writes BGR.
    cv2.imwrite(generate_image, cv2.cvtColor(output_image, cv2.COLOR_RGB2BGR))
    end_time = time.time()

    print('Use time: ', end_time - start_time)

    plt.imshow(output_image)
    plt.show()


if __name__ == '__main__':
    args = parse_args()
    FLAGS = read_config.read_config_file(args.conf)
    main(FLAGS)

# /reader.py:
import tensorflow as tf
import numpy as np
import cv2
from os.path import isfile, join
from os import listdir
from utils import extract_labels as extract_labels


'''--------Here fnish the minibatch operation--------'''
def images(batch_size, path):
    """Split the files found directly under *path* into full mini-batches.

    File names are sorted so that calling this on the image directory and on
    the annotation directory yields batches in the same order (``listdir``
    gives no ordering guarantee).  A trailing partial batch is dropped.

    :param batch_size: number of file paths per batch.
    :param path: directory to scan (not recursive).
    :return: list of batches, each a list of ``batch_size`` file paths.
    """
    filenames = sorted(join(path, f) for f in listdir(path) if isfile(join(path, f)))
    num_batches = len(filenames) // batch_size
    return [filenames[i * batch_size:(i + 1) * batch_size] for i in range(num_batches)]


def get_image(path, width, height):
    """Load an image as RGB and resize it to ``width`` x ``height``.

    :param path: image file to read.
    :param width: target width in pixels.
    :param height: target height in pixels.
    :return: array as returned by ``cv2.resize`` (pixel values are not rescaled).
    :raises IOError: if OpenCV cannot read the file.
    """
    image = cv2.imread(path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise IOError('Could not read image: %s' % path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return cv2.resize(image, (width, height))


def labels(batch_size, path):
    """Return annotation file paths batched exactly like :func:`images`."""
    return images(batch_size, path)


'''--------Test images--------'''
if __name__ == '__main__':
    image = images(3, './data/VOCtrainval_06-Nov-2007/JPEGImages')

    print(len(image))

# /train.py:
# /train.py:
import tensorflow as tf
import numpy as np
import os
import argparse
import time
import utils.read_config as read_config

from utils import net, read_config, get_loss, IOU, extract_labels, select_things
import reader

# Fraction of the batches used for training; the rest is held out for validation.
TRAIN_FRACTION = 0.9
# Checkpoint / validate every this many epochs (never on epoch 0).
SAVE_EVERY = 1
VAL_EVERY = 10


def parse_args():
    """Parse the command line; the only option is the training config path."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--conf',
                        default='./config/config.yml',
                        help='the path to the config file')
    return parser.parse_args()


def _load_batch(filenames, width, height):
    """Read one mini-batch of images into a float32 array (values not rescaled)."""
    return np.array([np.array(reader.get_image(filename, width, height), np.float32)
                     for filename in filenames])


def main(FLAGS):
    """Build the network and train it on the configured output scale.

    :param FLAGS: config object exposing ``batch_size``, ``learning_rate``,
        ``epoch``, ``height``, ``width``, ``scale``, ``datas_path`` and
        ``labels_path`` attributes.
    :raises ValueError: if ``FLAGS.scale`` is not 1, 2 or 3.
    """
    if FLAGS.scale not in (1, 2, 3):
        raise ValueError('scale must be 1, 2 or 3, got %r' % (FLAGS.scale,))

    scale_width, scale_height = select_things.select_scale(FLAGS.scale, FLAGS.width, FLAGS.height)
    '''--------Creat palceholder--------'''
    datas, labels, train = net.create_placeholder(FLAGS.batch_size, FLAGS.width, FLAGS.height,
                                                  scale_width, scale_height)

    '''--------net--------'''
    pre_scale1, pre_scale2, pre_scale3 = net.feature_extractor(datas, train)
    scale_outputs = net.scales(pre_scale1, pre_scale2, pre_scale3, train)

    '''--------get labels_filenames and datas_filenames--------'''
    datas_filenames = reader.images(FLAGS.batch_size, FLAGS.datas_path)
    labels_filenames = reader.labels(FLAGS.batch_size, FLAGS.labels_path)
    normalize_labels = extract_labels.labels_normalizer(labels_filenames,
                                                        FLAGS.width,
                                                        FLAGS.height,
                                                        scale_width,
                                                        scale_height)

    '''---------partition the train data and val data--------'''
    # The validation part starts where the training part ends, so the two
    # sets are disjoint.
    data_split = int(len(datas_filenames) * TRAIN_FRACTION)
    label_split = int(len(normalize_labels) * TRAIN_FRACTION)
    train_filenames = datas_filenames[:data_split]
    train_labels = normalize_labels[:label_split]
    val_filenames = datas_filenames[data_split:]
    val_labels = normalize_labels[label_split:]

    '''--------calculate loss--------'''
    loss = get_loss.calculate_loss(scale_outputs[FLAGS.scale - 1], labels)

    '''--------Optimizer--------'''
    # Batch-norm moving averages are updated through UPDATE_OPS.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate).minimize(loss)

    tf.summary.scalar('loss', loss)
    merged = tf.summary.merge_all()

    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        writer = tf.summary.FileWriter("logs/", sess.graph)
        number = 0

        saver = tf.train.Saver(max_to_keep=10)
        save_path = select_things.select_checkpoint(FLAGS.scale)
        last_checkpoint = tf.train.latest_checkpoint(save_path, 'checkpoint')
        if last_checkpoint:
            saver.restore(sess, last_checkpoint)
            # Checkpoints are saved as '<name>.ckpt-<step>'; resume after <step>.
            number = int(last_checkpoint.rsplit('-', 1)[-1]) + 1
            print('Reuse model form: ', format(last_checkpoint))
        else:
            sess.run(init)

        for epoch in range(FLAGS.epoch):
            step = epoch + number
            # Plain Python float: accumulating in a tf tensor would add new
            # graph nodes on every epoch.
            epoch_loss = 0.0
            for batch_filenames, batch_labels in zip(train_filenames, train_labels):
                normalize_datas = _load_batch(batch_filenames, FLAGS.width, FLAGS.height)

                _, batch_loss, rs = sess.run([optimizer, loss, merged],
                                             feed_dict={datas: normalize_datas,
                                                        labels: batch_labels,
                                                        train: True})

                epoch_loss += batch_loss

                writer.add_summary(rs, step)

            if epoch % SAVE_EVERY == 0 and epoch != 0:
                print('Cost after epoch %i: %f' % (step, epoch_loss))
                name = 'scale' + str(FLAGS.scale) + '.ckpt'
                saver.save(sess, os.path.join(save_path, name), global_step=step)

            if epoch % VAL_EVERY == 0 and epoch != 0:
                val_loss = 0.0
                for batch_filenames, batch_labels in zip(val_filenames, val_labels):
                    # Same preprocessing as the training batches (no rescaling).
                    normalize_datas = _load_batch(batch_filenames, FLAGS.width, FLAGS.height)

                    batch_loss = sess.run(loss,
                                          feed_dict={datas: normalize_datas,
                                                     labels: batch_labels,
                                                     train: False})

                    val_loss += batch_loss

                print('VAL_Cost after epoch %i: %f' % (step, val_loss))

        writer.close()


if __name__ == '__main__':
    args = parse_args()
    FLAGS = read_config.read_config_file(args.conf)
    main(FLAGS)

# /utils/IOU.py:
import numpy as np
import tensorflow as tf


def calculate_min(point, data):
    """Return ``point - data`` (lower edge of a box from centre and half size)."""
    return point - data


def calculate_max(point, data):
    """Return ``point + data`` (upper edge of a box from centre and half size)."""
    return point + data


def IOU_calculator(x, y, width, height, l_x, l_y, l_width, l_height):
    '''
    Calculate the IOU of a predicted box and a label box.

    All arguments are scalar tensors; boxes are given as centre and size.

    :param x: net predicted x
    :param y: net predicted y
    :param width: net predicted width
    :param height: net predicted height
    :param l_x: label x
    :param l_y: label y
    :param l_width: label width
    :param l_height: label height
    :return: IOU, floored at 1e-8 when the boxes do not overlap
    '''
    x_max = calculate_max(x, width / 2)
    y_max = calculate_max(y, height / 2)
    x_min = calculate_min(x, width / 2)
    y_min = calculate_min(y, height / 2)

    # The label box corners use the label's own size.
    l_x_max = calculate_max(l_x, l_width / 2)
    l_y_max = calculate_max(l_y, l_height / 2)
    l_x_min = calculate_min(l_x, l_width / 2)
    l_y_min = calculate_min(l_y, l_height / 2)

    '''--------Caculate Both Area's point--------'''
    xend = tf.minimum(x_max, l_x_max)
    xstart = tf.maximum(x_min, l_x_min)

    yend = tf.minimum(y_max, l_y_max)
    ystart = tf.maximum(y_min, l_y_min)

    area_width = xend - xstart
    area_height = yend - ystart

    '''--------Caculate the IOU--------'''
    area = area_width * area_height
    union = width * height + l_width * l_height - area
    tiny = lambda: tf.cast(1e-8, tf.float32)

    # Guard the division against an empty (or negative) union.
    all_area = tf.cond(union <= 0, tiny, lambda: union)
    overlap_ratio = area / all_area

    # A negative extent on either axis means the boxes do not intersect.
    iou_x_checked = tf.cond(area_width < 0, tiny, lambda: overlap_ratio)
    iou_checked = tf.cond(area_height < 0, tiny, lambda: iou_x_checked)

    return iou_checked


'''--------Test the IOU function--------'''
if __name__ == '__main__':
    IOU1 = IOU_calculator(tf.cast(1, tf.float32), tf.cast(1, tf.float32), tf.cast(2, tf.float32), tf.cast(2, tf.float32),
                          tf.cast(2, tf.float32), tf.cast(2, tf.float32), tf.cast(2, tf.float32), tf.cast(2, tf.float32))
    IOU = IOU_calculator(tf.cast(0, tf.float32), tf.cast(0, tf.float32), tf.cast(0, tf.float32), tf.cast(0, tf.float32),
                         tf.cast(0, tf.float32), tf.cast(0, tf.float32), tf.cast(0, tf.float32), tf.cast(0, tf.float32))
    sess = tf.Session()
    print(sess.run(IOU))

# /utils/__init__.py:
#     https://raw.githubusercontent.com/IronMastiff/YOLOv3_tensorflow/cb122efb9487bb4d130daddf1c7e529dd58f6562/utils/__init__.py

# /utils/eval_uitls.py:
import numpy as np

# Class names in channel order: entry k belongs to channel offset 5 + k of an
# anchor's 25-value slot.
_CLASS_NAMES = (
    'person', 'bird', 'cat', 'cow', 'dog',
    'horse', 'sheep', 'aeroplane', 'bicycle', 'boat',
    'bus', 'car', 'motorbike', 'train', 'bottle',
    'chair', 'diningtable', 'pottedplant', 'sofa', 'tvmonitor',
)


def label_extractor(scale):
    """Split every grid cell of one output map into its three anchor predictions.

    :param scale: 3-level indexable ``[row][column][channel]``; each cell
        stores three consecutive 25-value slots.
    :return: one entry per cell (row-major), each a list of three tuples
        ``(x, y, width, height, objectness, class_scores)``.
    """
    boxes_labels = []
    for boxes in scale:
        for box in boxes:
            box_labels = []
            for anchor in range(3):
                base = anchor * 25
                box_labels.append((box[base],
                                   box[base + 1],
                                   box[base + 2],
                                   box[base + 3],
                                   box[base + 4],
                                   box[base + 5: base + 25]))
            boxes_labels.append(box_labels)

    return boxes_labels


def get_bdboxes(boxes_labels, min_objectness=None):
    """Pick, for every grid cell, the anchor with the highest objectness.

    :param boxes_labels: output of :func:`label_extractor`.
    :param min_objectness: optional threshold; cells whose best objectness is
        below it are skipped.  ``None`` (default) keeps one box per cell.
    :return: list of ``(x, y, width, height, class_name)`` tuples.
    """
    bdboxes = []
    for box_labels in boxes_labels:
        # max() returns the first anchor on ties.
        best = max(box_labels, key=lambda candidate: candidate[4])
        if min_objectness is not None and best[4] < min_objectness:
            continue

        bdboxes.append((best[0], best[1], best[2], best[3], get_object_class(best[5])))

    return bdboxes


def get_object_class(input):
    """Return the class name of the highest score in *input*.

    Only strictly positive scores can win; if none is positive the first
    class is returned.

    :param input: sequence of 20 class scores in channel order.
    :return: class name string.
    """
    best_score = 0
    best_index = 0
    for index, score in enumerate(input):
        if score > best_score:
            best_score = score
            best_index = index

    return _CLASS_NAMES[best_index]

# /utils/extract_labels.py:
from xml.dom.minidom import parse
import xml.dom.minidom
import numpy as np

# Channel offset of each class inside one anchor's 25-value slot
# (offsets 0-4 hold x, y, width, height and objectness).
_CLASS_CHANNEL = {
    'person': 5,
    'bird': 6,
    'cat': 7,
    'cow': 8,
    'dog': 9,
    'horse': 10,
    'sheep': 11,
    'aeroplane': 12,
    'bicycle': 13,
    'boat': 14,
    'bus': 15,
    'car': 16,
    'motorbike': 17,
    'train': 18,
    'bottle': 19,
    'chair': 20,
    'diningtable': 21,
    'pottedplant': 22,
    'sofa': 23,
    'tvmonitor': 24,
}


def _tag_text(node, tag):
    """Return the text of the first ``<tag>`` element found below *node*."""
    return node.getElementsByTagName(tag)[0].childNodes[0].data


def xml_extractor(dir):
    """Read one annotation XML file.

    :param dir: path of the XML file.
    :return: ``(file_name, width, height, objects)``; ``width`` and ``height``
        are the strings from the ``<size>`` element and ``objects`` is a list
        of ``(name, xmin, ymin, xmax, ymax)`` string tuples.
    :raises ValueError: if the file has no ``<size>`` element.
    """
    collection = parse(dir).documentElement
    file_name = _tag_text(collection, 'filename')

    sizes = collection.getElementsByTagName('size')
    if not sizes:
        raise ValueError('No <size> element in annotation file: %s' % dir)
    # If several <size> elements exist the last one wins.
    width = _tag_text(sizes[-1], 'width')
    height = _tag_text(sizes[-1], 'height')

    objects = []
    for object_xml in collection.getElementsByTagName('object'):
        bdbox = object_xml.getElementsByTagName('bndbox')[0]
        objects.append((_tag_text(object_xml, 'name'),
                        _tag_text(bdbox, 'xmin'),
                        _tag_text(bdbox, 'ymin'),
                        _tag_text(bdbox, 'xmax'),
                        _tag_text(bdbox, 'ymax')))

    return file_name, width, height, objects


def labels_normalizer(batches_filenames, target_width, target_height, layerout_width, layerout_height):
    """Turn batches of annotation files into dense label grids.

    Every object is written into the grid cell containing its centre, once
    for each of the three anchor slots of that cell.  Box centre and size are
    stored in pixels of the resized (target) image.

    :param batches_filenames: list of batches, each a list of XML paths.
    :param target_width: width the images are resized to.
    :param target_height: height the images are resized to.
    :param layerout_width: number of grid columns of the output layer.
    :param layerout_height: number of grid rows of the output layer.
    :return: list of batches; each batch is a list of arrays of shape
        ``[layerout_height, layerout_width, 255]`` filled with 1e-8 where
        nothing is labelled.
    """
    grid_rows = int(layerout_height)
    grid_cols = int(layerout_width)
    # Size of one grid cell in target-image pixels.
    cell_width = int(target_width) / layerout_width
    cell_height = int(target_height) / layerout_height

    batches_labels = []
    for batch_filenames in batches_filenames:
        batch_labels = []
        for filename in batch_filenames:
            _, width, height, objects = xml_extractor(filename)
            width_proportion = target_width / int(width)
            height_proportion = target_height / int(height)
            label = np.add(np.zeros([grid_rows, grid_cols, 255]), 1e-8)
            for class_name, xmin, ymin, xmax, ymax in objects:
                class_channel = _CLASS_CHANNEL[class_name]
                xmin, ymin, xmax, ymax = float(xmin), float(ymin), float(xmax), float(ymax)
                x = (1.0 * xmax + xmin) / 2 * width_proportion
                y = (1.0 * ymax + ymin) / 2 * height_proportion
                bdbox_width = (1.0 * xmax - xmin) * width_proportion
                bdbox_height = (1.0 * ymax - ymin) * height_proportion
                # A centre on the right/bottom image edge belongs to the last cell.
                col = min(int(x // cell_width), grid_cols - 1)
                row = min(int(y // cell_height), grid_rows - 1)
                for anchor in range(3):
                    base = anchor * 25
                    label[row, col, base] = x                       # point x
                    label[row, col, base + 1] = y                   # point y
                    label[row, col, base + 2] = bdbox_width         # bdbox width
                    label[row, col, base + 3] = bdbox_height        # bdbox height
                    label[row, col, base + 4] = 1                   # objectness
                    label[row, col, base + class_channel] = 0.9     # class label

            batch_labels.append(label)

        batches_labels.append(batch_labels)

    return batches_labels


'''--------Test extract_labels--------'''
if __name__ == '__main__':
    dir = [['../data/VOCtest_06-Nov-2007/Annotations/000001.xml', '../data/VOCtest_06-Nov-2007/Annotations/000002.xml'],
           ['../data/VOCtest_06-Nov-2007/Annotations/000003.xml', '../data/VOCtest_06-Nov-2007/Annotations/000004.xml']]
    batches_labels = labels_normalizer(dir, 512, 512, 16, 16)
    print(np.array(dir).shape)
    print(np.array(batches_labels).shape)

# /utils/get_loss.py:
import tensorflow as tf
import numpy as np
from utils import IOU as get_IOU


def objectness_loss(input, switch, l_switch, alpha=0.5):
    '''
    Calculate the objectness loss

    :param input: IOU between the predicted box and the label box
    :param switch: predicted objectness
    :param l_switch: label objectness
    :param alpha: weight applied when ``l_switch < 1``
    :return: objectness_loss
    '''
    IOU_loss = tf.square(l_switch - input * switch)
    loss_max = tf.square(l_switch * 0.5 - input * switch)

    # Replace the loss by 1e-8 when it is below the half-target reference loss.
    gated_loss = tf.cond(IOU_loss < loss_max, lambda: tf.cast(1e-8, tf.float32), lambda: IOU_loss)

    return tf.cond(l_switch < 1, lambda: gated_loss * alpha, lambda: gated_loss)


def location_loss(x, y, width, height, l_x, l_y, l_width, l_height, alpha=5):
    """Squared error on box centre plus squared error on the sqrt of box size.

    Sizes are floored at 1e-8 before the square root because the gradient of
    ``sqrt`` is infinite at 0.

    :return: ``alpha`` times (centre error + size error).
    """
    def _safe_sqrt(value):
        return tf.sqrt(tf.maximum(value, 1e-8))

    point_loss = (tf.square(l_x - x) + tf.square(l_y - y)) * alpha
    size_loss = (tf.square(_safe_sqrt(l_width) - _safe_sqrt(width))
                 + tf.square(_safe_sqrt(l_height) - _safe_sqrt(height))) * alpha

    return point_loss + size_loss


def class_loss(inputs, labels):
    """Sum of squared differences between predicted and label class scores."""
    return tf.reduce_sum(tf.square(labels - inputs))


def calculate_loss(batch_inputs, batch_labels):
    """Sum class, location and objectness loss over every anchor of every cell.

    The graph is built with Python loops, so the first three dimensions of
    ``batch_inputs`` must be statically known.

    :param batch_inputs: predictions indexed ``[image][row][column][channel]``.
    :param batch_labels: labels with the same layout.
    :return: scalar loss.
    """
    batch_loss = 0
    for image_num in range(batch_inputs.shape[0]):
        for y in range(batch_inputs.shape[1]):
            for x in range(batch_inputs.shape[2]):
                predicted_cell = batch_inputs[image_num][y][x]
                label_cell = batch_labels[image_num][y][x]
                for anchor in range(3):
                    base = anchor * 25
                    pretect_x = predicted_cell[base]
                    pretect_y = predicted_cell[base + 1]
                    pretect_width = predicted_cell[base + 2]
                    pretect_height = predicted_cell[base + 3]
                    pretect_objectness = predicted_cell[base + 4]
                    pretect_class = predicted_cell[base + 5: base + 25]
                    label_x = label_cell[base]
                    label_y = label_cell[base + 1]
                    label_width = label_cell[base + 2]
                    label_height = label_cell[base + 3]
                    label_objectness = label_cell[base + 4]
                    label_class = label_cell[base + 5: base + 25]

                    IOU = get_IOU.IOU_calculator(tf.cast(pretect_x, tf.float32),
                                                 tf.cast(pretect_y, tf.float32),
                                                 tf.cast(pretect_width, tf.float32),
                                                 tf.cast(pretect_height, tf.float32),
                                                 tf.cast(label_x, tf.float32),
                                                 tf.cast(label_y, tf.float32),
                                                 tf.cast(label_width, tf.float32),
                                                 tf.cast(label_height, tf.float32))

                    batch_loss += (class_loss(pretect_class, label_class)
                                   + location_loss(pretect_x, pretect_y, pretect_width, pretect_height,
                                                   label_x, label_y, label_width, label_height)
                                   + objectness_loss(IOU, pretect_objectness, label_objectness))
    return batch_loss


'''--------test calculate loss--------'''
if __name__ == '__main__':
    batch_datas = np.zeros([1, 1, 1, 255], dtype=np.float32)
    batch_labels = [[[np.zeros(255, dtype=np.float32)]]]
    batch_loss = calculate_loss(batch_datas, batch_labels)

    print(len(batch_datas), len(batch_datas[0]), len(batch_datas[0][0]), len(batch_datas[0][0][0]))

    sess = tf.Session()

    print(sess.run(batch_loss))

# /utils/net.py:
import cv2
import tensorflow as tf
import skimage.transform


def create_placeholder(batch_size, width, height, final_width, final_height):
    """Create the training placeholders.

    Both tensors are laid out ``[batch, height, width, channels]``, the layout
    of the resized images and of the label grids fed into them.

    :param batch_size: number of images per batch.
    :param width: input image width.
    :param height: input image height.
    :param final_width: number of grid columns of the trained output scale.
    :param final_height: number of grid rows of the trained output scale.
    :return: ``(X, Y, train)`` - images, labels and the boolean training flag.
    """
    X = tf.placeholder(tf.float32, [batch_size, height, width, 3])
    Y = tf.placeholder(tf.float32, [batch_size, int(final_height), int(final_width), 255])
    train = tf.placeholder(tf.bool)

    return X, Y, train


def create_eval_placeholder(width, height):
    """Create the single-image input placeholder, ``[1, height, width, 3]``."""
    image = tf.placeholder(tf.float32, [1, height, width, 3])

    return image


def Leaky_Relu(input, alpha=0.01):
    """Leaky ReLU: ``max(input, alpha * input)``."""
    return tf.maximum(input, tf.multiply(input, alpha))


def conv2d(inputs, filters, shape, stride=(1, 1), training=True):
    """Convolution ('SAME' padding) -> batch normalisation -> leaky ReLU.

    :param inputs: input tensor.
    :param filters: number of output channels.
    :param shape: kernel size.
    :param stride: convolution strides.
    :param training: bool or bool tensor passed to batch normalisation.
    :return: output tensor.
    """
    layer = tf.layers.conv2d(inputs,
                             filters,
                             shape,
                             stride,
                             padding='SAME',
                             kernel_initializer=tf.truncated_normal_initializer(stddev=0.01))
    layer = tf.layers.batch_normalization(layer, training=training)

    return Leaky_Relu(layer)


def Res_conv2d(inputs, shortcut, filters, shape, stride=(1, 1), training=True):
    """Convolution block whose output is added to *shortcut*.

    ``stride`` is accepted for signature compatibility but not applied: the
    convolution always uses stride 1 so its output matches ``shortcut``.
    """
    conv = conv2d(inputs, filters, shape, training=training)

    return Leaky_Relu(conv + shortcut)


def _residual_stage(layer, filters, repeats, training):
    """Stride-2 downsampling conv followed by *repeats* residual units.

    NOTE(review): every unit adds the stage's downsampled output as its
    shortcut, not the unit's own input; kept as is so existing checkpoints
    behave the same - confirm whether this is intended.
    """
    layer = conv2d(layer, filters, [3, 3], (2, 2), training=training)
    shortcut = layer
    for _ in range(repeats):
        layer = conv2d(layer, filters // 2, [1, 1], training=training)
        layer = Res_conv2d(layer, shortcut, filters, [3, 3], training=training)

    return layer


def feature_extractor(inputs, training):
    """Backbone: five downsampling stages, returning the last three feature maps.

    :param inputs: image batch tensor.
    :param training: bool or bool tensor for batch normalisation.
    :return: ``(pre_scale1, pre_scale2, pre_scale3)`` taken after the stages
        with 1024, 512 and 256 channels respectively.
    """
    layer = conv2d(inputs, 32, [3, 3], training=training)
    layer = _residual_stage(layer, 64, 1, training)
    layer = _residual_stage(layer, 128, 2, training)
    pre_scale3 = _residual_stage(layer, 256, 8, training)
    pre_scale2 = _residual_stage(pre_scale3, 512, 8, training)
    pre_scale1 = _residual_stage(pre_scale2, 1024, 4, training)

    return pre_scale1, pre_scale2, pre_scale3


def get_layer2x(layer_final, pre_scale):
    """Upsample *layer_final* by 2 and concatenate it with *pre_scale* on channels.

    The static height/width are used when known so the result keeps a static
    shape; otherwise the size is taken from the dynamic shape.
    """
    static_height, static_width = layer_final.get_shape().as_list()[1:3]
    if static_height is not None and static_width is not None:
        new_size = [2 * static_height, 2 * static_width]
    else:
        dynamic_shape = tf.shape(layer_final)
        new_size = [2 * dynamic_shape[1], 2 * dynamic_shape[2]]

    layer2x = tf.image.resize_images(layer_final, new_size)

    return tf.concat([layer2x, pre_scale], 3)


def scales(layer, pre_scale2, pre_scale3, training):
    """Detection heads: build the three 255-channel output maps.

    :param layer: deepest backbone feature map (``pre_scale1``).
    :param pre_scale2: backbone feature map merged into scale 2.
    :param pre_scale3: backbone feature map merged into scale 3.
    :param training: bool or bool tensor for batch normalisation.
    :return: ``(scale_1, scale_2, scale_3)``, each passed through ``tf.abs``.
    """
    layer = conv2d(layer, 512, [1, 1], training=training)
    layer = conv2d(layer, 1024, [3, 3], training=training)
    layer = conv2d(layer, 512, [1, 1], training=training)
    layer_final = layer
    layer = conv2d(layer, 1024, [3, 3], training=training)

    '''--------scale_1--------'''
    scale_1 = conv2d(layer, 255, [1, 1], training=training)

    '''--------scale_2--------'''
    layer = conv2d(layer_final, 256, [1, 1], training=training)
    layer = get_layer2x(layer, pre_scale2)

    layer = conv2d(layer, 256, [1, 1], training=training)
    layer = conv2d(layer, 512, [3, 3], training=training)
    layer = conv2d(layer, 256, [1, 1], training=training)
    layer = conv2d(layer, 512, [3, 3], training=training)
    layer = conv2d(layer, 256, [1, 1], training=training)
    layer_final = layer
    layer = conv2d(layer, 512, [3, 3], training=training)
    scale_2 = conv2d(layer, 255, [1, 1], training=training)

    '''--------scale_3--------'''
    layer = conv2d(layer_final, 128, [1, 1], training=training)
    layer = get_layer2x(layer, pre_scale3)

    for _ in range(3):
        layer = conv2d(layer, 128, [1, 1], training=training)
        layer = conv2d(layer, 256, [3, 3], training=training)
    scale_3 = conv2d(layer, 255, [1, 1], training=training)

    return tf.abs(scale_1), tf.abs(scale_2), tf.abs(scale_3)


'''--------Test the scale--------'''
if __name__ == "__main__":
    data = cv2.imread('../data/VOCtest_06-Nov-2007/JPEGImages/000001.jpg')
    data = cv2.cvtColor(data, cv2.COLOR_BGR2RGB)
    data = cv2.resize(data, (416, 416))

    data = tf.cast(tf.expand_dims(tf.constant(data), 0), tf.float32)

    # Both functions require the training flag; inference mode is used here.
    pre_scale1, pre_scale2, pre_scale3 = feature_extractor(data, False)

    scale_1, scale_2, scale_3 = scales(pre_scale1, pre_scale2, pre_scale3, False)

    with tf.Session() as sess:

        sess.run(tf.global_variables_initializer())

        print(sess.run(scale_1).shape)

# /utils/read_config.py:
import yaml


class Flag(object):
    """Attribute-style view of a dict: every key becomes an attribute."""

    def __init__(self, **entries):
        self.__dict__.update(entries)


def read_config_file(config_file):
    """Load a YAML config file into a :class:`Flag` object.

    ``yaml.safe_load`` is used so the file cannot construct arbitrary Python
    objects.  An empty file yields a ``Flag`` with no attributes.

    :param config_file: path of the YAML file (top level must be a mapping).
    :return: ``Flag`` whose attributes are the top-level keys.
    """
    with open(config_file) as f:
        entries = yaml.safe_load(f) or {}

    return Flag(**entries)

# /utils/select_things.py:
import numpy as np
import os

# Number of stride-2 downsampling steps in front of each output scale.
_DOWNSAMPLE_STEPS = {1: 5, 2: 4, 3: 3}


def select_scale(tag, image_width, image_height):
    """Return the output grid size of the selected scale.

    :param tag: output scale, 1, 2 or 3.
    :param image_width: input image width.
    :param image_height: input image height.
    :return: ``(scale_width, scale_height)``: the image size divided by 32, 16
        or 8 for tag 1, 2 or 3 (true division, so the values are floats).
    :raises ValueError: if ``tag`` is not 1, 2 or 3.
    """
    try:
        steps = _DOWNSAMPLE_STEPS[tag]
    except (KeyError, TypeError):
        raise ValueError('tag must be 1, 2 or 3, got %r' % (tag,)) from None

    divisor = np.power(2, steps)

    return image_width / divisor, image_height / divisor


def select_checkpoint(tag):
    """Return the checkpoint directory of a scale, creating it if needed.

    :param tag: output scale; the directory is ``./models/scale<tag>``.
    :return: the directory path.
    """
    dir_name = 'scale' + str(tag)
    checkpoint_path = os.path.join('./models', dir_name)

    # exist_ok avoids the check-then-create race.
    os.makedirs(checkpoint_path, exist_ok=True)

    return checkpoint_path