├── net
│   ├── __init__.py
│   └── yolo3_net.py
├── util
│   ├── __init__.py
│   ├── voc_annotation.py
│   ├── coco_annotation.py
│   ├── load_weights.py
│   ├── kmeans.py
│   ├── image_utils.py
│   ├── box_utils.py
│   └── utils.py
├── model_data
│   ├── yolo_anchors_tiny.txt
│   ├── yolo_anchors.txt
│   ├── voc_classes.txt
│   └── coco_classes.txt
├── images
│   ├── full.jpg
│   ├── mAP.png
│   └── tiny.jpg
├── shell
│   ├── train_cnn_full.sh
│   ├── train_cnn_tiny.sh
│   ├── train_mobilenetv1_full.sh
│   ├── train_mobilenetv2_full.sh
│   └── train_mobilenetv2_tiny.sh
├── config
│   ├── pred_config.py
│   └── train_config.py
├── convert_weights.py
├── test.py
├── yolo.py
├── readme.md
└── train.py

/net/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/util/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/model_data/yolo_anchors_tiny.txt:
--------------------------------------------------------------------------------
10,14, 23,27, 37,58, 81,82, 135,169, 344,319
--------------------------------------------------------------------------------
/images/full.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuodongQi/yolo3_tensorflow/HEAD/images/full.jpg
--------------------------------------------------------------------------------
/images/mAP.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuodongQi/yolo3_tensorflow/HEAD/images/mAP.png
--------------------------------------------------------------------------------
/images/tiny.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuodongQi/yolo3_tensorflow/HEAD/images/tiny.jpg
--------------------------------------------------------------------------------
/model_data/yolo_anchors.txt:
--------------------------------------------------------------------------------
10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
--------------------------------------------------------------------------------
/model_data/voc_classes.txt:
--------------------------------------------------------------------------------
aeroplane
bicycle
bird
boat
bottle
bus
car
cat
chair
cow
diningtable
dog
horse
motorbike
person
pottedplant
sheep
sofa
train
tvmonitor
--------------------------------------------------------------------------------
/shell/train_cnn_full.sh:
--------------------------------------------------------------------------------
NET_TYPE="cnn"
TINY=False
ANCHOR_PATH="./model_data/yolo_anchors.txt"
PRETRAIN_PATH=""

epoch=200
batch_size=4
learning_rate=1e-4

debug=False

if [ -z "${PRETRAIN_PATH}" ]

then

cmd="python train.py \
-n "${NET_TYPE}" \
-t ${TINY} \
-e ${epoch} \
-b ${batch_size} \
-lr ${learning_rate} \
-d ${debug} \
--anchor_path ${ANCHOR_PATH}
"

else

cmd="python train.py \
-n "${NET_TYPE}" \
-t ${TINY} \
-pt "${PRETRAIN_PATH}" \
-e ${epoch} \
-b ${batch_size} \
-lr ${learning_rate} \
-d ${debug} \
--anchor_path ${ANCHOR_PATH}
"

fi

echo $cmd
$cmd
--------------------------------------------------------------------------------
/shell/train_cnn_tiny.sh:
--------------------------------------------------------------------------------
NET_TYPE="cnn"
TINY=True
ANCHOR_PATH="./model_data/yolo_anchors_tiny.txt"
PRETRAIN_PATH=""

epoch=200
batch_size=4
learning_rate=1e-4

debug=False

if [ -z "${PRETRAIN_PATH}" ]

then
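# PRETRAIN_PATH is empty, so train from scratch; the else branch below passes -pt to resume from a checkpoint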
16 | cmd="python train.py \ 17 | -n "${NET_TYPE}" \ 18 | -t ${TINY} \ 19 | -e ${epoch} \ 20 | -b ${batch_size} \ 21 | -lr ${learning_rate} \ 22 | -d ${debug} \ 23 | --anchor_path ${ANCHOR_PATH} 24 | " 25 | 26 | else 27 | 28 | cmd="python train.py \ 29 | -n "${NET_TYPE}" \ 30 | -t ${TINY} \ 31 | -pt "${PRETRAIN_PATH}" \ 32 | -e ${epoch} \ 33 | -b ${batch_size} \ 34 | -lr ${learning_rate} \ 35 | -d ${debug} \ 36 | --anchor_path ${ANCHOR_PATH} 37 | " 38 | 39 | fi 40 | 41 | echo $cmd 42 | $cmd 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /shell/train_mobilenetv1_full.sh: -------------------------------------------------------------------------------- 1 | NET_TYPE="mobilenetv1" 2 | TINY=False 3 | ANCHOR_PATH="./model_data/yolo_anchors.txt" 4 | PRETRAIN_PATH="" 5 | 6 | epoch=200 7 | batch_size=4 8 | learning_rate=1e-4 9 | 10 | debug=False 11 | 12 | if [ -z "${PRETRAIN_PATH}" ] 13 | 14 | then 15 | 16 | cmd="python train.py \ 17 | -n "${NET_TYPE}" \ 18 | -t ${TINY} \ 19 | -e ${epoch} \ 20 | -b ${batch_size} \ 21 | -lr ${learning_rate} \ 22 | -d ${debug} \ 23 | --anchor_path ${ANCHOR_PATH} 24 | " 25 | 26 | else 27 | 28 | cmd="python train.py \ 29 | -n "${NET_TYPE}" \ 30 | -t ${TINY} \ 31 | -pt "${PRETRAIN_PATH}" \ 32 | -e ${epoch} \ 33 | -b ${batch_size} \ 34 | -lr ${learning_rate} \ 35 | -d ${debug} \ 36 | --anchor_path ${ANCHOR_PATH} 37 | " 38 | 39 | fi 40 | 41 | echo $cmd 42 | $cmd 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /shell/train_mobilenetv2_full.sh: -------------------------------------------------------------------------------- 1 | NET_TYPE="mobilenetv2" 2 | TINY=False 3 | ANCHOR_PATH="./model_data/yolo_anchors.txt" 4 | PRETRAIN_PATH="" 5 | 6 | epoch=200 7 | batch_size=4 8 | learning_rate=1e-4 9 | 10 | debug=False 11 | 12 | if [ -z "${PRETRAIN_PATH}" ] 13 | 14 | then 15 | 16 | cmd="python train.py \ 17 | -n "${NET_TYPE}" \ 18 | -t ${TINY} \ 19 | -e ${epoch} \ 20 | -b ${batch_size} \ 21 | -lr ${learning_rate} \ 22 | -d ${debug} \ 23 | --anchor_path ${ANCHOR_PATH} 24 | " 25 | 26 | else 27 | 28 | cmd="python train.py \ 29 | -n "${NET_TYPE}" \ 30 | -t ${TINY} \ 31 | -pt "${PRETRAIN_PATH}" \ 32 | -e ${epoch} \ 33 | -b ${batch_size} \ 34 | -lr ${learning_rate} \ 35 | -d ${debug} \ 36 | --anchor_path ${ANCHOR_PATH} 37 | " 38 | 39 | fi 40 | 41 | echo $cmd 42 | $cmd 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /shell/train_mobilenetv2_tiny.sh: -------------------------------------------------------------------------------- 1 | NET_TYPE="mobilenetv2" 2 | TINY=True 3 | ANCHOR_PATH="./model_data/yolo_anchors_tiny.txt" 4 | PRETRAIN_PATH="" 5 | 6 | epoch=200 7 | batch_size=4 8 | learning_rate=1e-4 9 | 10 | debug=False 11 | 12 | if [ -z "${PRETRAIN_PATH}" ] 13 | 14 | then 15 | 16 | cmd="python train.py \ 17 | -n "${NET_TYPE}" \ 18 | -t ${TINY} \ 19 | -e ${epoch} \ 20 | -b ${batch_size} \ 21 | -lr ${learning_rate} \ 22 | -d ${debug} \ 23 | --anchor_path ${ANCHOR_PATH} 24 | " 25 | 26 | else 27 | 28 | cmd="python train.py \ 29 | -n "${NET_TYPE}" \ 30 | -t ${TINY} \ 31 | -pt "${PRETRAIN_PATH}" \ 32 | -e ${epoch} \ 33 | -b ${batch_size} \ 34 | -lr ${learning_rate} \ 35 | -d ${debug} \ 36 | --anchor_path ${ANCHOR_PATH} 37 | " 38 | 39 | fi 40 | 41 | echo $cmd 42 | $cmd 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /config/pred_config.py: 
-------------------------------------------------------------------------------- 1 | import argparse 2 | from os import getcwd 3 | from os.path import join 4 | 5 | 6 | def get_config(): 7 | root = getcwd() 8 | conf = argparse.ArgumentParser() 9 | 10 | conf.add_argument('-i', '--image', default=None, type=str, help='image path') 11 | conf.add_argument('-v', '--video', default=None, type=str, help='video path') 12 | 13 | # load weight_path 14 | conf.add_argument('-w', '--weight_path', type=str, help='weight path', 15 | default='logs/cnn_full/cnn_full_model_epoch_20') 16 | 17 | conf.add_argument('--score', default=0.3, type=float, help='score threshold') 18 | 19 | conf.add_argument('--classes_path', type=str, help='classes path', 20 | default=join(root, 'model_data', 'coco_classes.txt')) 21 | 22 | return conf.parse_args() 23 | -------------------------------------------------------------------------------- /model_data/coco_classes.txt: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush -------------------------------------------------------------------------------- /convert_weights.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import sys 3 | import time 4 | from os import makedirs 5 | from os.path import exists, join, split 6 | 7 | import numpy as np 8 | import tensorflow as tf 9 | 10 | from net.yolo3_net import model 11 | from util.load_weights import load_weight 12 | 13 | 14 | def convert(is_tiny=False): 15 | if is_tiny: 16 | anchors = np.array([[1, 1]] * 6) 17 | weight_path = join('model_data', 'yolov3-tiny.weights') 18 | save_path = join('logs', 'cnn_tiny', 'cnn_tiny_model') 19 | else: 20 | anchors = np.array([[1, 1]] * 9) 21 | weight_path = join('model_data', 'yolov3.weights') 22 | save_path = join('logs', 'cnn_full', 'cnn_full_model') 23 | 24 | if not exists(split(save_path)[0]): 25 | makedirs(split(save_path)[0]) 26 | input_data = tf.placeholder(dtype=tf.float32, shape=(1, 416, 416, 3)) 27 | 28 | model(input_data, 80, anchors, 'cnn', True, False) 29 | 30 | model_vars_ = tf.global_variables() 31 | assert weight_path.endswith('.weights'), '{} is not a .weights files'.format(weight_path) 32 | assign_ops_ = load_weight(model_vars_, weight_path) 33 | t0 = time.time() 34 | print("start loading weights") 35 | saver = tf.train.Saver() 36 | with tf.Session() as sess: 37 | sess.run(assign_ops_) 38 | saver.save(sess, save_path, write_meta_graph=False, write_state=False) 39 | t1 = time.time() 40 | 
print("convert weights is over, cost {0:.4f}s".format(t1 - t0)) 41 | 42 | 43 | if __name__ == '__main__': 44 | boolen = sys.argv[1] 45 | if boolen.lower() == 'tiny': 46 | convert(True) 47 | elif boolen.lower() == 'full': 48 | convert(False) 49 | else: 50 | raise Exception('unkonwm argument') 51 | -------------------------------------------------------------------------------- /config/train_config.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from os import getcwd 3 | from os.path import join 4 | 5 | 6 | def str2bool(v): 7 | if v.lower() in ['yes', 'true']: 8 | return True 9 | elif v.lower() in ['no', 'false']: 10 | return False 11 | else: 12 | raise argparse.ArgumentTypeError() 13 | 14 | 15 | def get_config(): 16 | root = getcwd() 17 | conf = argparse.ArgumentParser() 18 | 19 | # yolo3 type 20 | conf.add_argument('-n', "--net_type", type=str, help='net type: cnn, mobilenetv1 mobilenetv2 or mobilenetv3', 21 | default='cnn') 22 | conf.add_argument('-t', '--tiny', type=str2bool, help='whether tiny yolo or not', default=False) 23 | 24 | # training argument 25 | conf.add_argument('-b', '--batch_size', type=int, help='batch_size', default=4) 26 | conf.add_argument('-e', '--epoch', type=int, help='epoch', default=100) 27 | conf.add_argument('-lr', '--learn_rate', type=float, help='learn_rate', default=1e-4) 28 | 29 | # load pretrain 30 | conf.add_argument('-pt', '--pretrain_path', type=str, help='pretrain path', default='logs/cnn_full/cnn_full_model') 31 | 32 | conf.add_argument('--anchor_path', type=str, help='anchor path', 33 | default=join(root, 'model_data', 'yolo_anchors.txt')) 34 | conf.add_argument('--train_path', type=str, help='train file path', 35 | default=join(root, 'model_data', 'train.txt')) 36 | conf.add_argument('--valid_path', type=str, help='valid file path', 37 | default=join(root, 'model_data', 'valid.txt')) 38 | conf.add_argument('--classes_path', type=str, help='classes path', 39 | default=join(root, 'model_data', 'coco_classes.txt')) 40 | 41 | conf.add_argument('-d', '--debug', type=str2bool, help='whether print per item loss', default=False) 42 | return conf.parse_args() 43 | -------------------------------------------------------------------------------- /util/voc_annotation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import xml.etree.ElementTree as ET 4 | 5 | wd = os.path.dirname(os.getcwd()) 6 | class_path = os.path.join(wd, 'model_data', 'voc_classes.txt') # change to the classes path you want to detect 7 | is_train = True # whether train dataset or valid dataset 8 | 9 | if is_train: 10 | image_dir = '' # your train image dir 11 | annotation_dir = '' # your train image annotation dir 12 | gen_files = 'train.txt' 13 | else: 14 | image_dir = '' # your val image dir 15 | annotation_dir = '' # your val image annotation dir 16 | gen_files = 'valid.txt' 17 | 18 | with open(class_path) as f: 19 | class_names = f.readlines() 20 | classes = [c.strip() for c in class_names] 21 | 22 | list_file_train = open(os.path.join(wd, 'model_data', gen_files), 'w') 23 | 24 | annotation_files = os.listdir(annotation_dir) 25 | random.shuffle(annotation_files) 26 | 27 | for i in range(0, len(annotation_files), 1): 28 | annotation_file = annotation_files[i] 29 | 30 | list_file_train.write('%s/%s.jpg' % (image_dir, annotation_file.split('.')[0])) 31 | 32 | xml_file = os.path.join(annotation_dir, annotation_file) 33 | try: 34 | in_file = open(xml_file, 'r') 
    except OSError:
        print("open failed {0}".format(xml_file))
    else:
        tree = ET.parse(in_file)
        root = tree.getroot()

        for obj in root.iter('object'):
            difficult = obj.find('difficult').text
            cls = obj.find('name').text
            if cls not in classes or int(difficult) == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            b = (int(xmlbox.find('xmin').text), int(xmlbox.find('ymin').text), int(xmlbox.find('xmax').text),
                 int(xmlbox.find('ymax').text))
            list_file_train.write(" " + ",".join([str(a) for a in b]) + ',' + str(cls_id))
    list_file_train.write('\n')

list_file_train.close()

# clean dataset
with open(os.path.join(wd, 'model_data', gen_files), 'r') as f1:
    old_lines = f1.readlines()
with open(os.path.join(wd, 'model_data', gen_files), 'w') as f2:
    for line in old_lines:
        line_ = line.split(' ')
        if len(line_) > 1:
            f2.write(line)
--------------------------------------------------------------------------------
/util/coco_annotation.py:
--------------------------------------------------------------------------------
import json
import os
from collections import defaultdict

wd = os.path.dirname(os.getcwd())
class_path = os.path.join(wd, 'model_data', 'coco_classes.txt')  # change to the classes path you want to detect
is_train = 1  # whether to build the train dataset or the valid dataset

if is_train:
    image_dir = '/media/data1/datasets/coco/train2017'  # your train image dir
    annotation_file = '/media/data1/datasets/coco/annotations/instances_train2017.json'  # your train annotation file
    gen_files = 'train.txt'
else:
    image_dir = '/media/data1/datasets/coco/val2017'  # your val image dir
    annotation_file = '/media/data1/datasets/coco/annotations/instances_val2017.json'  # your val annotation file
    gen_files = 'valid.txt'

name_box_id = defaultdict(list)
id_name = dict()
with open(class_path) as f:
    class_names = f.readlines()
    classes = [c.strip() for c in class_names]

list_file = open(os.path.join(wd, 'model_data', gen_files), 'w')

with open(annotation_file) as f:
    data = json.load(f)
    annotations = data['annotations']

for ant in annotations:
    image_id = ant['image_id']
    image_path = os.path.join(image_dir, '%012d.jpg' % image_id)
    cat = ant['category_id']

    # map the sparse COCO category ids (1-90, with gaps) to contiguous 0-79 class indices
    if 1 <= cat <= 11:
        cat -= 1
    elif 13 <= cat <= 25:
        cat -= 2
    elif 27 <= cat <= 28:
        cat -= 3
    elif 31 <= cat <= 44:
        cat -= 5
    elif 46 <= cat <= 65:
        cat -= 6
    elif cat == 67:
        cat -= 7
    elif cat == 70:
        cat -= 9
    elif 72 <= cat <= 82:
        cat -= 10
    elif 84 <= cat <= 90:
        cat -= 11
    name_box_id[image_path].append([ant['bbox'], cat])

for key, box_infos in name_box_id.items():
    list_file.write(key)
    for info in box_infos:
        # COCO bbox format is [x_min, y_min, width, height]; convert to corner coordinates
        x_min = int(info[0][0])
        y_min = int(info[0][1])
        x_max = x_min + int(info[0][2])
        y_max = y_min + int(info[0][3])

        box_info = " %d,%d,%d,%d,%d" % (x_min, y_min, x_max, y_max, int(info[1]))
        list_file.write(box_info)
    list_file.write('\n')
list_file.close()

# clean dataset
with open(os.path.join(wd, 'model_data', gen_files), 'r') as f1:
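    # re-read the annotation file just written and keep only lines that contain at
    # least one box; images whose objects were all filtered out leave path-only lines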
old_line = f1.readlines() 72 | with open(os.path.join(wd, 'model_data', gen_files), 'w') as f2: 73 | for line in old_line: 74 | line_ = line.split(' ') 75 | if len(line_) > 1: 76 | f2.write(line) 77 | -------------------------------------------------------------------------------- /util/load_weights.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | 6 | def load_weight(var_list, file_path): 7 | with open(file_path, "rb") as fp: 8 | _ = np.fromfile(fp, dtype=np.int32, count=5) 9 | weights = np.fromfile(fp, dtype=np.float32) 10 | ptr = 0 11 | i = 0 12 | assign_ops = [] 13 | while i < len(var_list) - 1: 14 | var1 = var_list[i] 15 | var2 = var_list[i + 1] 16 | # do something only if we process conv layer 17 | if 'cnn' in var1.name: 18 | # check type of next layer 19 | if 'batch' in var2.name: 20 | # load batch norm params 21 | gamma, beta, mean, var = var_list[i + 1:i + 5] 22 | batch_norm_vars = [beta, gamma, mean, var] 23 | for var in batch_norm_vars: 24 | shape = var.shape.as_list() 25 | num_params = np.prod(shape) 26 | var_weights = weights[ptr:ptr + num_params].reshape(shape) 27 | ptr += num_params 28 | assign_ops.append(tf.assign(var, var_weights, validate_shape=True)) 29 | 30 | # we move the pointer by 4, because we loaded 4 variables 31 | i += 4 32 | elif 'cnn' in var2.name: 33 | # load biases 34 | bias = var2 35 | bias_shape = bias.shape.as_list() 36 | bias_params = np.prod(bias_shape) 37 | bias_weights = weights[ptr:ptr + bias_params].reshape(bias_shape) 38 | assign_ops.append(tf.assign(bias, bias_weights, validate_shape=True)) 39 | if 'yolo_head' in bias.name: # if num_classes is not 80 40 | ptr += 255 41 | else: 42 | ptr += bias_params 43 | 44 | # we loaded 1 variable 45 | i += 1 46 | # we can load weights of conv layer 47 | 48 | shape = var1.shape.as_list() 49 | num_params = np.prod(shape) 50 | var_weights = weights[ptr:ptr + num_params].reshape((shape[3], shape[2], shape[0], shape[1])) 51 | # remember to transpose to column-major 52 | # DarkNet conv_weights are serialized Caffe-style: 53 | # (out_dim, in_dim, height, width) 54 | # We would like to set these to Tensorflow order: 55 | # (height, width, in_dim, out_dim) 56 | var_weights = np.transpose(var_weights, (2, 3, 1, 0)) 57 | assign_ops.append(tf.assign(var1, var_weights, validate_shape=True)) 58 | 59 | if 'yolo_head' in var1.name: # if num_classes is not 80 60 | shape_ = shape[:3] 61 | shape_.append(255) 62 | ptr += np.prod(shape_) 63 | 64 | else: 65 | ptr += num_params 66 | 67 | i += 1 68 | assert ptr == len(weights), "load failed, please verify your weight file" 69 | return assign_ops 70 | -------------------------------------------------------------------------------- /util/kmeans.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class YOLO_Kmeans: 5 | 6 | def __init__(self, cluster_number, filename): 7 | self.cluster_number = cluster_number 8 | self.filename = filename 9 | 10 | def iou(self, boxes, clusters): # 1 box -> k clusters 11 | n = boxes.shape[0] 12 | k = self.cluster_number 13 | 14 | box_area = boxes[:, 0] * boxes[:, 1] 15 | box_area = box_area.repeat(k) 16 | box_area = np.reshape(box_area, (n, k)) 17 | 18 | cluster_area = clusters[:, 0] * clusters[:, 1] 19 | cluster_area = np.tile(cluster_area, [1, n]) 20 | cluster_area = np.reshape(cluster_area, (n, k)) 21 | 22 | box_w_matrix = np.reshape(boxes[:, 0].repeat(k), (n, k)) 23 | 
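        # tile cluster widths/heights into (n, k) matrices so that every box is paired
        # with every cluster and the IoU reduces to element-wise minima below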
cluster_w_matrix = np.reshape(np.tile(clusters[:, 0], (1, n)), (n, k)) 24 | min_w_matrix = np.minimum(cluster_w_matrix, box_w_matrix) 25 | 26 | box_h_matrix = np.reshape(boxes[:, 1].repeat(k), (n, k)) 27 | cluster_h_matrix = np.reshape(np.tile(clusters[:, 1], (1, n)), (n, k)) 28 | min_h_matrix = np.minimum(cluster_h_matrix, box_h_matrix) 29 | inter_area = np.multiply(min_w_matrix, min_h_matrix) 30 | 31 | result = inter_area / (box_area + cluster_area - inter_area) 32 | return result 33 | 34 | def avg_iou(self, boxes, clusters): 35 | accuracy = np.mean([np.max(self.iou(boxes, clusters), axis=1)]) 36 | return accuracy 37 | 38 | def kmeans(self, boxes, k, dist=np.median): 39 | box_number = boxes.shape[0] 40 | distances = np.empty((box_number, k)) 41 | last_nearest = np.zeros((box_number,)) 42 | np.random.seed() 43 | clusters = boxes[np.random.choice( 44 | box_number, k, replace=False)] # init k clusters 45 | while True: 46 | 47 | distances = 1 - self.iou(boxes, clusters) 48 | 49 | current_nearest = np.argmin(distances, axis=1) 50 | if (last_nearest == current_nearest).all(): 51 | break # clusters won't change 52 | for cluster in range(k): 53 | clusters[cluster] = dist( # update clusters 54 | boxes[current_nearest == cluster], axis=0) 55 | 56 | last_nearest = current_nearest 57 | 58 | return clusters 59 | 60 | def result2txt(self, data): 61 | f = open("model_data/yolo_tiny_anchors.txt", 'w') 62 | row = np.shape(data)[0] 63 | for i in range(row): 64 | if i == 0: 65 | x_y = "%d,%d" % (data[i][0], data[i][1]) 66 | else: 67 | x_y = ", %d,%d" % (data[i][0], data[i][1]) 68 | f.write(x_y) 69 | f.close() 70 | 71 | def txt2boxes(self): 72 | f = open(self.filename, 'r') 73 | dataSet = [] 74 | for line in f: 75 | infos = line.split(" ") 76 | length = len(infos) 77 | for i in range(1, length): 78 | width = int(infos[i].split(",")[2]) - \ 79 | int(infos[i].split(",")[0]) 80 | height = int(infos[i].split(",")[3]) - \ 81 | int(infos[i].split(",")[1]) 82 | dataSet.append([width, height]) 83 | result = np.array(dataSet) 84 | f.close() 85 | return result 86 | 87 | def txt2clusters(self): 88 | all_boxes = self.txt2boxes() 89 | result = self.kmeans(all_boxes, k=self.cluster_number) 90 | result = result[np.lexsort(result.T[0, None])] 91 | self.result2txt(result) 92 | print("K anchors:\n {}".format(result)) 93 | print("Accuracy: {:.2f}%".format( 94 | self.avg_iou(all_boxes, result) * 100)) 95 | 96 | 97 | if __name__ == "__main__": 98 | cluster_number = 9 99 | filename = "model_data/train.txt" 100 | kmeans = YOLO_Kmeans(cluster_number, filename) 101 | kmeans.txt2clusters() 102 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import time 2 | from collections import defaultdict 3 | from os.path import join, split 4 | 5 | import cv2 6 | import numpy as np 7 | import tensorflow as tf 8 | 9 | from config.pred_config import get_config 10 | from net.yolo3_net import model 11 | from util.box_utils import pick_box 12 | from util.image_utils import get_color_table, read_image_and_lable 13 | from util.utils import cal_fp_fn_tp_tn, cal_mAP 14 | 15 | 16 | class YOLO(): 17 | def __init__(self, config): 18 | self.config = config 19 | 20 | net_type, tiny = split(self.config.weight_path)[-1].split('_')[:2] 21 | 22 | if tiny == 'tiny': 23 | self.anchor_path = join('model_data', 'yolo_anchors_tiny.txt') 24 | else: 25 | self.anchor_path = join('model_data', 'yolo_anchors.txt') 26 | 27 | self.classes = 
self._get_classes() 28 | self.anchors = self._get_anchors() 29 | self.hw = [416, 416] 30 | self.batch_size = 64 31 | self.ious_thres = [0.5, 0.75] 32 | 33 | self.test_path = "model_data/test.txt" 34 | 35 | with open(self.test_path) as f: 36 | self.test_data = f.readlines() 37 | 38 | if tiny == 'tiny': 39 | assert 6 == len( 40 | self.anchors), 'the model type does not match with anchors, check anchors or type param' 41 | else: 42 | assert 9 == len( 43 | self.anchors), 'the model type does not match with anchors, check anchors or type param' 44 | 45 | self.input = tf.placeholder(tf.float32, [self.batch_size] + self.hw + [3]) 46 | self.is_training = tf.placeholder(tf.bool, shape=[]) 47 | self.pred = model(self.input, len(self.classes), self.anchors, net_type, self.is_training, False) 48 | 49 | print('start load net_type: {}_{}_model'.format(net_type, tiny)) 50 | 51 | # load weights 52 | conf = tf.ConfigProto() 53 | conf.gpu_options.allow_growth = True 54 | 55 | # change fraction according to your GPU 56 | # conf.gpu_options.per_process_gpu_memory_fraction = 0.05 57 | 58 | self.sess = tf.Session(config=conf) 59 | saver = tf.train.Saver() 60 | saver.restore(self.sess, self.config.weight_path) 61 | self.color_table = get_color_table(len(self.classes)) 62 | 63 | def _get_anchors(self): 64 | """loads the anchors from a file""" 65 | with open(self.anchor_path) as f: 66 | anchors = f.readline() 67 | anchors = [float(x) for x in anchors.split(',')] 68 | return np.array(anchors).reshape(-1, 2) 69 | 70 | def _get_classes(self): 71 | """loads the classes""" 72 | with open(self.config.classes_path) as f: 73 | class_names = f.readlines() 74 | class_names = [c.strip() for c in class_names] 75 | return class_names 76 | 77 | def test(self): 78 | total_test_case = len(self.test_data) 79 | 80 | FP_TP = defaultdict(lambda: defaultdict(list)) 81 | GT_NUMS = defaultdict(int) 82 | GTS = defaultdict(lambda: defaultdict(list)) 83 | DETECTION = defaultdict(lambda: defaultdict(list)) 84 | img_data = [] 85 | 86 | print("total test case:", total_test_case) 87 | 88 | for i in range(total_test_case): 89 | 90 | img, xyxy = read_image_and_lable(self.test_data[i], self.hw, is_training=False) 91 | img_data.append(img) 92 | print("{}/{}".format(i, total_test_case)) 93 | for per_xyxy in xyxy: 94 | GTS[i % self.batch_size][self.classes[int(per_xyxy[4])]].append(per_xyxy[:4].tolist()) 95 | 96 | if (i + 1) % self.batch_size == 0: # a batch 97 | boxes = self.sess.run(self.pred, feed_dict={self.input: img_data, self.is_training: False}) 98 | 99 | for b in range(self.batch_size): 100 | picked_boxes = pick_box(boxes[b], 0.01, 0.5, self.hw, self.classes) # NMS 101 | for picked_box in picked_boxes: 102 | DETECTION[b][self.classes[int(picked_box[5])]].append(picked_box[:5].tolist()) 103 | 104 | # cal FP TP 105 | cal_fp_fn_tp_tn(DETECTION, GTS, FP_TP, GT_NUMS, self.classes, self.ious_thres) 106 | 107 | DETECTION.clear() 108 | GTS.clear() 109 | img_data.clear() 110 | 111 | APs, mAPs = cal_mAP(FP_TP, GT_NUMS, self.classes, self.ious_thres) 112 | print(APs, mAPs) 113 | 114 | 115 | if __name__ == '__main__': 116 | configs = get_config() 117 | YOLO(configs).test() 118 | -------------------------------------------------------------------------------- /util/image_utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf8 2 | 3 | import random 4 | 5 | import cv2 6 | import numpy as np 7 | from matplotlib.colors import hsv_to_rgb, rgb_to_hsv 8 | 9 | 10 | def rand(a=0., b=1.): 11 | return 
random.random() * (b - a) + a 12 | 13 | 14 | def read_image_and_lable(gt_path, hw, hue=.1, sat=1.5, val=1.5, is_training=True): 15 | """read image form image_set path random distort image """ 16 | f_path, *_label = gt_path.split(' ') 17 | if not len(_label): 18 | # f_path = f_path.split('\n')[0] 19 | return 20 | image_raw_data = cv2.imread(f_path)[..., ::-1] # RGB h*w*c 21 | height, width = image_raw_data.shape[0], image_raw_data.shape[1] 22 | image_data = cv2.resize(image_raw_data, tuple(hw[::-1])) / 255.0 23 | 24 | h_scale = hw[0] / height 25 | w_scale = hw[1] / width 26 | # anchor[:, 0] *= w_scale 27 | # anchor[:, 1] *= h_scale 28 | 29 | xyxy = [] 30 | 31 | for per_label in _label: 32 | xmin, ymin, xmax, ymax, cls = list(map(float, per_label.split(','))) 33 | xyxy.append([xmin * w_scale, ymin * h_scale, xmax * w_scale, ymax * h_scale, cls]) 34 | xyxy = np.array(xyxy) 35 | 36 | if is_training: 37 | 38 | # random flip image from top to down 39 | if rand() < .5: 40 | image_data = cv2.flip(image_data, 0) 41 | tmp = xyxy[:, 1].copy() 42 | xyxy[:, 1] = hw[0] - xyxy[:, 3] 43 | xyxy[:, 3] = hw[0] - tmp 44 | 45 | # random flip image from left to right 46 | if rand() < .5: 47 | image_data = cv2.flip(image_data, 1) 48 | tmp = xyxy[:, 0].copy() 49 | xyxy[:, 0] = hw[1] - xyxy[:, 2] 50 | xyxy[:, 2] = hw[1] - tmp 51 | 52 | # distort image 53 | if rand() < 0.5: 54 | x = rgb_to_hsv(image_data) 55 | hue = rand(-hue, hue) 56 | sat = rand(1, sat) if rand() < .5 else 1 / rand(1, sat) 57 | val = rand(1, val) if rand() < .5 else 1 / rand(1, val) 58 | x[..., 0] += hue 59 | x[..., 0][x[..., 0] > 1] -= 1 60 | x[..., 0][x[..., 0] < 0] += 1 61 | x[..., 1] *= sat 62 | x[..., 2] *= val 63 | x[x > 1] = 1 64 | x[x < 0] = 0 65 | 66 | image_data = hsv_to_rgb(x) # RGB 67 | # random pad 68 | if rand() < .5: 69 | pad_top = random.randint(0, 25) 70 | pad_left = random.randint(0, 25) 71 | if rand() < .5: 72 | image_data = np.pad(image_data, ((pad_top, 0), (pad_left, 0), (0, 0)), 'edge') 73 | else: 74 | image_data = np.pad(image_data, ((pad_top, 0), (pad_left, 0), (0, 0)), 'constant') 75 | image_data = image_data[:hw[0], :hw[1], :] 76 | for i in range(xyxy.shape[0]): 77 | xyxy[i, 0] = pad_left + xyxy[i, 0] if pad_left + xyxy[i, 0] < hw[1] else hw[1] 78 | xyxy[i, 2] = pad_left + xyxy[i, 2] if pad_left + xyxy[i, 2] < hw[1] else hw[1] 79 | xyxy[i, 1] = pad_top + xyxy[i, 1] if pad_top + xyxy[i, 1] < hw[0] else hw[0] 80 | xyxy[i, 3] = pad_top + xyxy[i, 3] if pad_top + xyxy[i, 3] < hw[0] else hw[0] 81 | # random pad 82 | if rand() < .5: 83 | pad_bottom = random.randint(0, 25) 84 | pad_right = random.randint(0, 25) 85 | if rand() < .5: 86 | image_data = np.pad(image_data, ((0, pad_bottom), (0, pad_right), (0, 0)), 'edge') 87 | else: 88 | image_data = np.pad(image_data, ((0, pad_bottom), (0, pad_right), (0, 0)), 'constant') 89 | image_data = image_data[pad_bottom:hw[0] + pad_bottom, pad_right:hw[1] + pad_right, :] 90 | for i in range(xyxy.shape[0]): 91 | xyxy[i, 0] = xyxy[i, 0] - pad_right if xyxy[i, 0] - pad_right > 0 else 0 92 | xyxy[i, 2] = xyxy[i, 2] - pad_right if xyxy[i, 2] - pad_right > 0 else 0 93 | xyxy[i, 1] = xyxy[i, 1] - pad_bottom if xyxy[i, 1] - pad_bottom > 0 else 0 94 | xyxy[i, 3] = xyxy[i, 3] - pad_bottom if xyxy[i, 3] - pad_bottom > 0 else 0 95 | return image_data, xyxy 96 | 97 | 98 | def get_color_table(class_num, seed=200): 99 | random.seed(seed) 100 | color_table = {} 101 | for i in range(class_num): 102 | color_table[i] = [random.randint(0, 255) for _ in range(3)] 103 | return color_table 104 | 105 | 106 | def 
plot_img(img, picked_boxes, color_table, classes, is_gt=False): 107 | """ 108 | get original boxes and plot them 109 | """ 110 | for co, bbox in enumerate(picked_boxes): 111 | color = color_table[int(bbox[5])] 112 | tl = int(min(round(0.002 * max(img.shape[0:2])), min(bbox[3] - bbox[1], bbox[2] - bbox[0]))) 113 | t2 = max(tl - 1, 1) # font thickness 114 | if is_gt: 115 | label = "gts: {}".format(classes[int(bbox[5])]) 116 | else: 117 | label = "{} {:.2f}".format(classes[int(bbox[5])], bbox[4]) 118 | img = cv2.rectangle(img, tuple(np.int32([bbox[0], bbox[1]])), 119 | tuple(np.int32([bbox[2], bbox[3]])), color, 3) 120 | img = cv2.putText(img, label, tuple(np.int32([bbox[0], bbox[1]])), 121 | cv2.FONT_HERSHEY_TRIPLEX, float(tl) / 3, color, thickness=t2, lineType=cv2.LINE_AA) 122 | 123 | return img 124 | -------------------------------------------------------------------------------- /yolo.py: -------------------------------------------------------------------------------- 1 | import time 2 | from os.path import join, split 3 | 4 | import cv2 5 | import numpy as np 6 | import tensorflow as tf 7 | 8 | from config.pred_config import get_config 9 | from net.yolo3_net import model 10 | from util.box_utils import pick_box, get_true_box 11 | from util.image_utils import get_color_table, plot_img 12 | 13 | 14 | class YOLO(): 15 | def __init__(self, config): 16 | self.config = config 17 | 18 | net_type, tiny = split(self.config.weight_path)[-1].split('_')[:2] 19 | 20 | if tiny == 'tiny': 21 | self.anchor_path = join('model_data', 'yolo_anchors_tiny.txt') 22 | else: 23 | self.anchor_path = join('model_data', 'yolo_anchors.txt') 24 | 25 | self.classes = self._get_classes() 26 | self.anchors = self._get_anchors() 27 | self.hw = [416, 416] 28 | self.batch_size = 1 29 | 30 | if tiny == 'tiny': 31 | assert 6 == len( 32 | self.anchors), 'the model type does not match with anchors, check anchors or type param' 33 | else: 34 | assert 9 == len( 35 | self.anchors), 'the model type does not match with anchors, check anchors or type param' 36 | 37 | self.input = tf.placeholder(tf.float32, [self.batch_size] + self.hw + [3]) 38 | self.is_training = tf.placeholder(tf.bool, shape=[]) 39 | self.pred = model(self.input, len(self.classes), self.anchors, net_type, self.is_training, False) 40 | 41 | print('start load net_type: {}_{}_model'.format(net_type, tiny)) 42 | # load weights 43 | conf = tf.ConfigProto() 44 | # conf.gpu_options.allow_growth = True 45 | 46 | # change fraction according to your GPU 47 | conf.gpu_options.per_process_gpu_memory_fraction = 0.05 48 | self.sess = tf.Session(config=conf) 49 | saver = tf.train.Saver() 50 | saver.restore(self.sess, self.config.weight_path) 51 | self.color_table = get_color_table(len(self.classes)) 52 | 53 | def _get_anchors(self): 54 | """loads the anchors from a file""" 55 | with open(self.anchor_path) as f: 56 | anchors = f.readline() 57 | anchors = [float(x) for x in anchors.split(',')] 58 | return np.array(anchors).reshape(-1, 2) 59 | 60 | def _get_classes(self): 61 | """loads the classes""" 62 | with open(self.config.classes_path) as f: 63 | class_names = f.readlines() 64 | class_names = [c.strip() for c in class_names] 65 | return class_names 66 | 67 | def forward(self, img): 68 | """ 69 | :param img: shape = (h,w,c), 0-255 70 | :return: 71 | """ 72 | height, width = img.shape[:2] 73 | img_ = cv2.resize(img, tuple(self.hw)[::-1]) 74 | h_r = height / self.hw[0] 75 | w_r = width / self.hw[1] 76 | 77 | im_data = np.expand_dims(img_[..., ::-1], 0) / 255.0 78 | boxes = 
self.sess.run(self.pred, feed_dict={self.input: im_data, self.is_training: False})

        vis_img = []
        for b in range(self.batch_size):
            picked_boxes = pick_box(boxes[b], 0.3, 0.6, self.hw, self.classes)
            true_boxes = get_true_box(picked_boxes, w_r, h_r)
            per_img = img
            per_img = plot_img(per_img, true_boxes, self.color_table, self.classes)
            print('find {} boxes'.format(len(true_boxes)))
            print(true_boxes)
            vis_img.append(per_img)
        return vis_img[0]

    def detect_image(self, img_path):
        img = cv2.imread(img_path)
        if img is None:
            return None
        img = self.forward(img)
        cv2.imshow('img', img)
        cv2.imwrite('tiny.jpg', img)
        cv2.waitKey(0)
        return 1

    def detect_video(self, video_path):
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            raise IOError("Couldn't open webcam or video")
        # video_FourCC = -1
        video_FourCC = cv2.VideoWriter_fourcc(*'XVID')
        video_fps = cap.get(cv2.CAP_PROP_FPS)
        width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # writer = cv2.VideoWriter('output.mp4', video_FourCC, video_fps, (width, height))
        writer = cv2.VideoWriter('output.avi', video_FourCC, video_fps, (width, height))

        total_time = 0
        curr_fps = 0
        fps = "FPS: ??"
        time1 = time.time()

        while True:
            ret, frame = cap.read()
            if ret:
                out = self.forward(frame)
                time2 = time.time()
                d_time = time2 - time1
                time1 = time2
                total_time += d_time
                curr_fps += 1
                if total_time >= 1:
                    fps = "FPS: {}".format(curr_fps)
                    total_time -= 1
                    curr_fps = 0

                out = cv2.putText(out, fps, tuple(np.int32([20, 30])),
                                  cv2.FONT_HERSHEY_TRIPLEX, 1, (0, 0, 255))
                out = cv2.resize(out, (width, height))
                cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
                cv2.imshow('result', out)
                cv2.waitKey(1)
                writer.write(out)
            else:
                break


if __name__ == '__main__':
    configs = get_config()
    yolo = YOLO(configs)
    if configs.video:
        yolo.detect_video(configs.video)
    elif configs.image:
        yolo.detect_image(configs.image)
    else:
        while True:
            img_path = input('input image path:')
            if not yolo.detect_image(img_path):
                print('check your image path')
                continue
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
# yolo3-tensorflow
TensorFlow implementation of YOLO v3 object detection.
The model can be full or tiny, and the backbone can be cnn or a mobilenet (mobilenet_v1, mobilenet_v2);
that gives 6 combinations, but 1 of them has too few parameters and performs badly,
so you should build one of these 5 combinations:
* cnn + full
* cnn + tiny
* mobilenet_v1 + full
* mobilenet_v2 + full
* mobilenet_v2 + tiny

These 5 frameworks are provided in this repository.

## Dependencies
python3
tensorflow >= 1.12
opencv
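A minimal environment sketch; the exact pip package names and version pins below are assumptions, not something the repository specifies (`matplotlib` is needed because `util/image_utils.py` imports it for the HSV color jitter):
```
pip install "tensorflow>=1.12,<2.0" opencv-python numpy matplotlib
```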
## Quick start
* cnn full yolo3
1. Download the official [yolov3.weights](https://pjreddie.com/media/files/yolov3.weights) and put it in the `model_data` folder of the project.
2. Run the command `python convert_weights.py full` to convert the weights to a TensorFlow checkpoint, which will be written to `logs/cnn_full/` and named `cnn_full_model.data-00000-of-00001`.
3. Run the command `python yolo.py` or `python yolo.py -w logs/cnn_full/cnn_full_model` and input the image path to detect.
4. Detection example: see `images/full.jpg`.

* cnn tiny yolo3
1. Download the official [yolov3-tiny.weights](https://pjreddie.com/media/files/yolov3-tiny.weights) and put it in the `model_data` folder of the project.
2. Run the command `python convert_weights.py tiny` to convert the weights to a TensorFlow checkpoint, which will be written to `logs/cnn_tiny/` and named `cnn_tiny_model.data-00000-of-00001`.
3. Run the command `python yolo.py -w logs/cnn_tiny/cnn_tiny_model` and input the image path to detect.
4. Detection example: see `images/tiny.jpg`.


## Train

1. Prepare the dataset
Before training, you should generate your own annotation file and class names file.
One row per image.
Row format: image_file_path box1 box2 ... boxN
Box format: x_min,y_min,x_max,y_max,class_id (no space)
For the VOC dataset, try `python util/voc_annotation.py`.
For your own dataset, you should adapt [util/voc_annotation.py](util/voc_annotation.py).
Here is an example:
```
path/to/img1.jpg 50,100,150,200,0 30,50,200,120,3
path/to/img2.jpg 120,300,250,600,2
...
```

2. Prepare yolo anchors
Run `python util/kmeans.py` to generate anchors. Note that the anchor number
should be 9 if you want to train the full yolo; otherwise it should be 6.

3. Start to train
The training arguments can be seen in [config/train_config.py](config/train_config.py).
```
usage: train.py [-h] [-n NET_TYPE] [-t TINY] [-b BATCH_SIZE] [-e EPOCH]
                [-lr LEARN_RATE] [-pt PRETRAIN_PATH]
                [--anchor_path ANCHOR_PATH] [--train_path TRAIN_PATH]
                [--valid_path VALID_PATH] [--classes_path CLASSES_PATH]
                [-d DEBUG]

optional arguments:
  -h, --help            show this help message and exit
  -n NET_TYPE, --net_type NET_TYPE
                        net type: cnn, mobilenetv1, mobilenetv2 or mobilenetv3
  -t TINY, --tiny TINY  whether to use tiny yolo or not
  -b BATCH_SIZE, --batch_size BATCH_SIZE
                        batch_size
  -e EPOCH, --epoch EPOCH
                        epoch
  -lr LEARN_RATE, --learn_rate LEARN_RATE
                        learn_rate
  -pt PRETRAIN_PATH, --pretrain_path PRETRAIN_PATH
                        pretrain path
  --anchor_path ANCHOR_PATH
                        anchor path
  --train_path TRAIN_PATH
                        train file path
  --valid_path VALID_PATH
                        valid file path
  --classes_path CLASSES_PATH
                        classes path
  -d DEBUG, --debug DEBUG
                        whether to print per-item loss
```
The default framework is cnn + full. If you want to train the others, pass
the `-n` (cnn, mobilenetv1 or mobilenetv2) and `-t` (True or False) arguments.

4. To keep it simple
Ready-made scripts are in the `shell` folder. Just run `CUDA_VISIBLE_DEVICES='0' sh ./shell/train_cnn_full.sh`, or run
`CUDA_VISIBLE_DEVICES='0' nohup stdbuf -oL sh ./shell/train_cnn_full.sh > logs/cnn_full.txt &` in the background, so the log
will be written to cnn_full.txt.
You can also change other arguments such as batch_size, epoch and so on.
If you want to use a pretrained model, you should pass the pretrain path. I will provide the pretrained weights later.

5. NOTE
The mobilenets converge more slowly than the plain cnn, so train them for more epochs.

6. Tensorboard
You can use TensorBoard to watch the training trend.
Run `tensorboard --logdir ./ --host 127.0.0.1`
and you can see the mAP score (see `images/mAP.png`).

7. Test your trained weights on your test dataset with
`python test.py`
You may need to change the settings in [config/pred_config.py](config/pred_config.py); an example invocation is shown below.
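For example, a hypothetical test run; the checkpoint path below matches the default in `config/pred_config.py` and assumes you have trained such a checkpoint and generated `model_data/test.txt` (which `test.py` reads) in the annotation format from step 1:
```
python test.py -w logs/cnn_full/cnn_full_model_epoch_20 --classes_path model_data/coco_classes.txt
```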

## Predict
The prediction arguments can be seen in [config/pred_config.py](config/pred_config.py).
```
usage: yolo.py [-h] [-i IMAGE] [-v VIDEO] [-w WEIGHT_PATH] [--score SCORE]
               [--classes_path CLASSES_PATH]

optional arguments:
  -h, --help            show this help message and exit
  -i IMAGE, --image IMAGE
                        image path
  -v VIDEO, --video VIDEO
                        video path
  -w WEIGHT_PATH, --weight_path WEIGHT_PATH
                        weight path
  --score SCORE         score threshold
  --classes_path CLASSES_PATH
                        classes path
```
Note that the weights filename should look like `cnn_full_model.xxx`, `cnn_tiny_model.xxx`, and so on:
the framework is rebuilt from the words in the filename ('cnn' plus 'full', 'cnn' plus 'tiny', etc.).
You can predict on an image or a video.
For example:
`python yolo.py -w weight_path`
`python yolo.py -i image_path -w weight_path`
`python yolo.py -v video_path -w weight_path`
--------------------------------------------------------------------------------
/util/box_utils.py:
--------------------------------------------------------------------------------
import numpy as np
import tensorflow as tf


def box_anchor_iou(b1, b2):
    '''Return iou array
    Parameters
    ----------
    b1: array, shape=(batch,... 2), wh
    b2: array, shape=(j, 2), wh
    Returns
    -------
    iou: array, shape=(i1,...,iN, j)
    '''

    # Expand dim to apply broadcasting.
    b1 = np.expand_dims(b1, -2)
    b1_mins = -b1 / 2
    b1_maxes = b1 / 2

    # Expand dim to apply broadcasting.
    b2 = np.expand_dims(b2, 0)
    b2_mins = -b2 / 2
    b2_maxes = b2 / 2

    intersect_mins = np.maximum(b1_mins, b2_mins)
    intersect_maxes = np.minimum(b1_maxes, b2_maxes)
    intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
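    # both inputs are treated as centered at the origin (mins = -wh/2), so only the
    # widths and heights matter: this IoU compares shapes, which is what anchor matching needs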
29 | intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1] 30 | b1_area = b1[..., 0] * b1[..., 1] 31 | b2_area = b2[..., 0] * b2[..., 1] 32 | iou = intersect_area / (b1_area + b2_area - intersect_area) 33 | 34 | return iou 35 | 36 | 37 | def pick_box(boxes, score_threshold, nms_iou_threshold, hw, classes): 38 | """ 39 | :param boxes: (boxes_num, 5+numclass),xywh 40 | :param score_threshold: score_threshold 41 | :param nms_iou_threshold: nms iou_threshold 42 | :param hw: sacled_image height and width 43 | :param classes: classes num 44 | :return: 45 | """ 46 | score = boxes[..., 4:5] * boxes[..., 5:] 47 | idx = np.where(score > score_threshold) 48 | box_select = boxes[idx[:2]] 49 | box_xywh = box_select[:, :4] 50 | box_xyxy = wh2xy_np(box_xywh) 51 | if not len(box_xyxy): 52 | return [] 53 | box_truncated = [] 54 | for box_k in box_xyxy: 55 | box_k[0] = box_k[0] if box_k[0] >= 0 else 0 56 | box_k[1] = box_k[1] if box_k[1] >= 0 else 0 57 | box_k[2] = box_k[2] if box_k[2] <= hw[1] else hw[1] 58 | box_k[3] = box_k[3] if box_k[3] <= hw[0] else hw[0] 59 | box_truncated.append(box_k) 60 | box_xyxy = np.stack(box_truncated) 61 | box_socre = score[idx] 62 | clsid = idx[2] 63 | picked_boxes = nms_np( 64 | np.concatenate([box_xyxy, box_socre.reshape([-1, 1]), clsid.reshape([-1, 1])], -1), 65 | len(classes), iou_threshold=nms_iou_threshold) 66 | return picked_boxes 67 | 68 | 69 | def nms_np(boxes, classes, iou_threshold=0.3, max_output=20): 70 | """Return nms 71 | Parameters 72 | ---------- 73 | :param boxes: shape=(boxnum 6), xyxy,score,cls 74 | :param iou_threshold: iou_threshold 75 | :param max_output: max_output 76 | :param classes: total_classes_num 77 | 78 | Returns 79 | ------- 80 | nms boxes 81 | """ 82 | 83 | picked_boxes = [] 84 | 85 | for c in range(classes): 86 | b = boxes[boxes[..., -1] == c] 87 | score = b[..., 4] 88 | order = np.argsort(score) 89 | count = 0 90 | while order.size > 0 and count < max_output: 91 | # The index of largest confidence score 92 | index = order[-1] 93 | 94 | # Pick the bounding box with largest confidence score 95 | picked_boxes.append(b[index]) 96 | 97 | b1_mins = b[index][0:2] 98 | b1_maxes = b[index][2:4] 99 | b1_wh = b1_maxes - b1_mins 100 | 101 | b2_mins = b[order[:-1]][..., 0:2] 102 | b2_maxes = b[order[:-1]][..., 2:4] 103 | b2_wh = b2_maxes - b2_mins 104 | 105 | intersect_mins = np.maximum(b1_mins, b2_mins) 106 | intersect_maxes = np.minimum(b1_maxes, b2_maxes) 107 | intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.) 
108 | intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1] 109 | b1_area = b1_wh[..., 0] * b1_wh[..., 1] 110 | b2_area = b2_wh[..., 0] * b2_wh[..., 1] 111 | iou = intersect_area / (b1_area + b2_area - intersect_area) 112 | 113 | left = np.where(iou < iou_threshold) 114 | order = order[left] 115 | count += 1 116 | 117 | return picked_boxes 118 | 119 | 120 | def xy2wh_np(b): 121 | """ 122 | :param b: list xmin ymin xmax ymax 123 | :return: shape=(...,4) x0 y0 w h 124 | """ 125 | xmin, ymin, xmax, ymax = b[..., 0:1], b[..., 1:2], b[..., 2:3], b[..., 3:4] 126 | x0 = (xmin + xmax) / 2.0 127 | y0 = (ymin + ymax) / 2.0 128 | w = xmax - xmin 129 | h = ymax - ymin 130 | return np.concatenate([x0, y0, w, h], -1) 131 | 132 | 133 | def wh2xy_np(b): 134 | """ 135 | :param b: shape=(...,4) x0 y0 w h 136 | :return: shape=(...,4) xmin ymin xmax ymax 137 | """ 138 | x0, y0, w, h = b[..., 0:1], b[..., 1:2], b[..., 2:3], b[..., 3:4] 139 | xmin = x0 - w / 2.0 140 | xmax = x0 + w / 2.0 141 | ymin = y0 - h / 2.0 142 | ymax = y0 + h / 2.0 143 | return np.concatenate([xmin, ymin, xmax, ymax], -1) 144 | 145 | 146 | def box_iou(b1, b2): 147 | '''Return iou tensor 148 | Parameters 149 | ---------- 150 | b1: tensor, shape=(batch,... 4), xywh 151 | b2: tensor, shape=(j, 4), xywh 152 | Returns 153 | ------- 154 | iou: tensor, shape=(i1,...,iN, j) 155 | ''' 156 | 157 | # Expand dim to apply broadcasting. 158 | b1 = tf.expand_dims(b1, -2) 159 | b1_xy = b1[..., :2] 160 | b1_wh = b1[..., 2:4] 161 | b1_wh_half = b1_wh / 2. 162 | b1_mins = b1_xy - b1_wh_half 163 | b1_maxes = b1_xy + b1_wh_half 164 | 165 | # Expand dim to apply broadcasting. 166 | b2 = tf.expand_dims(b2, 0) 167 | b2_xy = b2[..., :2] 168 | b2_wh = b2[..., 2:4] 169 | b2_wh_half = b2_wh / 2. 170 | b2_mins = b2_xy - b2_wh_half 171 | b2_maxes = b2_xy + b2_wh_half 172 | 173 | intersect_mins = tf.maximum(b1_mins, b2_mins) 174 | intersect_maxes = tf.minimum(b1_maxes, b2_maxes) 175 | intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.) 176 | intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1] 177 | b1_area = b1_wh[..., 0] * b1_wh[..., 1] 178 | b2_area = b2_wh[..., 0] * b2_wh[..., 1] 179 | iou = tf.math.divide(intersect_area, b1_area + b2_area - intersect_area, name='iou') 180 | 181 | return iou 182 | 183 | 184 | def box_iou_np(b1, b2): 185 | """ 186 | Return iou tensor 187 | Parameters 188 | ---------- 189 | b1: array shape=(i, 4), xyxy 190 | b2: array, shape=(j, 4), xyxy 191 | Returns 192 | ------- 193 | iou: array, shape=(i1,...,iN, j) 194 | """ 195 | 196 | # Expand dim to apply broadcasting. 197 | b1 = np.expand_dims(b1[...,:4], -2) 198 | 199 | # Expand dim to apply broadcasting. 200 | b2 = np.expand_dims(b2[...,:4], 0) 201 | 202 | intersect_mins = np.maximum(b1[...,0:2], b2[...,0:2]) 203 | intersect_maxes = np.minimum(b1[...,2:4], b2[...,2:4]) 204 | intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.) 
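    # b1 was expanded to (i, 1, 4) and b2 to (1, j, 4), so the arrays here broadcast
    # to shape (i, j) and the division below yields the full pairwise IoU matrix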
205 | intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1] 206 | b1_area = (b1[..., 2] - b1[..., 0]) * (b1[..., 3] - b1[..., 1]) 207 | b2_area = (b2[..., 2] - b2[..., 0]) * (b2[..., 3] - b2[..., 1]) 208 | iou = intersect_area / (b1_area + b2_area - intersect_area) 209 | 210 | return iou 211 | 212 | 213 | 214 | def xy2wh(b): 215 | """ 216 | :param b: shape=(...,4) xmin ymin xmax ymax 217 | :return: shape=(...,4) x0 y0 w h 218 | """ 219 | xmin, ymin, xmax, ymax = b[..., 0:1], b[..., 1:2], b[..., 2:3], b[..., 3:4] 220 | x0 = (xmin + xmax) / 2.0 221 | y0 = (ymin + ymax) / 2.0 222 | w = xmax - xmin 223 | h = ymax - ymin 224 | return tf.concat([x0, y0, w, h], -1) 225 | 226 | 227 | def wh2xy(b): 228 | """ 229 | :param b: shape=(...,4) x0 y0 w h 230 | :return: shape=(...,4) xmin ymin xmax ymax 231 | """ 232 | x0, y0, w, h = b[..., 0:1], b[..., 1:2], b[..., 2:3], b[..., 3:4] 233 | xmin = x0 - w / 2.0 234 | xmax = x0 + w / 2.0 235 | ymin = y0 - h / 2.0 236 | ymax = y0 + h / 2.0 237 | return tf.concat([xmin, ymin, xmax, ymax], -1) 238 | 239 | 240 | def np_sigmoid(x): 241 | return 1 / (1 + np.exp(-x)) 242 | 243 | 244 | def get_true_box(picked_boxes, w_r, h_r): 245 | """ get original true box according to ori image scale""" 246 | true_boxes = [] 247 | for co, bbox in enumerate(picked_boxes): 248 | bbox[0] *= w_r 249 | bbox[2] *= w_r 250 | bbox[1] *= h_r 251 | bbox[3] *= h_r 252 | true_boxes.append(bbox) 253 | if not len(true_boxes): 254 | return true_boxes 255 | true_boxes = np.concatenate(true_boxes, 0).reshape(-1, 6) 256 | return true_boxes 257 | 258 | if __name__ == '__main__': 259 | bx = tf.placeholder(tf.float32, [2, 4, 4]) 260 | xy2wh(bx) 261 | -------------------------------------------------------------------------------- /util/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from util.box_utils import box_iou_np 4 | from collections import defaultdict 5 | 6 | 7 | def np_sigmoid(x): 8 | return 1 / (1 + np.exp(-x)) 9 | 10 | 11 | def sec2time(sec, n_msec=3): 12 | ''' Convert seconds to 'D days, HH:MM:SS.FFF' ''' 13 | m, s = divmod(sec, 60) 14 | h, m = divmod(m, 60) 15 | d, h = divmod(h, 24) 16 | if n_msec > 0: 17 | pattern = '%%02dh %%02dm %%0%d.%dfs' % (n_msec + 3, n_msec) 18 | else: 19 | pattern = r'%02dh %02dm %02s' 20 | if d == 0: 21 | return pattern % (h, m, s) 22 | return ('%d d, ' + pattern) % (d, h, m, s) 23 | 24 | 25 | def cal_fp_fn_tp_tn(detection, ground_truth, FP_TP, GT_NUM, classes, iou_thres_list): 26 | """ 27 | calculate FP TP FN TN accroding to detection and ground truth 28 | :param detection: a dict, the format: 29 | 30 | { 31 | image1: { class1: [ 32 | [xmin, ymim, xmax, ymax, confidence_score], # obj1 33 | [xmin, ymim, xmax, ymax, confidence_score], # obj2 34 | ... 35 | ], 36 | class2: [[xmin, ymim, xmax, ymax, confidence_score]], 37 | ... 38 | }, 39 | 40 | image2: { class1: [ 41 | [xmin, ymim, xmax, ymax, confidence_score], # obj1 42 | [xmin, ymim, xmax, ymax, confidence_score] # obj2 43 | ... 44 | ], 45 | ... 46 | }, 47 | ... 48 | } 49 | 50 | 51 | :param ground_truth: a dict: 52 | { 53 | image1: { class1: [ 54 | [xmin, ymim, xmax, ymax], # obj1 55 | [xmin, ymim, xmax, ymax] # obj2 56 | ... 57 | ], 58 | class2: [ 59 | [xmin, ymim, xmax, ymax], # obj1 60 | [xmin, ymim, xmax, ymax] # obj2 61 | ... 62 | ], 63 | ... 64 | }, 65 | 66 | image2: { class1: [ 67 | [xmin, ymim, xmax, ymax], # obj1 68 | [xmin, ymim, xmax, ymax] # obj2 69 | ... 70 | ], 71 | ... 72 | }, 73 | ... 
24 |
25 | def cal_fp_fn_tp_tn(detection, ground_truth, FP_TP, GT_NUM, classes, iou_thres_list):
26 |     """
27 |     calculate FP TP FN TN according to the detections and ground truth
28 |     :param detection: a dict, the format:
29 |
30 |     {
31 |     image1: { class1: [
32 |                 [xmin, ymin, xmax, ymax, confidence_score],  # obj1
33 |                 [xmin, ymin, xmax, ymax, confidence_score],  # obj2
34 |                 ...
35 |                 ],
36 |               class2: [[xmin, ymin, xmax, ymax, confidence_score]],
37 |               ...
38 |             },
39 |
40 |     image2: { class1: [
41 |                 [xmin, ymin, xmax, ymax, confidence_score],  # obj1
42 |                 [xmin, ymin, xmax, ymax, confidence_score]   # obj2
43 |                 ...
44 |                 ],
45 |               ...
46 |             },
47 |     ...
48 |     }
49 |
50 |
51 |     :param ground_truth: a dict:
52 |     {
53 |     image1: { class1: [
54 |                 [xmin, ymin, xmax, ymax],  # obj1
55 |                 [xmin, ymin, xmax, ymax]   # obj2
56 |                 ...
57 |                 ],
58 |               class2: [
59 |                 [xmin, ymin, xmax, ymax],  # obj1
60 |                 [xmin, ymin, xmax, ymax]   # obj2
61 |                 ...
62 |                 ],
63 |               ...
64 |             },
65 |
66 |     image2: { class1: [
67 |                 [xmin, ymin, xmax, ymax],  # obj1
68 |                 [xmin, ymin, xmax, ymax]   # obj2
69 |                 ...
70 |                 ],
71 |               ...
72 |             },
73 |     ...
74 |     }
75 |
76 |     :param FP_TP: a dict filled in place and used as the result
77 |     {
78 |     iou_thres1: {
79 |         class1: [
80 |             [False, confidence_score],  # image1_obj1, False means FP, True means TP
81 |             [False, confidence_score],  # image1_obj2, False means FP, True means TP
82 |             [False, confidence_score],  # image2_obj1, False means FP, True means TP
83 |             [False, confidence_score],  # image2_obj2, False means FP, True means TP
84 |             ],
85 |         class2: [[False, confidence_score]],
86 |         ...
87 |     },
88 |
89 |     iou_thres2: {
90 |         class1: [
91 |             [False, confidence_score],  # image1_obj1, False means FP, True means TP
92 |             [False, confidence_score],  # image1_obj2, False means FP, True means TP
93 |             [False, confidence_score],  # image2_obj1, False means FP, True means TP
94 |             [False, confidence_score],  # image2_obj2, False means FP, True means TP
95 |             ],
96 |         class2: [[False, confidence_score]],
97 |         ...
98 |     },
99 |     ...
100 |     }
101 |
102 |
103 |     :param GT_NUM: a dict that stores the total number of gt boxes per class, used to calculate the recall rate
104 |     {
105 |     class1: num1,
106 |     class2: num2,
107 |     ...
108 |     }
109 |
110 |     :param classes: list, class names
111 |     :param iou_thres_list: list, iou thresholds
112 |
113 |
114 |     """
115 |     for i in detection.keys():  # image file name
116 |         det_objs = detection[i]  # detection dict
117 |         gt_objs = ground_truth[i]  # gt dict
118 |         for j in classes:  # class name
119 |             det_boxes = np.array(det_objs[j])  # detection boxes
120 |             gt_boxes = np.array(gt_objs[j])  # gt boxes
121 |
122 |             if not len(gt_boxes):  # if there are no gt boxes, every detection box is a FP
123 |                 for iou_thres in iou_thres_list:
124 |                     for box_index, box in enumerate(det_boxes):  # init
125 |                         FP_TP[iou_thres][j].append([False, box[4]])
126 |                 continue
127 |
128 |             GT_NUM[j] += len(gt_boxes)
129 |
130 |             if not len(det_boxes):  # if there are gt boxes but no detections, only count the gt boxes
131 |                 continue
132 |
133 |             ious = box_iou_np(det_boxes, gt_boxes)  # calculate iou
134 |             # ious_larger = np.where(ious > iou_thres, ious, np.zeros_like(ious))
135 |             ious_index = np.argmax(ious, 0)  # per gt box, the detection with max iou becomes the TP candidate; the rest stay FP
136 |
137 |             for iou_thres in iou_thres_list:
138 |                 for box_index, box in enumerate(det_boxes):  # init
139 |                     FP_TP[iou_thres][j].append([False, box[4]])
140 |
141 |                 for gt_index in range(len(gt_boxes)):
142 |                     selected = ious_index[gt_index]
143 |                     sel_index = len(det_boxes) - selected - 1
144 |                     FP_TP[iou_thres][j][~sel_index][0] = ious[selected, gt_index] >= iou_thres
145 |
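# Worked example of the ~sel_index bookkeeping above (illustrative numbers):
# FP_TP[iou_thres][j] accumulates entries across images, so the entry for this
# image's detection `selected` has to be addressed from the end of the list.
def _demo_sel_index():
    det_count, selected = 3, 1
    sel_index = det_count - selected - 1  # 1
    # ~sel_index == -(det_count - selected): the second-to-last entry just
    # appended, i.e. exactly the row belonging to detection index 1
    assert ~sel_index == -2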
146 |
147 | def cal_mAP(FP_TP, GT_NUM, classes, iou_thres_list):
148 |     """
149 |     calculate mAP
150 |     :param FP_TP: the dict filled by cal_fp_fn_tp_tn
151 |     {
152 |     iou_thres1: {
153 |         class1: [
154 |             [False, confidence_score],  # image1_obj1, False means FP, True means TP
155 |             [False, confidence_score],  # image1_obj2, False means FP, True means TP
156 |             [False, confidence_score],  # image2_obj1, False means FP, True means TP
157 |             [False, confidence_score],  # image2_obj2, False means FP, True means TP
158 |             ],
159 |         class2: [[False, confidence_score]],
160 |         ...
161 |     },
162 |
163 |     iou_thres2: {
164 |         class1: [
165 |             [False, confidence_score],  # image1_obj1, False means FP, True means TP
166 |             [False, confidence_score],  # image1_obj2, False means FP, True means TP
167 |             [False, confidence_score],  # image2_obj1, False means FP, True means TP
168 |             [False, confidence_score],  # image2_obj2, False means FP, True means TP
169 |             ],
170 |         class2: [[False, confidence_score]],
171 |         ...
172 |     },
173 |     ...
174 |     }
175 |
176 |
177 |     :param GT_NUM: a dict that stores the total number of gt boxes per class, used to calculate the recall rate
178 |     {
179 |     class1: num1,
180 |     class2: num2,
181 |     ...
182 |     }
183 |
184 |     :param classes: list, class names
185 |     :param iou_thres_list: list, iou thresholds
186 |
187 |     """
188 |     iou_class_AP = {}
189 |     iou_mAP = {}
190 |     for iou_thres in iou_thres_list:
191 |         class_AP = {}
192 |         for cls in classes:
193 |             fp_tp = FP_TP[iou_thres][cls]
194 |             fp_tp = sorted(fp_tp, key=lambda x: x[1], reverse=True)
195 |             TP, total_det = 0, 0
196 |             precision = [1.0]
197 |             recall = [0.0]
198 |
199 |             # calculate pr for each box
200 |             for per_fp_tp in fp_tp:
201 |                 total_det += 1
202 |                 if per_fp_tp[0]:
203 |                     TP += 1
204 |                 precision.append(TP / total_det)
205 |                 if not GT_NUM[cls]:
206 |                     print('your validation or test set is too small to cover all classes')
207 |                     recall.append(0)
208 |                 else:
209 |                     recall.append(TP / GT_NUM[cls])
210 |
211 |             # calculate AP by all-point interpolation
212 |             AP = 0
213 |             i_old = 0
214 |             for i in range(1, len(recall)):
215 |                 if recall[i] == recall[i_old]:
216 |                     continue
217 |                 p = max(precision[i:])
218 |                 AP += p * (recall[i] - recall[i_old])
219 |                 i_old = i
220 |             class_AP[cls] = AP
221 |         iou_class_AP[iou_thres] = class_AP
222 |         iou_mAP[iou_thres] = sum(class_AP.values()) / len(classes)
223 |
224 |     return iou_class_AP, iou_mAP
225 |
226 |
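# The all-point interpolation above, worked on toy numbers (illustrative, not
# from the repo's data): each recall step is weighted by the best precision
# achievable at that recall or higher.
def _demo_all_point_interpolation():
    recall = [0.0, 0.5, 1.0]
    precision = [1.0, 1.0, 0.5]
    AP, i_old = 0.0, 0
    for i in range(1, len(recall)):
        if recall[i] == recall[i_old]:
            continue
        AP += max(precision[i:]) * (recall[i] - recall[i_old])  # 1.0*0.5 + 0.5*0.5
        i_old = i
    assert abs(AP - 0.75) < 1e-9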
227 | if __name__ == '__main__':
228 |     detection = {
229 |         "image1": {
230 |             "class1": [[1, 2, 3, 4, 5],
231 |                        [10, 20, 30, 40, 4],
232 |                        [1, 2, 3, 40, 3],
233 |                        ],
234 |             "class2": [[1, 2, 3, 4, 5],
235 |                        [10, 20, 30, 40, 4],
236 |                        [1, 2, 3, 40, 3],
237 |                        ]
238 |         },
239 |         "image2": {
240 |             "class1": [[1, 2, 3, 4, 5],
241 |                        [10, 20, 30, 40, 4],
242 |                        [1, 2, 3, 40, 3],
243 |                        ],
244 |             "class2": [[1, 2, 3, 4, 5],
245 |                        [10, 20, 30, 40, 4],
246 |                        [1, 2, 3, 40, 3],
247 |                        ]
248 |         }
249 |
250 |     }
251 |
252 |     GT = {
253 |         "image1": {
254 |             "class1": [[1, 2, 3.4, 4],
255 |                        [10, 20, 30, 40],
256 |                        ],
257 |             "class2": [[1, 2, 3.4, 4],
258 |                        ],
259 |         },
260 |         "image2": {
261 |             "class1": [[1, 2, 3.4, 4],
262 |                        [10, 20, 30, 40],
263 |                        ],
264 |             "class2": [[1, 2, 3.4, 4],
265 |                        ],
266 |         },
267 |     }
268 |     fp = defaultdict(lambda: defaultdict(list))
269 |     nums = defaultdict(int)
270 |     cal_fp_fn_tp_tn(detection, GT, fp, nums, ["class1", "class2"], [0.4, 0.9, 1])
271 |     a = cal_mAP(fp, nums, ["class1", "class2"], [0.4, 0.9, 1])
272 |     print(a)
273 |
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | import time
2 | from collections import defaultdict
3 | from copy import deepcopy
4 | from os import getcwd
5 | from os.path import join, split
6 |
7 | import numpy as np
8 | import tensorflow as tf
9 |
10 | from config.train_config import get_config
11 | from net.yolo3_net import loss, model
12 | from util.box_utils import box_anchor_iou, pick_box, xy2wh_np
13 | from util.image_utils import get_color_table, plot_img, read_image_and_lable
14 | from util.utils import sec2time, cal_fp_fn_tp_tn, cal_mAP
15 |
16 |
17 | class YOLO():
18 |     def __init__(self, config):
19 |         self.config = config
20 |
21 |         self.batch_size = self.config.batch_size
22 |         self.epoch = self.config.epoch
23 |         self.learn_rate = self.config.learn_rate
24 |
25 |         self.lambda_coord = 5
26 |         self.lambda_noobj = 0.5
27 |         self.lambda_cls = 1
28 |         self.iou_threshold = 0.5  # used to decide whether a box is BG or FG
29 |
30 |         self.ious_thres = [0.5, 0.75]  # used to calculate mAP
31 |
32 |         self.classes = self.__get_classes()
33 |         self.anchors = self.__get_anchors()
34 |         self.hw = [416, 416]
35 |         if self.config.tiny:
36 |             assert 6 == len(
37 |                 self.anchors), 'model type does not match the anchors, check the anchors or the type param'
38 |             self.log_path = join(getcwd(), 'logs', self.config.net_type + '_tiny')
39 |         else:
40 |             assert 9 == len(
41 |                 self.anchors), 'model type does not match the anchors, check the anchors or the type param'
42 |             self.log_path = join(getcwd(), 'logs', self.config.net_type + '_full')
43 |         self.pretrain_path = self.config.pretrain_path
44 |
45 |         self.input = tf.placeholder(tf.float32, [self.batch_size] + self.hw + [3])
46 |         self.is_training = tf.placeholder(tf.bool, shape=[])
47 |         self.label = None
48 |
49 |         with open(self.config.train_path) as f:
50 |             self.train_data = f.readlines()
51 |         with open(self.config.valid_path) as f:
52 |             self.val_data = f.readlines()
53 |
54 |         self.color_table = get_color_table(len(self.classes))
55 |
56 |     def __get_anchors(self):
57 |         """loads the anchors from a file"""
58 |         with open(self.config.anchor_path) as f:
59 |             anchors = f.readline()
60 |         anchors = [float(x) for x in anchors.split(',')]
61 |         return np.array(anchors).reshape(-1, 2)
62 |
63 |     def __get_classes(self):
64 |         """loads the classes"""
65 |         with open(self.config.classes_path) as f:
66 |             class_names = f.readlines()
67 |         class_names = [c.strip() for c in class_names]
68 |         return class_names
69 |
70 |     def generate_data(self, grid_shape, is_val=False):
71 |
72 |         gds_init = [np.zeros(g_shape[1:3] + [3, 9 + len(self.classes)]) for g_shape in grid_shape]
73 |
74 |         idx = 0
75 |
76 |         GTS = defaultdict(lambda: defaultdict(list))
77 |
78 |         if is_val:
79 |             gts = self.val_data
80 |         else:
81 |             gts = self.train_data
82 |         while True:
83 |             img_files = []
84 |             labels = []
85 |             b = 0
86 |             GTS.clear()
87 |
88 |             while idx < len(gts) - self.batch_size:  # a batch
89 |                 try:
90 |                     res = read_image_and_lable(gts[idx + b], self.hw, is_training=not is_val)
91 |                     # print(idx + b)
92 |                 except IndexError:
93 |                     raise Exception('this should not happen')
94 |                 else:
95 |                     if not res:
96 |                         raise Exception('check your dataset: an image has no label')
97 |
98 |                     img, _label = res
99 |
100 |                     img_files.append(img)
101 |
102 |                     for per_xyxy in _label:
103 |                         GTS[b][self.classes[int(per_xyxy[4])]].append(per_xyxy[:4].tolist())
104 |
105 |                     _label_ = np.concatenate([xy2wh_np(_label[:, :4]), _label[:, 4:]], -1)  # change to xywh
106 |
107 |                     gds = deepcopy(gds_init)
108 |                     for per_label in _label_:
109 |                         x0, y0, w, h = per_label[:4]
110 |                         if w == 0 or h == 0:
111 |                             continue
112 |                         box_iou = box_anchor_iou(self.anchors, per_label[2:4])
113 |                         k = np.argmax(box_iou)
114 |                         div, mod = divmod(int(k), 3)
115 |                         div = len(grid_shape) - 1 - div
116 |                         h_r = self.hw[0] / gds[div].shape[0]
117 |                         w_r = self.hw[1] / gds[div].shape[1]
118 |                         i = int(np.floor(x0 / w_r))
119 |                         j = int(np.floor(y0 / h_r))
120 |
121 |                         gds[div][j, i, mod, 0] = x0 / w_r - i
122 |                         gds[div][j, i, mod, 1] = y0 / h_r - j
123 |                         gds[div][j, i, mod, 2] = np.log(w / self.anchors[k, 0] + 1e-5)
124 |                         gds[div][j, i, mod, 3] = np.log(h / self.anchors[k, 1] + 1e-5)
125 |
126 |                         gds[div][j, i, mod, 4] = x0
127 |                         gds[div][j, i, mod, 5] = y0
128 |                         gds[div][j, i, mod, 6] = w
129 |                         gds[div][j, i, mod, 7] = h
130 |
131 |                         gds[div][j, i, mod, 8] = 1
132 |                         gds[div][j, i, mod, 9 + int(per_label[4])] = 1
133 |
134 |                     gds = [gd.reshape([-1, 3, 9 + len(self.classes)]) for gd in gds]
135 |                     labels.append(np.concatenate(gds, 0))
136 |                     b += 1
137 |                     if len(labels) == self.batch_size:
138 |                         idx += self.batch_size
139 |                         break
140 |             if idx >= len(gts) - self.batch_size:
141 |                 np.random.shuffle(gts)
142 |                 idx = 0
143 |             img_files, labels = np.array(img_files, np.float32), np.array(labels, np.float32)
144 |             if is_val:
145 |                 yield img_files, labels, GTS
146 |             else:
147 |                 yield img_files, labels, idx
148 |
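    # How a label lands in the grids above (worked example, values assumed):
    # with 9 anchors and 3 output grids, the best anchor k = 7 gives
    # div, mod = divmod(7, 3) = (2, 1); grid_shape lists the coarsest grid
    # first, so div = 3 - 1 - 2 = 0 and the box is written to anchor slot 1
    # of the 13x13 grid, at cell (j, i) = (floor(y0 / 32), floor(x0 / 32))
    # for a 416x416 input.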
149 |     def train(self):
150 |         # pred, losses, op = self.create_model()
151 |         pred = model(self.input, len(self.classes), self.anchors, self.config.net_type, self.is_training, True)
152 |         grid_shape = [g.get_shape().as_list() for g in pred[2]]
153 |
154 |         s = sum([g[2] * g[1] for g in grid_shape])
155 |         self.label = tf.placeholder(tf.float32, [self.batch_size, s, 3, 9 + len(self.classes)])
156 |         # for data in self.generate_data(grid_shape):
157 |         #     print()
158 |
159 |         update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
160 |         var_list = tf.global_variables()
161 |
162 |         losses = loss(pred, self.label, self.hw, self.lambda_coord, self.lambda_noobj, self.lambda_cls,
163 |                       self.iou_threshold, self.config.debug)
164 |         opt = tf.train.AdamOptimizer(self.learn_rate)
165 |
166 |         with tf.control_dependencies(update_ops):
167 |             op = opt.minimize(losses)
168 |
169 |         # summary
170 |         writer = tf.summary.FileWriter(self.log_path, max_queue=-1)
171 |         img_tensor = tf.placeholder(tf.float32, [2 * self.batch_size] + self.hw + [3])
172 |
173 |         with tf.name_scope('loss'):
174 |             train_loss_tensor = tf.placeholder(tf.float32)
175 |             val_loss_tensor = tf.placeholder(tf.float32)
176 |             tf.summary.scalar('train_loss', train_loss_tensor)
177 |             tf.summary.scalar('val_loss', val_loss_tensor)
178 |
179 |         with tf.name_scope('mAP'):
180 |             for iou in self.ious_thres:
181 |                 with tf.name_scope('iou{}'.format(iou)):
182 |                     exec('map_with_iou{} = tf.placeholder(tf.float32)'.format(int(iou * 100)))
183 |                     exec('tf.summary.scalar("mAP", map_with_iou{})'.format(int(iou * 100)))
184 |
185 |         with tf.name_scope('per_class_AP'):
186 |             for iou in self.ious_thres:
187 |                 with tf.name_scope('iou{}'.format(iou)):
188 |                     for per_cls in self.classes:
189 |                         per_cls = per_cls.replace(' ', '_')
190 |                         exec('ap_{}_with_iou{} = tf.placeholder(tf.float32)'.format(per_cls, int(iou * 100)))
191 |                         exec('tf.summary.scalar("{}", ap_{}_with_iou{})'.format(per_cls, per_cls, int(iou * 100)))
192 |
193 |         tf.summary.image('img', img_tensor, 2 * self.batch_size)
194 |         summary = tf.summary.merge_all()
195 |
196 |         conf = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
197 |         sess = tf.Session(config=conf)
198 |         # sess = tf_debug.LocalCLIDebugWrapperSession(sess)
199 |         # sess = tf_debug.TensorBoardDebugWrapperSession(sess, "PC-DAIXILI:6001")
200 |
201 |         saver = tf.train.Saver(var_list=var_list, max_to_keep=5)
202 |         # saver = tf.train.Saver()
203 |
204 |         # init
205 |         init = tf.global_variables_initializer()
206 |         sess.run(init)
207 |
208 |         if len(self.pretrain_path):
209 |             flag = 0
210 |             try:
211 |                 print('try to restore the whole graph')
212 |                 saver.restore(sess, self.pretrain_path)
213 |                 print('successfully restored the whole graph')
214 |             except:
215 |                 print('failed to restore the whole graph')
216 |                 flag = 1
217 |             if flag:
218 |                 try:
219 |                     print('try to restore the graph body')
220 |                     restore_weights = [v for v in var_list if 'yolo_head' not in v.name]
221 |                     sv = tf.train.Saver(var_list=restore_weights)
222 |                     sv.restore(sess, self.pretrain_path)
223 |                     print('successfully restored the graph body')
224 |                 except Exception:
225 |                     raise Exception('restore body failed, please check the pretrained weights')
226 |
227 |         total_step = int(np.ceil(len(self.train_data) / self.batch_size)) * self.epoch
228 |
229 |         print('train on {} samples, val on {} samples, batch size {}, total {} epochs'.format(len(self.train_data),
230 |                                                                                               len(self.val_data),
231 |                                                                                               self.batch_size,
232 |                                                                                               self.epoch))
233 |         step = 0
234 |         epoch = 0
235 |         t0 = time.time()
236 |
237 |         DETECTION = defaultdict(lambda: defaultdict(list))
238 |         FP_TP = defaultdict(lambda: defaultdict(list))
239 |         GT_NUMS = defaultdict(int)
240 |
241 |         for data in self.generate_data(grid_shape):
242 |             step += 1
243 |
244 |             img, label, idx = data
245 |             pred_, losses_, _ = sess.run([pred, losses, op], {
246 |                 self.input: img,
247 |                 self.label: label,
248 |                 self.is_training: True
249 |             })
250 |             t1 = time.time()
251 |             print('step:{:= len(self.val_data):
314 |                 break
315 |
316 |             APs, mAPs = cal_mAP(FP_TP, GT_NUMS, self.classes, self.ious_thres)
317 |             print(APs)
318 |             print(mAPs)
319 |             # import pdb
320 |             # pdb.set_trace()
321 |             val_loss_ /= (val_step / self.batch_size)
322 |
323 |             feed_dict = {
324 |                 img_tensor: np.array(vis_img),
325 |                 train_loss_tensor: losses_,
326 |                 val_loss_tensor: val_loss_
327 |             }
328 |
329 |             for iou in self.ious_thres:
330 |                 exec('feed_dict[map_with_iou{0}] = mAPs[{1}]'.format(int(iou * 100), iou))
331 |                 for per_cls in self.classes:
332 |                     per_clses = per_cls.replace(' ', '_')
333 |                     exec(
334 |                         'feed_dict[ap_{0}_with_iou{1}] = APs[{2}]["{3}"]'.format(per_clses, int(iou * 100), iou,
335 |                                                                                  per_cls))
336 |
337 |             ss = sess.run(summary, feed_dict=feed_dict)
338 |             writer.add_summary(ss, epoch)
339 |             saver.save(sess, join(self.log_path, split(self.log_path)[-1] + '_model_epoch_{}'.format(epoch)),
340 |                        write_meta_graph=False, write_state=False)
341 |             print('epoch:{} train_loss:{:< .3f} val_loss:{:< .3f}'.format(
342 |                 epoch, losses_, val_loss_))
343 |             epoch += 1
344 |             if epoch >= self.epoch:
345 |                 break
346 |
347 |
348 | if __name__ == '__main__':
349 |     configs = get_config()
350 |     YOLO(configs).train()
351 |
--------------------------------------------------------------------------------
/net/yolo3_net.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | from util.box_utils import box_iou
4 |
5 | """
6 | (1280 * 640)
7 | input = (640 * 320)
8 | 640 * 320
9 | 320 * 160
10 | 160 * 80
11 | 80 * 40
12 | 40 * 20
13 | 20 * 10
14 | 10 * 5
15 | """
16 | leaky_alpha = 0.1
17 |
18 | xavier_initializer = tf.initializers.glorot_uniform()
19 |
20 |
21 | def conv_block(x, filters, stride, out_channel, net_type, is_training, name='', relu=True):
22 |     """
23 |     :param x: input, nhwc
24 |     :param filters: list [f_w, f_h]
25 |     :param stride: list of int
26 |     :param out_channel: int, out_channel
27 |     :param net_type: cnn / mobilenetv1 / mobilenetv2
28 |     :param is_training: used in BN
29 |     :param name: str
30 |     :param relu: boolean
31 |     :return: the conv output (depthwise + pointwise output for the mobilenet types)
32 |     """
33 |     with tf.name_scope('' + name):
34 |         in_channel = x.shape[3].value
35 |         if net_type == 'cnn':
36 |             with tf.name_scope('cnn'):
37 |                 # weight = tf.Variable(tf.truncated_normal([filters[0], filters[1], in_channel, out_channel], 0, 0.01))
38 |                 weight = tf.Variable(xavier_initializer([filters[0], filters[1], in_channel, out_channel]))
39 |                 if stride[0] == 2:  # refer to "https://github.com/qqwweee/keras-yolo3/issues/8"
40 |                     x = tf.pad(x, tf.constant([[0, 0], [1, 0, ], [1, 0], [0, 0]]))
41 |                     x = tf.nn.conv2d(x, weight, [1, stride[0], stride[1], 1], 'VALID')
42 |                 else:
43 |                     x = tf.nn.conv2d(x, weight, [1, stride[0], stride[1], 1], 'SAME')
44 |                 if relu:
45 |                     x = tf.layers.batch_normalization(x, training=is_training)
46 |                     x = tf.nn.leaky_relu(x, leaky_alpha)
47 |                 else:
48 |                     bias = tf.Variable(tf.zeros(shape=out_channel))
49 |                     x += bias
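        # Why the asymmetric pad above (worked numbers, 416x416 input assumed):
        # with a 3x3 stride-2 kernel, padding only top/left gives 416 -> 417,
        # and the VALID conv yields (417 - 3) // 2 + 1 = 208, reproducing
        # Darknet's downsampling instead of 'SAME' symmetric padding.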
50 |         elif net_type == 'mobilenetv1':
51 |             with tf.name_scope('depthwise'):
52 |                 # depthwise_weight = tf.Variable(tf.truncated_normal([filters[0], filters[1], in_channel, 1], 0, 0.01))
53 |                 depthwise_weight = tf.Variable(xavier_initializer([filters[0], filters[1], in_channel, 1]))
54 |                 x = tf.nn.depthwise_conv2d(x, depthwise_weight, [1, stride[0], stride[1], 1], 'SAME')
55 |                 x = tf.layers.batch_normalization(x, training=is_training)
56 |                 x = tf.nn.relu6(x)
57 |
58 |             with tf.name_scope('pointwise'):
59 |                 # pointwise_weight = tf.Variable(tf.truncated_normal([1, 1, in_channel, out_channel], 0, 0.01))
60 |                 pointwise_weight = tf.Variable(xavier_initializer([1, 1, in_channel, out_channel]))
61 |                 x = tf.nn.conv2d(x, pointwise_weight, [1, 1, 1, 1], 'SAME')
62 |                 if relu:
63 |                     x = tf.layers.batch_normalization(x, training=is_training)
64 |                     x = tf.nn.relu6(x)
65 |                 else:
66 |                     bias = tf.Variable(tf.zeros(shape=out_channel))
67 |                     x += bias
68 |
69 |         elif net_type == 'mobilenetv2':
70 |             tmp_channel = out_channel * 3
71 |             with tf.name_scope('expand_pointwise'):
72 |                 pointwise_weight = tf.Variable(xavier_initializer([1, 1, in_channel, tmp_channel]))
73 |                 x = tf.nn.conv2d(x, pointwise_weight, [1, 1, 1, 1], 'SAME')
74 |                 x = tf.layers.batch_normalization(x, training=is_training)
75 |                 x = tf.nn.relu6(x)
76 |                 print("Activation function: relu6")
77 |             with tf.name_scope('depthwise'):
78 |                 depthwise_weight = tf.Variable(xavier_initializer([filters[0], filters[1], tmp_channel, 1]))
79 |                 x = tf.nn.depthwise_conv2d(x, depthwise_weight, [1, stride[0], stride[1], 1], 'SAME')
80 |                 x = tf.layers.batch_normalization(x, training=is_training)
81 |                 x = tf.nn.relu6(x)
82 |             with tf.name_scope('project_pointwise'):
83 |                 pointwise_weight = tf.Variable(xavier_initializer([1, 1, tmp_channel, out_channel]))
84 |                 x = tf.nn.conv2d(x, pointwise_weight, [1, 1, 1, 1], 'SAME')
85 |                 if relu:
86 |                     x = tf.layers.batch_normalization(x, training=is_training)
87 |                     # x = tf.nn.relu6(x)
88 |                 else:
89 |                     bias = tf.Variable(tf.zeros(shape=out_channel))
90 |                     x += bias
91 |         else:
92 |             raise Exception('net type is wrong, please check')
93 |     return x
94 |
95 |
96 | def residual(x, net_type, is_training, out_channel=1, expand_time=1, stride=1):
97 |     if net_type in ['cnn', 'mobilenetv1']:
98 |         out_channel = x.shape[3].value
99 |         shortcut = x
100 |         x = conv_block(x, [1, 1], [1, 1], out_channel // 2, net_type='cnn', is_training=is_training)
101 |         x = conv_block(x, [3, 3], [1, 1], out_channel, net_type='cnn', is_training=is_training)
102 |         x += shortcut
103 |
104 |     elif net_type == 'mobilenetv2':  # inverted residual block (Inverted Residuals)
105 |         shortcut = x
106 |         in_channel = x.shape[3].value
107 |         tmp_channel = in_channel * expand_time
108 |         with tf.name_scope('expand_pointwise'):  # 1x1 expansion conv: lift into a higher-dimensional feature space (see the depthwise-separable convolution literature)
109 |             pointwise_weight = tf.Variable(xavier_initializer([1, 1, in_channel, tmp_channel]))
110 |             x = tf.nn.conv2d(x, pointwise_weight, [1, 1, 1, 1], 'SAME')
111 |             x = tf.layers.batch_normalization(x, training=is_training)
112 |             x = tf.nn.relu6(x)
113 |         with tf.name_scope('depthwise'):  # depthwise conv
114 |             depthwise_weight = tf.Variable(xavier_initializer([3, 3, tmp_channel, 1]))
115 |             x = tf.nn.depthwise_conv2d(x, depthwise_weight, [1, stride, stride, 1], 'SAME')
116 |             x = tf.layers.batch_normalization(x, training=is_training)
117 |             x = tf.nn.relu6(x)
118 |         with tf.name_scope('project_pointwise'):  # 1x1 projection conv
119 |             pointwise_weight = tf.Variable(xavier_initializer([1, 1, tmp_channel, out_channel]))
120 |             x = tf.nn.conv2d(x, pointwise_weight, [1, 1, 1, 1], 'SAME')
121 |             x = tf.layers.batch_normalization(x, training=is_training)
122 |             # no activation here: linear bottleneck, avoids information loss
123 |             # x = tf.nn.relu6(x)
124 |         x += shortcut  # shortcut connection, mitigates vanishing gradients
125 |
126 |     return x
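# Channel flow through the inverted residual above, on illustrative numbers
# (in_channel = 32, expand_time = 6 as in the MobileNetV2 paper's bottlenecks):
# 32 -> 192 via the 1x1 expansion, 192 -> 192 via the 3x3 depthwise conv, then
# 192 -> out_channel via the linear 1x1 projection. Note the shortcut add
# requires stride == 1 and out_channel == in_channel to be shape-compatible.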
127 |
128 |
129 | def upsample(x, scale):
130 |     new_height = x.shape[1] * scale
131 |     new_width = x.shape[2] * scale
132 |     resized = tf.image.resize_images(x, [new_height, new_width])
133 |     return resized
134 |
135 |
136 | def full_yolo_body(x, out_channel, net_type, is_training):
137 |     channel = out_channel
138 |     if net_type in ['mobilenetv2']:
139 |         net_type = 'mobilenetv1'
140 |     x = conv_block(x, [1, 1], [1, 1], channel // 2, net_type, is_training=is_training)
141 |     x = conv_block(x, [3, 3], [1, 1], channel, net_type, is_training=is_training)
142 |     x = conv_block(x, [1, 1], [1, 1], channel // 2, net_type, is_training=is_training)
143 |     x = conv_block(x, [3, 3], [1, 1], channel, net_type, is_training=is_training)
144 |     x = conv_block(x, [1, 1], [1, 1], channel // 2, net_type, is_training=is_training)
145 |     x_route = x
146 |     x = conv_block(x, [3, 3], [1, 1], channel, net_type, is_training=is_training)
147 |     return x_route, x
148 |
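# full_yolo_body above follows the standard YOLOv3 detection block: five
# alternating 1x1/3x3 convs whose fifth output (x_route) feeds the next, finer
# head after a 1x1 conv and upsampling, while the sixth conv (x) feeds this
# scale's prediction layer.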
149 |
150 | def full_darknet_body(x, net_type, is_training):  # feature-extraction backbone
151 |     """
152 |     full darknet body built by net_type
153 |     :param x:
154 |     :param is_training:
155 |     :param net_type: cnn / mobilenet
156 |     :return:
157 |     """
158 |     if net_type in ['cnn', 'mobilenetv1']:
159 |         x = conv_block(x, [3, 3], [1, 1], 32, 'cnn', is_training=is_training)
160 |
161 |         # down sample
162 |         x = conv_block(x, [3, 3], [2, 2], 64, 'cnn', is_training=is_training)
163 |         for i in range(1):
164 |             x = residual(x, net_type, is_training)
165 |
166 |         # down sample
167 |         x = conv_block(x, [3, 3], [2, 2], 128, 'cnn', is_training=is_training)
168 |         for i in range(2):
169 |             x = residual(x, net_type, is_training)
170 |
171 |         # down sample
172 |         x = conv_block(x, [3, 3], [2, 2], 256, 'cnn', is_training=is_training)
173 |         for i in range(8):
174 |             x = residual(x, net_type, is_training)
175 |         route2 = x
176 |
177 |         # down sample
178 |         x = conv_block(x, [3, 3], [2, 2], 512, 'cnn', is_training=is_training)
179 |         for i in range(8):
180 |             x = residual(x, net_type, is_training)
181 |         route1 = x
182 |
183 |         # down sample
184 |         x = conv_block(x, [3, 3], [2, 2], 1024, 'cnn', is_training=is_training)
185 |         for i in range(4):
186 |             x = residual(x, net_type, is_training)
187 |
188 |     elif net_type == 'mobilenetv2':
189 |
190 |         print('MobileNet V2 ------------------ input image batch shape:', x.shape)
191 |         # x is a 416x416 image; the standard MobileNetV2 input is 224x224, so the layout differs slightly
192 |         x = conv_block(x, [3, 3], [2, 2], 32, 'cnn', is_training=is_training)  # standard conv2d, output 208x208x32
193 |
194 |         print('1 ------------------ batch shape:', x.shape)
195 |         x = conv_block(x, [3, 3], [2, 2], 16, net_type, is_training=is_training)  # bottleneck conv, output 104x104x16, down sample
196 |         x = conv_block(x, [3, 3], [1, 1], 24, net_type, is_training=is_training)  # bottleneck conv, output 104x104x24
197 |         x = residual(x, net_type, is_training, 24, 1)  # inverted residual, output 104x104x24
198 |         x = conv_block(x, [3, 3], [2, 2], 32, net_type, is_training=is_training)  # bottleneck conv, output 52x52x32, down sample
199 |         # print('2 ------------------ batch shape:', x.shape)
200 |         for i in range(2):  # inverted residuals, output 52x52x32
201 |             x = residual(x, net_type, is_training, 32, 1)
202 |         # print('21 ------------------ batch shape:', x.shape)
203 |         print('ROUTE2 ------------------ batch shape:', x.shape)
204 |         route2 = x
205 |
206 |         # print('4 ------------------ batch shape:', x.shape)
207 |         x = conv_block(x, [3, 3], [2, 2], 64, net_type, is_training=is_training)  # bottleneck conv, output 26x26x64, down sample
208 |         for i in range(3):  # inverted residuals, output 26x26x64
209 |             x = residual(x, net_type, is_training, 64, 6)
210 |         x = conv_block(x, [3, 3], [1, 1], 96, net_type, is_training=is_training)  # bottleneck conv, output 26x26x96, change the channel count
211 |         for i in range(2):  # inverted residuals, output 26x26x96
212 |             x = residual(x, net_type, is_training, 96, 6)
213 |         print('ROUTE1 ------------------ batch shape:', x.shape)
214 |         route1 = x
215 |
216 |         # down sample
217 |         print('5 ------------------ batch shape:', x.shape)
218 |         x = conv_block(x, [3, 3], [2, 2], 160, net_type, is_training=is_training)  # bottleneck conv, output 13x13x160, down sample
219 |         for i in range(2):
220 |             x = residual(x, net_type, is_training, 160, 1)
221 |         x = conv_block(x, [3, 3], [1, 1], 320, net_type, is_training=is_training)  # bottleneck conv, output 13x13x320, change the channel count
222 |         print('Final ------------------ batch shape:', x.shape)
223 |     else:
224 |         route1, route2 = [], []
225 |     return x, route1, route2
226 |
227 |
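# Feature-map strides implied by the backbone above (416x416 input assumed):
# route2 comes out at stride 8, route1 at stride 16, and the final x at
# stride 32, feeding the three detection heads below.
def _demo_route_strides(hw=416):
    return hw // 8, hw // 16, hw // 32  # (52, 26, 13)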
228 | def full_yolo_head(x, route1, route2, num_class, anchors, net_type, is_training):
229 |     with tf.name_scope('body_layer1'):
230 |         x_route, x = full_yolo_body(x, 1024, net_type, is_training)
231 |         x = conv_block(x, [1, 1], [1, 1], 3 * (5 + num_class), 'cnn', is_training, "yolo_head1", False)
232 |         fe1, box1, grid1 = yolo(x, anchors[[6, 7, 8]])
233 |
234 |     with tf.name_scope('head_layer2'):
235 |         x = conv_block(x_route, [1, 1], [1, 1], x_route.shape[-1].value // 2, net_type, is_training)
236 |         x = upsample(x, 2)
237 |         x = tf.concat([x, route1], 3)
238 |         x_route, x = full_yolo_body(x, 512, net_type, is_training)
239 |         x = conv_block(x, [1, 1], [1, 1], 3 * (5 + num_class), 'cnn', is_training, "yolo_head2", False)
240 |         fe2, box2, grid2 = yolo(x, anchors[[3, 4, 5]])
241 |
242 |     with tf.name_scope('head_layer3'):
243 |         x = conv_block(x_route, [1, 1], [1, 1], x_route.shape[-1].value // 2, net_type, is_training)
244 |         x = upsample(x, 2)
245 |         x = tf.concat([x, route2], 3)
246 |         x_route, x = full_yolo_body(x, 256, net_type, is_training)
247 |         x = conv_block(x, [1, 1], [1, 1], 3 * (5 + num_class), 'cnn', is_training, "yolo_head3", False)
248 |         fe3, box3, grid3 = yolo(x, anchors[[0, 1, 2]])
249 |
250 |     fe = tf.concat([fe1, fe2, fe3], 1)
251 |     boxes = tf.concat([box1, box2, box3], 1)
252 |     return fe, boxes, grid1, grid2, grid3
253 |
254 |
255 | def tiny_darknet_body(x, net_type, is_training):
256 |     """
257 |     yolo3_tiny body built by net_type
258 |     :param x:
259 |     :param is_training: used in bn
260 |     :param net_type: cnn or mobile-net
261 |     :return:
262 |     """
263 |     if net_type in ['mobilenetv1', 'mobilenetv2']:
264 |         net_type = 'mobilenetv1'
265 |     x = conv_block(x, [3, 3], [1, 1], 16, net_type, is_training)
266 |     x = tf.nn.max_pool(x, [1, 2, 2, 1], [1, 2, 2, 1], 'SAME')
267 |
268 |     x = conv_block(x, [3, 3], [1, 1], 32, net_type, is_training)
269 |     x = tf.nn.max_pool(x, [1, 2, 2, 1], [1, 2, 2, 1], 'SAME')
270 |
271 |     x = conv_block(x, [3, 3], [1, 1], 64, net_type, is_training)
272 |     x = tf.nn.max_pool(x, [1, 2, 2, 1], [1, 2, 2, 1], 'SAME')
273 |
274 |     x = conv_block(x, [3, 3], [1, 1], 128, net_type, is_training)
275 |     x = tf.nn.max_pool(x, [1, 2, 2, 1], [1, 2, 2, 1], 'SAME')
276 |
277 |     x = conv_block(x, [3, 3], [1, 1], 256, net_type, is_training)
278 |     x_route = x
279 |     x = tf.nn.max_pool(x, [1, 2, 2, 1], [1, 2, 2, 1], 'SAME')
280 |
281 |     x = conv_block(x, [3, 3], [1, 1], 512, net_type, is_training)
282 |     x = tf.nn.max_pool(x, [1, 2, 2, 1], [1, 1, 1, 1], 'SAME')
283 |
284 |     x = conv_block(x, [3, 3], [1, 1], 1024, net_type, is_training)
285 |
286 |     return x, x_route
287 |
288 |
289 | def tiny_yolo_head(x, x_route1, num_class, anchors, net_type, is_training):
290 |     with tf.name_scope('head_layer1'):
291 |         x = conv_block(x, [1, 1], [1, 1], 256, net_type, is_training)
292 |         x_route2 = x
293 |         x = conv_block(x, [3, 3], [1, 1], 512, net_type, is_training)
294 |         x = conv_block(x, [1, 1], [1, 1], 3 * (5 + num_class), 'cnn', is_training, "yolo_head1", False)
295 |         fe1 = x
296 |         fe1, box1, grid1 = yolo(fe1, anchors[[3, 4, 5]])
297 |
298 |     with tf.name_scope('head_layer2'):
299 |         x = conv_block(x_route2, [1, 1], [1, 1], 128, net_type, is_training)
300 |         x = upsample(x, 2)
301 |         x = tf.concat([x, x_route1], 3)
302 |         x = conv_block(x, [3, 3], [1, 1], 256, net_type, is_training)
303 |         x = conv_block(x, [1, 1], [1, 1], 3 * (5 + num_class), 'cnn', is_training, "yolo_head2", False)
304 |         fe2 = x
305 |         fe2, box2, grid2 = yolo(fe2, anchors[[0, 1, 2]])
306 |
307 |     fe = tf.concat([fe1, fe2], 1)
308 |     box = tf.concat([box1, box2], 1)
309 |     return fe, box, grid1, grid2
310 |
311 |
312 | def yolo(f, anchors):
313 |     """
314 |     convert features to boxes and scores
315 |     :param f:
316 |     :param anchors:
317 |     :return:
318 |     """
319 |     anchor_tensor = tf.constant(anchors, tf.float32)
320 |     batchsize = f.shape[0]
321 |     f = tf.reshape(f, [f.shape[0], f.shape[1], f.shape[2], 3, -1])
322 |     grid_y = tf.tile(tf.reshape(tf.range(f.shape[1]), [1, -1, 1, 1]), [batchsize, 1, f.shape[2], 1])
323 |     grid_x = tf.tile(tf.reshape(tf.range(f.shape[2]), [1, 1, -1, 1]), [batchsize, f.shape[1], 1, 1])
324 |     grid = tf.tile(tf.cast(tf.concat([grid_x, grid_y], -1), tf.float32)[:, :, :, tf.newaxis, :], (1, 1, 1, 3, 1))
325 |
326 |     box_xy = (tf.nn.sigmoid(f[..., :2]) + grid) / tf.cast(grid.shape[::-1][2:4], tf.float32)
327 |     box_wh = tf.math.exp(f[..., 2:4]) * anchor_tensor
328 |     box_confidence = tf.nn.sigmoid(f[..., 4:5])
329 |     classes_score = tf.nn.sigmoid(f[..., 5:])
330 |     boxes = tf.reshape(tf.concat([box_xy, box_wh, box_confidence, classes_score], -1), [batchsize, -1, 3, f.shape[4]])
331 |     f = tf.reshape(f, [batchsize, -1, 3, f.shape[4]])
332 |     return f, boxes, grid
333 |
334 |
335 | def model(x, num_classes, anchors, net_type, is_training, cal_loss=False):
336 |     batchsize, height, width, _ = x.get_shape().as_list()
337 |     if len(anchors) == 6:
338 |         x, x_route = tiny_darknet_body(x, net_type, is_training)
339 |         raw_pred, y, *grid = tiny_yolo_head(x, x_route, num_classes, anchors, net_type, is_training)
340 |     else:
341 |         x, route1, route2 = full_darknet_body(x, net_type, is_training)
342 |         raw_pred, y, *grid = full_yolo_head(x, route1, route2, num_classes, anchors, net_type, is_training)
343 |
344 |     box_xy, box_wh, box_confidence, classes_score = y[..., :2], y[..., 2:4], y[..., 4:5], y[..., 5:]
345 |     box_xy *= tf.constant([width, height], tf.float32)
346 |     # box_wh *= tf.constant([width, height], tf.float32)
347 |     boxe = tf.concat([box_xy, box_wh, box_confidence, classes_score], -1, name='debug_pred')
348 |
349 |     if cal_loss:
350 |         return raw_pred, boxe, grid
351 |     else:
352 |         return boxe
353 |
354 |
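# Sanity check on the flattened box count (full model, 416x416 input assumed):
# the heads concatenate 13x13 + 26x26 + 52x52 = 3549 grid cells along axis 1,
# each carrying 3 anchor slots, which matches the `s` computed in train.py.
def _demo_num_boxes():
    assert 13 * 13 + 26 * 26 + 52 * 52 == 3549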
355 | def loss(pred, gts, input_size, lambda_coord, lambda_noobj, lambda_cls, iou_threshold, debug_=False):
356 |     """
357 |     :param pred: (batch_size, num_boxes, 3, 5+num_class) [x0 y0 w h ...], raw_pred + boxes + grid
358 |     :param gts: shape = (batch_size, num_boxes, 3, 4+4+1+num_class) [xywh, classes]
359 |     :param input_size: height * width
360 |     :param lambda_coord: lambda
361 |     :param lambda_noobj: lambda
362 |     :param lambda_cls: lambda
363 |     :param iou_threshold: iou_threshold
364 |     :param debug_:
365 |     :return:
366 |     """
367 |
368 |     def binary_cross(_labels, _pred):
369 |         # pred = tf.clip_by_value(pred, 1e-10, 1 - 1e-10)
370 |         # return -labels * tf.math.log(pred)
371 |         # pred = tf.math.log(pred / (1 - pred))
372 |         return tf.nn.sigmoid_cross_entropy_with_logits(labels=_labels, logits=_pred)
373 |
374 |     raw_pred, pred_boxes, grid = pred
375 |
376 |     raw_gt_xy, raw_gt_wh = gts[..., 0:2], gts[..., 2:4]
377 |     true_gt_xy, true_gt_wh = gts[..., 4:6], gts[..., 6:8]
378 |     masks = gts[..., 8]
379 |     batchsize = masks.shape[0].value
380 |     i_height, i_width = input_size
381 |
382 |     # cal ignore_mask
383 |     ignore_mask = []
384 |     for b in range(batchsize):
385 |         true_box = tf.boolean_mask(gts[b:b + 1, ..., 4:8], masks[b:b + 1], name='debug_true_box')
386 |         with tf.name_scope('debug_iou'):
387 |             ious = box_iou(pred_boxes[b:b + 1, ..., :4], true_box)
388 |         ious = tf.reduce_max(ious, -1)
389 |         ignore_mask_ = tf.where(ious > iou_threshold, tf.zeros_like(ious), tf.ones_like(ious))
390 |         ignore_mask.append(ignore_mask_)
391 |     ignore_mask = tf.concat(ignore_mask, 0, name='debug_ignore_mask')
392 |
393 |     boxes_scale = 2 - true_gt_wh[..., 0] / i_width * true_gt_wh[..., 1] / i_height
394 |     # boxes_scale = 1
395 |
396 |     varss = tf.trainable_variables()
397 |     l2_loss = tf.reduce_sum([tf.nn.l2_loss(var) for var in varss]) * 0.001
398 |
399 |     masks_noobj = (1 - masks) * ignore_mask
400 |
401 |     # n_xywh = tf.reduce_sum(masks, name='debug_n_xywh')
402 |     # n_noob = tf.reduce_sum(masks_noobj, name='debug_n_noobj') / 100
403 |     n_xywh = batchsize
404 |     n_noob = batchsize
405 |
406 |     loss_xy = tf.reduce_sum(
407 |         lambda_coord * masks * boxes_scale * tf.reduce_sum(
408 |             # tf.math.square(raw_gt_xy - tf.math.sigmoid(raw_pred[..., 0:2]))
409 |             binary_cross(_labels=raw_gt_xy, _pred=raw_pred[..., 0:2])
410 |             , -1), name='debug_loss_xy') / n_xywh
411 |     loss_wh = tf.reduce_sum(
412 |         lambda_coord * masks * boxes_scale * tf.reduce_sum(
413 |             tf.math.square(raw_gt_wh - raw_pred[..., 2:4]),
414 |             -1), name='debug_loss_wh') / n_xywh
415 |     loss_obj_confidence = tf.reduce_sum(
416 |         masks * binary_cross(_labels=masks, _pred=raw_pred[..., 4]), name='debug_loss_obj') / n_xywh
417 |
418 |     loss_noobj_confidence = tf.reduce_sum(
419 |         lambda_noobj * masks_noobj * binary_cross(_labels=masks, _pred=raw_pred[..., 4]),
420 |         name='debug_loss_noobj') / n_noob
421 |     loss_cls = tf.reduce_sum(
422 |         masks * lambda_cls * tf.reduce_sum(
423 |             binary_cross(_labels=gts[..., 9:], _pred=raw_pred[..., 5:]), -1), name='debug_loss_cls'
424 |     ) / n_xywh
425 |     if debug_:
426 |         p = tf.print("loss_xy", loss_xy, "loss_wh", loss_wh, "loss_obj_confidence", loss_obj_confidence,
427 |                      'loss_noobj_confidence', loss_noobj_confidence, "loss_cls", loss_cls, "l2_loss", l2_loss)
428 |         with tf.control_dependencies([p]):
429 |             return loss_xy + loss_wh + loss_obj_confidence + loss_noobj_confidence + loss_cls + l2_loss
430 |     return loss_xy + loss_wh + loss_obj_confidence + loss_noobj_confidence + loss_cls + l2_loss
431 |
--------------------------------------------------------------------------------