├── net
│   ├── __init__.py
│   └── yolo3_net.py
├── util
│   ├── __init__.py
│   ├── voc_annotation.py
│   ├── coco_annotation.py
│   ├── load_weights.py
│   ├── kmeans.py
│   ├── image_utils.py
│   ├── box_utils.py
│   └── utils.py
├── model_data
│   ├── yolo_anchors_tiny.txt
│   ├── yolo_anchors.txt
│   ├── voc_classes.txt
│   └── coco_classes.txt
├── images
│   ├── full.jpg
│   ├── mAP.png
│   └── tiny.jpg
├── shell
│   ├── train_cnn_full.sh
│   ├── train_cnn_tiny.sh
│   ├── train_mobilenetv1_full.sh
│   ├── train_mobilenetv2_full.sh
│   └── train_mobilenetv2_tiny.sh
├── config
│   ├── pred_config.py
│   └── train_config.py
├── convert_weights.py
├── test.py
├── yolo.py
├── readme.md
└── train.py

/net/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/util/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/model_data/yolo_anchors_tiny.txt:
--------------------------------------------------------------------------------
10,14, 23,27, 37,58, 81,82, 135,169, 344,319
--------------------------------------------------------------------------------
/images/full.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuodongQi/yolo3_tensorflow/HEAD/images/full.jpg
--------------------------------------------------------------------------------
/images/mAP.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuodongQi/yolo3_tensorflow/HEAD/images/mAP.png
--------------------------------------------------------------------------------
/images/tiny.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuodongQi/yolo3_tensorflow/HEAD/images/tiny.jpg
--------------------------------------------------------------------------------
/model_data/yolo_anchors.txt:
--------------------------------------------------------------------------------
10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
--------------------------------------------------------------------------------
/model_data/voc_classes.txt:
--------------------------------------------------------------------------------
aeroplane
bicycle
bird
boat
bottle
bus
car
cat
chair
cow
diningtable
dog
horse
motorbike
person
pottedplant
sheep
sofa
train
tvmonitor
--------------------------------------------------------------------------------
/shell/train_cnn_full.sh:
--------------------------------------------------------------------------------
NET_TYPE="cnn"
TINY=False
ANCHOR_PATH="./model_data/yolo_anchors.txt"
PRETRAIN_PATH=""

epoch=200
batch_size=4
learning_rate=1e-4

debug=False

if [ -z "${PRETRAIN_PATH}" ]

then

cmd="python train.py \
-n "${NET_TYPE}" \
-t ${TINY} \
-e ${epoch} \
-b ${batch_size} \
-lr ${learning_rate} \
-d ${debug} \
--anchor_path ${ANCHOR_PATH}
"

else

cmd="python train.py \
-n "${NET_TYPE}" \
-t ${TINY} \
-pt "${PRETRAIN_PATH}" \
-e ${epoch} \
-b ${batch_size} \
-lr ${learning_rate} \
-d ${debug} \
--anchor_path ${ANCHOR_PATH}
"

fi

echo $cmd
$cmd
--------------------------------------------------------------------------------
/shell/train_cnn_tiny.sh:
--------------------------------------------------------------------------------
NET_TYPE="cnn"
TINY=True
ANCHOR_PATH="./model_data/yolo_anchors_tiny.txt"
PRETRAIN_PATH=""

epoch=200
batch_size=4
learning_rate=1e-4

debug=False

if [ -z "${PRETRAIN_PATH}" ]

then
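# PRETRAIN_PATH is empty, so train from scratch; the else branch below passes -pt to resume from a checkpoint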
16 | cmd="python train.py \ 17 | -n "${NET_TYPE}" \ 18 | -t ${TINY} \ 19 | -e ${epoch} \ 20 | -b ${batch_size} \ 21 | -lr ${learning_rate} \ 22 | -d ${debug} \ 23 | --anchor_path ${ANCHOR_PATH} 24 | " 25 | 26 | else 27 | 28 | cmd="python train.py \ 29 | -n "${NET_TYPE}" \ 30 | -t ${TINY} \ 31 | -pt "${PRETRAIN_PATH}" \ 32 | -e ${epoch} \ 33 | -b ${batch_size} \ 34 | -lr ${learning_rate} \ 35 | -d ${debug} \ 36 | --anchor_path ${ANCHOR_PATH} 37 | " 38 | 39 | fi 40 | 41 | echo $cmd 42 | $cmd 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /shell/train_mobilenetv1_full.sh: -------------------------------------------------------------------------------- 1 | NET_TYPE="mobilenetv1" 2 | TINY=False 3 | ANCHOR_PATH="./model_data/yolo_anchors.txt" 4 | PRETRAIN_PATH="" 5 | 6 | epoch=200 7 | batch_size=4 8 | learning_rate=1e-4 9 | 10 | debug=False 11 | 12 | if [ -z "${PRETRAIN_PATH}" ] 13 | 14 | then 15 | 16 | cmd="python train.py \ 17 | -n "${NET_TYPE}" \ 18 | -t ${TINY} \ 19 | -e ${epoch} \ 20 | -b ${batch_size} \ 21 | -lr ${learning_rate} \ 22 | -d ${debug} \ 23 | --anchor_path ${ANCHOR_PATH} 24 | " 25 | 26 | else 27 | 28 | cmd="python train.py \ 29 | -n "${NET_TYPE}" \ 30 | -t ${TINY} \ 31 | -pt "${PRETRAIN_PATH}" \ 32 | -e ${epoch} \ 33 | -b ${batch_size} \ 34 | -lr ${learning_rate} \ 35 | -d ${debug} \ 36 | --anchor_path ${ANCHOR_PATH} 37 | " 38 | 39 | fi 40 | 41 | echo $cmd 42 | $cmd 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /shell/train_mobilenetv2_full.sh: -------------------------------------------------------------------------------- 1 | NET_TYPE="mobilenetv2" 2 | TINY=False 3 | ANCHOR_PATH="./model_data/yolo_anchors.txt" 4 | PRETRAIN_PATH="" 5 | 6 | epoch=200 7 | batch_size=4 8 | learning_rate=1e-4 9 | 10 | debug=False 11 | 12 | if [ -z "${PRETRAIN_PATH}" ] 13 | 14 | then 15 | 16 | cmd="python train.py \ 17 | -n "${NET_TYPE}" \ 18 | -t ${TINY} \ 19 | -e ${epoch} \ 20 | -b ${batch_size} \ 21 | -lr ${learning_rate} \ 22 | -d ${debug} \ 23 | --anchor_path ${ANCHOR_PATH} 24 | " 25 | 26 | else 27 | 28 | cmd="python train.py \ 29 | -n "${NET_TYPE}" \ 30 | -t ${TINY} \ 31 | -pt "${PRETRAIN_PATH}" \ 32 | -e ${epoch} \ 33 | -b ${batch_size} \ 34 | -lr ${learning_rate} \ 35 | -d ${debug} \ 36 | --anchor_path ${ANCHOR_PATH} 37 | " 38 | 39 | fi 40 | 41 | echo $cmd 42 | $cmd 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /shell/train_mobilenetv2_tiny.sh: -------------------------------------------------------------------------------- 1 | NET_TYPE="mobilenetv2" 2 | TINY=True 3 | ANCHOR_PATH="./model_data/yolo_anchors_tiny.txt" 4 | PRETRAIN_PATH="" 5 | 6 | epoch=200 7 | batch_size=4 8 | learning_rate=1e-4 9 | 10 | debug=False 11 | 12 | if [ -z "${PRETRAIN_PATH}" ] 13 | 14 | then 15 | 16 | cmd="python train.py \ 17 | -n "${NET_TYPE}" \ 18 | -t ${TINY} \ 19 | -e ${epoch} \ 20 | -b ${batch_size} \ 21 | -lr ${learning_rate} \ 22 | -d ${debug} \ 23 | --anchor_path ${ANCHOR_PATH} 24 | " 25 | 26 | else 27 | 28 | cmd="python train.py \ 29 | -n "${NET_TYPE}" \ 30 | -t ${TINY} \ 31 | -pt "${PRETRAIN_PATH}" \ 32 | -e ${epoch} \ 33 | -b ${batch_size} \ 34 | -lr ${learning_rate} \ 35 | -d ${debug} \ 36 | --anchor_path ${ANCHOR_PATH} 37 | " 38 | 39 | fi 40 | 41 | echo $cmd 42 | $cmd 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /config/pred_config.py: 
-------------------------------------------------------------------------------- 1 | import argparse 2 | from os import getcwd 3 | from os.path import join 4 | 5 | 6 | def get_config(): 7 | root = getcwd() 8 | conf = argparse.ArgumentParser() 9 | 10 | conf.add_argument('-i', '--image', default=None, type=str, help='image path') 11 | conf.add_argument('-v', '--video', default=None, type=str, help='video path') 12 | 13 | # load weight_path 14 | conf.add_argument('-w', '--weight_path', type=str, help='weight path', 15 | default='logs/cnn_full/cnn_full_model_epoch_20') 16 | 17 | conf.add_argument('--score', default=0.3, type=float, help='score threshold') 18 | 19 | conf.add_argument('--classes_path', type=str, help='classes path', 20 | default=join(root, 'model_data', 'coco_classes.txt')) 21 | 22 | return conf.parse_args() 23 | -------------------------------------------------------------------------------- /model_data/coco_classes.txt: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush -------------------------------------------------------------------------------- /convert_weights.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import sys 3 | import time 4 | from os import makedirs 5 | from os.path import exists, join, split 6 | 7 | import numpy as np 8 | import tensorflow as tf 9 | 10 | from net.yolo3_net import model 11 | from util.load_weights import load_weight 12 | 13 | 14 | def convert(is_tiny=False): 15 | if is_tiny: 16 | anchors = np.array([[1, 1]] * 6) 17 | weight_path = join('model_data', 'yolov3-tiny.weights') 18 | save_path = join('logs', 'cnn_tiny', 'cnn_tiny_model') 19 | else: 20 | anchors = np.array([[1, 1]] * 9) 21 | weight_path = join('model_data', 'yolov3.weights') 22 | save_path = join('logs', 'cnn_full', 'cnn_full_model') 23 | 24 | if not exists(split(save_path)[0]): 25 | makedirs(split(save_path)[0]) 26 | input_data = tf.placeholder(dtype=tf.float32, shape=(1, 416, 416, 3)) 27 | 28 | model(input_data, 80, anchors, 'cnn', True, False) 29 | 30 | model_vars_ = tf.global_variables() 31 | assert weight_path.endswith('.weights'), '{} is not a .weights files'.format(weight_path) 32 | assign_ops_ = load_weight(model_vars_, weight_path) 33 | t0 = time.time() 34 | print("start loading weights") 35 | saver = tf.train.Saver() 36 | with tf.Session() as sess: 37 | sess.run(assign_ops_) 38 | saver.save(sess, save_path, write_meta_graph=False, write_state=False) 39 | t1 = time.time() 40 | 
print("convert weights is over, cost {0:.4f}s".format(t1 - t0)) 41 | 42 | 43 | if __name__ == '__main__': 44 | boolen = sys.argv[1] 45 | if boolen.lower() == 'tiny': 46 | convert(True) 47 | elif boolen.lower() == 'full': 48 | convert(False) 49 | else: 50 | raise Exception('unkonwm argument') 51 | -------------------------------------------------------------------------------- /config/train_config.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from os import getcwd 3 | from os.path import join 4 | 5 | 6 | def str2bool(v): 7 | if v.lower() in ['yes', 'true']: 8 | return True 9 | elif v.lower() in ['no', 'false']: 10 | return False 11 | else: 12 | raise argparse.ArgumentTypeError() 13 | 14 | 15 | def get_config(): 16 | root = getcwd() 17 | conf = argparse.ArgumentParser() 18 | 19 | # yolo3 type 20 | conf.add_argument('-n', "--net_type", type=str, help='net type: cnn, mobilenetv1 mobilenetv2 or mobilenetv3', 21 | default='cnn') 22 | conf.add_argument('-t', '--tiny', type=str2bool, help='whether tiny yolo or not', default=False) 23 | 24 | # training argument 25 | conf.add_argument('-b', '--batch_size', type=int, help='batch_size', default=4) 26 | conf.add_argument('-e', '--epoch', type=int, help='epoch', default=100) 27 | conf.add_argument('-lr', '--learn_rate', type=float, help='learn_rate', default=1e-4) 28 | 29 | # load pretrain 30 | conf.add_argument('-pt', '--pretrain_path', type=str, help='pretrain path', default='logs/cnn_full/cnn_full_model') 31 | 32 | conf.add_argument('--anchor_path', type=str, help='anchor path', 33 | default=join(root, 'model_data', 'yolo_anchors.txt')) 34 | conf.add_argument('--train_path', type=str, help='train file path', 35 | default=join(root, 'model_data', 'train.txt')) 36 | conf.add_argument('--valid_path', type=str, help='valid file path', 37 | default=join(root, 'model_data', 'valid.txt')) 38 | conf.add_argument('--classes_path', type=str, help='classes path', 39 | default=join(root, 'model_data', 'coco_classes.txt')) 40 | 41 | conf.add_argument('-d', '--debug', type=str2bool, help='whether print per item loss', default=False) 42 | return conf.parse_args() 43 | -------------------------------------------------------------------------------- /util/voc_annotation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import xml.etree.ElementTree as ET 4 | 5 | wd = os.path.dirname(os.getcwd()) 6 | class_path = os.path.join(wd, 'model_data', 'voc_classes.txt') # change to the classes path you want to detect 7 | is_train = True # whether train dataset or valid dataset 8 | 9 | if is_train: 10 | image_dir = '' # your train image dir 11 | annotation_dir = '' # your train image annotation dir 12 | gen_files = 'train.txt' 13 | else: 14 | image_dir = '' # your val image dir 15 | annotation_dir = '' # your val image annotation dir 16 | gen_files = 'valid.txt' 17 | 18 | with open(class_path) as f: 19 | class_names = f.readlines() 20 | classes = [c.strip() for c in class_names] 21 | 22 | list_file_train = open(os.path.join(wd, 'model_data', gen_files), 'w') 23 | 24 | annotation_files = os.listdir(annotation_dir) 25 | random.shuffle(annotation_files) 26 | 27 | for i in range(0, len(annotation_files), 1): 28 | annotation_file = annotation_files[i] 29 | 30 | list_file_train.write('%s/%s.jpg' % (image_dir, annotation_file.split('.')[0])) 31 | 32 | xml_file = os.path.join(annotation_dir, annotation_file) 33 | try: 34 | in_file = open(xml_file, 'r') 
    except OSError:
        print("open failed {0}".format(xml_file))
    else:
        tree = ET.parse(in_file)
        root = tree.getroot()

        for obj in root.iter('object'):
            difficult = obj.find('difficult').text
            cls = obj.find('name').text
            if cls not in classes or int(difficult) == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            b = (int(xmlbox.find('xmin').text), int(xmlbox.find('ymin').text), int(xmlbox.find('xmax').text),
                 int(xmlbox.find('ymax').text))
            list_file_train.write(" " + ",".join([str(a) for a in b]) + ',' + str(cls_id))
    list_file_train.write('\n')

list_file_train.close()

# clean dataset
with open(os.path.join(wd, 'model_data', gen_files), 'r') as f1:
    old_lines = f1.readlines()
with open(os.path.join(wd, 'model_data', gen_files), 'w') as f2:
    for line in old_lines:
        line_ = line.split(' ')
        if len(line_) > 1:
            f2.write(line)
--------------------------------------------------------------------------------
/util/coco_annotation.py:
--------------------------------------------------------------------------------
import json
import os
from collections import defaultdict

wd = os.path.dirname(os.getcwd())
class_path = os.path.join(wd, 'model_data', 'coco_classes.txt')  # change to the classes path you want to detect
is_train = 1  # whether to build the train dataset or the valid dataset

if is_train:
    image_dir = '/media/data1/datasets/coco/train2017'  # your train image dir
    annotation_file = '/media/data1/datasets/coco/annotations/instances_train2017.json'  # your train annotation file
    gen_files = 'train.txt'
else:
    image_dir = '/media/data1/datasets/coco/val2017'  # your val image dir
    annotation_file = '/media/data1/datasets/coco/annotations/instances_val2017.json'  # your val annotation file
    gen_files = 'valid.txt'

name_box_id = defaultdict(list)
id_name = dict()
with open(class_path) as f:
    class_names = f.readlines()
    classes = [c.strip() for c in class_names]

list_file = open(os.path.join(wd, 'model_data', gen_files), 'w')

with open(annotation_file) as f:
    data = json.load(f)
    annotations = data['annotations']

for ant in annotations:
    image_id = ant['image_id']
    image_path = os.path.join(image_dir, '%012d.jpg' % image_id)
    cat = ant['category_id']

    # map the sparse COCO category ids (1-90, with gaps) to contiguous 0-79 class indices
    if 1 <= cat <= 11:
        cat -= 1
    elif 13 <= cat <= 25:
        cat -= 2
    elif 27 <= cat <= 28:
        cat -= 3
    elif 31 <= cat <= 44:
        cat -= 5
    elif 46 <= cat <= 65:
        cat -= 6
    elif cat == 67:
        cat -= 7
    elif cat == 70:
        cat -= 9
    elif 72 <= cat <= 82:
        cat -= 10
    elif 84 <= cat <= 90:
        cat -= 11
    name_box_id[image_path].append([ant['bbox'], cat])

for key, box_infos in name_box_id.items():
    list_file.write(key)
    for info in box_infos:
        # COCO bbox format is [x_min, y_min, width, height]; convert to corner coordinates
        x_min = int(info[0][0])
        y_min = int(info[0][1])
        x_max = x_min + int(info[0][2])
        y_max = y_min + int(info[0][3])

        box_info = " %d,%d,%d,%d,%d" % (x_min, y_min, x_max, y_max, int(info[1]))
        list_file.write(box_info)
    list_file.write('\n')
list_file.close()

# clean dataset
with open(os.path.join(wd, 'model_data', gen_files), 'r') as f1:
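    # re-read the annotation file just written and keep only lines that contain at
    # least one box; images whose objects were all filtered out leave path-only lines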
old_line = f1.readlines() 72 | with open(os.path.join(wd, 'model_data', gen_files), 'w') as f2: 73 | for line in old_line: 74 | line_ = line.split(' ') 75 | if len(line_) > 1: 76 | f2.write(line) 77 | -------------------------------------------------------------------------------- /util/load_weights.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | 6 | def load_weight(var_list, file_path): 7 | with open(file_path, "rb") as fp: 8 | _ = np.fromfile(fp, dtype=np.int32, count=5) 9 | weights = np.fromfile(fp, dtype=np.float32) 10 | ptr = 0 11 | i = 0 12 | assign_ops = [] 13 | while i < len(var_list) - 1: 14 | var1 = var_list[i] 15 | var2 = var_list[i + 1] 16 | # do something only if we process conv layer 17 | if 'cnn' in var1.name: 18 | # check type of next layer 19 | if 'batch' in var2.name: 20 | # load batch norm params 21 | gamma, beta, mean, var = var_list[i + 1:i + 5] 22 | batch_norm_vars = [beta, gamma, mean, var] 23 | for var in batch_norm_vars: 24 | shape = var.shape.as_list() 25 | num_params = np.prod(shape) 26 | var_weights = weights[ptr:ptr + num_params].reshape(shape) 27 | ptr += num_params 28 | assign_ops.append(tf.assign(var, var_weights, validate_shape=True)) 29 | 30 | # we move the pointer by 4, because we loaded 4 variables 31 | i += 4 32 | elif 'cnn' in var2.name: 33 | # load biases 34 | bias = var2 35 | bias_shape = bias.shape.as_list() 36 | bias_params = np.prod(bias_shape) 37 | bias_weights = weights[ptr:ptr + bias_params].reshape(bias_shape) 38 | assign_ops.append(tf.assign(bias, bias_weights, validate_shape=True)) 39 | if 'yolo_head' in bias.name: # if num_classes is not 80 40 | ptr += 255 41 | else: 42 | ptr += bias_params 43 | 44 | # we loaded 1 variable 45 | i += 1 46 | # we can load weights of conv layer 47 | 48 | shape = var1.shape.as_list() 49 | num_params = np.prod(shape) 50 | var_weights = weights[ptr:ptr + num_params].reshape((shape[3], shape[2], shape[0], shape[1])) 51 | # remember to transpose to column-major 52 | # DarkNet conv_weights are serialized Caffe-style: 53 | # (out_dim, in_dim, height, width) 54 | # We would like to set these to Tensorflow order: 55 | # (height, width, in_dim, out_dim) 56 | var_weights = np.transpose(var_weights, (2, 3, 1, 0)) 57 | assign_ops.append(tf.assign(var1, var_weights, validate_shape=True)) 58 | 59 | if 'yolo_head' in var1.name: # if num_classes is not 80 60 | shape_ = shape[:3] 61 | shape_.append(255) 62 | ptr += np.prod(shape_) 63 | 64 | else: 65 | ptr += num_params 66 | 67 | i += 1 68 | assert ptr == len(weights), "load failed, please verify your weight file" 69 | return assign_ops 70 | -------------------------------------------------------------------------------- /util/kmeans.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class YOLO_Kmeans: 5 | 6 | def __init__(self, cluster_number, filename): 7 | self.cluster_number = cluster_number 8 | self.filename = filename 9 | 10 | def iou(self, boxes, clusters): # 1 box -> k clusters 11 | n = boxes.shape[0] 12 | k = self.cluster_number 13 | 14 | box_area = boxes[:, 0] * boxes[:, 1] 15 | box_area = box_area.repeat(k) 16 | box_area = np.reshape(box_area, (n, k)) 17 | 18 | cluster_area = clusters[:, 0] * clusters[:, 1] 19 | cluster_area = np.tile(cluster_area, [1, n]) 20 | cluster_area = np.reshape(cluster_area, (n, k)) 21 | 22 | box_w_matrix = np.reshape(boxes[:, 0].repeat(k), (n, k)) 23 | 
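        # tile cluster widths/heights into (n, k) matrices so that every box is paired
        # with every cluster and the IoU reduces to element-wise minima below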
cluster_w_matrix = np.reshape(np.tile(clusters[:, 0], (1, n)), (n, k)) 24 | min_w_matrix = np.minimum(cluster_w_matrix, box_w_matrix) 25 | 26 | box_h_matrix = np.reshape(boxes[:, 1].repeat(k), (n, k)) 27 | cluster_h_matrix = np.reshape(np.tile(clusters[:, 1], (1, n)), (n, k)) 28 | min_h_matrix = np.minimum(cluster_h_matrix, box_h_matrix) 29 | inter_area = np.multiply(min_w_matrix, min_h_matrix) 30 | 31 | result = inter_area / (box_area + cluster_area - inter_area) 32 | return result 33 | 34 | def avg_iou(self, boxes, clusters): 35 | accuracy = np.mean([np.max(self.iou(boxes, clusters), axis=1)]) 36 | return accuracy 37 | 38 | def kmeans(self, boxes, k, dist=np.median): 39 | box_number = boxes.shape[0] 40 | distances = np.empty((box_number, k)) 41 | last_nearest = np.zeros((box_number,)) 42 | np.random.seed() 43 | clusters = boxes[np.random.choice( 44 | box_number, k, replace=False)] # init k clusters 45 | while True: 46 | 47 | distances = 1 - self.iou(boxes, clusters) 48 | 49 | current_nearest = np.argmin(distances, axis=1) 50 | if (last_nearest == current_nearest).all(): 51 | break # clusters won't change 52 | for cluster in range(k): 53 | clusters[cluster] = dist( # update clusters 54 | boxes[current_nearest == cluster], axis=0) 55 | 56 | last_nearest = current_nearest 57 | 58 | return clusters 59 | 60 | def result2txt(self, data): 61 | f = open("model_data/yolo_tiny_anchors.txt", 'w') 62 | row = np.shape(data)[0] 63 | for i in range(row): 64 | if i == 0: 65 | x_y = "%d,%d" % (data[i][0], data[i][1]) 66 | else: 67 | x_y = ", %d,%d" % (data[i][0], data[i][1]) 68 | f.write(x_y) 69 | f.close() 70 | 71 | def txt2boxes(self): 72 | f = open(self.filename, 'r') 73 | dataSet = [] 74 | for line in f: 75 | infos = line.split(" ") 76 | length = len(infos) 77 | for i in range(1, length): 78 | width = int(infos[i].split(",")[2]) - \ 79 | int(infos[i].split(",")[0]) 80 | height = int(infos[i].split(",")[3]) - \ 81 | int(infos[i].split(",")[1]) 82 | dataSet.append([width, height]) 83 | result = np.array(dataSet) 84 | f.close() 85 | return result 86 | 87 | def txt2clusters(self): 88 | all_boxes = self.txt2boxes() 89 | result = self.kmeans(all_boxes, k=self.cluster_number) 90 | result = result[np.lexsort(result.T[0, None])] 91 | self.result2txt(result) 92 | print("K anchors:\n {}".format(result)) 93 | print("Accuracy: {:.2f}%".format( 94 | self.avg_iou(all_boxes, result) * 100)) 95 | 96 | 97 | if __name__ == "__main__": 98 | cluster_number = 9 99 | filename = "model_data/train.txt" 100 | kmeans = YOLO_Kmeans(cluster_number, filename) 101 | kmeans.txt2clusters() 102 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import time 2 | from collections import defaultdict 3 | from os.path import join, split 4 | 5 | import cv2 6 | import numpy as np 7 | import tensorflow as tf 8 | 9 | from config.pred_config import get_config 10 | from net.yolo3_net import model 11 | from util.box_utils import pick_box 12 | from util.image_utils import get_color_table, read_image_and_lable 13 | from util.utils import cal_fp_fn_tp_tn, cal_mAP 14 | 15 | 16 | class YOLO(): 17 | def __init__(self, config): 18 | self.config = config 19 | 20 | net_type, tiny = split(self.config.weight_path)[-1].split('_')[:2] 21 | 22 | if tiny == 'tiny': 23 | self.anchor_path = join('model_data', 'yolo_anchors_tiny.txt') 24 | else: 25 | self.anchor_path = join('model_data', 'yolo_anchors.txt') 26 | 27 | self.classes = 
self._get_classes() 28 | self.anchors = self._get_anchors() 29 | self.hw = [416, 416] 30 | self.batch_size = 64 31 | self.ious_thres = [0.5, 0.75] 32 | 33 | self.test_path = "model_data/test.txt" 34 | 35 | with open(self.test_path) as f: 36 | self.test_data = f.readlines() 37 | 38 | if tiny == 'tiny': 39 | assert 6 == len( 40 | self.anchors), 'the model type does not match with anchors, check anchors or type param' 41 | else: 42 | assert 9 == len( 43 | self.anchors), 'the model type does not match with anchors, check anchors or type param' 44 | 45 | self.input = tf.placeholder(tf.float32, [self.batch_size] + self.hw + [3]) 46 | self.is_training = tf.placeholder(tf.bool, shape=[]) 47 | self.pred = model(self.input, len(self.classes), self.anchors, net_type, self.is_training, False) 48 | 49 | print('start load net_type: {}_{}_model'.format(net_type, tiny)) 50 | 51 | # load weights 52 | conf = tf.ConfigProto() 53 | conf.gpu_options.allow_growth = True 54 | 55 | # change fraction according to your GPU 56 | # conf.gpu_options.per_process_gpu_memory_fraction = 0.05 57 | 58 | self.sess = tf.Session(config=conf) 59 | saver = tf.train.Saver() 60 | saver.restore(self.sess, self.config.weight_path) 61 | self.color_table = get_color_table(len(self.classes)) 62 | 63 | def _get_anchors(self): 64 | """loads the anchors from a file""" 65 | with open(self.anchor_path) as f: 66 | anchors = f.readline() 67 | anchors = [float(x) for x in anchors.split(',')] 68 | return np.array(anchors).reshape(-1, 2) 69 | 70 | def _get_classes(self): 71 | """loads the classes""" 72 | with open(self.config.classes_path) as f: 73 | class_names = f.readlines() 74 | class_names = [c.strip() for c in class_names] 75 | return class_names 76 | 77 | def test(self): 78 | total_test_case = len(self.test_data) 79 | 80 | FP_TP = defaultdict(lambda: defaultdict(list)) 81 | GT_NUMS = defaultdict(int) 82 | GTS = defaultdict(lambda: defaultdict(list)) 83 | DETECTION = defaultdict(lambda: defaultdict(list)) 84 | img_data = [] 85 | 86 | print("total test case:", total_test_case) 87 | 88 | for i in range(total_test_case): 89 | 90 | img, xyxy = read_image_and_lable(self.test_data[i], self.hw, is_training=False) 91 | img_data.append(img) 92 | print("{}/{}".format(i, total_test_case)) 93 | for per_xyxy in xyxy: 94 | GTS[i % self.batch_size][self.classes[int(per_xyxy[4])]].append(per_xyxy[:4].tolist()) 95 | 96 | if (i + 1) % self.batch_size == 0: # a batch 97 | boxes = self.sess.run(self.pred, feed_dict={self.input: img_data, self.is_training: False}) 98 | 99 | for b in range(self.batch_size): 100 | picked_boxes = pick_box(boxes[b], 0.01, 0.5, self.hw, self.classes) # NMS 101 | for picked_box in picked_boxes: 102 | DETECTION[b][self.classes[int(picked_box[5])]].append(picked_box[:5].tolist()) 103 | 104 | # cal FP TP 105 | cal_fp_fn_tp_tn(DETECTION, GTS, FP_TP, GT_NUMS, self.classes, self.ious_thres) 106 | 107 | DETECTION.clear() 108 | GTS.clear() 109 | img_data.clear() 110 | 111 | APs, mAPs = cal_mAP(FP_TP, GT_NUMS, self.classes, self.ious_thres) 112 | print(APs, mAPs) 113 | 114 | 115 | if __name__ == '__main__': 116 | configs = get_config() 117 | YOLO(configs).test() 118 | -------------------------------------------------------------------------------- /util/image_utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf8 2 | 3 | import random 4 | 5 | import cv2 6 | import numpy as np 7 | from matplotlib.colors import hsv_to_rgb, rgb_to_hsv 8 | 9 | 10 | def rand(a=0., b=1.): 11 | return 
random.random() * (b - a) + a 12 | 13 | 14 | def read_image_and_lable(gt_path, hw, hue=.1, sat=1.5, val=1.5, is_training=True): 15 | """read image form image_set path random distort image """ 16 | f_path, *_label = gt_path.split(' ') 17 | if not len(_label): 18 | # f_path = f_path.split('\n')[0] 19 | return 20 | image_raw_data = cv2.imread(f_path)[..., ::-1] # RGB h*w*c 21 | height, width = image_raw_data.shape[0], image_raw_data.shape[1] 22 | image_data = cv2.resize(image_raw_data, tuple(hw[::-1])) / 255.0 23 | 24 | h_scale = hw[0] / height 25 | w_scale = hw[1] / width 26 | # anchor[:, 0] *= w_scale 27 | # anchor[:, 1] *= h_scale 28 | 29 | xyxy = [] 30 | 31 | for per_label in _label: 32 | xmin, ymin, xmax, ymax, cls = list(map(float, per_label.split(','))) 33 | xyxy.append([xmin * w_scale, ymin * h_scale, xmax * w_scale, ymax * h_scale, cls]) 34 | xyxy = np.array(xyxy) 35 | 36 | if is_training: 37 | 38 | # random flip image from top to down 39 | if rand() < .5: 40 | image_data = cv2.flip(image_data, 0) 41 | tmp = xyxy[:, 1].copy() 42 | xyxy[:, 1] = hw[0] - xyxy[:, 3] 43 | xyxy[:, 3] = hw[0] - tmp 44 | 45 | # random flip image from left to right 46 | if rand() < .5: 47 | image_data = cv2.flip(image_data, 1) 48 | tmp = xyxy[:, 0].copy() 49 | xyxy[:, 0] = hw[1] - xyxy[:, 2] 50 | xyxy[:, 2] = hw[1] - tmp 51 | 52 | # distort image 53 | if rand() < 0.5: 54 | x = rgb_to_hsv(image_data) 55 | hue = rand(-hue, hue) 56 | sat = rand(1, sat) if rand() < .5 else 1 / rand(1, sat) 57 | val = rand(1, val) if rand() < .5 else 1 / rand(1, val) 58 | x[..., 0] += hue 59 | x[..., 0][x[..., 0] > 1] -= 1 60 | x[..., 0][x[..., 0] < 0] += 1 61 | x[..., 1] *= sat 62 | x[..., 2] *= val 63 | x[x > 1] = 1 64 | x[x < 0] = 0 65 | 66 | image_data = hsv_to_rgb(x) # RGB 67 | # random pad 68 | if rand() < .5: 69 | pad_top = random.randint(0, 25) 70 | pad_left = random.randint(0, 25) 71 | if rand() < .5: 72 | image_data = np.pad(image_data, ((pad_top, 0), (pad_left, 0), (0, 0)), 'edge') 73 | else: 74 | image_data = np.pad(image_data, ((pad_top, 0), (pad_left, 0), (0, 0)), 'constant') 75 | image_data = image_data[:hw[0], :hw[1], :] 76 | for i in range(xyxy.shape[0]): 77 | xyxy[i, 0] = pad_left + xyxy[i, 0] if pad_left + xyxy[i, 0] < hw[1] else hw[1] 78 | xyxy[i, 2] = pad_left + xyxy[i, 2] if pad_left + xyxy[i, 2] < hw[1] else hw[1] 79 | xyxy[i, 1] = pad_top + xyxy[i, 1] if pad_top + xyxy[i, 1] < hw[0] else hw[0] 80 | xyxy[i, 3] = pad_top + xyxy[i, 3] if pad_top + xyxy[i, 3] < hw[0] else hw[0] 81 | # random pad 82 | if rand() < .5: 83 | pad_bottom = random.randint(0, 25) 84 | pad_right = random.randint(0, 25) 85 | if rand() < .5: 86 | image_data = np.pad(image_data, ((0, pad_bottom), (0, pad_right), (0, 0)), 'edge') 87 | else: 88 | image_data = np.pad(image_data, ((0, pad_bottom), (0, pad_right), (0, 0)), 'constant') 89 | image_data = image_data[pad_bottom:hw[0] + pad_bottom, pad_right:hw[1] + pad_right, :] 90 | for i in range(xyxy.shape[0]): 91 | xyxy[i, 0] = xyxy[i, 0] - pad_right if xyxy[i, 0] - pad_right > 0 else 0 92 | xyxy[i, 2] = xyxy[i, 2] - pad_right if xyxy[i, 2] - pad_right > 0 else 0 93 | xyxy[i, 1] = xyxy[i, 1] - pad_bottom if xyxy[i, 1] - pad_bottom > 0 else 0 94 | xyxy[i, 3] = xyxy[i, 3] - pad_bottom if xyxy[i, 3] - pad_bottom > 0 else 0 95 | return image_data, xyxy 96 | 97 | 98 | def get_color_table(class_num, seed=200): 99 | random.seed(seed) 100 | color_table = {} 101 | for i in range(class_num): 102 | color_table[i] = [random.randint(0, 255) for _ in range(3)] 103 | return color_table 104 | 105 | 106 | def 
plot_img(img, picked_boxes, color_table, classes, is_gt=False): 107 | """ 108 | get original boxes and plot them 109 | """ 110 | for co, bbox in enumerate(picked_boxes): 111 | color = color_table[int(bbox[5])] 112 | tl = int(min(round(0.002 * max(img.shape[0:2])), min(bbox[3] - bbox[1], bbox[2] - bbox[0]))) 113 | t2 = max(tl - 1, 1) # font thickness 114 | if is_gt: 115 | label = "gts: {}".format(classes[int(bbox[5])]) 116 | else: 117 | label = "{} {:.2f}".format(classes[int(bbox[5])], bbox[4]) 118 | img = cv2.rectangle(img, tuple(np.int32([bbox[0], bbox[1]])), 119 | tuple(np.int32([bbox[2], bbox[3]])), color, 3) 120 | img = cv2.putText(img, label, tuple(np.int32([bbox[0], bbox[1]])), 121 | cv2.FONT_HERSHEY_TRIPLEX, float(tl) / 3, color, thickness=t2, lineType=cv2.LINE_AA) 122 | 123 | return img 124 | -------------------------------------------------------------------------------- /yolo.py: -------------------------------------------------------------------------------- 1 | import time 2 | from os.path import join, split 3 | 4 | import cv2 5 | import numpy as np 6 | import tensorflow as tf 7 | 8 | from config.pred_config import get_config 9 | from net.yolo3_net import model 10 | from util.box_utils import pick_box, get_true_box 11 | from util.image_utils import get_color_table, plot_img 12 | 13 | 14 | class YOLO(): 15 | def __init__(self, config): 16 | self.config = config 17 | 18 | net_type, tiny = split(self.config.weight_path)[-1].split('_')[:2] 19 | 20 | if tiny == 'tiny': 21 | self.anchor_path = join('model_data', 'yolo_anchors_tiny.txt') 22 | else: 23 | self.anchor_path = join('model_data', 'yolo_anchors.txt') 24 | 25 | self.classes = self._get_classes() 26 | self.anchors = self._get_anchors() 27 | self.hw = [416, 416] 28 | self.batch_size = 1 29 | 30 | if tiny == 'tiny': 31 | assert 6 == len( 32 | self.anchors), 'the model type does not match with anchors, check anchors or type param' 33 | else: 34 | assert 9 == len( 35 | self.anchors), 'the model type does not match with anchors, check anchors or type param' 36 | 37 | self.input = tf.placeholder(tf.float32, [self.batch_size] + self.hw + [3]) 38 | self.is_training = tf.placeholder(tf.bool, shape=[]) 39 | self.pred = model(self.input, len(self.classes), self.anchors, net_type, self.is_training, False) 40 | 41 | print('start load net_type: {}_{}_model'.format(net_type, tiny)) 42 | # load weights 43 | conf = tf.ConfigProto() 44 | # conf.gpu_options.allow_growth = True 45 | 46 | # change fraction according to your GPU 47 | conf.gpu_options.per_process_gpu_memory_fraction = 0.05 48 | self.sess = tf.Session(config=conf) 49 | saver = tf.train.Saver() 50 | saver.restore(self.sess, self.config.weight_path) 51 | self.color_table = get_color_table(len(self.classes)) 52 | 53 | def _get_anchors(self): 54 | """loads the anchors from a file""" 55 | with open(self.anchor_path) as f: 56 | anchors = f.readline() 57 | anchors = [float(x) for x in anchors.split(',')] 58 | return np.array(anchors).reshape(-1, 2) 59 | 60 | def _get_classes(self): 61 | """loads the classes""" 62 | with open(self.config.classes_path) as f: 63 | class_names = f.readlines() 64 | class_names = [c.strip() for c in class_names] 65 | return class_names 66 | 67 | def forward(self, img): 68 | """ 69 | :param img: shape = (h,w,c), 0-255 70 | :return: 71 | """ 72 | height, width = img.shape[:2] 73 | img_ = cv2.resize(img, tuple(self.hw)[::-1]) 74 | h_r = height / self.hw[0] 75 | w_r = width / self.hw[1] 76 | 77 | im_data = np.expand_dims(img_[..., ::-1], 0) / 255.0 78 | boxes = 
self.sess.run(self.pred, feed_dict={self.input: im_data, self.is_training: False})

        vis_img = []
        for b in range(self.batch_size):
            picked_boxes = pick_box(boxes[b], 0.3, 0.6, self.hw, self.classes)
            true_boxes = get_true_box(picked_boxes, w_r, h_r)
            per_img = img
            per_img = plot_img(per_img, true_boxes, self.color_table, self.classes)
            print('find {} boxes'.format(len(true_boxes)))
            print(true_boxes)
            vis_img.append(per_img)
        return vis_img[0]

    def detect_image(self, img_path):
        img = cv2.imread(img_path)
        if img is None:
            return None
        img = self.forward(img)
        cv2.imshow('img', img)
        cv2.imwrite('tiny.jpg', img)
        cv2.waitKey(0)
        return 1

    def detect_video(self, video_path):
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            raise IOError("Couldn't open webcam or video")
        # video_FourCC = -1
        video_FourCC = cv2.VideoWriter_fourcc(*'XVID')
        video_fps = cap.get(cv2.CAP_PROP_FPS)
        width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # writer = cv2.VideoWriter('output.mp4', video_FourCC, video_fps, (width, height))
        writer = cv2.VideoWriter('output.avi', video_FourCC, video_fps, (width, height))

        total_time = 0
        curr_fps = 0
        fps = "FPS: ??"
        time1 = time.time()

        while True:
            ret, frame = cap.read()
            if ret:
                out = self.forward(frame)
                time2 = time.time()
                d_time = time2 - time1
                time1 = time2
                total_time += d_time
                curr_fps += 1
                if total_time >= 1:
                    fps = "FPS: {}".format(curr_fps)
                    total_time -= 1
                    curr_fps = 0

                out = cv2.putText(out, fps, tuple(np.int32([20, 30])),
                                  cv2.FONT_HERSHEY_TRIPLEX, 1, (0, 0, 255))
                out = cv2.resize(out, (width, height))
                cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
                cv2.imshow('result', out)
                cv2.waitKey(1)
                writer.write(out)
            else:
                break


if __name__ == '__main__':
    configs = get_config()
    yolo = YOLO(configs)
    if configs.video:
        yolo.detect_video(configs.video)
    elif configs.image:
        yolo.detect_image(configs.image)
    else:
        while True:
            img_path = input('input image path:')
            if not yolo.detect_image(img_path):
                print('check your image path')
                continue
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
# yolo3-tensorflow
TensorFlow implementation of YOLO v3 object detection.
The model can be full or tiny, and the backbone can be cnn or a mobilenet (mobilenet_v1, mobilenet_v2);
that gives 6 combinations, but 1 of them has too few parameters and performs badly,
so you should build one of these 5 combinations:
* cnn + full
* cnn + tiny
* mobilenet_v1 + full
* mobilenet_v2 + full
* mobilenet_v2 + tiny

These 5 frameworks are provided in this repository.

## Dependencies
python3
tensorflow >= 1.12
opencv
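A minimal environment sketch; the exact pip package names and version pins below are assumptions, not something the repository specifies (`matplotlib` is needed because `util/image_utils.py` imports it for the HSV color jitter):
```
pip install "tensorflow>=1.12,<2.0" opencv-python numpy matplotlib
```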
## Quick start
* cnn full yolo3
1. Download the official [yolov3.weights](https://pjreddie.com/media/files/yolov3.weights) and put it in the `model_data` folder of the project.
2. Run the command `python convert_weights.py full` to convert the weights to a TensorFlow checkpoint, which will be written to `logs/cnn_full/` and named `cnn_full_model.data-00000-of-00001`.
3. Run the command `python yolo.py` or `python yolo.py -w logs/cnn_full/cnn_full_model` and input the image path to detect.
4. Detection example: see `images/full.jpg`.

* cnn tiny yolo3
1. Download the official [yolov3-tiny.weights](https://pjreddie.com/media/files/yolov3-tiny.weights) and put it in the `model_data` folder of the project.
2. Run the command `python convert_weights.py tiny` to convert the weights to a TensorFlow checkpoint, which will be written to `logs/cnn_tiny/` and named `cnn_tiny_model.data-00000-of-00001`.
3. Run the command `python yolo.py -w logs/cnn_tiny/cnn_tiny_model` and input the image path to detect.
4. Detection example: see `images/tiny.jpg`.


## Train

1. Prepare the dataset
Before training, you should generate your own annotation file and class names file.
One row per image.
Row format: image_file_path box1 box2 ... boxN
Box format: x_min,y_min,x_max,y_max,class_id (no space)
For the VOC dataset, try `python util/voc_annotation.py`.
For your own dataset, you should adapt [util/voc_annotation.py](util/voc_annotation.py).
Here is an example:
```
path/to/img1.jpg 50,100,150,200,0 30,50,200,120,3
path/to/img2.jpg 120,300,250,600,2
...
```

2. Prepare yolo anchors
Run `python util/kmeans.py` to generate anchors. Note that the anchor number
should be 9 if you want to train the full yolo; otherwise it should be 6.

3. Start to train
The training arguments can be seen in [config/train_config.py](config/train_config.py).
```
usage: train.py [-h] [-n NET_TYPE] [-t TINY] [-b BATCH_SIZE] [-e EPOCH]
                [-lr LEARN_RATE] [-pt PRETRAIN_PATH]
                [--anchor_path ANCHOR_PATH] [--train_path TRAIN_PATH]
                [--valid_path VALID_PATH] [--classes_path CLASSES_PATH]
                [-d DEBUG]

optional arguments:
  -h, --help            show this help message and exit
  -n NET_TYPE, --net_type NET_TYPE
                        net type: cnn, mobilenetv1, mobilenetv2 or mobilenetv3
  -t TINY, --tiny TINY  whether to use tiny yolo or not
  -b BATCH_SIZE, --batch_size BATCH_SIZE
                        batch_size
  -e EPOCH, --epoch EPOCH
                        epoch
  -lr LEARN_RATE, --learn_rate LEARN_RATE
                        learn_rate
  -pt PRETRAIN_PATH, --pretrain_path PRETRAIN_PATH
                        pretrain path
  --anchor_path ANCHOR_PATH
                        anchor path
  --train_path TRAIN_PATH
                        train file path
  --valid_path VALID_PATH
                        valid file path
  --classes_path CLASSES_PATH
                        classes path
  -d DEBUG, --debug DEBUG
                        whether to print per-item loss
```
The default framework is cnn + full. If you want to train the others, pass
the `-n` (cnn, mobilenetv1 or mobilenetv2) and `-t` (True or False) arguments.

4. To keep it simple
Ready-made scripts are in the `shell` folder. Just run `CUDA_VISIBLE_DEVICES='0' sh ./shell/train_cnn_full.sh`, or run
`CUDA_VISIBLE_DEVICES='0' nohup stdbuf -oL sh ./shell/train_cnn_full.sh > logs/cnn_full.txt &` in the background, so the log
will be written to cnn_full.txt.
You can also change other arguments such as batch_size, epoch and so on.
If you want to use a pretrained model, you should pass the pretrain path. I will provide the pretrained weights later.

5. NOTE
The mobilenets converge more slowly than the plain cnn, so train them for more epochs.

6. Tensorboard
You can use TensorBoard to watch the training trend.
Run `tensorboard --logdir ./ --host 127.0.0.1`
and you can see the mAP score (see `images/mAP.png`).

7. Test your trained weights on your test dataset with
`python test.py`
You may need to change the settings in [config/pred_config.py](config/pred_config.py); an example invocation is shown below.
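For example, a hypothetical test run; the checkpoint path below matches the default in `config/pred_config.py` and assumes you have trained such a checkpoint and generated `model_data/test.txt` (which `test.py` reads) in the annotation format from step 1:
```
python test.py -w logs/cnn_full/cnn_full_model_epoch_20 --classes_path model_data/coco_classes.txt
```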

## Predict
The prediction arguments can be seen in [config/pred_config.py](config/pred_config.py).
```
usage: yolo.py [-h] [-i IMAGE] [-v VIDEO] [-w WEIGHT_PATH] [--score SCORE]
               [--classes_path CLASSES_PATH]

optional arguments:
  -h, --help            show this help message and exit
  -i IMAGE, --image IMAGE
                        image path
  -v VIDEO, --video VIDEO
                        video path
  -w WEIGHT_PATH, --weight_path WEIGHT_PATH
                        weight path
  --score SCORE         score threshold
  --classes_path CLASSES_PATH
                        classes path
```
Note that the weights filename should look like `cnn_full_model.xxx`, `cnn_tiny_model.xxx`, and so on:
the framework is rebuilt from the words in the filename ('cnn' plus 'full', 'cnn' plus 'tiny', etc.).
You can predict on an image or a video.
For example:
`python yolo.py -w weight_path`
`python yolo.py -i image_path -w weight_path`
`python yolo.py -v video_path -w weight_path`
--------------------------------------------------------------------------------
/util/box_utils.py:
--------------------------------------------------------------------------------
import numpy as np
import tensorflow as tf


def box_anchor_iou(b1, b2):
    '''Return iou array
    Parameters
    ----------
    b1: array, shape=(batch,... 2), wh
    b2: array, shape=(j, 2), wh
    Returns
    -------
    iou: array, shape=(i1,...,iN, j)
    '''

    # Expand dim to apply broadcasting.
    b1 = np.expand_dims(b1, -2)
    b1_mins = -b1 / 2
    b1_maxes = b1 / 2

    # Expand dim to apply broadcasting.
    b2 = np.expand_dims(b2, 0)
    b2_mins = -b2 / 2
    b2_maxes = b2 / 2

    intersect_mins = np.maximum(b1_mins, b2_mins)
    intersect_maxes = np.minimum(b1_maxes, b2_maxes)
    intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
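    # both inputs are treated as centered at the origin (mins = -wh/2), so only the
    # widths and heights matter: this IoU compares shapes, which is what anchor matching needs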
29 | intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1] 30 | b1_area = b1[..., 0] * b1[..., 1] 31 | b2_area = b2[..., 0] * b2[..., 1] 32 | iou = intersect_area / (b1_area + b2_area - intersect_area) 33 | 34 | return iou 35 | 36 | 37 | def pick_box(boxes, score_threshold, nms_iou_threshold, hw, classes): 38 | """ 39 | :param boxes: (boxes_num, 5+numclass),xywh 40 | :param score_threshold: score_threshold 41 | :param nms_iou_threshold: nms iou_threshold 42 | :param hw: sacled_image height and width 43 | :param classes: classes num 44 | :return: 45 | """ 46 | score = boxes[..., 4:5] * boxes[..., 5:] 47 | idx = np.where(score > score_threshold) 48 | box_select = boxes[idx[:2]] 49 | box_xywh = box_select[:, :4] 50 | box_xyxy = wh2xy_np(box_xywh) 51 | if not len(box_xyxy): 52 | return [] 53 | box_truncated = [] 54 | for box_k in box_xyxy: 55 | box_k[0] = box_k[0] if box_k[0] >= 0 else 0 56 | box_k[1] = box_k[1] if box_k[1] >= 0 else 0 57 | box_k[2] = box_k[2] if box_k[2] <= hw[1] else hw[1] 58 | box_k[3] = box_k[3] if box_k[3] <= hw[0] else hw[0] 59 | box_truncated.append(box_k) 60 | box_xyxy = np.stack(box_truncated) 61 | box_socre = score[idx] 62 | clsid = idx[2] 63 | picked_boxes = nms_np( 64 | np.concatenate([box_xyxy, box_socre.reshape([-1, 1]), clsid.reshape([-1, 1])], -1), 65 | len(classes), iou_threshold=nms_iou_threshold) 66 | return picked_boxes 67 | 68 | 69 | def nms_np(boxes, classes, iou_threshold=0.3, max_output=20): 70 | """Return nms 71 | Parameters 72 | ---------- 73 | :param boxes: shape=(boxnum 6), xyxy,score,cls 74 | :param iou_threshold: iou_threshold 75 | :param max_output: max_output 76 | :param classes: total_classes_num 77 | 78 | Returns 79 | ------- 80 | nms boxes 81 | """ 82 | 83 | picked_boxes = [] 84 | 85 | for c in range(classes): 86 | b = boxes[boxes[..., -1] == c] 87 | score = b[..., 4] 88 | order = np.argsort(score) 89 | count = 0 90 | while order.size > 0 and count < max_output: 91 | # The index of largest confidence score 92 | index = order[-1] 93 | 94 | # Pick the bounding box with largest confidence score 95 | picked_boxes.append(b[index]) 96 | 97 | b1_mins = b[index][0:2] 98 | b1_maxes = b[index][2:4] 99 | b1_wh = b1_maxes - b1_mins 100 | 101 | b2_mins = b[order[:-1]][..., 0:2] 102 | b2_maxes = b[order[:-1]][..., 2:4] 103 | b2_wh = b2_maxes - b2_mins 104 | 105 | intersect_mins = np.maximum(b1_mins, b2_mins) 106 | intersect_maxes = np.minimum(b1_maxes, b2_maxes) 107 | intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.) 
108 | intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1] 109 | b1_area = b1_wh[..., 0] * b1_wh[..., 1] 110 | b2_area = b2_wh[..., 0] * b2_wh[..., 1] 111 | iou = intersect_area / (b1_area + b2_area - intersect_area) 112 | 113 | left = np.where(iou < iou_threshold) 114 | order = order[left] 115 | count += 1 116 | 117 | return picked_boxes 118 | 119 | 120 | def xy2wh_np(b): 121 | """ 122 | :param b: list xmin ymin xmax ymax 123 | :return: shape=(...,4) x0 y0 w h 124 | """ 125 | xmin, ymin, xmax, ymax = b[..., 0:1], b[..., 1:2], b[..., 2:3], b[..., 3:4] 126 | x0 = (xmin + xmax) / 2.0 127 | y0 = (ymin + ymax) / 2.0 128 | w = xmax - xmin 129 | h = ymax - ymin 130 | return np.concatenate([x0, y0, w, h], -1) 131 | 132 | 133 | def wh2xy_np(b): 134 | """ 135 | :param b: shape=(...,4) x0 y0 w h 136 | :return: shape=(...,4) xmin ymin xmax ymax 137 | """ 138 | x0, y0, w, h = b[..., 0:1], b[..., 1:2], b[..., 2:3], b[..., 3:4] 139 | xmin = x0 - w / 2.0 140 | xmax = x0 + w / 2.0 141 | ymin = y0 - h / 2.0 142 | ymax = y0 + h / 2.0 143 | return np.concatenate([xmin, ymin, xmax, ymax], -1) 144 | 145 | 146 | def box_iou(b1, b2): 147 | '''Return iou tensor 148 | Parameters 149 | ---------- 150 | b1: tensor, shape=(batch,... 4), xywh 151 | b2: tensor, shape=(j, 4), xywh 152 | Returns 153 | ------- 154 | iou: tensor, shape=(i1,...,iN, j) 155 | ''' 156 | 157 | # Expand dim to apply broadcasting. 158 | b1 = tf.expand_dims(b1, -2) 159 | b1_xy = b1[..., :2] 160 | b1_wh = b1[..., 2:4] 161 | b1_wh_half = b1_wh / 2. 162 | b1_mins = b1_xy - b1_wh_half 163 | b1_maxes = b1_xy + b1_wh_half 164 | 165 | # Expand dim to apply broadcasting. 166 | b2 = tf.expand_dims(b2, 0) 167 | b2_xy = b2[..., :2] 168 | b2_wh = b2[..., 2:4] 169 | b2_wh_half = b2_wh / 2. 170 | b2_mins = b2_xy - b2_wh_half 171 | b2_maxes = b2_xy + b2_wh_half 172 | 173 | intersect_mins = tf.maximum(b1_mins, b2_mins) 174 | intersect_maxes = tf.minimum(b1_maxes, b2_maxes) 175 | intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.) 176 | intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1] 177 | b1_area = b1_wh[..., 0] * b1_wh[..., 1] 178 | b2_area = b2_wh[..., 0] * b2_wh[..., 1] 179 | iou = tf.math.divide(intersect_area, b1_area + b2_area - intersect_area, name='iou') 180 | 181 | return iou 182 | 183 | 184 | def box_iou_np(b1, b2): 185 | """ 186 | Return iou tensor 187 | Parameters 188 | ---------- 189 | b1: array shape=(i, 4), xyxy 190 | b2: array, shape=(j, 4), xyxy 191 | Returns 192 | ------- 193 | iou: array, shape=(i1,...,iN, j) 194 | """ 195 | 196 | # Expand dim to apply broadcasting. 197 | b1 = np.expand_dims(b1[...,:4], -2) 198 | 199 | # Expand dim to apply broadcasting. 200 | b2 = np.expand_dims(b2[...,:4], 0) 201 | 202 | intersect_mins = np.maximum(b1[...,0:2], b2[...,0:2]) 203 | intersect_maxes = np.minimum(b1[...,2:4], b2[...,2:4]) 204 | intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.) 
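    # b1 was expanded to (i, 1, 4) and b2 to (1, j, 4), so the arrays here broadcast
    # to shape (i, j) and the division below yields the full pairwise IoU matrix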
205 | intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1] 206 | b1_area = (b1[..., 2] - b1[..., 0]) * (b1[..., 3] - b1[..., 1]) 207 | b2_area = (b2[..., 2] - b2[..., 0]) * (b2[..., 3] - b2[..., 1]) 208 | iou = intersect_area / (b1_area + b2_area - intersect_area) 209 | 210 | return iou 211 | 212 | 213 | 214 | def xy2wh(b): 215 | """ 216 | :param b: shape=(...,4) xmin ymin xmax ymax 217 | :return: shape=(...,4) x0 y0 w h 218 | """ 219 | xmin, ymin, xmax, ymax = b[..., 0:1], b[..., 1:2], b[..., 2:3], b[..., 3:4] 220 | x0 = (xmin + xmax) / 2.0 221 | y0 = (ymin + ymax) / 2.0 222 | w = xmax - xmin 223 | h = ymax - ymin 224 | return tf.concat([x0, y0, w, h], -1) 225 | 226 | 227 | def wh2xy(b): 228 | """ 229 | :param b: shape=(...,4) x0 y0 w h 230 | :return: shape=(...,4) xmin ymin xmax ymax 231 | """ 232 | x0, y0, w, h = b[..., 0:1], b[..., 1:2], b[..., 2:3], b[..., 3:4] 233 | xmin = x0 - w / 2.0 234 | xmax = x0 + w / 2.0 235 | ymin = y0 - h / 2.0 236 | ymax = y0 + h / 2.0 237 | return tf.concat([xmin, ymin, xmax, ymax], -1) 238 | 239 | 240 | def np_sigmoid(x): 241 | return 1 / (1 + np.exp(-x)) 242 | 243 | 244 | def get_true_box(picked_boxes, w_r, h_r): 245 | """ get original true box according to ori image scale""" 246 | true_boxes = [] 247 | for co, bbox in enumerate(picked_boxes): 248 | bbox[0] *= w_r 249 | bbox[2] *= w_r 250 | bbox[1] *= h_r 251 | bbox[3] *= h_r 252 | true_boxes.append(bbox) 253 | if not len(true_boxes): 254 | return true_boxes 255 | true_boxes = np.concatenate(true_boxes, 0).reshape(-1, 6) 256 | return true_boxes 257 | 258 | if __name__ == '__main__': 259 | bx = tf.placeholder(tf.float32, [2, 4, 4]) 260 | xy2wh(bx) 261 | -------------------------------------------------------------------------------- /util/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from util.box_utils import box_iou_np 4 | from collections import defaultdict 5 | 6 | 7 | def np_sigmoid(x): 8 | return 1 / (1 + np.exp(-x)) 9 | 10 | 11 | def sec2time(sec, n_msec=3): 12 | ''' Convert seconds to 'D days, HH:MM:SS.FFF' ''' 13 | m, s = divmod(sec, 60) 14 | h, m = divmod(m, 60) 15 | d, h = divmod(h, 24) 16 | if n_msec > 0: 17 | pattern = '%%02dh %%02dm %%0%d.%dfs' % (n_msec + 3, n_msec) 18 | else: 19 | pattern = r'%02dh %02dm %02s' 20 | if d == 0: 21 | return pattern % (h, m, s) 22 | return ('%d d, ' + pattern) % (d, h, m, s) 23 | 24 | 25 | def cal_fp_fn_tp_tn(detection, ground_truth, FP_TP, GT_NUM, classes, iou_thres_list): 26 | """ 27 | calculate FP TP FN TN accroding to detection and ground truth 28 | :param detection: a dict, the format: 29 | 30 | { 31 | image1: { class1: [ 32 | [xmin, ymim, xmax, ymax, confidence_score], # obj1 33 | [xmin, ymim, xmax, ymax, confidence_score], # obj2 34 | ... 35 | ], 36 | class2: [[xmin, ymim, xmax, ymax, confidence_score]], 37 | ... 38 | }, 39 | 40 | image2: { class1: [ 41 | [xmin, ymim, xmax, ymax, confidence_score], # obj1 42 | [xmin, ymim, xmax, ymax, confidence_score] # obj2 43 | ... 44 | ], 45 | ... 46 | }, 47 | ... 48 | } 49 | 50 | 51 | :param ground_truth: a dict: 52 | { 53 | image1: { class1: [ 54 | [xmin, ymim, xmax, ymax], # obj1 55 | [xmin, ymim, xmax, ymax] # obj2 56 | ... 57 | ], 58 | class2: [ 59 | [xmin, ymim, xmax, ymax], # obj1 60 | [xmin, ymim, xmax, ymax] # obj2 61 | ... 62 | ], 63 | ... 64 | }, 65 | 66 | image2: { class1: [ 67 | [xmin, ymim, xmax, ymax], # obj1 68 | [xmin, ymim, xmax, ymax] # obj2 69 | ... 70 | ], 71 | ... 72 | }, 73 | ... 
24 |
25 | def cal_fp_fn_tp_tn(detection, ground_truth, FP_TP, GT_NUM, classes, iou_thres_list):
26 |     """
27 |     calculate FP TP FN TN according to the detections and ground truth
28 |     :param detection: a dict, the format:
29 |
30 |     {
31 |     image1: { class1: [
32 |                 [xmin, ymin, xmax, ymax, confidence_score],  # obj1
33 |                 [xmin, ymin, xmax, ymax, confidence_score],  # obj2
34 |                 ...
35 |                 ],
36 |               class2: [[xmin, ymin, xmax, ymax, confidence_score]],
37 |               ...
38 |             },
39 |
40 |     image2: { class1: [
41 |                 [xmin, ymin, xmax, ymax, confidence_score],  # obj1
42 |                 [xmin, ymin, xmax, ymax, confidence_score]   # obj2
43 |                 ...
44 |                 ],
45 |               ...
46 |             },
47 |     ...
48 |     }
49 |
50 |
51 |     :param ground_truth: a dict:
52 |     {
53 |     image1: { class1: [
54 |                 [xmin, ymin, xmax, ymax],  # obj1
55 |                 [xmin, ymin, xmax, ymax]   # obj2
56 |                 ...
57 |                 ],
58 |               class2: [
59 |                 [xmin, ymin, xmax, ymax],  # obj1
60 |                 [xmin, ymin, xmax, ymax]   # obj2
61 |                 ...
62 |                 ],
63 |               ...
64 |             },
65 |
66 |     image2: { class1: [
67 |                 [xmin, ymin, xmax, ymax],  # obj1
68 |                 [xmin, ymin, xmax, ymax]   # obj2
69 |                 ...
70 |                 ],
71 |               ...
72 |             },
73 |     ...
74 |     }
75 |
76 |     :param FP_TP: a dict filled in place and used as the result
77 |     {
78 |     iou_thres1: {
79 |         class1: [
80 |             [False, confidence_score],  # image1_obj1, False means FP, True means TP
81 |             [False, confidence_score],  # image1_obj2, False means FP, True means TP
82 |             [False, confidence_score],  # image2_obj1, False means FP, True means TP
83 |             [False, confidence_score],  # image2_obj2, False means FP, True means TP
84 |             ],
85 |         class2: [[False, confidence_score]],
86 |         ...
87 |     },
88 |
89 |     iou_thres2: {
90 |         class1: [
91 |             [False, confidence_score],  # image1_obj1, False means FP, True means TP
92 |             [False, confidence_score],  # image1_obj2, False means FP, True means TP
93 |             [False, confidence_score],  # image2_obj1, False means FP, True means TP
94 |             [False, confidence_score],  # image2_obj2, False means FP, True means TP
95 |             ],
96 |         class2: [[False, confidence_score]],
97 |         ...
98 |     },
99 |     ...
100 |     }
101 |
102 |
103 |     :param GT_NUM: a dict that stores the total number of gt boxes per class, used to calculate the recall rate
104 |     {
105 |     class1: num1,
106 |     class2: num2,
107 |     ...
108 |     }
109 |
110 |     :param classes: list, class names
111 |     :param iou_thres_list: list, iou thresholds
112 |
113 |
114 |     """
115 |     for i in detection.keys():  # image file name
116 |         det_objs = detection[i]  # detection dict
117 |         gt_objs = ground_truth[i]  # gt dict
118 |         for j in classes:  # class name
119 |             det_boxes = np.array(det_objs[j])  # detection boxes
120 |             gt_boxes = np.array(gt_objs[j])  # gt boxes
121 |
122 |             if not len(gt_boxes):  # if there are no gt boxes, every detection box is a FP
123 |                 for iou_thres in iou_thres_list:
124 |                     for box_index, box in enumerate(det_boxes):  # init
125 |                         FP_TP[iou_thres][j].append([False, box[4]])
126 |                 continue
127 |
128 |             GT_NUM[j] += len(gt_boxes)
129 |
130 |             if not len(det_boxes):  # if there are gt boxes but no detections, only count the gt boxes
131 |                 continue
132 |
133 |             ious = box_iou_np(det_boxes, gt_boxes)  # calculate iou
134 |             # ious_larger = np.where(ious > iou_thres, ious, np.zeros_like(ious))
135 |             ious_index = np.argmax(ious, 0)  # per gt box, the detection with max iou becomes the TP candidate; the rest stay FP
136 |
137 |             for iou_thres in iou_thres_list:
138 |                 for box_index, box in enumerate(det_boxes):  # init
139 |                     FP_TP[iou_thres][j].append([False, box[4]])
140 |
141 |                 for gt_index in range(len(gt_boxes)):
142 |                     selected = ious_index[gt_index]
143 |                     sel_index = len(det_boxes) - selected - 1
144 |                     FP_TP[iou_thres][j][~sel_index][0] = ious[selected, gt_index] >= iou_thres
145 |
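# Worked example of the ~sel_index bookkeeping above (illustrative numbers):
# FP_TP[iou_thres][j] accumulates entries across images, so the entry for this
# image's detection `selected` has to be addressed from the end of the list.
def _demo_sel_index():
    det_count, selected = 3, 1
    sel_index = det_count - selected - 1  # 1
    # ~sel_index == -(det_count - selected): the second-to-last entry just
    # appended, i.e. exactly the row belonging to detection index 1
    assert ~sel_index == -2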
146 |
147 | def cal_mAP(FP_TP, GT_NUM, classes, iou_thres_list):
148 |     """
149 |     calculate mAP
150 |     :param FP_TP: the dict filled by cal_fp_fn_tp_tn
151 |     {
152 |     iou_thres1: {
153 |         class1: [
154 |             [False, confidence_score],  # image1_obj1, False means FP, True means TP
155 |             [False, confidence_score],  # image1_obj2, False means FP, True means TP
156 |             [False, confidence_score],  # image2_obj1, False means FP, True means TP
157 |             [False, confidence_score],  # image2_obj2, False means FP, True means TP
158 |             ],
159 |         class2: [[False, confidence_score]],
160 |         ...
161 |     },
162 |
163 |     iou_thres2: {
164 |         class1: [
165 |             [False, confidence_score],  # image1_obj1, False means FP, True means TP
166 |             [False, confidence_score],  # image1_obj2, False means FP, True means TP
167 |             [False, confidence_score],  # image2_obj1, False means FP, True means TP
168 |             [False, confidence_score],  # image2_obj2, False means FP, True means TP
169 |             ],
170 |         class2: [[False, confidence_score]],
171 |         ...
172 |     },
173 |     ...
174 |     }
175 |
176 |
177 |     :param GT_NUM: a dict that stores the total number of gt boxes per class, used to calculate the recall rate
178 |     {
179 |     class1: num1,
180 |     class2: num2,
181 |     ...
182 |     }
183 |
184 |     :param classes: list, class names
185 |     :param iou_thres_list: list, iou thresholds
186 |
187 |     """
188 |     iou_class_AP = {}
189 |     iou_mAP = {}
190 |     for iou_thres in iou_thres_list:
191 |         class_AP = {}
192 |         for cls in classes:
193 |             fp_tp = FP_TP[iou_thres][cls]
194 |             fp_tp = sorted(fp_tp, key=lambda x: x[1], reverse=True)
195 |             TP, total_det = 0, 0
196 |             precision = [1.0]
197 |             recall = [0.0]
198 |
199 |             # calculate pr for each box
200 |             for per_fp_tp in fp_tp:
201 |                 total_det += 1
202 |                 if per_fp_tp[0]:
203 |                     TP += 1
204 |                 precision.append(TP / total_det)
205 |                 if not GT_NUM[cls]:
206 |                     print('your validation or test set is too small to cover all classes')
207 |                     recall.append(0)
208 |                 else:
209 |                     recall.append(TP / GT_NUM[cls])
210 |
211 |             # calculate AP by all-point interpolation
212 |             AP = 0
213 |             i_old = 0
214 |             for i in range(1, len(recall)):
215 |                 if recall[i] == recall[i_old]:
216 |                     continue
217 |                 p = max(precision[i:])
218 |                 AP += p * (recall[i] - recall[i_old])
219 |                 i_old = i
220 |             class_AP[cls] = AP
221 |         iou_class_AP[iou_thres] = class_AP
222 |         iou_mAP[iou_thres] = sum(class_AP.values()) / len(classes)
223 |
224 |     return iou_class_AP, iou_mAP
225 |
226 |
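# The all-point interpolation above, worked on toy numbers (illustrative, not
# from the repo's data): each recall step is weighted by the best precision
# achievable at that recall or higher.
def _demo_all_point_interpolation():
    recall = [0.0, 0.5, 1.0]
    precision = [1.0, 1.0, 0.5]
    AP, i_old = 0.0, 0
    for i in range(1, len(recall)):
        if recall[i] == recall[i_old]:
            continue
        AP += max(precision[i:]) * (recall[i] - recall[i_old])  # 1.0*0.5 + 0.5*0.5
        i_old = i
    assert abs(AP - 0.75) < 1e-9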
227 | if __name__ == '__main__':
228 |     detection = {
229 |         "image1": {
230 |             "class1": [[1, 2, 3, 4, 5],
231 |                        [10, 20, 30, 40, 4],
232 |                        [1, 2, 3, 40, 3],
233 |                        ],
234 |             "class2": [[1, 2, 3, 4, 5],
235 |                        [10, 20, 30, 40, 4],
236 |                        [1, 2, 3, 40, 3],
237 |                        ]
238 |         },
239 |         "image2": {
240 |             "class1": [[1, 2, 3, 4, 5],
241 |                        [10, 20, 30, 40, 4],
242 |                        [1, 2, 3, 40, 3],
243 |                        ],
244 |             "class2": [[1, 2, 3, 4, 5],
245 |                        [10, 20, 30, 40, 4],
246 |                        [1, 2, 3, 40, 3],
247 |                        ]
248 |         }
249 |
250 |     }
251 |
252 |     GT = {
253 |         "image1": {
254 |             "class1": [[1, 2, 3.4, 4],
255 |                        [10, 20, 30, 40],
256 |                        ],
257 |             "class2": [[1, 2, 3.4, 4],
258 |                        ],
259 |         },
260 |         "image2": {
261 |             "class1": [[1, 2, 3.4, 4],
262 |                        [10, 20, 30, 40],
263 |                        ],
264 |             "class2": [[1, 2, 3.4, 4],
265 |                        ],
266 |         },
267 |     }
268 |     fp = defaultdict(lambda: defaultdict(list))
269 |     nums = defaultdict(int)
270 |     cal_fp_fn_tp_tn(detection, GT, fp, nums, ["class1", "class2"], [0.4, 0.9, 1])
271 |     a = cal_mAP(fp, nums, ["class1", "class2"], [0.4, 0.9, 1])
272 |     print(a)
273 |
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | import time
2 | from collections import defaultdict
3 | from copy import deepcopy
4 | from os import getcwd
5 | from os.path import join, split
6 |
7 | import numpy as np
8 | import tensorflow as tf
9 |
10 | from config.train_config import get_config
11 | from net.yolo3_net import loss, model
12 | from util.box_utils import box_anchor_iou, pick_box, xy2wh_np
13 | from util.image_utils import get_color_table, plot_img, read_image_and_lable
14 | from util.utils import sec2time, cal_fp_fn_tp_tn, cal_mAP
15 |
16 |
17 | class YOLO():
18 |     def __init__(self, config):
19 |         self.config = config
20 |
21 |         self.batch_size = self.config.batch_size
22 |         self.epoch = self.config.epoch
23 |         self.learn_rate = self.config.learn_rate
24 |
25 |         self.lambda_coord = 5
26 |         self.lambda_noobj = 0.5
27 |         self.lambda_cls = 1
28 |         self.iou_threshold = 0.5  # used to decide whether a box is BG or FG
29 |
30 |         self.ious_thres = [0.5, 0.75]  # used to calculate mAP
31 |
32 |         self.classes = self.__get_classes()
33 |         self.anchors = self.__get_anchors()
34 |         self.hw = [416, 416]
35 |         if self.config.tiny:
36 |             assert 6 == len(
37 |                 self.anchors), 'model type does not match the anchors, check the anchors or the type param'
38 |             self.log_path = join(getcwd(), 'logs', self.config.net_type + '_tiny')
39 |         else:
40 |             assert 9 == len(
41 |                 self.anchors), 'model type does not match the anchors, check the anchors or the type param'
42 |             self.log_path = join(getcwd(), 'logs', self.config.net_type + '_full')
43 |         self.pretrain_path = self.config.pretrain_path
44 |
45 |         self.input = tf.placeholder(tf.float32, [self.batch_size] + self.hw + [3])
46 |         self.is_training = tf.placeholder(tf.bool, shape=[])
47 |         self.label = None
48 |
49 |         with open(self.config.train_path) as f:
50 |             self.train_data = f.readlines()
51 |         with open(self.config.valid_path) as f:
52 |             self.val_data = f.readlines()
53 |
54 |         self.color_table = get_color_table(len(self.classes))
55 |
56 |     def __get_anchors(self):
57 |         """loads the anchors from a file"""
58 |         with open(self.config.anchor_path) as f:
59 |             anchors = f.readline()
60 |         anchors = [float(x) for x in anchors.split(',')]
61 |         return np.array(anchors).reshape(-1, 2)
62 |
63 |     def __get_classes(self):
64 |         """loads the classes"""
65 |         with open(self.config.classes_path) as f:
66 |             class_names = f.readlines()
67 |         class_names = [c.strip() for c in class_names]
68 |         return class_names
69 |
70 |     def generate_data(self, grid_shape, is_val=False):
71 |
72 |         gds_init = [np.zeros(g_shape[1:3] + [3, 9 + len(self.classes)]) for g_shape in grid_shape]
73 |
74 |         idx = 0
75 |
76 |         GTS = defaultdict(lambda: defaultdict(list))
77 |
78 |         if is_val:
79 |             gts = self.val_data
80 |         else:
81 |             gts = self.train_data
82 |         while True:
83 |             img_files = []
84 |             labels = []
85 |             b = 0
86 |             GTS.clear()
87 |
88 |             while idx < len(gts) - self.batch_size:  # a batch
89 |                 try:
90 |                     res = read_image_and_lable(gts[idx + b], self.hw, is_training=not is_val)
91 |                     # print(idx + b)
92 |                 except IndexError:
93 |                     raise Exception('this should not happen')
94 |                 else:
95 |                     if not res:
96 |                         raise Exception('check your dataset: an image has no label')
97 |
98 |                     img, _label = res
99 |
100 |                     img_files.append(img)
101 |
102 |                     for per_xyxy in _label:
103 |                         GTS[b][self.classes[int(per_xyxy[4])]].append(per_xyxy[:4].tolist())
104 |
105 |                     _label_ = np.concatenate([xy2wh_np(_label[:, :4]), _label[:, 4:]], -1)  # change to xywh
106 |
107 |                     gds = deepcopy(gds_init)
108 |                     for per_label in _label_:
109 |                         x0, y0, w, h = per_label[:4]
110 |                         if w == 0 or h == 0:
111 |                             continue
112 |                         box_iou = box_anchor_iou(self.anchors, per_label[2:4])
113 |                         k = np.argmax(box_iou)
114 |                         div, mod = divmod(int(k), 3)
115 |                         div = len(grid_shape) - 1 - div
116 |                         h_r = self.hw[0] / gds[div].shape[0]
117 |                         w_r = self.hw[1] / gds[div].shape[1]
118 |                         i = int(np.floor(x0 / w_r))
119 |                         j = int(np.floor(y0 / h_r))
120 |
121 |                         gds[div][j, i, mod, 0] = x0 / w_r - i
122 |                         gds[div][j, i, mod, 1] = y0 / h_r - j
123 |                         gds[div][j, i, mod, 2] = np.log(w / self.anchors[k, 0] + 1e-5)
124 |                         gds[div][j, i, mod, 3] = np.log(h / self.anchors[k, 1] + 1e-5)
125 |
126 |                         gds[div][j, i, mod, 4] = x0
127 |                         gds[div][j, i, mod, 5] = y0
128 |                         gds[div][j, i, mod, 6] = w
129 |                         gds[div][j, i, mod, 7] = h
130 |
131 |                         gds[div][j, i, mod, 8] = 1
132 |                         gds[div][j, i, mod, 9 + int(per_label[4])] = 1
133 |
134 |                     gds = [gd.reshape([-1, 3, 9 + len(self.classes)]) for gd in gds]
135 |                     labels.append(np.concatenate(gds, 0))
136 |                     b += 1
137 |                     if len(labels) == self.batch_size:
138 |                         idx += self.batch_size
139 |                         break
140 |             if idx >= len(gts) - self.batch_size:
141 |                 np.random.shuffle(gts)
142 |                 idx = 0
143 |             img_files, labels = np.array(img_files, np.float32), np.array(labels, np.float32)
144 |             if is_val:
145 |                 yield img_files, labels, GTS
146 |             else:
147 |                 yield img_files, labels, idx
148 |
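    # How a label lands in the grids above (worked example, values assumed):
    # with 9 anchors and 3 output grids, the best anchor k = 7 gives
    # div, mod = divmod(7, 3) = (2, 1); grid_shape lists the coarsest grid
    # first, so div = 3 - 1 - 2 = 0 and the box is written to anchor slot 1
    # of the 13x13 grid, at cell (j, i) = (floor(y0 / 32), floor(x0 / 32))
    # for a 416x416 input.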
149 |     def train(self):
150 |         # pred, losses, op = self.create_model()
151 |         pred = model(self.input, len(self.classes), self.anchors, self.config.net_type, self.is_training, True)
152 |         grid_shape = [g.get_shape().as_list() for g in pred[2]]
153 |
154 |         s = sum([g[2] * g[1] for g in grid_shape])
155 |         self.label = tf.placeholder(tf.float32, [self.batch_size, s, 3, 9 + len(self.classes)])
156 |         # for data in self.generate_data(grid_shape):
157 |         #     print()
158 |
159 |         update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
160 |         var_list = tf.global_variables()
161 |
162 |         losses = loss(pred, self.label, self.hw, self.lambda_coord, self.lambda_noobj, self.lambda_cls,
163 |                       self.iou_threshold, self.config.debug)
164 |         opt = tf.train.AdamOptimizer(self.learn_rate)
165 |
166 |         with tf.control_dependencies(update_ops):
167 |             op = opt.minimize(losses)
168 |
169 |         # summary
170 |         writer = tf.summary.FileWriter(self.log_path, max_queue=-1)
171 |         img_tensor = tf.placeholder(tf.float32, [2 * self.batch_size] + self.hw + [3])
172 |
173 |         with tf.name_scope('loss'):
174 |             train_loss_tensor = tf.placeholder(tf.float32)
175 |             val_loss_tensor = tf.placeholder(tf.float32)
176 |             tf.summary.scalar('train_loss', train_loss_tensor)
177 |             tf.summary.scalar('val_loss', val_loss_tensor)
178 |
179 |         with tf.name_scope('mAP'):
180 |             for iou in self.ious_thres:
181 |                 with tf.name_scope('iou{}'.format(iou)):
182 |                     exec('map_with_iou{} = tf.placeholder(tf.float32)'.format(int(iou * 100)))
183 |                     exec('tf.summary.scalar("mAP", map_with_iou{})'.format(int(iou * 100)))
184 |
185 |         with tf.name_scope('per_class_AP'):
186 |             for iou in self.ious_thres:
187 |                 with tf.name_scope('iou{}'.format(iou)):
188 |                     for per_cls in self.classes:
189 |                         per_cls = per_cls.replace(' ', '_')
190 |                         exec('ap_{}_with_iou{} = tf.placeholder(tf.float32)'.format(per_cls, int(iou * 100)))
191 |                         exec('tf.summary.scalar("{}", ap_{}_with_iou{})'.format(per_cls, per_cls, int(iou * 100)))
192 |
193 |         tf.summary.image('img', img_tensor, 2 * self.batch_size)
194 |         summary = tf.summary.merge_all()
195 |
196 |         conf = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
197 |         sess = tf.Session(config=conf)
198 |         # sess = tf_debug.LocalCLIDebugWrapperSession(sess)
199 |         # sess = tf_debug.TensorBoardDebugWrapperSession(sess, "PC-DAIXILI:6001")
200 |
201 |         saver = tf.train.Saver(var_list=var_list, max_to_keep=5)
202 |         # saver = tf.train.Saver()
203 |
204 |         # init
205 |         init = tf.global_variables_initializer()
206 |         sess.run(init)
207 |
208 |         if len(self.pretrain_path):
209 |             flag = 0
210 |             try:
211 |                 print('try to restore the whole graph')
212 |                 saver.restore(sess, self.pretrain_path)
213 |                 print('successfully restored the whole graph')
214 |             except:
215 |                 print('failed to restore the whole graph')
216 |                 flag = 1
217 |             if flag:
218 |                 try:
219 |                     print('try to restore the graph body')
220 |                     restore_weights = [v for v in var_list if 'yolo_head' not in v.name]
221 |                     sv = tf.train.Saver(var_list=restore_weights)
222 |                     sv.restore(sess, self.pretrain_path)
223 |                     print('successfully restored the graph body')
224 |                 except Exception:
225 |                     raise Exception('restore body failed, please check the pretrained weights')
226 |
227 |         total_step = int(np.ceil(len(self.train_data) / self.batch_size)) * self.epoch
228 |
229 |         print('train on {} samples, val on {} samples, batch size {}, total {} epochs'.format(len(self.train_data),
230 |                                                                                               len(self.val_data),
231 |                                                                                               self.batch_size,
232 |                                                                                               self.epoch))
233 |         step = 0
234 |         epoch = 0
235 |         t0 = time.time()
236 |
237 |         DETECTION = defaultdict(lambda: defaultdict(list))
238 |         FP_TP = defaultdict(lambda: defaultdict(list))
239 |         GT_NUMS = defaultdict(int)
240 |
241 |         for data in self.generate_data(grid_shape):
242 |             step += 1
243 |
244 |             img, label, idx = data
245 |             pred_, losses_, _ = sess.run([pred, losses, op], {
246 |                 self.input: img,
247 |                 self.label: label,
248 |                 self.is_training: True
249 |             })
250 |             t1 = time.time()
251 |             print('step:{:= len(self.val_data):
314 |                 break
315 |
316 |             APs, mAPs = cal_mAP(FP_TP, GT_NUMS, self.classes, self.ious_thres)
317 |             print(APs)
318 |             print(mAPs)
319 |             # import pdb
320 |             # pdb.set_trace()
321 |             val_loss_ /= (val_step / self.batch_size)
322 |
323 |             feed_dict = {
324 |                 img_tensor: np.array(vis_img),
325 |                 train_loss_tensor: losses_,
326 |                 val_loss_tensor: val_loss_
327 |             }
328 |
329 |             for iou in self.ious_thres:
330 |                 exec('feed_dict[map_with_iou{0}] = mAPs[{1}]'.format(int(iou * 100), iou))
331 |                 for per_cls in self.classes:
332 |                     per_clses = per_cls.replace(' ', '_')
333 |                     exec(
334 |                         'feed_dict[ap_{0}_with_iou{1}] = APs[{2}]["{3}"]'.format(per_clses, int(iou * 100), iou,
335 |                                                                                  per_cls))
336 |
337 |             ss = sess.run(summary, feed_dict=feed_dict)
338 |             writer.add_summary(ss, epoch)
339 |             saver.save(sess, join(self.log_path, split(self.log_path)[-1] + '_model_epoch_{}'.format(epoch)),
340 |                        write_meta_graph=False, write_state=False)
341 |             print('epoch:{} train_loss:{:< .3f} val_loss:{:< .3f}'.format(
342 |                 epoch, losses_, val_loss_))
343 |             epoch += 1
344 |             if epoch >= self.epoch:
345 |                 break
346 |
347 |
348 | if __name__ == '__main__':
349 |     configs = get_config()
350 |     YOLO(configs).train()
351 |
--------------------------------------------------------------------------------
/net/yolo3_net.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | from util.box_utils import box_iou
4 |
5 | """
6 | (1280 * 640)
7 | input = (640 * 320)
8 | 640 * 320
9 | 320 * 160
10 | 160 * 80
11 | 80 * 40
12 | 40 * 20
13 | 20 * 10
14 | 10 * 5
15 | """
16 | leaky_alpha = 0.1
17 |
18 | xavier_initializer = tf.initializers.glorot_uniform()
19 |
20 |
21 | def conv_block(x, filters, stride, out_channel, net_type, is_training, name='', relu=True):
22 |     """
23 |     :param x: input, nhwc
24 |     :param filters: list [f_w, f_h]
25 |     :param stride: list of int
26 |     :param out_channel: int, out_channel
27 |     :param net_type: cnn / mobilenetv1 / mobilenetv2
28 |     :param is_training: used in BN
29 |     :param name: str
30 |     :param relu: boolean
31 |     :return: the conv output (depthwise + pointwise output for the mobilenet types)
32 |     """
33 |     with tf.name_scope('' + name):
34 |         in_channel = x.shape[3].value
35 |         if net_type == 'cnn':
36 |             with tf.name_scope('cnn'):
37 |                 # weight = tf.Variable(tf.truncated_normal([filters[0], filters[1], in_channel, out_channel], 0, 0.01))
38 |                 weight = tf.Variable(xavier_initializer([filters[0], filters[1], in_channel, out_channel]))
39 |                 if stride[0] == 2:  # refer to "https://github.com/qqwweee/keras-yolo3/issues/8"
40 |                     x = tf.pad(x, tf.constant([[0, 0], [1, 0, ], [1, 0], [0, 0]]))
41 |                     x = tf.nn.conv2d(x, weight, [1, stride[0], stride[1], 1], 'VALID')
42 |                 else:
43 |                     x = tf.nn.conv2d(x, weight, [1, stride[0], stride[1], 1], 'SAME')
44 |                 if relu:
45 |                     x = tf.layers.batch_normalization(x, training=is_training)
46 |                     x = tf.nn.leaky_relu(x, leaky_alpha)
47 |                 else:
48 |                     bias = tf.Variable(tf.zeros(shape=out_channel))
49 |                     x += bias
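        # Why the asymmetric pad above (worked numbers, 416x416 input assumed):
        # with a 3x3 stride-2 kernel, padding only top/left gives 416 -> 417,
        # and the VALID conv yields (417 - 3) // 2 + 1 = 208, reproducing
        # Darknet's downsampling instead of 'SAME' symmetric padding.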
50 |         elif net_type == 'mobilenetv1':
51 |             with tf.name_scope('depthwise'):
52 |                 # depthwise_weight = tf.Variable(tf.truncated_normal([filters[0], filters[1], in_channel, 1], 0, 0.01))
53 |                 depthwise_weight = tf.Variable(xavier_initializer([filters[0], filters[1], in_channel, 1]))
54 |                 x = tf.nn.depthwise_conv2d(x, depthwise_weight, [1, stride[0], stride[1], 1], 'SAME')
55 |                 x = tf.layers.batch_normalization(x, training=is_training)
56 |                 x = tf.nn.relu6(x)
57 |
58 |             with tf.name_scope('pointwise'):
59 |                 # pointwise_weight = tf.Variable(tf.truncated_normal([1, 1, in_channel, out_channel], 0, 0.01))
60 |                 pointwise_weight = tf.Variable(xavier_initializer([1, 1, in_channel, out_channel]))
61 |                 x = tf.nn.conv2d(x, pointwise_weight, [1, 1, 1, 1], 'SAME')
62 |                 if relu:
63 |                     x = tf.layers.batch_normalization(x, training=is_training)
64 |                     x = tf.nn.relu6(x)
65 |                 else:
66 |                     bias = tf.Variable(tf.zeros(shape=out_channel))
67 |                     x += bias
68 |
69 |         elif net_type == 'mobilenetv2':
70 |             tmp_channel = out_channel * 3
71 |             with tf.name_scope('expand_pointwise'):
72 |                 pointwise_weight = tf.Variable(xavier_initializer([1, 1, in_channel, tmp_channel]))
73 |                 x = tf.nn.conv2d(x, pointwise_weight, [1, 1, 1, 1], 'SAME')
74 |                 x = tf.layers.batch_normalization(x, training=is_training)
75 |                 x = tf.nn.relu6(x)
76 |                 print("Activation function: relu6")
77 |             with tf.name_scope('depthwise'):
78 |                 depthwise_weight = tf.Variable(xavier_initializer([filters[0], filters[1], tmp_channel, 1]))
79 |                 x = tf.nn.depthwise_conv2d(x, depthwise_weight, [1, stride[0], stride[1], 1], 'SAME')
80 |                 x = tf.layers.batch_normalization(x, training=is_training)
81 |                 x = tf.nn.relu6(x)
82 |             with tf.name_scope('project_pointwise'):
83 |                 pointwise_weight = tf.Variable(xavier_initializer([1, 1, tmp_channel, out_channel]))
84 |                 x = tf.nn.conv2d(x, pointwise_weight, [1, 1, 1, 1], 'SAME')
85 |                 if relu:
86 |                     x = tf.layers.batch_normalization(x, training=is_training)
87 |                     # x = tf.nn.relu6(x)
88 |                 else:
89 |                     bias = tf.Variable(tf.zeros(shape=out_channel))
90 |                     x += bias
91 |         else:
92 |             raise Exception('net type is wrong, please check')
93 |     return x
94 |
95 |
96 | def residual(x, net_type, is_training, out_channel=1, expand_time=1, stride=1):
97 |     if net_type in ['cnn', 'mobilenetv1']:
98 |         out_channel = x.shape[3].value
99 |         shortcut = x
100 |         x = conv_block(x, [1, 1], [1, 1], out_channel // 2, net_type='cnn', is_training=is_training)
101 |         x = conv_block(x, [3, 3], [1, 1], out_channel, net_type='cnn', is_training=is_training)
102 |         x += shortcut
103 |
104 |     elif net_type == 'mobilenetv2':  # inverted residual block (Inverted Residuals)
105 |         shortcut = x
106 |         in_channel = x.shape[3].value
107 |         tmp_channel = in_channel * expand_time
108 |         with tf.name_scope('expand_pointwise'):  # 1x1 expansion conv: lift into a higher-dimensional feature space (see the depthwise-separable convolution literature)
109 |             pointwise_weight = tf.Variable(xavier_initializer([1, 1, in_channel, tmp_channel]))
110 |             x = tf.nn.conv2d(x, pointwise_weight, [1, 1, 1, 1], 'SAME')
111 |             x = tf.layers.batch_normalization(x, training=is_training)
112 |             x = tf.nn.relu6(x)
113 |         with tf.name_scope('depthwise'):  # depthwise conv
114 |             depthwise_weight = tf.Variable(xavier_initializer([3, 3, tmp_channel, 1]))
115 |             x = tf.nn.depthwise_conv2d(x, depthwise_weight, [1, stride, stride, 1], 'SAME')
116 |             x = tf.layers.batch_normalization(x, training=is_training)
117 |             x = tf.nn.relu6(x)
118 |         with tf.name_scope('project_pointwise'):  # 1x1 projection conv
119 |             pointwise_weight = tf.Variable(xavier_initializer([1, 1, tmp_channel, out_channel]))
120 |             x = tf.nn.conv2d(x, pointwise_weight, [1, 1, 1, 1], 'SAME')
121 |             x = tf.layers.batch_normalization(x, training=is_training)
122 |             # no activation here: linear bottleneck, avoids information loss
123 |             # x = tf.nn.relu6(x)
124 |         x += shortcut  # shortcut connection, mitigates vanishing gradients
125 |
126 |     return x
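# Channel flow through the inverted residual above, on illustrative numbers
# (in_channel = 32, expand_time = 6 as in the MobileNetV2 paper's bottlenecks):
# 32 -> 192 via the 1x1 expansion, 192 -> 192 via the 3x3 depthwise conv, then
# 192 -> out_channel via the linear 1x1 projection. Note the shortcut add
# requires stride == 1 and out_channel == in_channel to be shape-compatible.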
127 |
128 |
129 | def upsample(x, scale):
130 |     new_height = x.shape[1] * scale
131 |     new_width = x.shape[2] * scale
132 |     resized = tf.image.resize_images(x, [new_height, new_width])
133 |     return resized
134 |
135 |
136 | def full_yolo_body(x, out_channel, net_type, is_training):
137 |     channel = out_channel
138 |     if net_type in ['mobilenetv2']:
139 |         net_type = 'mobilenetv1'
140 |     x = conv_block(x, [1, 1], [1, 1], channel // 2, net_type, is_training=is_training)
141 |     x = conv_block(x, [3, 3], [1, 1], channel, net_type, is_training=is_training)
142 |     x = conv_block(x, [1, 1], [1, 1], channel // 2, net_type, is_training=is_training)
143 |     x = conv_block(x, [3, 3], [1, 1], channel, net_type, is_training=is_training)
144 |     x = conv_block(x, [1, 1], [1, 1], channel // 2, net_type, is_training=is_training)
145 |     x_route = x
146 |     x = conv_block(x, [3, 3], [1, 1], channel, net_type, is_training=is_training)
147 |     return x_route, x
148 |
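# full_yolo_body above follows the standard YOLOv3 detection block: five
# alternating 1x1/3x3 convs whose fifth output (x_route) feeds the next, finer
# head after a 1x1 conv and upsampling, while the sixth conv (x) feeds this
# scale's prediction layer.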
149 |
150 | def full_darknet_body(x, net_type, is_training):  # feature-extraction backbone
151 |     """
152 |     full darknet body built by net_type
153 |     :param x:
154 |     :param is_training:
155 |     :param net_type: cnn / mobilenet
156 |     :return:
157 |     """
158 |     if net_type in ['cnn', 'mobilenetv1']:
159 |         x = conv_block(x, [3, 3], [1, 1], 32, 'cnn', is_training=is_training)
160 |
161 |         # down sample
162 |         x = conv_block(x, [3, 3], [2, 2], 64, 'cnn', is_training=is_training)
163 |         for i in range(1):
164 |             x = residual(x, net_type, is_training)
165 |
166 |         # down sample
167 |         x = conv_block(x, [3, 3], [2, 2], 128, 'cnn', is_training=is_training)
168 |         for i in range(2):
169 |             x = residual(x, net_type, is_training)
170 |
171 |         # down sample
172 |         x = conv_block(x, [3, 3], [2, 2], 256, 'cnn', is_training=is_training)
173 |         for i in range(8):
174 |             x = residual(x, net_type, is_training)
175 |         route2 = x
176 |
177 |         # down sample
178 |         x = conv_block(x, [3, 3], [2, 2], 512, 'cnn', is_training=is_training)
179 |         for i in range(8):
180 |             x = residual(x, net_type, is_training)
181 |         route1 = x
182 |
183 |         # down sample
184 |         x = conv_block(x, [3, 3], [2, 2], 1024, 'cnn', is_training=is_training)
185 |         for i in range(4):
186 |             x = residual(x, net_type, is_training)
187 |
188 |     elif net_type == 'mobilenetv2':
189 |
190 |         print('MobileNet V2 ------------------ input image batch shape:', x.shape)
191 |         # x is a 416x416 image; the standard MobileNetV2 input is 224x224, so the layout differs slightly
192 |         x = conv_block(x, [3, 3], [2, 2], 32, 'cnn', is_training=is_training)  # standard conv2d, output 208x208x32
193 |
194 |         print('1 ------------------ batch shape:', x.shape)
195 |         x = conv_block(x, [3, 3], [2, 2], 16, net_type, is_training=is_training)  # bottleneck conv, output 104x104x16, down sample
196 |         x = conv_block(x, [3, 3], [1, 1], 24, net_type, is_training=is_training)  # bottleneck conv, output 104x104x24
197 |         x = residual(x, net_type, is_training, 24, 1)  # inverted residual, output 104x104x24
198 |         x = conv_block(x, [3, 3], [2, 2], 32, net_type, is_training=is_training)  # bottleneck conv, output 52x52x32, down sample
199 |         # print('2 ------------------ batch shape:', x.shape)
200 |         for i in range(2):  # inverted residuals, output 52x52x32
201 |             x = residual(x, net_type, is_training, 32, 1)
202 |         # print('21 ------------------ batch shape:', x.shape)
203 |         print('ROUTE2 ------------------ batch shape:', x.shape)
204 |         route2 = x
205 |
206 |         # print('4 ------------------ batch shape:', x.shape)
207 |         x = conv_block(x, [3, 3], [2, 2], 64, net_type, is_training=is_training)  # bottleneck conv, output 26x26x64, down sample
208 |         for i in range(3):  # inverted residuals, output 26x26x64
209 |             x = residual(x, net_type, is_training, 64, 6)
210 |         x = conv_block(x, [3, 3], [1, 1], 96, net_type, is_training=is_training)  # bottleneck conv, output 26x26x96, change the channel count
211 |         for i in range(2):  # inverted residuals, output 26x26x96
212 |             x = residual(x, net_type, is_training, 96, 6)
213 |         print('ROUTE1 ------------------ batch shape:', x.shape)
214 |         route1 = x
215 |
216 |         # down sample
217 |         print('5 ------------------ batch shape:', x.shape)
218 |         x = conv_block(x, [3, 3], [2, 2], 160, net_type, is_training=is_training)  # bottleneck conv, output 13x13x160, down sample
219 |         for i in range(2):
220 |             x = residual(x, net_type, is_training, 160, 1)
221 |         x = conv_block(x, [3, 3], [1, 1], 320, net_type, is_training=is_training)  # bottleneck conv, output 13x13x320, change the channel count
222 |         print('Final ------------------ batch shape:', x.shape)
223 |     else:
224 |         route1, route2 = [], []
225 |     return x, route1, route2
226 |
227 |
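# Feature-map strides implied by the backbone above (416x416 input assumed):
# route2 comes out at stride 8, route1 at stride 16, and the final x at
# stride 32, feeding the three detection heads below.
def _demo_route_strides(hw=416):
    return hw // 8, hw // 16, hw // 32  # (52, 26, 13)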
228 | def full_yolo_head(x, route1, route2, num_class, anchors, net_type, is_training):
229 |     with tf.name_scope('body_layer1'):
230 |         x_route, x = full_yolo_body(x, 1024, net_type, is_training)
231 |         x = conv_block(x, [1, 1], [1, 1], 3 * (5 + num_class), 'cnn', is_training, "yolo_head1", False)
232 |         fe1, box1, grid1 = yolo(x, anchors[[6, 7, 8]])
233 |
234 |     with tf.name_scope('head_layer2'):
235 |         x = conv_block(x_route, [1, 1], [1, 1], x_route.shape[-1].value // 2, net_type, is_training)
236 |         x = upsample(x, 2)
237 |         x = tf.concat([x, route1], 3)
238 |         x_route, x = full_yolo_body(x, 512, net_type, is_training)
239 |         x = conv_block(x, [1, 1], [1, 1], 3 * (5 + num_class), 'cnn', is_training, "yolo_head2", False)
240 |         fe2, box2, grid2 = yolo(x, anchors[[3, 4, 5]])
241 |
242 |     with tf.name_scope('head_layer3'):
243 |         x = conv_block(x_route, [1, 1], [1, 1], x_route.shape[-1].value // 2, net_type, is_training)
244 |         x = upsample(x, 2)
245 |         x = tf.concat([x, route2], 3)
246 |         x_route, x = full_yolo_body(x, 256, net_type, is_training)
247 |         x = conv_block(x, [1, 1], [1, 1], 3 * (5 + num_class), 'cnn', is_training, "yolo_head3", False)
248 |         fe3, box3, grid3 = yolo(x, anchors[[0, 1, 2]])
249 |
250 |     fe = tf.concat([fe1, fe2, fe3], 1)
251 |     boxes = tf.concat([box1, box2, box3], 1)
252 |     return fe, boxes, grid1, grid2, grid3
253 |
254 |
255 | def tiny_darknet_body(x, net_type, is_training):
256 |     """
257 |     yolo3_tiny body built by net_type
258 |     :param x:
259 |     :param is_training: used in bn
260 |     :param net_type: cnn or mobile-net
261 |     :return:
262 |     """
263 |     if net_type in ['mobilenetv1', 'mobilenetv2']:
264 |         net_type = 'mobilenetv1'
265 |     x = conv_block(x, [3, 3], [1, 1], 16, net_type, is_training)
266 |     x = tf.nn.max_pool(x, [1, 2, 2, 1], [1, 2, 2, 1], 'SAME')
267 |
268 |     x = conv_block(x, [3, 3], [1, 1], 32, net_type, is_training)
269 |     x = tf.nn.max_pool(x, [1, 2, 2, 1], [1, 2, 2, 1], 'SAME')
270 |
271 |     x = conv_block(x, [3, 3], [1, 1], 64, net_type, is_training)
272 |     x = tf.nn.max_pool(x, [1, 2, 2, 1], [1, 2, 2, 1], 'SAME')
273 |
274 |     x = conv_block(x, [3, 3], [1, 1], 128, net_type, is_training)
275 |     x = tf.nn.max_pool(x, [1, 2, 2, 1], [1, 2, 2, 1], 'SAME')
276 |
277 |     x = conv_block(x, [3, 3], [1, 1], 256, net_type, is_training)
278 |     x_route = x
279 |     x = tf.nn.max_pool(x, [1, 2, 2, 1], [1, 2, 2, 1], 'SAME')
280 |
281 |     x = conv_block(x, [3, 3], [1, 1], 512, net_type, is_training)
282 |     x = tf.nn.max_pool(x, [1, 2, 2, 1], [1, 1, 1, 1], 'SAME')
283 |
284 |     x = conv_block(x, [3, 3], [1, 1], 1024, net_type, is_training)
285 |
286 |     return x, x_route
287 |
288 |
289 | def tiny_yolo_head(x, x_route1, num_class, anchors, net_type, is_training):
290 |     with tf.name_scope('head_layer1'):
291 |         x = conv_block(x, [1, 1], [1, 1], 256, net_type, is_training)
292 |         x_route2 = x
293 |         x = conv_block(x, [3, 3], [1, 1], 512, net_type, is_training)
294 |         x = conv_block(x, [1, 1], [1, 1], 3 * (5 + num_class), 'cnn', is_training, "yolo_head1", False)
295 |         fe1 = x
296 |         fe1, box1, grid1 = yolo(fe1, anchors[[3, 4, 5]])
297 |
298 |     with tf.name_scope('head_layer2'):
299 |         x = conv_block(x_route2, [1, 1], [1, 1], 128, net_type, is_training)
300 |         x = upsample(x, 2)
301 |         x = tf.concat([x, x_route1], 3)
302 |         x = conv_block(x, [3, 3], [1, 1], 256, net_type, is_training)
303 |         x = conv_block(x, [1, 1], [1, 1], 3 * (5 + num_class), 'cnn', is_training, "yolo_head2", False)
304 |         fe2 = x
305 |         fe2, box2, grid2 = yolo(fe2, anchors[[0, 1, 2]])
306 |
307 |     fe = tf.concat([fe1, fe2], 1)
308 |     box = tf.concat([box1, box2], 1)
309 |     return fe, box, grid1, grid2
310 |
311 |
312 | def yolo(f, anchors):
313 |     """
314 |     convert features to boxes and scores
315 |     :param f:
316 |     :param anchors:
317 |     :return:
318 |     """
319 |     anchor_tensor = tf.constant(anchors, tf.float32)
320 |     batchsize = f.shape[0]
321 |     f = tf.reshape(f, [f.shape[0], f.shape[1], f.shape[2], 3, -1])
322 |     grid_y = tf.tile(tf.reshape(tf.range(f.shape[1]), [1, -1, 1, 1]), [batchsize, 1, f.shape[2], 1])
323 |     grid_x = tf.tile(tf.reshape(tf.range(f.shape[2]), [1, 1, -1, 1]), [batchsize, f.shape[1], 1, 1])
324 |     grid = tf.tile(tf.cast(tf.concat([grid_x, grid_y], -1), tf.float32)[:, :, :, tf.newaxis, :], (1, 1, 1, 3, 1))
325 |
326 |     box_xy = (tf.nn.sigmoid(f[..., :2]) + grid) / tf.cast(grid.shape[::-1][2:4], tf.float32)
327 |     box_wh = tf.math.exp(f[..., 2:4]) * anchor_tensor
328 |     box_confidence = tf.nn.sigmoid(f[..., 4:5])
329 |     classes_score = tf.nn.sigmoid(f[..., 5:])
330 |     boxes = tf.reshape(tf.concat([box_xy, box_wh, box_confidence, classes_score], -1), [batchsize, -1, 3, f.shape[4]])
331 |     f = tf.reshape(f, [batchsize, -1, 3, f.shape[4]])
332 |     return f, boxes, grid
333 |
334 |
335 | def model(x, num_classes, anchors, net_type, is_training, cal_loss=False):
336 |     batchsize, height, width, _ = x.get_shape().as_list()
337 |     if len(anchors) == 6:
338 |         x, x_route = tiny_darknet_body(x, net_type, is_training)
339 |         raw_pred, y, *grid = tiny_yolo_head(x, x_route, num_classes, anchors, net_type, is_training)
340 |     else:
341 |         x, route1, route2 = full_darknet_body(x, net_type, is_training)
342 |         raw_pred, y, *grid = full_yolo_head(x, route1, route2, num_classes, anchors, net_type, is_training)
343 |
344 |     box_xy, box_wh, box_confidence, classes_score = y[..., :2], y[..., 2:4], y[..., 4:5], y[..., 5:]
345 |     box_xy *= tf.constant([width, height], tf.float32)
346 |     # box_wh *= tf.constant([width, height], tf.float32)
347 |     boxe = tf.concat([box_xy, box_wh, box_confidence, classes_score], -1, name='debug_pred')
348 |
349 |     if cal_loss:
350 |         return raw_pred, boxe, grid
351 |     else:
352 |         return boxe
353 |
354 |
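# Sanity check on the flattened box count (full model, 416x416 input assumed):
# the heads concatenate 13x13 + 26x26 + 52x52 = 3549 grid cells along axis 1,
# each carrying 3 anchor slots, which matches the `s` computed in train.py.
def _demo_num_boxes():
    assert 13 * 13 + 26 * 26 + 52 * 52 == 3549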
355 | def loss(pred, gts, input_size, lambda_coord, lambda_noobj, lambda_cls, iou_threshold, debug_=False):
356 |     """
357 |     :param pred: (batch_size, num_boxes, 3, 5+num_class) [x0 y0 w h ...], raw_pred + boxes + grid
358 |     :param gts: shape = (batch_size, num_boxes, 3, 4+4+1+num_class) [xywh, classes]
359 |     :param input_size: height * width
360 |     :param lambda_coord: lambda
361 |     :param lambda_noobj: lambda
362 |     :param lambda_cls: lambda
363 |     :param iou_threshold: iou_threshold
364 |     :param debug_:
365 |     :return:
366 |     """
367 |
368 |     def binary_cross(_labels, _pred):
369 |         # pred = tf.clip_by_value(pred, 1e-10, 1 - 1e-10)
370 |         # return -labels * tf.math.log(pred)
371 |         # pred = tf.math.log(pred / (1 - pred))
372 |         return tf.nn.sigmoid_cross_entropy_with_logits(labels=_labels, logits=_pred)
373 |
374 |     raw_pred, pred_boxes, grid = pred
375 |
376 |     raw_gt_xy, raw_gt_wh = gts[..., 0:2], gts[..., 2:4]
377 |     true_gt_xy, true_gt_wh = gts[..., 4:6], gts[..., 6:8]
378 |     masks = gts[..., 8]
379 |     batchsize = masks.shape[0].value
380 |     i_height, i_width = input_size
381 |
382 |     # cal ignore_mask
383 |     ignore_mask = []
384 |     for b in range(batchsize):
385 |         true_box = tf.boolean_mask(gts[b:b + 1, ..., 4:8], masks[b:b + 1], name='debug_true_box')
386 |         with tf.name_scope('debug_iou'):
387 |             ious = box_iou(pred_boxes[b:b + 1, ..., :4], true_box)
388 |         ious = tf.reduce_max(ious, -1)
389 |         ignore_mask_ = tf.where(ious > iou_threshold, tf.zeros_like(ious), tf.ones_like(ious))
390 |         ignore_mask.append(ignore_mask_)
391 |     ignore_mask = tf.concat(ignore_mask, 0, name='debug_ignore_mask')
392 |
393 |     boxes_scale = 2 - true_gt_wh[..., 0] / i_width * true_gt_wh[..., 1] / i_height
394 |     # boxes_scale = 1
395 |
396 |     varss = tf.trainable_variables()
397 |     l2_loss = tf.reduce_sum([tf.nn.l2_loss(var) for var in varss]) * 0.001
398 |
399 |     masks_noobj = (1 - masks) * ignore_mask
400 |
401 |     # n_xywh = tf.reduce_sum(masks, name='debug_n_xywh')
402 |     # n_noob = tf.reduce_sum(masks_noobj, name='debug_n_noobj') / 100
403 |     n_xywh = batchsize
404 |     n_noob = batchsize
405 |
406 |     loss_xy = tf.reduce_sum(
407 |         lambda_coord * masks * boxes_scale * tf.reduce_sum(
408 |             # tf.math.square(raw_gt_xy - tf.math.sigmoid(raw_pred[..., 0:2]))
409 |             binary_cross(_labels=raw_gt_xy, _pred=raw_pred[..., 0:2])
410 |             , -1), name='debug_loss_xy') / n_xywh
411 |     loss_wh = tf.reduce_sum(
412 |         lambda_coord * masks * boxes_scale * tf.reduce_sum(
413 |             tf.math.square(raw_gt_wh - raw_pred[..., 2:4]),
414 |             -1), name='debug_loss_wh') / n_xywh
415 |     loss_obj_confidence = tf.reduce_sum(
416 |         masks * binary_cross(_labels=masks, _pred=raw_pred[..., 4]), name='debug_loss_obj') / n_xywh
417 |
418 |     loss_noobj_confidence = tf.reduce_sum(
419 |         lambda_noobj * masks_noobj * binary_cross(_labels=masks, _pred=raw_pred[..., 4]),
420 |         name='debug_loss_noobj') / n_noob
421 |     loss_cls = tf.reduce_sum(
422 |         masks * lambda_cls * tf.reduce_sum(
423 |             binary_cross(_labels=gts[..., 9:], _pred=raw_pred[..., 5:]), -1), name='debug_loss_cls'
424 |     ) / n_xywh
425 |     if debug_:
426 |         p = tf.print("loss_xy", loss_xy, "loss_wh", loss_wh, "loss_obj_confidence", loss_obj_confidence,
427 |                      'loss_noobj_confidence', loss_noobj_confidence, "loss_cls", loss_cls, "l2_loss", l2_loss)
428 |         with tf.control_dependencies([p]):
429 |             return loss_xy + loss_wh + loss_obj_confidence + loss_noobj_confidence + loss_cls + l2_loss
430 |     return loss_xy + loss_wh + loss_obj_confidence + loss_noobj_confidence + loss_cls + l2_loss
431 |
--------------------------------------------------------------------------------