├── net
│   ├── __init__.py
│   └── yolo3_net.py
├── util
│   ├── __init__.py
│   ├── voc_annotation.py
│   ├── coco_annotation.py
│   ├── load_weights.py
│   ├── kmeans.py
│   ├── image_utils.py
│   ├── box_utils.py
│   └── utils.py
├── model_data
│   ├── yolo_anchors_tiny.txt
│   ├── yolo_anchors.txt
│   ├── voc_classes.txt
│   └── coco_classes.txt
├── images
│   ├── full.jpg
│   ├── mAP.png
│   └── tiny.jpg
├── shell
│   ├── train_cnn_full.sh
│   ├── train_cnn_tiny.sh
│   ├── train_mobilenetv1_full.sh
│   ├── train_mobilenetv2_full.sh
│   └── train_mobilenetv2_tiny.sh
├── config
│   ├── pred_config.py
│   └── train_config.py
├── convert_weights.py
├── test.py
├── yolo.py
├── readme.md
└── train.py
/net/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/util/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/model_data/yolo_anchors_tiny.txt:
--------------------------------------------------------------------------------
1 | 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
--------------------------------------------------------------------------------
/images/full.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuodongQi/yolo3_tensorflow/HEAD/images/full.jpg
--------------------------------------------------------------------------------
/images/mAP.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuodongQi/yolo3_tensorflow/HEAD/images/mAP.png
--------------------------------------------------------------------------------
/images/tiny.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuodongQi/yolo3_tensorflow/HEAD/images/tiny.jpg
--------------------------------------------------------------------------------
/model_data/yolo_anchors.txt:
--------------------------------------------------------------------------------
1 | 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
--------------------------------------------------------------------------------
/model_data/voc_classes.txt:
--------------------------------------------------------------------------------
1 | aeroplane
2 | bicycle
3 | bird
4 | boat
5 | bottle
6 | bus
7 | car
8 | cat
9 | chair
10 | cow
11 | diningtable
12 | dog
13 | horse
14 | motorbike
15 | person
16 | pottedplant
17 | sheep
18 | sofa
19 | train
20 | tvmonitor
--------------------------------------------------------------------------------
/shell/train_cnn_full.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | NET_TYPE="cnn"
3 | TINY=False
4 | ANCHOR_PATH="./model_data/yolo_anchors.txt"
5 | PRETRAIN_PATH=""
6 | 
7 | epoch=200
8 | batch_size=4
9 | learning_rate=1e-4
10 | 
11 | debug=False
12 | 
13 | if [ -z "${PRETRAIN_PATH}" ]; then
14 |     cmd="python train.py \
15 |         -n ${NET_TYPE} \
16 |         -t ${TINY} \
17 |         -e ${epoch} \
18 |         -b ${batch_size} \
19 |         -lr ${learning_rate} \
20 |         -d ${debug} \
21 |         --anchor_path ${ANCHOR_PATH}"
22 | else
23 |     cmd="python train.py \
24 |         -n ${NET_TYPE} \
25 |         -t ${TINY} \
26 |         -pt ${PRETRAIN_PATH} \
27 |         -e ${epoch} \
28 |         -b ${batch_size} \
29 |         -lr ${learning_rate} \
30 |         -d ${debug} \
31 |         --anchor_path ${ANCHOR_PATH}"
32 | fi
33 | 
34 | echo ${cmd}
35 | ${cmd}
--------------------------------------------------------------------------------
/shell/train_cnn_tiny.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | NET_TYPE="cnn"
3 | TINY=True
4 | ANCHOR_PATH="./model_data/yolo_anchors_tiny.txt"
5 | PRETRAIN_PATH=""
6 | 
7 | epoch=200
8 | batch_size=4
9 | learning_rate=1e-4
10 | 
11 | debug=False
12 | 
13 | if [ -z "${PRETRAIN_PATH}" ]; then
14 |     cmd="python train.py \
15 |         -n ${NET_TYPE} \
16 |         -t ${TINY} \
17 |         -e ${epoch} \
18 |         -b ${batch_size} \
19 |         -lr ${learning_rate} \
20 |         -d ${debug} \
21 |         --anchor_path ${ANCHOR_PATH}"
22 | else
23 |     cmd="python train.py \
24 |         -n ${NET_TYPE} \
25 |         -t ${TINY} \
26 |         -pt ${PRETRAIN_PATH} \
27 |         -e ${epoch} \
28 |         -b ${batch_size} \
29 |         -lr ${learning_rate} \
30 |         -d ${debug} \
31 |         --anchor_path ${ANCHOR_PATH}"
32 | fi
33 | 
34 | echo ${cmd}
35 | ${cmd}
--------------------------------------------------------------------------------
/shell/train_mobilenetv1_full.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | NET_TYPE="mobilenetv1"
3 | TINY=False
4 | ANCHOR_PATH="./model_data/yolo_anchors.txt"
5 | PRETRAIN_PATH=""
6 | 
7 | epoch=200
8 | batch_size=4
9 | learning_rate=1e-4
10 | 
11 | debug=False
12 | 
13 | if [ -z "${PRETRAIN_PATH}" ]; then
14 |     cmd="python train.py \
15 |         -n ${NET_TYPE} \
16 |         -t ${TINY} \
17 |         -e ${epoch} \
18 |         -b ${batch_size} \
19 |         -lr ${learning_rate} \
20 |         -d ${debug} \
21 |         --anchor_path ${ANCHOR_PATH}"
22 | else
23 |     cmd="python train.py \
24 |         -n ${NET_TYPE} \
25 |         -t ${TINY} \
26 |         -pt ${PRETRAIN_PATH} \
27 |         -e ${epoch} \
28 |         -b ${batch_size} \
29 |         -lr ${learning_rate} \
30 |         -d ${debug} \
31 |         --anchor_path ${ANCHOR_PATH}"
32 | fi
33 | 
34 | echo ${cmd}
35 | ${cmd}
--------------------------------------------------------------------------------
/shell/train_mobilenetv2_full.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | NET_TYPE="mobilenetv2"
3 | TINY=False
4 | ANCHOR_PATH="./model_data/yolo_anchors.txt"
5 | PRETRAIN_PATH=""
6 | 
7 | epoch=200
8 | batch_size=4
9 | learning_rate=1e-4
10 | 
11 | debug=False
12 | 
13 | if [ -z "${PRETRAIN_PATH}" ]; then
14 |     cmd="python train.py \
15 |         -n ${NET_TYPE} \
16 |         -t ${TINY} \
17 |         -e ${epoch} \
18 |         -b ${batch_size} \
19 |         -lr ${learning_rate} \
20 |         -d ${debug} \
21 |         --anchor_path ${ANCHOR_PATH}"
22 | else
23 |     cmd="python train.py \
24 |         -n ${NET_TYPE} \
25 |         -t ${TINY} \
26 |         -pt ${PRETRAIN_PATH} \
27 |         -e ${epoch} \
28 |         -b ${batch_size} \
29 |         -lr ${learning_rate} \
30 |         -d ${debug} \
31 |         --anchor_path ${ANCHOR_PATH}"
32 | fi
33 | 
34 | echo ${cmd}
35 | ${cmd}
--------------------------------------------------------------------------------
/shell/train_mobilenetv2_tiny.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | NET_TYPE="mobilenetv2"
3 | TINY=True
4 | ANCHOR_PATH="./model_data/yolo_anchors_tiny.txt"
5 | PRETRAIN_PATH=""
6 | 
7 | epoch=200
8 | batch_size=4
9 | learning_rate=1e-4
10 | 
11 | debug=False
12 | 
13 | if [ -z "${PRETRAIN_PATH}" ]; then
14 |     cmd="python train.py \
15 |         -n ${NET_TYPE} \
16 |         -t ${TINY} \
17 |         -e ${epoch} \
18 |         -b ${batch_size} \
19 |         -lr ${learning_rate} \
20 |         -d ${debug} \
21 |         --anchor_path ${ANCHOR_PATH}"
22 | else
23 |     cmd="python train.py \
24 |         -n ${NET_TYPE} \
25 |         -t ${TINY} \
26 |         -pt ${PRETRAIN_PATH} \
27 |         -e ${epoch} \
28 |         -b ${batch_size} \
29 |         -lr ${learning_rate} \
30 |         -d ${debug} \
31 |         --anchor_path ${ANCHOR_PATH}"
32 | fi
33 | 
34 | echo ${cmd}
35 | ${cmd}
--------------------------------------------------------------------------------
/config/pred_config.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from os import getcwd
3 | from os.path import join
4 |
5 |
6 | def get_config():
7 | root = getcwd()
8 | conf = argparse.ArgumentParser()
9 |
10 | conf.add_argument('-i', '--image', default=None, type=str, help='image path')
11 | conf.add_argument('-v', '--video', default=None, type=str, help='video path')
12 |
13 | # load weight_path
14 | conf.add_argument('-w', '--weight_path', type=str, help='weight path',
15 | default='logs/cnn_full/cnn_full_model_epoch_20')
16 |
17 | conf.add_argument('--score', default=0.3, type=float, help='score threshold')
18 |
19 | conf.add_argument('--classes_path', type=str, help='classes path',
20 | default=join(root, 'model_data', 'coco_classes.txt'))
21 |
22 | return conf.parse_args()
23 |
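24 | 
25 | if __name__ == '__main__':
26 |     # quick sanity check: print the parsed defaults (a debugging aid only, not part of the pipeline)
27 |     print(get_config())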
--------------------------------------------------------------------------------
/model_data/coco_classes.txt:
--------------------------------------------------------------------------------
1 | person
2 | bicycle
3 | car
4 | motorbike
5 | aeroplane
6 | bus
7 | train
8 | truck
9 | boat
10 | traffic light
11 | fire hydrant
12 | stop sign
13 | parking meter
14 | bench
15 | bird
16 | cat
17 | dog
18 | horse
19 | sheep
20 | cow
21 | elephant
22 | bear
23 | zebra
24 | giraffe
25 | backpack
26 | umbrella
27 | handbag
28 | tie
29 | suitcase
30 | frisbee
31 | skis
32 | snowboard
33 | sports ball
34 | kite
35 | baseball bat
36 | baseball glove
37 | skateboard
38 | surfboard
39 | tennis racket
40 | bottle
41 | wine glass
42 | cup
43 | fork
44 | knife
45 | spoon
46 | bowl
47 | banana
48 | apple
49 | sandwich
50 | orange
51 | broccoli
52 | carrot
53 | hot dog
54 | pizza
55 | donut
56 | cake
57 | chair
58 | sofa
59 | pottedplant
60 | bed
61 | diningtable
62 | toilet
63 | tvmonitor
64 | laptop
65 | mouse
66 | remote
67 | keyboard
68 | cell phone
69 | microwave
70 | oven
71 | toaster
72 | sink
73 | refrigerator
74 | book
75 | clock
76 | vase
77 | scissors
78 | teddy bear
79 | hair drier
80 | toothbrush
--------------------------------------------------------------------------------
/convert_weights.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | import sys
3 | import time
4 | from os import makedirs
5 | from os.path import exists, join, split
6 |
7 | import numpy as np
8 | import tensorflow as tf
9 |
10 | from net.yolo3_net import model
11 | from util.load_weights import load_weight
12 |
13 |
14 | def convert(is_tiny=False):
15 | if is_tiny:
16 | anchors = np.array([[1, 1]] * 6)
17 | weight_path = join('model_data', 'yolov3-tiny.weights')
18 | save_path = join('logs', 'cnn_tiny', 'cnn_tiny_model')
19 | else:
20 | anchors = np.array([[1, 1]] * 9)
21 | weight_path = join('model_data', 'yolov3.weights')
22 | save_path = join('logs', 'cnn_full', 'cnn_full_model')
23 |
24 | if not exists(split(save_path)[0]):
25 | makedirs(split(save_path)[0])
26 | input_data = tf.placeholder(dtype=tf.float32, shape=(1, 416, 416, 3))
27 |
28 | model(input_data, 80, anchors, 'cnn', True, False)
29 |
30 | model_vars_ = tf.global_variables()
31 |     assert weight_path.endswith('.weights'), '{} is not a .weights file'.format(weight_path)
32 | assign_ops_ = load_weight(model_vars_, weight_path)
33 | t0 = time.time()
34 | print("start loading weights")
35 | saver = tf.train.Saver()
36 | with tf.Session() as sess:
37 | sess.run(assign_ops_)
38 | saver.save(sess, save_path, write_meta_graph=False, write_state=False)
39 | t1 = time.time()
40 |     print("weight conversion finished, cost {0:.4f}s".format(t1 - t0))
41 |
42 |
43 | if __name__ == '__main__':
44 |     mode = sys.argv[1]
45 |     if mode.lower() == 'tiny':
46 |         convert(True)
47 |     elif mode.lower() == 'full':
48 |         convert(False)
49 |     else:
50 |         raise Exception('unknown argument, expected "tiny" or "full"')
51 |
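52 | # Example usage (see the readme):
53 | #     python convert_weights.py full   # converts model_data/yolov3.weights
54 | #     python convert_weights.py tiny   # converts model_data/yolov3-tiny.weights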
--------------------------------------------------------------------------------
/config/train_config.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from os import getcwd
3 | from os.path import join
4 |
5 |
6 | def str2bool(v):
7 | if v.lower() in ['yes', 'true']:
8 | return True
9 | elif v.lower() in ['no', 'false']:
10 | return False
11 | else:
12 |         raise argparse.ArgumentTypeError('boolean value expected, got {!r}'.format(v))
13 |
14 |
15 | def get_config():
16 | root = getcwd()
17 | conf = argparse.ArgumentParser()
18 |
19 | # yolo3 type
20 |     conf.add_argument('-n', "--net_type", type=str, help='net type: cnn, mobilenetv1, mobilenetv2 or mobilenetv3',
21 | default='cnn')
22 | conf.add_argument('-t', '--tiny', type=str2bool, help='whether tiny yolo or not', default=False)
23 |
24 | # training argument
25 | conf.add_argument('-b', '--batch_size', type=int, help='batch_size', default=4)
26 | conf.add_argument('-e', '--epoch', type=int, help='epoch', default=100)
27 | conf.add_argument('-lr', '--learn_rate', type=float, help='learn_rate', default=1e-4)
28 |
29 | # load pretrain
30 | conf.add_argument('-pt', '--pretrain_path', type=str, help='pretrain path', default='logs/cnn_full/cnn_full_model')
31 |
32 | conf.add_argument('--anchor_path', type=str, help='anchor path',
33 | default=join(root, 'model_data', 'yolo_anchors.txt'))
34 | conf.add_argument('--train_path', type=str, help='train file path',
35 | default=join(root, 'model_data', 'train.txt'))
36 | conf.add_argument('--valid_path', type=str, help='valid file path',
37 | default=join(root, 'model_data', 'valid.txt'))
38 | conf.add_argument('--classes_path', type=str, help='classes path',
39 | default=join(root, 'model_data', 'coco_classes.txt'))
40 |
41 | conf.add_argument('-d', '--debug', type=str2bool, help='whether print per item loss', default=False)
42 | return conf.parse_args()
43 |
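44 | 
45 | if __name__ == '__main__':
46 |     # sanity check for str2bool (used by -t/--tiny and -d/--debug) and the parsed defaults
47 |     assert str2bool('True') and not str2bool('no')
48 |     print(get_config())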
--------------------------------------------------------------------------------
/util/voc_annotation.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | import xml.etree.ElementTree as ET
4 |
5 | wd = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))  # project root, works from any cwd
6 | class_path = os.path.join(wd, 'model_data', 'voc_classes.txt') # change to the classes path you want to detect
7 | is_train = True # whether train dataset or valid dataset
8 |
9 | if is_train:
10 | image_dir = '' # your train image dir
11 | annotation_dir = '' # your train image annotation dir
12 | gen_files = 'train.txt'
13 | else:
14 | image_dir = '' # your val image dir
15 | annotation_dir = '' # your val image annotation dir
16 | gen_files = 'valid.txt'
17 |
18 | with open(class_path) as f:
19 | class_names = f.readlines()
20 | classes = [c.strip() for c in class_names]
21 |
22 | list_file_train = open(os.path.join(wd, 'model_data', gen_files), 'w')
23 |
24 | annotation_files = os.listdir(annotation_dir)
25 | random.shuffle(annotation_files)
26 |
27 | for i in range(0, len(annotation_files), 1):
28 | annotation_file = annotation_files[i]
29 |
30 | list_file_train.write('%s/%s.jpg' % (image_dir, annotation_file.split('.')[0]))
31 |
32 | xml_file = os.path.join(annotation_dir, annotation_file)
33 | try:
34 | in_file = open(xml_file, 'r')
35 | except:
36 | print("open failed {0}".format(xml_file))
37 | else:
38 | # print("open success {0}".format(image_id))
39 | tree = ET.parse(in_file)
40 | root = tree.getroot()
41 |
42 | for obj in root.iter('object'):
43 | difficult = obj.find('difficult').text
44 | cls = obj.find('name').text
45 | if cls not in classes or int(difficult) == 1:
46 | continue
47 | cls_id = classes.index(cls)
48 | xmlbox = obj.find('bndbox')
49 | b = (int(xmlbox.find('xmin').text), int(xmlbox.find('ymin').text), int(xmlbox.find('xmax').text),
50 | int(xmlbox.find('ymax').text))
51 |             list_file_train.write(" " + ",".join([str(a) for a in b]) + ',' + str(cls_id))
52 | # list_file_train.write(" " + ",".join([str(a) for a in b]) + ',' + str(0))
53 | list_file_train.write('\n')
54 |
55 | list_file_train.close()
56 | # list_file_val.close()
57 | # clean dataset
58 | with open(os.path.join(wd, 'model_data', gen_files), 'r') as f1:
59 | old_line = f1.readlines()
60 | with open(os.path.join(wd, 'model_data', gen_files), 'w') as f2:
61 | for line in old_line:
62 | line_ = line.split(' ')
63 | if len(line_) > 1:
64 | f2.write(line)
65 |
--------------------------------------------------------------------------------
/util/coco_annotation.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | from collections import defaultdict
4 |
5 | wd = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))  # project root, works from any cwd
6 | class_path = os.path.join(wd, 'model_data', 'coco_classes.txt') # change to the classes path you want to detect
7 | is_train = 1 # whether train dataset or valid dataset
8 |
9 | if is_train:
10 | image_dir = '/media/data1/datasets/coco/train2017' # your train image dir
11 |     annotation_file = '/media/data1/datasets/coco/annotations/instances_train2017.json'  # your train annotation file
12 | gen_files = 'train.txt'
13 | else:
14 | image_dir = '/media/data1/datasets/coco/val2017' # your val image dir
15 |     annotation_file = '/media/data1/datasets/coco/annotations/instances_val2017.json'  # your val annotation file
16 | gen_files = 'valid.txt'
17 |
18 | name_box_id = defaultdict(list)
19 | id_name = dict()
20 | with open(class_path) as f:
21 | class_names = f.readlines()
22 | classes = [c.strip() for c in class_names]
23 |
24 | list_file = open(os.path.join(wd, 'model_data', gen_files), 'w')
25 |
26 | with open(annotation_file) as f:
27 | data = json.load(f)
28 | annotations = data['annotations']
29 |
30 | for ant in annotations:
31 | image_id = ant['image_id']
32 | image_path = os.path.join(image_dir, '%012d.jpg' % image_id)
33 | cat = ant['category_id']
34 |     # COCO category ids run from 1 to 90 with gaps; remap them to contiguous 0-79
35 | if 1 <= cat <= 11:
36 | cat -= 1
37 | elif 13 <= cat <= 25:
38 | cat -= 2
39 | elif 27 <= cat <= 28:
40 | cat -= 3
41 | elif 31 <= cat <= 44:
42 | cat -= 5
43 | elif 46 <= cat <= 65:
44 | cat -= 6
45 | elif cat == 67:
46 | cat -= 7
47 | elif cat == 70:
48 | cat -= 9
49 | elif 72 <= cat <= 82:
50 | cat -= 10
51 | elif 84 <= cat <= 90:
52 | cat -= 11
53 | name_box_id[image_path].append([ant['bbox'], cat])
54 |
55 | for key, box_infos in name_box_id.items():
56 | list_file.write(key)
57 | for info in box_infos:
58 | x_min = int(info[0][0])
59 | y_min = int(info[0][1])
60 | x_max = x_min + int(info[0][2])
61 | y_max = y_min + int(info[0][3])
62 |
63 | box_info = " %d,%d,%d,%d,%d" % (x_min, y_min, x_max, y_max, int(info[1]))
64 | list_file.write(box_info)
65 | list_file.write('\n')
66 | list_file.close()
67 |
68 | # list_file_val.close()
69 | # clean dataset
70 | with open(os.path.join(wd, 'model_data', gen_files), 'r') as f1:
71 | old_line = f1.readlines()
72 | with open(os.path.join(wd, 'model_data', gen_files), 'w') as f2:
73 | for line in old_line:
74 | line_ = line.split(' ')
75 | if len(line_) > 1:
76 | f2.write(line)
77 |
--------------------------------------------------------------------------------
/util/load_weights.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | import numpy as np
3 | import tensorflow as tf
4 |
5 |
6 | def load_weight(var_list, file_path):
7 | with open(file_path, "rb") as fp:
8 | _ = np.fromfile(fp, dtype=np.int32, count=5)
9 | weights = np.fromfile(fp, dtype=np.float32)
10 | ptr = 0
11 | i = 0
12 | assign_ops = []
13 | while i < len(var_list) - 1:
14 | var1 = var_list[i]
15 | var2 = var_list[i + 1]
16 | # do something only if we process conv layer
17 | if 'cnn' in var1.name:
18 | # check type of next layer
19 | if 'batch' in var2.name:
20 | # load batch norm params
21 | gamma, beta, mean, var = var_list[i + 1:i + 5]
22 | batch_norm_vars = [beta, gamma, mean, var]
23 | for var in batch_norm_vars:
24 | shape = var.shape.as_list()
25 | num_params = np.prod(shape)
26 | var_weights = weights[ptr:ptr + num_params].reshape(shape)
27 | ptr += num_params
28 | assign_ops.append(tf.assign(var, var_weights, validate_shape=True))
29 |
30 | # we move the pointer by 4, because we loaded 4 variables
31 | i += 4
32 | elif 'cnn' in var2.name:
33 | # load biases
34 | bias = var2
35 | bias_shape = bias.shape.as_list()
36 | bias_params = np.prod(bias_shape)
37 | bias_weights = weights[ptr:ptr + bias_params].reshape(bias_shape)
38 | assign_ops.append(tf.assign(bias, bias_weights, validate_shape=True))
39 | if 'yolo_head' in bias.name: # if num_classes is not 80
40 | ptr += 255
41 | else:
42 | ptr += bias_params
43 |
44 | # we loaded 1 variable
45 | i += 1
46 | # we can load weights of conv layer
47 |
48 | shape = var1.shape.as_list()
49 | num_params = np.prod(shape)
50 | var_weights = weights[ptr:ptr + num_params].reshape((shape[3], shape[2], shape[0], shape[1]))
51 | # remember to transpose to column-major
52 | # DarkNet conv_weights are serialized Caffe-style:
53 | # (out_dim, in_dim, height, width)
54 | # We would like to set these to Tensorflow order:
55 | # (height, width, in_dim, out_dim)
56 | var_weights = np.transpose(var_weights, (2, 3, 1, 0))
57 | assign_ops.append(tf.assign(var1, var_weights, validate_shape=True))
58 |
59 | if 'yolo_head' in var1.name: # if num_classes is not 80
60 | shape_ = shape[:3]
61 | shape_.append(255)
62 | ptr += np.prod(shape_)
63 |
64 | else:
65 | ptr += num_params
66 |
67 | i += 1
68 | assert ptr == len(weights), "load failed, please verify your weight file"
69 | return assign_ops
70 |
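71 | # Usage sketch (convert_weights.py is the real caller in this repo): build the model
72 | # graph first so the variables exist, then run the returned assign ops in a session:
73 | #     assign_ops = load_weight(tf.global_variables(), 'model_data/yolov3.weights')
74 | #     with tf.Session() as sess:
75 | #         sess.run(assign_ops)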
--------------------------------------------------------------------------------
/util/kmeans.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | class YOLO_Kmeans:
5 |
6 | def __init__(self, cluster_number, filename):
7 | self.cluster_number = cluster_number
8 | self.filename = filename
9 |
10 | def iou(self, boxes, clusters): # 1 box -> k clusters
11 | n = boxes.shape[0]
12 | k = self.cluster_number
13 |
14 | box_area = boxes[:, 0] * boxes[:, 1]
15 | box_area = box_area.repeat(k)
16 | box_area = np.reshape(box_area, (n, k))
17 |
18 | cluster_area = clusters[:, 0] * clusters[:, 1]
19 | cluster_area = np.tile(cluster_area, [1, n])
20 | cluster_area = np.reshape(cluster_area, (n, k))
21 |
22 | box_w_matrix = np.reshape(boxes[:, 0].repeat(k), (n, k))
23 | cluster_w_matrix = np.reshape(np.tile(clusters[:, 0], (1, n)), (n, k))
24 | min_w_matrix = np.minimum(cluster_w_matrix, box_w_matrix)
25 |
26 | box_h_matrix = np.reshape(boxes[:, 1].repeat(k), (n, k))
27 | cluster_h_matrix = np.reshape(np.tile(clusters[:, 1], (1, n)), (n, k))
28 | min_h_matrix = np.minimum(cluster_h_matrix, box_h_matrix)
29 | inter_area = np.multiply(min_w_matrix, min_h_matrix)
30 |
31 | result = inter_area / (box_area + cluster_area - inter_area)
32 | return result
33 |
34 | def avg_iou(self, boxes, clusters):
35 | accuracy = np.mean([np.max(self.iou(boxes, clusters), axis=1)])
36 | return accuracy
37 |
38 | def kmeans(self, boxes, k, dist=np.median):
39 | box_number = boxes.shape[0]
40 | distances = np.empty((box_number, k))
41 | last_nearest = np.zeros((box_number,))
42 | np.random.seed()
43 | clusters = boxes[np.random.choice(
44 | box_number, k, replace=False)] # init k clusters
45 | while True:
46 |
47 | distances = 1 - self.iou(boxes, clusters)
48 |
49 | current_nearest = np.argmin(distances, axis=1)
50 | if (last_nearest == current_nearest).all():
51 | break # clusters won't change
52 | for cluster in range(k):
53 | clusters[cluster] = dist( # update clusters
54 | boxes[current_nearest == cluster], axis=0)
55 |
56 | last_nearest = current_nearest
57 |
58 | return clusters
59 |
60 | def result2txt(self, data):
61 |         f = open("model_data/yolo_anchors.txt", 'w')  # use yolo_anchors_tiny.txt when clustering 6 tiny anchors
62 | row = np.shape(data)[0]
63 | for i in range(row):
64 | if i == 0:
65 | x_y = "%d,%d" % (data[i][0], data[i][1])
66 | else:
67 | x_y = ", %d,%d" % (data[i][0], data[i][1])
68 | f.write(x_y)
69 | f.close()
70 |
71 | def txt2boxes(self):
72 | f = open(self.filename, 'r')
73 | dataSet = []
74 | for line in f:
75 | infos = line.split(" ")
76 | length = len(infos)
77 | for i in range(1, length):
78 | width = int(infos[i].split(",")[2]) - \
79 | int(infos[i].split(",")[0])
80 | height = int(infos[i].split(",")[3]) - \
81 | int(infos[i].split(",")[1])
82 | dataSet.append([width, height])
83 | result = np.array(dataSet)
84 | f.close()
85 | return result
86 |
87 | def txt2clusters(self):
88 | all_boxes = self.txt2boxes()
89 | result = self.kmeans(all_boxes, k=self.cluster_number)
90 | result = result[np.lexsort(result.T[0, None])]
91 | self.result2txt(result)
92 | print("K anchors:\n {}".format(result))
93 | print("Accuracy: {:.2f}%".format(
94 | self.avg_iou(all_boxes, result) * 100))
95 |
96 |
97 | if __name__ == "__main__":
98 | cluster_number = 9
99 | filename = "model_data/train.txt"
100 | kmeans = YOLO_Kmeans(cluster_number, filename)
101 | kmeans.txt2clusters()
102 |
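103 | # For tiny yolo, 6 anchors are expected instead of 9, e.g.:
104 | #     YOLO_Kmeans(6, "model_data/train.txt").txt2clusters()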
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | import time
2 | from collections import defaultdict
3 | from os.path import join, split
4 |
5 | import cv2
6 | import numpy as np
7 | import tensorflow as tf
8 |
9 | from config.pred_config import get_config
10 | from net.yolo3_net import model
11 | from util.box_utils import pick_box
12 | from util.image_utils import get_color_table, read_image_and_lable
13 | from util.utils import cal_fp_fn_tp_tn, cal_mAP
14 |
15 |
16 | class YOLO():
17 | def __init__(self, config):
18 | self.config = config
19 |
20 | net_type, tiny = split(self.config.weight_path)[-1].split('_')[:2]
21 |
22 | if tiny == 'tiny':
23 | self.anchor_path = join('model_data', 'yolo_anchors_tiny.txt')
24 | else:
25 | self.anchor_path = join('model_data', 'yolo_anchors.txt')
26 |
27 | self.classes = self._get_classes()
28 | self.anchors = self._get_anchors()
29 | self.hw = [416, 416]
30 | self.batch_size = 64
31 | self.ious_thres = [0.5, 0.75]
32 |
33 | self.test_path = "model_data/test.txt"
34 |
35 | with open(self.test_path) as f:
36 | self.test_data = f.readlines()
37 |
38 | if tiny == 'tiny':
39 | assert 6 == len(
40 |                 self.anchors), 'the model type does not match the anchors, check the anchors or the type param'
41 | else:
42 | assert 9 == len(
43 |                 self.anchors), 'the model type does not match the anchors, check the anchors or the type param'
44 |
45 | self.input = tf.placeholder(tf.float32, [self.batch_size] + self.hw + [3])
46 | self.is_training = tf.placeholder(tf.bool, shape=[])
47 | self.pred = model(self.input, len(self.classes), self.anchors, net_type, self.is_training, False)
48 |
49 | print('start load net_type: {}_{}_model'.format(net_type, tiny))
50 |
51 | # load weights
52 | conf = tf.ConfigProto()
53 | conf.gpu_options.allow_growth = True
54 |
55 | # change fraction according to your GPU
56 | # conf.gpu_options.per_process_gpu_memory_fraction = 0.05
57 |
58 | self.sess = tf.Session(config=conf)
59 | saver = tf.train.Saver()
60 | saver.restore(self.sess, self.config.weight_path)
61 | self.color_table = get_color_table(len(self.classes))
62 |
63 | def _get_anchors(self):
64 | """loads the anchors from a file"""
65 | with open(self.anchor_path) as f:
66 | anchors = f.readline()
67 | anchors = [float(x) for x in anchors.split(',')]
68 | return np.array(anchors).reshape(-1, 2)
69 |
70 | def _get_classes(self):
71 | """loads the classes"""
72 | with open(self.config.classes_path) as f:
73 | class_names = f.readlines()
74 | class_names = [c.strip() for c in class_names]
75 | return class_names
76 |
77 | def test(self):
78 | total_test_case = len(self.test_data)
79 |
80 | FP_TP = defaultdict(lambda: defaultdict(list))
81 | GT_NUMS = defaultdict(int)
82 | GTS = defaultdict(lambda: defaultdict(list))
83 | DETECTION = defaultdict(lambda: defaultdict(list))
84 | img_data = []
85 |
86 | print("total test case:", total_test_case)
87 |
88 | for i in range(total_test_case):
89 |
90 | img, xyxy = read_image_and_lable(self.test_data[i], self.hw, is_training=False)
91 | img_data.append(img)
92 | print("{}/{}".format(i, total_test_case))
93 | for per_xyxy in xyxy:
94 | GTS[i % self.batch_size][self.classes[int(per_xyxy[4])]].append(per_xyxy[:4].tolist())
95 |
96 | if (i + 1) % self.batch_size == 0: # a batch
97 | boxes = self.sess.run(self.pred, feed_dict={self.input: img_data, self.is_training: False})
98 |
99 | for b in range(self.batch_size):
100 | picked_boxes = pick_box(boxes[b], 0.01, 0.5, self.hw, self.classes) # NMS
101 | for picked_box in picked_boxes:
102 | DETECTION[b][self.classes[int(picked_box[5])]].append(picked_box[:5].tolist())
103 |
104 | # cal FP TP
105 | cal_fp_fn_tp_tn(DETECTION, GTS, FP_TP, GT_NUMS, self.classes, self.ious_thres)
106 |
107 | DETECTION.clear()
108 | GTS.clear()
109 | img_data.clear()
110 |
111 | APs, mAPs = cal_mAP(FP_TP, GT_NUMS, self.classes, self.ious_thres)
112 | print(APs, mAPs)
113 |
114 |
115 | if __name__ == '__main__':
116 | configs = get_config()
117 | YOLO(configs).test()
118 |
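119 | # Example: python test.py -w logs/cnn_full/cnn_full_model_epoch_20
120 | # The weight filename encodes the net type and full/tiny, which select the anchor file above.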
--------------------------------------------------------------------------------
/util/image_utils.py:
--------------------------------------------------------------------------------
1 | # coding=utf8
2 |
3 | import random
4 |
5 | import cv2
6 | import numpy as np
7 | from matplotlib.colors import hsv_to_rgb, rgb_to_hsv
8 |
9 |
10 | def rand(a=0., b=1.):
11 | return random.random() * (b - a) + a
12 |
13 |
14 | def read_image_and_lable(gt_path, hw, hue=.1, sat=1.5, val=1.5, is_training=True):
15 |     """read an image from an annotation line and randomly distort it"""
16 | f_path, *_label = gt_path.split(' ')
17 | if not len(_label):
18 | # f_path = f_path.split('\n')[0]
19 | return
20 | image_raw_data = cv2.imread(f_path)[..., ::-1] # RGB h*w*c
21 | height, width = image_raw_data.shape[0], image_raw_data.shape[1]
22 | image_data = cv2.resize(image_raw_data, tuple(hw[::-1])) / 255.0
23 |
24 | h_scale = hw[0] / height
25 | w_scale = hw[1] / width
26 | # anchor[:, 0] *= w_scale
27 | # anchor[:, 1] *= h_scale
28 |
29 | xyxy = []
30 |
31 | for per_label in _label:
32 | xmin, ymin, xmax, ymax, cls = list(map(float, per_label.split(',')))
33 | xyxy.append([xmin * w_scale, ymin * h_scale, xmax * w_scale, ymax * h_scale, cls])
34 | xyxy = np.array(xyxy)
35 |
36 | if is_training:
37 |
38 |         # randomly flip the image from top to bottom
39 | if rand() < .5:
40 | image_data = cv2.flip(image_data, 0)
41 | tmp = xyxy[:, 1].copy()
42 | xyxy[:, 1] = hw[0] - xyxy[:, 3]
43 | xyxy[:, 3] = hw[0] - tmp
44 |
45 |         # randomly flip the image from left to right
46 | if rand() < .5:
47 | image_data = cv2.flip(image_data, 1)
48 | tmp = xyxy[:, 0].copy()
49 | xyxy[:, 0] = hw[1] - xyxy[:, 2]
50 | xyxy[:, 2] = hw[1] - tmp
51 |
52 | # distort image
53 | if rand() < 0.5:
54 | x = rgb_to_hsv(image_data)
55 | hue = rand(-hue, hue)
56 | sat = rand(1, sat) if rand() < .5 else 1 / rand(1, sat)
57 | val = rand(1, val) if rand() < .5 else 1 / rand(1, val)
58 | x[..., 0] += hue
59 | x[..., 0][x[..., 0] > 1] -= 1
60 | x[..., 0][x[..., 0] < 0] += 1
61 | x[..., 1] *= sat
62 | x[..., 2] *= val
63 | x[x > 1] = 1
64 | x[x < 0] = 0
65 |
66 | image_data = hsv_to_rgb(x) # RGB
67 |         # randomly pad top/left then crop, shifting content toward the bottom-right
68 | if rand() < .5:
69 | pad_top = random.randint(0, 25)
70 | pad_left = random.randint(0, 25)
71 | if rand() < .5:
72 | image_data = np.pad(image_data, ((pad_top, 0), (pad_left, 0), (0, 0)), 'edge')
73 | else:
74 | image_data = np.pad(image_data, ((pad_top, 0), (pad_left, 0), (0, 0)), 'constant')
75 | image_data = image_data[:hw[0], :hw[1], :]
76 | for i in range(xyxy.shape[0]):
77 | xyxy[i, 0] = pad_left + xyxy[i, 0] if pad_left + xyxy[i, 0] < hw[1] else hw[1]
78 | xyxy[i, 2] = pad_left + xyxy[i, 2] if pad_left + xyxy[i, 2] < hw[1] else hw[1]
79 | xyxy[i, 1] = pad_top + xyxy[i, 1] if pad_top + xyxy[i, 1] < hw[0] else hw[0]
80 | xyxy[i, 3] = pad_top + xyxy[i, 3] if pad_top + xyxy[i, 3] < hw[0] else hw[0]
81 |         # randomly pad bottom/right then crop, shifting content toward the top-left
82 | if rand() < .5:
83 | pad_bottom = random.randint(0, 25)
84 | pad_right = random.randint(0, 25)
85 | if rand() < .5:
86 | image_data = np.pad(image_data, ((0, pad_bottom), (0, pad_right), (0, 0)), 'edge')
87 | else:
88 | image_data = np.pad(image_data, ((0, pad_bottom), (0, pad_right), (0, 0)), 'constant')
89 | image_data = image_data[pad_bottom:hw[0] + pad_bottom, pad_right:hw[1] + pad_right, :]
90 | for i in range(xyxy.shape[0]):
91 | xyxy[i, 0] = xyxy[i, 0] - pad_right if xyxy[i, 0] - pad_right > 0 else 0
92 | xyxy[i, 2] = xyxy[i, 2] - pad_right if xyxy[i, 2] - pad_right > 0 else 0
93 | xyxy[i, 1] = xyxy[i, 1] - pad_bottom if xyxy[i, 1] - pad_bottom > 0 else 0
94 | xyxy[i, 3] = xyxy[i, 3] - pad_bottom if xyxy[i, 3] - pad_bottom > 0 else 0
95 | return image_data, xyxy
96 |
97 |
98 | def get_color_table(class_num, seed=200):
99 | random.seed(seed)
100 | color_table = {}
101 | for i in range(class_num):
102 | color_table[i] = [random.randint(0, 255) for _ in range(3)]
103 | return color_table
104 |
105 |
106 | def plot_img(img, picked_boxes, color_table, classes, is_gt=False):
107 | """
108 | get original boxes and plot them
109 | """
110 | for co, bbox in enumerate(picked_boxes):
111 | color = color_table[int(bbox[5])]
112 | tl = int(min(round(0.002 * max(img.shape[0:2])), min(bbox[3] - bbox[1], bbox[2] - bbox[0])))
113 | t2 = max(tl - 1, 1) # font thickness
114 | if is_gt:
115 | label = "gts: {}".format(classes[int(bbox[5])])
116 | else:
117 | label = "{} {:.2f}".format(classes[int(bbox[5])], bbox[4])
118 | img = cv2.rectangle(img, tuple(np.int32([bbox[0], bbox[1]])),
119 | tuple(np.int32([bbox[2], bbox[3]])), color, 3)
120 | img = cv2.putText(img, label, tuple(np.int32([bbox[0], bbox[1]])),
121 | cv2.FONT_HERSHEY_TRIPLEX, float(tl) / 3, color, thickness=t2, lineType=cv2.LINE_AA)
122 |
123 | return img
124 |
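125 | 
126 | if __name__ == '__main__':
127 |     # minimal smoke test; read_image_and_lable expects an annotation line such as
128 |     # 'path/to/img.jpg 50,100,150,200,0' (see util/voc_annotation.py)
129 |     print(get_color_table(3))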
--------------------------------------------------------------------------------
/yolo.py:
--------------------------------------------------------------------------------
1 | import time
2 | from os.path import join, split
3 |
4 | import cv2
5 | import numpy as np
6 | import tensorflow as tf
7 |
8 | from config.pred_config import get_config
9 | from net.yolo3_net import model
10 | from util.box_utils import pick_box, get_true_box
11 | from util.image_utils import get_color_table, plot_img
12 |
13 |
14 | class YOLO():
15 | def __init__(self, config):
16 | self.config = config
17 |
18 | net_type, tiny = split(self.config.weight_path)[-1].split('_')[:2]
19 |
20 | if tiny == 'tiny':
21 | self.anchor_path = join('model_data', 'yolo_anchors_tiny.txt')
22 | else:
23 | self.anchor_path = join('model_data', 'yolo_anchors.txt')
24 |
25 | self.classes = self._get_classes()
26 | self.anchors = self._get_anchors()
27 | self.hw = [416, 416]
28 | self.batch_size = 1
29 |
30 | if tiny == 'tiny':
31 | assert 6 == len(
32 |                 self.anchors), 'the model type does not match the anchors, check the anchors or the type param'
33 | else:
34 | assert 9 == len(
35 |                 self.anchors), 'the model type does not match the anchors, check the anchors or the type param'
36 |
37 | self.input = tf.placeholder(tf.float32, [self.batch_size] + self.hw + [3])
38 | self.is_training = tf.placeholder(tf.bool, shape=[])
39 | self.pred = model(self.input, len(self.classes), self.anchors, net_type, self.is_training, False)
40 |
41 | print('start load net_type: {}_{}_model'.format(net_type, tiny))
42 | # load weights
43 | conf = tf.ConfigProto()
44 | # conf.gpu_options.allow_growth = True
45 |
46 | # change fraction according to your GPU
47 | conf.gpu_options.per_process_gpu_memory_fraction = 0.05
48 | self.sess = tf.Session(config=conf)
49 | saver = tf.train.Saver()
50 | saver.restore(self.sess, self.config.weight_path)
51 | self.color_table = get_color_table(len(self.classes))
52 |
53 | def _get_anchors(self):
54 | """loads the anchors from a file"""
55 | with open(self.anchor_path) as f:
56 | anchors = f.readline()
57 | anchors = [float(x) for x in anchors.split(',')]
58 | return np.array(anchors).reshape(-1, 2)
59 |
60 | def _get_classes(self):
61 | """loads the classes"""
62 | with open(self.config.classes_path) as f:
63 | class_names = f.readlines()
64 | class_names = [c.strip() for c in class_names]
65 | return class_names
66 |
67 | def forward(self, img):
68 | """
69 | :param img: shape = (h,w,c), 0-255
70 | :return:
71 | """
72 | height, width = img.shape[:2]
73 | img_ = cv2.resize(img, tuple(self.hw)[::-1])
74 | h_r = height / self.hw[0]
75 | w_r = width / self.hw[1]
76 |
77 | im_data = np.expand_dims(img_[..., ::-1], 0) / 255.0
78 | boxes = self.sess.run(self.pred, feed_dict={self.input: im_data, self.is_training: False})
79 |
80 | vis_img = []
81 | for b in range(self.batch_size):
82 | picked_boxes = pick_box(boxes[b], 0.3, 0.6, self.hw, self.classes)
83 | true_boxes = get_true_box(picked_boxes, w_r, h_r)
84 | per_img = img
85 | per_img = plot_img(per_img, true_boxes, self.color_table, self.classes)
86 | print('find {} boxes'.format(len(true_boxes)))
87 | print(true_boxes)
88 | vis_img.append(per_img)
89 | return vis_img[0]
90 |
91 | def detect_image(self, img_path):
92 | img = cv2.imread(img_path)
93 | if img is None:
94 | return None
95 | img = self.forward(img)
96 | cv2.imshow('img', img)
97 | cv2.imwrite('tiny.jpg', img)
98 | cv2.waitKey(0)
99 | return 1
100 |
101 | def detect_video(self, video_path):
102 | cap = cv2.VideoCapture(video_path)
103 | if not cap.isOpened():
104 | raise IOError("Couldn't open webcam or video")
105 | # video_FourCC = -1
106 | video_FourCC = cv2.VideoWriter_fourcc(*'XVID')
107 | video_fps = cap.get(cv2.CAP_PROP_FPS)
108 | width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
109 |
110 | # writer = cv2.VideoWriter('output.mp4', video_FourCC, video_fps, (width, height))
111 | writer = cv2.VideoWriter('output.avi', video_FourCC, video_fps, (width, height))
112 |
113 | total_time = 0
114 | curr_fps = 0
115 | fps = "FPS: ??"
116 | time1 = time.time()
117 |
118 | while True:
119 | ret, frame = cap.read()
120 | if ret:
121 | out = self.forward(frame)
122 | time2 = time.time()
123 | d_time = time2 - time1
124 | time1 = time2
125 | total_time += d_time
126 | curr_fps += 1
127 | if total_time >= 1:
128 | fps = "FPS: {}".format(curr_fps)
129 | total_time -= 1
130 | curr_fps = 0
131 |
132 | out = cv2.putText(out, fps, tuple(np.int32([20, 30])),
133 | cv2.FONT_HERSHEY_TRIPLEX, 1, (0, 0, 255))
134 | out = cv2.resize(out, (width, height))
135 | cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
136 | cv2.imshow('result', out)
137 | cv2.waitKey(1)
138 | writer.write(out)
139 | else:
140 | break
141 |
142 |
143 | if __name__ == '__main__':
144 | configs = get_config()
145 | yolo = YOLO(configs)
146 | if configs.video:
147 | yolo.detect_video(configs.video)
148 | elif configs.image:
149 | yolo.detect_image(configs.image)
150 | else:
151 | while True:
152 | img_path = input('input image path:')
153 | if not yolo.detect_image(img_path):
154 |                 print('check your image path')
155 | continue
156 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # yolo3-tensorflow
2 | TensorFlow implementation of YOLO v3 object detection.
3 | Two choices on each axis: full or tiny, and cnn or mobilenets (mobilenet_v1, mobilenet_v2).
4 | That gives 6 combinations, but one of them (mobilenet_v1 + tiny) has too few parameters and performs badly.
5 | So the following 5 combinations are built:
6 | * cnn + full
7 | * cnn + tiny
8 | * mobilenet_v1 + full
9 | * mobilenet_v2 + full
10 | * mobilenet_v2 + tiny
11 | 
12 | These 5 frameworks are provided in this repository.
13 |
14 | ## Dependencies
15 | * python3
16 | * tensorflow >= 1.12
17 | * opencv
18 |
19 | ## Quick start
20 | * cnn full yolo3
21 | 1. Download the official [yolov3.weights](https://pjreddie.com/media/files/yolov3.weights) and put it in the `model_data` folder of the project.
22 | 2. Run the command `python convert_weights.py full` to convert the weights to a TensorFlow checkpoint, which will be located in `logs/cnn_full/` and named `cnn_full_model.data-00000-of-00001`.
23 | 3. Run the command `python yolo.py` or `python yolo.py -w logs/cnn_full/cnn_full_model` and input the image path to detect.
24 | 4. Detection example: ![full](images/full.jpg)
25 |
26 | * cnn tiny yolo3
27 | 1. Download the official [yolov3-tiny.weights](https://pjreddie.com/media/files/yolov3-tiny.weights) and put it in the `model_data` folder of the project.
28 | 2. Run the command `python convert_weights.py tiny` to convert the weights to a TensorFlow checkpoint, which will be located in `logs/cnn_tiny/` and named `cnn_tiny_model.data-00000-of-00001`.
29 | 3. Run the command `python yolo.py -w logs/cnn_tiny/cnn_tiny_model` and input the image path to detect.
30 | 4. Detection example: ![tiny](images/tiny.jpg)
31 |
32 |
33 | ## Train
34 |
35 | 1. Prepare Dataset
36 | Before training, you should generate your own annotation file and class names file.
37 | One row for one image
38 | Row format: image_file_path box1 box2 ... boxN
39 | Box format: x_min,y_min,x_max,y_max,class_id (no space)
40 | For the VOC dataset, try `python util/voc_annotation.py`
41 | For your own dataset, you should modify [util/voc_annotation.py](util/voc_annotation.py) accordingly.
42 | Here is an example:
43 | ```
44 | path/to/img1.jpg 50,100,150,200,0 30,50,200,120,3
45 | path/to/img2.jpg 120,300,250,600,2
46 | ...
47 | ```
48 |
49 | 2. Prepare yolo anchors
50 | run `python util/kmeans.py` to generate anchors. Note that the anchor number
51 | should be 9 if you want to train full yolo, otherwise it should be 6.
52 |
53 | 3. Start to train
54 | The train arguments can be seen in [config/train_config.py](config/train_config.py).
55 | ```
56 | usage: train.py [-h] [-n NET_TYPE] [-t TINY] [-b BATCH_SIZE] [-e EPOCH]
57 | [-lr LEARN_RATE] [-pt PRETRAIN_PATH]
58 | [--anchor_path ANCHOR_PATH] [--train_path TRAIN_PATH]
59 | [--classes_path CLASSES_PATH] [-d DEBUG]
60 |
61 | optional arguments:
62 | -h, --help show this help message and exit
63 | -n NET_TYPE, --net_type NET_TYPE
64 | net type: cnn, mobilenetv1, mobilenetv2 or mobilenetv3
65 | -t TINY, --tiny TINY whether tiny yolo or not
66 | -b BATCH_SIZE, --batch_size BATCH_SIZE
67 | batch_size
68 | -e EPOCH, --epoch EPOCH
69 | epoch
70 | -lr LEARN_RATE, --learn_rate LEARN_RATE
71 | learn_rate
72 | -pt PRETRAIN_PATH, --pretrain_path PRETRAIN_PATH
73 | pretrain path
74 | --anchor_path ANCHOR_PATH
75 | anchor path
76 | --train_path TRAIN_PATH
77 | train file path
78 | --classes_path CLASSES_PATH
79 | classes path
80 | -d DEBUG, --debug DEBUG
81 | whether print per item loss
82 | ```
83 | The default framework is cnn + full. If you want to train others, you can pass
84 | the `-n` (cnn, mobilenetv1 or mobilenetv2) and `-t` (True or False) arguments.
85 |
86 | 4. To be simple
87 | I have written scripts in the `shell` folder. Just run `CUDA_VISIBLE_DEVICES='0' sh ./shell/train_cnn_full.sh`, or run
88 | `CUDA_VISIBLE_DEVICES='0' nohup stdbuf -oL sh ./shell/train_cnn_full.sh > logs/cnn_full.txt &` in the background and the log
89 | will be written to cnn_full.txt.
90 | You can also change other arguments such as batch_size, epoch and so on.
91 | If you want to use a pretrained model, pass the pretrain path. I will provide the pretrained weights later.
92 | 5. NOTE
93 | The mobilenets converge more slowly than cnn, so you should train for more epochs.
94 | 6. Tensorboard
95 | You can use TensorBoard to watch the training trend.
96 | Run `tensorboard --logdir ./ --host 127.0.0.1`
97 | You can also see the mAP score: ![mAP](images/mAP.png)
98 |
99 | 7. Test your trained weights on your test dataset
100 | `python test.py`
101 | You may need to change the configs in [config/pred_config.py](config/pred_config.py)
102 |
103 |
104 | ## Predict
105 | The prediction arguments can be seen in [config/pred_config.py](config/pred_config.py).
106 | ```
107 | usage: yolo.py [-h] [-i IMAGE] [-v VIDEO] [-w WEIGHT_PATH] [--score SCORE]
108 | [--classes_path CLASSES_PATH]
109 |
110 | optional arguments:
111 | -h, --help show this help message and exit
112 | -i IMAGE, --image IMAGE
113 | image path
114 | -v VIDEO, --video VIDEO
115 | video path
116 | -w WEIGHT_PATH, --weight_path WEIGHT_PATH
117 | weight path
118 | --score SCORE score threshold
119 | --classes_path CLASSES_PATH
120 | classes path
121 | ```
122 | Note that the weights filename should be like `cnn_full_model.xxx`, `cnn_tiny_model.xxx`, and so on;
123 | the framework is inferred from the words 'cnn' and 'full' (or 'cnn' and 'tiny') in the filename.
124 | You can predict on an image or a video.
125 | For example:
126 | `python yolo.py -w weight_path`
127 | `python yolo.py -i image_path -w weight_path`
128 | `python yolo.py -v video_path -w weight_path`
--------------------------------------------------------------------------------
/util/box_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 |
4 |
5 | def box_anchor_iou(b1, b2):
6 |     '''Return iou array
7 |     Parameters
8 |     ----------
9 |     b1: array, shape=(i, 2), wh
10 |     b2: array, shape=(j, 2), wh
11 |     Returns
12 |     -------
13 |     iou: array, shape=(i, j)
14 |     '''
15 |
16 | # Expand dim to apply broadcasting.
17 | b1 = np.expand_dims(b1, -2)
18 | b1_mins = - b1 / 2
19 | b1_maxes = b1 / 2
20 |
21 | # Expand dim to apply broadcasting.
22 | b2 = np.expand_dims(b2, 0)
23 | b2_mins = -b2 / 2
24 | b2_maxes = b2 / 2
25 |
26 | intersect_mins = np.maximum(b1_mins, b2_mins)
27 | intersect_maxes = np.minimum(b1_maxes, b2_maxes)
28 | intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
29 | intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
30 | b1_area = b1[..., 0] * b1[..., 1]
31 | b2_area = b2[..., 0] * b2[..., 1]
32 | iou = intersect_area / (b1_area + b2_area - intersect_area)
33 |
34 | return iou
35 |
36 |
37 | def pick_box(boxes, score_threshold, nms_iou_threshold, hw, classes):
38 |     """
39 |     :param boxes: (boxes_num, 5+num_classes), xywh
40 |     :param score_threshold: score threshold
41 |     :param nms_iou_threshold: nms iou threshold
42 |     :param hw: scaled image height and width
43 |     :param classes: list of class names
44 |     :return: picked boxes after score filtering and NMS, each row xyxy,score,cls
45 |     """
46 | score = boxes[..., 4:5] * boxes[..., 5:]
47 | idx = np.where(score > score_threshold)
48 | box_select = boxes[idx[:2]]
49 | box_xywh = box_select[:, :4]
50 | box_xyxy = wh2xy_np(box_xywh)
51 | if not len(box_xyxy):
52 | return []
53 | box_truncated = []
54 | for box_k in box_xyxy:
55 | box_k[0] = box_k[0] if box_k[0] >= 0 else 0
56 | box_k[1] = box_k[1] if box_k[1] >= 0 else 0
57 | box_k[2] = box_k[2] if box_k[2] <= hw[1] else hw[1]
58 | box_k[3] = box_k[3] if box_k[3] <= hw[0] else hw[0]
59 | box_truncated.append(box_k)
60 | box_xyxy = np.stack(box_truncated)
61 |     box_score = score[idx]
62 | clsid = idx[2]
63 | picked_boxes = nms_np(
64 |         np.concatenate([box_xyxy, box_score.reshape([-1, 1]), clsid.reshape([-1, 1])], -1),
65 | len(classes), iou_threshold=nms_iou_threshold)
66 | return picked_boxes
67 |
68 |
69 | def nms_np(boxes, classes, iou_threshold=0.3, max_output=20):
70 | """Return nms
71 | Parameters
72 | ----------
73 |     :param boxes: shape=(box_num, 6), xyxy,score,cls
74 | :param iou_threshold: iou_threshold
75 | :param max_output: max_output
76 | :param classes: total_classes_num
77 |
78 | Returns
79 | -------
80 | nms boxes
81 | """
82 |
83 | picked_boxes = []
84 |
85 | for c in range(classes):
86 | b = boxes[boxes[..., -1] == c]
87 | score = b[..., 4]
88 | order = np.argsort(score)
89 | count = 0
90 | while order.size > 0 and count < max_output:
91 | # The index of largest confidence score
92 | index = order[-1]
93 |
94 | # Pick the bounding box with largest confidence score
95 | picked_boxes.append(b[index])
96 |
97 | b1_mins = b[index][0:2]
98 | b1_maxes = b[index][2:4]
99 | b1_wh = b1_maxes - b1_mins
100 |
101 | b2_mins = b[order[:-1]][..., 0:2]
102 | b2_maxes = b[order[:-1]][..., 2:4]
103 | b2_wh = b2_maxes - b2_mins
104 |
105 | intersect_mins = np.maximum(b1_mins, b2_mins)
106 | intersect_maxes = np.minimum(b1_maxes, b2_maxes)
107 | intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
108 | intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
109 | b1_area = b1_wh[..., 0] * b1_wh[..., 1]
110 | b2_area = b2_wh[..., 0] * b2_wh[..., 1]
111 | iou = intersect_area / (b1_area + b2_area - intersect_area)
112 |
113 | left = np.where(iou < iou_threshold)
114 | order = order[left]
115 | count += 1
116 |
117 | return picked_boxes
118 |
119 |
120 | def xy2wh_np(b):
121 | """
122 | :param b: list xmin ymin xmax ymax
123 | :return: shape=(...,4) x0 y0 w h
124 | """
125 | xmin, ymin, xmax, ymax = b[..., 0:1], b[..., 1:2], b[..., 2:3], b[..., 3:4]
126 | x0 = (xmin + xmax) / 2.0
127 | y0 = (ymin + ymax) / 2.0
128 | w = xmax - xmin
129 | h = ymax - ymin
130 | return np.concatenate([x0, y0, w, h], -1)
131 |
132 |
133 | def wh2xy_np(b):
134 | """
135 | :param b: shape=(...,4) x0 y0 w h
136 | :return: shape=(...,4) xmin ymin xmax ymax
137 | """
138 | x0, y0, w, h = b[..., 0:1], b[..., 1:2], b[..., 2:3], b[..., 3:4]
139 | xmin = x0 - w / 2.0
140 | xmax = x0 + w / 2.0
141 | ymin = y0 - h / 2.0
142 | ymax = y0 + h / 2.0
143 | return np.concatenate([xmin, ymin, xmax, ymax], -1)
144 |
145 |
146 | def box_iou(b1, b2):
147 | '''Return iou tensor
148 | Parameters
149 | ----------
150 | b1: tensor, shape=(batch,... 4), xywh
151 | b2: tensor, shape=(j, 4), xywh
152 | Returns
153 | -------
154 | iou: tensor, shape=(i1,...,iN, j)
155 | '''
156 |
157 | # Expand dim to apply broadcasting.
158 | b1 = tf.expand_dims(b1, -2)
159 | b1_xy = b1[..., :2]
160 | b1_wh = b1[..., 2:4]
161 | b1_wh_half = b1_wh / 2.
162 | b1_mins = b1_xy - b1_wh_half
163 | b1_maxes = b1_xy + b1_wh_half
164 |
165 | # Expand dim to apply broadcasting.
166 | b2 = tf.expand_dims(b2, 0)
167 | b2_xy = b2[..., :2]
168 | b2_wh = b2[..., 2:4]
169 | b2_wh_half = b2_wh / 2.
170 | b2_mins = b2_xy - b2_wh_half
171 | b2_maxes = b2_xy + b2_wh_half
172 |
173 | intersect_mins = tf.maximum(b1_mins, b2_mins)
174 | intersect_maxes = tf.minimum(b1_maxes, b2_maxes)
175 | intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
176 | intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
177 | b1_area = b1_wh[..., 0] * b1_wh[..., 1]
178 | b2_area = b2_wh[..., 0] * b2_wh[..., 1]
179 | iou = tf.math.divide(intersect_area, b1_area + b2_area - intersect_area, name='iou')
180 |
181 | return iou
182 |
183 |
184 | def box_iou_np(b1, b2):
185 | """
186 |     Return iou array
187 | Parameters
188 | ----------
189 | b1: array shape=(i, 4), xyxy
190 | b2: array, shape=(j, 4), xyxy
191 | Returns
192 | -------
193 |     iou: array, shape=(i, j)
194 | """
195 |
196 | # Expand dim to apply broadcasting.
197 | b1 = np.expand_dims(b1[...,:4], -2)
198 |
199 | # Expand dim to apply broadcasting.
200 | b2 = np.expand_dims(b2[...,:4], 0)
201 |
202 | intersect_mins = np.maximum(b1[...,0:2], b2[...,0:2])
203 | intersect_maxes = np.minimum(b1[...,2:4], b2[...,2:4])
204 | intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
205 | intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
206 | b1_area = (b1[..., 2] - b1[..., 0]) * (b1[..., 3] - b1[..., 1])
207 | b2_area = (b2[..., 2] - b2[..., 0]) * (b2[..., 3] - b2[..., 1])
208 | iou = intersect_area / (b1_area + b2_area - intersect_area)
209 |
210 | return iou
211 |
212 |
213 |
214 | def xy2wh(b):
215 | """
216 | :param b: shape=(...,4) xmin ymin xmax ymax
217 | :return: shape=(...,4) x0 y0 w h
218 | """
219 | xmin, ymin, xmax, ymax = b[..., 0:1], b[..., 1:2], b[..., 2:3], b[..., 3:4]
220 | x0 = (xmin + xmax) / 2.0
221 | y0 = (ymin + ymax) / 2.0
222 | w = xmax - xmin
223 | h = ymax - ymin
224 | return tf.concat([x0, y0, w, h], -1)
225 |
226 |
227 | def wh2xy(b):
228 | """
229 | :param b: shape=(...,4) x0 y0 w h
230 | :return: shape=(...,4) xmin ymin xmax ymax
231 | """
232 | x0, y0, w, h = b[..., 0:1], b[..., 1:2], b[..., 2:3], b[..., 3:4]
233 | xmin = x0 - w / 2.0
234 | xmax = x0 + w / 2.0
235 | ymin = y0 - h / 2.0
236 | ymax = y0 + h / 2.0
237 | return tf.concat([xmin, ymin, xmax, ymax], -1)
238 |
239 |
240 | def np_sigmoid(x):
241 | return 1 / (1 + np.exp(-x))
242 |
243 |
244 | def get_true_box(picked_boxes, w_r, h_r):
245 | """ get original true box according to ori image scale"""
246 | true_boxes = []
247 | for co, bbox in enumerate(picked_boxes):
248 | bbox[0] *= w_r
249 | bbox[2] *= w_r
250 | bbox[1] *= h_r
251 | bbox[3] *= h_r
252 | true_boxes.append(bbox)
253 | if not len(true_boxes):
254 | return true_boxes
255 | true_boxes = np.concatenate(true_boxes, 0).reshape(-1, 6)
256 | return true_boxes
257 |
258 | if __name__ == '__main__':
259 | bx = tf.placeholder(tf.float32, [2, 4, 4])
260 | xy2wh(bx)
261 |
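262 |     # nms_np smoke test with made-up boxes: two heavily overlapping class-0 boxes,
263 |     # the higher-scoring one should survive
264 |     test_boxes = np.array([[10., 10., 50., 50., 0.9, 0.],
265 |                            [12., 12., 48., 48., 0.6, 0.]])
266 |     print(nms_np(test_boxes, classes=1, iou_threshold=0.3))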
--------------------------------------------------------------------------------
/util/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from util.box_utils import box_iou_np
4 | from collections import defaultdict
5 |
6 |
7 | def np_sigmoid(x):
8 | return 1 / (1 + np.exp(-x))
9 |
10 |
11 | def sec2time(sec, n_msec=3):
12 |     ''' Convert seconds to '[D d, ]HHh MMm SS.FFFs' '''
13 | m, s = divmod(sec, 60)
14 | h, m = divmod(m, 60)
15 | d, h = divmod(h, 24)
16 | if n_msec > 0:
17 | pattern = '%%02dh %%02dm %%0%d.%dfs' % (n_msec + 3, n_msec)
18 | else:
19 |         pattern = '%02dh %02dm %02ds'
20 | if d == 0:
21 | return pattern % (h, m, s)
22 | return ('%d d, ' + pattern) % (d, h, m, s)
23 |
24 |
25 | def cal_fp_fn_tp_tn(detection, ground_truth, FP_TP, GT_NUM, classes, iou_thres_list):
26 | """
27 |     calculate FP TP FN TN according to detection and ground truth
28 | :param detection: a dict, the format:
29 |
30 | {
31 | image1: { class1: [
32 | [xmin, ymim, xmax, ymax, confidence_score], # obj1
33 | [xmin, ymim, xmax, ymax, confidence_score], # obj2
34 | ...
35 | ],
36 | class2: [[xmin, ymim, xmax, ymax, confidence_score]],
37 | ...
38 | },
39 |
40 | image2: { class1: [
41 | [xmin, ymim, xmax, ymax, confidence_score], # obj1
42 | [xmin, ymim, xmax, ymax, confidence_score] # obj2
43 | ...
44 | ],
45 | ...
46 | },
47 | ...
48 | }
49 |
50 |
51 | :param ground_truth: a dict:
52 | {
53 | image1: { class1: [
54 | [xmin, ymim, xmax, ymax], # obj1
55 | [xmin, ymim, xmax, ymax] # obj2
56 | ...
57 | ],
58 | class2: [
59 | [xmin, ymim, xmax, ymax], # obj1
60 | [xmin, ymim, xmax, ymax] # obj2
61 | ...
62 | ],
63 | ...
64 | },
65 |
66 | image2: { class1: [
67 | [xmin, ymim, xmax, ymax], # obj1
68 | [xmin, ymim, xmax, ymax] # obj2
69 | ...
70 | ],
71 | ...
72 | },
73 | ...
74 | }
75 |
76 | :param FP_TP: an output dict, filled in place; the format:
77 | {
78 | iou_thres1: {
79 | class1: [
80 | [False, confidence_score], # image1_obj1, False means FP, True means TP
81 | [False, confidence_score], # image1_obj2, False means FP, True means TP
82 | [False, confidence_score], # image2_obj1, False means FP, True means TP
83 | [False, confidence_score], # image2_obj2, False means FP, True means TP
84 | ],
85 | class2: [[False, confidence_score]],
86 | ...
87 | },
88 |
89 | iou_thres2: {
90 | class1: [
91 | [False, confidence_score], # image1_obj1, False means FP, True means TP
92 | [False, confidence_score], # image1_obj2, False means FP, True means TP
93 | [False, confidence_score], # image2_obj1, False means FP, True means TP
94 | [False, confidence_score], # image2_obj2, False means FP, True means TP
95 | ],
96 | class2: [[False, confidence_score]],
97 | ...
98 | },
99 | ...
100 | }
101 |
102 |
103 | :param GT_NUM: a dict that stores the total number of gt boxes per class, used to compute recall:
104 | {
105 | class1: num1,
106 | class2: num2,
107 | ...
108 | }
109 |
110 | :param classes: list of class names
111 | :param iou_thres_list: list of iou thresholds
112 |
113 |
114 | """
115 | for i in detection.keys(): # image file name
116 | det_objs = detection[i] # detection dict
117 | gt_objs = ground_truth[i] # gt dict
118 | for j in classes: # class name
119 | det_boxes = np.array(det_objs[j]) # detection boxes
120 | gt_boxes = np.array(gt_objs[j]) # gt boxes
121 |
122 | if not len(gt_boxes): # no gt boxes: every detection for this class is a FP
123 | for iou_thres in iou_thres_list:
124 | for box_index, box in enumerate(det_boxes): # init
125 | FP_TP[iou_thres][j].append([False, box[4]])
126 | continue
127 |
128 | GT_NUM[j] += len(gt_boxes)
129 |
130 | if not len(det_boxes): # gt boxes exist but there are no detections: only the gt count is updated
131 | continue
132 |
133 | ious = box_iou_np(det_boxes, gt_boxes) # calculate iou
134 | # ious_larger = np.where(ious > iou_thres, ious, np.zeros_like(ious))
135 | ious_index = np.argmax(ious, 0) # for each gt, the detection with max iou becomes the TP candidate; the rest stay FP
136 |
137 | for iou_thres in iou_thres_list:
138 | for box_index, box in enumerate(det_boxes): # init
139 | FP_TP[iou_thres][j].append([False, box[4]])
140 |
141 | for gt_index in range(len(gt_boxes)):
142 | selected = ious_index[gt_index] # best-matching detection for this gt
143 | sel_index = len(det_boxes) - selected - 1 # offset of that detection from the end of the just-appended block
144 | FP_TP[iou_thres][j][~sel_index][0] = ious[selected, gt_index] >= iou_thres # ~sel_index == -(len(det_boxes) - selected)
145 |
146 |
147 | def cal_mAP(FP_TP, GT_NUM, classes, iou_thres_list):
148 | """
149 | calculate mAP
150 | :param FP_TP: the dict filled by cal_fp_fn_tp_tn:
151 | {
152 | iou_thres1: {
153 | class1: [
154 | [False, confidence_score], # image1_obj1, False means FP, True means TP
155 | [False, confidence_score], # image1_obj2, False means FP, True means TP
156 | [False, confidence_score], # image2_obj1, False means FP, True means TP
157 | [False, confidence_score], # image2_obj2, False means FP, True means TP
158 | ],
159 | class2: [[False, confidence_score]],
160 | ...
161 | },
162 |
163 | iou_thres2: {
164 | class1: [
165 | [False, confidence_score], # image1_obj1, False means FP, True means TP
166 | [False, confidence_score], # image1_obj2, False means FP, True means TP
167 | [False, confidence_score], # image2_obj1, False means FP, True means TP
168 | [False, confidence_score], # image2_obj2, False means FP, True means TP
169 | ],
170 | class2: [[False, confidence_score]],
171 | ...
172 | },
173 | ...
174 | }
175 |
176 |
177 | :param GT_NUM: a dict that stores the total number of gt boxes per class, used to compute recall:
178 | {
179 | class1: num1,
180 | class2: num2,
181 | ...
182 | }
183 |
184 | :param classes: list of class names
185 | :param iou_thres_list: list of iou thresholds
186 |
187 | """
188 | iou_class_AP = {}
189 | iou_mAP = {}
190 | for iou_thres in iou_thres_list:
191 | class_AP = {}
192 | for cls in classes:
193 | fp_tp = FP_TP[iou_thres][cls]
194 | fp_tp = sorted(fp_tp, key=lambda x: x[1], reverse=True)
195 | TP, total_det = 0, 0
196 | precision = [1.0]
197 | recall = [0.0]
198 |
199 | # calculate pr for each box
200 | for per_fp_tp in fp_tp:
201 | total_det += 1
202 | if per_fp_tp[0]:
203 | TP += 1
204 | precision.append(TP / total_det)
205 | if not GT_NUM[cls]:
206 | print('your valid/test data is too small: class {} has no gt boxes'.format(cls))
207 | recall.append(0)
208 | else:
209 | recall.append(TP / GT_NUM[cls])
210 |
211 | # calculate AP by all-point interpolation
212 | AP = 0
213 | i_old = 0
214 | for i in range(1, len(recall)):
215 | if recall[i] == recall[i_old]:
216 | continue
217 | p = max(precision[i:])
218 | AP += p * (recall[i] - recall[i_old])
219 | i_old = i
220 | class_AP[cls] = AP
221 | iou_class_AP[iou_thres] = class_AP
222 | iou_mAP[iou_thres] = sum(class_AP.values()) / len(classes)
223 |
224 | return iou_class_AP, iou_mAP
225 |
226 |
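To make the all-point interpolation concrete, here is the same loop run on a toy PR curve: each recall increment is weighted by the best precision at or beyond that recall.

```python
recall    = [0.0, 0.5, 1.0]
precision = [1.0, 1.0, 0.667]
AP, i_old = 0.0, 0
for i in range(1, len(recall)):
    if recall[i] == recall[i_old]:
        continue
    AP += max(precision[i:]) * (recall[i] - recall[i_old])  # 1.0*0.5 + 0.667*0.5
    i_old = i
print(AP)   # 0.8335
```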
227 | if __name__ == '__main__':
228 | detection = {
229 | "image1": {
230 | "class1": [[1, 2, 3, 4, 5],
231 | [10, 20, 30, 40, 4],
232 | [1, 2, 3, 40, 3],
233 | ],
234 | "class2": [[1, 2, 3, 4, 5],
235 | [10, 20, 30, 40, 4],
236 | [1, 2, 3, 40, 3],
237 | ]
238 | },
239 | "image2": {
240 | "class1": [[1, 2, 3, 4, 5],
241 | [10, 20, 30, 40, 4],
242 | [1, 2, 3, 40, 3],
243 | ],
244 | "class2": [[1, 2, 3, 4, 5],
245 | [10, 20, 30, 40, 4],
246 | [1, 2, 3, 40, 3],
247 | ]
248 | }
249 |
250 | }
251 |
252 | GT = {
253 | "image1": {
254 | "class1": [[1, 2, 3.4, 4],
255 | [10, 20, 30, 40],
256 | ],
257 | "class2": [[1, 2, 3.4, 4],
258 | ],
259 | },
260 | "image2": {
261 | "class1": [[1, 2, 3.4, 4],
262 | [10, 20, 30, 40],
263 | ],
264 | "class2": [[1, 2, 3.4, 4],
265 | ],
266 | },
267 | }
268 | fp = defaultdict(lambda: defaultdict(list))
269 | nums = defaultdict(int)
270 | cal_fp_fn_tp_tn(detection, GT, fp, nums, ["class1", "class2"], [0.4, 0.9, 1])
271 | a = cal_mAP(fp, nums, ["class1", "class2"], [0.4, 0.9, 1])
272 | print(a)
273 |
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | import time
2 | from collections import defaultdict
3 | from copy import deepcopy
4 | from os import getcwd
5 | from os.path import join, split
6 |
7 | import numpy as np
8 | import tensorflow as tf
9 |
10 | from config.train_config import get_config
11 | from net.yolo3_net import loss, model
12 | from util.box_utils import box_anchor_iou, pick_box, xy2wh_np
13 | from util.image_utils import get_color_table, plot_img, read_image_and_lable
14 | from util.utils import sec2time, cal_fp_fn_tp_tn, cal_mAP
15 |
16 |
17 | class YOLO():
18 | def __init__(self, config):
19 | self.config = config
20 |
21 | self.batch_size = self.config.batch_size
22 | self.epoch = self.config.epoch
23 | self.learn_rate = self.config.learn_rate
24 |
25 | self.lambda_coord = 5
26 | self.lambda_noobj = 0.5
27 | self.lambda_cls = 1
28 | self.iou_threshold = 0.5 # used to decide whether box is BG or FG
29 |
30 | self.ious_thres = [0.5, 0.75] # used to calculate mAP
31 |
32 | self.classes = self.__get_classes()
33 | self.anchors = self.__get_anchors()
34 | self.hw = [416, 416]
35 | if self.config.tiny:
36 | assert 6 == len(
37 | self.anchors), 'model type does not match the anchors; check the anchor file or the tiny param'
38 | self.log_path = join(getcwd(), 'logs', self.config.net_type + '_tiny')
39 | else:
40 | assert 9 == len(
41 | self.anchors), 'model type does not match the anchors; check the anchor file or the tiny param'
42 | self.log_path = join(getcwd(), 'logs', self.config.net_type + '_full')
43 | self.pretrain_path = self.config.pretrain_path
44 |
45 | self.input = tf.placeholder(tf.float32, [self.batch_size] + self.hw + [3])
46 | self.is_training = tf.placeholder(tf.bool, shape=[])
47 | self.label = None
48 |
49 | with open(self.config.train_path) as f:
50 | self.train_data = f.readlines()
51 | with open(self.config.valid_path) as f:
52 | self.val_data = f.readlines()
53 |
54 | self.color_table = get_color_table(len(self.classes))
55 |
56 | def __get_anchors(self):
57 | """loads the anchors from a file"""
58 | with open(self.config.anchor_path) as f:
59 | anchors = f.readline()
60 | anchors = [float(x) for x in anchors.split(',')]
61 | return np.array(anchors).reshape(-1, 2)
62 |
63 | def __get_classes(self):
64 | """loads the classes"""
65 | with open(self.config.classes_path) as f:
66 | class_names = f.readlines()
67 | class_names = [c.strip() for c in class_names]
68 | return class_names
69 |
70 | def generate_data(self, grid_shape, is_val=False):
71 |
72 | gds_init = [np.zeros(g_shape[1:3] + [3, 9 + len(self.classes)]) for g_shape in grid_shape]
73 |
74 | idx = 0
75 |
76 | GTS = defaultdict(lambda: defaultdict(list))
77 |
78 | if is_val:
79 | gts = self.val_data
80 | else:
81 | gts = self.train_data
82 | while True:
83 | img_files = []
84 | labels = []
85 | b = 0
86 | GTS.clear()
87 |
88 | while idx < len(gts) - self.batch_size: # a batch
89 | try:
90 | res = read_image_and_lable(gts[idx + b], self.hw, is_training=not is_val)
91 | # print(idx + b)
92 | except IndexError:
93 | raise Exception('this should not happen')
94 | else:
95 | if not res:
96 | raise Exception('check your dataset: an image has no label')
97 |
98 | img, _label = res
99 |
100 | img_files.append(img)
101 |
102 | for per_xyxy in _label:
103 | GTS[b][self.classes[int(per_xyxy[4])]].append(per_xyxy[:4].tolist())
104 |
105 | _label_ = np.concatenate([xy2wh_np(_label[:, :4]), _label[:, 4:]], -1) # change to xywh
106 |
107 | gds = deepcopy(gds_init)
108 | for per_label in _label_:
109 | x0, y0, w, h = per_label[:4]
110 | if w == 0 or h == 0:
111 | continue
112 | box_iou = box_anchor_iou(self.anchors, per_label[2:4])
113 | k = np.argmax(box_iou)
114 | div, mod = divmod(int(k), 3) # k -> (scale index, anchor slot within that scale)
115 | div = len(grid_shape) - 1 - div # anchors are listed small->large, grids run coarse->fine, so flip
116 | h_r = self.hw[0] / gds[div].shape[0]
117 | w_r = self.hw[1] / gds[div].shape[1]
118 | i = int(np.floor(x0 / w_r))
119 | j = int(np.floor(y0 / h_r))
120 |
121 | gds[div][j, i, mod, 0] = x0 / w_r - i
122 | gds[div][j, i, mod, 1] = y0 / h_r - j
123 | gds[div][j, i, mod, 2] = np.log(w / self.anchors[k, 0] + 1e-5)
124 | gds[div][j, i, mod, 3] = np.log(h / self.anchors[k, 1] + 1e-5)
125 |
126 | gds[div][j, i, mod, 4] = x0
127 | gds[div][j, i, mod, 5] = y0
128 | gds[div][j, i, mod, 6] = w
129 | gds[div][j, i, mod, 7] = h
130 |
131 | gds[div][j, i, mod, 8] = 1
132 | gds[div][j, i, mod, 9 + int(per_label[4])] = 1
133 |
134 | gds = [gd.reshape([-1, 3, 9 + len(self.classes)]) for gd in gds]
135 | labels.append(np.concatenate(gds, 0))
136 | b += 1
137 | if len(labels) == self.batch_size:
138 | idx += self.batch_size
139 | break
140 | if idx >= len(gts) - self.batch_size:
141 | np.random.shuffle(gts)
142 | idx = 0
143 | img_files, labels = np.array(img_files, np.float32), np.array(labels, np.float32)
144 | if is_val:
145 | yield img_files, labels, GTS
146 | else:
147 | yield img_files, labels, idx
148 |
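For reference, the layout generate_data packs into each (grid cell, anchor) label vector, and a sketch (assuming an (img, label, idx) batch drawn from the generator; the call site is hypothetical) of pulling the assigned boxes back out:

```python
# per (grid cell, anchor) vector of width 9 + num_classes:
#   [0:2]  raw xy targets (cell-relative offsets, pre-sigmoid)
#   [2:4]  raw wh targets (log-space, relative to the matched anchor)
#   [4:8]  absolute x0, y0, w, h in input pixels (used for the ignore mask)
#   [8]    objectness mask (1 where a gt box was assigned)
#   [9:]   one-hot class vector
img, label, idx = next(self.generate_data(grid_shape))  # hypothetical call site
assigned = label[0][label[0][..., 8] == 1]
print(assigned[:, 4:8])   # absolute boxes of the assigned anchors
```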
149 | def train(self):
150 | # pred, losses, op = self.create_model()
151 | pred = model(self.input, len(self.classes), self.anchors, self.config.net_type, self.is_training, True)
152 | grid_shape = [g.get_shape().as_list() for g in pred[2]]
153 |
154 | s = sum([g[2] * g[1] for g in grid_shape])
155 | self.label = tf.placeholder(tf.float32, [self.batch_size, s, 3, 9 + len(self.classes)])
156 | # for data in self.generate_data(grid_shape):
157 | # print()
158 |
159 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
160 | var_list = tf.global_variables()
161 |
162 | losses = loss(pred, self.label, self.hw, self.lambda_coord, self.lambda_noobj, self.lambda_cls,
163 | self.iou_threshold, self.config.debug)
164 | opt = tf.train.AdamOptimizer(self.learn_rate)
165 |
166 | with tf.control_dependencies(update_ops):
167 | op = opt.minimize(losses)
168 |
169 | # summary
170 | writer = tf.summary.FileWriter(self.log_path, max_queue=-1)
171 | img_tensor = tf.placeholder(tf.float32, [2 * self.batch_size] + self.hw + [3])
172 |
173 | with tf.name_scope('loss'):
174 | train_loss_tensor = tf.placeholder(tf.float32)
175 | val_loss_tensor = tf.placeholder(tf.float32)
176 | tf.summary.scalar('train_loss', train_loss_tensor)
177 | tf.summary.scalar('val_loss', val_loss_tensor)
178 |
179 | with tf.name_scope('mAP'):
180 | for iou in self.ious_thres:
181 | with tf.name_scope('iou{}'.format(iou)):
182 | exec('map_with_iou{} = tf.placeholder(tf.float32)'.format(int(iou * 100))) # dynamic per-threshold placeholder (relies on CPython exec/locals behaviour)
183 | exec('tf.summary.scalar("mAP", map_with_iou{})'.format(int(iou * 100)))
184 |
185 | with tf.name_scope('per_class_AP'):
186 | for iou in self.ious_thres:
187 | with tf.name_scope('iou{}'.format(iou)):
188 | for per_cls in self.classes:
189 | per_cls = per_cls.replace(' ', '_')
190 | exec('ap_{}_with_iou{} = tf.placeholder(tf.float32)'.format(per_cls, int(iou * 100)))
191 | exec('tf.summary.scalar("{}", ap_{}_with_iou{})'.format(per_cls, per_cls, int(iou * 100)))
192 |
193 | tf.summary.image('img', img_tensor, 2 * self.batch_size)
194 | summary = tf.summary.merge_all()
195 |
196 | conf = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
197 | sess = tf.Session(config=conf)
198 | # sess = tf_debug.LocalCLIDebugWrapperSession(sess)
199 | # sess = tf_debug.TensorBoardDebugWrapperSession(sess, "PC-DAIXILI:6001")
200 |
201 | saver = tf.train.Saver(var_list=var_list, max_to_keep=5)
202 | # saver = tf.train.Saver()
203 |
204 | # init
205 | init = tf.global_variables_initializer()
206 | sess.run(init)
207 |
208 | if len(self.pretrain_path):
209 | flag = 0
210 | try:
211 | print('try to restore the whole graph')
212 | saver.restore(sess, self.pretrain_path)
213 | print('successfully restore the whole graph')
214 | except Exception:
215 | print('failed to restore the whole graph')
216 | flag = 1
217 | if flag:
218 | try:
219 | print('try to restore the graph body')
220 | restore_weights = [v for v in var_list if 'yolo_head' not in v.name]
221 | sv = tf.train.Saver(var_list=restore_weights)
222 | sv.restore(sess, self.pretrain_path)
223 | print('successfully restore the graph body')
224 | except Exception:
225 | raise Exception('restore body failed, please check the pretrained weights')
226 |
227 | total_step = int(np.ceil(len(self.train_data) / self.batch_size)) * self.epoch
228 |
229 | print('train on {} samples, val on {} samples, batch size {}, total {} epoch'.format(len(self.train_data),
230 | len(self.val_data),
231 | self.batch_size,
232 | self.epoch))
233 | step = 0
234 | epoch = 0
235 | t0 = time.time()
236 |
237 | DETECTION = defaultdict(lambda: defaultdict(list))
238 | FP_TP = defaultdict(lambda: defaultdict(list))
239 | GT_NUMS = defaultdict(int)
240 |
241 | for data in self.generate_data(grid_shape):
242 | step += 1
243 |
244 | img, label, idx = data
245 | pred_, losses_, _ = sess.run([pred, losses, op], {
246 | self.input: img,
247 | self.label: label,
248 | self.is_training: True
249 | })
250 | t1 = time.time()
251 | print('step:{: ...')  # [garbled in this dump: the remainder of this progress print and source
# lines 252-312 (the per-epoch validation pass that fills vis_img, val_loss_, val_step,
# DETECTION, FP_TP and GT_NUMS) are missing; the elided loop ended with:]
313 | if val_step >= len(self.val_data):
314 | break
315 |
316 | APs, mAPs = cal_mAP(FP_TP, GT_NUMS, self.classes, self.ious_thres)
317 | print(APs)
318 | print(mAPs)
319 | # import pdb
320 | # pdb.set_trace()
321 | val_loss_ /= (val_step / self.batch_size)
322 |
323 | feed_dict = {
324 | img_tensor: np.array(vis_img),
325 | train_loss_tensor: losses_,
326 | val_loss_tensor: val_loss_
327 | }
328 |
329 | for iou in self.ious_thres:
330 | exec('feed_dict[map_with_iou{0}] = mAPs[{1}] '.format(int(iou * 100), iou)) # feed the exec-created placeholders (see the summary setup above)
331 | for per_cls in self.classes:
332 | per_clses = per_cls.replace(' ', '_')
333 | exec(
334 | 'feed_dict[ap_{0}_with_iou{1}] = APs[{2}]["{3}"] '.format(per_clses, int(iou * 100), iou,
335 | per_cls))
336 |
337 | ss = sess.run(summary, feed_dict=feed_dict)
338 | writer.add_summary(ss, epoch)
339 | saver.save(sess, join(self.log_path, split(self.log_path)[-1] + '_model_epoch_{}'.format(epoch)),
340 | write_meta_graph=False, write_state=False)
341 | print('epoch:{} train_loss:{:< .3f} val_loss:{:< .3f}'.format(
342 | epoch, losses_, val_loss_))
343 | epoch += 1
344 | if epoch >= self.epoch:
345 | break
346 |
347 |
348 | if __name__ == '__main__':
349 | configs = get_config()
350 | YOLO(configs).train()
351 |
--------------------------------------------------------------------------------
/net/yolo3_net.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | from util.box_utils import box_iou
4 |
5 | """
6 | (1280 * 640)
7 | input = (640 * 320)
8 | 640 * 320
9 | 320 * 160
10 | 160 * 80
11 | 80 * 40
12 | 40 * 20
13 | 20 * 10
14 | 10 * 5
15 | """
16 | leaky_alpha = 0.1
17 |
18 | xavier_initializer = tf.initializers.glorot_uniform()
19 |
20 |
21 | def conv_block(x, filters, stride, out_channel, net_type, is_training, name='', relu=True):
22 | """
23 | :param x: input, NHWC
24 | :param filters: list [f_h, f_w], kernel size
25 | :param stride: list of int [stride_h, stride_w]
26 | :param out_channel: int, number of output channels
27 | :param net_type: 'cnn', 'mobilenetv1' or 'mobilenetv2'
28 | :param is_training: used in BN
29 | :param name: str
30 | :param relu: boolean, apply BN + activation (True) or add a bias (False)
31 | :return: the block's output tensor
32 | """
33 | with tf.name_scope(name):
34 | in_channel = x.shape[3].value
35 | if net_type == 'cnn':
36 | with tf.name_scope('cnn'):
37 | # weight = tf.Variable(tf.truncated_normal([filters[0], filters[1], in_channel, out_channel], 0, 0.01))
38 | weight = tf.Variable(xavier_initializer([filters[0], filters[1], in_channel, out_channel]))
39 | if stride[0] == 2: # refer to "https://github.com/qqwweee/keras-yolo3/issues/8"
40 | x = tf.pad(x, tf.constant([[0, 0], [1, 0, ], [1, 0], [0, 0]]))
41 | x = tf.nn.conv2d(x, weight, [1, stride[0], stride[1], 1], 'VALID')
42 | else:
43 | x = tf.nn.conv2d(x, weight, [1, stride[0], stride[1], 1], 'SAME')
44 | if relu:
45 | x = tf.layers.batch_normalization(x, training=is_training)
46 | x = tf.nn.leaky_relu(x, leaky_alpha)
47 | else:
48 | bias = tf.Variable(tf.zeros(shape=out_channel))
49 | x += bias
50 | elif net_type == 'mobilenetv1':
51 | with tf.name_scope('depthwise'):
52 | # depthwise_weight = tf.Variable(tf.truncated_normal([filters[0], filters[1], in_channel, 1], 0, 0.01))
53 | depthwise_weight = tf.Variable(xavier_initializer([filters[0], filters[1], in_channel, 1]))
54 | x = tf.nn.depthwise_conv2d(x, depthwise_weight, [1, stride[0], stride[1], 1], 'SAME')
55 | x = tf.layers.batch_normalization(x, training=is_training)
56 | x = tf.nn.relu6(x)
57 |
58 | with tf.name_scope('pointwise'):
59 | # pointwise_weight = tf.Variable(tf.truncated_normal([1, 1, in_channel, out_channel], 0, 0.01))
60 | pointwise_weight = tf.Variable(xavier_initializer([1, 1, in_channel, out_channel]))
61 | x = tf.nn.conv2d(x, pointwise_weight, [1, 1, 1, 1], 'SAME')
62 | if relu:
63 | x = tf.layers.batch_normalization(x, training=is_training)
64 | x = tf.nn.relu6(x)
65 | else:
66 | bias = tf.Variable(tf.zeros(shape=out_channel))
67 | x += bias
68 |
69 | elif net_type == 'mobilenetv2':
70 | tmp_channel = out_channel * 3
71 | with tf.name_scope('expand_pointwise'):
72 | pointwise_weight = tf.Variable(xavier_initializer([1, 1, in_channel, tmp_channel]))
73 | x = tf.nn.conv2d(x, pointwise_weight, [1, 1, 1, 1], 'SAME')
74 | x = tf.layers.batch_normalization(x, training=is_training)
75 | x = tf.nn.relu6(x)
76 | print("Activation function : relu6")
77 | with tf.name_scope('depthwise'):
78 | depthwise_weight = tf.Variable(xavier_initializer([filters[0], filters[1], tmp_channel, 1]))
79 | x = tf.nn.depthwise_conv2d(x, depthwise_weight, [1, stride[0], stride[1], 1], 'SAME')
80 | x = tf.layers.batch_normalization(x, training=is_training)
81 | x = tf.nn.relu6(x)
82 | with tf.name_scope('project_pointwise'):
83 | pointwise_weight = tf.Variable(xavier_initializer([1, 1, tmp_channel, out_channel]))
84 | x = tf.nn.conv2d(x, pointwise_weight, [1, 1, 1, 1], 'SAME')
85 | if relu:
86 | x = tf.layers.batch_normalization(x, training=is_training)
87 | # x = tf.nn.relu6(x)  # linear bottleneck: no activation after the 1x1 projection (MobileNetV2)
88 | else:
89 | bias = tf.Variable(tf.zeros(shape=out_channel))
90 | x += bias
91 | else:
92 | raise Exception('unknown net type, please check')
93 | return x
94 |
95 |
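The stride-2 branch in conv_block above reproduces darknet's downsampling: pad one row/column on the top/left only, then a VALID convolution, so 416 maps to exactly 208 (see the linked keras-yolo3 issue). A quick shape check (TF1 style):

```python
import tensorflow as tf

x = tf.placeholder(tf.float32, [1, 416, 416, 3])
w = tf.Variable(tf.zeros([3, 3, 3, 32]))
x_pad = tf.pad(x, tf.constant([[0, 0], [1, 0], [1, 0], [0, 0]]))  # top/left only
y = tf.nn.conv2d(x_pad, w, [1, 2, 2, 1], 'VALID')
print(y.shape)   # (1, 208, 208, 32): floor((417 - 3) / 2) + 1 == 208
```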
96 | def residual(x, net_type, is_training, out_channel=1, expand_time=1, stride=1):
97 | if net_type in ['cnn', 'mobilenetv1']:
98 | out_channel = x.shape[3].value
99 | shortcut = x
100 | x = conv_block(x, [1, 1], [1, 1], out_channel // 2, net_type='cnn', is_training=is_training)
101 | x = conv_block(x, [3, 3], [1, 1], out_channel, net_type='cnn', is_training=is_training)
102 | x += shortcut
103 |
104 | elif net_type == 'mobilenetv2':  # inverted residual block (MobileNetV2)
105 | shortcut = x
106 | in_channel = x.shape[3].value
107 | tmp_channel = in_channel * expand_time
108 | with tf.name_scope('expand_pointwise'):  # 1x1 expansion: lift to a higher-dimensional space (cf. depthwise-separable convolutions)
109 | pointwise_weight = tf.Variable(xavier_initializer([1, 1, in_channel, tmp_channel]))
110 | x = tf.nn.conv2d(x, pointwise_weight, [1, 1, 1, 1], 'SAME')
111 | x = tf.layers.batch_normalization(x, training=is_training)
112 | x = tf.nn.relu6(x)
113 | with tf.name_scope('depthwise'):  # depthwise convolution
114 | depthwise_weight = tf.Variable(xavier_initializer([3, 3, tmp_channel, 1]))
115 | x = tf.nn.depthwise_conv2d(x, depthwise_weight, [1, stride, stride, 1], 'SAME')
116 | x = tf.layers.batch_normalization(x, training=is_training)
117 | x = tf.nn.relu6(x)
118 | with tf.name_scope('project_pointwise'):  # 1x1 projection
119 | pointwise_weight = tf.Variable(xavier_initializer([1, 1, tmp_channel, out_channel]))
120 | x = tf.nn.conv2d(x, pointwise_weight, [1, 1, 1, 1], 'SAME')
121 | x = tf.layers.batch_normalization(x, training=is_training)
122 | # linear bottleneck: no activation here, to avoid losing information
123 | # x = tf.nn.relu6(x)
124 | x += shortcut  # skip connection (assumes stride 1 and matching channels), mitigates vanishing gradients
125 |
126 | return x
127 |
128 |
129 | def upsample(x, scale):
130 | new_height = x.shape[1] * scale
131 | new_width = x.shape[2] * scale
132 | resized = tf.image.resize_images(x, [new_height, new_width])
133 | return resized
134 |
135 |
136 | def full_yolo_body(x, out_channel, net_type, is_training):
137 | channel = out_channel
138 | if net_type in ['mobilenetv2']:
139 | net_type = 'mobilenetv1'
140 | x = conv_block(x, [1, 1], [1, 1], channel // 2, net_type, is_training=is_training)
141 | x = conv_block(x, [3, 3], [1, 1], channel, net_type, is_training=is_training)
142 | x = conv_block(x, [1, 1], [1, 1], channel // 2, net_type, is_training=is_training)
143 | x = conv_block(x, [3, 3], [1, 1], channel, net_type, is_training=is_training)
144 | x = conv_block(x, [1, 1], [1, 1], channel // 2, net_type, is_training=is_training)
145 | x_route = x
146 | x = conv_block(x, [3, 3], [1, 1], channel, net_type, is_training=is_training)
147 | return x_route, x
148 |
149 |
150 | def full_darknet_body(x, net_type, is_training):  # feature-extraction backbone
151 | """
152 | full darknet body, built according to net_type
153 | :param x:
154 | :param is_training:
155 | :param net_type: cnn / mobilenetv1 / mobilenetv2
156 | :return:
157 | """
158 | if net_type in ['cnn', 'mobilenetv1']:
159 | x = conv_block(x, [3, 3], [1, 1], 32, 'cnn', is_training=is_training)
160 |
161 | # down sample
162 | x = conv_block(x, [3, 3], [2, 2], 64, 'cnn', is_training=is_training)
163 | for i in range(1):
164 | x = residual(x, net_type, is_training)
165 |
166 | # down sample
167 | x = conv_block(x, [3, 3], [2, 2], 128, 'cnn', is_training=is_training)
168 | for i in range(2):
169 | x = residual(x, net_type, is_training)
170 |
171 | # down sample
172 | x = conv_block(x, [3, 3], [2, 2], 256, 'cnn', is_training=is_training)
173 | for i in range(8):
174 | x = residual(x, net_type, is_training)
175 | route2 = x
176 |
177 | # down sample
178 | x = conv_block(x, [3, 3], [2, 2], 512, 'cnn', is_training=is_training)
179 | for i in range(8):
180 | x = residual(x, net_type, is_training)
181 | route1 = x
182 |
183 | # down sample
184 | x = conv_block(x, [3, 3], [2, 2], 1024, 'cnn', is_training=is_training)
185 | for i in range(4):
186 | x = residual(x, net_type, is_training)
187 |
188 | elif net_type == 'mobilenetv2':
189 | 
190 | print('MobileNet V2 ------------------ input image batch shape:', x.shape)
191 | # x is a 416x416 image; note the standard MobileNetV2 input is 224x224, so the layout differs slightly
192 | x = conv_block(x, [3, 3], [2, 2], 32, 'cnn', is_training=is_training)  # regular conv2d, output 208x208x32
193 | 
194 | print('1 ------------------ batch shape:', x.shape)
195 | x = conv_block(x, [3, 3], [2, 2], 16, net_type, is_training=is_training)  # bottleneck conv, output 104x104x16, downsample
196 | x = conv_block(x, [3, 3], [1, 1], 24, net_type, is_training=is_training)  # bottleneck conv, output 104x104x24
197 | x = residual(x, net_type, is_training, 24, 1)  # inverted residual, output 104x104x24
198 | x = conv_block(x, [3, 3], [2, 2], 32, net_type, is_training=is_training)  # bottleneck conv, output 52x52x32, downsample
199 | # print('2 ------------------ batch shape:', x.shape)
200 | for i in range(2):  # inverted residuals, output 52x52x32
201 | x = residual(x, net_type, is_training, 32, 1)
202 | # print('21 ------------------ batch shape:', x.shape)
203 | print('ROUTE2 ------------------ batch shape:', x.shape)
204 | route2 = x
205 | 
206 | # print('4 ------------------ batch shape:', x.shape)
207 | x = conv_block(x, [3, 3], [2, 2], 64, net_type, is_training=is_training)  # bottleneck conv, output 26x26x64, downsample
208 | for i in range(3):  # inverted residuals, output 26x26x64
209 | x = residual(x, net_type, is_training, 64, 6)
210 | x = conv_block(x, [3, 3], [1, 1], 96, net_type, is_training=is_training)  # bottleneck conv, output 26x26x96, channel change
211 | for i in range(2):  # inverted residuals, output 26x26x96
212 | x = residual(x, net_type, is_training, 96, 6)
213 | print('ROUTE1 ------------------ batch shape:', x.shape)
214 | route1 = x
215 | 
216 | # down sample
217 | print('5 ------------------ batch shape:', x.shape)
218 | x = conv_block(x, [3, 3], [2, 2], 160, net_type, is_training=is_training)  # bottleneck conv, output 13x13x160, downsample
219 | for i in range(2):
220 | x = residual(x, net_type, is_training, 160, 1)
221 | x = conv_block(x, [3, 3], [1, 1], 320, net_type, is_training=is_training)  # bottleneck conv, output 13x13x320, channel change
222 | print('Final ------------------ batch shape:', x.shape)
223 | else:
224 | route1, route2 = [], []
225 | return x, route1, route2
226 |
227 |
228 | def full_yolo_head(x, route1, route2, num_class, anchors, net_type, is_training):
229 | with tf.name_scope('body_layer1'):
230 | x_route, x = full_yolo_body(x, 1024, net_type, is_training)
231 | x = conv_block(x, [1, 1], [1, 1], 3 * (5 + num_class), 'cnn', is_training, "yolo_head1", False)
232 | fe1, box1, grid1 = yolo(x, anchors[[6, 7, 8]])
233 |
234 | with tf.name_scope('head_layer2'):
235 | x = conv_block(x_route, [1, 1], [1, 1], x_route.shape[-1].value // 2, net_type, is_training)
236 | x = upsample(x, 2)
237 | x = tf.concat([x, route1], 3)
238 | x_route, x = full_yolo_body(x, 512, net_type, is_training)
239 | x = conv_block(x, [1, 1], [1, 1], 3 * (5 + num_class), 'cnn', is_training, "yolo_head2", False)
240 | fe2, box2, grid2 = yolo(x, anchors[[3, 4, 5]])
241 |
242 | with tf.name_scope('head_layer3'):
243 | x = conv_block(x_route, [1, 1], [1, 1], x_route.shape[-1].value // 2, net_type, is_training)
244 | x = upsample(x, 2)
245 | x = tf.concat([x, route2], 3)
246 | x_route, x = full_yolo_body(x, 256, net_type, is_training)
247 | x = conv_block(x, [1, 1], [1, 1], 3 * (5 + num_class), 'cnn', is_training, "yolo_head3", False)
248 | fe3, box3, grid3 = yolo(x, anchors[[0, 1, 2]])
249 |
250 | fe = tf.concat([fe1, fe2, fe3], 1)
251 | boxes = tf.concat([box1, box2, box3], 1)
252 | return fe, boxes, grid1, grid2, grid3
253 |
254 |
255 | def tiny_darknet_body(x, net_type, is_training):
256 | """
257 | yolo3_tiny build by net_type
258 | :param x:
259 | :param is_training: used in bn
260 | :param net_type: cnn or mobilenet
261 | :return:
262 | """
263 | if net_type in ['mobilenetv1', 'mobilenetv2']:
264 | net_type = 'mobilenetv1'
265 | x = conv_block(x, [3, 3], [1, 1], 16, net_type, is_training)
266 | x = tf.nn.max_pool(x, [1, 2, 2, 1], [1, 2, 2, 1], 'SAME')
267 |
268 | x = conv_block(x, [3, 3], [1, 1], 32, net_type, is_training)
269 | x = tf.nn.max_pool(x, [1, 2, 2, 1], [1, 2, 2, 1], 'SAME')
270 |
271 | x = conv_block(x, [3, 3], [1, 1], 64, net_type, is_training)
272 | x = tf.nn.max_pool(x, [1, 2, 2, 1], [1, 2, 2, 1], 'SAME')
273 |
274 | x = conv_block(x, [3, 3], [1, 1], 128, net_type, is_training)
275 | x = tf.nn.max_pool(x, [1, 2, 2, 1], [1, 2, 2, 1], 'SAME')
276 |
277 | x = conv_block(x, [3, 3], [1, 1], 256, net_type, is_training)
278 | x_route = x
279 | x = tf.nn.max_pool(x, [1, 2, 2, 1], [1, 2, 2, 1], 'SAME')
280 |
281 | x = conv_block(x, [3, 3], [1, 1], 512, net_type, is_training)
282 | x = tf.nn.max_pool(x, [1, 2, 2, 1], [1, 1, 1, 1], 'SAME')  # stride-1 pool keeps the 13x13 spatial size (as in tiny-YOLOv3)
283 |
284 | x = conv_block(x, [3, 3], [1, 1], 1024, net_type, is_training)
285 |
286 | return x, x_route
287 |
288 |
289 | def tiny_yolo_head(x, x_route1, num_class, anchors, net_type, is_training):
290 | with tf.name_scope('head_layer1'):
291 | x = conv_block(x, [1, 1], [1, 1], 256, net_type, is_training)
292 | x_route2 = x
293 | x = conv_block(x, [3, 3], [1, 1], 512, net_type, is_training)
294 | x = conv_block(x, [1, 1], [1, 1], 3 * (5 + num_class), 'cnn', is_training, "yolo_head1", False)
295 | fe1 = x
296 | fe1, box1, grid1 = yolo(fe1, anchors[[3, 4, 5]])
297 |
298 | with tf.name_scope('head_layer2'):
299 | x = conv_block(x_route2, [1, 1], [1, 1], 128, net_type, is_training)
300 | x = upsample(x, 2)
301 | x = tf.concat([x, x_route1], 3)
302 | x = conv_block(x, [3, 3], [1, 1], 256, net_type, is_training)
303 | x = conv_block(x, [1, 1], [1, 1], 3 * (5 + num_class), 'cnn', is_training, "yolo_head2", False)
304 | fe2 = x
305 | fe2, box2, grid2 = yolo(fe2, anchors[[0, 1, 2]])
306 |
307 | fe = tf.concat([fe1, fe2], 1)
308 | box = tf.concat([box1, box2], 1)
309 | return fe, box, grid1, grid2
310 |
311 |
312 | def yolo(f, anchors):
313 | """
314 | convert feature to box and scores
315 | :param f:
316 | :param anchors:
317 | :return:
318 | """
319 | anchor_tensor = tf.constant(anchors, tf.float32)
320 | batchsize = f.shape[0]
321 | f = tf.reshape(f, [f.shape[0], f.shape[1], f.shape[2], 3, -1])
322 | grid_y = tf.tile(tf.reshape(tf.range(f.shape[1]), [1, -1, 1, 1]), [batchsize, 1, f.shape[2], 1])
323 | grid_x = tf.tile(tf.reshape(tf.range(f.shape[2]), [1, 1, -1, 1]), [batchsize, f.shape[1], 1, 1])
324 | grid = tf.tile(tf.cast(tf.concat([grid_x, grid_y], -1), tf.float32)[:, :, :, tf.newaxis, :], (1, 1, 1, 3, 1))
325 |
326 | box_xy = (tf.nn.sigmoid(f[..., :2]) + grid) / tf.cast(grid.shape[::-1][2:4], tf.float32)  # normalize to [0, 1]; grid.shape[::-1][2:4] == (grid_w, grid_h)
327 | box_wh = tf.math.exp(f[..., 2:4]) * anchor_tensor
328 | box_confidence = tf.nn.sigmoid(f[..., 4:5])
329 | classes_score = tf.nn.sigmoid(f[..., 5:])
330 | boxes = tf.reshape(tf.concat([box_xy, box_wh, box_confidence, classes_score], -1), [batchsize, -1, 3, f.shape[4]])
331 | f = tf.reshape(f, [batchsize, -1, 3, f.shape[4]])
332 | return f, boxes, grid
333 |
334 |
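Mirrored in NumPy, the per-cell decode that yolo() performs (a sketch: box_xy comes out normalized to [0, 1] because it is divided by the grid size, while box_wh stays in input pixels since the anchors are in pixels; 85 = 5 + 80 classes is COCO-sized, for illustration only):

```python
import numpy as np

def decode_cell(raw, grid_xy, grid_wh, anchor_wh):
    """raw: (5 + num_classes,) logits for one anchor in one cell."""
    xy = (1 / (1 + np.exp(-raw[:2])) + grid_xy) / grid_wh  # normalized center
    wh = np.exp(raw[2:4]) * anchor_wh                      # input pixels
    conf = 1 / (1 + np.exp(-raw[4]))
    cls = 1 / (1 + np.exp(-raw[5:]))
    return xy, wh, conf, cls

# all-zero logits in cell (6, 6) of a 13x13 grid with a 116x90 anchor:
xy, wh, conf, _ = decode_cell(np.zeros(85), np.array([6, 6]), np.array([13, 13]),
                              np.array([116, 90]))
print(xy, wh, conf)   # [0.5 0.5] [116. 90.] 0.5
```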
335 | def model(x, num_classes, anchors, net_type, is_training, cal_loss=False):
336 | batchsize, height, width, _ = x.get_shape().as_list()
337 | if len(anchors) == 6:
338 | x, x_route = tiny_darknet_body(x, net_type, is_training)
339 | raw_pred, y, *grid = tiny_yolo_head(x, x_route, num_classes, anchors, net_type, is_training)
340 | else:
341 | x, route1, route2 = full_darknet_body(x, net_type, is_training)
342 | raw_pred, y, *grid = full_yolo_head(x, route1, route2, num_classes, anchors, net_type, is_training)
343 |
344 | box_xy, box_wh, box_confidence, classes_score = y[..., :2], y[..., 2:4], y[..., 4:5], y[..., 5:]
345 | box_xy *= tf.constant([width, height], tf.float32)
346 | # box_wh *= tf.constant([width, height], tf.float32)
347 | boxes = tf.concat([box_xy, box_wh, box_confidence, classes_score], -1, name='debug_pred')
348 | 
349 | if cal_loss:
350 | return raw_pred, boxes, grid
351 | else:
352 | return boxes
353 |
354 |
355 | def loss(pred, gts, input_size, lambda_coord, lambda_noobj, lambda_cls, iou_threshold, debug_=False):
356 | """
357 | :param pred: (raw_pred, pred_boxes, grid) from model(..., cal_loss=True); boxes shape (batch_size, num_boxes, 3, 5+num_class) [x0 y0 w h ...]
358 | :param gts: shape = (batch_size, num_boxes, 3, 4+4+1+num_class) [raw xywh, true xywh, mask, classes]
359 | :param input_size: height * width
360 | :param lambda_coord: lambda
361 | :param lambda_noobj: lambda
362 | :param lambda_cls: lambda
363 | :param iou_threshold: iou_threshold
364 | :param debug_:
365 | :return:
366 | """
367 |
368 | def binary_cross(_labels, _pred):
369 | # pred = tf.clip_by_value(pred, 1e-10, 1 - 1e-10)
370 | # return -labels * tf.math.log(pred)
371 | # pred = tf.math.log(pred / (1 - pred))
372 | return tf.nn.sigmoid_cross_entropy_with_logits(labels=_labels, logits=_pred)
373 |
374 | raw_pred, pred_boxes, grid = pred
375 |
376 | raw_gt_xy, raw_gt_wh = gts[..., 0:2], gts[..., 2:4]
377 | true_gt_xy, true_gt_wh = gts[..., 4:6], gts[..., 6:8]
378 | masks = gts[..., 8]
379 | batchsize = masks.shape[0].value
380 | i_height, i_width = input_size
381 |
382 | # cal ignore_mask
383 | ignore_mask = []
384 | for b in range(batchsize):
385 | true_box = tf.boolean_mask(gts[b:b + 1, ..., 4:8], masks[b:b + 1], name='debug_true_box')
386 | with tf.name_scope('debug_iou'):
387 | ious = box_iou(pred_boxes[b:b + 1, ..., :4], true_box)
388 | ious = tf.reduce_max(ious, -1)
389 | ignore_mask_ = tf.where(ious > iou_threshold, tf.zeros_like(ious), tf.ones_like(ious))
390 | ignore_mask.append(ignore_mask_)
391 | ignore_mask = tf.concat(ignore_mask, 0, name='debug_ignore_mask')
392 |
393 | boxes_scale = 2 - true_gt_wh[..., 0] / i_width * true_gt_wh[..., 1] / i_height  # up-weight small boxes: 2 - (w*h)/(W*H)
394 | # boxes_scale = 1
395 |
396 | varss = tf.trainable_variables()
397 | l2_loss = tf.reduce_sum([tf.nn.l2_loss(var) for var in varss]) * 0.001
398 |
399 | masks_noobj = (1 - masks) * ignore_mask
400 |
401 | # n_xywh = tf.reduce_sum(masks, name='debug_n_xywh')
402 | # n_noob = tf.reduce_sum(masks_noobj, name='debug_n_noobj') / 100
403 | n_xywh = batchsize
404 | n_noob = batchsize
405 |
406 | loss_xy = tf.reduce_sum(
407 | lambda_coord * masks * boxes_scale * tf.reduce_sum(
408 | # tf.math.square(raw_gt_xy - tf.math.sigmoid(raw_pred[..., 0:2]))
409 | binary_cross(_labels=raw_gt_xy, _pred=raw_pred[..., 0:2])
410 | , -1), name='debug_loss_xy') / n_xywh
411 | loss_wh = tf.reduce_sum(
412 | lambda_coord * masks * boxes_scale * tf.reduce_sum(
413 | tf.math.square(raw_gt_wh - raw_pred[..., 2:4]),
414 | -1), name='debug_loss_wh') / n_xywh
415 | loss_obj_confidence = tf.reduce_sum(
416 | masks * binary_cross(_labels=masks, _pred=raw_pred[..., 4]), name='debug_loss_obj') / n_xywh
417 |
418 | loss_noobj_confidence = tf.reduce_sum(
419 | lambda_noobj * masks_noobj * binary_cross(_labels=masks, _pred=raw_pred[..., 4]),
420 | name='debug_loss_noobj') / n_noob
421 | loss_cls = tf.reduce_sum(
422 | masks * lambda_cls * tf.reduce_sum(
423 | binary_cross(_labels=gts[..., 9:], _pred=raw_pred[..., 5:]), -1), name='debug_loss_cls'
424 | ) / n_xywh
425 | if debug_:
426 | p = tf.print("loss_xy", loss_xy, "loss_wh", loss_wh, "loss_obj_confidence", loss_obj_confidence,
427 | 'loss_noobj_confidence', loss_noobj_confidence, "loss_cls", loss_cls, "l2_loss", l2_loss)
428 | with tf.control_dependencies([p]):
429 | return loss_xy + loss_wh + loss_obj_confidence + loss_noobj_confidence + loss_cls + l2_loss
430 | return loss_xy + loss_wh + loss_obj_confidence + loss_noobj_confidence + loss_cls + l2_loss
431 |
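boxes_scale above is the usual YOLO trick of up-weighting small boxes: 2 - (w*h)/(W*H) is close to 2 for tiny boxes and exactly 1 for a box covering the whole input. For example:

```python
W = H = 416
for w, h in [(4, 4), (208, 208), (416, 416)]:
    print(2 - w / W * h / H)   # ~1.9999, 1.75, 1.0
```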
--------------------------------------------------------------------------------