├── nets
    ├── __init__.py
    ├── yolo.py
    ├── efficientnet.py
    └── yolo_training.py
├── utils
    ├── __init__.py
    ├── utils_fit.py
    ├── utils.py
    ├── utils_bbox.py
    ├── dataloader.py
    └── callbacks.py
├── logs
    └── README.md
├── VOCdevkit
    └── VOC2007
    │   ├── Annotations
    │       └── README.md
    │   ├── JPEGImages
    │       └── README.md
    │   └── ImageSets
    │       └── Main
    │           └── README.md
├── img
    └── street.jpg
├── model_data
    ├── yolo_anchors.txt
    ├── simhei.ttf
    ├── voc_classes.txt
    └── coco_classes.txt
├── requirements.txt
├── summary.py
├── LICENSE
├── .gitignore
├── utils_coco
    ├── get_map_coco.py
    └── coco_annotation.py
├── kmeans_for_anchors.py
├── voc_annotation.py
├── get_map.py
├── README.md
├── predict.py
├── yolo.py
├── 常见问题汇总.md
└── train.py


/nets/__init__.py:
--------------------------------------------------------------------------------
1 | #


--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | #


--------------------------------------------------------------------------------
/logs/README.md:
--------------------------------------------------------------------------------
1 | 存放训练后的模型


--------------------------------------------------------------------------------
/VOCdevkit/VOC2007/Annotations/README.md:
--------------------------------------------------------------------------------
1 | 存放标签文件


--------------------------------------------------------------------------------
/VOCdevkit/VOC2007/JPEGImages/README.md:
--------------------------------------------------------------------------------
1 | 存放图片文件


--------------------------------------------------------------------------------
/VOCdevkit/VOC2007/ImageSets/Main/README.md:
--------------------------------------------------------------------------------
1 | 存放训练索引文件


--------------------------------------------------------------------------------
/img/street.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bubbliiiing/efficientnet-yolo3-tf2/HEAD/img/street.jpg


--------------------------------------------------------------------------------
/model_data/yolo_anchors.txt:
--------------------------------------------------------------------------------
1 | 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
2 | 


--------------------------------------------------------------------------------
/model_data/simhei.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bubbliiiing/efficientnet-yolo3-tf2/HEAD/model_data/simhei.ttf


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | scipy==1.4.1
2 | numpy==1.18.4
3 | matplotlib==3.2.1
4 | opencv_python==4.2.0.34
5 | tensorflow_gpu==2.2.0
6 | tqdm==4.46.1
7 | Pillow==8.2.0
8 | h5py==2.10.0
9 | 


--------------------------------------------------------------------------------
/model_data/voc_classes.txt:
--------------------------------------------------------------------------------
 1 | aeroplane
 2 | bicycle
 3 | bird
 4 | boat
 5 | bottle
 6 | bus
 7 | car
 8 | cat
 9 | chair
10 | cow
11 | diningtable
12 | dog
13 | horse
14 | motorbike
15 | person
16 | pottedplant
17 | sheep
18 | sofa
19 | train
20 | tvmonitor


--------------------------------------------------------------------------------
/summary.py:
--------------------------------------------------------------------------------
 1 | #--------------------------------------------#
 2 | #   该部分代码用于看网络结构
 3 | #--------------------------------------------#
 4 | from nets.yolo import yolo_body
 5 | from utils.utils import net_flops
 6 | 
 7 | if __name__ == "__main__":
 8 |     input_shape     = [416, 416]
 9 |     anchors_mask    = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
10 |     num_classes     = 80
11 |     phi             = 0
12 |     
13 |     model = yolo_body([input_shape[0], input_shape[1], 3], anchors_mask, num_classes, phi = phi)
14 |     #--------------------------------------------#
15 |     #   查看网络结构网络结构
16 |     #--------------------------------------------#
17 |     model.summary()
18 |     #--------------------------------------------#
19 |     #   计算网络的FLOPS
20 |     #--------------------------------------------#
21 |     net_flops(model, table=False)
22 |     
23 |     #--------------------------------------------#
24 |     #   获得网络每个层的名称与序号
25 |     #--------------------------------------------#
26 |     # for i,layer in enumerate(model.layers):
27 |     #     print(i,layer.name)
28 | 


--------------------------------------------------------------------------------
/model_data/coco_classes.txt:
--------------------------------------------------------------------------------
 1 | person
 2 | bicycle
 3 | car
 4 | motorbike
 5 | aeroplane
 6 | bus
 7 | train
 8 | truck
 9 | boat
10 | traffic light
11 | fire hydrant
12 | stop sign
13 | parking meter
14 | bench
15 | bird
16 | cat
17 | dog
18 | horse
19 | sheep
20 | cow
21 | elephant
22 | bear
23 | zebra
24 | giraffe
25 | backpack
26 | umbrella
27 | handbag
28 | tie
29 | suitcase
30 | frisbee
31 | skis
32 | snowboard
33 | sports ball
34 | kite
35 | baseball bat
36 | baseball glove
37 | skateboard
38 | surfboard
39 | tennis racket
40 | bottle
41 | wine glass
42 | cup
43 | fork
44 | knife
45 | spoon
46 | bowl
47 | banana
48 | apple
49 | sandwich
50 | orange
51 | broccoli
52 | carrot
53 | hot dog
54 | pizza
55 | donut
56 | cake
57 | chair
58 | sofa
59 | pottedplant
60 | bed
61 | diningtable
62 | toilet
63 | tvmonitor
64 | laptop
65 | mouse
66 | remote
67 | keyboard
68 | cell phone
69 | microwave
70 | oven
71 | toaster
72 | sink
73 | refrigerator
74 | book
75 | clock
76 | vase
77 | scissors
78 | teddy bear
79 | hair drier
80 | toothbrush
81 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 Bubbliiiing
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # ignore map, miou, datasets
  2 | map_out/
  3 | miou_out/
  4 | VOCdevkit/
  5 | datasets/
  6 | Medical_Datasets/
  7 | lfw/
  8 | logs/
  9 | model_data/
 10 | .temp_map_out/
 11 | 
 12 | # Byte-compiled / optimized / DLL files
 13 | __pycache__/
 14 | *.py[cod]
 15 | *$py.class
 16 | 
 17 | # C extensions
 18 | *.so
 19 | 
 20 | # Distribution / packaging
 21 | .Python
 22 | build/
 23 | develop-eggs/
 24 | dist/
 25 | downloads/
 26 | eggs/
 27 | .eggs/
 28 | lib/
 29 | lib64/
 30 | parts/
 31 | sdist/
 32 | var/
 33 | wheels/
 34 | pip-wheel-metadata/
 35 | share/python-wheels/
 36 | *.egg-info/
 37 | .installed.cfg
 38 | *.egg
 39 | MANIFEST
 40 | 
 41 | # PyInstaller
 42 | #  Usually these files are written by a python script from a template
 43 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 44 | *.manifest
 45 | *.spec
 46 | 
 47 | # Installer logs
 48 | pip-log.txt
 49 | pip-delete-this-directory.txt
 50 | 
 51 | # Unit test / coverage reports
 52 | htmlcov/
 53 | .tox/
 54 | .nox/
 55 | .coverage
 56 | .coverage.*
 57 | .cache
 58 | nosetests.xml
 59 | coverage.xml
 60 | *.cover
 61 | *.py,cover
 62 | .hypothesis/
 63 | .pytest_cache/
 64 | 
 65 | # Translations
 66 | *.mo
 67 | *.pot
 68 | 
 69 | # Django stuff:
 70 | *.log
 71 | local_settings.py
 72 | db.sqlite3
 73 | db.sqlite3-journal
 74 | 
 75 | # Flask stuff:
 76 | instance/
 77 | .webassets-cache
 78 | 
 79 | # Scrapy stuff:
 80 | .scrapy
 81 | 
 82 | # Sphinx documentation
 83 | docs/_build/
 84 | 
 85 | # PyBuilder
 86 | target/
 87 | 
 88 | # Jupyter Notebook
 89 | .ipynb_checkpoints
 90 | 
 91 | # IPython
 92 | profile_default/
 93 | ipython_config.py
 94 | 
 95 | # pyenv
 96 | .python-version
 97 | 
 98 | # pipenv
 99 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
100 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
101 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
102 | #   install all needed dependencies.
103 | #Pipfile.lock
104 | 
105 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
106 | __pypackages__/
107 | 
108 | # Celery stuff
109 | celerybeat-schedule
110 | celerybeat.pid
111 | 
112 | # SageMath parsed files
113 | *.sage.py
114 | 
115 | # Environments
116 | .env
117 | .venv
118 | env/
119 | venv/
120 | ENV/
121 | env.bak/
122 | venv.bak/
123 | 
124 | # Spyder project settings
125 | .spyderproject
126 | .spyproject
127 | 
128 | # Rope project settings
129 | .ropeproject
130 | 
131 | # mkdocs documentation
132 | /site
133 | 
134 | # mypy
135 | .mypy_cache/
136 | .dmypy.json
137 | dmypy.json
138 | 
139 | # Pyre type checker
140 | .pyre/
141 | 


--------------------------------------------------------------------------------
/utils_coco/get_map_coco.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | 
 4 | import numpy as np
 5 | import tensorflow as tf
 6 | from PIL import Image
 7 | from pycocotools.coco import COCO
 8 | from pycocotools.cocoeval import COCOeval
 9 | from tqdm import tqdm
10 | from utils.utils import cvtColor, preprocess_input, resize_image
11 | from yolo import YOLO
12 | 
13 | gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
14 | for gpu in gpus:
15 |     tf.config.experimental.set_memory_growth(gpu, True)  # turn on memory growth for every GPU found
16 | #----------------------------------------------------------------------------#
17 | #   map_mode selects what this script computes when it is run:
18 | #   map_mode = 0: the whole mAP pipeline (get the predictions, then compute mAP).
19 | #   map_mode = 1: only get the predictions (saved as eval_results.json).
20 | #   map_mode = 2: only compute mAP from a previously saved eval_results.json.
21 | #----------------------------------------------------------------------------#
22 | map_mode            = 0
23 | #-------------------------------------------------------#
24 | #   Paths of the validation-set annotations and images
25 | #-------------------------------------------------------#
26 | cocoGt_path         = 'coco_dataset/annotations/instances_val2017.json'
27 | dataset_img_path    = 'coco_dataset/val2017'
28 | #-------------------------------------------------------#
29 | #   Folder the results are written to (eval_results.json is saved here)
30 | #-------------------------------------------------------#
31 | temp_save_path      = 'map_out/coco_eval'
32 | 
33 | class mAP_YOLO(YOLO):
34 |     #---------------------------------------------------#
35 |     #   检测图片
36 |     #---------------------------------------------------#
37 |     def detect_image(self, image_id, image, results, clsid2catid):
38 |         #---------------------------------------------------------#
39 |         #   在这里将图像转换成RGB图像，防止灰度图在预测时报错。
40 |         #   代码仅仅支持RGB图像的预测，所有其它类型的图像都会转化成RGB
41 |         #---------------------------------------------------------#
42 |         image       = cvtColor(image)
43 |         #---------------------------------------------------------#
44 |         #   给图像增加灰条，实现不失真的resize
45 |         #   也可以直接resize进行识别
46 |         #---------------------------------------------------------#
47 |         image_data  = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
48 |         #---------------------------------------------------------#
49 |         #   添加上batch_size维度，并进行归一化
50 |         #---------------------------------------------------------#
51 |         image_data  = np.expand_dims(preprocess_input(np.array(image_data, dtype='float32')), 0)
52 | 
53 |         #---------------------------------------------------------#
54 |         #   将图像输入网络当中进行预测！
55 |         #---------------------------------------------------------#
56 |         input_image_shape = np.expand_dims(np.array([image.size[1], image.size[0]], dtype='float32'), 0)
57 |         out_boxes, out_scores, out_classes = self.get_pred(image_data, input_image_shape) 
58 | 
59 |         for i, c in enumerate(out_classes):
60 |             result                      = {}
61 |             top, left, bottom, right    = out_boxes[i]
62 | 
63 |             result["image_id"]      = int(image_id)
64 |             result["category_id"]   = clsid2catid[c]
65 |             result["bbox"]          = [float(left),float(top),float(right-left),float(bottom-top)]
66 |             result["score"]         = float(out_scores[i])
67 |             results.append(result)
68 | 
69 |         return results
70 | 
71 | if __name__ == "__main__":
72 |     if not os.path.exists(temp_save_path):
73 |         os.makedirs(temp_save_path)
74 | 
75 |     cocoGt      = COCO(cocoGt_path)
76 |     ids         = list(cocoGt.imgToAnns.keys())
77 |     clsid2catid = cocoGt.getCatIds()
78 | 
79 |     if map_mode == 0 or map_mode == 1:
80 |         yolo = mAP_YOLO(confidence = 0.001, nms_iou = 0.65)
81 | 
82 |         with open(os.path.join(temp_save_path, 'eval_results.json'),"w") as f:
83 |             results = []
84 |             for image_id in tqdm(ids):
85 |                 image_path  = os.path.join(dataset_img_path, cocoGt.loadImgs(image_id)[0]['file_name'])
86 |                 image       = Image.open(image_path)
87 |                 results     = yolo.detect_image(image_id, image, results, clsid2catid)
88 |             json.dump(results, f)
89 | 
90 |     if map_mode == 0 or map_mode == 2:
91 |         cocoDt      = cocoGt.loadRes(os.path.join(temp_save_path, 'eval_results.json'))
92 |         cocoEval    = COCOeval(cocoGt, cocoDt, 'bbox') 
93 |         cocoEval.evaluate()
94 |         cocoEval.accumulate()
95 |         cocoEval.summarize()
96 |         print("Get map done.")
97 | 


--------------------------------------------------------------------------------
/utils_coco/coco_annotation.py:
--------------------------------------------------------------------------------
  1 | #-------------------------------------------------------#
  2 | #   用于处理COCO数据集，根据json文件生成txt文件用于训练
  3 | #-------------------------------------------------------#
  4 | import json
  5 | import os
  6 | from collections import defaultdict
  7 | 
  8 | #-------------------------------------------------------#
  9 | #   Paths of the COCO training-set and validation-set images
 10 | #-------------------------------------------------------#
 11 | train_datasets_path     = "coco_dataset/train2017"
 12 | val_datasets_path       = "coco_dataset/val2017"
 13 | 
 14 | #-------------------------------------------------------#
 15 | #   Paths of the COCO training-set and validation-set annotation files
 16 | #-------------------------------------------------------#
 17 | train_annotation_path   = "coco_dataset/annotations/instances_train2017.json"
 18 | val_annotation_path     = "coco_dataset/annotations/instances_val2017.json"
 19 | 
 20 | #-------------------------------------------------------#
 21 | #   Paths of the generated txt files
 22 | #-------------------------------------------------------#
 23 | train_output_path       = "coco_train.txt"
 24 | val_output_path         = "coco_val.txt"
 25 | 
 26 | if __name__ == "__main__":
 27 |     name_box_id = defaultdict(list)
 28 |     id_name     = dict()
 29 |     f           = open(train_annotation_path, encoding='utf-8')
 30 |     data        = json.load(f)
 31 | 
 32 |     annotations = data['annotations']
 33 |     for ant in annotations:
 34 |         id = ant['image_id']
 35 |         name = os.path.join(train_datasets_path, '%012d.jpg' % id)
 36 |         cat = ant['category_id']
 37 |         if cat >= 1 and cat <= 11:
 38 |             cat = cat - 1
 39 |         elif cat >= 13 and cat <= 25:
 40 |             cat = cat - 2
 41 |         elif cat >= 27 and cat <= 28:
 42 |             cat = cat - 3
 43 |         elif cat >= 31 and cat <= 44:
 44 |             cat = cat - 5
 45 |         elif cat >= 46 and cat <= 65:
 46 |             cat = cat - 6
 47 |         elif cat == 67:
 48 |             cat = cat - 7
 49 |         elif cat == 70:
 50 |             cat = cat - 9
 51 |         elif cat >= 72 and cat <= 82:
 52 |             cat = cat - 10
 53 |         elif cat >= 84 and cat <= 90:
 54 |             cat = cat - 11
 55 |         name_box_id[name].append([ant['bbox'], cat])
 56 | 
 57 |     f = open(train_output_path, 'w')
 58 |     for key in name_box_id.keys():
 59 |         f.write(key)
 60 |         box_infos = name_box_id[key]
 61 |         for info in box_infos:
 62 |             x_min = int(info[0][0])
 63 |             y_min = int(info[0][1])
 64 |             x_max = x_min + int(info[0][2])
 65 |             y_max = y_min + int(info[0][3])
 66 | 
 67 |             box_info = " %d,%d,%d,%d,%d" % (
 68 |                 x_min, y_min, x_max, y_max, int(info[1]))
 69 |             f.write(box_info)
 70 |         f.write('\n')
 71 |     f.close()
 72 | 
 73 |     name_box_id = defaultdict(list)
 74 |     id_name     = dict()
 75 |     f           = open(val_annotation_path, encoding='utf-8')
 76 |     data        = json.load(f)
 77 | 
 78 |     annotations = data['annotations']
 79 |     for ant in annotations:
 80 |         id = ant['image_id']
 81 |         name = os.path.join(val_datasets_path, '%012d.jpg' % id)
 82 |         cat = ant['category_id']
 83 |         if cat >= 1 and cat <= 11:
 84 |             cat = cat - 1
 85 |         elif cat >= 13 and cat <= 25:
 86 |             cat = cat - 2
 87 |         elif cat >= 27 and cat <= 28:
 88 |             cat = cat - 3
 89 |         elif cat >= 31 and cat <= 44:
 90 |             cat = cat - 5
 91 |         elif cat >= 46 and cat <= 65:
 92 |             cat = cat - 6
 93 |         elif cat == 67:
 94 |             cat = cat - 7
 95 |         elif cat == 70:
 96 |             cat = cat - 9
 97 |         elif cat >= 72 and cat <= 82:
 98 |             cat = cat - 10
 99 |         elif cat >= 84 and cat <= 90:
100 |             cat = cat - 11
101 |         name_box_id[name].append([ant['bbox'], cat])
102 | 
103 |     f = open(val_output_path, 'w')
104 |     for key in name_box_id.keys():
105 |         f.write(key)
106 |         box_infos = name_box_id[key]
107 |         for info in box_infos:
108 |             x_min = int(info[0][0])
109 |             y_min = int(info[0][1])
110 |             x_max = x_min + int(info[0][2])
111 |             y_max = y_min + int(info[0][3])
112 | 
113 |             box_info = " %d,%d,%d,%d,%d" % (
114 |                 x_min, y_min, x_max, y_max, int(info[1]))
115 |             f.write(box_info)
116 |         f.write('\n')
117 |     f.close()
118 | 


--------------------------------------------------------------------------------
/utils/utils_fit.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | 
  3 | import tensorflow as tf
  4 | from nets.yolo import yolo_loss
  5 | from tqdm import tqdm
  6 | 
  7 | 
  8 | #----------------------#
  9 | #   防止bug
 10 | #----------------------#
 11 | def get_train_step_fn(input_shape, anchors, anchors_mask, num_classes, strategy):
 12 |     @tf.function
 13 |     def train_step(imgs, targets, net, optimizer):
 14 |         with tf.GradientTape() as tape:
 15 |             #----------------------#
 16 |             #   计算loss
 17 |             #----------------------#
 18 |             P5_output, P4_output, P3_output = net(imgs, training=True)
 19 |             args        = [P5_output, P4_output, P3_output] + targets
 20 |             loss_value  = yolo_loss(
 21 |                 args, input_shape, anchors, anchors_mask, num_classes, 
 22 |                 balance     = [0.4, 1.0, 4],
 23 |                 box_ratio   = 0.05, 
 24 |                 obj_ratio   = 5 * (input_shape[0] * input_shape[1]) / (416 ** 2),
 25 |                 cls_ratio   = 1 * (num_classes / 80)
 26 |             )
 27 |             loss_value  = tf.reduce_sum(net.losses) + loss_value
 28 |         grads = tape.gradient(loss_value, net.trainable_variables)
 29 |         optimizer.apply_gradients(zip(grads, net.trainable_variables))
 30 |         return loss_value
 31 |     if strategy == None:
 32 |         return train_step
 33 |     else:
 34 |         #----------------------#
 35 |         #   多gpu训练
 36 |         #----------------------#
 37 |         @tf.function
 38 |         def distributed_train_step(images, targets, net, optimizer):
 39 |             per_replica_losses = strategy.run(train_step, args=(images, targets, net, optimizer,))
 40 |             return strategy.reduce(tf.distribute.ReduceOp.MEAN, per_replica_losses,
 41 |                                     axis=None)
 42 |         return distributed_train_step
 43 | 
 44 | #----------------------#
 45 | #   防止bug
 46 | #----------------------#
 47 | def get_val_step_fn(input_shape, anchors, anchors_mask, num_classes, strategy):
 48 |     @tf.function
 49 |     def val_step(imgs, targets, net, optimizer):
 50 |         #----------------------#
 51 |         #   计算loss
 52 |         #----------------------#
 53 |         P5_output, P4_output, P3_output = net(imgs, training=False)
 54 |         args        = [P5_output, P4_output, P3_output] + targets
 55 |         loss_value  = yolo_loss(
 56 |             args, input_shape, anchors, anchors_mask, num_classes, 
 57 |             balance     = [0.4, 1.0, 4],
 58 |             box_ratio   = 0.05, 
 59 |             obj_ratio   = 5 * (input_shape[0] * input_shape[1]) / (416 ** 2),
 60 |             cls_ratio   = 1 * (num_classes / 80)
 61 |         )
 62 |         loss_value  = tf.reduce_sum(net.losses) + loss_value
 63 |         return loss_value
 64 |     if strategy == None:
 65 |         return val_step
 66 |     else:
 67 |         #----------------------#
 68 |         #   多gpu验证
 69 |         #----------------------#
 70 |         @tf.function
 71 |         def distributed_val_step(images, targets, net, optimizer):
 72 |             per_replica_losses = strategy.run(val_step, args=(images, targets, net, optimizer,))
 73 |             return strategy.reduce(tf.distribute.ReduceOp.MEAN, per_replica_losses,
 74 |                                     axis=None)
 75 |         return distributed_val_step
 76 | 
 77 | def fit_one_epoch(net, loss_history, eval_callback, optimizer, epoch, epoch_step, epoch_step_val, gen, gen_val, Epoch, 
 78 |             input_shape, anchors, anchors_mask, num_classes, save_period, save_dir, strategy):
 79 | 
 80 |     train_step  = get_train_step_fn(input_shape, anchors, anchors_mask, num_classes, strategy)
 81 |     val_step    = get_val_step_fn(input_shape, anchors, anchors_mask, num_classes, strategy)
 82 | 
 83 |     loss        = 0
 84 |     val_loss    = 0
 85 |     print('Start Train')
 86 |     with tqdm(total=epoch_step,desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3) as pbar:
 87 |         for iteration, batch in enumerate(gen):
 88 |             if iteration >= epoch_step:
 89 |                 break
 90 |             images, target0, target1, target2 = batch[0], batch[1], batch[2], batch[3]
 91 |             targets     = [target0, target1, target2]
 92 |             loss_value  = train_step(images, targets, net, optimizer)
 93 |             loss        = loss + loss_value
 94 | 
 95 |             pbar.set_postfix(**{'total_loss': float(loss) / (iteration + 1), 
 96 |                                 'lr'        : optimizer._decayed_lr(tf.float32).numpy()})
 97 |             pbar.update(1)
 98 |     print('Finish Train')
 99 |             
100 |     print('Start Validation')
101 |     with tqdm(total=epoch_step_val, desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3) as pbar:
102 |         for iteration, batch in enumerate(gen_val):
103 |             if iteration >= epoch_step_val:
104 |                 break
105 |             images, target0, target1, target2 = batch[0], batch[1], batch[2], batch[3]
106 |             targets     = [target0, target1, target2]
107 |             loss_value  = val_step(images, targets, net, optimizer)
108 |             val_loss    = val_loss + loss_value
109 | 
110 |             pbar.set_postfix(**{'val_loss': float(val_loss) / (iteration + 1)})
111 |             pbar.update(1)
112 |     print('Finish Validation')
113 | 
114 |     logs = {'loss': loss.numpy() / epoch_step, 'val_loss': val_loss.numpy() / epoch_step_val}
115 |     loss_history.on_epoch_end([], logs)
116 |     eval_callback.on_epoch_end(epoch, logs)
117 |     print('Epoch:'+ str(epoch+1) + '/' + str(Epoch))
118 |     print('Total Loss: %.3f || Val Loss: %.3f ' % (loss / epoch_step, val_loss / epoch_step_val))
119 |     
120 |     #-----------------------------------------------#
121 |     #   保存权值
122 |     #-----------------------------------------------#
123 |     if (epoch + 1) % save_period == 0 or epoch + 1 == Epoch:
124 |         net.save_weights(os.path.join(save_dir, "ep%03d-loss%.3f-val_loss%.3f.h5" % (epoch + 1, loss / epoch_step, val_loss / epoch_step_val)))
125 |         
126 |     if len(loss_history.val_loss) <= 1 or (val_loss / epoch_step_val) <= min(loss_history.val_loss):
127 |         print('Save best model to best_epoch_weights.pth')
128 |         net.save_weights(os.path.join(save_dir, "best_epoch_weights.h5"))
129 |             
130 |     net.save_weights(os.path.join(save_dir, "last_epoch_weights.h5"))


--------------------------------------------------------------------------------
/nets/yolo.py:
--------------------------------------------------------------------------------
  1 | from functools import wraps
  2 | 
  3 | from tensorflow.keras.initializers import RandomNormal
  4 | from tensorflow.keras.layers import (BatchNormalization, Concatenate, Conv2D,
  5 |                                      Input, Lambda, LeakyReLU, UpSampling2D)
  6 | from tensorflow.keras.models import Model
  7 | from tensorflow.keras.regularizers import l2
  8 | from utils.utils import compose
  9 | 
 10 | from nets.efficientnet import (EfficientNetB0, EfficientNetB1, EfficientNetB2,
 11 |                                EfficientNetB3, EfficientNetB4, EfficientNetB5,
 12 |                                EfficientNetB6, EfficientNetB7)
 13 | from nets.yolo_training import yolo_loss
 14 | 
 15 | Efficient = [EfficientNetB0,EfficientNetB1,EfficientNetB2,EfficientNetB3,EfficientNetB4,EfficientNetB5,EfficientNetB6,EfficientNetB7]  # backbone constructors; yolo_body indexes this list with phi
 16 | 
 17 | #------------------------------------------------------#
 18 | #   单次卷积
 19 | #   DarknetConv2D
 20 | #------------------------------------------------------#
 21 | @wraps(Conv2D)
 22 | def DarknetConv2D(*args, **kwargs):
 23 |     darknet_conv_kwargs = {'kernel_initializer' : RandomNormal(stddev=0.02), 'kernel_regularizer' : l2(kwargs.get('weight_decay', 5e-4))}
 24 |     darknet_conv_kwargs['padding'] = 'valid' if kwargs.get('strides')==(2, 2) else 'same'   
 25 |     try:
 26 |         del kwargs['weight_decay']
 27 |     except:
 28 |         pass
 29 |     darknet_conv_kwargs.update(kwargs)
 30 |     return Conv2D(*args, **darknet_conv_kwargs)
 31 |     
 32 | #---------------------------------------------------#
 33 | #   卷积块 -> 卷积 + 标准化 + 激活函数
 34 | #   DarknetConv2D + BatchNormalization + LeakyReLU
 35 | #---------------------------------------------------#
 36 | def DarknetConv2D_BN_Leaky(*args, **kwargs):
 37 |     no_bias_kwargs = {'use_bias': False}
 38 |     no_bias_kwargs.update(kwargs)
 39 |     return compose( 
 40 |         DarknetConv2D(*args, **no_bias_kwargs),
 41 |         BatchNormalization(),
 42 |         LeakyReLU(alpha=0.1))
 43 | 
 44 | #---------------------------------------------------#
 45 | #   特征层->最后的输出
 46 | #---------------------------------------------------#
 47 | def make_five_conv(x, num_filters, weight_decay=5e-4):
 48 |     x = DarknetConv2D_BN_Leaky(num_filters, (1,1), weight_decay=weight_decay)(x)
 49 |     x = DarknetConv2D_BN_Leaky(num_filters*2, (3,3), weight_decay=weight_decay)(x)
 50 |     x = DarknetConv2D_BN_Leaky(num_filters, (1,1), weight_decay=weight_decay)(x)
 51 |     x = DarknetConv2D_BN_Leaky(num_filters*2, (3,3), weight_decay=weight_decay)(x)
 52 |     x = DarknetConv2D_BN_Leaky(num_filters, (1,1), weight_decay=weight_decay)(x)
 53 |     return x
 54 | 
 55 | def make_yolo_head(x, num_filters, out_filters, weight_decay=5e-4):
 56 |     y = DarknetConv2D_BN_Leaky(num_filters*2, (3,3), weight_decay=weight_decay)(x)
 57 |     y = DarknetConv2D(out_filters, (1,1), weight_decay=weight_decay)(y)
 58 |     return y
 59 | 
 60 | # Build the FPN on an EfficientNet backbone and return a Model with three prediction outputs.
 61 | # phi indexes the Efficient list (backbone choice); weight_decay is forwarded to the conv blocks.
 62 | # Output i is built with len(anchors_mask[i]) * (num_classes + 5) filters.
 63 | def yolo_body(input_shape, anchors_mask, num_classes, phi = 0, weight_decay=5e-4):
 64 |     inputs      = Input(input_shape)
 65 |     #---------------------------------------------------#
 66 |     #   Build the EfficientNet backbone (EfficientNetB0 taken as the example).
 67 |     #   Three effective feature maps are used; per the original note their shapes are:
 68 |     #   52, 52, 40
 69 |     #   26, 26, 112
 70 |     #   13, 13, 320
 71 |     #---------------------------------------------------#
 72 |     feats, filters_outs = Efficient[phi](inputs = inputs)
 73 |     feat1 = feats[2]
 74 |     feat2 = feats[4]
 75 |     feat3 = feats[6]
 76 | 
 77 |     #------------------------------------------------------------------------#
 78 |     #   Build the FPN using the output channel counts reported by the backbone
 79 |     #------------------------------------------------------------------------#
 80 | 
 81 |     x   = make_five_conv(feat3, int(filters_outs[2]), weight_decay)
 82 |     #---------------------------------------------------#
 83 |     #   First prediction output
 84 |     #   out0 = (batch_size, 255, 13, 13) per the original note — NOTE(review): axis order unconfirmed
 85 |     #---------------------------------------------------#
 86 |     P5  = make_yolo_head(x, int(filters_outs[2]), len(anchors_mask[0]) * (num_classes+5), weight_decay)
 87 | 
 88 |     x   = compose(DarknetConv2D_BN_Leaky(int(filters_outs[1]), (1,1), weight_decay=weight_decay), UpSampling2D(2))(x)
 89 | 
 90 |     x   = Concatenate()([x, feat2])
 91 |     x   = make_five_conv(x, int(filters_outs[1]), weight_decay)
 92 |     #---------------------------------------------------#
 93 |     #   Second prediction output
 94 |     #   out1 = (batch_size, 255, 26, 26) per the original note — NOTE(review): axis order unconfirmed
 95 |     #---------------------------------------------------#
 96 |     P4  = make_yolo_head(x, int(filters_outs[1]), len(anchors_mask[1]) * (num_classes+5), weight_decay)
 97 | 
 98 |     x   = compose(DarknetConv2D_BN_Leaky(int(filters_outs[0]), (1,1), weight_decay=weight_decay), UpSampling2D(2))(x)
 99 | 
100 |     x   = Concatenate()([x, feat1])
101 |     x   = make_five_conv(x, int(filters_outs[0]), weight_decay)
102 |     #---------------------------------------------------#
103 |     #   Third prediction output
104 |     #   out2 = (batch_size, 255, 52, 52) per the original note — NOTE(review): axis order unconfirmed
105 |     #---------------------------------------------------#
106 |     P3  = make_yolo_head(x, int(filters_outs[0]), len(anchors_mask[2]) * (num_classes+5), weight_decay)
107 |     return Model(inputs, [P5, P4, P3])
108 | 
109 | 
def get_train_model(model_body, input_shape, num_classes, anchors, anchors_mask):
    """Wrap ``model_body`` into a training model whose output is the YOLO loss.

    One ground-truth ``Input`` is created per entry of ``anchors_mask``; the
    body's outputs and those inputs are fed to a ``Lambda`` layer named
    ``'yolo_loss'`` that evaluates ``yolo_loss``.

    Args:
        model_body: Detection model providing ``.input`` and ``.output``.
        input_shape: ``[height, width]`` of the network input.
        num_classes: Number of object classes.
        anchors: Anchor boxes, forwarded untouched to ``yolo_loss``.
        anchors_mask: Per-head lists of anchor indices.

    Returns:
        A ``Model`` taking ``[model_body.input, *y_true]`` and producing the loss.
    """
    # Downsampling factor of each detection head, keyed by head index.
    head_strides = {0: 32, 1: 16, 2: 8}

    y_true = []
    for head in range(len(anchors_mask)):
        stride = head_strides[head]
        target_shape = (
            input_shape[0] // stride,
            input_shape[1] // stride,
            len(anchors_mask[head]),
            num_classes + 5,
        )
        y_true.append(Input(shape = target_shape))

    # Loss weights: objectness scales with the input area relative to 416x416,
    # classification scales with the class count relative to 80.
    loss_arguments = {
        'input_shape'       : input_shape,
        'anchors'           : anchors,
        'anchors_mask'      : anchors_mask,
        'num_classes'       : num_classes,
        'balance'           : [0.4, 1.0, 4],
        'box_ratio'         : 0.05,
        'obj_ratio'         : 5 * (input_shape[0] * input_shape[1]) / (416 ** 2),
        'cls_ratio'         : 1 * (num_classes / 80)
    }
    loss_layer = Lambda(
        yolo_loss,
        output_shape    = (1, ),
        name            = 'yolo_loss',
        arguments       = loss_arguments,
    )
    model_loss = loss_layer([*model_body.output, *y_true])
    return Model([model_body.input, *y_true], model_loss)
130 | 


--------------------------------------------------------------------------------
/kmeans_for_anchors.py:
--------------------------------------------------------------------------------
  1 | #-------------------------------------------------------------------------------------------------------#
  2 | #   kmeans虽然会对数据集中的框进行聚类，但是很多数据集由于框的大小相近，聚类出来的9个框相差不大，
  3 | #   这样的框反而不利于模型的训练。因为不同的特征层适合不同大小的先验框，shape越小的特征层适合越大的先验框
  4 | #   原始网络的先验框已经按大中小比例分配好了，不进行聚类也会有非常好的效果。
  5 | #-------------------------------------------------------------------------------------------------------#
  6 | import glob
  7 | import xml.etree.ElementTree as ET
  8 | 
  9 | import matplotlib.pyplot as plt
 10 | import numpy as np
 11 | from tqdm import tqdm
 12 | 
 13 | 
 14 | def cas_iou(box, cluster):
 15 |     x = np.minimum(cluster[:, 0], box[0])
 16 |     y = np.minimum(cluster[:, 1], box[1])
 17 | 
 18 |     intersection = x * y
 19 |     area1 = box[0] * box[1]
 20 | 
 21 |     area2 = cluster[:,0] * cluster[:,1]
 22 |     iou = intersection / (area1 + area2 - intersection)
 23 | 
 24 |     return iou
 25 | 
 26 | def avg_iou(box, cluster):
 27 |     return np.mean([np.max(cas_iou(box[i], cluster)) for i in range(box.shape[0])])
 28 | 
 29 | def kmeans(box, k):
 30 |     #-------------------------------------------------------------#
 31 |     #   取出一共有多少框
 32 |     #-------------------------------------------------------------#
 33 |     row = box.shape[0]
 34 |     
 35 |     #-------------------------------------------------------------#
 36 |     #   每个框各个点的位置
 37 |     #-------------------------------------------------------------#
 38 |     distance = np.empty((row, k))
 39 |     
 40 |     #-------------------------------------------------------------#
 41 |     #   最后的聚类位置
 42 |     #-------------------------------------------------------------#
 43 |     last_clu = np.zeros((row, ))
 44 | 
 45 |     np.random.seed()
 46 | 
 47 |     #-------------------------------------------------------------#
 48 |     #   随机选5个当聚类中心
 49 |     #-------------------------------------------------------------#
 50 |     cluster = box[np.random.choice(row, k, replace = False)]
 51 | 
 52 |     iter = 0
 53 |     while True:
 54 |         #-------------------------------------------------------------#
 55 |         #   计算当前框和先验框的宽高比例
 56 |         #-------------------------------------------------------------#
 57 |         for i in range(row):
 58 |             distance[i] = 1 - cas_iou(box[i], cluster)
 59 |         
 60 |         #-------------------------------------------------------------#
 61 |         #   取出最小点
 62 |         #-------------------------------------------------------------#
 63 |         near = np.argmin(distance, axis=1)
 64 | 
 65 |         if (last_clu == near).all():
 66 |             break
 67 |         
 68 |         #-------------------------------------------------------------#
 69 |         #   求每一个类的中位点
 70 |         #-------------------------------------------------------------#
 71 |         for j in range(k):
 72 |             cluster[j] = np.median(
 73 |                 box[near == j],axis=0)
 74 | 
 75 |         last_clu = near
 76 |         if iter % 5 == 0:
 77 |             print('iter: {:d}. avg_iou:{:.2f}'.format(iter, avg_iou(box, cluster)))
 78 |         iter += 1
 79 | 
 80 |     return cluster, near
 81 | 
 82 | def load_data(path):
 83 |     data = []
 84 |     #-------------------------------------------------------------#
 85 |     #   对于每一个xml都寻找box
 86 |     #-------------------------------------------------------------#
 87 |     for xml_file in tqdm(glob.glob('{}/*xml'.format(path))):
 88 |         tree    = ET.parse(xml_file)
 89 |         height  = int(tree.findtext('./size/height'))
 90 |         width   = int(tree.findtext('./size/width'))
 91 |         if height<=0 or width<=0:
 92 |             continue
 93 |         
 94 |         #-------------------------------------------------------------#
 95 |         #   对于每一个目标都获得它的宽高
 96 |         #-------------------------------------------------------------#
 97 |         for obj in tree.iter('object'):
 98 |             xmin = int(float(obj.findtext('bndbox/xmin'))) / width
 99 |             ymin = int(float(obj.findtext('bndbox/ymin'))) / height
100 |             xmax = int(float(obj.findtext('bndbox/xmax'))) / width
101 |             ymax = int(float(obj.findtext('bndbox/ymax'))) / height
102 | 
103 |             xmin = np.float64(xmin)
104 |             ymin = np.float64(ymin)
105 |             xmax = np.float64(xmax)
106 |             ymax = np.float64(ymax)
107 |             # 得到宽高
108 |             data.append([xmax - xmin, ymax - ymin])
109 |     return np.array(data)
110 | 
if __name__ == '__main__':
    np.random.seed(0)
    #-------------------------------------------------------------#
    #   Running this script clusters the boxes found in the xml
    #   files under 'VOCdevkit/VOC2007/Annotations' and writes the
    #   resulting anchors to yolo_anchors.txt
    #-------------------------------------------------------------#
    input_shape = [416, 416]
    anchors_num = 9
    #-------------------------------------------------------------#
    #   Dataset location; VOC-style xml annotations are expected
    #-------------------------------------------------------------#
    path        = 'VOCdevkit/VOC2007/Annotations'

    #-------------------------------------------------------------#
    #   Load every xml; boxes are stored as (width, height)
    #   relative to the image size
    #-------------------------------------------------------------#
    print('Load xmls.')
    data = load_data(path)
    print('Load xmls done.')

    #-------------------------------------------------------------#
    #   Run k-means clustering
    #-------------------------------------------------------------#
    print('K-means boxes.')
    cluster, near   = kmeans(data, anchors_num)
    print('K-means boxes done.')
    # Scale relative sizes to pixels: column 0 is width, column 1 is height.
    scale           = np.array([input_shape[1], input_shape[0]])
    data            = data * scale
    cluster         = cluster * scale

    #-------------------------------------------------------------#
    #   Plot each cluster's members and its center
    #-------------------------------------------------------------#
    for j in range(anchors_num):
        members = data[near == j]
        plt.scatter(members[:, 0], members[:, 1])
        plt.scatter(cluster[j][0], cluster[j][1], marker='x', c='black')
    plt.savefig("kmeans_for_anchors.jpg")
    plt.show()
    print('Save kmeans_for_anchors.jpg in root dir.')

    # Order anchors from smallest to largest area before reporting and saving.
    cluster = cluster[np.argsort(cluster[:, 0] * cluster[:, 1])]
    print('avg_ratio:{:.2f}'.format(avg_iou(data, cluster)))
    print(cluster)

    # Write "w,h, w,h, ..." (values truncated to integers); the context
    # manager closes the file even if a write fails.
    with open("yolo_anchors.txt", 'w') as f:
        f.write(", ".join("%d,%d" % (w, h) for w, h in cluster))
164 | 


--------------------------------------------------------------------------------
/voc_annotation.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import random
  3 | import xml.etree.ElementTree as ET
  4 | 
  5 | import numpy as np
  6 | 
  7 | from utils.utils import get_classes
  8 | 
#--------------------------------------------------------------------------------------------------------------------------------#
#   annotation_mode selects what this script does when run:
#   0: the whole pipeline - write the txt files under VOCdevkit/VOC2007/ImageSets and the training lists
#      2007_train.txt / 2007_val.txt
#   1: only write the txt files under VOCdevkit/VOC2007/ImageSets
#   2: only write the training lists 2007_train.txt / 2007_val.txt
#--------------------------------------------------------------------------------------------------------------------------------#
annotation_mode     = 0
#-------------------------------------------------------------------#
#   Must be edited for your dataset. Used to resolve object classes
#   when writing 2007_train.txt and 2007_val.txt; keep it identical
#   to the classes_path used for training and prediction.
#   If the generated 2007_train.txt contains no objects, classes_path
#   is most likely set incorrectly.
#   Only used when annotation_mode is 0 or 2.
#-------------------------------------------------------------------#
classes_path        = 'model_data/voc_classes.txt'
#--------------------------------------------------------------------------------------------------------------------------------#
#   trainval_percent: ratio of (train + val) to test; by default (train + val) : test = 9 : 1
#   train_percent:    ratio of train to val inside (train + val); by default train : val = 9 : 1
#   Only used when annotation_mode is 0 or 1.
#--------------------------------------------------------------------------------------------------------------------------------#
trainval_percent    = 0.9
train_percent       = 0.9
#-------------------------------------------------------#
#   Folder containing the VOC dataset;
#   defaults to the VOCdevkit folder in the repository root
#-------------------------------------------------------#
VOCdevkit_path  = 'VOCdevkit'

# (year, image_set) pairs for which a '<year>_<image_set>.txt' list is generated.
VOCdevkit_sets  = [('2007', 'train'), ('2007', 'val')]
classes, _      = get_classes(classes_path)

#-------------------------------------------------------#
#   Counters: images per set and objects per class
#-------------------------------------------------------#
photo_nums  = np.zeros(len(VOCdevkit_sets))
nums        = np.zeros(len(classes))
 45 | def convert_annotation(year, image_id, list_file):
 46 |     in_file = open(os.path.join(VOCdevkit_path, 'VOC%s/Annotations/%s.xml'%(year, image_id)), encoding='utf-8')
 47 |     tree=ET.parse(in_file)
 48 |     root = tree.getroot()
 49 | 
 50 |     for obj in root.iter('object'):
 51 |         difficult = 0 
 52 |         if obj.find('difficult')!=None:
 53 |             difficult = obj.find('difficult').text
 54 |         cls = obj.find('name').text
 55 |         if cls not in classes or int(difficult)==1:
 56 |             continue
 57 |         cls_id = classes.index(cls)
 58 |         xmlbox = obj.find('bndbox')
 59 |         b = (int(float(xmlbox.find('xmin').text)), int(float(xmlbox.find('ymin').text)), int(float(xmlbox.find('xmax').text)), int(float(xmlbox.find('ymax').text)))
 60 |         list_file.write(" " + ",".join([str(a) for a in b]) + ',' + str(cls_id))
 61 |         
 62 |         nums[classes.index(cls)] = nums[classes.index(cls)] + 1
 63 |         
 64 | if __name__ == "__main__":
 65 |     random.seed(0)
 66 |     if " " in os.path.abspath(VOCdevkit_path):
 67 |         raise ValueError("数据集存放的文件夹路径与图片名称中不可以存在空格，否则会影响正常的模型训练，请注意修改。")
 68 | 
 69 |     if annotation_mode == 0 or annotation_mode == 1:
 70 |         print("Generate txt in ImageSets.")
 71 |         xmlfilepath     = os.path.join(VOCdevkit_path, 'VOC2007/Annotations')
 72 |         saveBasePath    = os.path.join(VOCdevkit_path, 'VOC2007/ImageSets/Main')
 73 |         temp_xml        = os.listdir(xmlfilepath)
 74 |         total_xml       = []
 75 |         for xml in temp_xml:
 76 |             if xml.endswith(".xml"):
 77 |                 total_xml.append(xml)
 78 | 
 79 |         num     = len(total_xml)  
 80 |         list    = range(num)  
 81 |         tv      = int(num*trainval_percent)  
 82 |         tr      = int(tv*train_percent)  
 83 |         trainval= random.sample(list,tv)  
 84 |         train   = random.sample(trainval,tr)  
 85 |         
 86 |         print("train and val size",tv)
 87 |         print("train size",tr)
 88 |         ftrainval   = open(os.path.join(saveBasePath,'trainval.txt'), 'w')  
 89 |         ftest       = open(os.path.join(saveBasePath,'test.txt'), 'w')  
 90 |         ftrain      = open(os.path.join(saveBasePath,'train.txt'), 'w')  
 91 |         fval        = open(os.path.join(saveBasePath,'val.txt'), 'w')  
 92 |         
 93 |         for i in list:  
 94 |             name=total_xml[i][:-4]+'\n'  
 95 |             if i in trainval:  
 96 |                 ftrainval.write(name)  
 97 |                 if i in train:  
 98 |                     ftrain.write(name)  
 99 |                 else:  
100 |                     fval.write(name)  
101 |             else:  
102 |                 ftest.write(name)  
103 |         
104 |         ftrainval.close()  
105 |         ftrain.close()  
106 |         fval.close()  
107 |         ftest.close()
108 |         print("Generate txt in ImageSets done.")
109 | 
110 |     if annotation_mode == 0 or annotation_mode == 2:
111 |         print("Generate 2007_train.txt and 2007_val.txt for train.")
112 |         type_index = 0
113 |         for year, image_set in VOCdevkit_sets:
114 |             image_ids = open(os.path.join(VOCdevkit_path, 'VOC%s/ImageSets/Main/%s.txt'%(year, image_set)), encoding='utf-8').read().strip().split()
115 |             list_file = open('%s_%s.txt'%(year, image_set), 'w', encoding='utf-8')
116 |             for image_id in image_ids:
117 |                 list_file.write('%s/VOC%s/JPEGImages/%s.jpg'%(os.path.abspath(VOCdevkit_path), year, image_id))
118 | 
119 |                 convert_annotation(year, image_id, list_file)
120 |                 list_file.write('\n')
121 |             photo_nums[type_index] = len(image_ids)
122 |             type_index += 1
123 |             list_file.close()
124 |         print("Generate 2007_train.txt and 2007_val.txt for train done.")
125 |         
126 |         def printTable(List1, List2):
127 |             for i in range(len(List1[0])):
128 |                 print("|", end=' ')
129 |                 for j in range(len(List1)):
130 |                     print(List1[j][i].rjust(int(List2[j])), end=' ')
131 |                     print("|", end=' ')
132 |                 print()
133 | 
134 |         str_nums = [str(int(x)) for x in nums]
135 |         tableData = [
136 |             classes, str_nums
137 |         ]
138 |         colWidths = [0]*len(tableData)
139 |         len1 = 0
140 |         for i in range(len(tableData)):
141 |             for j in range(len(tableData[i])):
142 |                 if len(tableData[i][j]) > colWidths[i]:
143 |                     colWidths[i] = len(tableData[i][j])
144 |         printTable(tableData, colWidths)
145 | 
146 |         if photo_nums[0] <= 500:
147 |             print("训练集数量小于500，属于较小的数据量，请注意设置较大的训练世代（Epoch）以满足足够的梯度下降次数（Step）。")
148 | 
149 |         if np.sum(nums) == 0:
150 |             print("在数据集中并未获得任何目标，请注意修改classes_path对应自己的数据集，并且保证标签名字正确，否则训练将会没有任何效果！")
151 |             print("在数据集中并未获得任何目标，请注意修改classes_path对应自己的数据集，并且保证标签名字正确，否则训练将会没有任何效果！")
152 |             print("在数据集中并未获得任何目标，请注意修改classes_path对应自己的数据集，并且保证标签名字正确，否则训练将会没有任何效果！")
153 |             print("（重要的事情说三遍）。")
154 | 


--------------------------------------------------------------------------------
/get_map.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import xml.etree.ElementTree as ET
  3 | 
  4 | import tensorflow as tf
  5 | from PIL import Image
  6 | from tqdm import tqdm
  7 | 
  8 | from utils.utils import get_classes
  9 | from utils.utils_map import get_coco_map, get_map
 10 | from yolo import YOLO
 11 | 
# Turn on memory growth for every GPU TensorFlow lists, so memory is
# allocated on demand rather than reserved up front.
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
 15 |     
 16 | if __name__ == "__main__":
 17 |     '''
 18 |     Recall和Precision不像AP是一个面积的概念，因此在门限值（Confidence）不同时，网络的Recall和Precision值是不同的。
 19 |     默认情况下，本代码计算的Recall和Precision代表的是当门限值（Confidence）为0.5时，所对应的Recall和Precision值。
 20 | 
 21 |     受到mAP计算原理的限制，网络在计算mAP时需要获得近乎所有的预测框，这样才可以计算不同门限条件下的Recall和Precision值
 22 |     因此，本代码获得的map_out/detection-results/里面的txt的框的数量一般会比直接predict多一些，目的是列出所有可能的预测框，
 23 |     '''
 24 |     #------------------------------------------------------------------------------------------------------------------#
 25 |     #   map_mode用于指定该文件运行时计算的内容
 26 |     #   map_mode为0代表整个map计算流程，包括获得预测结果、获得真实框、计算VOC_map。
 27 |     #   map_mode为1代表仅仅获得预测结果。
 28 |     #   map_mode为2代表仅仅获得真实框。
 29 |     #   map_mode为3代表仅仅计算VOC_map。
 30 |     #   map_mode为4代表利用COCO工具箱计算当前数据集的0.50:0.95map。需要获得预测结果、获得真实框后并安装pycocotools才行
 31 |     #-------------------------------------------------------------------------------------------------------------------#
 32 |     map_mode        = 0
 33 |     #--------------------------------------------------------------------------------------#
 34 |     #   此处的classes_path用于指定需要测量VOC_map的类别
 35 |     #   一般情况下与训练和预测所用的classes_path一致即可
 36 |     #--------------------------------------------------------------------------------------#
 37 |     classes_path    = 'model_data/voc_classes.txt'
 38 |     #--------------------------------------------------------------------------------------#
 39 |     #   MINOVERLAP用于指定想要获得的mAP0.x，mAP0.x的意义是什么请同学们百度一下。
 40 |     #   比如计算mAP0.75，可以设定MINOVERLAP = 0.75。
 41 |     #
 42 |     #   当某一预测框与真实框重合度大于MINOVERLAP时，该预测框被认为是正样本，否则为负样本。
 43 |     #   因此MINOVERLAP的值越大，预测框要预测的越准确才能被认为是正样本，此时算出来的mAP值越低，
 44 |     #--------------------------------------------------------------------------------------#
 45 |     MINOVERLAP      = 0.5
 46 |     #--------------------------------------------------------------------------------------#
 47 |     #   受到mAP计算原理的限制，网络在计算mAP时需要获得近乎所有的预测框，这样才可以计算mAP
 48 |     #   因此，confidence的值应当设置的尽量小进而获得全部可能的预测框。
 49 |     #   
 50 |     #   该值一般不调整。因为计算mAP需要获得近乎所有的预测框，此处的confidence不能随便更改。
 51 |     #   想要获得不同门限值下的Recall和Precision值，请修改下方的score_threhold。
 52 |     #--------------------------------------------------------------------------------------#
 53 |     confidence      = 0.001
 54 |     #--------------------------------------------------------------------------------------#
 55 |     #   预测时使用到的非极大抑制值的大小，越大表示非极大抑制越不严格。
 56 |     #   
 57 |     #   该值一般不调整。
 58 |     #--------------------------------------------------------------------------------------#
 59 |     nms_iou         = 0.5
 60 |     #---------------------------------------------------------------------------------------------------------------#
 61 |     #   Recall和Precision不像AP是一个面积的概念，因此在门限值不同时，网络的Recall和Precision值是不同的。
 62 |     #   
 63 |     #   默认情况下，本代码计算的Recall和Precision代表的是当门限值为0.5（此处定义为score_threhold）时所对应的Recall和Precision值。
 64 |     #   因为计算mAP需要获得近乎所有的预测框，上面定义的confidence不能随便更改。
 65 |     #   这里专门定义一个score_threhold用于代表门限值，进而在计算mAP时找到门限值对应的Recall和Precision值。
 66 |     #---------------------------------------------------------------------------------------------------------------#
 67 |     score_threhold  = 0.5
 68 |     #-------------------------------------------------------#
 69 |     #   map_vis用于指定是否开启VOC_map计算的可视化
 70 |     #-------------------------------------------------------#
 71 |     map_vis         = False
 72 |     #-------------------------------------------------------#
 73 |     #   指向VOC数据集所在的文件夹
 74 |     #   默认指向根目录下的VOC数据集
 75 |     #-------------------------------------------------------#
 76 |     VOCdevkit_path  = 'VOCdevkit'
 77 |     #-------------------------------------------------------#
 78 |     #   结果输出的文件夹，默认为map_out
 79 |     #-------------------------------------------------------#
 80 |     map_out_path    = 'map_out'
 81 | 
 82 |     image_ids = open(os.path.join(VOCdevkit_path, "VOC2007/ImageSets/Main/test.txt")).read().strip().split()
 83 | 
 84 |     if not os.path.exists(map_out_path):
 85 |         os.makedirs(map_out_path)
 86 |     if not os.path.exists(os.path.join(map_out_path, 'ground-truth')):
 87 |         os.makedirs(os.path.join(map_out_path, 'ground-truth'))
 88 |     if not os.path.exists(os.path.join(map_out_path, 'detection-results')):
 89 |         os.makedirs(os.path.join(map_out_path, 'detection-results'))
 90 |     if not os.path.exists(os.path.join(map_out_path, 'images-optional')):
 91 |         os.makedirs(os.path.join(map_out_path, 'images-optional'))
 92 | 
 93 |     class_names, _ = get_classes(classes_path)
 94 | 
 95 |     if map_mode == 0 or map_mode == 1:
 96 |         print("Load model.")
 97 |         yolo = YOLO(confidence = confidence, nms_iou = nms_iou)
 98 |         print("Load model done.")
 99 | 
100 |         print("Get predict result.")
101 |         for image_id in tqdm(image_ids):
102 |             image_path  = os.path.join(VOCdevkit_path, "VOC2007/JPEGImages/"+image_id+".jpg")
103 |             image       = Image.open(image_path)
104 |             if map_vis:
105 |                 image.save(os.path.join(map_out_path, "images-optional/" + image_id + ".jpg"))
106 |             yolo.get_map_txt(image_id, image, class_names, map_out_path)
107 |         print("Get predict result done.")
108 |         
109 |     if map_mode == 0 or map_mode == 2:
110 |         print("Get ground truth result.")
111 |         for image_id in tqdm(image_ids):
112 |             with open(os.path.join(map_out_path, "ground-truth/"+image_id+".txt"), "w") as new_f:
113 |                 root = ET.parse(os.path.join(VOCdevkit_path, "VOC2007/Annotations/"+image_id+".xml")).getroot()
114 |                 for obj in root.findall('object'):
115 |                     difficult_flag = False
116 |                     if obj.find('difficult')!=None:
117 |                         difficult = obj.find('difficult').text
118 |                         if int(difficult)==1:
119 |                             difficult_flag = True
120 |                     obj_name = obj.find('name').text
121 |                     if obj_name not in class_names:
122 |                         continue
123 |                     bndbox  = obj.find('bndbox')
124 |                     left    = bndbox.find('xmin').text
125 |                     top     = bndbox.find('ymin').text
126 |                     right   = bndbox.find('xmax').text
127 |                     bottom  = bndbox.find('ymax').text
128 | 
129 |                     if difficult_flag:
130 |                         new_f.write("%s %s %s %s %s difficult\n" % (obj_name, left, top, right, bottom))
131 |                     else:
132 |                         new_f.write("%s %s %s %s %s\n" % (obj_name, left, top, right, bottom))
133 |         print("Get ground truth result done.")
134 | 
135 |     if map_mode == 0 or map_mode == 3:
136 |         print("Get map.")
137 |         get_map(MINOVERLAP, True, score_threhold = score_threhold, path = map_out_path)
138 |         print("Get map done.")
139 | 
140 |     if map_mode == 4:
141 |         print("Get map.")
142 |         get_coco_map(class_names = class_names, path = map_out_path)
143 |         print("Get map done.")
144 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ## YOLOV3：You Only Look Once目标检测模型在tf2当中的实现-替换efficientnet主干网络 
  2 | ---
  3 | 
  4 | ## 目录
1. [仓库更新 Top News](#top-news)
  6 | 2. [相关仓库 Related code](#相关仓库)
  7 | 3. [性能情况 Performance](#性能情况)
  8 | 4. [所需环境 Environment](#所需环境)
  9 | 5. [文件下载 Download](#文件下载)
 10 | 6. [训练步骤 How2train](#训练步骤)
 11 | 7. [预测步骤 How2predict](#预测步骤)
 12 | 8. [评估步骤 How2eval](#评估步骤)
9. [参考资料 Reference](#reference)
 14 | 
 15 | ## Top News
 16 | **`2022-04`**:**支持多GPU训练，新增各个种类目标数量计算，新增heatmap。**  
 17 | 
 18 | **`2022-03`**:**进行了大幅度的更新，修改了loss组成，使得分类、目标、回归loss的比例合适、支持step、cos学习率下降法、支持adam、sgd优化器选择、支持学习率根据batch_size自适应调整、新增图片裁剪。**  
 19 | BiliBili视频中的原仓库地址为：https://github.com/bubbliiiing/efficientnet-yolo3-tf2/tree/bilibili
 20 | 
 21 | **`2021-10`**:**进行了大幅度的更新，增加了大量注释、增加了大量可调整参数、对代码的组成模块进行修改、增加fps、视频预测、批量预测等功能。**   
 22 | 
 23 | ## 相关仓库
 24 | | 模型 | 路径 |
 25 | | :----- | :----- |
 26 | YoloV3 | https://github.com/bubbliiiing/yolo3-tf2  
 27 | Efficientnet-Yolo3 | https://github.com/bubbliiiing/efficientnet-yolo3-tf2  
 28 | YoloV4 | https://github.com/bubbliiiing/yolov4-tf2
 29 | YoloV4-tiny | https://github.com/bubbliiiing/yolov4-tiny-tf2
 30 | Mobilenet-Yolov4 | https://github.com/bubbliiiing/mobilenet-yolov4-tf2
 31 | YoloV5-V5.0 | https://github.com/bubbliiiing/yolov5-tf2
 32 | YoloV5-V6.1 | https://github.com/bubbliiiing/yolov5-v6.1-tf2
 33 | YoloX | https://github.com/bubbliiiing/yolox-tf2
 34 | Yolov7 | https://github.com/bubbliiiing/yolov7-tf2
 35 | Yolov7-tiny | https://github.com/bubbliiiing/yolov7-tiny-tf2
 36 | 
 37 | ## 性能情况
 38 | | 训练数据集 | 权值文件名称 | 测试数据集 | 输入图片大小 | mAP 0.5:0.95 | mAP 0.5 |
 39 | | :-----: | :-----: | :------: | :------: | :------: | :-----: |
 40 | | VOC07+12 | [yolov3_efficientnet_b0_voc.h5](https://github.com/bubbliiiing/efficientnet-yolo3-tf2/releases/download/v1.0/yolov3_efficientnet_b0_voc.h5) | VOC-Test07 | 416x416 | - | 79.44
 41 | | VOC07+12 | [yolov3_efficientnet_b1_voc.h5](https://github.com/bubbliiiing/efficientnet-yolo3-tf2/releases/download/v1.0/yolov3_efficientnet_b1_voc.h5) | VOC-Test07 | 416x416 | - | 81.15
 42 | | VOC07+12 | [yolov3_efficientnet_b2_voc.h5](https://github.com/bubbliiiing/efficientnet-yolo3-tf2/releases/download/v1.0/yolov3_efficientnet_b2_voc.h5) | VOC-Test07 | 416x416 | - | 80.35
 43 | 
 44 | ## 所需环境
 45 | tensorflow-gpu==2.2.0
 46 | 
 47 | ## 文件下载
 48 | 训练所需的所有efficientnet权重可以在百度网盘下载   
 49 | 链接: https://pan.baidu.com/s/1jZmNDfGI_YYt998gAGWhuA     
 50 | 提取码: fxqe    
 51 | 
 52 | VOC数据集下载地址如下，里面已经包括了训练集、测试集、验证集（与测试集一样），无需再次划分：  
 53 | 链接: https://pan.baidu.com/s/19Mw2u_df_nBzsC2lg20fQA    
 54 | 提取码: j5ge    
 55 | 
 56 | ## 训练步骤
 57 | ### a、训练VOC07+12数据集
 58 | 1. 数据集的准备   
 59 | **本文使用VOC格式进行训练，训练前需要下载好VOC07+12的数据集，解压后放在根目录**  
 60 | 
 61 | 2. 数据集的处理   
 62 | 修改voc_annotation.py里面的annotation_mode=2，运行voc_annotation.py生成根目录下的2007_train.txt和2007_val.txt。   
 63 | 
 64 | 3. 开始网络训练   
 65 | train.py的默认参数用于训练VOC数据集，直接运行train.py即可开始训练。   
 66 | 
 67 | 4. 训练结果预测   
 68 | 训练结果预测需要用到两个文件，分别是yolo.py和predict.py。我们首先需要去yolo.py里面修改model_path以及classes_path，这两个参数必须要修改。   
 69 | **model_path指向训练好的权值文件，在logs文件夹里。   
 70 | classes_path指向检测类别所对应的txt。**   
 71 | 完成修改后就可以运行predict.py进行检测了。运行后输入图片路径即可检测。   
 72 | 
 73 | ### b、训练自己的数据集
 74 | 1. 数据集的准备  
**本文使用VOC格式进行训练，训练前需要自己制作好数据集。**    
训练前将标签文件放在VOCdevkit文件夹下的VOC2007文件夹下的Annotations中。   
 77 | 训练前将图片文件放在VOCdevkit文件夹下的VOC2007文件夹下的JPEGImages中。   
 78 | 
 79 | 2. 数据集的处理  
 80 | 在完成数据集的摆放之后，我们需要利用voc_annotation.py获得训练用的2007_train.txt和2007_val.txt。   
 81 | 修改voc_annotation.py里面的参数。第一次训练可以仅修改classes_path，classes_path用于指向检测类别所对应的txt。   
 82 | 训练自己的数据集时，可以自己建立一个cls_classes.txt，里面写自己所需要区分的类别。   
 83 | model_data/cls_classes.txt文件内容为：      
 84 | ```python
 85 | cat
 86 | dog
 87 | ...
 88 | ```
 89 | 修改voc_annotation.py中的classes_path，使其对应cls_classes.txt，并运行voc_annotation.py。  
 90 | 
 91 | 3. 开始网络训练  
 92 | **训练的参数较多，均在train.py中，大家可以在下载库后仔细看注释，其中最重要的部分依然是train.py里的classes_path。**  
 93 | **classes_path用于指向检测类别所对应的txt，这个txt和voc_annotation.py里面的txt一样！训练自己的数据集必须要修改！**  
 94 | 修改完classes_path后就可以运行train.py开始训练了，在训练多个epoch后，权值会生成在logs文件夹中。  
 95 | 
 96 | 4. 训练结果预测  
 97 | 训练结果预测需要用到两个文件，分别是yolo.py和predict.py。在yolo.py里面修改model_path以及classes_path。  
 98 | **model_path指向训练好的权值文件，在logs文件夹里。  
 99 | classes_path指向检测类别所对应的txt。**  
100 | 完成修改后就可以运行predict.py进行检测了。运行后输入图片路径即可检测。  
101 | 
102 | ## 预测步骤
103 | ### a、使用预训练权重
104 | 1. 下载完库后解压，在百度网盘下载权值，放入model_data，运行predict.py，输入  
105 | ```python
106 | img/street.jpg
107 | ```
108 | 2. 在predict.py里面进行设置可以进行fps测试和video视频检测。  
109 | ### b、使用自己训练的权重
110 | 1. 按照训练步骤训练。  
111 | 2. 在yolo.py文件里面，在如下部分修改model_path和classes_path使其对应训练好的文件；**model_path对应logs文件夹下面的权值文件，classes_path是model_path对应分的类**。  
112 | ```python
113 | _defaults = {
114 |     #--------------------------------------------------------------------------#
115 |     #   使用自己训练好的模型进行预测一定要修改model_path和classes_path！
116 |     #   model_path指向logs文件夹下的权值文件，classes_path指向model_data下的txt
117 |     #
118 |     #   训练好后logs文件夹下存在多个权值文件，选择验证集损失较低的即可。
119 |     #   验证集损失较低不代表mAP较高，仅代表该权值在验证集上泛化性能较好。
120 |     #   如果出现shape不匹配，同时要注意训练时的model_path和classes_path参数的修改
121 |     #--------------------------------------------------------------------------#
122 |     "model_path"        : 'model_data/yolov3_efficientnet_b2_voc.h5',
123 |     "classes_path"      : 'model_data/voc_classes.txt',
124 |     #---------------------------------------------------------------------#
125 |     #   anchors_path代表先验框对应的txt文件，一般不修改。
126 |     #   anchors_mask用于帮助代码找到对应的先验框，一般不修改。
127 |     #---------------------------------------------------------------------#
128 |     "anchors_path"      : 'model_data/yolo_anchors.txt',
129 |     "anchors_mask"      : [[6, 7, 8], [3, 4, 5], [0, 1, 2]],
130 |     #---------------------------------------------------------------------#
131 |     #   输入图片的大小，必须为32的倍数。
132 |     #---------------------------------------------------------------------#
133 |     "input_shape"       : [416, 416],
134 |     #---------------------------------------------------------------------#
135 |     #   efficientnet的版本
136 |     #   phi = 0代表efficientnet-B0-yolov3
137 |     #   phi = 1代表efficientnet-B1-yolov3
138 |     #   phi = 2代表efficientnet-B2-yolov3   
139 |     #   …… 以此类推
140 |     #---------------------------------------------------------------------#
141 |     "phi"               : 2,
142 |     #---------------------------------------------------------------------#
143 |     #   只有得分大于置信度的预测框会被保留下来
144 |     #---------------------------------------------------------------------#
145 |     "confidence"        : 0.5,
146 |     #---------------------------------------------------------------------#
147 |     #   非极大抑制所用到的nms_iou大小
148 |     #---------------------------------------------------------------------#
149 |     "nms_iou"           : 0.3,
150 |     "max_boxes"         : 100,
151 |     #---------------------------------------------------------------------#
152 |     #   该变量用于控制是否使用letterbox_image对输入图像进行不失真的resize，
153 |     #   在多次测试后，发现关闭letterbox_image直接resize的效果更好
154 |     #---------------------------------------------------------------------#
155 |     "letterbox_image"   : True,
156 | }
157 | ```
158 | 3. 运行predict.py，输入  
159 | ```python
160 | img/street.jpg
161 | ```
162 | 4. 在predict.py里面进行设置可以进行fps测试和video视频检测。  
163 | 
164 | ## 评估步骤 
165 | ### a、评估VOC07+12的测试集
166 | 1. 本文使用VOC格式进行评估。VOC07+12已经划分好了测试集，无需利用voc_annotation.py生成ImageSets文件夹下的txt。
167 | 2. 在yolo.py里面修改model_path以及classes_path。**model_path指向训练好的权值文件，在logs文件夹里。classes_path指向检测类别所对应的txt。**  
168 | 3. 运行get_map.py即可获得评估结果，评估结果会保存在map_out文件夹中。
169 | 
170 | ### b、评估自己的数据集
171 | 1. 本文使用VOC格式进行评估。  
172 | 2. 如果在训练前已经运行过voc_annotation.py文件，代码会自动将数据集划分成训练集、验证集和测试集。如果想要修改测试集的比例，可以修改voc_annotation.py文件下的trainval_percent。trainval_percent用于指定(训练集+验证集)与测试集的比例，默认情况下 (训练集+验证集):测试集 = 9:1。train_percent用于指定(训练集+验证集)中训练集与验证集的比例，默认情况下 训练集:验证集 = 9:1。
173 | 3. 利用voc_annotation.py划分测试集后，前往get_map.py文件修改classes_path，classes_path用于指向检测类别所对应的txt，这个txt和训练时的txt一样。评估自己的数据集必须要修改。
174 | 4. 在yolo.py里面修改model_path以及classes_path。**model_path指向训练好的权值文件，在logs文件夹里。classes_path指向检测类别所对应的txt。**  
175 | 5. 运行get_map.py即可获得评估结果，评估结果会保存在map_out文件夹中。
176 | 
177 | ## Reference
178 | https://github.com/qqwweee/keras-yolo3  
179 | https://github.com/eriklindernoren/PyTorch-YOLOv3   
180 | https://github.com/BobLiu20/YOLOv3_PyTorch
181 | 
182 | 


--------------------------------------------------------------------------------
/predict.py:
--------------------------------------------------------------------------------
  1 | #-----------------------------------------------------------------------#
  2 | #   predict.py将单张图片预测、摄像头检测、FPS测试和目录遍历检测等功能
  3 | #   整合到了一个py文件中，通过指定mode进行模式的修改。
  4 | #-----------------------------------------------------------------------#
  5 | import time
  6 | 
  7 | import cv2
  8 | import numpy as np
  9 | import tensorflow as tf
 10 | from PIL import Image
 11 | 
 12 | from yolo import YOLO
 13 | 
# Turn on memory growth for every GPU that TensorFlow reports, so that (per
# the tf.config API) GPU memory is allocated as needed rather than reserved
# up front by this process.
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
 17 |     
 18 | if __name__ == "__main__":
 19 |     yolo = YOLO()
 20 |     #----------------------------------------------------------------------------------------------------------#
 21 |     #   mode用于指定测试的模式：
 22 |     #   'predict'           表示单张图片预测，如果想对预测过程进行修改，如保存图片，截取对象等，可以先看下方详细的注释
 23 |     #   'video'             表示视频检测，可调用摄像头或者视频进行检测，详情查看下方注释。
 24 |     #   'fps'               表示测试fps，使用的图片是img里面的street.jpg，详情查看下方注释。
 25 |     #   'dir_predict'       表示遍历文件夹进行检测并保存。默认遍历img文件夹，保存img_out文件夹，详情查看下方注释。
 26 |     #   'heatmap'           表示进行预测结果的热力图可视化，详情查看下方注释。
 27 |     #----------------------------------------------------------------------------------------------------------#
 28 |     mode = "predict"
 29 |     #-------------------------------------------------------------------------#
 30 |     #   crop                指定了是否在单张图片预测后对目标进行截取
 31 |     #   count               指定了是否进行目标的计数
 32 |     #   crop、count仅在mode='predict'时有效
 33 |     #-------------------------------------------------------------------------#
 34 |     crop            = False
 35 |     count           = False
 36 |     #----------------------------------------------------------------------------------------------------------#
 37 |     #   video_path          用于指定视频的路径，当video_path=0时表示检测摄像头
 38 |     #                       想要检测视频，则设置如video_path = "xxx.mp4"即可，代表读取出根目录下的xxx.mp4文件。
 39 |     #   video_save_path     表示视频保存的路径，当video_save_path=""时表示不保存
 40 |     #                       想要保存视频，则设置如video_save_path = "yyy.mp4"即可，代表保存为根目录下的yyy.mp4文件。
 41 |     #   video_fps           用于保存的视频的fps
 42 |     #
 43 |     #   video_path、video_save_path和video_fps仅在mode='video'时有效
 44 |     #   保存视频时需要ctrl+c退出或者运行到最后一帧才会完成完整的保存步骤。
 45 |     #----------------------------------------------------------------------------------------------------------#
 46 |     video_path      = 0
 47 |     video_save_path = ""
 48 |     video_fps       = 25.0
 49 |     #----------------------------------------------------------------------------------------------------------#
 50 |     #   test_interval       用于指定测量fps的时候，图片检测的次数。理论上test_interval越大，fps越准确。
 51 |     #   fps_image_path      用于指定测试的fps图片
 52 |     #   
 53 |     #   test_interval和fps_image_path仅在mode='fps'有效
 54 |     #----------------------------------------------------------------------------------------------------------#
 55 |     test_interval   = 100
 56 |     fps_image_path  = "img/street.jpg"
 57 |     #-------------------------------------------------------------------------#
 58 |     #   dir_origin_path     指定了用于检测的图片的文件夹路径
 59 |     #   dir_save_path       指定了检测完图片的保存路径
 60 |     #   
 61 |     #   dir_origin_path和dir_save_path仅在mode='dir_predict'时有效
 62 |     #-------------------------------------------------------------------------#
 63 |     dir_origin_path = "img/"
 64 |     dir_save_path   = "img_out/"
 65 |     #-------------------------------------------------------------------------#
 66 |     #   heatmap_save_path   热力图的保存路径，默认保存在model_data下
 67 |     #   
 68 |     #   heatmap_save_path仅在mode='heatmap'有效
 69 |     #-------------------------------------------------------------------------#
 70 |     heatmap_save_path = "model_data/heatmap_vision.png"
 71 | 
 72 |     if mode == "predict":
 73 |         '''
 74 |         1、如果想要进行检测完的图片的保存，利用r_image.save("img.jpg")即可保存，直接在predict.py里进行修改即可。 
 75 |         2、如果想要获得预测框的坐标，可以进入yolo.detect_image函数，在绘图部分读取top，left，bottom，right这四个值。
 76 |         3、如果想要利用预测框截取下目标，可以进入yolo.detect_image函数，在绘图部分利用获取到的top，left，bottom，right这四个值
 77 |         在原图上利用矩阵的方式进行截取。
 78 |         4、如果想要在预测图上写额外的字，比如检测到的特定目标的数量，可以进入yolo.detect_image函数，在绘图部分对predicted_class进行判断，
 79 |         比如判断if predicted_class == 'car': 即可判断当前目标是否为车，然后记录数量即可。利用draw.text即可写字。
 80 |         '''
 81 |         while True:
 82 |             img = input('Input image filename:')
 83 |             try:
 84 |                 image = Image.open(img)
 85 |             except:
 86 |                 print('Open Error! Try again!')
 87 |                 continue
 88 |             else:
 89 |                 r_image = yolo.detect_image(image, crop = crop, count = count)
 90 |                 r_image.show()
 91 | 
 92 |     elif mode == "video":
 93 |         capture = cv2.VideoCapture(video_path)
 94 |         if video_save_path!="":
 95 |             fourcc  = cv2.VideoWriter_fourcc(*'XVID')
 96 |             size    = (int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)), int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
 97 |             out     = cv2.VideoWriter(video_save_path, fourcc, video_fps, size)
 98 | 
 99 |         ref, frame = capture.read()
100 |         if not ref:
101 |             raise ValueError("未能正确读取摄像头（视频），请注意是否正确安装摄像头（是否正确填写视频路径）。")
102 | 
103 |         fps = 0.0
104 |         while(True):
105 |             t1 = time.time()
106 |             # 读取某一帧
107 |             ref, frame = capture.read()
108 |             if not ref:
109 |                 break
110 |             # 格式转变，BGRtoRGB
111 |             frame = cv2.cvtColor(frame,cv2.COLOR_BGR2RGB)
112 |             # 转变成Image
113 |             frame = Image.fromarray(np.uint8(frame))
114 |             # 进行检测
115 |             frame = np.array(yolo.detect_image(frame))
116 |             # RGBtoBGR满足opencv显示格式
117 |             frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR)
118 |             
119 |             fps  = ( fps + (1./(time.time()-t1)) ) / 2
120 |             print("fps= %.2f"%(fps))
121 |             frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
122 |             
123 |             cv2.imshow("video",frame)
124 |             c= cv2.waitKey(1) & 0xff 
125 |             if video_save_path!="":
126 |                 out.write(frame)
127 | 
128 |             if c==27:
129 |                 capture.release()
130 |                 break
131 | 
132 |         print("Video Detection Done!")
133 |         capture.release()
134 |         if video_save_path!="":
135 |             print("Save processed video to the path :" + video_save_path)
136 |             out.release()
137 |         cv2.destroyAllWindows()
138 |         
139 |     elif mode == "fps":
140 |         img = Image.open(fps_image_path)
141 |         tact_time = yolo.get_FPS(img, test_interval)
142 |         print(str(tact_time) + ' seconds, ' + str(1/tact_time) + 'FPS, @batch_size 1')
143 | 
144 |     elif mode == "dir_predict":
145 |         import os
146 | 
147 |         from tqdm import tqdm
148 | 
149 |         img_names = os.listdir(dir_origin_path)
150 |         for img_name in tqdm(img_names):
151 |             if img_name.lower().endswith(('.bmp', '.dib', '.png', '.jpg', '.jpeg', '.pbm', '.pgm', '.ppm', '.tif', '.tiff')):
152 |                 image_path  = os.path.join(dir_origin_path, img_name)
153 |                 image       = Image.open(image_path)
154 |                 r_image     = yolo.detect_image(image)
155 |                 if not os.path.exists(dir_save_path):
156 |                     os.makedirs(dir_save_path)
157 |                 r_image.save(os.path.join(dir_save_path, img_name.replace(".jpg", ".png")), quality=95, subsampling=0)
158 | 
159 |     elif mode == "heatmap":
160 |         while True:
161 |             img = input('Input image filename:')
162 |             try:
163 |                 image = Image.open(img)
164 |             except:
165 |                 print('Open Error! Try again!')
166 |                 continue
167 |             else:
168 |                 yolo.detect_heatmap(image, heatmap_save_path)
169 |         
170 |     else:
171 |         raise AssertionError("Please specify the correct mode: 'predict', 'video', 'fps' or 'dir_predict'.")
172 | 


--------------------------------------------------------------------------------
/nets/efficientnet.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | from copy import deepcopy
  3 | 
  4 | import tensorflow as tf
  5 | from tensorflow.keras import backend, layers
  6 | 
  7 | #-------------------------------------------------#
  8 | #   一共七个大结构块，每个大结构块都有特定的参数
  9 | #-------------------------------------------------#
 10 | DEFAULT_BLOCKS_ARGS = [
 11 |     {'kernel_size': 3, 'repeats': 1, 'filters_in': 32, 'filters_out': 16,
 12 |      'expand_ratio': 1, 'id_skip': True, 'strides': 1, 'se_ratio': 0.25},
 13 | 
 14 |     {'kernel_size': 3, 'repeats': 2, 'filters_in': 16, 'filters_out': 24,
 15 |      'expand_ratio': 6, 'id_skip': True, 'strides': 2, 'se_ratio': 0.25},
 16 | 
 17 |     {'kernel_size': 5, 'repeats': 2, 'filters_in': 24, 'filters_out': 40,
 18 |      'expand_ratio': 6, 'id_skip': True, 'strides': 2, 'se_ratio': 0.25},
 19 | 
 20 |     {'kernel_size': 3, 'repeats': 3, 'filters_in': 40, 'filters_out': 80,
 21 |      'expand_ratio': 6, 'id_skip': True, 'strides': 2, 'se_ratio': 0.25},
 22 | 
 23 |     {'kernel_size': 5, 'repeats': 3, 'filters_in': 80, 'filters_out': 112,
 24 |      'expand_ratio': 6, 'id_skip': True, 'strides': 1, 'se_ratio': 0.25},
 25 | 
 26 |     {'kernel_size': 5, 'repeats': 4, 'filters_in': 112, 'filters_out': 192,
 27 |      'expand_ratio': 6, 'id_skip': True, 'strides': 2, 'se_ratio': 0.25},
 28 | 
 29 |     {'kernel_size': 3, 'repeats': 1, 'filters_in': 192, 'filters_out': 320,
 30 |      'expand_ratio': 6, 'id_skip': True, 'strides': 1, 'se_ratio': 0.25}
 31 | ]
 32 | 
 33 | #-------------------------------------------------#
 34 | #   Kernel的初始化器
 35 | #-------------------------------------------------#
 36 | CONV_KERNEL_INITIALIZER = {
 37 |     'class_name': 'VarianceScaling',
 38 |     'config': {
 39 |         'scale': 2.0,
 40 |         'mode': 'fan_out',
 41 |         'distribution': 'normal'
 42 |     }
 43 | }
 44 | 
 45 | #-------------------------------------------------#
 46 | #   用于计算卷积层的padding的大小
 47 | #-------------------------------------------------#
 48 | def correct_pad(inputs, kernel_size):
 49 |     img_dim = 1
 50 |     input_size = backend.int_shape(inputs)[img_dim:(img_dim + 2)]
 51 | 
 52 |     if isinstance(kernel_size, int):
 53 |         kernel_size = (kernel_size, kernel_size)
 54 | 
 55 |     if input_size[0] is None:
 56 |         adjust = (1, 1)
 57 |     else:
 58 |         adjust = (1 - input_size[0] % 2, 1 - input_size[1] % 2)
 59 | 
 60 |     correct = (kernel_size[0] // 2, kernel_size[1] // 2)
 61 | 
 62 |     return ((correct[0] - adjust[0], correct[0]),
 63 |             (correct[1] - adjust[1], correct[1]))
 64 | 
 65 | #-------------------------------------------------#
 66 | #   该函数的目的是保证filter的大小可以被8整除
 67 | #-------------------------------------------------#
 68 | def round_filters(filters, divisor, width_coefficient):
 69 |     filters *= width_coefficient
 70 |     new_filters = max(divisor, int(filters + divisor / 2) // divisor * divisor)
 71 |     if new_filters < 0.9 * filters:
 72 |         new_filters += divisor
 73 |     return int(new_filters)
 74 | 
 75 | #-------------------------------------------------#
 76 | #   计算模块的重复次数
 77 | #-------------------------------------------------#
 78 | def round_repeats(repeats, depth_coefficient):
 79 |     return int(math.ceil(depth_coefficient * repeats))
 80 | 
#-------------------------------------------------#
#   One EfficientNet building block
#   (expand -> depthwise conv -> SE -> project).
#-------------------------------------------------#
def block(inputs, activation_fn=tf.nn.swish, drop_rate=0., name='',
          filters_in=32, filters_out=16, kernel_size=3, strides=1,
          expand_ratio=1, se_ratio=0., id_skip=True):
    """Apply a single inverted-residual block to `inputs` and return the result.

    Args:
        inputs: input tensor; BatchNormalization uses axis=3, so a
            channels-last 4-D tensor is assumed.
        activation_fn: activation used after the expand and depthwise stages
            and inside the squeeze-excite reduce convolution.
        drop_rate: rate of the Dropout applied to the block output before the
            residual add; only used when the residual branch is active.
        name: prefix for the names of all layers created here.
        filters_in: number of input channels.
        filters_out: number of channels produced by the projection conv.
        kernel_size: kernel size of the depthwise convolution.
        strides: stride of the depthwise convolution.
        expand_ratio: channel multiplier of the expand stage; 1 skips it.
        se_ratio: squeeze-excite ratio relative to `filters_in`; the SE branch
            is only built when 0 < se_ratio <= 1.
        id_skip: must be True for the residual connection to be added.

    Returns:
        The output tensor of the block.
    """

    filters = filters_in * expand_ratio
    #-------------------------------------------------#
    #   Inverted residual, part 1:
    #   a 1x1 conv raises the channel count to
    #   filters_in * expand_ratio.
    #-------------------------------------------------#
    if expand_ratio != 1:
        x = layers.Conv2D(filters, 1,
                          padding='same',
                          use_bias=False,
                          kernel_initializer=CONV_KERNEL_INITIALIZER,
                          name=name + 'expand_conv')(inputs)
        x = layers.BatchNormalization(axis=3, name=name + 'expand_bn')(x)
        x = layers.Activation(activation_fn, name=name + 'expand_activation')(x)
    else:
        x = inputs

    #------------------------------------------------------#
    #   Part 2: depthwise conv applied per channel.
    #   With stride 2 the input is padded explicitly via
    #   correct_pad and the conv uses 'valid' padding;
    #   otherwise 'same' padding is used.
    #------------------------------------------------------#
    if strides == 2:
        x = layers.ZeroPadding2D(padding=correct_pad(x, kernel_size),
                                 name=name + 'dwconv_pad')(x)
        conv_pad = 'valid'
    else:
        conv_pad = 'same'

    x = layers.DepthwiseConv2D(kernel_size,
                               strides=strides,
                               padding=conv_pad,
                               use_bias=False,
                               depthwise_initializer=CONV_KERNEL_INITIALIZER,
                               name=name + 'dwconv')(x)
    x = layers.BatchNormalization(axis=3, name=name + 'bn')(x)
    x = layers.Activation(activation_fn, name=name + 'activation')(x)

    #------------------------------------------------------#
    #   Squeeze-and-excite: channel attention applied to
    #   the depthwise conv output.
    #------------------------------------------------------#
    if 0 < se_ratio <= 1:
        # The squeezed width is derived from filters_in (not the expanded
        # width) and is at least 1.
        filters_se = max(1, int(filters_in * se_ratio))
        se = layers.GlobalAveragePooling2D(name=name + 'se_squeeze')(x)
        se = layers.Reshape((1, 1, filters), name=name + 'se_reshape')(se)
        #------------------------------------------------------#
        #   Reduce the channels, expand them back, and use a
        #   sigmoid so the per-channel weights lie in (0, 1).
        #------------------------------------------------------#
        se = layers.Conv2D(filters_se, 1,
                           padding='same',
                           activation=activation_fn,
                           kernel_initializer=CONV_KERNEL_INITIALIZER,
                           name=name + 'se_reduce')(se)
        se = layers.Conv2D(filters, 1,
                           padding='same',
                           activation='sigmoid',
                           kernel_initializer=CONV_KERNEL_INITIALIZER,
                           name=name + 'se_expand')(se)
        x = layers.multiply([x, se], name=name + 'se_excite')

    #------------------------------------------------------#
    #   Part 3: 1x1 conv projects down to filters_out
    #   (batch norm only, no activation afterwards).
    #------------------------------------------------------#
    x = layers.Conv2D(filters_out, 1,
                      padding='same',
                      use_bias=False,
                      kernel_initializer=CONV_KERNEL_INITIALIZER,
                      name=name + 'project_conv')(x)
    x = layers.BatchNormalization(axis=3, name=name + 'project_bn')(x)

    #------------------------------------------------------#
    #   Part 4: add the residual connection when the block
    #   keeps both resolution and channel count.
    #------------------------------------------------------#
    if (id_skip is True and strides == 1 and filters_in == filters_out):
        if drop_rate > 0:
            # noise_shape (None, 1, 1, 1): one dropout decision per sample,
            # shared across the remaining three axes.
            x = layers.Dropout(drop_rate,
                               noise_shape=(None, 1, 1, 1),
                               name=name + 'drop')(x)
        x = layers.add([x, inputs], name=name + 'add')

    return x
168 | 
def EfficientNet(width_coefficient,
                 depth_coefficient,
                 drop_connect_rate=0.2,
                 depth_divisor=8,
                 activation_fn=tf.nn.swish,
                 blocks_args=DEFAULT_BLOCKS_ARGS,
                 inputs=None,
                 **kwargs):
    """Build the EfficientNet feature extractor on top of `inputs`.

    Args:
        width_coefficient: multiplier applied to every channel count.
        depth_coefficient: multiplier applied to every stage's repeat count.
        drop_connect_rate: upper bound of the per-block dropout rate; the
            rate grows linearly with the block index.
        depth_divisor: channel counts are rounded to a multiple of this.
        activation_fn: activation function used throughout the network.
        blocks_args: list of per-stage argument dicts; it is deep-copied, so
            the caller's list is not modified.
        inputs: input tensor the network is built on (required).
        **kwargs: accepted for call compatibility and ignored.

    Returns:
        (feats, filters_outs): `feats` holds the output tensor of every stage
        in order; `filters_outs` holds the output channel counts of the
        stages at index 2, 4 and 6 (40, 112 and 320 for width 1.0).

    Raises:
        ValueError: if `inputs` is None.
    """
    if inputs is None:
        raise ValueError('EfficientNet requires an input tensor; pass it as `inputs=`.')
    img_input = inputs

    #-------------------------------------------------#
    #   Stem: 3x3 conv with stride 2.
    #   e.g. 416,416,3 -> 208,208,32 for width 1.0
    #-------------------------------------------------#
    x = img_input
    x = layers.ZeroPadding2D(padding=correct_pad(x, 3),
                             name='stem_conv_pad')(x)
    x = layers.Conv2D(round_filters(32, depth_divisor, width_coefficient), 3,
                      strides=2,
                      padding='valid',
                      use_bias=False,
                      kernel_initializer=CONV_KERNEL_INITIALIZER,
                      name='stem_conv')(x)
    x = layers.BatchNormalization(axis=3, name='stem_bn')(x)
    x = layers.Activation(activation_fn, name='stem_activation')(x)

    #-------------------------------------------------#
    #   Work on a deep copy: the per-stage dicts are
    #   mutated below.
    #-------------------------------------------------#
    blocks_args = deepcopy(blocks_args)

    #-------------------------------------------------#
    #   Total number of blocks that will actually be
    #   built, i.e. after scaling each stage's repeats
    #   by depth_coefficient. Using this total keeps
    #   the per-block dropout rate below
    #   drop_connect_rate for every model size.
    #-------------------------------------------------#
    b = 0
    blocks = float(sum(round_repeats(args['repeats'], depth_coefficient)
                       for args in blocks_args))

    feats = []
    filters_outs = []
    #------------------------------------------------------------------------------#
    #   Loop over the stages; each stage repeats the basic block.
    #   With the default arguments, a 416x416 input and width 1.0 the stage
    #   outputs are:
    #   208,208,16 -> 104,104,24 -> 52,52,40 -> 26,26,80
    #   -> 26,26,112 -> 13,13,192 -> 13,13,320
    #   The channel counts of stages 3, 5 and 7 (index 2, 4, 6) are recorded
    #   in filters_outs.
    #------------------------------------------------------------------------------#
    for (i, args) in enumerate(blocks_args):
        assert args['repeats'] > 0
        args['filters_in'] = round_filters(args['filters_in'], depth_divisor, width_coefficient)
        args['filters_out'] = round_filters(args['filters_out'], depth_divisor, width_coefficient)

        for j in range(round_repeats(args.pop('repeats'), depth_coefficient)):
            # Only the first block of a stage changes resolution / channels.
            if j > 0:
                args['strides'] = 1
                args['filters_in'] = args['filters_out']
            x = block(x, activation_fn, drop_connect_rate * b / blocks,
                      name='block{}{}_'.format(i + 1, chr(j + 97)), **args)
            b += 1
        feats.append(x)
        if i in (2, 4, 6):
            filters_outs.append(args['filters_out'])
    return feats, filters_outs
232 | 
233 | 
def EfficientNetB0(inputs=None, **kwargs):
    """Build EfficientNet-B0 (width x1.0, depth x1.0) on `inputs`."""
    return EfficientNet(width_coefficient=1.0, depth_coefficient=1.0,
                        inputs=inputs, **kwargs)
236 | 
237 | 
def EfficientNetB1(inputs=None, **kwargs):
    """Build EfficientNet-B1 (width x1.0, depth x1.1) on `inputs`."""
    return EfficientNet(width_coefficient=1.0, depth_coefficient=1.1,
                        inputs=inputs, **kwargs)
240 | 
241 | 
def EfficientNetB2(inputs=None, **kwargs):
    """Build EfficientNet-B2 (width x1.1, depth x1.2) on `inputs`."""
    return EfficientNet(width_coefficient=1.1, depth_coefficient=1.2,
                        inputs=inputs, **kwargs)
244 | 
245 | 
def EfficientNetB3(inputs=None, **kwargs):
    """Build EfficientNet-B3 (width x1.2, depth x1.4) on `inputs`."""
    return EfficientNet(width_coefficient=1.2, depth_coefficient=1.4,
                        inputs=inputs, **kwargs)
248 | 
249 | 
def EfficientNetB4(inputs=None, **kwargs):
    """Build EfficientNet-B4 (width x1.4, depth x1.8) on `inputs`."""
    return EfficientNet(width_coefficient=1.4, depth_coefficient=1.8,
                        inputs=inputs, **kwargs)
252 | 
253 | 
def EfficientNetB5(inputs=None, **kwargs):
    """Build EfficientNet-B5 (width x1.6, depth x2.2) on `inputs`."""
    return EfficientNet(width_coefficient=1.6, depth_coefficient=2.2,
                        inputs=inputs, **kwargs)
256 | 
257 | 
258 | 
def EfficientNetB6(inputs=None, **kwargs):
    """Build EfficientNet-B6 (width x1.8, depth x2.6) on `inputs`."""
    return EfficientNet(width_coefficient=1.8, depth_coefficient=2.6,
                        inputs=inputs, **kwargs)
261 | 
262 | 
def EfficientNetB7(inputs=None, **kwargs):
    """Build EfficientNet-B7 (width x2.0, depth x3.1) on `inputs`."""
    return EfficientNet(width_coefficient=2.0, depth_coefficient=3.1,
                        inputs=inputs, **kwargs)
265 | 
266 | 
if __name__ == '__main__':
    # Smoke test. The network has to be built on a real input tensor:
    # with inputs=None the very first correct_pad call fails.
    print(EfficientNetB0(inputs=layers.Input(shape=(416, 416, 3))))
269 | 


--------------------------------------------------------------------------------
/utils/utils.py:
--------------------------------------------------------------------------------
  1 | from functools import reduce
  2 | 
  3 | import numpy as np
  4 | from PIL import Image
  5 | 
  6 | 
  7 | def compose(*funcs):
  8 |     if funcs:
  9 |         return reduce(lambda f, g: lambda *a, **kw: g(f(*a, **kw)), funcs)
 10 |     else:
 11 |         raise ValueError('Composition of empty sequence not supported.')
 12 | 
 13 | #---------------------------------------------------------#
 14 | #   将图像转换成RGB图像，防止灰度图在预测时报错。
 15 | #   代码仅仅支持RGB图像的预测，所有其它类型的图像都会转化成RGB
 16 | #---------------------------------------------------------#
 17 | def cvtColor(image):
 18 |     if len(np.shape(image)) == 3 and np.shape(image)[2] == 3:
 19 |         return image 
 20 |     else:
 21 |         image = image.convert('RGB')
 22 |         return image 
 23 |     
 24 | #---------------------------------------------------#
 25 | #   对输入图像进行resize
 26 | #---------------------------------------------------#
 27 | def resize_image(image, size, letterbox_image):
 28 |     iw, ih  = image.size
 29 |     w, h    = size
 30 |     if letterbox_image:
 31 |         scale   = min(w/iw, h/ih)
 32 |         nw      = int(iw*scale)
 33 |         nh      = int(ih*scale)
 34 | 
 35 |         image   = image.resize((nw,nh), Image.BICUBIC)
 36 |         new_image = Image.new('RGB', size, (128,128,128))
 37 |         new_image.paste(image, ((w-nw)//2, (h-nh)//2))
 38 |     else:
 39 |         new_image = image.resize((w, h), Image.BICUBIC)
 40 |     return new_image
 41 | 
 42 | #---------------------------------------------------#
 43 | #   获得类
 44 | #---------------------------------------------------#
 45 | def get_classes(classes_path):
 46 |     with open(classes_path, encoding='utf-8') as f:
 47 |         class_names = f.readlines()
 48 |     class_names = [c.strip() for c in class_names]
 49 |     return class_names, len(class_names)
 50 | 
 51 | #---------------------------------------------------#
 52 | #   获得先验框
 53 | #---------------------------------------------------#
 54 | def get_anchors(anchors_path):
 55 |     '''loads the anchors from a file'''
 56 |     with open(anchors_path, encoding='utf-8') as f:
 57 |         anchors = f.readline()
 58 |     anchors = [float(x) for x in anchors.split(',')]
 59 |     anchors = np.array(anchors).reshape(-1, 2)
 60 |     return anchors, len(anchors)
 61 | 
def preprocess_input(image):
    """Divide `image` by 255 and return it.

    The division uses `/=`, so for a NumPy array the caller's array itself is
    modified (and an integer array would be rejected by NumPy).
    NOTE(review): presumably called with a float image array holding 0-255
    pixel values - confirm against the callers.
    """
    image /= 255.0
    return image
 65 | 
 66 | def show_config(**kwargs):
 67 |     print('Configurations:')
 68 |     print('-' * 70)
 69 |     print('|%25s | %40s|' % ('keys', 'values'))
 70 |     print('-' * 70)
 71 |     for key, value in kwargs.items():
 72 |         print('|%25s | %40s|' % (str(key), str(value)))
 73 |     print('-' * 70)
 74 | 
 75 | #-------------------------------------------------------------------------------------------------------------------------------#
 76 | #   From https://github.com/ckyrkou/Keras_FLOP_Estimator 
 77 | #   Fix lots of bugs
 78 | #-------------------------------------------------------------------------------------------------------------------------------#
 79 | def net_flops(model, table=False, print_result=True):
 80 |     if (table == True):
 81 |         print("\n")
 82 |         print('%25s | %16s | %16s | %16s | %16s | %6s | %6s' % (
 83 |             'Layer Name', 'Input Shape', 'Output Shape', 'Kernel Size', 'Filters', 'Strides', 'FLOPS'))
 84 |         print('=' * 120)
 85 |         
 86 |     #---------------------------------------------------#
 87 |     #   总的FLOPs
 88 |     #---------------------------------------------------#
 89 |     t_flops = 0
 90 |     factor  = 1e9
 91 | 
 92 |     for l in model.layers:
 93 |         try:
 94 |             #--------------------------------------#
 95 |             #   所需参数的初始化定义
 96 |             #--------------------------------------#
 97 |             o_shape, i_shape, strides, ks, filters = ('', '', ''), ('', '', ''), (1, 1), (0, 0), 0
 98 |             flops   = 0
 99 |             #--------------------------------------#
100 |             #   获得层的名字
101 |             #--------------------------------------#
102 |             name    = l.name
103 |             
104 |             if ('InputLayer' in str(l)):
105 |                 i_shape = l.get_input_shape_at(0)[1:4]
106 |                 o_shape = l.get_output_shape_at(0)[1:4]
107 |                 
108 |             #--------------------------------------#
109 |             #   Reshape层
110 |             #--------------------------------------#
111 |             elif ('Reshape' in str(l)):
112 |                 i_shape = l.get_input_shape_at(0)[1:4]
113 |                 o_shape = l.get_output_shape_at(0)[1:4]
114 | 
115 |             #--------------------------------------#
116 |             #   填充层
117 |             #--------------------------------------#
118 |             elif ('Padding' in str(l)):
119 |                 i_shape = l.get_input_shape_at(0)[1:4]
120 |                 o_shape = l.get_output_shape_at(0)[1:4]
121 | 
122 |             #--------------------------------------#
123 |             #   平铺层
124 |             #--------------------------------------#
125 |             elif ('Flatten' in str(l)):
126 |                 i_shape = l.get_input_shape_at(0)[1:4]
127 |                 o_shape = l.get_output_shape_at(0)[1:4]
128 |                 
129 |             #--------------------------------------#
130 |             #   激活函数层
131 |             #--------------------------------------#
132 |             elif 'Activation' in str(l):
133 |                 i_shape = l.get_input_shape_at(0)[1:4]
134 |                 o_shape = l.get_output_shape_at(0)[1:4]
135 |                 
136 |             #--------------------------------------#
137 |             #   LeakyReLU
138 |             #--------------------------------------#
139 |             elif 'LeakyReLU' in str(l):
140 |                 for i in range(len(l._inbound_nodes)):
141 |                     i_shape = l.get_input_shape_at(i)[1:4]
142 |                     o_shape = l.get_output_shape_at(i)[1:4]
143 |                     
144 |                     flops   += i_shape[0] * i_shape[1] * i_shape[2]
145 |                     
146 |             #--------------------------------------#
147 |             #   池化层
148 |             #--------------------------------------#
149 |             elif 'MaxPooling' in str(l):
150 |                 i_shape = l.get_input_shape_at(0)[1:4]
151 |                 o_shape = l.get_output_shape_at(0)[1:4]
152 |                     
153 |             #--------------------------------------#
154 |             #   池化层
155 |             #--------------------------------------#
156 |             elif ('AveragePooling' in str(l) and 'Global' not in str(l)):
157 |                 strides = l.strides
158 |                 ks      = l.pool_size
159 |                 
160 |                 for i in range(len(l._inbound_nodes)):
161 |                     i_shape = l.get_input_shape_at(i)[1:4]
162 |                     o_shape = l.get_output_shape_at(i)[1:4]
163 |                     
164 |                     flops   += o_shape[0] * o_shape[1] * o_shape[2]
165 | 
166 |             #--------------------------------------#
167 |             #   全局池化层
168 |             #--------------------------------------#
169 |             elif ('AveragePooling' in str(l) and 'Global' in str(l)):
170 |                 for i in range(len(l._inbound_nodes)):
171 |                     i_shape = l.get_input_shape_at(i)[1:4]
172 |                     o_shape = l.get_output_shape_at(i)[1:4]
173 |                     
174 |                     flops += (i_shape[0] * i_shape[1] + 1) * i_shape[2]
175 |                 
176 |             #--------------------------------------#
177 |             #   标准化层
178 |             #--------------------------------------#
179 |             elif ('BatchNormalization' in str(l)):
180 |                 for i in range(len(l._inbound_nodes)):
181 |                     i_shape = l.get_input_shape_at(i)[1:4]
182 |                     o_shape = l.get_output_shape_at(i)[1:4]
183 | 
184 |                     temp_flops = 1
185 |                     for i in range(len(i_shape)):
186 |                         temp_flops *= i_shape[i]
187 |                     temp_flops *= 2
188 |                     
189 |                     flops += temp_flops
190 |                 
191 |             #--------------------------------------#
192 |             #   全连接层
193 |             #--------------------------------------#
194 |             elif ('Dense' in str(l)):
195 |                 for i in range(len(l._inbound_nodes)):
196 |                     i_shape = l.get_input_shape_at(i)[1:4]
197 |                     o_shape = l.get_output_shape_at(i)[1:4]
198 |                 
199 |                     temp_flops = 1
200 |                     for i in range(len(o_shape)):
201 |                         temp_flops *= o_shape[i]
202 |                         
203 |                     if (i_shape[-1] == None):
204 |                         temp_flops = temp_flops * o_shape[-1]
205 |                     else:
206 |                         temp_flops = temp_flops * i_shape[-1]
207 |                     flops += temp_flops
208 | 
209 |             #--------------------------------------#
210 |             #   普通卷积层
211 |             #--------------------------------------#
212 |             elif ('Conv2D' in str(l) and 'DepthwiseConv2D' not in str(l) and 'SeparableConv2D' not in str(l)):
213 |                 strides = l.strides
214 |                 ks      = l.kernel_size
215 |                 filters = l.filters
216 |                 bias    = 1 if l.use_bias else 0
217 |                 
218 |                 for i in range(len(l._inbound_nodes)):
219 |                     i_shape = l.get_input_shape_at(i)[1:4]
220 |                     o_shape = l.get_output_shape_at(i)[1:4]
221 |                     
222 |                     if (filters == None):
223 |                         filters = i_shape[2]
224 |                     flops += filters * o_shape[0] * o_shape[1] * (ks[0] * ks[1] * i_shape[2] + bias)
225 | 
226 |             #--------------------------------------#
227 |             #   逐层卷积层
228 |             #--------------------------------------#
229 |             elif ('Conv2D' in str(l) and 'DepthwiseConv2D' in str(l) and 'SeparableConv2D' not in str(l)):
230 |                 strides = l.strides
231 |                 ks      = l.kernel_size
232 |                 filters = l.filters
233 |                 bias    = 1 if l.use_bias else 0
234 |             
235 |                 for i in range(len(l._inbound_nodes)):
236 |                     i_shape = l.get_input_shape_at(i)[1:4]
237 |                     o_shape = l.get_output_shape_at(i)[1:4]
238 |                     
239 |                     if (filters == None):
240 |                         filters = i_shape[2]
241 |                     flops += filters * o_shape[0] * o_shape[1] * (ks[0] * ks[1] + bias)
242 |                 
243 |             #--------------------------------------#
244 |             #   深度可分离卷积层
245 |             #--------------------------------------#
246 |             elif ('Conv2D' in str(l) and 'DepthwiseConv2D' not in str(l) and 'SeparableConv2D' in str(l)):
247 |                 strides = l.strides
248 |                 ks      = l.kernel_size
249 |                 filters = l.filters
250 |                 
251 |                 for i in range(len(l._inbound_nodes)):
252 |                     i_shape = l.get_input_shape_at(i)[1:4]
253 |                     o_shape = l.get_output_shape_at(i)[1:4]
254 |                     
255 |                     if (filters == None):
256 |                         filters = i_shape[2]
257 |                     flops += i_shape[2] * o_shape[0] * o_shape[1] * (ks[0] * ks[1] + bias) + \
258 |                              filters * o_shape[0] * o_shape[1] * (1 * 1 * i_shape[2] + bias)
259 |             #--------------------------------------#
260 |             #   模型中有模型时
261 |             #--------------------------------------#
262 |             elif 'Model' in str(l):
263 |                 flops = net_flops(l, print_result=False)
264 |                 
265 |             t_flops += flops
266 | 
267 |             if (table == True):
268 |                 print('%25s | %16s | %16s | %16s | %16s | %6s | %5.4f' % (
269 |                     name[:25], str(i_shape), str(o_shape), str(ks), str(filters), str(strides), flops))
270 |                 
271 |         except:
272 |             pass
273 |     
274 |     t_flops = t_flops * 2
275 |     if print_result:
276 |         show_flops = t_flops / factor
277 |         print('Total GFLOPs: %.3fG' % (show_flops))
278 |     return t_flops


--------------------------------------------------------------------------------
/utils/utils_bbox.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | from tensorflow.keras import backend as K
  3 | 
  4 | 
  5 | #---------------------------------------------------#
  6 | #   对box进行调整，使其符合真实图片的样子
  7 | #---------------------------------------------------#
  8 | def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape, letterbox_image):
  9 |     #-----------------------------------------------------------------#
 10 |     #   把y轴放前面是因为方便预测框和图像的宽高进行相乘
 11 |     #-----------------------------------------------------------------#
 12 |     box_yx = box_xy[..., ::-1]
 13 |     box_hw = box_wh[..., ::-1]
 14 |     input_shape = K.cast(input_shape, K.dtype(box_yx))
 15 |     image_shape = K.cast(image_shape, K.dtype(box_yx))
 16 | 
 17 |     if letterbox_image:
 18 |         #-----------------------------------------------------------------#
 19 |         #   这里求出来的offset是图像有效区域相对于图像左上角的偏移情况
 20 |         #   new_shape指的是宽高缩放情况
 21 |         #-----------------------------------------------------------------#
 22 |         new_shape = K.round(image_shape * K.min(input_shape/image_shape))
 23 |         offset  = (input_shape - new_shape)/2./input_shape
 24 |         scale   = input_shape/new_shape
 25 | 
 26 |         box_yx  = (box_yx - offset) * scale
 27 |         box_hw *= scale
 28 | 
 29 |     box_mins    = box_yx - (box_hw / 2.)
 30 |     box_maxes   = box_yx + (box_hw / 2.)
 31 |     boxes  = K.concatenate([box_mins[..., 0:1], box_mins[..., 1:2], box_maxes[..., 0:1], box_maxes[..., 1:2]])
 32 |     boxes *= K.concatenate([image_shape, image_shape])
 33 |     return boxes
 34 | 
 35 | #---------------------------------------------------#
 36 | #   将预测值的每个特征层调成真实值
 37 | #---------------------------------------------------#
 38 | def get_anchors_and_decode(feats, anchors, num_classes, input_shape, calc_loss=False):
 39 |     num_anchors = len(anchors)
 40 |     #------------------------------------------#
 41 |     #   grid_shape指的是特征层的高和宽
 42 |     #------------------------------------------#
 43 |     grid_shape = K.shape(feats)[1:3]
 44 |     #--------------------------------------------------------------------#
 45 |     #   获得各个特征点的坐标信息。生成的shape为(13, 13, num_anchors, 2)
 46 |     #--------------------------------------------------------------------#
 47 |     grid_x  = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]), [grid_shape[0], 1, num_anchors, 1])
 48 |     grid_y  = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]), [1, grid_shape[1], num_anchors, 1])
 49 |     grid    = K.cast(K.concatenate([grid_x, grid_y]), K.dtype(feats))
 50 |     #---------------------------------------------------------------#
 51 |     #   将先验框进行拓展，生成的shape为(13, 13, num_anchors, 2)
 52 |     #---------------------------------------------------------------#
 53 |     anchors_tensor = K.reshape(K.constant(anchors), [1, 1, num_anchors, 2])
 54 |     anchors_tensor = K.tile(anchors_tensor, [grid_shape[0], grid_shape[1], 1, 1])
 55 | 
 56 |     #---------------------------------------------------#
 57 |     #   将预测结果调整成(batch_size,13,13,3,85)
 58 |     #   85可拆分成4 + 1 + 80
 59 |     #   4代表的是中心宽高的调整参数
 60 |     #   1代表的是框的置信度
 61 |     #   80代表的是种类的置信度
 62 |     #---------------------------------------------------#
 63 |     feats           = K.reshape(feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])
 64 |     #------------------------------------------#
 65 |     #   对先验框进行解码，并进行归一化
 66 |     #------------------------------------------#
 67 |     box_xy          = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(feats))
 68 |     box_wh          = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats))
 69 |     #------------------------------------------#
 70 |     #   获得预测框的置信度
 71 |     #------------------------------------------#
 72 |     box_confidence  = K.sigmoid(feats[..., 4:5])
 73 |     box_class_probs = K.sigmoid(feats[..., 5:])
 74 |     
 75 |     #---------------------------------------------------------------------#
 76 |     #   在计算loss的时候返回grid, feats, box_xy, box_wh
 77 |     #   在预测的时候返回box_xy, box_wh, box_confidence, box_class_probs
 78 |     #---------------------------------------------------------------------#
 79 |     if calc_loss == True:
 80 |         return grid, feats, box_xy, box_wh
 81 |     return box_xy, box_wh, box_confidence, box_class_probs
 82 | 
 83 | #---------------------------------------------------#
 84 | #   图片预测
 85 | #---------------------------------------------------#
 86 | def DecodeBox(outputs,
 87 |             anchors,
 88 |             num_classes,
 89 |             input_shape,
 90 |             #-----------------------------------------------------------#
 91 |             #   13x13的特征层对应的anchor是[116,90],[156,198],[373,326]
 92 |             #   26x26的特征层对应的anchor是[30,61],[62,45],[59,119]
 93 |             #   52x52的特征层对应的anchor是[10,13],[16,30],[33,23]
 94 |             #-----------------------------------------------------------#
 95 |             anchor_mask     = [[6, 7, 8], [3, 4, 5], [0, 1, 2]],
 96 |             max_boxes       = 100,
 97 |             confidence      = 0.5,
 98 |             nms_iou         = 0.3,
 99 |             letterbox_image = True):
100 |     
101 |     image_shape = K.reshape(outputs[-1],[-1])
102 | 
103 |     box_xy = []
104 |     box_wh = []
105 |     box_confidence  = []
106 |     box_class_probs = []
107 |     for i in range(len(anchor_mask)):
108 |         sub_box_xy, sub_box_wh, sub_box_confidence, sub_box_class_probs = \
109 |             get_anchors_and_decode(outputs[i], anchors[anchor_mask[i]], num_classes, input_shape)
110 |         box_xy.append(K.reshape(sub_box_xy, [-1, 2]))
111 |         box_wh.append(K.reshape(sub_box_wh, [-1, 2]))
112 |         box_confidence.append(K.reshape(sub_box_confidence, [-1, 1]))
113 |         box_class_probs.append(K.reshape(sub_box_class_probs, [-1, num_classes]))
114 |     box_xy          = K.concatenate(box_xy, axis = 0)
115 |     box_wh          = K.concatenate(box_wh, axis = 0)
116 |     box_confidence  = K.concatenate(box_confidence, axis = 0)
117 |     box_class_probs = K.concatenate(box_class_probs, axis = 0)
118 |     
119 |     #------------------------------------------------------------------------------------------------------------#
120 |     #   在图像传入网络预测前会进行letterbox_image给图像周围添加灰条，因此生成的box_xy, box_wh是相对于有灰条的图像的
121 |     #   我们需要对其进行修改，去除灰条的部分。 将box_xy、和box_wh调节成y_min,y_max,xmin,xmax
122 |     #   如果没有使用letterbox_image也需要将归一化后的box_xy, box_wh调整成相对于原图大小的
123 |     #------------------------------------------------------------------------------------------------------------#
124 |     boxes       = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape, letterbox_image)
125 | 
126 |     box_scores  = box_confidence * box_class_probs
127 | 
128 |     #-----------------------------------------------------------#
129 |     #   判断得分是否大于score_threshold
130 |     #-----------------------------------------------------------#
131 |     mask             = box_scores >= confidence
132 |     max_boxes_tensor = K.constant(max_boxes, dtype='int32')
133 |     boxes_out   = []
134 |     scores_out  = []
135 |     classes_out = []
136 |     for c in range(num_classes):
137 |         #-----------------------------------------------------------#
138 |         #   取出所有box_scores >= score_threshold的框，和成绩
139 |         #-----------------------------------------------------------#
140 |         class_boxes      = tf.boolean_mask(boxes, mask[:, c])
141 |         class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
142 | 
143 |         #-----------------------------------------------------------#
144 |         #   非极大抑制
145 |         #   保留一定区域内得分最大的框
146 |         #-----------------------------------------------------------#
147 |         nms_index = tf.image.non_max_suppression(class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=nms_iou)
148 | 
149 |         #-----------------------------------------------------------#
150 |         #   获取非极大抑制后的结果
151 |         #   下列三个分别是：框的位置，得分与种类
152 |         #-----------------------------------------------------------#
153 |         class_boxes         = K.gather(class_boxes, nms_index)
154 |         class_box_scores    = K.gather(class_box_scores, nms_index)
155 |         classes             = K.ones_like(class_box_scores, 'int32') * c
156 | 
157 |         boxes_out.append(class_boxes)
158 |         scores_out.append(class_box_scores)
159 |         classes_out.append(classes)
160 |     boxes_out      = K.concatenate(boxes_out, axis=0)
161 |     scores_out     = K.concatenate(scores_out, axis=0)
162 |     classes_out    = K.concatenate(classes_out, axis=0)
163 | 
164 |     return boxes_out, scores_out, classes_out
165 | 
if __name__ == "__main__":
    import matplotlib.pyplot as plt
    import numpy as np

    def sigmoid(x):
        """Element-wise logistic function."""
        return 1 / (1 + np.exp(-x))

    #---------------------------------------------------#
    #   NumPy version of the decoding step that plots the
    #   anchors and the decoded boxes of grid cell (5, 5)
    #---------------------------------------------------#
    def get_anchors_and_decode(feats, anchors, num_classes):
        """Decode ``feats`` with NumPy and visualise the result for one cell.

        feats       [batch_size, grid_h, grid_w, num_anchors * (5 + num_classes)]
        anchors     [num_anchors, 2]
        num_classes number of classes

        Only shows a figure and prints two shapes; nothing is returned.
        """
        num_anchors = len(anchors)
        #------------------------------------------#
        #   Height and width of the feature layer
        #------------------------------------------#
        grid_h, grid_w = np.shape(feats)[1:3]
        #--------------------------------------------------------------------#
        #   Grid cell coordinates
        #   grid_x [grid_h, grid_w, num_anchors, 1]
        #   grid_y [grid_h, grid_w, num_anchors, 1]
        #   grid   [grid_h, grid_w, num_anchors, 2]
        #--------------------------------------------------------------------#
        column_ids = np.reshape(np.arange(0, stop=grid_w), [1, -1, 1, 1])
        row_ids = np.reshape(np.arange(0, stop=grid_h), [-1, 1, 1, 1])
        grid_x = np.tile(column_ids, [grid_h, 1, num_anchors, 1])
        grid_y = np.tile(row_ids, [1, grid_w, num_anchors, 1])
        grid = np.concatenate([grid_x, grid_y], -1)
        #---------------------------------------------------------------#
        #   Anchors: [1, 1, num_anchors, 2] tiled to
        #   [grid_h, grid_w, num_anchors, 2]
        #---------------------------------------------------------------#
        anchors_tensor = np.tile(
            np.reshape(anchors, [1, 1, num_anchors, 2]),
            [grid_h, grid_w, 1, 1],
        )

        #---------------------------------------------------#
        #   [batch_size, grid_h, grid_w, num_anchors * (5 + num_classes)]
        #   -> [batch_size, grid_h, grid_w, num_anchors, 5 + num_classes]
        #   4 box adjustment values + 1 objectness + class scores
        #---------------------------------------------------#
        feats = np.reshape(feats, [-1, grid_h, grid_w, num_anchors, num_classes + 5])
        #------------------------------------------#
        #   Decode centres and sizes (grid units / pixels)
        #------------------------------------------#
        box_xy = sigmoid(feats[..., :2]) + grid
        box_wh = np.exp(feats[..., 2:4]) * anchors_tensor
        #------------------------------------------#
        #   Objectness and class scores (computed for
        #   illustration, not plotted)
        #------------------------------------------#
        box_confidence = sigmoid(feats[..., 4:5])
        box_class_probs = sigmoid(feats[..., 5:])

        # Divide by 32 to express sizes in grid-cell units for plotting.
        box_wh = box_wh / 32
        anchors_tensor = anchors_tensor / 32

        fig = plt.figure()

        def open_panel(position):
            # New subplot with fixed limits, all grid points and cell (5, 5) highlighted.
            axis = fig.add_subplot(position)
            plt.ylim(-2,15)
            plt.xlim(-2,15)
            plt.scatter(grid_x,grid_y)
            plt.scatter(5,5,c='black')
            return axis

        #------------------------------------------#
        #   Left panel: the three anchors centred on cell (5, 5)
        #------------------------------------------#
        ax = open_panel(121)
        plt.gca().invert_yaxis()

        anchor_left = grid_x - anchors_tensor/2
        anchor_top = grid_y - anchors_tensor/2
        print(np.shape(anchors_tensor))
        print(np.shape(box_xy))
        anchor_rects = [
            plt.Rectangle([anchor_left[5,5,k,0], anchor_top[5,5,k,1]],
                          anchors_tensor[0,0,k,0], anchors_tensor[0,0,k,1],
                          color="r", fill=False)
            for k in range(3)
        ]
        for rect in anchor_rects:
            ax.add_patch(rect)

        #------------------------------------------#
        #   Right panel: the decoded boxes of cell (5, 5), first sample
        #------------------------------------------#
        ax = open_panel(122)
        plt.scatter(box_xy[0,5,5,:,0],box_xy[0,5,5,:,1],c='r')
        plt.gca().invert_yaxis()

        pre_left = box_xy[...,0] - box_wh[...,0]/2
        pre_top = box_xy[...,1] - box_wh[...,1]/2
        decoded_rects = [
            plt.Rectangle([pre_left[0,5,5,k], pre_top[0,5,5,k]],
                          box_wh[0,5,5,k,0], box_wh[0,5,5,k,1],
                          color="r", fill=False)
            for k in range(3)
        ]
        for rect in decoded_rects:
            ax.add_patch(rect)

        plt.show()

    # Random features for a 13x13 layer with 3 anchors and 20 classes (3 * 25 = 75 channels).
    feat = np.random.normal(0,0.5,[4,13,13,75])
    anchors = [[142, 110],[192, 243],[459, 401]]
    get_anchors_and_decode(feat,anchors,20)
272 | 


--------------------------------------------------------------------------------
/utils/dataloader.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | from random import shuffle
  3 | 
  4 | import cv2
  5 | import numpy as np
  6 | from PIL import Image
  7 | from tensorflow import keras
  8 | 
  9 | from utils.utils import cvtColor, preprocess_input
 10 | 
 11 | 
 12 | class YoloDatasets(keras.utils.Sequence):
 13 |     def __init__(self, annotation_lines, input_shape, anchors, batch_size, num_classes, anchors_mask, train):
 14 |         self.annotation_lines   = annotation_lines
 15 |         self.length             = len(self.annotation_lines)
 16 |         
 17 |         self.input_shape        = input_shape
 18 |         self.anchors            = anchors
 19 |         self.batch_size         = batch_size
 20 |         self.num_classes        = num_classes
 21 |         self.anchors_mask       = anchors_mask
 22 |         self.train              = train
 23 | 
 24 |     def __len__(self):
 25 |         return math.ceil(len(self.annotation_lines) / float(self.batch_size))
 26 | 
 27 |     def __getitem__(self, index):
 28 |         image_data  = []
 29 |         box_data    = []
 30 |         for i in range(index * self.batch_size, (index + 1) * self.batch_size):  
 31 |             i           = i % self.length
 32 |             #---------------------------------------------------#
 33 |             #   训练时进行数据的随机增强
 34 |             #   验证时不进行数据的随机增强
 35 |             #---------------------------------------------------#
 36 |             image, box  = self.get_random_data(self.annotation_lines[i], self.input_shape, random = self.train)
 37 |             image_data.append(preprocess_input(np.array(image, np.float32)))
 38 |             box_data.append(box)
 39 | 
 40 |         image_data  = np.array(image_data)
 41 |         box_data    = np.array(box_data)
 42 |         y_true      = self.preprocess_true_boxes(box_data, self.input_shape, self.anchors, self.num_classes)
 43 |         return [image_data, *y_true], np.zeros(self.batch_size)
 44 | 
 45 |     def generate(self):
 46 |         i = 0
 47 |         while True:
 48 |             image_data  = []
 49 |             box_data    = []
 50 |             for b in range(self.batch_size):
 51 |                 if i==0:
 52 |                     np.random.shuffle(self.annotation_lines)
 53 |                 image, box  = self.get_random_data(self.annotation_lines[i], self.input_shape, random = self.train)
 54 |                 i           = (i+1) % self.length
 55 |                 image_data.append(preprocess_input(np.array(image, np.float32)))
 56 |                 box_data.append(box)
 57 |             image_data  = np.array(image_data)
 58 |             box_data    = np.array(box_data)
 59 |             y_true      = self.preprocess_true_boxes(box_data, self.input_shape, self.anchors, self.num_classes)
 60 |             yield image_data, y_true[0], y_true[1], y_true[2]
 61 | 
 62 |     def on_epoch_end(self):
 63 |         shuffle(self.annotation_lines)
 64 | 
 65 |     def rand(self, a=0, b=1):
 66 |         return np.random.rand()*(b-a) + a
 67 | 
 68 |     def get_random_data(self, annotation_line, input_shape, max_boxes=100, jitter=.3, hue=.1, sat=0.7, val=0.4, random=True):
 69 |         line    = annotation_line.split()
 70 |         #------------------------------#
 71 |         #   读取图像并转换成RGB图像
 72 |         #------------------------------#
 73 |         image   = Image.open(line[0])
 74 |         image   = cvtColor(image)
 75 |         #------------------------------#
 76 |         #   获得图像的高宽与目标高宽
 77 |         #------------------------------#
 78 |         iw, ih  = image.size
 79 |         h, w    = input_shape
 80 |         #------------------------------#
 81 |         #   获得预测框
 82 |         #------------------------------#
 83 |         box     = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])
 84 | 
 85 |         if not random:
 86 |             scale = min(w/iw, h/ih)
 87 |             nw = int(iw*scale)
 88 |             nh = int(ih*scale)
 89 |             dx = (w-nw)//2
 90 |             dy = (h-nh)//2
 91 | 
 92 |             #---------------------------------#
 93 |             #   将图像多余的部分加上灰条
 94 |             #---------------------------------#
 95 |             image       = image.resize((nw,nh), Image.BICUBIC)
 96 |             new_image   = Image.new('RGB', (w,h), (128,128,128))
 97 |             new_image.paste(image, (dx, dy))
 98 |             image_data  = np.array(new_image, np.float32)
 99 | 
100 |             #---------------------------------#
101 |             #   对真实框进行调整
102 |             #---------------------------------#
103 |             box_data = np.zeros((max_boxes,5))
104 |             if len(box)>0:
105 |                 np.random.shuffle(box)
106 |                 box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
107 |                 box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
108 |                 box[:, 0:2][box[:, 0:2]<0]  = 0
109 |                 box[:, 2][box[:, 2]>w]      = w
110 |                 box[:, 3][box[:, 3]>h]      = h
111 |                 box_w   = box[:, 2] - box[:, 0]
112 |                 box_h   = box[:, 3] - box[:, 1]
113 |                 box     = box[np.logical_and(box_w>1, box_h>1)]
114 |                 if len(box)>max_boxes: box = box[:max_boxes]
115 |                 box_data[:len(box)] = box
116 | 
117 |             return image_data, box_data
118 |                 
119 |         #------------------------------------------#
120 |         #   对图像进行缩放并且进行长和宽的扭曲
121 |         #------------------------------------------#
122 |         new_ar = iw/ih * self.rand(1-jitter,1+jitter) / self.rand(1-jitter,1+jitter)
123 |         scale = self.rand(.25, 2)
124 |         if new_ar < 1:
125 |             nh = int(scale*h)
126 |             nw = int(nh*new_ar)
127 |         else:
128 |             nw = int(scale*w)
129 |             nh = int(nw/new_ar)
130 |         image = image.resize((nw,nh), Image.BICUBIC)
131 | 
132 |         #------------------------------------------#
133 |         #   将图像多余的部分加上灰条
134 |         #------------------------------------------#
135 |         dx = int(self.rand(0, w-nw))
136 |         dy = int(self.rand(0, h-nh))
137 |         new_image = Image.new('RGB', (w,h), (128,128,128))
138 |         new_image.paste(image, (dx, dy))
139 |         image = new_image
140 | 
141 |         #------------------------------------------#
142 |         #   翻转图像
143 |         #------------------------------------------#
144 |         flip = self.rand()<.5
145 |         if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)
146 | 
147 |         image_data      = np.array(image, np.uint8)
148 |         #---------------------------------#
149 |         #   对图像进行色域变换
150 |         #   计算色域变换的参数
151 |         #---------------------------------#
152 |         r               = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
153 |         #---------------------------------#
154 |         #   将图像转到HSV上
155 |         #---------------------------------#
156 |         hue, sat, val   = cv2.split(cv2.cvtColor(image_data, cv2.COLOR_RGB2HSV))
157 |         dtype           = image_data.dtype
158 |         #---------------------------------#
159 |         #   应用变换
160 |         #---------------------------------#
161 |         x       = np.arange(0, 256, dtype=r.dtype)
162 |         lut_hue = ((x * r[0]) % 180).astype(dtype)
163 |         lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
164 |         lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
165 | 
166 |         image_data = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
167 |         image_data = cv2.cvtColor(image_data, cv2.COLOR_HSV2RGB)
168 | 
169 |         #---------------------------------#
170 |         #   对真实框进行调整
171 |         #---------------------------------#
172 |         box_data = np.zeros((max_boxes,5))
173 |         if len(box)>0:
174 |             np.random.shuffle(box)
175 |             box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
176 |             box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
177 |             if flip: box[:, [0,2]] = w - box[:, [2,0]]
178 |             box[:, 0:2][box[:, 0:2]<0] = 0
179 |             box[:, 2][box[:, 2]>w] = w
180 |             box[:, 3][box[:, 3]>h] = h
181 |             box_w = box[:, 2] - box[:, 0]
182 |             box_h = box[:, 3] - box[:, 1]
183 |             box = box[np.logical_and(box_w>1, box_h>1)] # discard invalid box
184 |             if len(box)>max_boxes: box = box[:max_boxes]
185 |             box_data[:len(box)] = box
186 |         
187 |         return image_data, box_data
188 | 
189 |     def preprocess_true_boxes(self, true_boxes, input_shape, anchors, num_classes):
190 |         assert (true_boxes[..., 4]<num_classes).all(), 'class id must be less than num_classes'
191 |         #-----------------------------------------------------------#
192 |         #   获得框的坐标和图片的大小
193 |         #-----------------------------------------------------------#
194 |         true_boxes  = np.array(true_boxes, dtype='float32')
195 |         input_shape = np.array(input_shape, dtype='int32')
196 |         
197 |         #-----------------------------------------------------------#
198 |         #   一共有三个特征层数
199 |         #-----------------------------------------------------------#
200 |         num_layers  = len(self.anchors_mask)
201 |         #-----------------------------------------------------------#
202 |         #   m为图片数量，grid_shapes为网格的shape
203 |         #-----------------------------------------------------------#
204 |         m           = true_boxes.shape[0]
205 |         grid_shapes = [input_shape // {0:32, 1:16, 2:8}[l] for l in range(num_layers)]
206 |         #-----------------------------------------------------------#
207 |         #   y_true的格式为(m,13,13,3,85)(m,26,26,3,85)(m,52,52,3,85)
208 |         #-----------------------------------------------------------#
209 |         y_true = [np.zeros((m, grid_shapes[l][0], grid_shapes[l][1], len(self.anchors_mask[l]), 5 + num_classes),
210 |                     dtype='float32') for l in range(num_layers)]
211 | 
212 |         #-----------------------------------------------------------#
213 |         #   通过计算获得真实框的中心和宽高
214 |         #   中心点(m,n,2) 宽高(m,n,2)
215 |         #-----------------------------------------------------------#
216 |         boxes_xy = (true_boxes[..., 0:2] + true_boxes[..., 2:4]) // 2
217 |         boxes_wh =  true_boxes[..., 2:4] - true_boxes[..., 0:2]
218 |         #-----------------------------------------------------------#
219 |         #   将真实框归一化到小数形式
220 |         #-----------------------------------------------------------#
221 |         true_boxes[..., 0:2] = boxes_xy / input_shape[::-1]
222 |         true_boxes[..., 2:4] = boxes_wh / input_shape[::-1]
223 | 
224 |         #-----------------------------------------------------------#
225 |         #   [9,2] -> [1,9,2]
226 |         #-----------------------------------------------------------#
227 |         anchors         = np.expand_dims(anchors, 0)
228 |         anchor_maxes    = anchors / 2.
229 |         anchor_mins     = -anchor_maxes
230 | 
231 |         #-----------------------------------------------------------#
232 |         #   长宽要大于0才有效
233 |         #-----------------------------------------------------------#
234 |         valid_mask = boxes_wh[..., 0]>0
235 | 
236 |         for b in range(m):
237 |             #-----------------------------------------------------------#
238 |             #   对每一张图进行处理
239 |             #-----------------------------------------------------------#
240 |             wh = boxes_wh[b, valid_mask[b]]
241 |             if len(wh) == 0: continue
242 |             #-----------------------------------------------------------#
243 |             #   [n,2] -> [n,1,2]
244 |             #-----------------------------------------------------------#
245 |             wh          = np.expand_dims(wh, -2)
246 |             box_maxes   = wh / 2.
247 |             box_mins    = - box_maxes
248 | 
249 |             #-----------------------------------------------------------#
250 |             #   计算所有真实框和先验框的交并比
251 |             #   intersect_area  [n,9]
252 |             #   box_area        [n,1]
253 |             #   anchor_area     [1,9]
254 |             #   iou             [n,9]
255 |             #-----------------------------------------------------------#
256 |             intersect_mins  = np.maximum(box_mins, anchor_mins)
257 |             intersect_maxes = np.minimum(box_maxes, anchor_maxes)
258 |             intersect_wh    = np.maximum(intersect_maxes - intersect_mins, 0.)
259 |             intersect_area  = intersect_wh[..., 0] * intersect_wh[..., 1]
260 | 
261 |             box_area    = wh[..., 0] * wh[..., 1]
262 |             anchor_area = anchors[..., 0] * anchors[..., 1]
263 | 
264 |             iou = intersect_area / (box_area + anchor_area - intersect_area)
265 |             #-----------------------------------------------------------#
266 |             #   维度是[n,] 感谢 消尽不死鸟 的提醒
267 |             #-----------------------------------------------------------#
268 |             best_anchor = np.argmax(iou, axis=-1)
269 | 
270 |             for t, n in enumerate(best_anchor):
271 |                 #-----------------------------------------------------------#
272 |                 #   找到每个真实框所属的特征层
273 |                 #-----------------------------------------------------------#
274 |                 for l in range(num_layers):
275 |                     if n in self.anchors_mask[l]:
276 |                         #-----------------------------------------------------------#
277 |                         #   floor用于向下取整，找到真实框所属的特征层对应的x、y轴坐标
278 |                         #-----------------------------------------------------------#
279 |                         i = np.floor(true_boxes[b,t,0] * grid_shapes[l][1]).astype('int32')
280 |                         j = np.floor(true_boxes[b,t,1] * grid_shapes[l][0]).astype('int32')
281 |                         #-----------------------------------------------------------#
282 |                         #   k指的的当前这个特征点的第k个先验框
283 |                         #-----------------------------------------------------------#
284 |                         k = self.anchors_mask[l].index(n)
285 |                         #-----------------------------------------------------------#
286 |                         #   c指的是当前这个真实框的种类
287 |                         #-----------------------------------------------------------#
288 |                         c = true_boxes[b, t, 4].astype('int32')
289 |                         #-----------------------------------------------------------#
290 |                         #   y_true的shape为(m,13,13,3,85)(m,26,26,3,85)(m,52,52,3,85)
291 |                         #   最后的85可以拆分成4+1+80，4代表的是框的中心与宽高、
292 |                         #   1代表的是置信度、80代表的是种类
293 |                         #-----------------------------------------------------------#
294 |                         y_true[l][b, j, i, k, 0:4] = true_boxes[b, t, 0:4]
295 |                         y_true[l][b, j, i, k, 4] = 1
296 |                         y_true[l][b, j, i, k, 5+c] = 1
297 | 
298 |         return y_true
299 | 


--------------------------------------------------------------------------------
/utils/callbacks.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import warnings
  3 | 
  4 | import matplotlib
  5 | matplotlib.use('Agg')
  6 | from matplotlib import pyplot as plt
  7 | import scipy.signal
  8 | 
  9 | import shutil
 10 | import numpy as np
 11 | import tensorflow as tf
 12 | 
 13 | from tensorflow import keras
 14 | from tensorflow.keras import backend as K
 15 | from tensorflow.keras.layers import Input, Lambda
 16 | from tensorflow.keras.models import Model
 17 | from PIL import Image
 18 | from tqdm import tqdm
 19 | from .utils import cvtColor, preprocess_input, resize_image
 20 | from .utils_bbox import DecodeBox
 21 | from .utils_map import get_coco_map, get_map
 22 | 
 23 | 
 24 | class LossHistory(keras.callbacks.Callback):
 25 |     def __init__(self, log_dir):
 26 |         self.log_dir    = log_dir
 27 |         self.losses     = []
 28 |         self.val_loss   = []
 29 |         
 30 |         os.makedirs(self.log_dir)
 31 | 
 32 |     def on_epoch_end(self, epoch, logs={}):
 33 |         if not os.path.exists(self.log_dir):
 34 |             os.makedirs(self.log_dir)
 35 | 
 36 |         self.losses.append(logs.get('loss'))
 37 |         self.val_loss.append(logs.get('val_loss'))
 38 |         
 39 |         with open(os.path.join(self.log_dir, "epoch_loss.txt"), 'a') as f:
 40 |             f.write(str(logs.get('loss')))
 41 |             f.write("\n")
 42 |         with open(os.path.join(self.log_dir, "epoch_val_loss.txt"), 'a') as f:
 43 |             f.write(str(logs.get('val_loss')))
 44 |             f.write("\n")
 45 |         self.loss_plot()
 46 | 
 47 |     def loss_plot(self):
 48 |         iters = range(len(self.losses))
 49 | 
 50 |         plt.figure()
 51 |         plt.plot(iters, self.losses, 'red', linewidth = 2, label='train loss')
 52 |         plt.plot(iters, self.val_loss, 'coral', linewidth = 2, label='val loss')
 53 |         try:
 54 |             if len(self.losses) < 25:
 55 |                 num = 5
 56 |             else:
 57 |                 num = 15
 58 |             
 59 |             plt.plot(iters, scipy.signal.savgol_filter(self.losses, num, 3), 'green', linestyle = '--', linewidth = 2, label='smooth train loss')
 60 |             plt.plot(iters, scipy.signal.savgol_filter(self.val_loss, num, 3), '#8B4513', linestyle = '--', linewidth = 2, label='smooth val loss')
 61 |         except:
 62 |             pass
 63 | 
 64 |         plt.grid(True)
 65 |         plt.xlabel('Epoch')
 66 |         plt.ylabel('Loss')
 67 |         plt.title('A Loss Curve')
 68 |         plt.legend(loc="upper right")
 69 | 
 70 |         plt.savefig(os.path.join(self.log_dir, "epoch_loss.png"))
 71 | 
 72 |         plt.cla()
 73 |         plt.close("all")
 74 | 
 75 | class ExponentDecayScheduler(keras.callbacks.Callback):
 76 |     def __init__(self,
 77 |                  decay_rate,
 78 |                  verbose=0):
 79 |         super(ExponentDecayScheduler, self).__init__()
 80 |         self.decay_rate         = decay_rate
 81 |         self.verbose            = verbose
 82 |         self.learning_rates     = []
 83 | 
 84 |     def on_epoch_end(self, batch, logs=None):
 85 |         learning_rate = K.get_value(self.model.optimizer.lr) * self.decay_rate
 86 |         K.set_value(self.model.optimizer.lr, learning_rate)
 87 |         if self.verbose > 0:
 88 |             print('Setting learning rate to %s.' % (learning_rate))
 89 | 
 90 | class EvalCallback(keras.callbacks.Callback):
 91 |     def __init__(self, model_body, input_shape, anchors, anchors_mask, class_names, num_classes, val_lines, log_dir,\
 92 |             map_out_path=".temp_map_out", max_boxes=100, confidence=0.05, nms_iou=0.5, letterbox_image=True, MINOVERLAP=0.5, eval_flag=True, period=1):
 93 |         super(EvalCallback, self).__init__()
 94 |         
 95 |         self.model_body         = model_body
 96 |         self.input_shape        = input_shape
 97 |         self.anchors            = anchors
 98 |         self.anchors_mask       = anchors_mask
 99 |         self.class_names        = class_names
100 |         self.num_classes        = num_classes
101 |         self.val_lines          = val_lines
102 |         self.log_dir            = log_dir
103 |         self.map_out_path       = map_out_path
104 |         self.max_boxes          = max_boxes
105 |         self.confidence         = confidence
106 |         self.nms_iou            = nms_iou
107 |         self.letterbox_image    = letterbox_image
108 |         self.MINOVERLAP         = MINOVERLAP
109 |         self.eval_flag          = eval_flag
110 |         self.period             = period
111 |         
112 |         #---------------------------------------------------------#
113 |         #   在DecodeBox函数中，我们会对预测结果进行后处理
114 |         #   后处理的内容包括，解码、非极大抑制、门限筛选等
115 |         #---------------------------------------------------------#
116 |         self.input_image_shape = Input([2,],batch_size=1)
117 |         inputs  = [*self.model_body.output, self.input_image_shape]
118 |         outputs = Lambda(
119 |             DecodeBox, 
120 |             output_shape = (1,), 
121 |             name = 'yolo_eval',
122 |             arguments = {
123 |                 'anchors'           : self.anchors, 
124 |                 'num_classes'       : self.num_classes, 
125 |                 'input_shape'       : self.input_shape, 
126 |                 'anchor_mask'       : self.anchors_mask,
127 |                 'confidence'        : self.confidence, 
128 |                 'nms_iou'           : self.nms_iou, 
129 |                 'max_boxes'         : self.max_boxes, 
130 |                 'letterbox_image'   : self.letterbox_image
131 |              }
132 |         )(inputs)
133 |         self.yolo_model = Model([self.model_body.input, self.input_image_shape], outputs)
134 |         
135 |         self.maps       = [0]
136 |         self.epoches    = [0]
137 |         if self.eval_flag:
138 |             with open(os.path.join(self.log_dir, "epoch_map.txt"), 'a') as f:
139 |                 f.write(str(0))
140 |                 f.write("\n")
141 | 
142 |     @tf.function
143 |     def get_pred(self, image_data, input_image_shape):
144 |         out_boxes, out_scores, out_classes = self.yolo_model([image_data, input_image_shape], training=False)
145 |         return out_boxes, out_scores, out_classes
146 | 
147 |     def get_map_txt(self, image_id, image, class_names, map_out_path):
148 |         f = open(os.path.join(map_out_path, "detection-results/"+image_id+".txt"),"w") 
149 |         #---------------------------------------------------------#
150 |         #   在这里将图像转换成RGB图像，防止灰度图在预测时报错。
151 |         #---------------------------------------------------------#
152 |         image       = cvtColor(image)
153 |         #---------------------------------------------------------#
154 |         #   给图像增加灰条，实现不失真的resize
155 |         #   也可以直接resize进行识别
156 |         #---------------------------------------------------------#
157 |         image_data  = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
158 |         #---------------------------------------------------------#
159 |         #   添加上batch_size维度，并进行归一化
160 |         #---------------------------------------------------------#
161 |         image_data  = np.expand_dims(preprocess_input(np.array(image_data, dtype='float32')), 0)
162 | 
163 |         #---------------------------------------------------------#
164 |         #   将图像输入网络当中进行预测！
165 |         #---------------------------------------------------------#
166 |         input_image_shape = np.expand_dims(np.array([image.size[1], image.size[0]], dtype='float32'), 0)
167 |         outputs             = self.get_pred(image_data, input_image_shape) 
168 |         out_boxes, out_scores, out_classes = [out.numpy() for out in outputs] 
169 | 
170 |         top_100     = np.argsort(out_scores)[::-1][:self.max_boxes]
171 |         out_boxes   = out_boxes[top_100]
172 |         out_scores  = out_scores[top_100]
173 |         out_classes = out_classes[top_100]
174 |         
175 |         for i, c in enumerate(out_classes):
176 |             predicted_class             = self.class_names[int(c)]
177 |             try:
178 |                 score                   = str(out_scores[i].numpy())
179 |             except:
180 |                 score                   = str(out_scores[i])
181 |             top, left, bottom, right    = out_boxes[i]
182 |             if predicted_class not in class_names:
183 |                 continue
184 | 
185 |             f.write("%s %s %s %s %s %s\n" % (predicted_class, score[:6], str(int(left)), str(int(top)), str(int(right)),str(int(bottom))))
186 | 
187 |         f.close()
188 |         return 
189 |     
190 |     def on_epoch_end(self, epoch, logs=None):
191 |         temp_epoch = epoch + 1
192 |         if temp_epoch % self.period == 0 and self.eval_flag:
193 |             if not os.path.exists(self.map_out_path):
194 |                 os.makedirs(self.map_out_path)
195 |             if not os.path.exists(os.path.join(self.map_out_path, "ground-truth")):
196 |                 os.makedirs(os.path.join(self.map_out_path, "ground-truth"))
197 |             if not os.path.exists(os.path.join(self.map_out_path, "detection-results")):
198 |                 os.makedirs(os.path.join(self.map_out_path, "detection-results"))
199 |             print("Get map.")
200 |             for annotation_line in tqdm(self.val_lines):
201 |                 line        = annotation_line.split()
202 |                 image_id    = os.path.basename(line[0]).split('.')[0]
203 |                 #------------------------------#
204 |                 #   读取图像并转换成RGB图像
205 |                 #------------------------------#
206 |                 image       = Image.open(line[0])
207 |                 #------------------------------#
208 |                 #   获得预测框
209 |                 #------------------------------#
210 |                 gt_boxes    = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])
211 |                 #------------------------------#
212 |                 #   获得预测txt
213 |                 #------------------------------#
214 |                 self.get_map_txt(image_id, image, self.class_names, self.map_out_path)
215 |                 
216 |                 #------------------------------#
217 |                 #   获得真实框txt
218 |                 #------------------------------#
219 |                 with open(os.path.join(self.map_out_path, "ground-truth/"+image_id+".txt"), "w") as new_f:
220 |                     for box in gt_boxes:
221 |                         left, top, right, bottom, obj = box
222 |                         obj_name = self.class_names[obj]
223 |                         new_f.write("%s %s %s %s %s\n" % (obj_name, left, top, right, bottom))
224 |                         
225 |             print("Calculate Map.")
226 |             try:
227 |                 temp_map = get_coco_map(class_names = self.class_names, path = self.map_out_path)[1]
228 |             except:
229 |                 temp_map = get_map(self.MINOVERLAP, False, path = self.map_out_path)
230 |             self.maps.append(temp_map)
231 |             self.epoches.append(temp_epoch)
232 | 
233 |             with open(os.path.join(self.log_dir, "epoch_map.txt"), 'a') as f:
234 |                 f.write(str(temp_map))
235 |                 f.write("\n")
236 |             
237 |             plt.figure()
238 |             plt.plot(self.epoches, self.maps, 'red', linewidth = 2, label='train map')
239 | 
240 |             plt.grid(True)
241 |             plt.xlabel('Epoch')
242 |             plt.ylabel('Map %s'%str(self.MINOVERLAP))
243 |             plt.title('A Map Curve')
244 |             plt.legend(loc="upper right")
245 | 
246 |             plt.savefig(os.path.join(self.log_dir, "epoch_map.png"))
247 |             plt.cla()
248 |             plt.close("all")
249 | 
250 |             print("Get map done.")
251 |             shutil.rmtree(self.map_out_path)
252 | 
class ModelCheckpoint(keras.callbacks.Callback):
    """Callback that saves the model (or only its weights) every ``period`` epochs.

    Args:
        filepath: Path template; formatted with ``epoch`` (1-based) and every
            entry of the epoch ``logs``.
        monitor: Name of the ``logs`` entry compared when ``save_best_only``
            is set.
        verbose: When greater than 0, progress messages are printed.
        save_best_only: Save only when the monitored value improves on the
            best value seen so far.
        save_weights_only: Call ``model.save_weights`` instead of
            ``model.save``.
        mode: ``'min'``, ``'max'`` or ``'auto'``. In ``'auto'`` mode the
            monitored value is maximised when its name contains ``'acc'`` or
            starts with ``'fmeasure'`` and minimised otherwise. Unknown
            values trigger a warning and fall back to ``'auto'``.
        period: Number of epochs between two checkpoints.
    """

    def __init__(self, filepath, monitor='val_loss', verbose=0,
                 save_best_only=False, save_weights_only=False,
                 mode='auto', period=1):
        super(ModelCheckpoint, self).__init__()
        self.monitor = monitor
        self.verbose = verbose
        self.filepath = filepath
        self.save_best_only = save_best_only
        self.save_weights_only = save_weights_only
        self.period = period
        self.epochs_since_last_save = 0

        if mode not in ['auto', 'min', 'max']:
            warnings.warn('ModelCheckpoint mode %s is unknown, '
                          'fallback to auto mode.' % (mode),
                          RuntimeWarning)
            mode = 'auto'

        # np.inf is used instead of the np.Inf alias, which NumPy 2.0 removed.
        if mode == 'min':
            self.monitor_op = np.less
            self.best = np.inf
        elif mode == 'max':
            self.monitor_op = np.greater
            self.best = -np.inf
        else:
            if 'acc' in self.monitor or self.monitor.startswith('fmeasure'):
                self.monitor_op = np.greater
                self.best = -np.inf
            else:
                self.monitor_op = np.less
                self.best = np.inf

    def _write_checkpoint(self, filepath):
        """Save the weights or the full model to ``filepath``."""
        if self.save_weights_only:
            self.model.save_weights(filepath, overwrite=True)
        else:
            self.model.save(filepath, overwrite=True)

    def on_epoch_end(self, epoch, logs=None):
        """Save a checkpoint when ``period`` epochs have passed since the last one."""
        logs = logs or {}
        self.epochs_since_last_save += 1
        if self.epochs_since_last_save < self.period:
            return

        self.epochs_since_last_save = 0
        filepath = self.filepath.format(epoch=epoch + 1, **logs)

        if not self.save_best_only:
            if self.verbose > 0:
                print('\nEpoch %05d: saving model to %s' % (epoch + 1, filepath))
            self._write_checkpoint(filepath)
            return

        current = logs.get(self.monitor)
        if current is None:
            warnings.warn('Can save best model only with %s available, '
                          'skipping.' % (self.monitor), RuntimeWarning)
            return

        if not self.monitor_op(current, self.best):
            if self.verbose > 0:
                print('\nEpoch %05d: %s did not improve' %
                      (epoch + 1, self.monitor))
            return

        if self.verbose > 0:
            print('\nEpoch %05d: %s improved from %0.5f to %0.5f,'
                  ' saving model to %s'
                  % (epoch + 1, self.monitor, self.best,
                     current, filepath))
        self.best = current
        self._write_checkpoint(filepath)
320 | 
321 | 


--------------------------------------------------------------------------------
/nets/yolo_training.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | from functools import partial
  3 | 
  4 | import tensorflow as tf
  5 | from tensorflow.keras import backend as K
  6 | from utils.utils_bbox import get_anchors_and_decode
  7 | 
  8 | 
  9 | def box_ciou(b1, b2):
 10 |     """
 11 |     输入为：
 12 |     ----------
 13 |     b1: tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh
 14 |     b2: tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh
 15 | 
 16 |     返回为：
 17 |     -------
 18 |     ciou: tensor, shape=(batch, feat_w, feat_h, anchor_num, 1)
 19 |     """
 20 |     #-----------------------------------------------------------#
 21 |     #   求出预测框左上角右下角
 22 |     #   b1_mins     (batch, feat_w, feat_h, anchor_num, 2)
 23 |     #   b1_maxes    (batch, feat_w, feat_h, anchor_num, 2)
 24 |     #-----------------------------------------------------------#
 25 |     b1_xy = b1[..., :2]
 26 |     b1_wh = b1[..., 2:4]
 27 |     b1_wh_half = b1_wh/2.
 28 |     b1_mins = b1_xy - b1_wh_half
 29 |     b1_maxes = b1_xy + b1_wh_half
 30 |     #-----------------------------------------------------------#
 31 |     #   求出真实框左上角右下角
 32 |     #   b2_mins     (batch, feat_w, feat_h, anchor_num, 2)
 33 |     #   b2_maxes    (batch, feat_w, feat_h, anchor_num, 2)
 34 |     #-----------------------------------------------------------#
 35 |     b2_xy = b2[..., :2]
 36 |     b2_wh = b2[..., 2:4]
 37 |     b2_wh_half = b2_wh/2.
 38 |     b2_mins = b2_xy - b2_wh_half
 39 |     b2_maxes = b2_xy + b2_wh_half
 40 | 
 41 |     #-----------------------------------------------------------#
 42 |     #   求真实框和预测框所有的iou
 43 |     #   iou         (batch, feat_w, feat_h, anchor_num)
 44 |     #-----------------------------------------------------------#
 45 |     intersect_mins = K.maximum(b1_mins, b2_mins)
 46 |     intersect_maxes = K.minimum(b1_maxes, b2_maxes)
 47 |     intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
 48 |     intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
 49 |     b1_area = b1_wh[..., 0] * b1_wh[..., 1]
 50 |     b2_area = b2_wh[..., 0] * b2_wh[..., 1]
 51 |     union_area = b1_area + b2_area - intersect_area
 52 |     iou = intersect_area / K.maximum(union_area, K.epsilon())
 53 | 
 54 |     #-----------------------------------------------------------#
 55 |     #   计算中心的差距
 56 |     #   center_distance (batch, feat_w, feat_h, anchor_num)
 57 |     #-----------------------------------------------------------#
 58 |     center_distance = K.sum(K.square(b1_xy - b2_xy), axis=-1)
 59 |     enclose_mins = K.minimum(b1_mins, b2_mins)
 60 |     enclose_maxes = K.maximum(b1_maxes, b2_maxes)
 61 |     enclose_wh = K.maximum(enclose_maxes - enclose_mins, 0.0)
 62 |     #-----------------------------------------------------------#
 63 |     #   计算对角线距离
 64 |     #   enclose_diagonal (batch, feat_w, feat_h, anchor_num)
 65 |     #-----------------------------------------------------------#
 66 |     enclose_diagonal = K.sum(K.square(enclose_wh), axis=-1)
 67 |     ciou = iou - 1.0 * (center_distance) / K.maximum(enclose_diagonal ,K.epsilon())
 68 |     
 69 |     v = 4 * K.square(tf.math.atan2(b1_wh[..., 0], K.maximum(b1_wh[..., 1], K.epsilon())) - tf.math.atan2(b2_wh[..., 0], K.maximum(b2_wh[..., 1],K.epsilon()))) / (math.pi * math.pi)
 70 |     alpha = v /  K.maximum((1.0 - iou + v), K.epsilon())
 71 |     ciou = ciou - alpha * v
 72 | 
 73 |     ciou = K.expand_dims(ciou, -1)
 74 |     return ciou
 75 | 
 76 | #---------------------------------------------------#
 77 | #   用于计算每个预测框与真实框的iou
 78 | #---------------------------------------------------#
 79 | def box_iou(b1, b2):
 80 |     #---------------------------------------------------#
 81 |     #   num_anchor,1,4
 82 |     #   计算左上角的坐标和右下角的坐标
 83 |     #---------------------------------------------------#
 84 |     b1          = K.expand_dims(b1, -2)
 85 |     b1_xy       = b1[..., :2]
 86 |     b1_wh       = b1[..., 2:4]
 87 |     b1_wh_half  = b1_wh/2.
 88 |     b1_mins     = b1_xy - b1_wh_half
 89 |     b1_maxes    = b1_xy + b1_wh_half
 90 | 
 91 |     #---------------------------------------------------#
 92 |     #   1,n,4
 93 |     #   计算左上角和右下角的坐标
 94 |     #---------------------------------------------------#
 95 |     b2          = K.expand_dims(b2, 0)
 96 |     b2_xy       = b2[..., :2]
 97 |     b2_wh       = b2[..., 2:4]
 98 |     b2_wh_half  = b2_wh/2.
 99 |     b2_mins     = b2_xy - b2_wh_half
100 |     b2_maxes    = b2_xy + b2_wh_half
101 | 
102 |     #---------------------------------------------------#
103 |     #   计算重合面积
104 |     #---------------------------------------------------#
105 |     intersect_mins  = K.maximum(b1_mins, b2_mins)
106 |     intersect_maxes = K.minimum(b1_maxes, b2_maxes)
107 |     intersect_wh    = K.maximum(intersect_maxes - intersect_mins, 0.)
108 |     intersect_area  = intersect_wh[..., 0] * intersect_wh[..., 1]
109 |     b1_area         = b1_wh[..., 0] * b1_wh[..., 1]
110 |     b2_area         = b2_wh[..., 0] * b2_wh[..., 1]
111 |     iou             = intersect_area / (b1_area + b2_area - intersect_area)
112 |     return iou
113 | 
114 | #---------------------------------------------------#
115 | #   loss值计算
116 | #---------------------------------------------------#
117 | def yolo_loss(
118 |     args, 
119 |     input_shape, 
120 |     anchors, 
121 |     anchors_mask, 
122 |     num_classes, 
123 |     ignore_thresh   = 0.5,
124 |     balance         = [0.4, 1.0, 4], 
125 |     box_ratio       = 0.05, 
126 |     obj_ratio       = 1, 
127 |     cls_ratio       = 0.5 / 4, 
128 |     ciou_flag       = True, 
129 |     print_loss      = False
130 | ):
131 |     num_layers      = len(anchors_mask)
132 |     #---------------------------------------------------------------------------------------------------#
133 |     #   将预测结果和实际ground truth分开，args是[*model_body.output, *y_true]
134 |     #   y_true是一个列表，包含三个特征层，shape分别为:
135 |     #   (m,13,13,3,85)
136 |     #   (m,26,26,3,85)
137 |     #   (m,52,52,3,85)
138 |     #   yolo_outputs是一个列表，包含三个特征层，shape分别为:
139 |     #   (m,13,13,3,85)
140 |     #   (m,26,26,3,85)
141 |     #   (m,52,52,3,85)
142 |     #---------------------------------------------------------------------------------------------------#
143 |     y_true          = args[num_layers:]
144 |     yolo_outputs    = args[:num_layers]
145 | 
146 |     #-----------------------------------------------------------#
147 |     #   得到input_shpae为416,416 
148 |     #-----------------------------------------------------------#
149 |     input_shape = K.cast(input_shape, K.dtype(y_true[0]))
150 |     #-----------------------------------------------------------#
151 |     #   得到网格的shape为[13,13]; [26,26]; [52,52]
152 |     #-----------------------------------------------------------#
153 |     grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)]
154 | 
155 |     #-----------------------------------------------------------#
156 |     #   取出每一张图片
157 |     #   m的值就是batch_size
158 |     #-----------------------------------------------------------#
159 |     m = K.shape(yolo_outputs[0])[0]
160 | 
161 |     loss    = 0
162 |     #---------------------------------------------------------------------------------------------------#
163 |     #   y_true是一个列表，包含三个特征层，shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。
164 |     #   yolo_outputs是一个列表，包含三个特征层，shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。
165 |     #---------------------------------------------------------------------------------------------------#
166 |     for l in range(num_layers):
167 |         #-----------------------------------------------------------#
168 |         #   以第一个特征层(m,13,13,3,85)为例子
169 |         #   取出该特征层中存在目标的点的位置。(m,13,13,3,1)
170 |         #-----------------------------------------------------------#
171 |         object_mask         = y_true[l][..., 4:5]
172 |         #-----------------------------------------------------------#
173 |         #   取出其对应的种类(m,13,13,3,80)
174 |         #-----------------------------------------------------------#
175 |         true_class_probs    = y_true[l][..., 5:]
176 | 
177 |         #-----------------------------------------------------------#
178 |         #   将yolo_outputs的特征层输出进行处理、获得四个返回值
179 |         #   其中：
180 |         #   grid        (13,13,1,2) 网格坐标
181 |         #   raw_pred    (m,13,13,3,85) 尚未处理的预测结果
182 |         #   pred_xy     (m,13,13,3,2) 解码后的中心坐标
183 |         #   pred_wh     (m,13,13,3,2) 解码后的宽高坐标
184 |         #-----------------------------------------------------------#
185 |         grid, raw_pred, pred_xy, pred_wh = get_anchors_and_decode(yolo_outputs[l],
186 |              anchors[anchors_mask[l]], num_classes, input_shape, calc_loss=True)
187 |         
188 |         #-----------------------------------------------------------#
189 |         #   pred_box是解码后的预测的box的位置
190 |         #   (m,13,13,3,4)
191 |         #-----------------------------------------------------------#
192 |         pred_box = K.concatenate([pred_xy, pred_wh])
193 | 
194 |         #-----------------------------------------------------------#
195 |         #   找到负样本群组，第一步是创建一个数组，[]
196 |         #-----------------------------------------------------------#
197 |         ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
198 |         object_mask_bool = K.cast(object_mask, 'bool')
199 |         
200 |         #-----------------------------------------------------------#
201 |         #   对每一张图片计算ignore_mask
202 |         #-----------------------------------------------------------#
203 |         def loop_body(b, ignore_mask):
204 |             #-----------------------------------------------------------#
205 |             #   取出n个真实框：n,4
206 |             #-----------------------------------------------------------#
207 |             true_box = tf.boolean_mask(y_true[l][b,...,0:4], object_mask_bool[b,...,0])
208 |             #-----------------------------------------------------------#
209 |             #   计算预测框与真实框的iou
210 |             #   pred_box    13,13,3,4 预测框的坐标
211 |             #   true_box    n,4 真实框的坐标
212 |             #   iou         13,13,3,n 预测框和真实框的iou
213 |             #-----------------------------------------------------------#
214 |             iou = box_iou(pred_box[b], true_box)
215 | 
216 |             #-----------------------------------------------------------#
217 |             #   best_iou    13,13,3 每个特征点与真实框的最大重合程度
218 |             #-----------------------------------------------------------#
219 |             best_iou = K.max(iou, axis=-1)
220 | 
221 |             #-----------------------------------------------------------#
222 |             #   判断预测框和真实框的最大iou小于ignore_thresh
223 |             #   则认为该预测框没有与之对应的真实框
224 |             #   该操作的目的是：
225 |             #   忽略预测结果与真实框非常对应特征点，因为这些框已经比较准了
226 |             #   不适合当作负样本，所以忽略掉。
227 |             #-----------------------------------------------------------#
228 |             ignore_mask = ignore_mask.write(b, K.cast(best_iou<ignore_thresh, K.dtype(true_box)))
229 |             return b+1, ignore_mask
230 | 
231 |         #-----------------------------------------------------------#
232 |         #   在这个地方进行一个循环、循环是对每一张图片进行的
233 |         #-----------------------------------------------------------#
234 |         _, ignore_mask = tf.while_loop(lambda b,*args: b < m, loop_body, [0, ignore_mask])
235 | 
236 |         #-----------------------------------------------------------#
237 |         #   ignore_mask用于提取出作为负样本的特征点
238 |         #   (m,13,13,3)
239 |         #-----------------------------------------------------------#
240 |         ignore_mask = ignore_mask.stack()
241 |         #   (m,13,13,3,1)
242 |         ignore_mask = K.expand_dims(ignore_mask, -1)
243 | 
244 |         #-----------------------------------------------------------#
245 |         #   reshape_y_true[...,2:3]和reshape_y_true[...,3:4]
246 |         #   表示真实框的宽高，二者均在0-1之间
247 |         #   真实框越大，比重越小，小框的比重更大。
248 |         #-----------------------------------------------------------#
249 |         box_loss_scale  = 2 - y_true[l][...,2:3] * y_true[l][...,3:4]
250 |         if ciou_flag:
251 |             #-----------------------------------------------------------#
252 |             #   计算Ciou loss
253 |             #-----------------------------------------------------------#
254 |             raw_true_box    = y_true[l][...,0:4]
255 |             ciou            = box_ciou(pred_box, raw_true_box)
256 |             ciou_loss       = object_mask * (1 - ciou)
257 |             location_loss   = K.sum(ciou_loss)
258 |         else:
259 |             #-----------------------------------------------------------#
260 |             #   将真实框进行编码，使其格式与预测的相同，后面用于计算loss
261 |             #-----------------------------------------------------------#
262 |             raw_true_xy     = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
263 |             raw_true_wh     = K.log(y_true[l][..., 2:4] / anchors[anchors_mask[l]] * input_shape[::-1])
264 | 
265 |             #-----------------------------------------------------------#
266 |             #   object_mask如果真实存在目标则保存其wh值
267 |             #   switch接口，就是一个if/else条件判断语句
268 |             #-----------------------------------------------------------#
269 |             raw_true_wh     = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh))
270 |             #-----------------------------------------------------------#
271 |             #   利用binary_crossentropy计算中心点偏移情况，效果更好
272 |             #-----------------------------------------------------------#
273 |             xy_loss         = object_mask * box_loss_scale * K.binary_crossentropy(raw_true_xy, raw_pred[...,0:2], from_logits=True)
274 |             #-----------------------------------------------------------#
275 |             #   wh_loss用于计算宽高损失
276 |             #-----------------------------------------------------------#
277 |             wh_loss         = object_mask * box_loss_scale * 0.5 * K.square(raw_true_wh - raw_pred[...,2:4])
278 |             location_loss   = (K.sum(xy_loss) + K.sum(wh_loss)) * 0.1
279 |             
280 |         #------------------------------------------------------------------------------#
281 |         #   如果该位置本来有框，那么计算1与置信度的交叉熵
282 |         #   如果该位置本来没有框，那么计算0与置信度的交叉熵
283 |         #   在这其中会忽略一部分样本，这些被忽略的样本满足条件best_iou<ignore_thresh
284 |         #   该操作的目的是：
285 |         #   忽略预测结果与真实框非常对应特征点，因为这些框已经比较准了
286 |         #   不适合当作负样本，所以忽略掉。
287 |         #------------------------------------------------------------------------------#
288 |         confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) + \
289 |                     (1 - object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask
290 |         
291 |         class_loss      = object_mask * K.binary_crossentropy(true_class_probs, raw_pred[...,5:], from_logits=True)
292 | 
293 |         #-----------------------------------------------------------#
294 |         #   计算正样本数量
295 |         #-----------------------------------------------------------#
296 |         num_pos         = tf.maximum(K.sum(K.cast(object_mask, tf.float32)), 1)
297 |         num_neg         = tf.maximum(K.sum(K.cast((1 - object_mask) * ignore_mask, tf.float32)), 1)
298 |         #-----------------------------------------------------------#
299 |         #   将所有损失求和
300 |         #-----------------------------------------------------------#
301 |         location_loss   = location_loss * box_ratio / num_pos
302 |         confidence_loss = K.sum(confidence_loss) * balance[l] * obj_ratio / (num_pos + num_neg)
303 |         class_loss      = K.sum(class_loss) * cls_ratio / num_pos / num_classes
304 | 
305 |         loss            += location_loss + confidence_loss + class_loss
306 |         if print_loss:
307 |             loss = tf.Print(loss, [loss, location_loss, confidence_loss, class_loss, tf.shape(ignore_mask)], summarize=100, message='loss: ')
308 |     return loss
309 | 
def get_lr_scheduler(lr_decay_type, lr, min_lr, total_iters, warmup_iters_ratio = 0.05, warmup_lr_ratio = 0.1, no_aug_iter_ratio = 0.05, step_num = 10):
    """Build a callable that maps an iteration index to a learning rate.

    Args:
        lr_decay_type: "cos" selects warm-up + cosine annealing; any other
            value selects the step (exponential staircase) schedule.
        lr: peak learning rate.
        min_lr: final / minimum learning rate.
        total_iters: total number of iterations the schedule spans.
        warmup_iters_ratio: fraction of total_iters used for warm-up
            (the resulting count is clamped to the range [1, 3]).
        warmup_lr_ratio: warm-up starts at max(warmup_lr_ratio * lr, 1e-6).
        no_aug_iter_ratio: fraction of total_iters held at min_lr at the end
            (the resulting count is clamped to the range [1, 15]).
        step_num: number of steps of the step schedule.

    Returns:
        A function ``f(iters) -> lr``.

    Raises:
        ValueError: from the returned step schedule when the step size
            (total_iters / step_num) is below 1.
    """
    def yolox_warm_cos_lr(lr, min_lr, total_iters, warmup_total_iters, warmup_lr_start, no_aug_iter, iters):
        if iters <= warmup_total_iters:
            # Quadratic warm-up from warmup_lr_start up to lr.
            progress = iters / float(warmup_total_iters)
            lr = (lr - warmup_lr_start) * pow(progress, 2) + warmup_lr_start
        elif iters >= total_iters - no_aug_iter:
            # Tail of the schedule is held at the minimum learning rate.
            lr = min_lr
        else:
            # Half-cosine decay from lr down to min_lr between the two phases.
            lr = min_lr + 0.5 * (lr - min_lr) * (
                1.0
                + math.cos(
                    math.pi
                    * (iters - warmup_total_iters)
                    / (total_iters - warmup_total_iters - no_aug_iter)
                )
            )
        return lr

    def step_lr(lr, decay_rate, step_size, iters):
        if step_size < 1:
            raise ValueError("step_size must above 1.")
        n       = iters // step_size
        out_lr  = lr * decay_rate ** n
        return out_lr

    if lr_decay_type == "cos":
        warmup_total_iters  = min(max(warmup_iters_ratio * total_iters, 1), 3)
        warmup_lr_start     = max(warmup_lr_ratio * lr, 1e-6)
        no_aug_iter         = min(max(no_aug_iter_ratio * total_iters, 1), 15)
        func = partial(yolox_warm_cos_lr ,lr, min_lr, total_iters, warmup_total_iters, warmup_lr_start, no_aug_iter)
    else:
        if step_num == 1:
            # A single step never decays; the general formula below would
            # divide by zero (1 / (step_num - 1)).
            decay_rate = 1.0
        else:
            # Chosen so that lr reaches min_lr on the last of step_num steps.
            decay_rate = (min_lr / lr) ** (1 / (step_num - 1))
        step_size   = total_iters / step_num
        func = partial(step_lr, lr, decay_rate, step_size)

    return func
347 | 
348 | 


--------------------------------------------------------------------------------
/yolo.py:
--------------------------------------------------------------------------------
  1 | import colorsys
  2 | import os
  3 | import time
  4 | 
  5 | import numpy as np
  6 | import tensorflow as tf
  7 | from PIL import ImageDraw, ImageFont
  8 | from tensorflow.keras.layers import Input, Lambda
  9 | from tensorflow.keras.models import Model
 10 | 
 11 | from nets.yolo import yolo_body
 12 | from utils.utils import (cvtColor, get_anchors, get_classes, preprocess_input,
 13 |                          resize_image, show_config)
 14 | from utils.utils_bbox import DecodeBox
 15 | 
 16 | 
 17 | class YOLO(object):
 18 |     _defaults = {
 19 |         #--------------------------------------------------------------------------#
 20 |         #   使用自己训练好的模型进行预测一定要修改model_path和classes_path！
 21 |         #   model_path指向logs文件夹下的权值文件，classes_path指向model_data下的txt
 22 |         #
 23 |         #   训练好后logs文件夹下存在多个权值文件，选择验证集损失较低的即可。
 24 |         #   验证集损失较低不代表mAP较高，仅代表该权值在验证集上泛化性能较好。
 25 |         #   如果出现shape不匹配，同时要注意训练时的model_path和classes_path参数的修改
 26 |         #--------------------------------------------------------------------------#
 27 |         "model_path"        : 'model_data/yolov3_efficientnet_b2_voc.h5',
 28 |         "classes_path"      : 'model_data/voc_classes.txt',
 29 |         #---------------------------------------------------------------------#
 30 |         #   anchors_path代表先验框对应的txt文件，一般不修改。
 31 |         #   anchors_mask用于帮助代码找到对应的先验框，一般不修改。
 32 |         #---------------------------------------------------------------------#
 33 |         "anchors_path"      : 'model_data/yolo_anchors.txt',
 34 |         "anchors_mask"      : [[6, 7, 8], [3, 4, 5], [0, 1, 2]],
 35 |         #---------------------------------------------------------------------#
 36 |         #   输入图片的大小，必须为32的倍数。
 37 |         #---------------------------------------------------------------------#
 38 |         "input_shape"       : [416, 416],
 39 |         #---------------------------------------------------------------------#
 40 |         #   efficientnet的版本
 41 |         #   phi = 0代表efficientnet-B0-yolov3
 42 |         #   phi = 1代表efficientnet-B1-yolov3
 43 |         #   phi = 2代表efficientnet-B2-yolov3   
 44 |         #   …… 以此类推
 45 |         #---------------------------------------------------------------------#
 46 |         "phi"               : 2,
 47 |         #---------------------------------------------------------------------#
 48 |         #   只有得分大于置信度的预测框会被保留下来
 49 |         #---------------------------------------------------------------------#
 50 |         "confidence"        : 0.5,
 51 |         #---------------------------------------------------------------------#
 52 |         #   非极大抑制所用到的nms_iou大小
 53 |         #---------------------------------------------------------------------#
 54 |         "nms_iou"           : 0.3,
 55 |         "max_boxes"         : 100,
 56 |         #---------------------------------------------------------------------#
 57 |         #   该变量用于控制是否使用letterbox_image对输入图像进行不失真的resize，
 58 |         #   在多次测试后，发现关闭letterbox_image直接resize的效果更好
 59 |         #---------------------------------------------------------------------#
 60 |         "letterbox_image"   : True,
 61 |     }
 62 | 
 63 |     @classmethod
 64 |     def get_defaults(cls, n):
 65 |         if n in cls._defaults:
 66 |             return cls._defaults[n]
 67 |         else:
 68 |             return "Unrecognized attribute name '" + n + "'"
 69 | 
 70 |     #---------------------------------------------------#
 71 |     #   初始化yolo
 72 |     #---------------------------------------------------#
 73 |     def __init__(self, **kwargs):
 74 |         self.__dict__.update(self._defaults)
 75 |         for name, value in kwargs.items():
 76 |             setattr(self, name, value)
 77 |             self._defaults[name] = value 
 78 |             
 79 |         #---------------------------------------------------#
 80 |         #   获得种类和先验框的数量
 81 |         #---------------------------------------------------#
 82 |         self.class_names, self.num_classes = get_classes(self.classes_path)
 83 |         self.anchors, self.num_anchors     = get_anchors(self.anchors_path)
 84 | 
 85 |         #---------------------------------------------------#
 86 |         #   画框设置不同的颜色
 87 |         #---------------------------------------------------#
 88 |         hsv_tuples  = [(x / self.num_classes, 1., 1.) for x in range(self.num_classes)]
 89 |         self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
 90 |         self.colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), self.colors))
 91 | 
 92 |         self.generate()
 93 | 
 94 |         show_config(**self._defaults)
 95 | 
 96 |     #---------------------------------------------------#
 97 |     #   载入模型
 98 |     #---------------------------------------------------#
 99 |     def generate(self):
100 |         model_path = os.path.expanduser(self.model_path)
101 |         assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'
102 |         
103 |         self.model = yolo_body([None, None, 3], self.anchors_mask, self.num_classes, self.phi)
104 |         self.model.load_weights(self.model_path)
105 | 
106 |         print('{} model, anchors, and classes loaded.'.format(model_path))
107 |         #---------------------------------------------------------#
108 |         #   在DecodeBox函数中，我们会对预测结果进行后处理
109 |         #   后处理的内容包括，解码、非极大抑制、门限筛选等
110 |         #---------------------------------------------------------#
111 |         self.input_image_shape = Input([2,],batch_size=1)
112 |         inputs  = [*self.model.output, self.input_image_shape]
113 |         outputs = Lambda(
114 |             DecodeBox, 
115 |             output_shape = (1,), 
116 |             name = 'yolo_eval',
117 |             arguments = {
118 |                 'anchors'           : self.anchors, 
119 |                 'num_classes'       : self.num_classes, 
120 |                 'input_shape'       : self.input_shape, 
121 |                 'anchor_mask'       : self.anchors_mask,
122 |                 'confidence'        : self.confidence, 
123 |                 'nms_iou'           : self.nms_iou, 
124 |                 'max_boxes'         : self.max_boxes, 
125 |                 'letterbox_image'   : self.letterbox_image
126 |              }
127 |         )(inputs)
128 |         self.yolo_model = Model([self.model.input, self.input_image_shape], outputs)
129 | 
130 |     @tf.function
131 |     def get_pred(self, image_data, input_image_shape):
132 |         out_boxes, out_scores, out_classes = self.yolo_model([image_data, input_image_shape], training=False)
133 |         return out_boxes, out_scores, out_classes
134 |     #---------------------------------------------------#
135 |     #   检测图片
136 |     #---------------------------------------------------#
137 |     def detect_image(self, image, crop = False, count = False):
138 |         #---------------------------------------------------------#
139 |         #   在这里将图像转换成RGB图像，防止灰度图在预测时报错。
140 |         #   代码仅仅支持RGB图像的预测，所有其它类型的图像都会转化成RGB
141 |         #---------------------------------------------------------#
142 |         image       = cvtColor(image)
143 |         #---------------------------------------------------------#
144 |         #   给图像增加灰条，实现不失真的resize
145 |         #   也可以直接resize进行识别
146 |         #---------------------------------------------------------#
147 |         image_data  = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
148 |         #---------------------------------------------------------#
149 |         #   添加上batch_size维度，并进行归一化
150 |         #---------------------------------------------------------#
151 |         image_data  = np.expand_dims(preprocess_input(np.array(image_data, dtype='float32')), 0)
152 | 
153 |         #---------------------------------------------------------#
154 |         #   将图像输入网络当中进行预测！
155 |         #---------------------------------------------------------#
156 |         input_image_shape = np.expand_dims(np.array([image.size[1], image.size[0]], dtype='float32'), 0)
157 |         out_boxes, out_scores, out_classes = self.get_pred(image_data, input_image_shape) 
158 | 
159 |         print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
160 |         #---------------------------------------------------------#
161 |         #   设置字体与边框厚度
162 |         #---------------------------------------------------------#
163 |         font        = ImageFont.truetype(font='model_data/simhei.ttf', size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
164 |         thickness   = int(max((image.size[0] + image.size[1]) // np.mean(self.input_shape), 1))
165 |         #---------------------------------------------------------#
166 |         #   计数
167 |         #---------------------------------------------------------#
168 |         if count:
169 |             print("top_label:", out_classes)
170 |             classes_nums    = np.zeros([self.num_classes])
171 |             for i in range(self.num_classes):
172 |                 num = np.sum(out_classes == i)
173 |                 if num > 0:
174 |                     print(self.class_names[i], " : ", num)
175 |                 classes_nums[i] = num
176 |             print("classes_nums:", classes_nums)
177 |         #---------------------------------------------------------#
178 |         #   是否进行目标的裁剪
179 |         #---------------------------------------------------------#
180 |         if crop:
181 |             for i, c in list(enumerate(out_boxes)):
182 |                 top, left, bottom, right = out_boxes[i]
183 |                 top     = max(0, np.floor(top).astype('int32'))
184 |                 left    = max(0, np.floor(left).astype('int32'))
185 |                 bottom  = min(image.size[1], np.floor(bottom).astype('int32'))
186 |                 right   = min(image.size[0], np.floor(right).astype('int32'))
187 |                 
188 |                 dir_save_path = "img_crop"
189 |                 if not os.path.exists(dir_save_path):
190 |                     os.makedirs(dir_save_path)
191 |                 crop_image = image.crop([left, top, right, bottom])
192 |                 crop_image.save(os.path.join(dir_save_path, "crop_" + str(i) + ".png"), quality=95, subsampling=0)
193 |                 print("save crop_" + str(i) + ".png to " + dir_save_path)
194 |         #---------------------------------------------------------#
195 |         #   图像绘制
196 |         #---------------------------------------------------------#
197 |         for i, c in list(enumerate(out_classes)):
198 |             predicted_class = self.class_names[int(c)]
199 |             box             = out_boxes[i]
200 |             score           = out_scores[i]
201 | 
202 |             top, left, bottom, right = box
203 | 
204 |             top     = max(0, np.floor(top).astype('int32'))
205 |             left    = max(0, np.floor(left).astype('int32'))
206 |             bottom  = min(image.size[1], np.floor(bottom).astype('int32'))
207 |             right   = min(image.size[0], np.floor(right).astype('int32'))
208 | 
209 |             label = '{} {:.2f}'.format(predicted_class, score)
210 |             draw = ImageDraw.Draw(image)
211 |             label_size = draw.textsize(label, font)
212 |             label = label.encode('utf-8')
213 |             print(label, top, left, bottom, right)
214 |             
215 |             if top - label_size[1] >= 0:
216 |                 text_origin = np.array([left, top - label_size[1]])
217 |             else:
218 |                 text_origin = np.array([left, top + 1])
219 | 
220 |             for i in range(thickness):
221 |                 draw.rectangle([left + i, top + i, right - i, bottom - i], outline=self.colors[c])
222 |             draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)], fill=self.colors[c])
223 |             draw.text(text_origin, str(label,'UTF-8'), fill=(0, 0, 0), font=font)
224 |             del draw
225 | 
226 |         return image
227 | 
228 |     def get_FPS(self, image, test_interval):
229 |         #---------------------------------------------------------#
230 |         #   在这里将图像转换成RGB图像，防止灰度图在预测时报错。
231 |         #   代码仅仅支持RGB图像的预测，所有其它类型的图像都会转化成RGB
232 |         #---------------------------------------------------------#
233 |         image       = cvtColor(image)
234 |         #---------------------------------------------------------#
235 |         #   给图像增加灰条，实现不失真的resize
236 |         #   也可以直接resize进行识别
237 |         #---------------------------------------------------------#
238 |         image_data  = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
239 |         #---------------------------------------------------------#
240 |         #   添加上batch_size维度，并进行归一化
241 |         #---------------------------------------------------------#
242 |         image_data  = np.expand_dims(preprocess_input(np.array(image_data, dtype='float32')), 0)
243 |         
244 |         #---------------------------------------------------------#
245 |         #   将图像输入网络当中进行预测！
246 |         #---------------------------------------------------------#
247 |         input_image_shape = np.expand_dims(np.array([image.size[1], image.size[0]], dtype='float32'), 0)
248 |         out_boxes, out_scores, out_classes = self.get_pred(image_data, input_image_shape) 
249 | 
250 |         t1 = time.time()
251 |         for _ in range(test_interval):
252 |             out_boxes, out_scores, out_classes = self.get_pred(image_data, input_image_shape) 
253 |         t2 = time.time()
254 |         tact_time = (t2 - t1) / test_interval
255 |         return tact_time
256 | 
257 |     def detect_heatmap(self, image, heatmap_save_path):
258 |         import cv2
259 |         import matplotlib.pyplot as plt
260 |         def sigmoid(x):
261 |             y = 1.0 / (1.0 + np.exp(-x))
262 |             return y
263 |         #---------------------------------------------------------#
264 |         #   在这里将图像转换成RGB图像，防止灰度图在预测时报错。
265 |         #   代码仅仅支持RGB图像的预测，所有其它类型的图像都会转化成RGB
266 |         #---------------------------------------------------------#
267 |         image       = cvtColor(image)
268 |         #---------------------------------------------------------#
269 |         #   给图像增加灰条，实现不失真的resize
270 |         #   也可以直接resize进行识别
271 |         #---------------------------------------------------------#
272 |         image_data  = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
273 |         #---------------------------------------------------------#
274 |         #   添加上batch_size维度，并进行归一化
275 |         #---------------------------------------------------------#
276 |         image_data  = np.expand_dims(preprocess_input(np.array(image_data, dtype='float32')), 0)
277 |         
278 |         output  = self.model.predict(image_data)
279 |         
280 |         plt.imshow(image, alpha=1)
281 |         plt.axis('off')
282 |         mask    = np.zeros((image.size[1], image.size[0]))
283 |         for sub_output in output:
284 |             b, h, w, c = np.shape(sub_output)
285 |             sub_output = np.reshape(sub_output, [b, h, w, 3, -1])[0]
286 |             score      = np.max(sigmoid(sub_output[..., 4]), -1)
287 |             score      = cv2.resize(score, (image.size[0], image.size[1]))
288 |             normed_score    = (score * 255).astype('uint8')
289 |             mask            = np.maximum(mask, normed_score)
290 |             
291 |         plt.imshow(mask, alpha=0.5, interpolation='nearest', cmap="jet")
292 | 
293 |         plt.axis('off')
294 |         plt.subplots_adjust(top=1, bottom=0, right=1,  left=0, hspace=0, wspace=0)
295 |         plt.margins(0, 0)
296 |         plt.savefig(heatmap_save_path, dpi=200, bbox_inches='tight', pad_inches = -0.1)
297 |         print("Save to the " + heatmap_save_path)
298 |         plt.show()
299 | 
300 |     #---------------------------------------------------#
301 |     #   检测图片
302 |     #---------------------------------------------------#
303 |     def get_map_txt(self, image_id, image, class_names, map_out_path):
304 |         f = open(os.path.join(map_out_path, "detection-results/"+image_id+".txt"),"w") 
305 |         #---------------------------------------------------------#
306 |         #   在这里将图像转换成RGB图像，防止灰度图在预测时报错。
307 |         #---------------------------------------------------------#
308 |         image       = cvtColor(image)
309 |         #---------------------------------------------------------#
310 |         #   给图像增加灰条，实现不失真的resize
311 |         #   也可以直接resize进行识别
312 |         #---------------------------------------------------------#
313 |         image_data  = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
314 |         #---------------------------------------------------------#
315 |         #   添加上batch_size维度，并进行归一化
316 |         #---------------------------------------------------------#
317 |         image_data  = np.expand_dims(preprocess_input(np.array(image_data, dtype='float32')), 0)
318 | 
319 |         #---------------------------------------------------------#
320 |         #   将图像输入网络当中进行预测！
321 |         #---------------------------------------------------------#
322 |         input_image_shape = np.expand_dims(np.array([image.size[1], image.size[0]], dtype='float32'), 0)
323 |         out_boxes, out_scores, out_classes = self.get_pred(image_data, input_image_shape) 
324 | 
325 |         for i, c in enumerate(out_classes):
326 |             predicted_class             = self.class_names[int(c)]
327 |             try:
328 |                 score                   = str(out_scores[i].numpy())
329 |             except:
330 |                 score                   = str(out_scores[i])
331 |             top, left, bottom, right    = out_boxes[i]
332 |             if predicted_class not in class_names:
333 |                 continue
334 | 
335 |             f.write("%s %s %s %s %s %s\n" % (predicted_class, score[:6], str(int(left)), str(int(top)), str(int(right)),str(int(bottom))))
336 | 
337 |         f.close()
338 |         return 
339 |         
340 | 


--------------------------------------------------------------------------------
/常见问题汇总.md:
--------------------------------------------------------------------------------
  1 | 问题汇总的博客地址为[https://blog.csdn.net/weixin_44791964/article/details/107517428](https://blog.csdn.net/weixin_44791964/article/details/107517428)。
  2 | 
  3 | # 问题汇总
  4 | ## 1、下载问题
  5 | ### a、代码下载
  6 | **问：up主，可以给我发一份代码吗，代码在哪里下载啊？ 
  7 | 答：Github上的地址就在视频简介里。复制一下就能进去下载了。**
  8 | 
  9 | **问：up主，为什么我下载的代码提示压缩包损坏？
 10 | 答：重新去Github下载。**
 11 | 
 12 | **问：up主，为什么我下载的代码和你在视频以及博客上的代码不一样？
 13 | 答：我常常会对代码进行更新，最终以实际的代码为准。**
 14 | 
 15 | ### b、 权值下载
 16 | **问：up主，为什么我下载的代码里面，model_data下面没有.pth或者.h5文件？ 
 17 | 答：我一般会把权值上传到Github和百度网盘，在GITHUB的README里面就能找到。**
 18 | 
 19 | ### c、 数据集下载
 20 | **问：up主，XXXX数据集在哪里下载啊？
 21 | 答：一般数据集的下载地址我会放在README里面，基本上都有，没有的话请及时联系我添加，直接发github的issue即可**。
 22 | 
 23 | ## 2、环境配置问题
 24 | ### a、20系列及以下显卡环境配置
 25 | **pytorch代码对应的pytorch版本为1.2，博客地址对应**[https://blog.csdn.net/weixin_44791964/article/details/106037141](https://blog.csdn.net/weixin_44791964/article/details/106037141)。
 26 | 
 27 | **keras代码对应的tensorflow版本为1.13.2，keras版本是2.1.5，博客地址对应**[https://blog.csdn.net/weixin_44791964/article/details/104702142](https://blog.csdn.net/weixin_44791964/article/details/104702142)。
 28 | 
 29 | **tf2代码对应的tensorflow版本为2.2.0，无需安装keras，博客地址对应**[https://blog.csdn.net/weixin_44791964/article/details/109161493](https://blog.csdn.net/weixin_44791964/article/details/109161493)。
 30 | 
 31 | **问：你的代码某某某版本的tensorflow和pytorch能用嘛？
 32 | 答：最好按照我推荐的配置，配置教程也有！其它版本的我没有试过！可能出现问题但是一般问题不大。仅需要改少量代码即可。**
 33 | 
 34 | ### b、30系列显卡环境配置
 35 | 30系显卡由于框架更新不可使用上述环境配置教程。
 36 | 当前我已经测试的可以用的30显卡配置如下：
 37 | **pytorch代码对应的pytorch版本为1.7.0，cuda为11.0，cudnn为8.0.5，博客地址对应**[https://blog.csdn.net/weixin_44791964/article/details/120668551](https://blog.csdn.net/weixin_44791964/article/details/120668551)。
 38 | 
 39 | **keras代码无法在win10下配置cuda11，在ubuntu下可以百度查询一下，配置tensorflow版本为1.15.4，keras版本是2.1.5或者2.3.1（少量函数接口不同，代码可能还需要少量调整。）**
 40 | 
 41 | **tf2代码对应的tensorflow版本为2.4.0，cuda为11.0，cudnn为8.0.5，博客地址对应为**[https://blog.csdn.net/weixin_44791964/article/details/120657664](https://blog.csdn.net/weixin_44791964/article/details/120657664)。
 42 | 
 43 | ### c、CPU环境配置
 44 | **pytorch代码对应的pytorch-cpu版本为1.2，博客地址对应**[https://blog.csdn.net/weixin_44791964/article/details/120655098](https://blog.csdn.net/weixin_44791964/article/details/120655098)
 45 | 
 46 | **keras代码对应的tensorflow-cpu版本为1.13.2，keras版本是2.1.5，博客地址对应**[https://blog.csdn.net/weixin_44791964/article/details/120653717](https://blog.csdn.net/weixin_44791964/article/details/120653717)。
 47 | 
 48 | **tf2代码对应的tensorflow-cpu版本为2.2.0，无需安装keras，博客地址对应**[https://blog.csdn.net/weixin_44791964/article/details/120656291](https://blog.csdn.net/weixin_44791964/article/details/120656291)。
 49 | 
 50 | 
 51 | ### d、GPU利用问题与环境使用问题
 52 | **问：为什么我安装了tensorflow-gpu但是却没有利用GPU进行训练呢？
 53 | 答：确认tensorflow-gpu已经装好，利用pip list查看tensorflow版本，然后查看任务管理器或者利用nvidia命令看看是否使用了gpu进行训练，任务管理器的话要看显存使用情况。**
 54 | 
 55 | **问：up主，我好像没有在用gpu进行训练啊，怎么看是不是用了GPU进行训练？
 56 | 答：查看是否使用GPU进行训练一般使用NVIDIA在命令行的查看命令。在windows电脑中打开cmd然后利用nvidia-smi指令查看GPU利用情况**
 57 | ![在这里插入图片描述](https://img-blog.csdnimg.cn/f88ef794c9a341918f000eb2b1c67af6.png?x-oss-process=image/watermark,type_d3F5LXplbmhlaQ,shadow_50,text_Q1NETiBAQnViYmxpaWlpbmc=,size_20,color_FFFFFF,t_70,g_se,x_16)
 58 | **如果一定要看任务管理器的话，请看性能部分GPU的显存是否被利用，或者查看任务管理器的Cuda，而非Copy。**
 59 | ![在这里插入图片描述](https://img-blog.csdnimg.cn/20201013234241524.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80NDc5MTk2NA==,size_16,color_FFFFFF,t_70#pic_center)
 60 | 
 61 | ### e、DLL load failed: 找不到指定的模块
 62 | **问：出现如下错误**
 63 | ```python
 64 | Traceback (most recent call last):
 65 |   File "C:\Users\focus\Anaconda3\ana\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\pywrap_tensorflow.py", line 58, in <module>
 66 |  from tensorflow.python.pywrap_tensorflow_internal import *
 67 | File "C:\Users\focus\Anaconda3\ana\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\pywrap_tensorflow_internal.py", line 28, in <module>
 68 | pywrap_tensorflow_internal = swig_import_helper()
 69 |   File "C:\Users\focus\Anaconda3\ana\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\pywrap_tensorflow_internal.py", line 24, in swig_import_helper
 70 |     _mod = imp.load_module('_pywrap_tensorflow_internal', fp, pathname, description)
 71 | File "C:\Users\focus\Anaconda3\ana\envs\tensorflow-gpu\lib\imp.py", line 243, in load_modulereturn load_dynamic(name, filename, file)
 72 | File "C:\Users\focus\Anaconda3\ana\envs\tensorflow-gpu\lib\imp.py", line 343, in load_dynamic
 73 |     return _load(spec)
 74 | ImportError: DLL load failed: 找不到指定的模块。
 75 | ```
 76 | **答：如果没重启过就重启一下，否则重新按照步骤安装，还无法解决则把你的GPU、CUDA、CUDNN、TF版本以及PYTORCH版本私聊告诉我。**
 77 | 
 78 | ### f、no module问题（no module name utils.utils、no module named 'matplotlib' ）
 79 | **问：为什么提示说no module name utils.utils（no module name nets.yolo、no module name nets.ssd等一系列问题）啊？
 80 | 答：utils并不需要用pip装，它就在我上传的仓库的根目录，出现这个问题的原因是根目录不对，查查相对目录和根目录的概念。查了基本上就明白了。**
 81 | 
 82 | **问：为什么提示说no module name matplotlib（no module name PIL，no module name cv2等等）？
 83 | 答：这个库没安装打开命令行安装就好。pip install matplotlib**
 84 | 
 85 | **问：为什么我已经用pip装了opencv（pillow、matplotlib等），还是提示no module name cv2？
 86 | 答：没有激活环境装，要激活对应的conda环境进行安装才可以正常使用**
 87 | 
 88 | **问：为什么提示说No module named 'torch' ？
 89 | 答：其实我也真的很想知道为什么会有这个问题……这个pytorch没装是什么情况？一般就俩情况，一个是真的没装，还有一个是装到其它环境了，当前激活的环境不是自己装的环境。**
 90 | 
 91 | **问：为什么提示说No module named 'tensorflow' ？
 92 | 答：同上。**
 93 | 
 94 | ### g、cuda安装失败问题
 95 | 一般cuda安装前需要安装Visual Studio，装个2017版本即可。
 96 | 
 97 | ### h、Ubuntu系统问题
 98 | **所有代码在Ubuntu下可以使用，我两个系统都试过。**
 99 | 
100 | ### i、VSCODE提示错误的问题
101 | **问：为什么在VSCODE里面提示一大堆的错误啊？
102 | 答：我也提示一大堆的错误，但是不影响，是VSCODE的问题，如果不想看错误的话就装Pycharm。
103 | 最好将设置里面的Python:Language Server，调整为Pylance。**
104 | 
105 | ### j、使用cpu进行训练与预测的问题
106 | **对于keras和tf2的代码而言，如果想用cpu进行训练和预测，直接装cpu版本的tensorflow就可以了。**
107 | 
108 | **对于pytorch的代码而言，如果想用cpu进行训练和预测，需要将cuda=True修改成cuda=False。**
109 | 
110 | ### k、tqdm没有pos参数问题
111 | **问：运行代码提示'tqdm' object has no attribute 'pos'。
112 | 答：重装tqdm，换个版本就可以了。**
113 | 
114 | ### l、提示decode(“utf-8”)的问题
115 | **由于h5py库的更新，安装过程中会自动安装h5py=3.0.0以上的版本，会导致decode("utf-8")的错误！
116 | 各位一定要在安装完tensorflow后利用命令装h5py=2.10.0！**
117 | ```
118 | pip install h5py==2.10.0
119 | ```
120 | 
121 | ### m、提示TypeError: __array__() takes 1 positional argument but 2 were given错误
122 | 可以修改pillow版本解决。
123 | ```
124 | pip install pillow==8.2.0
125 | ```
126 | ### n、如何查看当前cuda和cudnn
127 | **Windows下cuda版本查看方式如下：
128 | 1、打开cmd窗口。
129 | 2、输入nvcc -V。
130 | 3、Cuda compilation tools, release XXXXXXXX中的XXXXXXXX即cuda版本。**
131 | ![在这里插入图片描述](https://img-blog.csdnimg.cn/0389ea35107a408a80ab5cb6590d5a74.png?x-oss-process=image/watermark,type_d3F5LXplbmhlaQ,shadow_50,text_Q1NETiBAQnViYmxpaWlpbmc=,size_20,color_FFFFFF,t_70,g_se,x_16)
132 | Windows下cudnn版本查看方式如下：
133 | 1、进入cuda安装目录，进入include文件夹。
134 | 2、找到cudnn.h文件。
135 | 3、右键文本打开，下拉，看到#define处可获得cudnn版本。
136 | ```python
137 | #define CUDNN_MAJOR 7
138 | #define CUDNN_MINOR 4
139 | #define CUDNN_PATCHLEVEL 1
140 | ```
141 | 代表cudnn为7.4.1。
142 | ![在这里插入图片描述](https://img-blog.csdnimg.cn/7a86b68b17c84feaa6fa95780d4ae4b4.png?x-oss-process=image/watermark,type_d3F5LXplbmhlaQ,shadow_50,text_Q1NETiBAQnViYmxpaWlpbmc=,size_20,color_FFFFFF,t_70,g_se,x_16)
143 | ![在这里插入图片描述](https://img-blog.csdnimg.cn/81bb7c3e13cc492292530e4b69df86a9.png?x-oss-process=image/watermark,type_d3F5LXplbmhlaQ,shadow_50,text_Q1NETiBAQnViYmxpaWlpbmc=,size_20,color_FFFFFF,t_70,g_se,x_16)
144 | 
145 | ### o、为什么按照你的环境配置后还是不能使用
146 | **问：up主，为什么我按照你的环境配置后还是不能使用？
147 | 答：请把你的GPU、CUDA、CUDNN、TF版本以及PYTORCH版本B站私聊告诉我。**
148 | 
149 | ### p、其它问题
150 | **问：为什么提示TypeError: cat() got an unexpected keyword argument 'axis'，Traceback (most recent call last)，AttributeError: 'Tensor' object has no attribute 'bool'？
151 | 答：这是版本问题，建议使用torch1.2以上版本**
152 | 
153 | **其它有很多稀奇古怪的问题，很多是版本问题，建议按照我的视频教程安装Keras和tensorflow。比如装的是tensorflow2，就不用问我说为什么我没法运行Keras-yolo啥的。那是必然不行的。**
154 | 
155 | ## 3、目标检测库问题汇总（人脸检测和分类库也可参考）
156 | ### a、shape不匹配问题。
157 | #### 1）、训练时shape不匹配问题。
158 | **问：up主，为什么运行train.py会提示shape不匹配啊？
159 | 答：在keras环境中，因为你训练的种类和原始的种类不同，网络结构会变化，所以最尾部的shape会有少量不匹配。**
160 | 
161 | #### 2）、预测时shape不匹配问题。
162 | **问：为什么我运行predict.py会提示我说shape不匹配呀。**
163 | ##### i、copying a param with shape torch.Size([75, 704, 1, 1]) from checkpoint
164 | 在Pytorch里面是这样的：
165 | ![在这里插入图片描述](https://img-blog.csdnimg.cn/20200722171631901.png)
166 | ##### ii、Shapes are [1,1,1024,75] and [255,1024,1,1]. for 'Assign_360' (op: 'Assign') with input shapes: [1,1,1024,75], [255,1024,1,1].
167 | 在Keras里面是这样的：
168 | ![在这里插入图片描述](https://img-blog.csdnimg.cn/20200722171523380.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80NDc5MTk2NA==,size_16,color_FFFFFF,t_70)
169 | **答：原因主要有仨：
170 | 1、训练的classes_path没改，就开始训练了。
171 | 2、预测的model_path没改。
172 | 3、预测的classes_path没改。
173 | 请检查清楚了！确定自己所用的model_path和classes_path是对应的！训练的时候用到的num_classes或者classes_path也需要检查！**
174 | 
175 | ### b、显存不足问题（OOM、RuntimeError: CUDA out of memory）。
176 | **问：为什么我运行train.py下面的命令行闪的贼快，还提示OOM啥的？ 
177 | 答：这是在keras中出现的，爆显存了，可以改小batch_size，SSD的显存占用率是最小的，建议用SSD；
178 | 2G显存：SSD、YOLOV4-TINY
179 | 4G显存：YOLOV3
180 | 6G显存：YOLOV4、Retinanet、M2det、Efficientdet、Faster RCNN等
181 | 8G+显存：随便选吧。**
182 | **需要注意的是，受到BatchNorm2d影响，batch_size不可为1，至少为2。**
183 | 
184 | **问：为什么提示 RuntimeError: CUDA out of memory. Tried to allocate 52.00 MiB (GPU 0; 15.90 GiB total capacity; 14.85 GiB already allocated; 51.88 MiB free; 15.07 GiB reserved in total by PyTorch)？ 
185 | 答：这是pytorch中出现的，爆显存了，同上。**
186 | 
187 | **问：为什么我显存都没利用，就直接爆显存了？ 
188 | 答：都爆显存了，自然就不利用了，模型没有开始训练。**
189 | ### c、为什么要进行冻结训练与解冻训练，不进行行吗？
190 | **问：为什么要冻结训练和解冻训练呀？
191 | 答：可以不进行，本质上是为了保证性能不足的同学的训练，如果电脑性能完全不够，可以将Freeze_Epoch和UnFreeze_Epoch设置成一样，只进行冻结训练。**
192 | 
193 | **同时这也是迁移学习的思想，因为神经网络主干特征提取部分所提取到的特征是通用的，我们冻结起来训练可以加快训练效率，也可以防止权值被破坏。**
194 | 在冻结阶段，模型的主干被冻结了，特征提取网络不发生改变。占用的显存较小，仅对网络进行微调。
195 | 在解冻阶段，模型的主干不被冻结了，特征提取网络会发生改变。占用的显存较大，网络所有的参数都会发生改变。
196 | 
197 | ### d、我的LOSS好大啊，有问题吗？（我的LOSS好小啊，有问题吗？）
198 | **问：为什么我的网络不收敛啊，LOSS是XXXX。
199 | 答：不同网络的LOSS不同，LOSS只是一个参考指标，用于查看网络是否收敛，而非评价网络好坏，我的yolo代码都没有归一化，所以LOSS值看起来比较高，LOSS的值不重要，重要的是是否在变小，预测是否有效果。**
200 | 
201 | ### e、为什么我训练出来的模型没有预测结果？
202 | **问：为什么我的训练效果不好？预测了没有框（框不准）。
203 | 答：**
204 | 考虑几个问题：
205 | 1、目标信息问题，查看2007_train.txt文件是否有目标信息，没有的话请修改voc_annotation.py。
206 | 2、数据集问题，小于500的自行考虑增加数据集，同时测试不同的模型，确认数据集是好的。
207 | 3、是否解冻训练，如果数据集分布与常规画面差距过大需要进一步解冻训练，调整主干，加强特征提取能力。
208 | 4、网络问题，比如SSD不适合小目标，因为先验框固定了。
209 | 5、训练时长问题，有些同学只训练了几代表示没有效果，按默认参数训练完。
210 | 6、确认自己是否按照步骤去做了，比如voc_annotation.py里面的classes是否修改了等。
211 | 7、不同网络的LOSS不同，LOSS只是一个参考指标，用于查看网络是否收敛，而非评价网络好坏，LOSS的值不重要，重要的是是否收敛。
212 | 8、是否修改了网络的主干，如果修改了没有预训练权重，网络不容易收敛，自然效果不好。
213 | 
214 | ### f、为什么我计算出来的map是0？
215 | **问：为什么我的训练效果不好？没有map？
216 | 答：**
217 | 首先尝试利用predict.py预测一下，如果有效果的话应该是get_map.py里面的classes_path设置错误。如果没有预测结果的话，解决方法同e问题，对下面几点进行检查：
218 | 1、目标信息问题，查看2007_train.txt文件是否有目标信息，没有的话请修改voc_annotation.py。
219 | 2、数据集问题，小于500的自行考虑增加数据集，同时测试不同的模型，确认数据集是好的。
220 | 3、是否解冻训练，如果数据集分布与常规画面差距过大需要进一步解冻训练，调整主干，加强特征提取能力。
221 | 4、网络问题，比如SSD不适合小目标，因为先验框固定了。
222 | 5、训练时长问题，有些同学只训练了几代表示没有效果，按默认参数训练完。
223 | 6、确认自己是否按照步骤去做了，比如voc_annotation.py里面的classes是否修改了等。
224 | 7、不同网络的LOSS不同，LOSS只是一个参考指标，用于查看网络是否收敛，而非评价网络好坏，LOSS的值不重要，重要的是是否收敛。
225 | 8、是否修改了网络的主干，如果修改了没有预训练权重，网络不容易收敛，自然效果不好。
226 | 
227 | ### g、gbk编码错误（'gbk' codec can't decode byte）。
228 | **问：我怎么出现了gbk什么的编码错误啊：**
229 | ```python
230 | UnicodeDecodeError: 'gbk' codec can't decode byte 0xa6 in position 446: illegal multibyte sequence
231 | ```
232 | **答：标签和路径不要使用中文，如果一定要使用中文，请注意处理的时候编码的问题，将打开文件的encoding方式改为utf-8。**
233 | 
234 | ### h、我的图片是xxx*xxx的分辨率的，可以用吗？
235 | **问：我的图片是xxx*xxx的分辨率的，可以用吗！**
236 | **答：可以用，代码里面会自动进行resize与数据增强。**
237 | 
238 | ### i、我想进行数据增强！怎么增强？
239 | **问：我想要进行数据增强！怎么做呢？**
240 | **答：无需额外操作，代码里面会自动进行resize与数据增强。**
241 | 
242 | ### j、多GPU训练。
243 | **问：怎么进行多GPU训练？
244 | 答：pytorch的大多数代码可以直接使用gpu训练，keras的话直接百度就好了，实现并不复杂，我没有多卡没法详细测试，还需要各位同学自己努力了。**
245 | 
246 | ### k、能不能训练灰度图？
247 | **问：能不能训练灰度图（预测灰度图）啊？
248 | 答：我的大多数库会将灰度图转化成RGB进行训练和预测，如果遇到代码不能训练或者预测灰度图的情况，可以尝试一下在get_random_data里面将Image.open后的结果转换成RGB，预测的时候也这样试试。（仅供参考）**
249 | 
250 | ### l、断点续练问题。
251 | **问：我已经训练过几个世代了，能不能从这个基础上继续开始训练
252 | 答：可以，你在训练前，和载入预训练权重一样载入训练过的权重就行了。一般训练好的权重会保存在logs文件夹里面，将model_path修改成你要开始的权值的路径即可。**
253 | 
254 | ### m、我要训练其它的数据集，预训练权重能不能用？
255 | **问：如果我要训练其它的数据集，预训练权重要怎么办啊？**
256 | **答：数据的预训练权重对不同数据集是通用的，因为特征是通用的，预训练权重对于99%的情况都必须要用，不用的话权值太过随机，特征提取效果不明显，网络训练的结果也不会好。**
257 | 
258 | ### n、网络如何从0开始训练？
259 | **问：我要怎么不使用预训练权重啊？
260 | 答：看一看注释、大多数代码是model_path = ''，Freeze_Train = False**，如果设置model_path无用，**那么把载入预训练权重的代码注释了就行。**
261 | 
262 | ### o、为什么从0开始训练效果这么差（修改了网络主干，效果不好怎么办）？
263 | **问：为什么我不使用预训练权重效果这么差啊？
264 | 答：因为随机初始化的权值不好，提取的特征不好，也就导致了模型训练的效果不好，voc07+12、coco+voc07+12效果都不一样，预训练权重还是非常重要的。**
265 | 
266 | **问：up，我修改了网络，预训练权重还能用吗？
267 | 答：修改了主干的话，如果不是用的现有的网络，基本上预训练权重是不能用的，要么就自己判断权值里卷积核的shape然后自己匹配，要么只能自己预训练去了；修改了后半部分的话，前半部分的主干部分的预训练权重还是可以用的，如果是pytorch代码的话，需要自己修改一下载入权值的方式，判断shape后载入，如果是keras代码，直接by_name=True,skip_mismatch=True即可。**
268 | 权值匹配的方式可以参考如下：
269 | ```python
270 | # 加快模型训练的效率
271 | print('Loading weights into state dict...')
272 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
273 | model_dict = model.state_dict()
274 | pretrained_dict = torch.load(model_path, map_location=device)
275 | a = {}
276 | for k, v in pretrained_dict.items():
277 |     try:    
278 |         if np.shape(model_dict[k]) ==  np.shape(v):
279 |             a[k]=v
280 |     except:
281 |         pass
282 | model_dict.update(a)
283 | model.load_state_dict(model_dict)
284 | print('Finished!')
285 | ```
286 | 
287 | **问：为什么从0开始训练效果这么差（我修改了网络主干，效果不好怎么办）？
288 | 答：一般来讲，网络从0开始的训练效果会很差，因为权值太过随机，特征提取效果不明显，因此非常、非常、非常不建议大家从0开始训练！如果一定要从0开始，可以了解imagenet数据集，首先训练分类模型，获得网络的主干部分权值，分类模型的 主干部分 和该模型通用，基于此进行训练。
289 | 网络修改了主干之后也是同样的问题，随机的权值效果很差。**
290 | 
291 | **问：怎么在模型上从0开始训练？
292 | 答：在算力不足与调参能力不足的情况下从0开始训练毫无意义。模型特征提取能力在随机初始化参数的情况下非常差。没有好的参数调节能力和算力，无法使得网络正常收敛。**
293 | 如果一定要从0开始，那么训练的时候请注意几点：
294 |  - 不载入预训练权重。 
295 |  - 不要进行冻结训练，注释冻结模型的代码。
296 | 
297 | **问：为什么我不使用预训练权重效果这么差啊？
298 | 答：因为随机初始化的权值不好，提取的特征不好，也就导致了模型训练的效果不好，voc07+12、coco+voc07+12效果都不一样，预训练权重还是非常重要的。**
299 | 
300 | ### p、你的权值都是哪里来的？
301 | **问：如果网络不能从0开始训练的话你的权值哪里来的？
302 | 答：有些权值是官方转换过来的，有些权值是自己训练出来的，我用到的主干的imagenet的权值都是官方的。**
303 | 
304 | ### q、视频检测与摄像头检测
305 | **问：怎么用摄像头检测呀？
306 | 答：predict.py修改参数可以进行摄像头检测，也有视频详细解释了摄像头检测的思路。**
307 | 
308 | **问：怎么用视频检测呀？
309 | 答：同上**
310 | 
311 | ### r、如何保存检测出的图片
312 | **问：检测完的图片怎么保存？
313 | 答：一般目标检测用的是Image，所以查询一下PIL库的Image如何进行保存。详细看看predict.py文件的注释。**
314 | 
315 | **问：怎么用视频保存呀？
316 | 答：详细看看predict.py文件的注释。**
317 | 
318 | ### s、遍历问题
319 | **问：如何对一个文件夹的图片进行遍历？
320 | 答：一般使用os.listdir先找出文件夹里面的所有图片，然后根据predict.py文件里面的执行思路检测图片就行了，详细看看predict.py文件的注释。**
321 | 
322 | **问：如何对一个文件夹的图片进行遍历？并且保存。
323 | 答：遍历的话一般使用os.listdir先找出文件夹里面的所有图片，然后根据predict.py文件里面的执行思路检测图片就行了。保存的话一般目标检测用的是Image，所以查询一下PIL库的Image如何进行保存。如果有些库用的是cv2，那就是查一下cv2怎么保存图片。详细看看predict.py文件的注释。**
324 | 
325 | ### t、路径问题（No such file or directory、StopIteration: [Errno 13] Permission denied: 'XXXXXX'）
326 | **问：我怎么出现了这样的错误呀：**
327 | ```python
328 | FileNotFoundError: [Errno 2] No such file or directory
329 | StopIteration: [Errno 13] Permission denied: 'D:\\Study\\Collection\\Dataset\\VOC07+12+test\\VOCdevkit/VOC2007'
330 | ……………………………………
331 | ……………………………………
332 | ```
333 | **答：去检查一下文件夹路径，查看是否有对应文件；并且检查一下2007_train.txt，其中文件路径是否有错。**
334 | 关于路径有几个重要的点：
335 | **文件夹名称中一定不要有空格。
336 | 注意相对路径和绝对路径。
337 | 多百度路径相关的知识。**
338 | 
339 | **所有的路径问题基本上都是根目录问题，好好查一下相对目录的概念！**
340 | ### u、和原版比较问题，你怎么和原版不一样啊？
341 | **问：原版的代码是XXX，为什么你的代码是XXX？
342 | 答：是啊……这要不怎么说我不是原版呢……**
343 | 
344 | **问：你这个代码和原版比怎么样，可以达到原版的效果么？
345 | 答：基本上可以达到，我都用voc数据测过，我没有好显卡，没有能力在coco上测试与训练。**
346 | 
347 | **问：你有没有实现yolov4所有的tricks，和原版差距多少？
348 | 答：并没有实现全部的改进部分，由于YOLOV4使用的改进实在太多了，很难完全实现与列出来，这里只列出来了一些我比较感兴趣，而且非常有效的改进。论文中提到的SAM（注意力机制模块），作者自己的源码也没有使用。还有其它很多的tricks，不是所有的tricks都有提升，我也没法实现全部的tricks。至于和原版的比较，我没有能力训练coco数据集，根据使用过的同学反映差距不大。**
349 | 
350 | ### v、我的检测速度是xxx正常吗？我的检测速度还能增快吗？
351 | **问：你这个FPS可以到达多少，可以到 XX FPS么？
352 | 答：FPS和机子的配置有关，配置高就快，配置低就慢。**
353 | 
354 | **问：我的检测速度是xxx正常吗？我的检测速度还能增快吗？
355 | 答：看配置，配置好速度就快，如果想要配置不变的情况下加快速度，就要修改网络了。**
356 | 
357 | **问：为什么我用服务器去测试yolov4（or others）的FPS只有十几？
358 | 答：检查是否正确安装了tensorflow-gpu或者pytorch的gpu版本，如果已经正确安装，可以去利用time.time()的方法查看detect_image里面，哪一段代码耗时更长（不仅只有网络耗时长，其它处理部分也会耗时，如绘图等）。**
359 | 
360 | **问：为什么论文中说速度可以达到XX，但是这里却没有？
361 | 答：检查是否正确安装了tensorflow-gpu或者pytorch的gpu版本，如果已经正确安装，可以去利用time.time()的方法查看detect_image里面，哪一段代码耗时更长（不仅只有网络耗时长，其它处理部分也会耗时，如绘图等）。有些论文还会使用多batch进行预测，我并没有去实现这个部分。**
362 | 
363 | ### w、预测图片不显示问题
364 | **问：为什么你的代码在预测完成后不显示图片？只是在命令行告诉我有什么目标。
365 | 答：给系统安装一个图片查看器就行了。**
366 | 
367 | ### x、算法评价问题（目标检测的map、PR曲线、Recall、Precision等）
368 | **问：怎么计算map？
369 | 答：看map视频，都一个流程。**
370 | 
371 | **问：计算map的时候，get_map.py里面有一个MINOVERLAP是什么用的，是iou吗？
372 | 答：是iou，它的作用是判断预测框和真实框的重合程度，如果重合程度大于MINOVERLAP，则预测正确。**
373 | 
374 | **问：为什么get_map.py里面的self.confidence（self.score）要设置的那么小？
375 | 答：看一下map的视频的原理部分，要知道所有的结果然后再进行pr曲线的绘制。**
376 | 
377 | **问：能不能说说怎么绘制PR曲线啥的呀。
378 | 答：可以看mAP视频，结果里面有PR曲线。**
379 | 
380 | **问：怎么计算Recall、Precision指标。
381 | 答：这俩指标应该是相对于特定的置信度的，计算map的时候也会获得。**
382 | 
383 | ### y、coco数据集训练问题
384 | **问：目标检测怎么训练COCO数据集啊？
385 | 答：coco数据训练所需要的txt文件可以参考qqwweee的yolo3的库，格式都是一样的。**
386 | 
387 | ### z、UP，怎么优化模型啊？我想提升效果
388 | **问：up，怎么修改模型啊，我想发个小论文！
389 | 答：建议看看yolov3和yolov4的区别，然后看看yolov4的论文，作为一个大型调参现场非常有参考意义，使用了很多tricks。我能给的建议就是多看一些经典模型，然后拆解里面的亮点结构并使用。**
390 | 
391 | ### aa、UP，有Focal LOSS的代码吗？怎么改啊？
392 | **问：up，YOLO系列使用Focal LOSS的代码你有吗，有提升吗？
393 | 答：很多人试过，提升效果也不大（甚至变的更Low），它自己有自己的正负样本的平衡方式**。改代码的事情，还是自己好好看看代码吧。
394 | 
395 | ### ab、部署问题（ONNX、TensorRT等）
396 | 我没有具体部署到手机等设备上过，所以很多部署问题我并不了解……
397 | 
398 | ## 4、语义分割库问题汇总
399 | ### a、shape不匹配问题
400 | #### 1）、训练时shape不匹配问题
401 | **问：up主，为什么运行train.py会提示shape不匹配啊？
402 | 答：在keras环境中，因为你训练的种类和原始的种类不同，网络结构会变化，所以最尾部的shape会有少量不匹配。**
403 | 
404 | #### 2）、预测时shape不匹配问题
405 | **问：为什么我运行predict.py会提示我说shape不匹配呀。**
406 | ##### i、copying a param with shape torch.Size([75, 704, 1, 1]) from checkpoint
407 | 在Pytorch里面是这样的：
408 | ![在这里插入图片描述](https://img-blog.csdnimg.cn/20200722171631901.png)
409 | ##### ii、Shapes are [1,1,1024,75] and [255,1024,1,1]. for 'Assign_360' (op: 'Assign') with input shapes: [1,1,1024,75], [255,1024,1,1].
410 | 在Keras里面是这样的：
411 | ![在这里插入图片描述](https://img-blog.csdnimg.cn/20200722171523380.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80NDc5MTk2NA==,size_16,color_FFFFFF,t_70)
412 | **答：原因主要有三：
413 | 1、train.py里面的num_classes没改。
414 | 2、预测时num_classes没改。
415 | 3、预测时model_path没改。
416 | 请检查清楚！训练和预测的时候用到的num_classes都需要检查！**
417 | 
418 | ### b、显存不足问题（OOM、RuntimeError: CUDA out of memory）。
419 | **问：为什么我运行train.py下面的命令行闪的贼快，还提示OOM啥的？ 
420 | 答：这是在keras中出现的，爆显存了，可以改小batch_size。**
421 | 
422 | **需要注意的是，受到BatchNorm2d影响，batch_size不可为1，至少为2。**
423 | 
424 | **问：为什么提示 RuntimeError: CUDA out of memory. Tried to allocate 52.00 MiB (GPU 0; 15.90 GiB total capacity; 14.85 GiB already allocated; 51.88 MiB free; 15.07 GiB reserved in total by PyTorch)？ 
425 | 答：这是pytorch中出现的，爆显存了，同上。**
426 | 
427 | **问：为什么我显存都没利用，就直接爆显存了？ 
428 | 答：都爆显存了，自然就不利用了，模型没有开始训练。**
429 | 
430 | ### c、为什么要进行冻结训练与解冻训练，不进行行吗？
431 | **问：为什么要冻结训练和解冻训练呀？
432 | 答：可以不进行，本质上是为了保证性能不足的同学的训练，如果电脑性能完全不够，可以将Freeze_Epoch和UnFreeze_Epoch设置成一样，只进行冻结训练。**
433 | 
434 | **同时这也是迁移学习的思想，因为神经网络主干特征提取部分所提取到的特征是通用的，我们冻结起来训练可以加快训练效率，也可以防止权值被破坏。**
435 | 在冻结阶段，模型的主干被冻结了，特征提取网络不发生改变。占用的显存较小，仅对网络进行微调。
436 | 在解冻阶段，模型的主干不被冻结了，特征提取网络会发生改变。占用的显存较大，网络所有的参数都会发生改变。
437 | 
438 | ### d、我的LOSS好大啊，有问题吗？（我的LOSS好小啊，有问题吗？）
439 | **问：为什么我的网络不收敛啊，LOSS是XXXX。
440 | 答：不同网络的LOSS不同，LOSS只是一个参考指标，用于查看网络是否收敛，而非评价网络好坏，我的yolo代码都没有归一化，所以LOSS值看起来比较高，LOSS的值不重要，重要的是是否在变小，预测是否有效果。**
441 | 
442 | ### e、为什么我训练出来的模型没有预测结果？
443 | **问：为什么我的训练效果不好？预测了没有框（框不准）。
444 | 答：**
445 | **考虑几个问题：
446 | 1、数据集问题，这是最重要的问题。小于500的自行考虑增加数据集；一定要检查数据集的标签，视频中详细解析了VOC数据集的格式，但并不是有输入图片有输出标签即可，还需要确认标签的每一个像素值是否为它对应的种类。很多同学的标签格式不对，最常见的错误格式就是标签的背景为黑，目标为白，此时目标的像素点值为255，无法正常训练，目标需要为1才行。
447 | 2、是否解冻训练，如果数据集分布与常规画面差距过大需要进一步解冻训练，调整主干，加强特征提取能力。
448 | 3、网络问题，可以尝试不同的网络。
449 | 4、训练时长问题，有些同学只训练了几代表示没有效果，按默认参数训练完。
450 | 5、确认自己是否按照步骤去做了。
451 | 6、不同网络的LOSS不同，LOSS只是一个参考指标，用于查看网络是否收敛，而非评价网络好坏，LOSS的值不重要，重要的是是否收敛。**
452 | 
453 | **问：为什么我的训练效果不好？对小目标预测不准确。
454 | 答：对于deeplab和pspnet而言，可以修改一下downsample_factor，当downsample_factor为16的时候下采样倍数过多，效果不太好，可以修改为8。**
455 | 
456 | ### f、为什么我计算出来的miou是0？
457 | **问：为什么我的训练效果不好？计算出来的miou是0？**
458 | 答：
459 | 与e类似，**考虑几个问题：
460 | 1、数据集问题，这是最重要的问题。小于500的自行考虑增加数据集；一定要检查数据集的标签，视频中详细解析了VOC数据集的格式，但并不是有输入图片有输出标签即可，还需要确认标签的每一个像素值是否为它对应的种类。很多同学的标签格式不对，最常见的错误格式就是标签的背景为黑，目标为白，此时目标的像素点值为255，无法正常训练，目标需要为1才行。
461 | 2、是否解冻训练，如果数据集分布与常规画面差距过大需要进一步解冻训练，调整主干，加强特征提取能力。
462 | 3、网络问题，可以尝试不同的网络。
463 | 4、训练时长问题，有些同学只训练了几代表示没有效果，按默认参数训练完。
464 | 5、确认自己是否按照步骤去做了。
465 | 6、不同网络的LOSS不同，LOSS只是一个参考指标，用于查看网络是否收敛，而非评价网络好坏，LOSS的值不重要，重要的是是否收敛。**
466 | 
467 | ### g、gbk编码错误（'gbk' codec can't decode byte）。
468 | **问：我怎么出现了gbk什么的编码错误啊：**
469 | ```python
470 | UnicodeDecodeError: 'gbk' codec can't decode byte 0xa6 in position 446: illegal multibyte sequence
471 | ```
472 | **答：标签和路径不要使用中文，如果一定要使用中文，请注意处理的时候编码的问题，将打开文件的encoding方式改为utf-8。**
473 | 
474 | ### h、我的图片是xxx*xxx的分辨率的，可以用吗？
475 | **问：我的图片是xxx*xxx的分辨率的，可以用吗！**
476 | **答：可以用，代码里面会自动进行resize与数据增强。**
477 | 
478 | ### i、我想进行数据增强！怎么增强？
479 | **问：我想要进行数据增强！怎么做呢？**
480 | **答：无需额外操作，代码里面会自动进行resize与数据增强。**
481 | 
482 | ### j、多GPU训练。
483 | **问：怎么进行多GPU训练？
484 | 答：pytorch的大多数代码可以直接使用gpu训练，keras的话直接百度就好了，实现并不复杂，我没有多卡没法详细测试，还需要各位同学自己努力了。**
485 | 
486 | ### k、能不能训练灰度图？
487 | **问：能不能训练灰度图（预测灰度图）啊？
488 | 答：我的大多数库会将灰度图转化成RGB进行训练和预测，如果遇到代码不能训练或者预测灰度图的情况，可以尝试一下在get_random_data里面将Image.open后的结果转换成RGB，预测的时候也这样试试。（仅供参考）**
489 | 
490 | ### l、断点续练问题。
491 | **问：我已经训练过几个世代了，能不能从这个基础上继续开始训练
492 | 答：可以，你在训练前，和载入预训练权重一样载入训练过的权重就行了。一般训练好的权重会保存在logs文件夹里面，将model_path修改成你要开始的权值的路径即可。**
493 | 
494 | ### m、我要训练其它的数据集，预训练权重能不能用？
495 | **问：如果我要训练其它的数据集，预训练权重要怎么办啊？**
496 | **答：数据的预训练权重对不同数据集是通用的，因为特征是通用的，预训练权重对于99%的情况都必须要用，不用的话权值太过随机，特征提取效果不明显，网络训练的结果也不会好。**
497 | 
498 | ### n、网络如何从0开始训练？
499 | **问：我要怎么不使用预训练权重啊？
500 | 答：看一看注释、大多数代码是model_path = ''，Freeze_Train = False**，如果设置model_path无用，**那么把载入预训练权重的代码注释了就行。**
501 | 
502 | ### o、为什么从0开始训练效果这么差（修改了网络主干，效果不好怎么办）？
503 | **问：为什么我不使用预训练权重效果这么差啊？
504 | 答：因为随机初始化的权值不好，提取的特征不好，也就导致了模型训练的效果不好，预训练权重还是非常重要的。**
505 | 
506 | **问：up，我修改了网络，预训练权重还能用吗？
507 | 答：修改了主干的话，如果不是用的现有的网络，基本上预训练权重是不能用的，要么就自己判断权值里卷积核的shape然后自己匹配，要么只能自己预训练去了；修改了后半部分的话，前半部分的主干部分的预训练权重还是可以用的，如果是pytorch代码的话，需要自己修改一下载入权值的方式，判断shape后载入，如果是keras代码，直接by_name=True,skip_mismatch=True即可。**
508 | 权值匹配的方式可以参考如下：
509 | ```python
510 | # 加快模型训练的效率
511 | print('Loading weights into state dict...')
512 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
513 | model_dict = model.state_dict()
514 | pretrained_dict = torch.load(model_path, map_location=device)
515 | a = {}
516 | for k, v in pretrained_dict.items():
517 |     try:    
518 |         if np.shape(model_dict[k]) ==  np.shape(v):
519 |             a[k]=v
520 |     except:
521 |         pass
522 | model_dict.update(a)
523 | model.load_state_dict(model_dict)
524 | print('Finished!')
525 | ```
526 | 
527 | **问：为什么从0开始训练效果这么差（我修改了网络主干，效果不好怎么办）？
528 | 答：一般来讲，网络从0开始的训练效果会很差，因为权值太过随机，特征提取效果不明显，因此非常、非常、非常不建议大家从0开始训练！如果一定要从0开始，可以了解imagenet数据集，首先训练分类模型，获得网络的主干部分权值，分类模型的 主干部分 和该模型通用，基于此进行训练。
529 | 网络修改了主干之后也是同样的问题，随机的权值效果很差。**
530 | 
531 | **问：怎么在模型上从0开始训练？
532 | 答：在算力不足与调参能力不足的情况下从0开始训练毫无意义。模型特征提取能力在随机初始化参数的情况下非常差。没有好的参数调节能力和算力，无法使得网络正常收敛。**
533 | 如果一定要从0开始，那么训练的时候请注意几点：
534 |  - 不载入预训练权重。 
535 |  - 不要进行冻结训练，注释冻结模型的代码。
536 | 
537 | **问：为什么我不使用预训练权重效果这么差啊？
538 | 答：因为随机初始化的权值不好，提取的特征不好，也就导致了模型训练的效果不好，voc07+12、coco+voc07+12效果都不一样，预训练权重还是非常重要的。**
539 | 
540 | ### p、你的权值都是哪里来的？
541 | **问：如果网络不能从0开始训练的话你的权值哪里来的？
542 | 答：有些权值是官方转换过来的，有些权值是自己训练出来的，我用到的主干的imagenet的权值都是官方的。**
543 | 
544 | 
545 | ### q、视频检测与摄像头检测
546 | **问：怎么用摄像头检测呀？
547 | 答：predict.py修改参数可以进行摄像头检测，也有视频详细解释了摄像头检测的思路。**
548 | 
549 | **问：怎么用视频检测呀？
550 | 答：同上**
551 | 
552 | ### r、如何保存检测出的图片
553 | **问：检测完的图片怎么保存？
554 | 答：一般目标检测用的是Image，所以查询一下PIL库的Image如何进行保存。详细看看predict.py文件的注释。**
555 | 
556 | **问：怎么用视频保存呀？
557 | 答：详细看看predict.py文件的注释。**
558 | 
559 | ### s、遍历问题
560 | **问：如何对一个文件夹的图片进行遍历？
561 | 答：一般使用os.listdir先找出文件夹里面的所有图片，然后根据predict.py文件里面的执行思路检测图片就行了，详细看看predict.py文件的注释。**
562 | 
563 | **问：如何对一个文件夹的图片进行遍历？并且保存。
564 | 答：遍历的话一般使用os.listdir先找出文件夹里面的所有图片，然后根据predict.py文件里面的执行思路检测图片就行了。保存的话一般目标检测用的是Image，所以查询一下PIL库的Image如何进行保存。如果有些库用的是cv2，那就是查一下cv2怎么保存图片。详细看看predict.py文件的注释。**
565 | 
566 | ### t、路径问题（No such file or directory、StopIteration: [Errno 13] Permission denied: 'XXXXXX'）
567 | **问：我怎么出现了这样的错误呀：**
568 | ```python
569 | FileNotFoundError: [Errno 2] No such file or directory
570 | StopIteration: [Errno 13] Permission denied: 'D:\\Study\\Collection\\Dataset\\VOC07+12+test\\VOCdevkit/VOC2007'
571 | ……………………………………
572 | ……………………………………
573 | ```
574 | **答：去检查一下文件夹路径，查看是否有对应文件；并且检查一下2007_train.txt，其中文件路径是否有错。**
575 | 关于路径有几个重要的点：
576 | **文件夹名称中一定不要有空格。
577 | 注意相对路径和绝对路径。
578 | 多百度路径相关的知识。**
579 | 
580 | **所有的路径问题基本上都是根目录问题，好好查一下相对目录的概念！**
581 | ### u、和原版比较问题，你怎么和原版不一样啊？
582 | **问：原版的代码是XXX，为什么你的代码是XXX？
583 | 答：是啊……这要不怎么说我不是原版呢……**
584 | 
585 | **问：你这个代码和原版比怎么样，可以达到原版的效果么？
586 | 答：基本上可以达到，我都用voc数据测过，我没有好显卡，没有能力在coco上测试与训练。**
587 | 
588 | ### v、我的检测速度是xxx正常吗？我的检测速度还能增快吗？
589 | **问：你这个FPS可以到达多少，可以到 XX FPS么？
590 | 答：FPS和机子的配置有关，配置高就快，配置低就慢。**
591 | 
592 | **问：我的检测速度是xxx正常吗？我的检测速度还能增快吗？
593 | 答：看配置，配置好速度就快，如果想要配置不变的情况下加快速度，就要修改网络了。**
594 | 
595 | **问：为什么论文中说速度可以达到XX，但是这里却没有？
596 | 答：检查是否正确安装了tensorflow-gpu或者pytorch的gpu版本，如果已经正确安装，可以去利用time.time()的方法查看detect_image里面，哪一段代码耗时更长（不仅只有网络耗时长，其它处理部分也会耗时，如绘图等）。有些论文还会使用多batch进行预测，我并没有去实现这个部分。**
597 | 
598 | ### w、预测图片不显示问题
599 | **问：为什么你的代码在预测完成后不显示图片？只是在命令行告诉我有什么目标。
600 | 答：给系统安装一个图片查看器就行了。**
601 | 
602 | ### x、算法评价问题（miou）
603 | **问：怎么计算miou？
604 | 答：参考视频里的miou测量部分。**
605 | 
606 | **问：怎么计算Recall、Precision指标。
607 | 答：现有的代码还无法获得，需要各位同学理解一下混淆矩阵的概念，然后自行计算一下。**
608 | 
609 | ### y、UP，怎么优化模型啊？我想提升效果
610 | **问：up，怎么修改模型啊，我想发个小论文！
611 | 答：建议看看目标检测中的yolov4论文，作为一个大型调参现场非常有参考意义，使用了很多tricks。我能给的建议就是多看一些经典模型，然后拆解里面的亮点结构并使用。**
612 | 
613 | ### z、部署问题（ONNX、TensorRT等）
614 | 我没有具体部署到手机等设备上过，所以很多部署问题我并不了解……
615 | 
616 | ## 5、交流群问题
617 | **问：up，有没有QQ群啥的呢？
618 | 答：没有没有，我没有时间管理QQ群……**
619 | 
620 | ## 6、怎么学习的问题
621 | **问：up，你的学习路线怎么样的？我是个小白我要怎么学？
622 | 答：这里有几点需要注意哈
623 | 1、我不是高手，很多东西我也不会，我的学习路线也不一定适用所有人。
624 | 2、我实验室不做深度学习，所以我很多东西都是自学，自己摸索，正确与否我也不知道。
625 | 3、我个人觉得学习更靠自学**
626 | 学习路线的话，我是先学习了莫烦的python教程，从tensorflow、keras、pytorch入门，入门完之后学的SSD，YOLO，然后了解了很多经典的卷积网，后面就开始学很多不同的代码了，我的学习方法就是一行一行的看，了解整个代码的执行流程，特征层的shape变化等，花了很多时间也没有什么捷径，就是要花时间吧。
627 | 


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
  1 | import datetime
  2 | import os
  3 | from functools import partial
  4 | 
  5 | import tensorflow as tf
  6 | import tensorflow.keras.backend as K
  7 | from tensorflow.keras.callbacks import (EarlyStopping, LearningRateScheduler,
  8 |                                         TensorBoard)
  9 | from tensorflow.keras.optimizers import SGD, Adam
 10 | 
 11 | from nets.yolo import get_train_model, yolo_body
 12 | from nets.yolo_training import get_lr_scheduler
 13 | from utils.callbacks import EvalCallback, LossHistory, ModelCheckpoint
 14 | from utils.dataloader import YoloDatasets
 15 | from utils.utils import get_anchors, get_classes, show_config
 16 | from utils.utils_fit import fit_one_epoch
 17 | 
 18 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
 19 | 
 20 | '''
 21 | 训练自己的目标检测模型一定需要注意以下几点：
 22 | 1、训练前仔细检查自己的格式是否满足要求，该库要求数据集格式为VOC格式，需要准备好的内容有输入图片和标签
 23 |    输入图片为.jpg图片，无需固定大小，传入训练前会自动进行resize。
 24 |    灰度图会自动转成RGB图片进行训练，无需自己修改。
 25 |    输入图片如果后缀非jpg，需要自己批量转成jpg后再开始训练。
 26 | 
 27 |    标签为.xml格式，文件中会有需要检测的目标信息，标签文件和输入图片文件相对应。
 28 | 
 29 | 2、损失值的大小用于判断是否收敛，比较重要的是有收敛的趋势，即验证集损失不断下降，如果验证集损失基本上不改变的话，模型基本上就收敛了。
 30 |    损失值的具体大小并没有什么意义，大和小只在于损失的计算方式，并不是接近于0才好。如果想要让损失好看点，可以直接到对应的损失函数里面除上10000。
 31 |    训练过程中的损失值会保存在logs文件夹下的loss_%Y_%m_%d_%H_%M_%S文件夹中
 32 |    
 33 | 3、训练好的权值文件保存在logs文件夹中，每个训练世代（Epoch）包含若干训练步长（Step），每个训练步长（Step）进行一次梯度下降。
 34 |    如果只是训练了几个Step是不会保存的，Epoch和Step的概念要捋清楚一下。
 35 | '''
 36 | if __name__ == "__main__":
 37 |     #----------------------------------------------------#
 38 |     #   是否使用eager模式训练
 39 |     #----------------------------------------------------#
 40 |     eager           = False
 41 |     #---------------------------------------------------------------------#
 42 |     #   train_gpu   训练用到的GPU
 43 |     #               默认为第一张卡、双卡为[0, 1]、三卡为[0, 1, 2]
 44 |     #               在使用多GPU时，每个卡上的batch为总batch除以卡的数量。
 45 |     #---------------------------------------------------------------------#
 46 |     train_gpu       = [0,]
 47 |     #---------------------------------------------------------------------#
 48 |     #   classes_path    指向model_data下的txt，与自己训练的数据集相关 
 49 |     #                   训练前一定要修改classes_path，使其对应自己的数据集
 50 |     #---------------------------------------------------------------------#
 51 |     classes_path    = 'model_data/voc_classes.txt'
 52 |     #---------------------------------------------------------------------#
 53 |     #   anchors_path    代表先验框对应的txt文件，一般不修改。
 54 |     #   anchors_mask    用于帮助代码找到对应的先验框，一般不修改。
 55 |     #---------------------------------------------------------------------#
 56 |     anchors_path    = 'model_data/yolo_anchors.txt'
 57 |     anchors_mask    = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
 58 |     #----------------------------------------------------------------------------------------------------------------------------#
 59 |     #   权值文件的下载请看README，可以通过网盘下载。模型的 预训练权重 对不同数据集是通用的，因为特征是通用的。
 60 |     #   模型的 预训练权重 比较重要的部分是 主干特征提取网络的权值部分，用于进行特征提取。
 61 |     #   预训练权重对于99%的情况都必须要用，不用的话主干部分的权值太过随机，特征提取效果不明显，网络训练的结果也不会好
 62 |     #
 63 |     #   如果训练过程中存在中断训练的操作，可以将model_path设置成logs文件夹下的权值文件，将已经训练了一部分的权值再次载入。
 64 |     #   同时修改下方的 冻结阶段 或者 解冻阶段 的参数，来保证模型epoch的连续性。
 65 |     #   
 66 |     #   当model_path = ''的时候不加载整个模型的权值。
 67 |     #
 68 |     #   此处使用的是整个模型的权重，因此是在train.py进行加载的。
 69 |     #   如果想要让模型从主干的预训练权值开始训练，则设置model_path为主干网络的权值，此时仅加载主干。
 70 |     #   如果想要让模型从0开始训练，则设置model_path = ''，Freeze_Train = False，此时从0开始训练，且没有冻结主干的过程。
 71 |     #
 72 |     #   一般来讲，网络从0开始的训练效果会很差，因为权值太过随机，特征提取效果不明显，因此非常、非常、非常不建议大家从0开始训练！
 73 |     #   如果一定要从0开始，可以了解imagenet数据集，首先训练分类模型，获得网络的主干部分权值，分类模型的 主干部分 和该模型通用，基于此进行训练。
 74 |     #----------------------------------------------------------------------------------------------------------------------------#
 75 |     model_path      = 'model_data/yolov3_efficientnet_b2_voc.h5'
 76 |     #------------------------------------------------------#
 77 |     #   input_shape     输入的shape大小，一定要是32的倍数
 78 |     #------------------------------------------------------#
 79 |     input_shape     = [416, 416]
 80 |     #------------------------------------------------------#
 81 |     #   efficientnet的版本
 82 |     #   phi = 0代表efficientnet-B0-yolov3
 83 |     #   phi = 1代表efficientnet-B1-yolov3
 84 |     #   phi = 2代表efficientnet-B2-yolov3   
 85 |     #   …… 以此类推
 86 |     #------------------------------------------------------#
 87 |     phi             = 2
 88 |     
 89 |     #----------------------------------------------------------------------------------------------------------------------------#
 90 |     #   训练分为两个阶段，分别是冻结阶段和解冻阶段。设置冻结阶段是为了满足机器性能不足的同学的训练需求。
 91 |     #   冻结训练需要的显存较小，显卡非常差的情况下，可设置Freeze_Epoch等于UnFreeze_Epoch，此时仅仅进行冻结训练。
 92 |     #      
 93 |     #   在此提供若干参数设置建议，各位训练者根据自己的需求进行灵活调整：
 94 |     #   （一）从整个模型的预训练权重开始训练： 
 95 |     #       Adam：
 96 |     #           Init_Epoch = 0，Freeze_Epoch = 50，UnFreeze_Epoch = 100，Freeze_Train = True，optimizer_type = 'adam'，Init_lr = 1e-3，weight_decay = 0。（冻结）
 97 |     #           Init_Epoch = 0，UnFreeze_Epoch = 100，Freeze_Train = False，optimizer_type = 'adam'，Init_lr = 1e-3，weight_decay = 0。（不冻结）
 98 |     #       SGD：
 99 |     #           Init_Epoch = 0，Freeze_Epoch = 50，UnFreeze_Epoch = 300，Freeze_Train = True，optimizer_type = 'sgd'，Init_lr = 1e-2，weight_decay = 5e-4。（冻结）
100 |     #           Init_Epoch = 0，UnFreeze_Epoch = 300，Freeze_Train = False，optimizer_type = 'sgd'，Init_lr = 1e-2，weight_decay = 5e-4。（不冻结）
101 |     #       其中：UnFreeze_Epoch可以在100-300之间调整。
102 |     #   （二）从主干网络的预训练权重开始训练：
103 |     #       Adam：
104 |     #           Init_Epoch = 0，Freeze_Epoch = 50，UnFreeze_Epoch = 100，Freeze_Train = True，optimizer_type = 'adam'，Init_lr = 1e-3，weight_decay = 0。（冻结）
105 |     #           Init_Epoch = 0，UnFreeze_Epoch = 100，Freeze_Train = False，optimizer_type = 'adam'，Init_lr = 1e-3，weight_decay = 0。（不冻结）
106 |     #       SGD：
107 |     #           Init_Epoch = 0，Freeze_Epoch = 50，UnFreeze_Epoch = 300，Freeze_Train = True，optimizer_type = 'sgd'，Init_lr = 1e-2，weight_decay = 5e-4。（冻结）
108 |     #           Init_Epoch = 0，UnFreeze_Epoch = 300，Freeze_Train = False，optimizer_type = 'sgd'，Init_lr = 1e-2，weight_decay = 5e-4。（不冻结）
109 |     #       其中：由于从主干网络的预训练权重开始训练，主干的权值不一定适合目标检测，需要更多的训练跳出局部最优解。
110 |     #             UnFreeze_Epoch可以在150-300之间调整，YOLOV5和YOLOX均推荐使用300。
111 |     #             Adam相较于SGD收敛的快一些。因此UnFreeze_Epoch理论上可以小一点，但依然推荐更多的Epoch。
112 |     #   （三）batch_size的设置：
113 |     #       在显卡能够接受的范围内，以大为好。显存不足与数据集大小无关，提示显存不足（OOM或者CUDA out of memory）请调小batch_size。
114 |     #       受到BatchNorm层影响，batch_size最小为2，不能为1。
115 |     #       正常情况下Freeze_batch_size建议为Unfreeze_batch_size的1-2倍。不建议设置的差距过大，因为关系到学习率的自动调整。
116 |     #----------------------------------------------------------------------------------------------------------------------------#
117 |     #------------------------------------------------------------------#
118 |     #   冻结阶段训练参数
119 |     #   此时模型的主干被冻结了，特征提取网络不发生改变
120 |     #   占用的显存较小，仅对网络进行微调
121 |     #   Init_Epoch          模型当前开始的训练世代，其值可以大于Freeze_Epoch，如设置：
122 |     #                       Init_Epoch = 60、Freeze_Epoch = 50、UnFreeze_Epoch = 100
123 |     #                       会跳过冻结阶段，直接从60代开始，并调整对应的学习率。
124 |     #                       （断点续练时使用）
125 |     #   Freeze_Epoch        模型冻结训练的Freeze_Epoch
126 |     #                       (当Freeze_Train=False时失效)
127 |     #   Freeze_batch_size   模型冻结训练的batch_size
128 |     #                       (当Freeze_Train=False时失效)
129 |     #------------------------------------------------------------------#
130 |     Init_Epoch          = 0
131 |     Freeze_Epoch        = 50
132 |     Freeze_batch_size   = 16
133 |     #------------------------------------------------------------------#
134 |     #   解冻阶段训练参数
135 |     #   此时模型的主干不被冻结了，特征提取网络会发生改变
136 |     #   占用的显存较大，网络所有的参数都会发生改变
137 |     #   UnFreeze_Epoch          模型总共训练的epoch
138 |     #                           SGD需要更长的时间收敛，因此设置较大的UnFreeze_Epoch
139 |     #                           Adam可以使用相对较小的UnFreeze_Epoch
140 |     #   Unfreeze_batch_size     模型在解冻后的batch_size
141 |     #------------------------------------------------------------------#
142 |     UnFreeze_Epoch      = 300
143 |     Unfreeze_batch_size = 8
144 |     #------------------------------------------------------------------#
145 |     #   Freeze_Train    是否进行冻结训练
146 |     #                   默认先冻结主干训练后解冻训练。
147 |     #------------------------------------------------------------------#
148 |     Freeze_Train        = True
149 |     
150 |     #------------------------------------------------------------------#
151 |     #   其它训练参数：学习率、优化器、学习率下降有关
152 |     #------------------------------------------------------------------#
153 |     #------------------------------------------------------------------#
154 |     #   Init_lr         模型的最大学习率
155 |     #                   当使用Adam优化器时建议设置  Init_lr=1e-3
156 |     #                   当使用SGD优化器时建议设置   Init_lr=1e-2
157 |     #   Min_lr          模型的最小学习率，默认为最大学习率的0.01
158 |     #------------------------------------------------------------------#
159 |     Init_lr             = 1e-2
160 |     Min_lr              = Init_lr * 0.01
161 |     #------------------------------------------------------------------#
162 |     #   optimizer_type  使用到的优化器种类，可选的有adam、sgd
163 |     #                   当使用Adam优化器时建议设置  Init_lr=1e-3
164 |     #                   当使用SGD优化器时建议设置   Init_lr=1e-2
165 |     #   momentum        优化器内部使用到的momentum参数
166 |     #   weight_decay    权值衰减，可防止过拟合
167 |     #                   adam会导致weight_decay错误，使用adam时建议设置为0。
168 |     #------------------------------------------------------------------#
169 |     optimizer_type      = "sgd"
170 |     momentum            = 0.937
171 |     weight_decay        = 5e-4
172 |     #------------------------------------------------------------------#
173 |     #   lr_decay_type   使用到的学习率下降方式，可选的有'step'、'cos'
174 |     #------------------------------------------------------------------#
175 |     lr_decay_type       = 'cos'
176 |     #------------------------------------------------------------------#
177 |     #   save_period     多少个epoch保存一次权值
178 |     #------------------------------------------------------------------#
179 |     save_period         = 10
180 |     #------------------------------------------------------------------#
181 |     #   save_dir        权值与日志文件保存的文件夹
182 |     #------------------------------------------------------------------#
183 |     save_dir            = 'logs'
184 |     #------------------------------------------------------------------#
185 |     #   eval_flag       是否在训练时进行评估，评估对象为验证集
186 |     #                   安装pycocotools库后，评估体验更佳。
187 |     #   eval_period     代表多少个epoch评估一次，不建议频繁的评估
188 |     #                   评估需要消耗较多的时间，频繁评估会导致训练非常慢
189 |     #   此处获得的mAP会与get_map.py获得的会有所不同，原因有二：
190 |     #   （一）此处获得的mAP为验证集的mAP。
191 |     #   （二）此处设置评估参数较为保守，目的是加快评估速度。
192 |     #------------------------------------------------------------------#
193 |     eval_flag           = True
194 |     eval_period         = 10
195 |     #------------------------------------------------------------------#
196 |     #   num_workers     用于设置是否使用多线程读取数据，1代表关闭多线程
197 |     #                   开启后会加快数据读取速度，但是会占用更多内存
198 |     #                   keras里开启多线程有些时候速度反而慢了许多
199 |     #                   在IO为瓶颈的时候再开启多线程，即GPU运算速度远大于读取图片的速度。
200 |     #------------------------------------------------------------------#
201 |     num_workers         = 1
202 | 
203 |     #------------------------------------------------------#
204 |     #   train_annotation_path   训练图片路径和标签
205 |     #   val_annotation_path     验证图片路径和标签
206 |     #------------------------------------------------------#
207 |     train_annotation_path   = '2007_train.txt'
208 |     val_annotation_path     = '2007_val.txt'
209 | 
210 |     #------------------------------------------------------#
211 |     #   设置用到的显卡
212 |     #------------------------------------------------------#
213 |     os.environ["CUDA_VISIBLE_DEVICES"]  = ','.join(str(x) for x in train_gpu)
214 |     ngpus_per_node                      = len(train_gpu)
215 |     
216 |     gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
217 |     for gpu in gpus:
218 |         tf.config.experimental.set_memory_growth(gpu, True)
219 |         
220 |     if ngpus_per_node > 1:
221 |         strategy = tf.distribute.MirroredStrategy()
222 |     else:
223 |         strategy = None
224 |     print('Number of devices: {}'.format(ngpus_per_node))
225 | 
226 |     #----------------------------------------------------#
227 |     #   Load the class names and the anchors
228 |     #----------------------------------------------------#
229 |     class_names, num_classes = get_classes(classes_path)
230 |     anchors, num_anchors     = get_anchors(anchors_path)
231 | 
232 |     #----------------------------------------------------#
233 |     #   Build the model and load the pretrained weights;
234 |     #   with more than one GPU this is done inside strategy.scope()
235 |     #----------------------------------------------------#
236 |     if ngpus_per_node > 1:
237 |         with strategy.scope():
238 |             #------------------------------------------------------#
239 |             #   Create the YOLO model
240 |             #------------------------------------------------------#
241 |             model_body  = yolo_body((None, None, 3), anchors_mask, num_classes, phi, weight_decay)
242 |             if model_path != '':
243 |                 #------------------------------------------------------#
244 |                 #   Load the pretrained weights
245 |                 #------------------------------------------------------#
246 |                 print('Load weights {}.'.format(model_path))
247 |                 model_body.load_weights(model_path, by_name=True, skip_mismatch=True)
248 |             if not eager:  # `model` is only defined when eager is False
249 |                 model = get_train_model(model_body, input_shape, num_classes, anchors, anchors_mask)
250 |     else:
251 |         #------------------------------------------------------#
252 |         #   Create the YOLO model
253 |         #------------------------------------------------------#
254 |         model_body  = yolo_body((None, None, 3), anchors_mask, num_classes, phi, weight_decay)
255 |         if model_path != '':
256 |             #------------------------------------------------------#
257 |             #   Load the pretrained weights
258 |             #------------------------------------------------------#
259 |             print('Load weights {}.'.format(model_path))
260 |             model_body.load_weights(model_path, by_name=True, skip_mismatch=True)
261 |         if not eager:  # `model` is only defined when eager is False
262 |             model = get_train_model(model_body, input_shape, num_classes, anchors, anchors_mask)
262 | 
263 |     #---------------------------#
264 |     #   Read the dataset's annotation txt files (one sample per line)
265 |     #---------------------------#
266 |     with open(train_annotation_path, encoding='utf-8') as f:
267 |         train_lines = f.readlines()
268 |     with open(val_annotation_path, encoding='utf-8') as f:
269 |         val_lines   = f.readlines()
270 |     num_train   = len(train_lines)  # number of training samples = number of lines read
271 |     num_val     = len(val_lines)  # number of validation samples = number of lines read
272 | 
273 |     show_config(
274 |         classes_path = classes_path, anchors_path = anchors_path, anchors_mask = anchors_mask, model_path = model_path, input_shape = input_shape, \
275 |         Init_Epoch = Init_Epoch, Freeze_Epoch = Freeze_Epoch, UnFreeze_Epoch = UnFreeze_Epoch, Freeze_batch_size = Freeze_batch_size, Unfreeze_batch_size = Unfreeze_batch_size, Freeze_Train = Freeze_Train, \
276 |         Init_lr = Init_lr, Min_lr = Min_lr, optimizer_type = optimizer_type, momentum = momentum, lr_decay_type = lr_decay_type, \
277 |         save_period = save_period, save_dir = save_dir, num_workers = num_workers, num_train = num_train, num_val = num_val
278 |     )
279 |     #---------------------------------------------------------#
280 |     #   总训练世代指的是遍历全部数据的总次数
281 |     #   总训练步长指的是梯度下降的总次数 
282 |     #   每个训练世代包含若干训练步长，每个训练步长进行一次梯度下降。
283 |     #   此处仅建议最低训练世代，上不封顶，计算时只考虑了解冻部分
284 |     #----------------------------------------------------------#
285 |     wanted_step = 5e4 if optimizer_type == "sgd" else 1.5e4
286 |     total_step  = num_train // Unfreeze_batch_size * UnFreeze_Epoch
287 |     if total_step <= wanted_step:
288 |         if num_train // Unfreeze_batch_size == 0:
289 |             raise ValueError('数据集过小，无法进行训练，请扩充数据集。')
290 |         wanted_epoch = wanted_step // (num_train // Unfreeze_batch_size) + 1
291 |         print("\n\033[1;33;44m[Warning] 使用%s优化器时，建议将训练总步长设置到%d以上。\033[0m"%(optimizer_type, wanted_step))
292 |         print("\033[1;33;44m[Warning] 本次运行的总训练数据量为%d，Unfreeze_batch_size为%d，共训练%d个Epoch，计算出总训练步长为%d。\033[0m"%(num_train, Unfreeze_batch_size, UnFreeze_Epoch, total_step))
293 |         print("\033[1;33;44m[Warning] 由于总训练步长为%d，小于建议总步长%d，建议设置总世代为%d。\033[0m"%(total_step, wanted_step, wanted_epoch))
294 | 
295 |     #------------------------------------------------------#
296 |     #   主干特征提取网络特征通用，冻结训练可以加快训练速度
297 |     #   也可以在训练初期防止权值被破坏。
298 |     #   Init_Epoch为起始世代
299 |     #   Freeze_Epoch为冻结训练的世代
300 |     #   UnFreeze_Epoch总训练世代
301 |     #   提示OOM或者显存不足请调小Batch_size
302 |     #------------------------------------------------------#
303 |     if True:
304 |         if Freeze_Train:
305 |             freeze_layers = [231, 333, 333, 378, 468, 570, 660, 807][phi]
306 |             if Freeze_Train:
307 |                 for i in range(freeze_layers): model_body.layers[i].trainable = False
308 |                 print('Freeze the first {} layers of total {} layers.'.format(freeze_layers, len(model_body.layers)))
309 | 
310 |         #-------------------------------------------------------------------#
311 |         #   Without freeze training, batch_size is simply Unfreeze_batch_size
312 |         #-------------------------------------------------------------------#
313 |         batch_size  = Freeze_batch_size if Freeze_Train else Unfreeze_batch_size
314 | 
315 |         #-------------------------------------------------------------------#
316 |         #   Adapt the learning rates to the current batch_size
317 |         #-------------------------------------------------------------------#
318 |         nbs             = 64  # reference batch size the learning rates are scaled against
319 |         lr_limit_max    = 1e-3 if optimizer_type == 'adam' else 5e-2
320 |         lr_limit_min    = 3e-4 if optimizer_type == 'adam' else 5e-4
321 |         Init_lr_fit     = min(max(batch_size / nbs * Init_lr, lr_limit_min), lr_limit_max)  # scaled, then clamped to [min, max]
322 |         Min_lr_fit      = min(max(batch_size / nbs * Min_lr, lr_limit_min * 1e-2), lr_limit_max * 1e-2)  # same clamp at 1/100 of the limits
323 | 
324 |         #---------------------------------------#
325 |         #   Build the learning-rate decay function
326 |         #---------------------------------------#
327 |         lr_scheduler_func = get_lr_scheduler(lr_decay_type, Init_lr_fit, Min_lr_fit, UnFreeze_Epoch)
328 | 
329 |         epoch_step      = num_train // batch_size
330 |         epoch_step_val  = num_val // batch_size
331 |         # Steps per epoch count full batches only; abort when either split cannot fill a single batch.
332 |         if epoch_step == 0 or epoch_step_val == 0:
333 |             raise ValueError('数据集过小，无法进行训练，请扩充数据集。')
334 | 
335 |         train_dataloader    = YoloDatasets(train_lines, input_shape, anchors, batch_size, num_classes, anchors_mask, train = True)
336 |         val_dataloader      = YoloDatasets(val_lines, input_shape, anchors, batch_size, num_classes, anchors_mask, train = False)
337 |         # NOTE(review): both optimizers are instantiated with Init_lr (not Init_lr_fit) before one is picked; the rate is later driven by lr_scheduler_func.
338 |         optimizer = {
339 |             'adam'  : Adam(lr = Init_lr, beta_1 = momentum),
340 |             'sgd'   : SGD(lr = Init_lr, momentum = momentum, nesterov=True)
341 |         }[optimizer_type]
342 |         
343 |         if eager:  # custom per-epoch loop via fit_one_epoch; the else branch uses model.fit
344 |             start_epoch     = Init_Epoch
345 |             end_epoch       = UnFreeze_Epoch
346 |             UnFreeze_flag   = False  # set to True once the frozen layers have been unfrozen
347 | 
348 |             gen     = tf.data.Dataset.from_generator(partial(train_dataloader.generate), (tf.float32, tf.float32, tf.float32, tf.float32))
349 |             gen_val = tf.data.Dataset.from_generator(partial(val_dataloader.generate), (tf.float32, tf.float32, tf.float32, tf.float32))
350 | 
351 |             gen     = gen.shuffle(buffer_size = batch_size).prefetch(buffer_size = batch_size)
352 |             gen_val = gen_val.shuffle(buffer_size = batch_size).prefetch(buffer_size = batch_size)
353 |             
354 |             if ngpus_per_node > 1:
355 |                 gen     = strategy.experimental_distribute_dataset(gen)
356 |                 gen_val = strategy.experimental_distribute_dataset(gen_val)
357 | 
358 |             time_str        = datetime.datetime.strftime(datetime.datetime.now(),'%Y_%m_%d_%H_%M_%S')
359 |             log_dir         = os.path.join(save_dir, "loss_" + str(time_str))  # timestamped log directory under save_dir
360 |             loss_history    = LossHistory(log_dir)
361 |             eval_callback   = EvalCallback(model_body, input_shape, anchors, anchors_mask, class_names, num_classes, val_lines, log_dir, \
362 |                                             eval_flag=eval_flag, period=eval_period)
363 |             #---------------------------------------#
364 |             #   Start training the model
365 |             #---------------------------------------#
366 |             for epoch in range(start_epoch, end_epoch):
367 |                 #---------------------------------------#
368 |                 #   If part of the model was frozen,
369 |                 #   unfreeze it and reset the parameters (runs once)
370 |                 #---------------------------------------#
371 |                 if epoch >= Freeze_Epoch and not UnFreeze_flag and Freeze_Train:
372 |                     batch_size      = Unfreeze_batch_size
373 | 
374 |                     #-------------------------------------------------------------------#
375 |                     #   Adapt the learning rates to the current batch_size
376 |                     #-------------------------------------------------------------------#
377 |                     nbs             = 64
378 |                     lr_limit_max    = 1e-3 if optimizer_type == 'adam' else 5e-2
379 |                     lr_limit_min    = 3e-4 if optimizer_type == 'adam' else 5e-4
380 |                     Init_lr_fit     = min(max(batch_size / nbs * Init_lr, lr_limit_min), lr_limit_max)
381 |                     Min_lr_fit      = min(max(batch_size / nbs * Min_lr, lr_limit_min * 1e-2), lr_limit_max * 1e-2)
382 |                     #---------------------------------------#
383 |                     #   Build the learning-rate decay function
384 |                     #---------------------------------------#
385 |                     lr_scheduler_func = get_lr_scheduler(lr_decay_type, Init_lr_fit, Min_lr_fit, UnFreeze_Epoch)
386 | 
387 |                     for i in range(len(model_body.layers)): 
388 |                         model_body.layers[i].trainable = True
389 | 
390 |                     epoch_step      = num_train // batch_size
391 |                     epoch_step_val  = num_val // batch_size
392 | 
393 |                     if epoch_step == 0 or epoch_step_val == 0:
394 |                         raise ValueError("数据集过小，无法继续进行训练，请扩充数据集。")
395 | 
396 |                     train_dataloader.batch_size    = batch_size
397 |                     val_dataloader.batch_size      = batch_size
398 | 
399 |                     gen     = tf.data.Dataset.from_generator(partial(train_dataloader.generate), (tf.float32, tf.float32, tf.float32, tf.float32))
400 |                     gen_val = tf.data.Dataset.from_generator(partial(val_dataloader.generate), (tf.float32, tf.float32, tf.float32, tf.float32))
401 | 
402 |                     gen     = gen.shuffle(buffer_size = batch_size).prefetch(buffer_size = batch_size)
403 |                     gen_val = gen_val.shuffle(buffer_size = batch_size).prefetch(buffer_size = batch_size)
404 |             
405 |                     if ngpus_per_node > 1:
406 |                         gen     = strategy.experimental_distribute_dataset(gen)
407 |                         gen_val = strategy.experimental_distribute_dataset(gen_val)
408 |                     
409 |                     UnFreeze_flag = True
410 | 
411 |                 lr = lr_scheduler_func(epoch)  # learning rate for this epoch
412 |                 K.set_value(optimizer.lr, lr)  # written into the optimizer before the epoch runs
413 | 
414 |                 fit_one_epoch(model_body, loss_history, eval_callback, optimizer, epoch, epoch_step, epoch_step_val, gen, gen_val, 
415 |                             end_epoch, input_shape, anchors, anchors_mask, num_classes, save_period, save_dir, strategy)
416 | 
417 |                 train_dataloader.on_epoch_end()
418 |                 val_dataloader.on_epoch_end()
419 |         else:
420 |             start_epoch = Init_Epoch
421 |             end_epoch   = Freeze_Epoch if Freeze_Train else UnFreeze_Epoch  # first fit() stops at Freeze_Epoch when freezing
422 |             
423 |             if ngpus_per_node > 1:
424 |                 with strategy.scope():
425 |                     model.compile(optimizer = optimizer, loss={'yolo_loss': lambda y_true, y_pred: y_pred})
426 |             else:
427 |                 model.compile(optimizer = optimizer, loss={'yolo_loss': lambda y_true, y_pred: y_pred})
428 |             #-------------------------------------------------------------------------------#
429 |             #   Training callbacks
430 |             #   logging         sets where the TensorBoard logs are written
431 |             #   checkpoint      controls how weights are saved; period sets how many epochs pass between saves
432 |             #   lr_scheduler    sets how the learning rate decays
433 |             #   early_stopping  stops training automatically once val_loss has not improved for several epochs (model has basically converged)
434 |             #-------------------------------------------------------------------------------#
435 |             time_str        = datetime.datetime.strftime(datetime.datetime.now(),'%Y_%m_%d_%H_%M_%S')
436 |             log_dir         = os.path.join(save_dir, "loss_" + str(time_str))
437 |             logging         = TensorBoard(log_dir)
438 |             loss_history    = LossHistory(log_dir)
439 |             checkpoint      = ModelCheckpoint(os.path.join(save_dir, "ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5"), 
440 |                                     monitor = 'val_loss', save_weights_only = True, save_best_only = False, period = save_period)
441 |             checkpoint_last = ModelCheckpoint(os.path.join(save_dir, "last_epoch_weights.h5"), 
442 |                                     monitor = 'val_loss', save_weights_only = True, save_best_only = False, period = 1)
443 |             checkpoint_best = ModelCheckpoint(os.path.join(save_dir, "best_epoch_weights.h5"), 
444 |                                     monitor = 'val_loss', save_weights_only = True, save_best_only = True, period = 1)
445 |             early_stopping  = EarlyStopping(monitor='val_loss', min_delta = 0, patience = 10, verbose = 1)  # NOTE(review): created but never added to `callbacks`
446 |             lr_scheduler    = LearningRateScheduler(lr_scheduler_func, verbose = 1)
447 |             eval_callback   = EvalCallback(model_body, input_shape, anchors, anchors_mask, class_names, num_classes, val_lines, log_dir, \
448 |                                             eval_flag=eval_flag, period=eval_period)
449 |             callbacks       = [logging, loss_history, checkpoint, checkpoint_last, checkpoint_best, lr_scheduler, eval_callback]
450 | 
451 |             if start_epoch < end_epoch:  # skipped when Init_Epoch is already at or past the end of this phase
452 |                 print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
453 |                 model.fit(
454 |                     x                   = train_dataloader,
455 |                     steps_per_epoch     = epoch_step,
456 |                     validation_data     = val_dataloader,
457 |                     validation_steps    = epoch_step_val,
458 |                     epochs              = end_epoch,
459 |                     initial_epoch       = start_epoch,
460 |                     use_multiprocessing = True if num_workers > 1 else False,
461 |                     workers             = num_workers,
462 |                     callbacks           = callbacks
463 |                 )
464 |             #---------------------------------------#
465 |             #   If part of the model was frozen,
466 |             #   unfreeze it and reset the parameters
467 |             #---------------------------------------#
468 |             if Freeze_Train:
469 |                 batch_size  = Unfreeze_batch_size
470 |                 start_epoch = Freeze_Epoch if start_epoch < Freeze_Epoch else start_epoch  # i.e. max(start_epoch, Freeze_Epoch)
471 |                 end_epoch   = UnFreeze_Epoch
472 |                     
473 |                 #-------------------------------------------------------------------#
474 |                 #   Adapt the learning rates to the current batch_size
475 |                 #-------------------------------------------------------------------#
476 |                 nbs             = 64
477 |                 lr_limit_max    = 1e-3 if optimizer_type == 'adam' else 5e-2
478 |                 lr_limit_min    = 3e-4 if optimizer_type == 'adam' else 5e-4
479 |                 Init_lr_fit     = min(max(batch_size / nbs * Init_lr, lr_limit_min), lr_limit_max)
480 |                 Min_lr_fit      = min(max(batch_size / nbs * Min_lr, lr_limit_min * 1e-2), lr_limit_max * 1e-2)
481 |                 #---------------------------------------#
482 |                 #   Build the learning-rate decay function
483 |                 #---------------------------------------#
484 |                 lr_scheduler_func = get_lr_scheduler(lr_decay_type, Init_lr_fit, Min_lr_fit, UnFreeze_Epoch)
485 |                 lr_scheduler    = LearningRateScheduler(lr_scheduler_func, verbose = 1)
486 |                 callbacks       = [logging, loss_history, checkpoint, checkpoint_last, checkpoint_best, lr_scheduler, eval_callback]
487 |                     
488 |                 for i in range(len(model_body.layers)): 
489 |                     model_body.layers[i].trainable = True
490 |                 if ngpus_per_node > 1:  # the model is compiled again after the trainable flags were changed
491 |                     with strategy.scope():
492 |                         model.compile(optimizer = optimizer, loss={'yolo_loss': lambda y_true, y_pred: y_pred})
493 |                 else:
494 |                     model.compile(optimizer = optimizer, loss={'yolo_loss': lambda y_true, y_pred: y_pred})
495 | 
496 |                 epoch_step      = num_train // batch_size
497 |                 epoch_step_val  = num_val // batch_size
498 | 
499 |                 if epoch_step == 0 or epoch_step_val == 0:
500 |                     raise ValueError("数据集过小，无法继续进行训练，请扩充数据集。")
501 | 
502 |                 train_dataloader.batch_size    = Unfreeze_batch_size
503 |                 val_dataloader.batch_size      = Unfreeze_batch_size
504 | 
505 |                 print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
506 |                 model.fit(
507 |                     x                   = train_dataloader,
508 |                     steps_per_epoch     = epoch_step,
509 |                     validation_data     = val_dataloader,
510 |                     validation_steps    = epoch_step_val,
511 |                     epochs              = end_epoch,
512 |                     initial_epoch       = start_epoch,
513 |                     use_multiprocessing = True if num_workers > 1 else False,
514 |                     workers             = num_workers,
515 |                     callbacks           = callbacks
516 |                 )
517 | 


--------------------------------------------------------------------------------