├── Infer.py
├── README.md
├── main.py
├── objectDetection.py
└── output
    ├── classes
        ├── 0.0.png
        ├── 1.0.png
        └── 2.0.png
    ├── detection-results-info.png
    ├── ground-truth-info.png
    ├── lamr.png
    ├── mAP.png
    └── output.txt


/Infer.py:
--------------------------------------------------------------------------------
   1 | # -*- coding: UTF-8 -*-
   2 | """
   3 | 训练常基于dark-net的YOLOv3网络，目标检测
   4 | """
   5 | from __future__ import absolute_import
   6 | from __future__ import division
   7 | from __future__ import print_function
   8 | import os
   9 | 
  10 | os.environ["FLAGS_fraction_of_gpu_memory_to_use"] = '0.82'
  11 | 
  12 | import uuid
  13 | import numpy as np
  14 | import time
  15 | import six
  16 | import math
  17 | import random
  18 | import paddle
  19 | import paddle.fluid as fluid
  20 | import logging
  21 | import xml.etree.ElementTree
  22 | import codecs
  23 | import json
  24 | 
  25 | from paddle.fluid.initializer import MSRA
  26 | from paddle.fluid.param_attr import ParamAttr
  27 | from paddle.fluid.regularizer import L2Decay
  28 | from PIL import Image, ImageEnhance, ImageDraw, ImageFile
  29 | ImageFile.LOAD_TRUNCATED_IMAGES = True
  30 | Image.MAX_IMAGE_PIXELS = None
  31 | 
logger = None  # module-level logger object; assigned by init_log_config()

# Global configuration shared by the functions in this file.
train_params = {
    "data_dir": "data/data6045",  # dataset directory
    "train_list": "train.txt",  # training-set list file (inside data_dir)
    "eval_list": "eval.txt",
    "class_dim": -1,  # number of classes; filled in by init_train_parameters()
    "label_dict": {},  # label name -> index; filled in by init_train_parameters()
    "num_dict": {},  # index -> label name; filled in by init_train_parameters()
    "image_count": -1,  # number of training samples; filled in by init_train_parameters()
    "continue_train": True,  # whether to load the previous run's parameters and keep training
    "pretrained": False,  # whether to start from a pretrained model
    "pretrained_model_dir": "./pretrained-model",
    "save_model_dir": "./yolo-model",  # directory the model is saved to
    "model_prefix": "yolo-v3",  # model name prefix
    "freeze_dir": "freeze_model",
    "use_tiny": False,  # whether to use the reduced "tiny" model
    "max_box_num": 20,  # maximum number of objects in one image
    "num_epochs": 2,  # number of training epochs
    "train_batch_size": 10,  # keep small for full YOLOv3 (memory blows up otherwise); tiny can use a larger batch
    "use_gpu": True,  # whether to run on GPU
    "yolo_cfg": {  # parameters for the full YOLO model
        "input_size": [3, 448, 448],  # the original side length is 608; reduced to 448 to speed up training and inference
        "anchors": [7, 10, 12, 22, 24, 17, 22, 45, 46, 33, 43, 88, 85, 66, 115, 146, 275, 240],  # anchors (original author marked the meaning as uncertain)
        "anchor_mask": [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    },
    "yolo_tiny_cfg": {  # parameters for the YOLO tiny model
        "input_size": [3, 256, 256],
        "anchors": [6, 8, 13, 15, 22, 34, 48, 50, 81, 100, 205, 191],
        "anchor_mask": [[3, 4, 5], [0, 1, 2]]
    },
    "ignore_thresh": 0.7,
    "mean_rgb": [127.5, 127.5, 127.5],  # subtracted from pixels in preprocess(); also the fill colour in random_expand()
    "mode": "train",
    "multi_data_reader_count": 4,
    "apply_distort": True,  # whether to apply colour-distortion augmentation
    "nms_top_k": 300,
    "nms_pos_k": 300,
    "valid_thresh": 0.01,
    "nms_thresh": 0.40,  # non-maximum-suppression threshold
    "image_distort_strategy": {  # image augmentation settings
        "expand_prob": 0.5,  # probability threshold read by random_expand()
        "expand_max_ratio": 4,
        "hue_prob": 0.5,  # hue
        "hue_delta": 18,
        "contrast_prob": 0.5,  # contrast
        "contrast_delta": 0.5,
        "saturation_prob": 0.5,  # saturation
        "saturation_delta": 0.5,
        "brightness_prob": 0.5,  # brightness
        "brightness_delta": 0.125
    },
    "sgd_strategy": {  # gradient-descent settings
        "learning_rate": 0.002,
        "lr_epochs": [30, 50, 65],  # learning-rate decay boundaries (3 numbers split training into 4 segments)
        "lr_decay": [1, 0.5, 0.25, 0.1]  # learning-rate factor for each of the 4 segments defined by lr_epochs
    },
    "early_stop": {
        "sample_frequency": 50,
        "successive_limit": 3,
        "min_loss": 2.5,
        "min_curr_map": 0.84
    }
}
  96 | 
  97 | 
  98 | def init_train_parameters():
  99 |     """
 100 |     初始化训练参数，主要是初始化图片数量，类别数
 101 |     :return:
 102 |     """
 103 |     file_list = os.path.join(train_params['data_dir'], train_params['train_list'])  # 训练集
 104 |     label_list = os.path.join(train_params['data_dir'], "label_list")  # 标签文件
 105 |     index = 0
 106 | 
 107 |     # codecs是专门用作编码转换通用模块
 108 |     with codecs.open(label_list, encoding='utf-8') as flist:
 109 |         lines = [line.strip() for line in flist]
 110 |         for line in lines:
 111 |             train_params['num_dict'][index] = line.strip()
 112 |             train_params['label_dict'][line.strip()] = index
 113 |             index += 1
 114 |         train_params['class_dim'] = index
 115 | 
 116 |     with codecs.open(file_list, encoding='utf-8') as flist:
 117 |         lines = [line.strip() for line in flist]
 118 |         train_params['image_count'] = len(lines)  # 图片数量
 119 | 
 120 | 
 121 | # 日志相关配置
 122 | def init_log_config():  # 初始化日志相关配置
 123 |     global logger
 124 | 
 125 |     logger = logging.getLogger()  # 创建日志对象
 126 |     logger.setLevel(logging.INFO)  # 设置日志级别
 127 |     log_path = os.path.join(os.getcwd(), 'logs')
 128 | 
 129 |     if not os.path.exists(log_path):  # 创建日志路径
 130 |         os.makedirs(log_path)
 131 | 
 132 |     log_name = os.path.join(log_path, 'train.log')  # 训练日志文件
 133 |     fh = logging.FileHandler(log_name, mode='w')  # 打开文件句柄
 134 |     fh.setLevel(logging.DEBUG)  # 设置级别
 135 | 
 136 |     formatter = logging.Formatter("%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s")
 137 |     fh.setFormatter(formatter)
 138 |     logger.addHandler(fh)
 139 | 
 140 | 
# Runs at import time: installs the file handler and assigns the module-level `logger`.
init_log_config()
 142 | 
 143 | 
# YOLOv3 network definition: darknet-53
class YOLOv3(object):
    """Builds the YOLOv3 detection graph with ``paddle.fluid`` layers.

    ``net`` appends one raw prediction tensor per detection head to
    ``self.outputs``. ``downsample_ratio`` is doubled by every
    ``down_sample`` call, so it holds the accumulated stride once ``net``
    has been built.
    """

    def __init__(self, class_num, anchors, anchor_mask):
        self.outputs = []  # prediction tensors, filled by net()
        self.downsample_ratio = 1  # accumulated down-sampling ratio
        self.anchor_mask = anchor_mask  # per detection head: indices of the anchors it uses
        self.anchors = anchors  # flat anchor list, read in pairs (anchors[2*k], anchors[2*k + 1])
        self.class_num = class_num  # number of classes

        self.yolo_anchors = []
        self.yolo_classes = []

        # For each head, collect the anchor pair selected by every mask index.
        for mask_pair in self.anchor_mask:
            mask_anchors = []
            for mask in mask_pair:
                mask_anchors.append(self.anchors[2 * mask])
                mask_anchors.append(self.anchors[2 * mask + 1])
            self.yolo_anchors.append(mask_anchors)
            self.yolo_classes.append(class_num)

    def name(self):
        return 'YOLOv3'

    # Return the full anchor list
    def get_anchors(self):
        return self.anchors

    # Return the anchor mask
    def get_anchor_mask(self):
        return self.anchor_mask

    def get_class_num(self):
        return self.class_num

    def get_downsample_ratio(self):
        return self.downsample_ratio

    def get_yolo_anchors(self):
        return self.yolo_anchors

    def get_yolo_classes(self):
        return self.yolo_classes

    # Conv + batch-norm unit: convolution, batch normalisation, leaky ReLU
    def conv_bn(self,
                input,  # input tensor
                num_filters,  # number of convolution filters
                filter_size,  # filter size
                stride,  # stride
                padding,  # padding
                use_cudnn=True):
        # 2-D convolution (no bias, no activation; both are handled after batch norm)
        conv = fluid.layers.conv2d(input=input,
                                   num_filters=num_filters,
                                   filter_size=filter_size,
                                   stride=stride,
                                   padding=padding,
                                   act=None,
                                   use_cudnn=use_cudnn,  # whether to use cuDNN (CUDA-accelerated kernels)
                                   param_attr=ParamAttr(initializer=fluid.initializer.Normal(0., 0.02)),
                                   bias_attr=False)

        # batch_norm parameters should not be regularised, so an L2 term with coefficient 0 is attached to exclude them.
        # Original author's note: when leaky ReLU is requested through batch_norm's `act`, only the default
        # alpha=0.02 is available; to choose alpha the activation has to be applied as a separate layer.
        # Regularisation exists to limit over-fitting.
        param_attr = ParamAttr(initializer=fluid.initializer.Normal(0., 0.02),
                               regularizer=L2Decay(0.))
        bias_attr = ParamAttr(initializer=fluid.initializer.Constant(0.0),
                              regularizer=L2Decay(0.))
        out = fluid.layers.batch_norm(input=conv, act=None,
                                      param_attr=param_attr,
                                      bias_attr=bias_attr)
        # leaky_relu: gives negative inputs a non-zero slope (0.1 here)
        out = fluid.layers.leaky_relu(out, 0.1)
        return out

    # Down-sampling implemented with a strided convolution
    # e.g. a 448*448 input becomes ((448+2)-3)/2 + 1 = 224 after down-sampling
    def down_sample(self, input, num_filters, filter_size=3, stride=2, padding=1):
        self.downsample_ratio *= 2  # track the accumulated down-sampling ratio
        return self.conv_bn(input,
                            num_filters=num_filters,
                            filter_size=filter_size,
                            stride=stride,
                            padding=padding)

    # Basic block: two conv/batch-norm layers plus a residual connection
    def basic_block(self, input, num_filters):
        conv1 = self.conv_bn(input, num_filters, filter_size=1, stride=1, padding=0)
        conv2 = self.conv_bn(conv1, num_filters * 2, filter_size=3, stride=1, padding=1)
        out = fluid.layers.elementwise_add(x=input, y=conv2, act=None)  # H(x) = F(x) + x
        return out

    # Stack `count` basic blocks
    def layer_warp(self, input, num_filters, count):
        res_out = self.basic_block(input, num_filters)
        for j in range(1, count):
            res_out = self.basic_block(res_out, num_filters)
        return res_out

    # Up-sampling
    def up_sample(self, input, scale=2):
        # get dynamic upsample output shape
        shape_nchw = fluid.layers.shape(input)  # runtime shape of input
        shape_hw = fluid.layers.slice(shape_nchw, axes=[0], starts=[2], ends=[4])  # entries 2 and 3 of the shape
        shape_hw.stop_gradient = True
        in_shape = fluid.layers.cast(shape_hw, dtype='int32')
        out_shape = in_shape * scale  # target output size
        out_shape.stop_gradient = True

        # resize by actual_shape
        # enlarge the feature map with nearest-neighbour interpolation
        out = fluid.layers.resize_nearest(input=input,
                                          scale=scale,
                                          actual_shape=out_shape)
        return out

    def yolo_detection_block(self, input, num_filters):
        """Return ``(route, tip)``: two 1x1/3x3 conv pairs, then a 1x1 conv (route) and a 3x3 conv on it (tip)."""
        assert num_filters % 2 == 0, "num_filters {} cannot be divided by 2".format(num_filters)

        conv = input
        for j in range(2):
            conv = self.conv_bn(conv, num_filters, filter_size=1, stride=1, padding=0)
            conv = self.conv_bn(conv, num_filters * 2, filter_size=3, stride=1, padding=1)
        route = self.conv_bn(conv, num_filters, filter_size=1, stride=1, padding=0)
        tip = self.conv_bn(route, num_filters * 2, filter_size=3, stride=1, padding=1)
        return route, tip

    # Build the darknet-53 network
    def net(self, img):
        """Build the graph on ``img`` and return ``self.outputs`` (one tensor per detection head)."""
        stages = [1, 2, 8, 8, 4]  # number of basic blocks in each stage
        assert len(self.anchor_mask) <= len(stages), "anchor masks can't bigger than down_sample times"
        # first conv layer (original comment quotes 256*256)
        conv1 = self.conv_bn(img, num_filters=32, filter_size=3, stride=1, padding=1)
        # second conv layer (original comment quotes 128*128)
        downsample_ = self.down_sample(conv1, conv1.shape[1] * 2)  # second argument is the filter count
        blocks = []

        # create the groups of basic blocks
        for i, stage_count in enumerate(stages):
            block = self.layer_warp(downsample_,  # input
                                    32 * (2 ** i),  # filter count
                                    stage_count)  # number of basic blocks
            blocks.append(block)
            if i < len(stages) - 1:  # down-sample after every stage except the last
                downsample_ = self.down_sample(block, block.shape[1] * 2)
        blocks = blocks[-1:-4:-1]  # keep the last three stages in reverse order, as needed by the cross-scale concat below

        # yolo detector
        for i, block in enumerate(blocks):
            # cross-scale connection used by YOLO
            if i > 0:
                block = fluid.layers.concat(input=[route, block], axis=1)  # join route and block along axis 1

            route, tip = self.yolo_detection_block(block,  # input
                                                   num_filters=512 // (2 ** i))  # filter count

            param_attr = ParamAttr(initializer=fluid.initializer.Normal(0., 0.02))
            bias_attr = ParamAttr(initializer=fluid.initializer.Constant(0.0), regularizer=L2Decay(0.))
            block_out = fluid.layers.conv2d(input=tip,
                                            # 5 elements represent x|y|h|w|score
                                            num_filters=len(self.anchor_mask[i]) * (self.class_num + 5),
                                            filter_size=1,
                                            stride=1,
                                            padding=0,
                                            act=None,
                                            param_attr=param_attr,
                                            bias_attr=bias_attr)
            self.outputs.append(block_out)

            # enlarge the feature map by interpolation so it can be concatenated with the next block
            if i < len(blocks) - 1:
                route = self.conv_bn(route, 256 // (2 ** i), filter_size=1, stride=1, padding=0)
                route = self.up_sample(route)  # up-sample

        return self.outputs
 320 | 
# Tiny (reduced) YOLO model
class YOLOv3Tiny(object):
    """Builds the reduced YOLOv3-tiny detection graph with ``paddle.fluid`` layers.

    ``net`` appends one raw prediction tensor per detection head to
    ``self.outputs``. ``downsample_ratio`` is doubled by every
    ``down_sample`` call.
    """

    def __init__(self, class_num, anchors, anchor_mask):
        self.outputs = []  # prediction tensors, filled by net()
        self.downsample_ratio = 1  # accumulated down-sampling ratio
        self.anchor_mask = anchor_mask  # per detection head: indices of the anchors it uses
        self.anchors = anchors  # flat anchor list, read in pairs (anchors[2*k], anchors[2*k + 1])
        self.class_num = class_num  # number of classes

        self.yolo_anchors = []
        self.yolo_classes = []
        # For each head, collect the anchor pair selected by every mask index.
        for mask_pair in self.anchor_mask:
            mask_anchors = []
            for mask in mask_pair:
                mask_anchors.append(self.anchors[2 * mask])
                mask_anchors.append(self.anchors[2 * mask + 1])
            self.yolo_anchors.append(mask_anchors)
            self.yolo_classes.append(class_num)

    def name(self):
        return 'YOLOv3-tiny'

    def get_anchors(self):
        return self.anchors

    def get_anchor_mask(self):
        return self.anchor_mask

    def get_class_num(self):
        return self.class_num

    def get_downsample_ratio(self):
        return self.downsample_ratio

    def get_yolo_anchors(self):
        return self.yolo_anchors

    def get_yolo_classes(self):
        return self.yolo_classes

    def conv_bn(self,
                input,
                num_filters,
                filter_size,
                stride,
                padding,
                num_groups=1,
                use_cudnn=True):
        """Convolution (no bias) followed by batch norm with a ReLU activation."""
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            act=None,
            groups=num_groups,
            use_cudnn=use_cudnn,
            param_attr=ParamAttr(initializer=fluid.initializer.Normal(0., 0.02)),
            bias_attr=False)

        # batch_norm parameters should not be regularised, so an L2 term with coefficient 0 is attached to exclude them
        out = fluid.layers.batch_norm(
            input=conv, act='relu',
            param_attr=ParamAttr(initializer=fluid.initializer.Normal(0., 0.02), regularizer=L2Decay(0.)),
            bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0), regularizer=L2Decay(0.)))

        return out

    def depthwise_conv_bn(self, input, filter_size=3, stride=1, padding=1):
        """conv_bn with ``groups`` equal to the input's channel count (``input.shape[1]``)."""
        num_filters = input.shape[1]
        return self.conv_bn(input,
                            num_filters=num_filters,
                            filter_size=filter_size,
                            stride=stride,
                            padding=padding,
                            num_groups=num_filters)

    def down_sample(self, input, pool_size=2, pool_stride=2):
        """Max-pool the input and double ``downsample_ratio``."""
        self.downsample_ratio *= 2
        return fluid.layers.pool2d(input=input, pool_type='max', pool_size=pool_size,
                                   pool_stride=pool_stride)

    def basic_block(self, input, num_filters):
        """One 3x3 conv_bn followed by a max-pool down-sample."""
        conv1 = self.conv_bn(input, num_filters, filter_size=3, stride=1, padding=1)
        out = self.down_sample(conv1)
        return out

    def up_sample(self, input, scale=2):
        # get dynamic upsample output shape
        shape_nchw = fluid.layers.shape(input)
        shape_hw = fluid.layers.slice(shape_nchw, axes=[0], starts=[2], ends=[4])  # entries 2 and 3 of the shape
        shape_hw.stop_gradient = True
        in_shape = fluid.layers.cast(shape_hw, dtype='int32')
        out_shape = in_shape * scale
        out_shape.stop_gradient = True

        # resize by actual_shape
        out = fluid.layers.resize_nearest(
            input=input,
            scale=scale,
            actual_shape=out_shape)
        return out

    def yolo_detection_block(self, input, num_filters):
        """Return ``(route, tip)``: a 1x1 conv (route) and a 3x3 conv applied to it (tip)."""
        route = self.conv_bn(input, num_filters, filter_size=1, stride=1, padding=0)
        tip = self.conv_bn(route, num_filters * 2, filter_size=3, stride=1, padding=1)
        return route, tip

    def net(self, img):
        """Build the graph on ``img`` and return ``self.outputs`` (one tensor per detection head)."""
        # darknet-tiny
        stages = [16, 32, 64, 128, 256, 512]  # filter count of each stage
        assert len(self.anchor_mask) <= len(stages), "anchor masks can't bigger than down_sample times"
        # 256x256
        tmp = img
        blocks = []
        for i, stage_count in enumerate(stages):
            if i == len(stages) - 1:
                # last stage: no pooling, two conv outputs are recorded
                block = self.conv_bn(tmp, stage_count, filter_size=3, stride=1, padding=1)
                blocks.append(block)
                # NOTE(review): the results of the two depthwise_conv_bn calls below are bound to
                # `block` but never consumed - each call, and the conv_bn after them, takes
                # blocks[-1] as input. Confirm whether the layers were meant to be chained.
                block = self.depthwise_conv_bn(blocks[-1])
                block = self.depthwise_conv_bn(blocks[-1])
                block = self.conv_bn(blocks[-1], stage_count * 2, filter_size=1, stride=1, padding=0)
                blocks.append(block)
            else:
                tmp = self.basic_block(tmp, stage_count)
                blocks.append(tmp)

        # heads are built on the final conv output and on the output of the fourth basic block
        blocks = [blocks[-1], blocks[3]]

        # yolo detector
        for i, block in enumerate(blocks):
            # cross-scale connection used by YOLO
            if i > 0:
                block = fluid.layers.concat(input=[route, block], axis=1)
            if i < 1:
                route, tip = self.yolo_detection_block(block, num_filters=256 // (2 ** i))
            else:
                tip = self.conv_bn(block, num_filters=256, filter_size=3, stride=1, padding=1)

            param_attr = ParamAttr(initializer=fluid.initializer.Normal(0., 0.02))
            bias_attr = ParamAttr(initializer=fluid.initializer.Constant(0.0), regularizer=L2Decay(0.))
            block_out = fluid.layers.conv2d(input=tip,
                                            # 5 elements represent x|y|h|w|score
                                            num_filters=len(self.anchor_mask[i]) * (self.class_num + 5),
                                            filter_size=1,
                                            stride=1,
                                            padding=0,
                                            act=None,
                                            param_attr=param_attr,
                                            bias_attr=bias_attr)
            self.outputs.append(block_out)
            # enlarge the feature map by interpolation so it can be concatenated with the next block
            if i < len(blocks) - 1:
                route = self.conv_bn(route, 128 // (2 ** i), filter_size=1, stride=1, padding=0)
                route = self.up_sample(route)

        return self.outputs
 478 | 
 479 | 
 480 | def get_yolo(is_tiny, class_num, anchors, anchor_mask):
 481 |     if is_tiny:
 482 |         return YOLOv3Tiny(class_num, anchors, anchor_mask)
 483 |     else:
 484 |         return YOLOv3(class_num, anchors, anchor_mask)
 485 | 
 486 | 
 487 | class Sampler(object):
 488 |     """
 489 |     采样器，用于扣取采样
 490 |     """
 491 | 
 492 |     def __init__(self, max_sample, max_trial, min_scale, max_scale,
 493 |                  min_aspect_ratio, max_aspect_ratio, min_jaccard_overlap,
 494 |                  max_jaccard_overlap):
 495 |         self.max_sample = max_sample
 496 |         self.max_trial = max_trial
 497 |         self.min_scale = min_scale
 498 |         self.max_scale = max_scale
 499 |         self.min_aspect_ratio = min_aspect_ratio
 500 |         self.max_aspect_ratio = max_aspect_ratio
 501 |         self.min_jaccard_overlap = min_jaccard_overlap
 502 |         self.max_jaccard_overlap = max_jaccard_overlap
 503 | 
 504 | 
 505 | class bbox(object):
 506 |     """
 507 |     外界矩形框
 508 |     """
 509 | 
 510 |     def __init__(self, xmin, ymin, xmax, ymax):
 511 |         self.xmin = xmin
 512 |         self.ymin = ymin
 513 |         self.xmax = xmax
 514 |         self.ymax = ymax
 515 | 
 516 | 
 517 | # 坐标转换，由[x1, y1, w, h]转换为[center_x, center_y, w, h]
 518 | # 并转换为范围在[0, 1]之间的相对坐标
 519 | def box_to_center_relative(box, img_height, img_width):
 520 |     """
 521 |     Convert COCO annotations box with format [x1, y1, w, h] to
 522 |     center mode [center_x, center_y, w, h] and divide image width
 523 |     and height to get relative value in range[0, 1]
 524 |     """
 525 |     assert len(box) == 4, "box should be a len(4) list or tuple"
 526 |     x, y, w, h = box
 527 | 
 528 |     x1 = max(x, 0)
 529 |     x2 = min(x + w - 1, img_width - 1)
 530 |     y1 = max(y, 0)
 531 |     y2 = min(y + h - 1, img_height - 1)
 532 | 
 533 |     x = (x1 + x2) / 2 / img_width  # x中心坐标
 534 |     y = (y1 + y2) / 2 / img_height  # y中心坐标
 535 |     w = (x2 - x1) / img_width  # 框宽度/图片总宽度
 536 |     h = (y2 - y1) / img_height  # 框高度/图片总高度
 537 | 
 538 |     return np.array([x, y, w, h])
 539 | 
 540 | 
 541 | # 调整图像大小
 542 | def resize_img(img, sampled_labels, input_size):
 543 |     target_size = input_size
 544 |     img = img.resize((target_size[1], target_size[2]), Image.BILINEAR)
 545 |     return img
 546 | 
 547 | 
 548 | # 计算交并比
 549 | def box_iou_xywh(box1, box2):
 550 |     assert box1.shape[-1] == 4, "Box1 shape[-1] should be 4."
 551 |     assert box2.shape[-1] == 4, "Box2 shape[-1] should be 4."
 552 | 
 553 |     # 取两个框的坐标
 554 |     b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
 555 |     b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
 556 |     b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
 557 |     b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2
 558 | 
 559 |     inter_x1 = np.maximum(b1_x1, b2_x1)
 560 |     inter_x2 = np.minimum(b1_x2, b2_x2)
 561 |     inter_y1 = np.maximum(b1_y1, b2_y1)
 562 |     inter_y2 = np.minimum(b1_y2, b2_y2)
 563 |     inter_w = inter_x2 - inter_x1 + 1  # 相交部分宽度
 564 |     inter_h = inter_y2 - inter_y1 + 1  # 相交部分高度
 565 |     inter_w[inter_w < 0] = 0
 566 |     inter_h[inter_h < 0] = 0
 567 | 
 568 |     inter_area = inter_w * inter_h  # 相交面积
 569 |     b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)  # 框1的面积
 570 |     b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)  # 框2的面积
 571 | 
 572 |     return inter_area / (b1_area + b2_area - inter_area)  # 相集面积/并集面积
 573 | 
 574 | 
 575 | # box裁剪
 576 | def box_crop(boxes, labels, crop, img_shape):
 577 |     x, y, w, h = map(float, crop)
 578 |     im_w, im_h = map(float, img_shape)
 579 | 
 580 |     boxes = boxes.copy()
 581 |     boxes[:, 0], boxes[:, 2] = (boxes[:, 0] - boxes[:, 2] / 2) * im_w, (boxes[:, 0] + boxes[:, 2] / 2) * im_w
 582 |     boxes[:, 1], boxes[:, 3] = (boxes[:, 1] - boxes[:, 3] / 2) * im_h, (boxes[:, 1] + boxes[:, 3] / 2) * im_h
 583 | 
 584 |     crop_box = np.array([x, y, x + w, y + h])
 585 |     centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0
 586 |     mask = np.logical_and(crop_box[:2] <= centers, centers <= crop_box[2:]).all(axis=1)
 587 | 
 588 |     boxes[:, :2] = np.maximum(boxes[:, :2], crop_box[:2])
 589 |     boxes[:, 2:] = np.minimum(boxes[:, 2:], crop_box[2:])
 590 |     boxes[:, :2] -= crop_box[:2]
 591 |     boxes[:, 2:] -= crop_box[:2]
 592 | 
 593 |     mask = np.logical_and(mask, (boxes[:, :2] < boxes[:, 2:]).all(axis=1))
 594 |     boxes = boxes * np.expand_dims(mask.astype('float32'), axis=1)
 595 |     labels = labels * mask.astype('float32')
 596 |     boxes[:, 0], boxes[:, 2] = (boxes[:, 0] + boxes[:, 2]) / 2 / w, (boxes[:, 2] - boxes[:, 0]) / w
 597 |     boxes[:, 1], boxes[:, 3] = (boxes[:, 1] + boxes[:, 3]) / 2 / h, (boxes[:, 3] - boxes[:, 1]) / h
 598 | 
 599 |     return boxes, labels, mask.sum()
 600 | 
 601 | 
 602 | # 图像增加：对比度，饱和度，明暗，颜色，扩张
 603 | def random_brightness(img):  # 亮度
 604 |     prob = np.random.uniform(0, 1)
 605 | 
 606 |     if prob < train_params['image_distort_strategy']['brightness_prob']:
 607 |         brightness_delta = train_params['image_distort_strategy']['brightness_delta']  # 默认值0.125
 608 |         delta = np.random.uniform(-brightness_delta, brightness_delta) + 1  # 产生均匀分布随机值
 609 |         img = ImageEnhance.Brightness(img).enhance(delta)  # 调整图像亮度
 610 | 
 611 |     return img
 612 | 
 613 | 
 614 | def random_contrast(img):  # 对比度
 615 |     prob = np.random.uniform(0, 1)
 616 | 
 617 |     if prob < train_params['image_distort_strategy']['contrast_prob']:
 618 |         contrast_delta = train_params['image_distort_strategy']['contrast_delta']
 619 |         delta = np.random.uniform(-contrast_delta, contrast_delta) + 1
 620 |         img = ImageEnhance.Contrast(img).enhance(delta)
 621 | 
 622 |     return img
 623 | 
 624 | 
 625 | def random_saturation(img):  # 饱和度
 626 |     prob = np.random.uniform(0, 1)
 627 | 
 628 |     if prob < train_params['image_distort_strategy']['saturation_prob']:
 629 |         saturation_delta = train_params['image_distort_strategy']['saturation_delta']
 630 |         delta = np.random.uniform(-saturation_delta, saturation_delta) + 1
 631 |         img = ImageEnhance.Color(img).enhance(delta)
 632 | 
 633 |     return img
 634 | 
 635 | 
 636 | def random_hue(img):  # 色调
 637 |     prob = np.random.uniform(0, 1)
 638 | 
 639 |     if prob < train_params['image_distort_strategy']['hue_prob']:
 640 |         hue_delta = train_params['image_distort_strategy']['hue_delta']
 641 |         delta = np.random.uniform(-hue_delta, hue_delta)
 642 |         img_hsv = np.array(img.convert('HSV'))
 643 |         img_hsv[:, :, 0] = img_hsv[:, :, 0] + delta
 644 |         img = Image.fromarray(img_hsv, mode='HSV').convert('RGB')
 645 | 
 646 |     return img
 647 | 
 648 | 
 649 | def distort_image(img):  # 图像扭曲
 650 |     prob = np.random.uniform(0, 1)
 651 |     # Apply different distort order
 652 |     if prob > 0.5:
 653 |         img = random_brightness(img)
 654 |         img = random_contrast(img)
 655 |         img = random_saturation(img)
 656 |         img = random_hue(img)
 657 |     else:
 658 |         img = random_brightness(img)
 659 |         img = random_saturation(img)
 660 |         img = random_hue(img)
 661 |         img = random_contrast(img)
 662 |     return img
 663 | 
 664 | 
 665 | # 随机裁剪
 666 | def random_crop(img, boxes, labels, scales=[0.3, 1.0], max_ratio=2.0, constraints=None, max_trial=50):
 667 |     if random.random() > 0.6:
 668 |         return img, boxes, labels
 669 |     if len(boxes) == 0:
 670 |         return img, boxes, labels
 671 | 
 672 |     if not constraints:
 673 |         constraints = [(0.1, 1.0),
 674 |                        (0.3, 1.0),
 675 |                        (0.5, 1.0),
 676 |                        (0.7, 1.0),
 677 |                        (0.9, 1.0),
 678 |                        (0.0, 1.0)]  # 最小/最大交并比值
 679 | 
 680 |     w, h = img.size
 681 |     crops = [(0, 0, w, h)]
 682 | 
 683 |     for min_iou, max_iou in constraints:
 684 |         for _ in range(max_trial):
 685 |             scale = random.uniform(scales[0], scales[1])
 686 |             aspect_ratio = random.uniform(max(1 / max_ratio, scale * scale), \
 687 |                                           min(max_ratio, 1 / scale / scale))
 688 |             crop_h = int(h * scale / np.sqrt(aspect_ratio))
 689 |             crop_w = int(w * scale * np.sqrt(aspect_ratio))
 690 |             crop_x = random.randrange(w - crop_w)
 691 |             crop_y = random.randrange(h - crop_h)
 692 |             crop_box = np.array([[
 693 |                 (crop_x + crop_w / 2.0) / w,
 694 |                 (crop_y + crop_h / 2.0) / h,
 695 |                 crop_w / float(w),
 696 |                 crop_h / float(h)
 697 |             ]])
 698 | 
 699 |             iou = box_iou_xywh(crop_box, boxes)
 700 |             if min_iou <= iou.min() and max_iou >= iou.max():
 701 |                 crops.append((crop_x, crop_y, crop_w, crop_h))
 702 |                 break
 703 | 
 704 |     while crops:
 705 |         crop = crops.pop(np.random.randint(0, len(crops)))
 706 |         crop_boxes, crop_labels, box_num = box_crop(boxes, labels, crop, (w, h))
 707 |         if box_num < 1:
 708 |             continue
 709 |         img = img.crop((crop[0], crop[1], crop[0] + crop[2],
 710 |                         crop[1] + crop[3])).resize(img.size, Image.LANCZOS)
 711 |         return img, crop_boxes, crop_labels
 712 |     return img, boxes, labels
 713 | 
 714 | 
 715 | # 扩张
 716 | def random_expand(img, gtboxes, keep_ratio=True):
 717 |     if np.random.uniform(0, 1) < train_params['image_distort_strategy']['expand_prob']:
 718 |         return img, gtboxes
 719 | 
 720 |     max_ratio = train_params['image_distort_strategy']['expand_max_ratio']
 721 |     w, h = img.size
 722 |     c = 3
 723 |     ratio_x = random.uniform(1, max_ratio)
 724 |     if keep_ratio:
 725 |         ratio_y = ratio_x
 726 |     else:
 727 |         ratio_y = random.uniform(1, max_ratio)
 728 |     oh = int(h * ratio_y)
 729 |     ow = int(w * ratio_x)
 730 |     off_x = random.randint(0, ow - w)
 731 |     off_y = random.randint(0, oh - h)
 732 | 
 733 |     out_img = np.zeros((oh, ow, c), np.uint8)
 734 |     for i in range(c):
 735 |         out_img[:, :, i] = train_params['mean_rgb'][i]
 736 | 
 737 |     out_img[off_y: off_y + h, off_x: off_x + w, :] = img
 738 |     gtboxes[:, 0] = ((gtboxes[:, 0] * w) + off_x) / float(ow)
 739 |     gtboxes[:, 1] = ((gtboxes[:, 1] * h) + off_y) / float(oh)
 740 |     gtboxes[:, 2] = gtboxes[:, 2] / ratio_x
 741 |     gtboxes[:, 3] = gtboxes[:, 3] / ratio_y
 742 | 
 743 |     return Image.fromarray(out_img), gtboxes
 744 | 
 745 | 
 746 | # 预处理：图像样本增强，维度转换
 747 | def preprocess(img, bbox_labels, input_size, mode):
 748 |     img_width, img_height = img.size
 749 |     sample_labels = np.array(bbox_labels)
 750 | 
 751 |     if mode == 'train':
 752 |         if train_params['apply_distort']:  # 是否扭曲增强
 753 |             img = distort_image(img)
 754 | 
 755 |         img, gtboxes = random_expand(img, sample_labels[:, 1:5])  # 扩展增强
 756 |         img, gtboxes, gtlabels = random_crop(img, gtboxes, sample_labels[:, 0])  # 随机裁剪
 757 |         sample_labels[:, 0] = gtlabels
 758 |         sample_labels[:, 1:5] = gtboxes
 759 | 
 760 |     img = resize_img(img, sample_labels, input_size)
 761 |     img = np.array(img).astype('float32')
 762 |     img -= train_params['mean_rgb']
 763 |     img = img.transpose((2, 0, 1))  # HWC to CHW
 764 |     img *= 0.007843
 765 |     return img, sample_labels
 766 | 
 767 | 
 768 | # 数据读取器
 769 | # 根据样本文件，读取图片、并做数据增强，返回图片数据、边框、标签
 770 | def custom_reader(file_list, data_dir, input_size, mode):
 771 |     def reader():
 772 |         np.random.shuffle(file_list)  # 打乱文件列表
 773 | 
 774 |         for line in file_list:  # 读取行，每行一个图片及标注
 775 |             if mode == 'train' or mode == 'eval':
 776 |                 ######################  以下可能是需要自定义修改的部分   ############################
 777 |                 parts = line.split('\t')  # 按照tab键拆分
 778 |                 image_path = parts[0]
 779 | 
 780 |                 img = Image.open(os.path.join(data_dir, image_path)) # 读取图像数据
 781 |                 if img.mode != 'RGB':
 782 |                     img = img.convert('RGB')
 783 |                 im_width, im_height = img.size
 784 | 
 785 |                 # bbox 的列表，每一个元素为这样
 786 |                 # layout: label | x-center | y-cneter | width | height | difficult
 787 |                 bbox_labels = []
 788 |                 for object_str in parts[1:]:  # 循环处理每一个目标标注信息
 789 |                     if len(object_str) <= 1:
 790 |                         continue
 791 | 
 792 |                     bbox_sample = []
 793 |                     object = json.loads(object_str)
 794 |                     bbox_sample.append(float(train_params['label_dict'][object['value']]))
 795 |                     bbox = object['coordinate']  # 获取框坐标
 796 |                     # 计算x,y,w,h
 797 |                     box = [bbox[0][0], bbox[0][1], bbox[1][0] - bbox[0][0], bbox[1][1] - bbox[0][1]]
 798 |                     bbox = box_to_center_relative(box, im_height, im_width)  # 坐标转换
 799 |                     bbox_sample.append(float(bbox[0]))
 800 |                     bbox_sample.append(float(bbox[1]))
 801 |                     bbox_sample.append(float(bbox[2]))
 802 |                     bbox_sample.append(float(bbox[3]))
 803 |                     difficult = float(0)
 804 |                     bbox_sample.append(difficult)
 805 |                     bbox_labels.append(bbox_sample)
 806 |                 ######################  可能需要自定义修改部分结束   ############################
 807 | 
 808 |                 if len(bbox_labels) == 0:
 809 |                     continue
 810 | 
 811 |                 img, sample_labels = preprocess(img, bbox_labels, input_size, mode)  # 预处理
 812 |                 # sample_labels = np.array(sample_labels)
 813 |                 if len(sample_labels) == 0:
 814 |                     continue
 815 | 
 816 |                 boxes = sample_labels[:, 1:5]  # 坐标
 817 |                 lbls = sample_labels[:, 0].astype('int32')  # 标签
 818 |                 difficults = sample_labels[:, -1].astype('int32')
 819 |                 max_box_num = train_params['max_box_num']  # 一副图像最多多少个目标物体
 820 |                 cope_size = max_box_num if len(boxes) >= max_box_num else len(boxes)  # 控制最大目标数量
 821 |                 ret_boxes = np.zeros((max_box_num, 4), dtype=np.float32)
 822 |                 ret_lbls = np.zeros((max_box_num), dtype=np.int32)
 823 |                 ret_difficults = np.zeros((max_box_num), dtype=np.int32)
 824 |                 ret_boxes[0: cope_size] = boxes[0: cope_size]
 825 |                 ret_lbls[0: cope_size] = lbls[0: cope_size]
 826 |                 ret_difficults[0: cope_size] = difficults[0: cope_size]
 827 | 
 828 |                 yield img, ret_boxes, ret_lbls
 829 | 
 830 |             elif mode == 'test':
 831 |                 img_path = os.path.join(line)
 832 | 
 833 |                 yield Image.open(img_path)
 834 | 
 835 |     return reader
 836 | 
 837 | 
 838 | # 批量、随机数据读取器
 839 | def single_custom_reader(file_path, data_dir, input_size, mode):
 840 |     file_path = os.path.join(data_dir, file_path)
 841 | 
 842 |     images = [line.strip() for line in open(file_path)]
 843 |     reader = custom_reader(images, data_dir, input_size, mode)
 844 |     reader = paddle.reader.shuffle(reader, train_params['train_batch_size'])
 845 |     reader = paddle.batch(reader, train_params['train_batch_size'])
 846 | 
 847 |     return reader
 848 | 
 849 | 
 850 | # 定义优化器
 851 | def optimizer_sgd_setting():
 852 |     batch_size = train_params["train_batch_size"]  # batch大小
 853 |     iters = train_params["image_count"] // batch_size  # 计算轮次
 854 |     iters = 1 if iters < 1 else iters
 855 |     learning_strategy = train_params['sgd_strategy']
 856 |     lr = learning_strategy['learning_rate']  # 学习率
 857 | 
 858 |     boundaries = [i * iters for i in learning_strategy["lr_epochs"]]
 859 |     values = [i * lr for i in learning_strategy["lr_decay"]]
 860 |     logger.info("origin learning rate: {0} boundaries: {1}  values: {2}".format(lr, boundaries, values))
 861 | 
 862 |     optimizer = fluid.optimizer.SGDOptimizer(
 863 |         learning_rate=fluid.layers.piecewise_decay(boundaries, values),  # 分段衰减学习率
 864 |         # learning_rate=lr,
 865 |         regularization=fluid.regularizer.L2Decay(0.00005))
 866 | 
 867 |     return optimizer
 868 | 
 869 | 
 870 | # 创建program, feeder及yolo模型
 871 | def build_program_with_feeder(main_prog, startup_prog, place):
 872 |     max_box_num = train_params['max_box_num']
 873 |     ues_tiny = train_params['use_tiny']  # 获取是否使用tiny yolo参数
 874 |     yolo_config = train_params['yolo_tiny_cfg'] if ues_tiny else train_params['yolo_cfg']
 875 | 
 876 |     with fluid.program_guard(main_prog, startup_prog):  # 更改全局主程序和启动程序
 877 |         img = fluid.layers.data(name='img', shape=yolo_config['input_size'], dtype='float32')  # 图像
 878 |         gt_box = fluid.layers.data(name='gt_box', shape=[max_box_num, 4], dtype='float32')  # 边框
 879 |         gt_label = fluid.layers.data(name='gt_label', shape=[max_box_num], dtype='int32')  # 标签
 880 | 
 881 |         feeder = fluid.DataFeeder(feed_list=[img, gt_box, gt_label],
 882 |                                   place=place,
 883 |                                   program=main_prog)  # 定义feeder
 884 |         reader = single_custom_reader(train_params['train_list'],
 885 |                                       train_params['data_dir'],
 886 |                                       yolo_config['input_size'], 'train')  # 读取器
 887 |         # 获取yolo参数
 888 |         ues_tiny = train_params['use_tiny']
 889 |         yolo_config = train_params['yolo_tiny_cfg'] if ues_tiny else train_params['yolo_cfg']
 890 | 
 891 |         with fluid.unique_name.guard():
 892 |             # 创建yolo模型
 893 |             model = get_yolo(ues_tiny, train_params['class_dim'], yolo_config['anchors'],
 894 |                              yolo_config['anchor_mask'])
 895 |             outputs = model.net(img)
 896 |         return feeder, reader, get_loss(model, outputs, gt_box, gt_label)
 897 | 
 898 | 
 899 | # 损失函数
 900 | def get_loss(model, outputs, gt_box, gt_label):
 901 |     losses = []
 902 |     downsample_ratio = model.get_downsample_ratio()
 903 | 
 904 |     with fluid.unique_name.guard('train'):
 905 |         for i, out in enumerate(outputs):
 906 |             loss = fluid.layers.yolov3_loss(x=out,
 907 |                                             gt_box=gt_box,  # 真实边框
 908 |                                             gt_label=gt_label,  # 标签
 909 |                                             anchors=model.get_anchors(),  # 锚点
 910 |                                             anchor_mask=model.get_anchor_mask()[i],
 911 |                                             class_num=model.get_class_num(),
 912 |                                             ignore_thresh=train_params['ignore_thresh'],
 913 |                                             # 对于类别不多的情况，设置为 False 会更合适一些，不然 score 会很小
 914 |                                             use_label_smooth=False,
 915 |                                             downsample_ratio=downsample_ratio)
 916 |             losses.append(fluid.layers.reduce_mean(loss))
 917 |             downsample_ratio //= 2
 918 |         loss = sum(losses)
 919 |         optimizer = optimizer_sgd_setting()
 920 |         optimizer.minimize(loss)
 921 |         return loss
 922 | 
 923 | 
 924 | # 持久化参数加载
 925 | def load_pretrained_params(exe, program):
 926 |     if train_params['continue_train'] and os.path.exists(train_params['save_model_dir']):
 927 |         logger.info('load param from retrain model')
 928 |         fluid.io.load_persistables(executor=exe,
 929 |                                    dirname=train_params['save_model_dir'],
 930 |                                    main_program=program)
 931 |     elif train_params['pretrained'] and os.path.exists(train_params['pretrained_model_dir']):
 932 |         logger.info('load param from pretrained model')
 933 | 
 934 |         def if_exist(var):
 935 |             return os.path.exists(os.path.join(train_params['pretrained_model_dir'], var.name))
 936 | 
 937 |         fluid.io.load_vars(exe, train_params['pretrained_model_dir'], main_program=program,
 938 |                            predicate=if_exist)
 939 | 
 940 | 
 941 | # 执行训练
 942 | def train():
 943 |     init_log_config()
 944 |     init_train_parameters()
 945 | 
 946 |     logger.info("start train YOLOv3, train params:%s", str(train_params))
 947 |     logger.info("create place, use gpu:" + str(train_params['use_gpu']))
 948 | 
 949 |     place = fluid.CUDAPlace(0) if train_params['use_gpu'] else fluid.CPUPlace()
 950 | 
 951 |     logger.info("build network and program")
 952 |     train_program = fluid.Program()
 953 |     start_program = fluid.Program()
 954 |     feeder, reader, loss = build_program_with_feeder(train_program, start_program, place)
 955 | 
 956 |     logger.info("build executor and init params")
 957 | 
 958 |     exe = fluid.Executor(place)
 959 |     exe.run(start_program)
 960 |     train_fetch_list = [loss.name]
 961 |     load_pretrained_params(exe, train_program)  # 加载模型及参数
 962 | 
 963 |     stop_strategy = train_params['early_stop']
 964 |     successive_limit = stop_strategy['successive_limit']
 965 |     sample_freq = stop_strategy['sample_frequency']
 966 |     min_curr_map = stop_strategy['min_curr_map']
 967 |     min_loss = stop_strategy['min_loss']
 968 |     stop_train = False
 969 |     successive_count = 0
 970 |     total_batch_count = 0
 971 |     valid_thresh = train_params['valid_thresh']
 972 |     nms_thresh = train_params['nms_thresh']
 973 |     current_best_loss = 10000000000.0
 974 | 
 975 |     # 开始迭代训练
 976 |     for pass_id in range(train_params["num_epochs"]):
 977 |         logger.info("current pass: {}, start read image".format(pass_id))
 978 |         batch_id = 0
 979 |         total_loss = 0.0
 980 | 
 981 |         for batch_id, data in enumerate(reader()):
 982 |             t1 = time.time()
 983 | 
 984 |             loss = exe.run(train_program,
 985 |                            feed=feeder.feed(data),
 986 |                            fetch_list=train_fetch_list)  # 执行训练
 987 | 
 988 |             period = time.time() - t1
 989 |             loss = np.mean(np.array(loss))
 990 |             total_loss += loss
 991 |             batch_id += 1
 992 |             total_batch_count += 1
 993 | 
 994 |             if batch_id % 10 == 0:  # 调整日志输出的频率
 995 |                 logger.info(
 996 |                     "pass {}, trainbatch {}, loss {} time {}".format(pass_id, batch_id, loss, "%2.2f sec" % period))
 997 | 
 998 |         pass_mean_loss = total_loss / batch_id
 999 |         logger.info("pass {0} train result, current pass mean loss: {1}".format(pass_id, pass_mean_loss))
1000 | 
1001 |         # 采用每训练完一轮停止办法，可以调整为更精细的保存策略
1002 |         if pass_mean_loss < current_best_loss:
1003 |             logger.info("temp save {} epcho train result, current best pass loss {}".format(pass_id, pass_mean_loss))
1004 |             fluid.io.save_persistables(dirname=train_params['save_model_dir'], main_program=train_program,
1005 |                                        executor=exe)
1006 |             current_best_loss = pass_mean_loss
1007 | 
1008 |     logger.info("training till last epcho, end training")
1009 |     fluid.io.save_persistables(dirname=train_params['save_model_dir'], main_program=train_program, executor=exe)
1010 | 
1011 | 
1012 | 
1013 | 
1014 | 
1015 | 
    # Freeze and save the model for inference
import paddle
import paddle.fluid as fluid
import codecs

# Executed at import time, before freeze_model() is defined.
# NOTE(review): presumably fills in the placeholder entries of train_params
# (e.g. class_dim, label_dict) that freeze_model() reads - confirm against
# the definition of init_train_parameters().
init_train_parameters()
1022 | 
1023 | 
def freeze_model():
    """Rebuild the detection graph, load trained weights and export it.

    The network is built in the default main program with two inputs
    (``image`` and ``image_shape``), the per-head ``yolo_box`` outputs are
    merged and passed through multi-class NMS, the persistables from
    ``train_params['save_model_dir']`` are loaded, and the test-mode clone
    of the program is saved to ``train_params['freeze_dir']``.
    """
    executor = fluid.Executor(fluid.CPUPlace())

    tiny = train_params['use_tiny']
    cfg = train_params['yolo_tiny_cfg'] if tiny else train_params['yolo_cfg']
    weights_dir = train_params['save_model_dir']

    net = get_yolo(tiny, train_params['class_dim'],
                   cfg['anchors'], cfg['anchor_mask'])
    image = fluid.layers.data(name='image', shape=cfg['input_size'], dtype='float32')
    image_shape = fluid.layers.data(name="image_shape", shape=[2], dtype='int32')

    heads = net.net(image)
    ratio = net.get_downsample_ratio()
    all_boxes, all_scores = [], []

    # Decode every output head; the downsample ratio halves from head to head.
    for idx, head in enumerate(heads):
        head_boxes, head_scores = fluid.layers.yolo_box(
            x=head,
            img_size=image_shape,
            anchors=net.get_yolo_anchors()[idx],
            class_num=net.get_class_num(),
            conf_thresh=train_params['valid_thresh'],
            downsample_ratio=ratio,
            name="yolo_box_" + str(idx))
        all_boxes.append(head_boxes)
        all_scores.append(fluid.layers.transpose(head_scores, perm=[0, 2, 1]))
        ratio //= 2

    merged_boxes = fluid.layers.concat(all_boxes, axis=1)
    merged_scores = fluid.layers.concat(all_scores, axis=2)
    pred = fluid.layers.multiclass_nms(bboxes=merged_boxes,
                                       scores=merged_scores,
                                       score_threshold=train_params['valid_thresh'],
                                       nms_top_k=train_params['nms_top_k'],
                                       keep_top_k=train_params['nms_pos_k'],
                                       nms_threshold=train_params['nms_thresh'],
                                       background_label=-1,
                                       name="multiclass_nms")

    program = fluid.default_main_program()

    # Weights are loaded into the full program before it is cloned for testing.
    fluid.io.load_persistables(executor, weights_dir, program)
    program = program.clone(for_test=True)
    print("freeze out: {0}, pred layout: {1}".format(train_params['freeze_dir'], pred))
    # Save the inference model
    fluid.io.save_inference_model(train_params['freeze_dir'],
                                  ['image', 'image_shape'],
                                  pred, executor, program)
    print("freeze end")
1072 | 
1073 | 
1074 | 
1075 | 
1076 | 
# Inference
import codecs
import sys
import numpy as np
import time
import paddle
import paddle.fluid as fluid
import math
import functools

from IPython.display import display
from PIL import Image
from PIL import ImageFont
from PIL import ImageDraw
from collections import namedtuple

# Everything below runs at import time and defines the module-level state
# that the inference helpers further down read.
init_train_parameters()
ues_tiny = train_params['use_tiny']
yolo_config = train_params['yolo_tiny_cfg'] if ues_tiny else train_params['yolo_cfg']

target_size = yolo_config['input_size']  # read by read_image()
anchors = yolo_config['anchors']
anchor_mask = yolo_config['anchor_mask']
label_dict = train_params['num_dict']  # read by draw_bbox_image(), indexed with int(label)
class_dim = train_params['class_dim']
print("label_dict:{} class dim:{}".format(label_dict, class_dim))

place = fluid.CUDAPlace(0) if train_params['use_gpu'] else fluid.CPUPlace()
exe = fluid.Executor(place)

# Load the frozen model from train_params['freeze_dir']; infer() uses the
# program, its feed names and its fetch targets.
path = train_params['freeze_dir']
[inference_program, feed_target_names, fetch_targets] = fluid.io.load_inference_model(dirname=path, executor=exe)
1109 | 
1110 | 
# Draw the bounding rectangles on an image
def draw_bbox_image(img, boxes, labels, save_name):
    """Draw every box with its label text on ``img``, save it and display it.

    :param img: PIL image; it is drawn on in place.
    :param boxes: iterable of ``[xmin, ymin, xmax, ymax]`` boxes.
    :param labels: iterable of label ids, looked up in the module-level
        ``label_dict`` after conversion to ``int``.
    :param save_name: path the annotated image is saved to.
    """
    canvas = ImageDraw.Draw(img)
    for box, label in zip(boxes, labels):
        xmin, ymin, xmax, ymax = box[0], box[1], box[2], box[3]
        canvas.rectangle((xmin, ymin, xmax, ymax), fill=None, outline='red')
        canvas.text((xmin, ymin), label_dict[int(label)], fill=(255, 255, 0))
    img.save(save_name)
    display(img)
1122 | 
1123 | 
def resize_img(img, target_size):
    """
    Resize an image to the network input size (aspect ratio is NOT preserved).

    NOTE(review): preprocess() in this file calls resize_img with three
    arguments; this two-argument definition comes later in the file and
    would replace an earlier helper of the same name - confirm.

    :param img: PIL image
    :param target_size: network input shape laid out as [channels, height, width]
    :return: the resized PIL image
    """
    # PIL expects (width, height) while target_size[1:] is (height, width).
    height, width = target_size[1], target_size[2]
    return img.resize((width, height), Image.BILINEAR)
1133 | 
1134 | 
def read_image(img_path):
    """
    Load an image and build the network input tensor.

    The image is converted to RGB right after loading, as the training
    reader does, so that the resize interpolates real colour values and the
    returned original can be drawn on with RGB colours and saved as JPEG.

    :param img_path: path of the image file
    :return: ``(origin, img, resized_img)`` - the RGB original, a float32
        array of shape (1, C, H, W) scaled with ``(x - 127.5) * 0.007843``,
        and the resized PIL image the array was built from
    """
    origin = Image.open(img_path)
    if origin.mode != 'RGB':
        origin = origin.convert('RGB')

    resized_img = resize_img(origin, target_size)
    img = np.array(resized_img).astype('float32').transpose((2, 0, 1))  # HWC to CHW
    img -= 127.5
    img *= 0.007843
    img = img[np.newaxis, :]  # add the batch dimension
    return origin, img, resized_img
1151 | 
1152 | 
def infer(image_path):
    """Run detection on one image, save the annotated image and a result file.

    The annotated image is written next to the input as
    ``<path without extension>-result.jpg``; the detections are written to
    ``./input/detection-results/<image base name>.txt``, one row per box as
    ``label score x1 y1 x2 y2`` (every value formatted as a float).

    :param image_path: path of the image to run detection on.
    :return: list of ``[label, score, x1, y1, x2, y2]`` rows, or ``None``
        when the network output does not have 6 columns (nothing detected);
        in that case no file is written.
    """
    origin, tensor_img, _ = read_image(image_path)
    input_w, input_h = origin.size[0], origin.size[1]
    image_shape = np.array([input_h, input_w], dtype='int32')
    t1 = time.time()
    batch_outputs = exe.run(inference_program,
                            feed={feed_target_names[0]: tensor_img,
                                  feed_target_names[1]: image_shape[np.newaxis, :]},
                            fetch_list=fetch_targets,
                            return_numpy=False)
    period = time.time() - t1
    print("predict cost time:{0}".format("%2.2f sec" % period))
    bboxes = np.array(batch_outputs[0])

    if bboxes.shape[1] != 6:
        print("No object found in {}".format(image_path))
        return
    labels = bboxes[:, 0].astype('int32').tolist()
    scores = bboxes[:, 1].astype('float32').tolist()
    boxes = bboxes[:, 2:].astype('float32').tolist()

    # os.path handles paths without an extension and dots in directory names,
    # which plain rfind('.') / rfind('/') slicing does not.
    stem = os.path.splitext(image_path)[0]
    draw_bbox_image(origin, boxes, labels, stem + '-result.jpg')

    predict = [[label, score] + list(box)
               for label, score, box in zip(labels, scores, boxes)]

    result_path = "./input/detection-results/" + os.path.basename(stem) + '.txt'
    with open(result_path, 'w') as f:
        for row in predict:
            f.write(''.join(str(float(value)) + ' ' for value in row))
            f.write('\n')
    return predict
1199 | 
if __name__ == '__main__':
    # Create every output directory on its own, so that an existing ./input
    # without its sub-directories does not break the writes below.
    for out_dir in ('./input', './input/detection-results', './input/ground-truth'):
        if not os.path.isdir(out_dir):
            os.mkdir(out_dir)

    file_path = os.path.join(train_params['data_dir'], 'eval.txt')
    with open(file_path) as eval_file:
        images = [line.strip() for line in eval_file]

    for line in images:
        if not line:  # tolerate blank lines in the list file
            continue
        parts = line.split('\t')
        filename_path = os.path.join(train_params['data_dir'] + '/lslm_test/', parts[0])
        infer(filename_path)

        # Ground-truth rows: label id followed by the two corner points.
        bbox_labels = []
        for object_str in parts[1:]:
            if len(object_str) <= 1:
                continue
            annotation = json.loads(object_str)
            label = float(train_params['label_dict'][annotation['value']])
            corners = annotation['coordinate']
            bbox_labels.append([label,
                                float(corners[0][0]), float(corners[0][1]),
                                float(corners[1][0]), float(corners[1][1])])

        # Same base name as the detection-results file written by infer(),
        # independent of the data_dir length and of the extension length.
        stem = os.path.splitext(os.path.basename(filename_path))[0]
        with open("./input/ground-truth/" + stem + '.txt', 'w') as f:
            for row in bbox_labels:
                f.write(''.join(str(float(value)) + ' ' for value in row))
                f.write('\n')
1234 | 
1235 | 
1236 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # 基于YOLOv3的目标检测实验报告 
  2 | 
  3 | ## 目录
  4 | 
  5 | - 小组成员及分工
  6 | - YOLOv3目标检测网络
  7 |   - YOLO算法简介  
  8 |   - 网络结构
  9 |   - PaddlePaddle代码实现
 10 |     - 主要参数
 11 |     - 模型建立
 12 |     - 训练与迭代
 13 | - 数据集基本信息
 14 | - 训练过程中的参数调整与模型优化
 15 |     - YOLO和YOLO-tiny对比
 16 |     - 参数调整
 17 |     - 模型优化
 18 | - 网络性能分析
 19 |     - 挑战集测试分析
 20 |     - 实际结果
 21 | 
 22 | ---
 23 | ## 小组成员及分工
 24 | 姓名|学号|贡献
 25 | ---|:--:|:-:
 26 | 马家昱|1950509|数据集搜索与整合、图片处理
 27 | 陈冠忠|1950638|模型修改、调试、训练
 28 | 陶思月|1951858|数据集拍摄、标记
 29 | 黄继宣|1951857|数据集拍摄、标记
 30 | 周婉莹|1950579|数据集拍摄、标记
 31 | 罗格峰|1952222|数据集拍摄、标记
 32 | 
 33 | ---
 34 | ## YOLOv3目标检测网络
 35 | ### YOLO算法简介
 36 | - 相关算法
 37 |     1. 滑动窗口
 38 | 
 39 |         &emsp;&emsp;采用滑动窗口的目标检测算法将检测问题转化为了图像分类问题。其基本原理就是采用不同大小和比例（宽高比）的窗口在整张图片上以一定的步长进行滑动，然后对这些窗口对应的区域做图像分类，这样就可以实现对整张图片的检测了。
 40 |         <div align=center><img src="https://images2017.cnblogs.com/blog/606386/201708/606386-20170826152837558-1289161833.png"/></div>
 41 |     2. 非极大值抑制
 42 | 
 43 |         &emsp;&emsp;首先从所有的检测框中找到置信度最大的那个框，然后挨个计算其与剩余框的交并比(IOU)，如果其值大于一定阈值（重合度过高），那么就将该框剔除；然后对剩余的检测框重复上述过程，直到处理完所有的检测框。
 44 |         <div align=center><img src="https://img-blog.csdn.net/20180307143236194?watermark/2/text/aHR0cDovL2Jsb2cuY3Nkbi5uZXQvVHdUNTIwTHk=/font/5a6L5L2T/fontsize/400/fill/I0JBQkFCMA==/dissolve/70"/></div>
 45 | - YOLO算法
 46 | 
 47 |     &emsp;&emsp;YOLO将对象检测重新定义为一个回归问题。它将单个卷积神经网络(CNN)应用于整个图像，将图像分成网格，并预测每个网格的类概率和边界框。对于每个网格，网络都会预测一个边界框和与每个类别（汽车，行人，交通信号灯等）相对应的概率。每个边界框可以使用四个描述符进行描述：
 48 | 
 49 |     1. 边界框的中心
 50 |     2. 高度
 51 |     3. 宽度
  52 |     4. 对象所属的类别（以数值表示）
 53 | 
 54 |     &emsp;&emsp;此外，该算法还可以预测边界框中存在对象的概率。如果一个对象的中心落在一个网格单元中，则该网格单元负责检测该对象。每个网格中将有多个边界框。在训练时，我们希望每个对象只有一个边界框。因此，我们根据哪个Box与ground truth box的重叠度最高，从而分配一个Box来负责预测对象。
 55 | 
  56 |     &emsp;&emsp;最后，先过滤掉“置信度”小于阈值的边界框，再对每个类的对象应用非极大值抑制，剔除与高置信度框重叠度过高的冗余边界框。这样就得到了图像的最终预测结果。
 57 |     
 58 |     <div align=center><img src ="https://pics2.baidu.com/feed/a8014c086e061d95a3897a4e0b1385d760d9cae3.jpeg?token=d08c83b9aff4cbc62139ce92a1332c8f"/></div>
 59 | 
 60 | 
 61 | 
 62 | ### 网络结构
 63 | - YOLOv3采用了称之为Darknet-53的网络结构（含有53个卷积层），它借鉴了残差网络的做法，在一些层之间设置了快捷链路。下图展示了其基本结构。
 64 |   <div align=center><img src ="https://img-blog.csdnimg.cn/20190329210004674.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L2xpdHQxZQ==,size_16,color_FFFFFF,t_70"/></div>
  65 |     其中Darknet-53的具体结构如下，其采用448×448×3作为输入，左侧数字表示该残差组件重复的个数，每个残差组件有两个卷积层和一个快捷链路。
 66 |     <div align=center><img src ="https://upload-images.jianshu.io/upload_images/2709767-e65c08c61bfaa7c7.png?imageMogr2/auto-orient/strip|imageView2/2/w/520/format/webp"/></div>
 67 | 
 68 | ### PaddlePaddle代码实现
 69 | #### 主要参数
 70 | ```
 71 | train_params = {
 72 |     "data_dir": "data/data6045",  # 数据目录
 73 |     "train_list": "train.txt",  # 训练集文件
 74 |     "eval_list": "eval.txt",
 75 |     "class_dim": -1,
 76 |     "label_dict": {},  # 标签字典
 77 |     "num_dict": {},
 78 |     "image_count": -1,
 79 |     "continue_train": True,  # 是否加载前一次的训练参数，接着训练
 80 |     "pretrained": False,  # 是否预训练
 81 |     "pretrained_model_dir": "./pretrained-model",
 82 |     "save_model_dir": "./yolo-model",  # 模型保存目录
 83 |     "model_prefix": "yolo-v3",  # 模型前缀
 84 |     "freeze_dir": "freeze_model",
 85 |     "use_tiny": False,  # 是否使用 裁剪 tiny 模型
 86 |     "max_box_num": 8,  # 一幅图上最多有多少个目标
 87 |     "num_epochs": 15,  # 训练轮次
 88 |     "train_batch_size": 12,  # 对于完整yolov3，每一批的训练样本不能太多，内存会炸掉；如果使用tiny，可以适当大一些
 89 |     "use_gpu": True,  # 是否使用GPU
 90 |     "yolo_cfg": {  # YOLO模型参数
 91 |         "input_size": [3, 448, 448],  # 原版的边长大小为608，为了提高训练速度和预测速度，此处压缩为448
  92 |         "anchors": [7, 10, 12, 22, 24, 17, 22, 45, 46, 33, 43, 88, 85, 66, 115, 146, 275, 240],  # 锚点框尺寸，每两个数为一组
 93 |         "anchor_mask": [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
 94 |     },
 95 |     "yolo_tiny_cfg": {  # YOLO tiny 模型参数
 96 |         "input_size": [3, 256, 256],
 97 |         "anchors": [6, 8, 13, 15, 22, 34, 48, 50, 81, 100, 205, 191],
 98 |         "anchor_mask": [[3, 4, 5], [0, 1, 2]]
 99 |     },
100 |     "ignore_thresh": 0.7,
101 |     "mean_rgb": [127.5, 127.5, 127.5],
102 |     "mode": "train",
103 |     "multi_data_reader_count": 4,
104 |     "apply_distort": True,  # 是否做图像扭曲增强
105 |     "nms_top_k": 300,
106 |     "nms_pos_k": 300,
107 |     "valid_thresh": 0.01,
108 |     "nms_thresh": 0.40,  # 非最大值抑制阈值
109 |     "image_distort_strategy": {  # 图像扭曲策略
110 |         "expand_prob": 0.5,  # 扩展比率
111 |         "expand_max_ratio": 4,
112 |         "hue_prob": 0.5,  # 色调
113 |         "hue_delta": 18,
114 |         "contrast_prob": 0.5,  # 对比度
115 |         "contrast_delta": 0.5,
116 |         "saturation_prob": 0.5,  # 饱和度
117 |         "saturation_delta": 0.5,
118 |         "brightness_prob": 0.5,  # 亮度
119 |         "brightness_delta": 0.125
120 |     },
121 |     "sgd_strategy": {  # 梯度下降配置
122 |         "learning_rate": 0.002,
123 |         "lr_epochs": [30, 50, 65],  # 学习率衰减分段（3个数字分为4段）
124 |         "lr_decay": [1, 0.5, 0.25, 0.1]  # 每段采用的学习率，对应lr_epochs参数4段
125 |     },
126 |     "early_stop": {
127 |         "sample_frequency": 50,
128 |         "successive_limit": 3,
129 |         "min_loss": 2.5,
130 |         "min_curr_map": 0.84
131 |     }
132 | }
133 | ```
134 | #### 模型建立
135 | ```
136 | class YOLOv3(object):
137 |     def __init__(self, class_num, anchors, anchor_mask):
138 |         self.outputs = []  # 网络最终模型
139 |         self.downsample_ratio = 1  # 下采样率
 140 |         self.anchor_mask = anchor_mask  # 每个输出层所用锚点在anchors中的索引
141 |         self.anchors = anchors  # 锚点
142 |         self.class_num = class_num  # 类别数量
143 | 
144 |         self.yolo_anchors = []
145 |         self.yolo_classes = []
146 | 
147 |         for mask_pair in self.anchor_mask:
148 |             mask_anchors = []
149 |             for mask in mask_pair:
150 |                 mask_anchors.append(self.anchors[2 * mask])
151 |                 mask_anchors.append(self.anchors[2 * mask + 1])
152 |             self.yolo_anchors.append(mask_anchors)
153 |             self.yolo_classes.append(class_num)
154 | 
155 |     def name(self):
156 |         return 'YOLOv3'
157 | 
158 |     # 获取anchors
159 |     def get_anchors(self):
160 |         return self.anchors
161 | 
162 |     # 获取anchor_mask
163 |     def get_anchor_mask(self):
164 |         return self.anchor_mask
165 | 
166 |     def get_class_num(self):
167 |         return self.class_num
168 | 
169 |     def get_downsample_ratio(self):
170 |         return self.downsample_ratio
171 | 
172 |     def get_yolo_anchors(self):
173 |         return self.yolo_anchors
174 | 
175 |     def get_yolo_classes(self):
176 |         return self.yolo_classes
177 | 
178 |     # 卷积正则化函数: 卷积、批量正则化处理、leakrelu
179 |     def conv_bn(self,
180 |                 input,  # 输入
181 |                 num_filters,  # 卷积核数量
182 |                 filter_size,  # 卷积核大小
183 |                 stride,  # 步幅
184 |                 padding,  # 填充
185 |                 use_cudnn=True):
186 |         # 2d卷积操作
187 |         conv = fluid.layers.conv2d(input=input,
188 |                                    num_filters=num_filters,
189 |                                    filter_size=filter_size,
190 |                                    stride=stride,
191 |                                    padding=padding,
192 |                                    act=None,
193 |                                    use_cudnn=use_cudnn,  # 是否使用cudnn，cudnn利用cuda进行了加速处理
194 |                                    param_attr=ParamAttr(initializer=fluid.initializer.Normal(0., 0.02)),
195 |                                    bias_attr=False)
196 | 
197 |         # batch_norm中的参数不需要参与正则化，所以主动使用正则系数为0的正则项屏蔽掉
198 |         # 在batch_norm中使用leaky的话，只能使用默认的alpha=0.02；如果需要设值，必须提出去单独来
199 |         # 正则化的目的，是为了防止过拟合，较小的L2值能防止过拟合
200 |         param_attr = ParamAttr(initializer=fluid.initializer.Normal(0., 0.02),
201 |                                regularizer=L2Decay(0.))
202 |         bias_attr = ParamAttr(initializer=fluid.initializer.Constant(0.0),
203 |                               regularizer=L2Decay(0.))
204 |         out = fluid.layers.batch_norm(input=conv, act=None,
205 |                                       param_attr=param_attr,
206 |                                       bias_attr=bias_attr)
207 |         # leaky_relu: Leaky ReLU是给所有负值赋予一个非零斜率
208 |         out = fluid.layers.leaky_relu(out, 0.1)
209 |         return out
210 | ```
211 | #### 训练与迭代
212 | ```
213 | # 执行训练
def train():
    """Train YOLOv3 for ``train_params["num_epochs"]`` passes.

    Steps:
        1. Initialise logging and the training parameters.
        2. Build the train/startup programs, the feeder and the batch reader
           through ``build_program_with_feeder``.
        3. Run the startup program and load any saved parameters through
           ``load_pretrained_params``.
        4. For every pass, run all batches, compute the mean loss of the pass
           and save the persistable variables to
           ``train_params['save_model_dir']`` whenever that mean improves.
        5. Save the persistable variables once more after the last pass.

    A pass whose reader yields no batches is logged and skipped instead of
    raising ``ZeroDivisionError`` when computing the mean loss.
    """
    init_log_config()
    init_train_parameters()

    logger.info("start train YOLOv3, train params:%s", str(train_params))
    logger.info("create place, use gpu:" + str(train_params['use_gpu']))

    place = fluid.CUDAPlace(0) if train_params['use_gpu'] else fluid.CPUPlace()

    logger.info("build network and program")
    train_program = fluid.Program()
    start_program = fluid.Program()
    # loss_var is the loss variable of the graph; it is kept under its own
    # name so the per-batch fetched value never shadows it.
    feeder, reader, loss_var = build_program_with_feeder(train_program, start_program, place)

    logger.info("build executor and init params")

    exe = fluid.Executor(place)
    exe.run(start_program)
    train_fetch_list = [loss_var.name]
    load_pretrained_params(exe, train_program)  # load saved model parameters

    current_best_loss = 10000000000.0

    # Main training loop: one iteration per pass over the reader.
    for pass_id in range(train_params["num_epochs"]):
        logger.info("current pass: {}, start read image".format(pass_id))
        batch_count = 0
        total_loss = 0.0

        # start=1 makes batch_count equal to the number of batches processed.
        for batch_count, data in enumerate(reader(), start=1):
            t1 = time.time()

            fetched = exe.run(train_program,
                              feed=feeder.feed(data),
                              fetch_list=train_fetch_list)  # run one training step

            period = time.time() - t1
            batch_loss = np.mean(np.array(fetched))
            total_loss += batch_loss

            if batch_count % 10 == 0:  # controls how often progress is logged
                logger.info(
                    "pass {}, trainbatch {}, loss {} time {}".format(pass_id, batch_count, batch_loss,
                                                                     "%2.2f sec" % period))

        if batch_count == 0:
            # Nothing was read: there is no mean loss to compute or compare.
            logger.warning("pass {} produced no batches, skip mean loss and checkpoint".format(pass_id))
            continue

        pass_mean_loss = total_loss / batch_count
        logger.info("pass {0} train result, current pass mean loss: {1}".format(pass_id, pass_mean_loss))

        # Checkpoint once per pass, only when the mean loss improved.
        if pass_mean_loss < current_best_loss:
            logger.info("temp save {} epcho train result, current best pass loss {}".format(pass_id, pass_mean_loss))
            fluid.io.save_persistables(dirname=train_params['save_model_dir'], main_program=train_program,
                                       executor=exe)
            current_best_loss = pass_mean_loss

    logger.info("training till last epcho, end training")
    fluid.io.save_persistables(dirname=train_params['save_model_dir'], main_program=train_program, executor=exe)
282 | ```
283 | ---
284 | 
285 | ## 数据集基本信息
286 | * 本组使用的数据集共有900张图片，其中500张来自校园拍摄实景，其余为下载的特定分类图片。
* 所有图片宽高比均为4:3，分辨率为800*600。数据集图片主要分为四类：单独的行人、单独的自行车、单独的汽车，以及前三类混杂在一起的图片。
288 | ![avatar](https://wx1.sinaimg.cn/mw690/005CNyQ8ly1gm2hh1x9qaj31770nu7wh.jpg)![avatar](https://wx1.sinaimg.cn/mw690/005CNyQ8ly1gm2hh1xo3pj31720nz4qp.jpg)
289 | 
290 | ## 训练过程中的参数调整与模型优化
291 | ### YOLO和YOLO-tiny对比
292 | <div align=center><img src="https://wx2.sinaimg.cn/mw690/005CNyQ8ly1gm2n8d31nmj30g709imx8.jpg"/></div>
293 | 
294 | 模型|训练30轮所用时长|
295 | ---|:--:|
296 | YOLO|2h9m|
297 | YOLO-tiny|1h41m|
298 | ### 参数调整
- "max_box_num": 8
- "nms_thresh": 0.40
- "valid_thresh": 0.015
302 | - 优化显存
303 |   - os.environ["FLAGS_fraction_of_gpu_memory_to_use"] = '0.92'
304 |   - os.environ["FLAGS_eager_delete_tensor_gb"] = '0'
305 |   - os.environ["FLAGS_memory_fraction_of_eager_deletion"] = '1'
306 |   - os.environ["FLAGS_fast_eager_deletion_mode"]='True'
307 | ### 模型优化
308 |   - 优化器更改：原优化器为SGD
309 | ```
310 | optimizer=fluid.optimizer.SGDOptimizer(
311 |         learning_rate=fluid.layers.piecewise_decay(boundaries, values), regularization=fluid.regularizer.L2Decay(0.00005))
312 | ```
313 | - 变更为Adam算法
314 | ```
315 | optimizer=fluid.optimizer.AdamOptimizer(learning_rate=0.01,beta1=0.9,beta2=0.999,regularization=fluid.regularizer.L2Decay(0.00005))
316 | ```
317 | - Adam优化对比分析：
318 | - ![avatar](https://wx2.sinaimg.cn/mw690/005CNyQ8gy1gm42jjv0noj30q80joglh.jpg)![avatar](https://wx3.sinaimg.cn/mw690/005CNyQ8gy1gm42gr58jsj30uk0ikaal.jpg)
319 | 
320 | ## 网络性能分析
321 | - 挑战集测试分析
322 | - ![avatar](https://wx1.sinaimg.cn/mw690/005CNyQ8ly1gm2ovf5tr1j30hs0dcdg9.jpg)![avatar](https://wx1.sinaimg.cn/mw690/005CNyQ8ly1gm2ovf5w0aj30hs0dcaaa.jpg)![avatar](https://wx4.sinaimg.cn/mw690/005CNyQ8ly1gm2ovf627sj30hs0dcdg1.jpg)![avatar](https://wx2.sinaimg.cn/mw690/005CNyQ8ly1gm2ovf8ihlj30hs0dc0sv.jpg)
323 | - 实际结果
324 | - ![avatar](https://wx4.sinaimg.cn/mw690/005CNyQ8ly1gm2oofq8nzj31400u0hdt.jpg)![avatar](https://wx3.sinaimg.cn/mw690/005CNyQ8ly1gm2oof38goj31400u0kd6.jpg)
325 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | import glob
  2 | import json
  3 | import os
  4 | import shutil
  5 | import operator
  6 | import sys
  7 | import argparse
  8 | import math
  9 | 
 10 | import numpy as np
 11 | 
# Minimum IoU for a detection to count as a match, unless overridden per class
# with --set-class-iou.
MINOVERLAP = 0.5 # default value (defined in the PASCAL VOC2012 challenge)

# Command-line interface. Parsing happens at import time, so this module is
# meant to be run as a script.
parser = argparse.ArgumentParser()
parser.add_argument('-na', '--no-animation', help="no animation is shown.", action="store_true")
parser.add_argument('-np', '--no-plot', help="no plot is shown.", action="store_true")
parser.add_argument('-q', '--quiet', help="minimalistic console output.", action="store_true")
# argparse receiving list of classes to be ignored (e.g., python main.py --ignore person book)
parser.add_argument('-i', '--ignore', nargs='+', type=str, help="ignore a list of classes.")
# argparse receiving list of classes with specific IoU (e.g., python main.py --set-class-iou person 0.7)
parser.add_argument('--set-class-iou', nargs='+', type=str, help="set IoU for a specific class.")
args = parser.parse_args()

# Coordinate convention used for bounding boxes (kept as a bare string literal).
'''
    0,0 ------> x (width)
     |
     |  (Left,Top)
     |      *_________
     |      |         |
            |         |
     y      |_________|
  (height)            *
                (Right,Bottom)
'''

# if there are no classes to ignore then replace None by empty list
if args.ignore is None:
    args.ignore = []

# True when the user supplied --set-class-iou; the values are validated later,
# once the ground-truth classes are known.
specific_iou_flagged = False
if args.set_class_iou is not None:
    specific_iou_flagged = True

# make sure that the cwd() is the location of the python script (so that every path makes sense)
os.chdir(os.path.dirname(os.path.abspath(__file__)))

# Input directories, resolved relative to the script directory set above.
GT_PATH = os.path.join(os.getcwd(), 'input', 'ground-truth')
DR_PATH = os.path.join(os.getcwd(), 'input', 'detection-results')
# if there are no images then no animation can be shown
IMG_PATH = os.path.join(os.getcwd(), 'input', 'images-optional')
if os.path.exists(IMG_PATH): 
    # NOTE: every directory visited by os.walk is checked, so a single
    # sub-directory without files is enough to disable the animation.
    for dirpath, dirnames, files in os.walk(IMG_PATH):
        if not files:
            # no image files found
            args.no_animation = True
else:
    args.no_animation = True

# try to import OpenCV if the user didn't choose the option --no-animation
show_animation = False
if not args.no_animation:
    try:
        import cv2
        show_animation = True
    except ImportError:
        print("\"opencv-python\" not found, please install to visualize the results.")
        args.no_animation = True

# try to import Matplotlib if the user didn't choose the option --no-plot
draw_plot = False
if not args.no_plot:
    try:
        import matplotlib.pyplot as plt
        draw_plot = True
    except ImportError:
        print("\"matplotlib\" not found, please install it to get the resulting plots.")
        args.no_plot = True
 78 | 
 79 | 
 80 | def log_average_miss_rate(prec, rec, num_images):
 81 |     """
 82 |         log-average miss rate:
 83 |             Calculated by averaging miss rates at 9 evenly spaced FPPI points
 84 |             between 10e-2 and 10e0, in log-space.
 85 | 
 86 |         output:
 87 |                 lamr | log-average miss rate
 88 |                 mr | miss rate
 89 |                 fppi | false positives per image
 90 | 
 91 |         references:
 92 |             [1] Dollar, Piotr, et al. "Pedestrian Detection: An Evaluation of the
 93 |                State of the Art." Pattern Analysis and Machine Intelligence, IEEE
 94 |                Transactions on 34.4 (2012): 743 - 761.
 95 |     """
 96 | 
 97 |     # if there were no detections of that class
 98 |     if prec.size == 0:
 99 |         lamr = 0
100 |         mr = 1
101 |         fppi = 0
102 |         return lamr, mr, fppi
103 | 
104 |     fppi = (1 - prec)
105 |     mr = (1 - rec)
106 | 
107 |     fppi_tmp = np.insert(fppi, 0, -1.0)
108 |     mr_tmp = np.insert(mr, 0, 1.0)
109 | 
110 |     # Use 9 evenly spaced reference points in log-space
111 |     ref = np.logspace(-2.0, 0.0, num = 9)
112 |     for i, ref_i in enumerate(ref):
113 |         # np.where() will always find at least 1 index, since min(ref) = 0.01 and min(fppi_tmp) = -1.0
114 |         j = np.where(fppi_tmp <= ref_i)[-1][-1]
115 |         ref[i] = mr_tmp[j]
116 | 
117 |     # log(0) is undefined, so we use the np.maximum(1e-10, ref)
118 |     lamr = math.exp(np.mean(np.log(np.maximum(1e-10, ref))))
119 | 
120 |     return lamr, mr, fppi
121 | 
def error(msg):
    """Print ``msg`` and terminate the script with a failing exit status.

    The process exits with status 1 so that shells and calling scripts can
    tell a failed run from a successful one (status 0 signals success).

    Raises:
        SystemExit: always, with code 1.
    """
    print(msg)
    sys.exit(1)
128 | 
def is_float_between_0_and_1(value):
    """Return True when ``value`` converts to a float strictly between 0.0 and 1.0.

    Both bounds are exclusive. A value for which ``float()`` raises
    ``ValueError`` (for example a non-numeric string) yields False.
    """
    try:
        number = float(value)
    except ValueError:
        return False
    return 0.0 < number < 1.0
141 | 
def voc_ap(rec, prec):
    """Calculate the Average Precision (AP) from recall and precision values.

    1st) The measured precision/recall curve is turned into a version whose
         precision is monotonically decreasing.
    2nd) The AP is the area under that curve, obtained by numerical
         integration over the points where the recall changes.

    The inputs are not modified: padded copies are built and returned.

    --- Official matlab code VOC2012---
    mrec=[0 ; rec ; 1];
    mpre=[0 ; prec ; 0];
    for i=numel(mpre)-1:-1:1
            mpre(i)=max(mpre(i),mpre(i+1));
    end
    i=find(mrec(2:end)~=mrec(1:end-1))+1;
    ap=sum((mrec(i)-mrec(i-1)).*mpre(i));

    Args:
        rec: sequence of recall values.
        prec: sequence of precision values, same length as ``rec``.

    Returns:
        A tuple ``(ap, mrec, mpre)`` where ``mrec`` is ``rec`` padded with
        0.0 in front and 1.0 at the end, and ``mpre`` is ``prec`` padded with
        0.0 on both sides and then made monotonically decreasing.
    """
    # Pad into fresh lists so the caller's sequences stay untouched.
    mrec = [0.0] + list(rec) + [1.0]
    mpre = [0.0] + list(prec) + [0.0]

    # Make the precision monotonically decreasing, going from the end to the
    # beginning. matlab indexes start at 1 and python's range excludes its
    # end, hence range(len(mpre) - 2, -1, -1).
    for i in range(len(mpre) - 2, -1, -1):
        mpre[i] = max(mpre[i], mpre[i + 1])

    # Sum the rectangle areas at every index where the recall changes.
    ap = 0.0
    for i in range(1, len(mrec)):
        if mrec[i] != mrec[i - 1]:
            ap += (mrec[i] - mrec[i - 1]) * mpre[i]
    return ap, mrec, mpre
194 | 
195 | 
def file_lines_to_list(path):
    """Read the text file at ``path`` and return its lines as a list.

    Leading and trailing whitespace (including the newline) is stripped from
    every line; empty lines are kept as empty strings.
    """
    with open(path) as handle:
        raw_lines = handle.readlines()
    return [raw.strip() for raw in raw_lines]
206 | 
def draw_text_in_image(img, text, pos, color, line_width):
    """Draw ``text`` on ``img`` and return the image with the updated line width.

    Args:
        img: image passed to ``cv2.putText`` (drawn on in place).
        text: string to render.
        pos: position passed to ``cv2.putText`` as the text origin.
        color: colour passed to ``cv2.putText``.
        line_width: width already used on the current text line.

    Returns:
        ``(img, line_width + text_width)`` where ``text_width`` is the width
        reported by ``cv2.getTextSize`` for ``text``.
    """
    font_face = cv2.FONT_HERSHEY_PLAIN
    scale = 1
    # Passed as the 7th positional argument of putText and the 4th of
    # getTextSize (the slot OpenCV documents as "thickness").
    stroke = 1
    cv2.putText(img, text, pos, font_face, scale, color, stroke)
    text_size = cv2.getTextSize(text, font_face, scale, stroke)[0]
    text_width, _ = text_size
    return img, (line_width + text_width)
223 | 
def adjust_axes(r, t, fig, axes):
    """Widen the x-axis upper limit so the text ``t`` fits inside the figure.

    The upper x limit is multiplied by
    ``(figure width + text width) / figure width``, both measured in inches.

    Args:
        r: renderer handed to ``t.get_window_extent``.
        t: text object whose rendered width is measured.
        fig: figure providing ``dpi`` and ``get_figwidth()``.
        axes: axes whose x limits are read and updated.
    """
    # Rendered text width, converted to inches through the figure dpi.
    text_width_inches = t.get_window_extent(renderer=r).width / fig.dpi
    fig_width = fig.get_figwidth()
    scale = (fig_width + text_width_inches) / fig_width
    # Only the upper limit is stretched; the lower one is kept.
    lower, upper = axes.get_xlim()
    axes.set_xlim([lower, upper * scale])
238 | 
def draw_plot_func(dictionary, n_classes, window_title, plot_title, x_label, output_path, to_show, plot_color, true_p_bar):
    """Draw a horizontal bar chart with Matplotlib and save it to ``output_path``.

    Args:
        dictionary: mapping of class name to the value plotted for it.
        n_classes: number of bars (expected to equal ``len(dictionary)``).
        window_title: title given to the figure window.
        plot_title: title drawn above the plot.
        x_label: label of the x axis.
        output_path: file the figure is saved to.
        to_show: when true, the figure is also displayed with ``plt.show()``.
        plot_color: bar and label colour used in the single-colour mode.
        true_p_bar: ``""`` for a single-colour chart; otherwise a mapping of
            class name to its true-positive count, in which case every bar is
            split into a false-positive part (``dictionary[key] -
            true_p_bar[key]``, crimson) and a true-positive part
            (forestgreen).
    """
    # sort the dictionary by increasing value, into a list of tuples
    # (the last entry is therefore the largest bar)
    sorted_dic_by_value = sorted(dictionary.items(), key=operator.itemgetter(1))
    # unpacking the list of tuples into two lists
    sorted_keys, sorted_values = zip(*sorted_dic_by_value)
    last_index = len(sorted_values) - 1
    if true_p_bar != "":
        # Stacked mode:
        #   - green -> TP: True Positives (object detected and matches ground-truth)
        #   - red -> FP: False Positives (object detected but does not match ground-truth)
        fp_sorted = [dictionary[key] - true_p_bar[key] for key in sorted_keys]
        tp_sorted = [true_p_bar[key] for key in sorted_keys]
        plt.barh(range(n_classes), fp_sorted, align='center', color='crimson', label='False Positive')
        plt.barh(range(n_classes), tp_sorted, align='center', color='forestgreen', label='True Positive', left=fp_sorted)
        # add legend
        plt.legend(loc='lower right')
        # Write the numbers on the side of each bar
        fig = plt.gcf() # gcf - get current figure
        axes = plt.gca()
        r = fig.canvas.get_renderer()
        for i, val in enumerate(sorted_values):
            fp_str_val = " " + str(fp_sorted[i])
            tp_str_val = fp_str_val + " " + str(tp_sorted[i])
            # trick to paint multicolor with offset:
            # first paint everything and then repaint the first number
            t = plt.text(val, i, tp_str_val, color='forestgreen', va='center', fontweight='bold')
            plt.text(val, i, fp_str_val, color='crimson', va='center', fontweight='bold')
            if i == last_index: # largest bar
                adjust_axes(r, t, fig, axes)
    else:
        plt.barh(range(n_classes), sorted_values, color=plot_color)
        # Write the number on the side of each bar
        fig = plt.gcf() # gcf - get current figure
        axes = plt.gca()
        r = fig.canvas.get_renderer()
        for i, val in enumerate(sorted_values):
            str_val = " " + str(val) # add a space before
            if val < 1.0:
                str_val = " {0:.2f}".format(val)
            t = plt.text(val, i, str_val, color=plot_color, va='center', fontweight='bold')
            # re-set axes to show number inside the figure
            if i == last_index: # largest bar
                adjust_axes(r, t, fig, axes)
    # set window title
    # FigureCanvas.set_window_title was deprecated in Matplotlib 3.4 and later
    # removed; the figure manager is the supported place to set it. Fall back
    # to the canvas method for old Matplotlib versions.
    manager = getattr(fig.canvas, "manager", None)
    if manager is not None and hasattr(manager, "set_window_title"):
        manager.set_window_title(window_title)
    else:
        fig.canvas.set_window_title(window_title)
    # write classes in y axis
    tick_font_size = 12
    plt.yticks(range(n_classes), sorted_keys, fontsize=tick_font_size)
    # Re-scale the figure height so every class label gets enough room
    init_height = fig.get_figheight()
    # compute the matrix height in points and inches
    dpi = fig.dpi
    height_pt = n_classes * (tick_font_size * 1.4) # 1.4 (some spacing)
    height_in = height_pt / dpi
    # compute the required figure height
    top_margin = 0.15 # in percentage of the figure height
    bottom_margin = 0.05 # in percentage of the figure height
    figure_height = height_in / (1 - top_margin - bottom_margin)
    # set new height (the figure is only ever enlarged, never shrunk)
    if figure_height > init_height:
        fig.set_figheight(figure_height)

    # set plot title
    plt.title(plot_title, fontsize=14)
    # set axis titles
    plt.xlabel(x_label, fontsize='large')
    # adjust size of window
    fig.tight_layout()
    # save the plot
    fig.savefig(output_path)
    # show image
    if to_show:
        plt.show()
    # close the plot
    plt.close()
332 | 
"""
 Create a ".temp_files/" and "output/" directory
"""
# Scratch directory for the intermediate ".json" files; reused when it already
# exists. Relative paths resolve against the script directory because of the
# earlier os.chdir().
TEMP_FILES_PATH = ".temp_files"
if not os.path.exists(TEMP_FILES_PATH): # if it doesn't exist already
    os.makedirs(TEMP_FILES_PATH)
# WARNING: results of a previous run in "output" are deleted on every run.
output_files_path = "output"
if os.path.exists(output_files_path): # if it exist already
    # reset the output directory
    shutil.rmtree(output_files_path)

os.makedirs(output_files_path)
# Sub-directories are created only for the features that are enabled.
if draw_plot:
    os.makedirs(os.path.join(output_files_path, "classes"))
if show_animation:
    os.makedirs(os.path.join(output_files_path, "images", "detections_one_by_one"))
349 | 
"""
 ground-truth
     Load each of the ground-truth files into a temporary ".json" file.
     Create a list of all the class names present in the ground-truth (gt_classes).
"""
# get a list with the ground-truth files
ground_truth_files_list = glob.glob(GT_PATH + '/*.txt')
if len(ground_truth_files_list) == 0:
    error("Error: No ground-truth files found!")
ground_truth_files_list.sort()
# dictionary with counter per class
# gt_counter_per_class: number of non-difficult objects per class
# counter_images_per_class: number of files containing at least one
# non-difficult object of the class
gt_counter_per_class = {}
counter_images_per_class = {}

# paths of the temporary ground-truth ".json" files, one per input file
gt_files = []
for txt_file in ground_truth_files_list:
    #print(txt_file)
    # file id = file name without directory and without the ".txt" suffix
    file_id = txt_file.split(".txt", 1)[0]
    file_id = os.path.basename(os.path.normpath(file_id))
    # check if there is a correspondent detection-results file
    temp_path = os.path.join(DR_PATH, (file_id + ".txt"))
    if not os.path.exists(temp_path):
        error_msg = "Error. File not found: {}\n".format(temp_path)
        error_msg += "(You can avoid this error message by running extra/intersect-gt-and-dr.py)"
        error(error_msg)
    lines_list = file_lines_to_list(txt_file)
    # create ground-truth dictionary
    bounding_boxes = []
    is_difficult = False
    already_seen_classes = []
    for line in lines_list:
        # Expected: <class_name> <left> <top> <right> <bottom> ['difficult'].
        # A wrong number of fields makes the unpacking raise ValueError.
        try:
            if "difficult" in line:
                    class_name, left, top, right, bottom, _difficult = line.split()
                    is_difficult = True
            else:
                    class_name, left, top, right, bottom = line.split()
        except ValueError:
            error_msg = "Error: File " + txt_file + " in the wrong format.\n"
            error_msg += " Expected: <class_name> <left> <top> <right> <bottom> ['difficult']\n"
            error_msg += " Received: " + line
            error_msg += "\n\nIf you have a <class_name> with spaces between words you should remove them\n"
            error_msg += "by running the script \"remove_space.py\" or \"rename_class.py\" in the \"extra/\" folder."
            error(error_msg)
        # check if class is in the ignore list, if yes skip
        if class_name in args.ignore:
            continue
        # the box is stored as one space-separated string
        bbox = left + " " + top + " " + right + " " +bottom
        if is_difficult:
            # difficult objects are stored but left out of both counters
            bounding_boxes.append({"class_name":class_name, "bbox":bbox, "used":False, "difficult":True})
            is_difficult = False
        else:
            bounding_boxes.append({"class_name":class_name, "bbox":bbox, "used":False})
            # count that object
            if class_name in gt_counter_per_class:
                gt_counter_per_class[class_name] += 1
            else:
                # if class didn't exist yet
                gt_counter_per_class[class_name] = 1

            # count each class at most once per file
            if class_name not in already_seen_classes:
                if class_name in counter_images_per_class:
                    counter_images_per_class[class_name] += 1
                else:
                    # if class didn't exist yet
                    counter_images_per_class[class_name] = 1
                already_seen_classes.append(class_name)


    # dump bounding_boxes into a ".json" file
    new_temp_file = TEMP_FILES_PATH + "/" + file_id + "_ground_truth.json"
    gt_files.append(new_temp_file)
    with open(new_temp_file, 'w') as outfile:
        json.dump(bounding_boxes, outfile)

# classes that have at least one non-difficult ground-truth object
gt_classes = list(gt_counter_per_class.keys())
# let's sort the classes alphabetically
gt_classes = sorted(gt_classes)
n_classes = len(gt_classes)
#print(gt_classes)
#print(gt_counter_per_class)
431 | 
"""
 Check format of the flag --set-class-iou (if used)
    e.g. check if class exists
"""
if specific_iou_flagged:
    # The flag takes a flat list of alternating class names and IoU values.
    n_args = len(args.set_class_iou)
    error_msg = \
        '\n --set-class-iou [class_1] [IoU_1] [class_2] [IoU_2] [...]'
    if n_args % 2 != 0:
        error('Error, missing arguments. Flag usage:' + error_msg)
    # [class_1] [IoU_1] [class_2] [IoU_2]
    # specific_iou_classes = ['class_1', 'class_2']
    specific_iou_classes = args.set_class_iou[::2] # even
    # iou_list = ['IoU_1', 'IoU_2']
    iou_list = args.set_class_iou[1::2] # odd
    if len(specific_iou_classes) != len(iou_list):
        error('Error, missing arguments. Flag usage:' + error_msg)
    # every named class must be present in the ground-truth
    for tmp_class in specific_iou_classes:
        if tmp_class not in gt_classes:
                    error('Error, unknown class \"' + tmp_class + '\". Flag usage:' + error_msg)
    # every IoU must parse as a float strictly between 0.0 and 1.0
    for num in iou_list:
        if not is_float_between_0_and_1(num):
            error('Error, IoU must be between 0.0 and 1.0. Flag usage:' + error_msg)
455 | 
"""
 detection-results
     Load each of the detection-results files into a temporary ".json" file.
"""
# get a list with the detection-results files
dr_files_list = glob.glob(DR_PATH + '/*.txt')
dr_files_list.sort()

# One "<class_name>_dr.json" file is written per ground-truth class.
# NOTE: every detection-results file is re-read once per class.
for class_index, class_name in enumerate(gt_classes):
    bounding_boxes = []
    for txt_file in dr_files_list:
        #print(txt_file)
        # the first time it checks if all the corresponding ground-truth files exist
        file_id = txt_file.split(".txt",1)[0]
        file_id = os.path.basename(os.path.normpath(file_id))
        temp_path = os.path.join(GT_PATH, (file_id + ".txt"))
        if class_index == 0:
            if not os.path.exists(temp_path):
                error_msg = "Error. File not found: {}\n".format(temp_path)
                error_msg += "(You can avoid this error message by running extra/intersect-gt-and-dr.py)"
                error(error_msg)
        lines = file_lines_to_list(txt_file)
        for line in lines:
            # Expected: <class_name> <confidence> <left> <top> <right> <bottom>.
            # A wrong number of fields makes the unpacking raise ValueError.
            try:
                tmp_class_name, confidence, left, top, right, bottom = line.split()
            except ValueError:
                error_msg = "Error: File " + txt_file + " in the wrong format.\n"
                error_msg += " Expected: <class_name> <confidence> <left> <top> <right> <bottom>\n"
                error_msg += " Received: " + line
                error(error_msg)
            # keep only the detections of the class currently processed
            if tmp_class_name == class_name:
                #print("match")
                bbox = left + " " + top + " " + right + " " +bottom
                bounding_boxes.append({"confidence":confidence, "file_id":file_id, "bbox":bbox})
                #print(bounding_boxes)
    # sort detection-results by decreasing confidence
    bounding_boxes.sort(key=lambda x:float(x['confidence']), reverse=True)
    with open(TEMP_FILES_PATH + "/" + class_name + "_dr.json", 'w') as outfile:
        json.dump(bounding_boxes, outfile)
495 | 
496 | """
497 |  Calculate the AP for each class
498 | """
499 | sum_AP = 0.0
500 | ap_dictionary = {}
501 | lamr_dictionary = {}
502 | # open file to store the output
503 | with open(output_files_path + "/output.txt", 'w') as output_file:
504 |     output_file.write("# AP and precision/recall per class\n")
505 |     count_true_positives = {}
506 |     for class_index, class_name in enumerate(gt_classes):
507 |         count_true_positives[class_name] = 0
508 |         """
509 |          Load detection-results of that class
510 |         """
511 |         dr_file = TEMP_FILES_PATH + "/" + class_name + "_dr.json"
512 |         dr_data = json.load(open(dr_file))
513 | 
514 |         """
515 |          Assign detection-results to ground-truth objects
516 |         """
517 |         nd = len(dr_data)
518 |         tp = [0] * nd # creates an array of zeros of size nd
519 |         fp = [0] * nd
520 |         for idx, detection in enumerate(dr_data):
521 |             file_id = detection["file_id"]
522 |             if show_animation:
523 |                 # find ground truth image
524 |                 ground_truth_img = glob.glob1(IMG_PATH, file_id + ".*")
525 |                 #tifCounter = len(glob.glob1(myPath,"*.tif"))
526 |                 if len(ground_truth_img) == 0:
527 |                     error("Error. Image not found with id: " + file_id)
528 |                 elif len(ground_truth_img) > 1:
529 |                     error("Error. Multiple image with id: " + file_id)
530 |                 else: # found image
531 |                     #print(IMG_PATH + "/" + ground_truth_img[0])
532 |                     # Load image
533 |                     img = cv2.imread(IMG_PATH + "/" + ground_truth_img[0])
534 |                     # load image with draws of multiple detections
535 |                     img_cumulative_path = output_files_path + "/images/" + ground_truth_img[0]
536 |                     if os.path.isfile(img_cumulative_path):
537 |                         img_cumulative = cv2.imread(img_cumulative_path)
538 |                     else:
539 |                         img_cumulative = img.copy()
540 |                     # Add bottom border to image
541 |                     bottom_border = 60
542 |                     BLACK = [0, 0, 0]
543 |                     img = cv2.copyMakeBorder(img, 0, bottom_border, 0, 0, cv2.BORDER_CONSTANT, value=BLACK)
544 |             # assign detection-results to ground truth object if any
545 |             # open ground-truth with that file_id
546 |             gt_file = TEMP_FILES_PATH + "/" + file_id + "_ground_truth.json"
547 |             ground_truth_data = json.load(open(gt_file))
548 |             ovmax = -1
549 |             gt_match = -1
550 |             # load detected object bounding-box
551 |             bb = [ float(x) for x in detection["bbox"].split() ]
552 |             for obj in ground_truth_data:
553 |                 # look for a class_name match
554 |                 if obj["class_name"] == class_name:
555 |                     bbgt = [ float(x) for x in obj["bbox"].split() ]
556 |                     bi = [max(bb[0],bbgt[0]), max(bb[1],bbgt[1]), min(bb[2],bbgt[2]), min(bb[3],bbgt[3])]
557 |                     iw = bi[2] - bi[0] + 1
558 |                     ih = bi[3] - bi[1] + 1
559 |                     if iw > 0 and ih > 0:
560 |                         # compute overlap (IoU) = area of intersection / area of union
561 |                         ua = (bb[2] - bb[0] + 1) * (bb[3] - bb[1] + 1) + (bbgt[2] - bbgt[0]
562 |                                         + 1) * (bbgt[3] - bbgt[1] + 1) - iw * ih
563 |                         ov = iw * ih / ua
564 |                         if ov > ovmax:
565 |                             ovmax = ov
566 |                             gt_match = obj
567 | 
568 |             # assign detection as true positive/don't care/false positive
569 |             if show_animation:
570 |                 status = "NO MATCH FOUND!" # status is only used in the animation
571 |             # set minimum overlap
572 |             min_overlap = MINOVERLAP
573 |             if specific_iou_flagged:
574 |                 if class_name in specific_iou_classes:
575 |                     index = specific_iou_classes.index(class_name)
576 |                     min_overlap = float(iou_list[index])
577 |             if ovmax >= min_overlap:
578 |                 if "difficult" not in gt_match:
579 |                         if not bool(gt_match["used"]):
580 |                             # true positive
581 |                             tp[idx] = 1
582 |                             gt_match["used"] = True
583 |                             count_true_positives[class_name] += 1
584 |                             # update the ".json" file
585 |                             with open(gt_file, 'w') as f:
586 |                                     f.write(json.dumps(ground_truth_data))
587 |                             if show_animation:
588 |                                 status = "MATCH!"
589 |                         else:
590 |                             # false positive (multiple detection)
591 |                             fp[idx] = 1
592 |                             if show_animation:
593 |                                 status = "REPEATED MATCH!"
594 |             else:
595 |                 # false positive
596 |                 fp[idx] = 1
597 |                 if ovmax > 0:
598 |                     status = "INSUFFICIENT OVERLAP"
599 | 
600 |             """
601 |              Draw image to show animation
602 |             """
603 |             if show_animation:
604 |                 height, widht = img.shape[:2]
605 |                 # colors (OpenCV works with BGR)
606 |                 white = (255,255,255)
607 |                 light_blue = (255,200,100)
608 |                 green = (0,255,0)
609 |                 light_red = (30,30,255)
610 |                 # 1st line
611 |                 margin = 10
612 |                 v_pos = int(height - margin - (bottom_border / 2.0))
613 |                 text = "Image: " + ground_truth_img[0] + " "
614 |                 img, line_width = draw_text_in_image(img, text, (margin, v_pos), white, 0)
615 |                 text = "Class [" + str(class_index) + "/" + str(n_classes) + "]: " + class_name + " "
616 |                 img, line_width = draw_text_in_image(img, text, (margin + line_width, v_pos), light_blue, line_width)
617 |                 if ovmax != -1:
618 |                     color = light_red
619 |                     if status == "INSUFFICIENT OVERLAP":
620 |                         text = "IoU: {0:.2f}% ".format(ovmax*100) + "< {0:.2f}% ".format(min_overlap*100)
621 |                     else:
622 |                         text = "IoU: {0:.2f}% ".format(ovmax*100) + ">= {0:.2f}% ".format(min_overlap*100)
623 |                         color = green
624 |                     img, _ = draw_text_in_image(img, text, (margin + line_width, v_pos), color, line_width)
625 |                 # 2nd line
626 |                 v_pos += int(bottom_border / 2.0)
627 |                 rank_pos = str(idx+1) # rank position (idx starts at 0)
628 |                 text = "Detection #rank: " + rank_pos + " confidence: {0:.2f}% ".format(float(detection["confidence"])*100)
629 |                 img, line_width = draw_text_in_image(img, text, (margin, v_pos), white, 0)
630 |                 color = light_red
631 |                 if status == "MATCH!":
632 |                     color = green
633 |                 text = "Result: " + status + " "
634 |                 img, line_width = draw_text_in_image(img, text, (margin + line_width, v_pos), color, line_width)
635 | 
636 |                 font = cv2.FONT_HERSHEY_SIMPLEX
637 |                 if ovmax > 0: # if there is intersections between the bounding-boxes
638 |                     bbgt = [ int(round(float(x))) for x in gt_match["bbox"].split() ]
639 |                     cv2.rectangle(img,(bbgt[0],bbgt[1]),(bbgt[2],bbgt[3]),light_blue,2)
640 |                     cv2.rectangle(img_cumulative,(bbgt[0],bbgt[1]),(bbgt[2],bbgt[3]),light_blue,2)
641 |                     cv2.putText(img_cumulative, class_name, (bbgt[0],bbgt[1] - 5), font, 0.6, light_blue, 1, cv2.LINE_AA)
642 |                 bb = [int(i) for i in bb]
643 |                 cv2.rectangle(img,(bb[0],bb[1]),(bb[2],bb[3]),color,2)
644 |                 cv2.rectangle(img_cumulative,(bb[0],bb[1]),(bb[2],bb[3]),color,2)
645 |                 cv2.putText(img_cumulative, class_name, (bb[0],bb[1] - 5), font, 0.6, color, 1, cv2.LINE_AA)
646 |                 # show image
647 |                 cv2.imshow("Animation", img)
648 |                 cv2.waitKey(20) # show for 20 ms
649 |                 # save image to output
650 |                 output_img_path = output_files_path + "/images/detections_one_by_one/" + class_name + "_detection" + str(idx) + ".jpg"
651 |                 cv2.imwrite(output_img_path, img)
652 |                 # save the image with all the objects drawn to it
653 |                 cv2.imwrite(img_cumulative_path, img_cumulative)
654 | 
655 |         #print(tp)
656 |         # compute precision/recall
657 |         cumsum = 0
658 |         for idx, val in enumerate(fp):
659 |             fp[idx] += cumsum
660 |             cumsum += val
661 |         cumsum = 0
662 |         for idx, val in enumerate(tp):
663 |             tp[idx] += cumsum
664 |             cumsum += val
665 |         #print(tp)
666 |         rec = tp[:]
667 |         for idx, val in enumerate(tp):
668 |             rec[idx] = float(tp[idx]) / gt_counter_per_class[class_name]
669 |         #print(rec)
670 |         prec = tp[:]
671 |         for idx, val in enumerate(tp):
672 |             prec[idx] = float(tp[idx]) / (fp[idx] + tp[idx])
673 |         #print(prec)
674 | 
675 |         ap, mrec, mprec = voc_ap(rec[:], prec[:])
676 |         sum_AP += ap
677 |         text = "{0:.2f}%".format(ap*100) + " = " + class_name + " AP " #class_name + " AP = {0:.2f}%".format(ap*100)
678 |         """
679 |          Write to output.txt
680 |         """
681 |         rounded_prec = [ '%.2f' % elem for elem in prec ]
682 |         rounded_rec = [ '%.2f' % elem for elem in rec ]
683 |         output_file.write(text + "\n Precision: " + str(rounded_prec) + "\n Recall :" + str(rounded_rec) + "\n\n")
684 |         if not args.quiet:
685 |             print(text)
686 |         ap_dictionary[class_name] = ap
687 | 
688 |         n_images = counter_images_per_class[class_name]
689 |         lamr, mr, fppi = log_average_miss_rate(np.array(prec), np.array(rec), n_images)
690 |         lamr_dictionary[class_name] = lamr
691 | 
692 |         """
693 |          Draw plot
694 |         """
695 |         if draw_plot:
696 |             plt.plot(rec, prec, '-o')
697 |             # add a new penultimate point to the list (mrec[-2], 0.0)
698 |             # since the last line segment (and respective area) do not affect the AP value
699 |             area_under_curve_x = mrec[:-1] + [mrec[-2]] + [mrec[-1]]
700 |             area_under_curve_y = mprec[:-1] + [0.0] + [mprec[-1]]
701 |             plt.fill_between(area_under_curve_x, 0, area_under_curve_y, alpha=0.2, edgecolor='r')
702 |             # set window title
703 |             fig = plt.gcf() # gcf - get current figure
704 |             fig.canvas.set_window_title('AP ' + class_name)
705 |             # set plot title
706 |             plt.title('class: ' + text)
707 |             #plt.suptitle('This is a somewhat long figure title', fontsize=16)
708 |             # set axis titles
709 |             plt.xlabel('Recall')
710 |             plt.ylabel('Precision')
711 |             # optional - set axes
712 |             axes = plt.gca() # gca - get current axes
713 |             axes.set_xlim([0.0,1.0])
714 |             axes.set_ylim([0.0,1.05]) # .05 to give some extra space
715 |             # Alternative option -> wait for button to be pressed
716 |             #while not plt.waitforbuttonpress(): pass # wait for key display
717 |             # Alternative option -> normal display
718 |             #plt.show()
719 |             # save the plot
720 |             fig.savefig(output_files_path + "/classes/" + class_name + ".png")
721 |             plt.cla() # clear axes for next plot
722 | 
723 |     if show_animation:
724 |         cv2.destroyAllWindows()
725 | 
726 |     output_file.write("\n# mAP of all classes\n")
727 |     mAP = sum_AP / n_classes
728 |     text = "mAP = {0:.2f}%".format(mAP*100)
729 |     output_file.write(text + "\n")
730 |     print(text)
731 | 
"""
 Draw false negatives
"""
# For every per-image ground-truth temp file, outline in pink each
# ground-truth box whose "used" flag is still falsy, and save the result
# over the cumulative output image.
if show_animation:
    pink = (203,192,255)
    for tmp_file in gt_files:
        # Read through a context manager so the handle is closed right away
        # instead of being leaked once per ground-truth file.
        with open(tmp_file) as gt_handle:
            ground_truth_data = json.load(gt_handle)
        # get name of corresponding image
        start = TEMP_FILES_PATH + '/'
        img_id = tmp_file[tmp_file.find(start)+len(start):tmp_file.rfind('_ground_truth.json')]
        img_cumulative_path = output_files_path + "/images/" + img_id + ".jpg"
        img = cv2.imread(img_cumulative_path)
        if img is None:
            # no cumulative image was written for this id: fall back to the source image
            img_path = IMG_PATH + '/' + img_id + ".jpg"
            img = cv2.imread(img_path)
        # draw false negatives
        for obj in ground_truth_data:
            if not obj['used']:
                bbgt = [ int(round(float(x))) for x in obj["bbox"].split() ]
                cv2.rectangle(img,(bbgt[0],bbgt[1]),(bbgt[2],bbgt[3]),pink,2)
        cv2.imwrite(img_cumulative_path, img)
754 | 
# The temporary ground-truth files are no longer needed: drop the directory.
shutil.rmtree(TEMP_FILES_PATH)

# Count total of detection-results:
# tally, per class name, how many detection lines appear across all
# detection-result files (classes listed in args.ignore are not counted).
det_counter_per_class = {}
for txt_file in dr_files_list:
    for line in file_lines_to_list(txt_file):
        class_name = line.split()[0]
        if class_name not in args.ignore:
            # first occurrence starts at 1, later ones increment
            det_counter_per_class[class_name] = det_counter_per_class.get(class_name, 0) + 1
dr_classes = list(det_counter_per_class.keys())
779 | 
780 | 
# Plot the total number of occurrences of each class in the ground-truth
if draw_plot:
    window_title = "ground-truth-info"
    plot_title = "".join([
        "ground-truth\n",
        "(", str(len(ground_truth_files_list)), " files and ", str(n_classes), " classes)",
    ])
    x_label = "Number of objects per class"
    output_path = output_files_path + "/ground-truth-info.png"
    to_show = False
    plot_color = 'forestgreen'
    # the last argument is passed as an empty string for this plot
    draw_plot_func(gt_counter_per_class, n_classes, window_title, plot_title,
                   x_label, output_path, to_show, plot_color, '')

# Write number of ground-truth objects per class to output.txt
with open(output_files_path + "/output.txt", 'a') as output_file:
    output_file.write("\n# Number of ground-truth objects per class\n")
    for class_name in sorted(gt_counter_per_class):
        gt_count = gt_counter_per_class[class_name]
        output_file.write(class_name + ": " + str(gt_count) + "\n")
811 | 
# Finish counting true positives:
# a class that shows up in the detection results but not in the ground truth
# cannot have any true positives, so its count is set to 0.
for class_name in dr_classes:
    if class_name in gt_classes:
        continue
    count_true_positives[class_name] = 0

# Plot the total number of occurrences of each class in the "detection-results" folder
if draw_plot:
    window_title = "detection-results-info"
    # number of classes with at least one detection
    count_non_zero_values_in_dictionary = sum(
        int(n_found) > 0 for n_found in det_counter_per_class.values())
    plot_title = (
        "detection-results\n"
        + "(" + str(len(dr_files_list)) + " files and "
        + str(count_non_zero_values_in_dictionary) + " detected classes)"
    )
    x_label = "Number of objects per class"
    output_path = output_files_path + "/detection-results-info.png"
    to_show = False
    plot_color = 'forestgreen'
    true_p_bar = count_true_positives
    draw_plot_func(det_counter_per_class, len(det_counter_per_class), window_title,
                   plot_title, x_label, output_path, to_show, plot_color, true_p_bar)

# Write number of detected objects per class to output.txt
with open(output_files_path + "/output.txt", 'a') as output_file:
    output_file.write("\n# Number of detected objects per class\n")
    for class_name in sorted(dr_classes):
        n_det = det_counter_per_class[class_name]
        n_true = count_true_positives[class_name]
        # every detection that is not a true positive is reported as a false positive
        text = "".join([
            class_name, ": ", str(n_det),
            " (tp:", str(n_true),
            ", fp:", str(n_det - n_true), ")\n",
        ])
        output_file.write(text)
860 | 
# Draw log-average miss rate plot (Show lamr of all classes in decreasing order)
if draw_plot:
    window_title = "lamr"
    plot_title = x_label = "log-average miss rate"
    output_path = output_files_path + "/lamr.png"
    to_show, plot_color = False, 'royalblue'
    draw_plot_func(lamr_dictionary, n_classes, window_title, plot_title,
                   x_label, output_path, to_show, plot_color, "")

# Draw mAP plot (Show AP's of all classes in decreasing order)
if draw_plot:
    window_title = "mAP"
    plot_title = "mAP = {0:.2f}%".format(mAP*100)
    x_label = "Average Precision"
    output_path = output_files_path + "/mAP.png"
    # unlike the other plots, this one is called with to_show=True
    to_show, plot_color = True, 'royalblue'
    draw_plot_func(ap_dictionary, n_classes, window_title, plot_title,
                   x_label, output_path, to_show, plot_color, "")
904 | 


--------------------------------------------------------------------------------
/objectDetection.py:
--------------------------------------------------------------------------------
   1 | # -*- coding: UTF-8 -*-
   2 | """
   3 | 训练常基于dark-net的YOLOv3网络，目标检测
   4 | """
   5 | from __future__ import absolute_import
   6 | from __future__ import division
   7 | from __future__ import print_function
   8 | import os
   9 | 
  10 | os.environ["FLAGS_fraction_of_gpu_memory_to_use"] = '0.92'
  11 | os.environ["FLAGS_eager_delete_tensor_gb"] = '0'
  12 | os.environ["FLAGS_memory_fraction_of_eager_deletion"] = '1'
  13 | os.environ["FLAGS_fast_eager_deletion_mode"]='True'
  14 | 
  15 | import uuid
  16 | import numpy as np
  17 | import time
  18 | import six
  19 | import math
  20 | import random
  21 | import paddle
  22 | import paddle.fluid as fluid
  23 | import logging
  24 | import xml.etree.ElementTree
  25 | import codecs
  26 | import json
  27 | 
  28 | from paddle.fluid.initializer import MSRA
  29 | from paddle.fluid.param_attr import ParamAttr
  30 | from paddle.fluid.regularizer import L2Decay
  31 | from PIL import Image, ImageEnhance, ImageDraw, ImageFile
  32 | ImageFile.LOAD_TRUNCATED_IMAGES = True
  33 | Image.MAX_IMAGE_PIXELS = None
  34 | 
  35 | logger = None  # 日志对象
  36 | 
# Global training / inference configuration. "class_dim", "label_dict",
# "num_dict" and "image_count" are placeholders that init_train_parameters()
# fills in from the dataset files.
train_params = {
    "data_dir": "data/data6045",  # dataset directory
    "train_list": "train.txt",  # training-set list file (inside data_dir)
    "eval_list": "eval.txt",
    "class_dim": -1,
    "label_dict": {},  # label name -> class index
    "num_dict": {},
    "image_count": -1,
    "continue_train": True,  # whether to load the previous run's parameters and resume training
    "pretrained": False,  # whether to use a pretrained model
    "pretrained_model_dir": "./pretrained-model",
    "save_model_dir": "./yolo-model",  # directory where the model is saved
    "model_prefix": "yolo-v3",  # model name prefix
    "freeze_dir": "freeze_model",
    "use_tiny": False,  # whether to use the reduced "tiny" model
    "max_box_num": 8,  # maximum number of objects in one image
    "num_epochs": 100,  # number of training epochs
    "train_batch_size": 7,  # keep batches small for full YOLOv3 or memory blows up; tiny can use a somewhat larger batch
    "use_gpu": True,  # whether to use the GPU
    "yolo_cfg": {  # parameters for the full YOLO model
        "input_size": [3, 448, 448],  # the original uses a side length of 608; reduced to 448 here to speed up training and prediction
        "anchors": [7, 10, 12, 22, 24, 17, 22, 45, 46, 33, 43, 88, 85, 66, 115, 146, 275, 240],  # anchors; read in consecutive pairs by the network classes
        "anchor_mask": [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    },
    "yolo_tiny_cfg": {  # parameters for the YOLO tiny model
        "input_size": [3, 256, 256],
        "anchors": [6, 8, 13, 15, 22, 34, 48, 50, 81, 100, 205, 191],
        "anchor_mask": [[3, 4, 5], [0, 1, 2]]
    },
    "ignore_thresh": 0.7,
    "mean_rgb": [127.5, 127.5, 127.5],
    "mode": "train",
    "multi_data_reader_count": 4,
    "apply_distort": True,  # whether to apply image-distortion augmentation
    "nms_top_k": 300,
    "nms_pos_k": 300,
    "valid_thresh": 0.01,
    "nms_thresh": 0.40,  # non-maximum-suppression threshold
    "image_distort_strategy": {  # image distortion strategy
        "expand_prob": 0.5,  # expansion
        "expand_max_ratio": 4,
        "hue_prob": 0.5,  # hue
        "hue_delta": 18,
        "contrast_prob": 0.5,  # contrast
        "contrast_delta": 0.5,
        "saturation_prob": 0.5,  # saturation
        "saturation_delta": 0.5,
        "brightness_prob": 0.5,  # brightness
        "brightness_delta": 0.125
    },
    "sgd_strategy": {  # gradient-descent configuration
        "learning_rate": 0.002,
        "lr_epochs": [30, 50, 65],  # learning-rate decay boundaries (3 numbers give 4 segments)
        "lr_decay": [1, 0.5, 0.25, 0.1]  # value applied in each of the 4 lr_epochs segments (presumably a multiplier on learning_rate; confirm in the optimizer setup)
    },
    "early_stop": {
        "sample_frequency": 50,
        "successive_limit": 3,
        "min_loss": 2.5,
        "min_curr_map": 0.84
    }
}
  99 | 
 100 | 
 101 | def init_train_parameters():
 102 |     """
 103 |     初始化训练参数，主要是初始化图片数量，类别数
 104 |     :return:
 105 |     """
 106 |     file_list = os.path.join(train_params['data_dir'], train_params['train_list'])  # 训练集
 107 |     label_list = os.path.join(train_params['data_dir'], "label_list")  # 标签文件
 108 |     index = 0
 109 | 
 110 |     # codecs是专门用作编码转换通用模块
 111 |     with codecs.open(label_list, encoding='utf-8') as flist:
 112 |         lines = [line.strip() for line in flist]
 113 |         for line in lines:
 114 |             train_params['num_dict'][index] = line.strip()
 115 |             train_params['label_dict'][line.strip()] = index
 116 |             index += 1
 117 |         train_params['class_dim'] = index
 118 | 
 119 |     with codecs.open(file_list, encoding='utf-8') as flist:
 120 |         lines = [line.strip() for line in flist]
 121 |         train_params['image_count'] = len(lines)  # 图片数量
 122 | 
 123 | 
 124 | # 日志相关配置
 125 | def init_log_config():  # 初始化日志相关配置
 126 |     global logger
 127 | 
 128 |     logger = logging.getLogger()  # 创建日志对象
 129 |     logger.setLevel(logging.INFO)  # 设置日志级别
 130 |     log_path = os.path.join(os.getcwd(), 'logs')
 131 | 
 132 |     if not os.path.exists(log_path):  # 创建日志路径
 133 |         os.makedirs(log_path)
 134 | 
 135 |     log_name = os.path.join(log_path, 'train.log')  # 训练日志文件
 136 |     fh = logging.FileHandler(log_name, mode='w')  # 打开文件句柄
 137 |     fh.setLevel(logging.DEBUG)  # 设置级别
 138 | 
 139 |     formatter = logging.Formatter("%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s")
 140 |     fh.setFormatter(formatter)
 141 |     logger.addHandler(fh)
 142 | 
 143 | 
 144 | init_log_config()
 145 | 
 146 | 
# YOLOv3 network definition: darknet-53
class YOLOv3(object):
    """
    Builds the YOLOv3 graph with paddle.fluid layers.

    ``net`` stacks five groups of residual blocks separated by stride-2
    convolutions, then attaches one detection output to each of the last
    three groups and returns the list of those outputs.
    """

    def __init__(self, class_num, anchors, anchor_mask):
        """
        :param class_num: number of object classes
        :param anchors: flat list of anchor values, read in consecutive pairs
        :param anchor_mask: one list of anchor indices per detection output
        """
        self.outputs = []  # detection outputs, appended to by net()
        self.downsample_ratio = 1  # accumulated down-sampling factor
        self.anchor_mask = anchor_mask  # anchor indices used by each detection output
        self.anchors = anchors  # anchors
        self.class_num = class_num  # number of classes

        self.yolo_anchors = []
        self.yolo_classes = []

        # For every mask group, collect the pair of anchor values selected by
        # each index (positions 2*mask and 2*mask + 1 of the flat list).
        for mask_pair in self.anchor_mask:
            mask_anchors = []
            for mask in mask_pair:
                mask_anchors.append(self.anchors[2 * mask])
                mask_anchors.append(self.anchors[2 * mask + 1])
            self.yolo_anchors.append(mask_anchors)
            self.yolo_classes.append(class_num)

    def name(self):
        """Return the model name string."""
        return 'YOLOv3'

    # get anchors
    def get_anchors(self):
        return self.anchors

    # get anchor_mask
    def get_anchor_mask(self):
        return self.anchor_mask

    def get_class_num(self):
        return self.class_num

    def get_downsample_ratio(self):
        return self.downsample_ratio

    def get_yolo_anchors(self):
        return self.yolo_anchors

    def get_yolo_classes(self):
        return self.yolo_classes

    # Convolution + batch normalization + leaky_relu
    def conv_bn(self,
                input,  # input
                num_filters,  # number of convolution filters
                filter_size,  # filter size
                stride,  # stride
                padding,  # padding
                use_cudnn=True):
        """Apply a bias-free conv2d, then batch_norm, then leaky_relu(0.1)."""
        # 2-D convolution
        conv = fluid.layers.conv2d(input=input,
                                   num_filters=num_filters,
                                   filter_size=filter_size,
                                   stride=stride,
                                   padding=padding,
                                   act=None,
                                   use_cudnn=use_cudnn,  # whether to use cudnn (CUDA-accelerated kernels)
                                   param_attr=ParamAttr(initializer=fluid.initializer.Normal(0., 0.02)),
                                   bias_attr=False)

        # batch_norm parameters should not take part in regularization, so they are
        # given an L2 regularizer with coefficient 0 to mask it out.
        # Author's note: activating leaky_relu through batch_norm only allows the default
        # alpha (0.02); to choose the value it has to be applied as a separate layer.
        # Author's note: regularization is there to prevent over-fitting.
        param_attr = ParamAttr(initializer=fluid.initializer.Normal(0., 0.02),
                               regularizer=L2Decay(0.))
        bias_attr = ParamAttr(initializer=fluid.initializer.Constant(0.0),
                              regularizer=L2Decay(0.))
        out = fluid.layers.batch_norm(input=conv, act=None,
                                      param_attr=param_attr,
                                      bias_attr=bias_attr)
        # leaky_relu: Leaky ReLU gives all negative values a non-zero slope
        out = fluid.layers.leaky_relu(out, 0.1)
        return out

    # Down-sampling implemented with a strided convolution
    # e.g. for a 448*448 input the size afterwards is ((448+2)-3)/2 + 1 = 224
    def down_sample(self, input, num_filters, filter_size=3, stride=2, padding=1):
        """Double ``downsample_ratio`` and return a conv_bn of ``input`` (stride 2 by default)."""
        self.downsample_ratio *= 2  # down-sampling ratio
        return self.conv_bn(input,
                            num_filters=num_filters,
                            filter_size=filter_size,
                            stride=stride,
                            padding=padding)

    # Basic block: two conv/batch-norm layers plus a residual connection
    def basic_block(self, input, num_filters):
        """1x1 conv_bn (num_filters), 3x3 conv_bn (num_filters * 2), then add ``input``."""
        conv1 = self.conv_bn(input, num_filters, filter_size=1, stride=1, padding=0)
        conv2 = self.conv_bn(conv1, num_filters * 2, filter_size=3, stride=1, padding=1)
        out = fluid.layers.elementwise_add(x=input, y=conv2, act=None)  # compute H(x)=F(x)+x
        return out

    # Create several basic_blocks in sequence
    def layer_warp(self, input, num_filters, count):
        """Chain basic blocks: one always, plus one more for each j in range(1, count)."""
        res_out = self.basic_block(input, num_filters)
        for j in range(1, count):
            res_out = self.basic_block(res_out, num_filters)
        return res_out

    # Up-sampling
    def up_sample(self, input, scale=2):
        """Enlarge ``input`` with resize_nearest, passing both ``scale`` and the computed shape."""
        # get dynamic upsample output shape
        shape_nchw = fluid.layers.shape(input)  # shape of input
        shape_hw = fluid.layers.slice(shape_nchw, axes=[0], starts=[2], ends=[4])
        shape_hw.stop_gradient = True
        in_shape = fluid.layers.cast(shape_hw, dtype='int32')
        out_shape = in_shape * scale  # output shape
        out_shape.stop_gradient = True

        # resize by actual_shape
        # enlarge the feature map (nearest-neighbour interpolation)
        out = fluid.layers.resize_nearest(input=input,
                                          scale=scale,
                                          actual_shape=out_shape)
        return out

    def yolo_detection_block(self, input, num_filters):
        """
        Run two (1x1, 3x3) conv_bn pairs, then a 1x1 conv_bn (``route``) and a
        3x3 conv_bn on top of it (``tip``).

        :return: (route, tip); net() feeds ``tip`` to the output convolution and
                 carries ``route`` over to the next detection output
        """
        assert num_filters % 2 == 0, "num_filters {} cannot be divided by 2".format(num_filters)

        conv = input
        for j in range(2):
            conv = self.conv_bn(conv, num_filters, filter_size=1, stride=1, padding=0)
            conv = self.conv_bn(conv, num_filters * 2, filter_size=3, stride=1, padding=1)
        route = self.conv_bn(conv, num_filters, filter_size=1, stride=1, padding=0)
        tip = self.conv_bn(route, num_filters * 2, filter_size=3, stride=1, padding=1)
        return route, tip

    # Build the network model: darknet-53
    def net(self, img):
        """
        Build the full graph on ``img`` and return ``self.outputs``.

        Each output convolution has ``len(anchor_mask[i]) * (class_num + 5)``
        filters. Outputs are appended to ``self.outputs``, so calling this
        method again on the same instance keeps the earlier entries.
        """
        stages = [1, 2, 8, 8, 4]
        assert len(self.anchor_mask) <= len(stages), "anchor masks can't bigger than down_sample times"
        # first convolution layer: 256*256 (author's note; the size depends on the input)
        conv1 = self.conv_bn(img, num_filters=32, filter_size=3, stride=1, padding=1)
        # second convolution layer: 128*128 (author's note; the size depends on the input)
        downsample_ = self.down_sample(conv1, conv1.shape[1] * 2)  # second argument is the number of filters
        blocks = []

        # create the groups of basic_blocks in a loop
        for i, stage_count in enumerate(stages):
            block = self.layer_warp(downsample_,  # input data
                                    32 * (2 ** i),  # number of filters
                                    stage_count)  # number of basic blocks
            blocks.append(block)
            if i < len(stages) - 1:  # down-sample unless this is the last group
                downsample_ = self.down_sample(block, block.shape[1] * 2)
        blocks = blocks[-1:-4:-1]  # take the last three groups in reverse order, as needed by the cross-scale links below

        # yolo detector
        for i, block in enumerate(blocks):
            # cross-scale connection in yolo
            if i > 0:
                block = fluid.layers.concat(input=[route, block], axis=1)  # join route and block along axis 1

            route, tip = self.yolo_detection_block(block,  # input
                                                   num_filters=512 // (2 ** i))  # number of filters

            param_attr = ParamAttr(initializer=fluid.initializer.Normal(0., 0.02))
            bias_attr = ParamAttr(initializer=fluid.initializer.Constant(0.0), regularizer=L2Decay(0.))
            block_out = fluid.layers.conv2d(input=tip,
                                            # 5 elements represent x|y|h|w|score
                                            num_filters=len(self.anchor_mask[i]) * (self.class_num + 5),
                                            filter_size=1,
                                            stride=1,
                                            padding=0,
                                            act=None,
                                            param_attr=param_attr,
                                            bias_attr=bias_attr)
            self.outputs.append(block_out)

            # for the cross-scale connection, enlarge the feature map by interpolation
            if i < len(blocks) - 1:
                route = self.conv_bn(route, 256 // (2 ** i), filter_size=1, stride=1, padding=0)
                route = self.up_sample(route)  # up-sampling

        return self.outputs
 323 | 
 324 | # Tiny(精简版)YOLO模型
 325 | class YOLOv3Tiny(object):
 326 |     def __init__(self, class_num, anchors, anchor_mask):
 327 |         self.outputs = []
 328 |         self.downsample_ratio = 1
 329 |         self.anchor_mask = anchor_mask
 330 |         self.anchors = anchors
 331 |         self.class_num = class_num
 332 | 
 333 |         self.yolo_anchors = []
 334 |         self.yolo_classes = []
 335 |         for mask_pair in self.anchor_mask:
 336 |             mask_anchors = []
 337 |             for mask in mask_pair:
 338 |                 mask_anchors.append(self.anchors[2 * mask])
 339 |                 mask_anchors.append(self.anchors[2 * mask + 1])
 340 |             self.yolo_anchors.append(mask_anchors)
 341 |             self.yolo_classes.append(class_num)
 342 | 
 343 |     def name(self):
 344 |         return 'YOLOv3-tiny'
 345 | 
 346 |     def get_anchors(self):
 347 |         return self.anchors
 348 | 
 349 |     def get_anchor_mask(self):
 350 |         return self.anchor_mask
 351 | 
 352 |     def get_class_num(self):
 353 |         return self.class_num
 354 | 
 355 |     def get_downsample_ratio(self):
 356 |         return self.downsample_ratio
 357 | 
 358 |     def get_yolo_anchors(self):
 359 |         return self.yolo_anchors
 360 | 
 361 |     def get_yolo_classes(self):
 362 |         return self.yolo_classes
 363 | 
 364 |     def conv_bn(self,
 365 |                 input,
 366 |                 num_filters,
 367 |                 filter_size,
 368 |                 stride,
 369 |                 padding,
 370 |                 num_groups=1,
 371 |                 use_cudnn=True):
 372 |         conv = fluid.layers.conv2d(
 373 |             input=input,
 374 |             num_filters=num_filters,
 375 |             filter_size=filter_size,
 376 |             stride=stride,
 377 |             padding=padding,
 378 |             act=None,
 379 |             groups=num_groups,
 380 |             use_cudnn=use_cudnn,
 381 |             param_attr=ParamAttr(initializer=fluid.initializer.Normal(0., 0.02)),
 382 |             bias_attr=False)
 383 | 
 384 |         # batch_norm中的参数不需要参与正则化，所以主动使用正则系数为0的正则项屏蔽掉
 385 |         out = fluid.layers.batch_norm(
 386 |             input=conv, act='relu',
 387 |             param_attr=ParamAttr(initializer=fluid.initializer.Normal(0., 0.02), regularizer=L2Decay(0.)),
 388 |             bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0), regularizer=L2Decay(0.)))
 389 | 
 390 |         return out
 391 | 
 392 |     def depthwise_conv_bn(self, input, filter_size=3, stride=1, padding=1):
 393 |         num_filters = input.shape[1]
 394 |         return self.conv_bn(input,
 395 |                             num_filters=num_filters,
 396 |                             filter_size=filter_size,
 397 |                             stride=stride,
 398 |                             padding=padding,
 399 |                             num_groups=num_filters)
 400 | 
 401 |     def down_sample(self, input, pool_size=2, pool_stride=2):
 402 |         self.downsample_ratio *= 2
 403 |         return fluid.layers.pool2d(input=input, pool_type='max', pool_size=pool_size,
 404 |                                    pool_stride=pool_stride)
 405 | 
 406 |     def basic_block(self, input, num_filters):
 407 |         conv1 = self.conv_bn(input, num_filters, filter_size=3, stride=1, padding=1)
 408 |         out = self.down_sample(conv1)
 409 |         return out
 410 | 
 411 |     def up_sample(self, input, scale=2):
 412 |         # get dynamic upsample output shape
 413 |         shape_nchw = fluid.layers.shape(input)
 414 |         shape_hw = fluid.layers.slice(shape_nchw, axes=[0], starts=[2], ends=[4])
 415 |         shape_hw.stop_gradient = True
 416 |         in_shape = fluid.layers.cast(shape_hw, dtype='int32')
 417 |         out_shape = in_shape * scale
 418 |         out_shape.stop_gradient = True
 419 | 
 420 |         # reisze by actual_shape
 421 |         out = fluid.layers.resize_nearest(
 422 |             input=input,
 423 |             scale=scale,
 424 |             actual_shape=out_shape)
 425 |         return out
 426 | 
 427 |     def yolo_detection_block(self, input, num_filters):
 428 |         route = self.conv_bn(input, num_filters, filter_size=1, stride=1, padding=0)
 429 |         tip = self.conv_bn(route, num_filters * 2, filter_size=3, stride=1, padding=1)
 430 |         return route, tip
 431 | 
    def net(self, img):
        """Build the tiny backbone and its two detection heads.

        Five ``basic_block`` stages (16..256 filters) each halve the feature
        map; the last stage (512 filters) adds a 3x3 conv and a 1x1 conv to
        1024 channels without further down-sampling. Detection runs on the
        final 1024-channel map and, after up-sampling and concatenation, on
        the output of the fourth stage.

        Args:
            img: Input image variable.

        Returns:
            ``self.outputs`` after one raw prediction map per detection scale
            has been appended to it. The list is appended to, not reset, on
            every call.
        """
        # darknet-tiny
        stages = [16, 32, 64, 128, 256, 512]
        assert len(self.anchor_mask) <= len(stages), "anchor masks can't bigger than down_sample times"
        # 256x256
        tmp = img
        blocks = []
        for i, stage_count in enumerate(stages):
            if i == len(stages) - 1:
                block = self.conv_bn(tmp, stage_count, filter_size=3, stride=1, padding=1)
                blocks.append(block)
                # NOTE(review): both depthwise results are overwritten before use; the
                # 1x1 conv below reads blocks[-1] again. The layers are still created
                # in the graph. Confirm whether chaining was intended.
                block = self.depthwise_conv_bn(blocks[-1])
                block = self.depthwise_conv_bn(blocks[-1])
                block = self.conv_bn(blocks[-1], stage_count * 2, filter_size=1, stride=1, padding=0)
                blocks.append(block)
            else:
                tmp = self.basic_block(tmp, stage_count)
                blocks.append(tmp)

        # Keep only the two maps used for detection: the final one and the
        # output of the fourth basic block.
        blocks = [blocks[-1], blocks[3]]

        # yolo detector
        for i, block in enumerate(blocks):
            # Cross-scale (route) connection: merge the up-sampled route from the
            # previous scale with this scale's feature map.
            if i > 0:
                block = fluid.layers.concat(input=[route, block], axis=1)
            if i < 1:
                route, tip = self.yolo_detection_block(block, num_filters=256 // (2 ** i))
            else:
                tip = self.conv_bn(block, num_filters=256, filter_size=3, stride=1, padding=1)

            param_attr = ParamAttr(initializer=fluid.initializer.Normal(0., 0.02))
            bias_attr = ParamAttr(initializer=fluid.initializer.Constant(0.0), regularizer=L2Decay(0.))
            block_out = fluid.layers.conv2d(input=tip,
                                            # 5 elements represent x|y|h|w|score
                                            num_filters=len(self.anchor_mask[i]) * (self.class_num + 5),
                                            filter_size=1,
                                            stride=1,
                                            padding=0,
                                            act=None,
                                            param_attr=param_attr,
                                            bias_attr=bias_attr)
            self.outputs.append(block_out)
            # For the cross-scale connection, enlarge the feature map by interpolation
            # (skipped after the last scale).
            if i < len(blocks) - 1:
                route = self.conv_bn(route, 128 // (2 ** i), filter_size=1, stride=1, padding=0)
                route = self.up_sample(route)

        return self.outputs
 481 | 
 482 | 
 483 | def get_yolo(is_tiny, class_num, anchors, anchor_mask):
 484 |     if is_tiny:
 485 |         return YOLOv3Tiny(class_num, anchors, anchor_mask)
 486 |     else:
 487 |         return YOLOv3(class_num, anchors, anchor_mask)
 488 | 
 489 | 
 490 | class Sampler(object):
 491 |     """
 492 |     采样器，用于扣取采样
 493 |     """
 494 | 
 495 |     def __init__(self, max_sample, max_trial, min_scale, max_scale,
 496 |                  min_aspect_ratio, max_aspect_ratio, min_jaccard_overlap,
 497 |                  max_jaccard_overlap):
 498 |         self.max_sample = max_sample
 499 |         self.max_trial = max_trial
 500 |         self.min_scale = min_scale
 501 |         self.max_scale = max_scale
 502 |         self.min_aspect_ratio = min_aspect_ratio
 503 |         self.max_aspect_ratio = max_aspect_ratio
 504 |         self.min_jaccard_overlap = min_jaccard_overlap
 505 |         self.max_jaccard_overlap = max_jaccard_overlap
 506 | 
 507 | 
 508 | class bbox(object):
 509 |     """
 510 |     外界矩形框
 511 |     """
 512 | 
 513 |     def __init__(self, xmin, ymin, xmax, ymax):
 514 |         self.xmin = xmin
 515 |         self.ymin = ymin
 516 |         self.xmax = xmax
 517 |         self.ymax = ymax
 518 | 
 519 | 
 520 | # 坐标转换，由[x1, y1, w, h]转换为[center_x, center_y, w, h]
 521 | # 并转换为范围在[0, 1]之间的相对坐标
 522 | def box_to_center_relative(box, img_height, img_width):
 523 |     """
 524 |     Convert COCO annotations box with format [x1, y1, w, h] to
 525 |     center mode [center_x, center_y, w, h] and divide image width
 526 |     and height to get relative value in range[0, 1]
 527 |     """
 528 |     assert len(box) == 4, "box should be a len(4) list or tuple"
 529 |     x, y, w, h = box
 530 | 
 531 |     x1 = max(x, 0)
 532 |     x2 = min(x + w - 1, img_width - 1)
 533 |     y1 = max(y, 0)
 534 |     y2 = min(y + h - 1, img_height - 1)
 535 | 
 536 |     x = (x1 + x2) / 2 / img_width  # x中心坐标
 537 |     y = (y1 + y2) / 2 / img_height  # y中心坐标
 538 |     w = (x2 - x1) / img_width  # 框宽度/图片总宽度
 539 |     h = (y2 - y1) / img_height  # 框高度/图片总高度
 540 | 
 541 |     return np.array([x, y, w, h])
 542 | 
 543 | 
 544 | # 调整图像大小
 545 | def resize_img(img, sampled_labels, input_size):
 546 |     target_size = input_size
 547 |     img = img.resize((target_size[1], target_size[2]), Image.BILINEAR)
 548 |     return img
 549 | 
 550 | 
 551 | # 计算交并比
 552 | def box_iou_xywh(box1, box2):
 553 |     assert box1.shape[-1] == 4, "Box1 shape[-1] should be 4."
 554 |     assert box2.shape[-1] == 4, "Box2 shape[-1] should be 4."
 555 | 
 556 |     # 取两个框的坐标
 557 |     b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
 558 |     b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
 559 |     b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
 560 |     b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2
 561 | 
 562 |     inter_x1 = np.maximum(b1_x1, b2_x1)
 563 |     inter_x2 = np.minimum(b1_x2, b2_x2)
 564 |     inter_y1 = np.maximum(b1_y1, b2_y1)
 565 |     inter_y2 = np.minimum(b1_y2, b2_y2)
 566 |     inter_w = inter_x2 - inter_x1 + 1  # 相交部分宽度
 567 |     inter_h = inter_y2 - inter_y1 + 1  # 相交部分高度
 568 |     inter_w[inter_w < 0] = 0
 569 |     inter_h[inter_h < 0] = 0
 570 | 
 571 |     inter_area = inter_w * inter_h  # 相交面积
 572 |     b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)  # 框1的面积
 573 |     b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)  # 框2的面积
 574 | 
 575 |     return inter_area / (b1_area + b2_area - inter_area)  # 相集面积/并集面积
 576 | 
 577 | 
 578 | # box裁剪
 579 | def box_crop(boxes, labels, crop, img_shape):
 580 |     x, y, w, h = map(float, crop)
 581 |     im_w, im_h = map(float, img_shape)
 582 | 
 583 |     boxes = boxes.copy()
 584 |     boxes[:, 0], boxes[:, 2] = (boxes[:, 0] - boxes[:, 2] / 2) * im_w, (boxes[:, 0] + boxes[:, 2] / 2) * im_w
 585 |     boxes[:, 1], boxes[:, 3] = (boxes[:, 1] - boxes[:, 3] / 2) * im_h, (boxes[:, 1] + boxes[:, 3] / 2) * im_h
 586 | 
 587 |     crop_box = np.array([x, y, x + w, y + h])
 588 |     centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0
 589 |     mask = np.logical_and(crop_box[:2] <= centers, centers <= crop_box[2:]).all(axis=1)
 590 | 
 591 |     boxes[:, :2] = np.maximum(boxes[:, :2], crop_box[:2])
 592 |     boxes[:, 2:] = np.minimum(boxes[:, 2:], crop_box[2:])
 593 |     boxes[:, :2] -= crop_box[:2]
 594 |     boxes[:, 2:] -= crop_box[:2]
 595 | 
 596 |     mask = np.logical_and(mask, (boxes[:, :2] < boxes[:, 2:]).all(axis=1))
 597 |     boxes = boxes * np.expand_dims(mask.astype('float32'), axis=1)
 598 |     labels = labels * mask.astype('float32')
 599 |     boxes[:, 0], boxes[:, 2] = (boxes[:, 0] + boxes[:, 2]) / 2 / w, (boxes[:, 2] - boxes[:, 0]) / w
 600 |     boxes[:, 1], boxes[:, 3] = (boxes[:, 1] + boxes[:, 3]) / 2 / h, (boxes[:, 3] - boxes[:, 1]) / h
 601 | 
 602 |     return boxes, labels, mask.sum()
 603 | 
 604 | 
 605 | # 图像增加：对比度，饱和度，明暗，颜色，扩张
 606 | def random_brightness(img):  # 亮度
 607 |     prob = np.random.uniform(0, 1)
 608 | 
 609 |     if prob < train_params['image_distort_strategy']['brightness_prob']:
 610 |         brightness_delta = train_params['image_distort_strategy']['brightness_delta']  # 默认值0.125
 611 |         delta = np.random.uniform(-brightness_delta, brightness_delta) + 1  # 产生均匀分布随机值
 612 |         img = ImageEnhance.Brightness(img).enhance(delta)  # 调整图像亮度
 613 | 
 614 |     return img
 615 | 
 616 | 
 617 | def random_contrast(img):  # 对比度
 618 |     prob = np.random.uniform(0, 1)
 619 | 
 620 |     if prob < train_params['image_distort_strategy']['contrast_prob']:
 621 |         contrast_delta = train_params['image_distort_strategy']['contrast_delta']
 622 |         delta = np.random.uniform(-contrast_delta, contrast_delta) + 1
 623 |         img = ImageEnhance.Contrast(img).enhance(delta)
 624 | 
 625 |     return img
 626 | 
 627 | 
 628 | def random_saturation(img):  # 饱和度
 629 |     prob = np.random.uniform(0, 1)
 630 | 
 631 |     if prob < train_params['image_distort_strategy']['saturation_prob']:
 632 |         saturation_delta = train_params['image_distort_strategy']['saturation_delta']
 633 |         delta = np.random.uniform(-saturation_delta, saturation_delta) + 1
 634 |         img = ImageEnhance.Color(img).enhance(delta)
 635 | 
 636 |     return img
 637 | 
 638 | 
 639 | def random_hue(img):  # 色调
 640 |     prob = np.random.uniform(0, 1)
 641 | 
 642 |     if prob < train_params['image_distort_strategy']['hue_prob']:
 643 |         hue_delta = train_params['image_distort_strategy']['hue_delta']
 644 |         delta = np.random.uniform(-hue_delta, hue_delta)
 645 |         img_hsv = np.array(img.convert('HSV'))
 646 |         img_hsv[:, :, 0] = img_hsv[:, :, 0] + delta
 647 |         img = Image.fromarray(img_hsv, mode='HSV').convert('RGB')
 648 | 
 649 |     return img
 650 | 
 651 | 
 652 | def distort_image(img):  # 图像扭曲
 653 |     prob = np.random.uniform(0, 1)
 654 |     # Apply different distort order
 655 |     if prob > 0.5:
 656 |         img = random_brightness(img)
 657 |         img = random_contrast(img)
 658 |         img = random_saturation(img)
 659 |         img = random_hue(img)
 660 |     else:
 661 |         img = random_brightness(img)
 662 |         img = random_saturation(img)
 663 |         img = random_hue(img)
 664 |         img = random_contrast(img)
 665 |     return img
 666 | 
 667 | 
 668 | # 随机裁剪
 669 | def random_crop(img, boxes, labels, scales=[0.3, 1.0], max_ratio=2.0, constraints=None, max_trial=50):
 670 |     if random.random() > 0.6:
 671 |         return img, boxes, labels
 672 |     if len(boxes) == 0:
 673 |         return img, boxes, labels
 674 | 
 675 |     if not constraints:
 676 |         constraints = [(0.1, 1.0),
 677 |                        (0.3, 1.0),
 678 |                        (0.5, 1.0),
 679 |                        (0.7, 1.0),
 680 |                        (0.9, 1.0),
 681 |                        (0.0, 1.0)]  # 最小/最大交并比值
 682 | 
 683 |     w, h = img.size
 684 |     crops = [(0, 0, w, h)]
 685 | 
 686 |     for min_iou, max_iou in constraints:
 687 |         for _ in range(max_trial):
 688 |             scale = random.uniform(scales[0], scales[1])
 689 |             aspect_ratio = random.uniform(max(1 / max_ratio, scale * scale), \
 690 |                                           min(max_ratio, 1 / scale / scale))
 691 |             crop_h = int(h * scale / np.sqrt(aspect_ratio))
 692 |             crop_w = int(w * scale * np.sqrt(aspect_ratio))
 693 |             crop_x = random.randrange(w - crop_w)
 694 |             crop_y = random.randrange(h - crop_h)
 695 |             crop_box = np.array([[
 696 |                 (crop_x + crop_w / 2.0) / w,
 697 |                 (crop_y + crop_h / 2.0) / h,
 698 |                 crop_w / float(w),
 699 |                 crop_h / float(h)
 700 |             ]])
 701 | 
 702 |             iou = box_iou_xywh(crop_box, boxes)
 703 |             if min_iou <= iou.min() and max_iou >= iou.max():
 704 |                 crops.append((crop_x, crop_y, crop_w, crop_h))
 705 |                 break
 706 | 
 707 |     while crops:
 708 |         crop = crops.pop(np.random.randint(0, len(crops)))
 709 |         crop_boxes, crop_labels, box_num = box_crop(boxes, labels, crop, (w, h))
 710 |         if box_num < 1:
 711 |             continue
 712 |         img = img.crop((crop[0], crop[1], crop[0] + crop[2],
 713 |                         crop[1] + crop[3])).resize(img.size, Image.LANCZOS)
 714 |         return img, crop_boxes, crop_labels
 715 |     return img, boxes, labels
 716 | 
 717 | 
 718 | # 扩张
 719 | def random_expand(img, gtboxes, keep_ratio=True):
 720 |     if np.random.uniform(0, 1) < train_params['image_distort_strategy']['expand_prob']:
 721 |         return img, gtboxes
 722 | 
 723 |     max_ratio = train_params['image_distort_strategy']['expand_max_ratio']
 724 |     w, h = img.size
 725 |     c = 3
 726 |     ratio_x = random.uniform(1, max_ratio)
 727 |     if keep_ratio:
 728 |         ratio_y = ratio_x
 729 |     else:
 730 |         ratio_y = random.uniform(1, max_ratio)
 731 |     oh = int(h * ratio_y)
 732 |     ow = int(w * ratio_x)
 733 |     off_x = random.randint(0, ow - w)
 734 |     off_y = random.randint(0, oh - h)
 735 | 
 736 |     out_img = np.zeros((oh, ow, c), np.uint8)
 737 |     for i in range(c):
 738 |         out_img[:, :, i] = train_params['mean_rgb'][i]
 739 | 
 740 |     out_img[off_y: off_y + h, off_x: off_x + w, :] = img
 741 |     gtboxes[:, 0] = ((gtboxes[:, 0] * w) + off_x) / float(ow)
 742 |     gtboxes[:, 1] = ((gtboxes[:, 1] * h) + off_y) / float(oh)
 743 |     gtboxes[:, 2] = gtboxes[:, 2] / ratio_x
 744 |     gtboxes[:, 3] = gtboxes[:, 3] / ratio_y
 745 | 
 746 |     return Image.fromarray(out_img), gtboxes
 747 | 
 748 | 
 749 | # 预处理：图像样本增强，维度转换
 750 | def preprocess(img, bbox_labels, input_size, mode):
 751 |     img_width, img_height = img.size
 752 |     sample_labels = np.array(bbox_labels)
 753 | 
 754 |     if mode == 'train':
 755 |         if train_params['apply_distort']:  # 是否扭曲增强
 756 |             img = distort_image(img)
 757 | 
 758 |         img, gtboxes = random_expand(img, sample_labels[:, 1:5])  # 扩展增强
 759 |         img, gtboxes, gtlabels = random_crop(img, gtboxes, sample_labels[:, 0])  # 随机裁剪
 760 |         sample_labels[:, 0] = gtlabels
 761 |         sample_labels[:, 1:5] = gtboxes
 762 | 
 763 |     img = resize_img(img, sample_labels, input_size)
 764 |     img = np.array(img).astype('float32')
 765 |     img -= train_params['mean_rgb']
 766 |     img = img.transpose((2, 0, 1))  # HWC to CHW
 767 |     img *= 0.007843
 768 |     return img, sample_labels
 769 | 
 770 | 
 771 | # 数据读取器
 772 | # 根据样本文件，读取图片、并做数据增强，返回图片数据、边框、标签
 773 | def custom_reader(file_list, data_dir, input_size, mode):
 774 |     def reader():
 775 |         np.random.shuffle(file_list)  # 打乱文件列表
 776 | 
 777 |         for line in file_list:  # 读取行，每行一个图片及标注
 778 |             if mode == 'train' or mode == 'eval':
 779 |                 ######################  以下可能是需要自定义修改的部分   ############################
 780 |                 parts = line.split('\t')  # 按照tab键拆分
 781 |                 image_path = parts[0]
 782 | 
 783 |                 img = Image.open(os.path.join(data_dir, image_path)) # 读取图像数据
 784 |                 if img.mode != 'RGB':
 785 |                     img = img.convert('RGB')
 786 |                 im_width, im_height = img.size
 787 | 
 788 |                 # bbox 的列表，每一个元素为这样
 789 |                 # layout: label | x-center | y-cneter | width | height | difficult
 790 |                 bbox_labels = []
 791 |                 for object_str in parts[1:]:  # 循环处理每一个目标标注信息
 792 |                     if len(object_str) <= 1:
 793 |                         continue
 794 | 
 795 |                     bbox_sample = []
 796 |                     object = json.loads(object_str)
 797 |                     bbox_sample.append(float(train_params['label_dict'][object['value']]))
 798 |                     bbox = object['coordinate']  # 获取框坐标
 799 |                     # 计算x,y,w,h
 800 |                     box = [bbox[0][0], bbox[0][1], bbox[1][0] - bbox[0][0], bbox[1][1] - bbox[0][1]]
 801 |                     bbox = box_to_center_relative(box, im_height, im_width)  # 坐标转换
 802 |                     bbox_sample.append(float(bbox[0]))
 803 |                     bbox_sample.append(float(bbox[1]))
 804 |                     bbox_sample.append(float(bbox[2]))
 805 |                     bbox_sample.append(float(bbox[3]))
 806 |                     difficult = float(0)
 807 |                     bbox_sample.append(difficult)
 808 |                     bbox_labels.append(bbox_sample)
 809 |                 ######################  可能需要自定义修改部分结束   ############################
 810 | 
 811 |                 if len(bbox_labels) == 0:
 812 |                     continue
 813 | 
 814 |                 img, sample_labels = preprocess(img, bbox_labels, input_size, mode)  # 预处理
 815 |                 # sample_labels = np.array(sample_labels)
 816 |                 if len(sample_labels) == 0:
 817 |                     continue
 818 | 
 819 |                 boxes = sample_labels[:, 1:5]  # 坐标
 820 |                 lbls = sample_labels[:, 0].astype('int32')  # 标签
 821 |                 difficults = sample_labels[:, -1].astype('int32')
 822 |                 max_box_num = train_params['max_box_num']  # 一副图像最多多少个目标物体
 823 |                 cope_size = max_box_num if len(boxes) >= max_box_num else len(boxes)  # 控制最大目标数量
 824 |                 ret_boxes = np.zeros((max_box_num, 4), dtype=np.float32)
 825 |                 ret_lbls = np.zeros((max_box_num), dtype=np.int32)
 826 |                 ret_difficults = np.zeros((max_box_num), dtype=np.int32)
 827 |                 ret_boxes[0: cope_size] = boxes[0: cope_size]
 828 |                 ret_lbls[0: cope_size] = lbls[0: cope_size]
 829 |                 ret_difficults[0: cope_size] = difficults[0: cope_size]
 830 | 
 831 |                 yield img, ret_boxes, ret_lbls
 832 | 
 833 |             elif mode == 'test':
 834 |                 img_path = os.path.join(line)
 835 | 
 836 |                 yield Image.open(img_path)
 837 | 
 838 |     return reader
 839 | 
 840 | 
 841 | # 批量、随机数据读取器
 842 | def single_custom_reader(file_path, data_dir, input_size, mode):
 843 |     file_path = os.path.join(data_dir, file_path)
 844 | 
 845 |     images = [line.strip() for line in open(file_path)]
 846 |     reader = custom_reader(images, data_dir, input_size, mode)
 847 |     reader = paddle.reader.shuffle(reader, train_params['train_batch_size'])
 848 |     reader = paddle.batch(reader, train_params['train_batch_size'])
 849 | 
 850 |     return reader
 851 | 
 852 | 
 853 | # 定义优化器
 854 | def optimizer_sgd_setting():
 855 |     batch_size = train_params["train_batch_size"]  # batch大小
 856 |     iters = train_params["image_count"] // batch_size  # 计算轮次
 857 |     iters = 1 if iters < 1 else iters
 858 |     '''
 859 |     learning_strategy = train_params['sgd_strategy']
 860 |     lr = learning_strategy['learning_rate']  # 学习率
 861 | 
 862 |     boundaries = [i * iters for i in learning_strategy["lr_epochs"]]
 863 |     values = [i * lr for i in learning_strategy["lr_decay"]]
 864 |     logger.info("origin learning rate: {0} boundaries: {1}  values: {2}".format(lr, boundaries, values))
 865 | 
 866 | 
 867 |     optimizer = fluid.optimizer.SGDOptimizer(
 868 |         learning_rate=fluid.layers.piecewise_decay(boundaries, values),  # 分段衰减学习率
 869 |         # learning_rate=lr,
 870 |         regularization=fluid.regularizer.L2Decay(0.00005))
 871 |     '''
 872 |     optimizer = fluid.optimizer.AdamOptimizer(learning_rate=0.01,beta1=0.9,beta2=0.999,regularization=fluid.regularizer.L2Decay(0.00005))
 873 |     return optimizer
 874 | 
 875 | 
 876 | # 创建program, feeder及yolo模型
 877 | def build_program_with_feeder(main_prog, startup_prog, place):
 878 |     max_box_num = train_params['max_box_num']
 879 |     ues_tiny = train_params['use_tiny']  # 获取是否使用tiny yolo参数
 880 |     yolo_config = train_params['yolo_tiny_cfg'] if ues_tiny else train_params['yolo_cfg']
 881 | 
 882 |     with fluid.program_guard(main_prog, startup_prog):  # 更改全局主程序和启动程序
 883 |         img = fluid.layers.data(name='img', shape=yolo_config['input_size'], dtype='float32')  # 图像
 884 |         gt_box = fluid.layers.data(name='gt_box', shape=[max_box_num, 4], dtype='float32')  # 边框
 885 |         gt_label = fluid.layers.data(name='gt_label', shape=[max_box_num], dtype='int32')  # 标签
 886 | 
 887 |         feeder = fluid.DataFeeder(feed_list=[img, gt_box, gt_label],
 888 |                                   place=place,
 889 |                                   program=main_prog)  # 定义feeder
 890 |         reader = single_custom_reader(train_params['train_list'],
 891 |                                       train_params['data_dir'],
 892 |                                       yolo_config['input_size'], 'train')  # 读取器
 893 |         # 获取yolo参数
 894 |         ues_tiny = train_params['use_tiny']
 895 |         yolo_config = train_params['yolo_tiny_cfg'] if ues_tiny else train_params['yolo_cfg']
 896 | 
 897 |         with fluid.unique_name.guard():
 898 |             # 创建yolo模型
 899 |             model = get_yolo(ues_tiny, train_params['class_dim'], yolo_config['anchors'],
 900 |                              yolo_config['anchor_mask'])
 901 |             outputs = model.net(img)
 902 |         return feeder, reader, get_loss(model, outputs, gt_box, gt_label)
 903 | 
 904 | 
 905 | # 损失函数
 906 | def get_loss(model, outputs, gt_box, gt_label):
 907 |     losses = []
 908 |     downsample_ratio = model.get_downsample_ratio()
 909 | 
 910 |     with fluid.unique_name.guard('train'):
 911 |         for i, out in enumerate(outputs):
 912 |             loss = fluid.layers.yolov3_loss(x=out,
 913 |                                             gt_box=gt_box,  # 真实边框
 914 |                                             gt_label=gt_label,  # 标签
 915 |                                             anchors=model.get_anchors(),  # 锚点
 916 |                                             anchor_mask=model.get_anchor_mask()[i],
 917 |                                             class_num=model.get_class_num(),
 918 |                                             ignore_thresh=train_params['ignore_thresh'],
 919 |                                             # 对于类别不多的情况，设置为 False 会更合适一些，不然 score 会很小
 920 |                                             use_label_smooth=False,
 921 |                                             downsample_ratio=downsample_ratio)
 922 |             losses.append(fluid.layers.reduce_mean(loss))
 923 |             downsample_ratio //= 2
 924 |         loss = sum(losses)
 925 |         optimizer = optimizer_sgd_setting()
 926 |         optimizer.minimize(loss)
 927 |         return loss
 928 | 
 929 | 
 930 | # 持久化参数加载
 931 | def load_pretrained_params(exe, program):
 932 |     if train_params['continue_train'] and os.path.exists(train_params['save_model_dir']):
 933 |         logger.info('load param from retrain model')
 934 |         fluid.io.load_persistables(executor=exe,
 935 |                                    dirname=train_params['save_model_dir'],
 936 |                                    main_program=program)
 937 |     elif train_params['pretrained'] and os.path.exists(train_params['pretrained_model_dir']):
 938 |         logger.info('load param from pretrained model')
 939 | 
 940 |         def if_exist(var):
 941 |             return os.path.exists(os.path.join(train_params['pretrained_model_dir'], var.name))
 942 | 
 943 |         fluid.io.load_vars(exe, train_params['pretrained_model_dir'], main_program=program,
 944 |                            predicate=if_exist)
 945 | 
 946 | 
 947 | # 执行训练
 948 | def train():
 949 |     init_log_config()
 950 |     init_train_parameters()
 951 | 
 952 |     logger.info("start train YOLOv3, train params:%s", str(train_params))
 953 |     logger.info("create place, use gpu:" + str(train_params['use_gpu']))
 954 | 
 955 |     place = fluid.CUDAPlace(0) if train_params['use_gpu'] else fluid.CPUPlace()
 956 | 
 957 |     logger.info("build network and program")
 958 |     train_program = fluid.Program()
 959 |     start_program = fluid.Program()
 960 |     feeder, reader, loss = build_program_with_feeder(train_program, start_program, place)
 961 | 
 962 |     logger.info("build executor and init params")
 963 | 
 964 |     exe = fluid.Executor(place)
 965 |     exe.run(start_program)
 966 |     train_fetch_list = [loss.name]
 967 |     load_pretrained_params(exe, train_program)  # 加载模型及参数
 968 | 
 969 |     stop_strategy = train_params['early_stop']
 970 |     successive_limit = stop_strategy['successive_limit']
 971 |     sample_freq = stop_strategy['sample_frequency']
 972 |     min_curr_map = stop_strategy['min_curr_map']
 973 |     min_loss = stop_strategy['min_loss']
 974 |     stop_train = False
 975 |     successive_count = 0
 976 |     total_batch_count = 0
 977 |     valid_thresh = train_params['valid_thresh']
 978 |     nms_thresh = train_params['nms_thresh']
 979 |     current_best_loss = 10000000000.0
 980 | 
 981 |     # 开始迭代训练
 982 |     for pass_id in range(train_params["num_epochs"]):
 983 |         logger.info("current pass: {}, start read image".format(pass_id))
 984 |         batch_id = 0
 985 |         total_loss = 0.0
 986 | 
 987 |         for batch_id, data in enumerate(reader()):
 988 |             t1 = time.time()
 989 | 
 990 |             loss = exe.run(train_program,
 991 |                            feed=feeder.feed(data),
 992 |                            fetch_list=train_fetch_list)  # 执行训练
 993 | 
 994 |             period = time.time() - t1
 995 |             loss = np.mean(np.array(loss))
 996 |             total_loss += loss
 997 |             batch_id += 1
 998 |             total_batch_count += 1
 999 | 
1000 |             if batch_id % 10 == 0:  # 调整日志输出的频率
1001 |                 logger.info(
1002 |                     "pass {}, trainbatch {}, loss {} time {}".format(pass_id, batch_id, loss, "%2.2f sec" % period))
1003 | 
1004 |         pass_mean_loss = total_loss / batch_id
1005 |         logger.info("pass {0} train result, current pass mean loss: {1}".format(pass_id, pass_mean_loss))
1006 | 
1007 |         # 采用每训练完一轮停止办法，可以调整为更精细的保存策略
1008 |         if pass_mean_loss < current_best_loss:
1009 |             logger.info("temp save {} epcho train result, current best pass loss {}".format(pass_id, pass_mean_loss))
1010 |             fluid.io.save_persistables(dirname=train_params['save_model_dir'], main_program=train_program,
1011 |                                        executor=exe)
1012 |             current_best_loss = pass_mean_loss
1013 | 
1014 |     logger.info("training till last epcho, end training")
1015 |     fluid.io.save_persistables(dirname=train_params['save_model_dir'], main_program=train_program, executor=exe)
1016 | 
1017 | 
# Script entry point: run training when this file is executed directly.
if __name__ == '__main__':
    train()
1020 | 
1021 | 
1022 | 
    # Freeze the trained model and save it for inference
1024 | import paddle
1025 | import paddle.fluid as fluid
1026 | import codecs
1027 | 
1028 | init_train_parameters()
1029 | 
1030 | 
def freeze_model():
    """Export the trained model as an inference model with NMS attached.

    Rebuilds the network in the default main program, decodes every output
    map with ``yolo_box``, merges the detections through ``multiclass_nms``,
    loads the trained persistables from ``save_model_dir`` and writes the
    inference model (feeds ``image`` and ``image_shape``) to ``freeze_dir``.
    Runs on the CPU.
    """
    exe = fluid.Executor(fluid.CPUPlace())

    use_tiny = train_params['use_tiny']
    yolo_config = train_params['yolo_tiny_cfg'] if use_tiny else train_params['yolo_cfg']
    checkpoint_dir = train_params['save_model_dir']

    model = get_yolo(use_tiny, train_params['class_dim'],
                     yolo_config['anchors'], yolo_config['anchor_mask'])
    image = fluid.layers.data(name='image', shape=yolo_config['input_size'], dtype='float32')
    image_shape = fluid.layers.data(name="image_shape", shape=[2], dtype='int32')

    outputs = model.net(image)
    # Starts at the model's ratio and is halved for each following output.
    ratio = model.get_downsample_ratio()

    decoded_boxes = []
    decoded_scores = []
    for index, head in enumerate(outputs):
        box, score = fluid.layers.yolo_box(x=head,
                                           img_size=image_shape,
                                           anchors=model.get_yolo_anchors()[index],
                                           class_num=model.get_class_num(),
                                           conf_thresh=train_params['valid_thresh'],
                                           downsample_ratio=ratio,
                                           name="yolo_box_" + str(index))
        decoded_boxes.append(box)
        decoded_scores.append(fluid.layers.transpose(score, perm=[0, 2, 1]))
        ratio //= 2

    # Merge the detections of all scales before non-maximum suppression.
    all_boxes = fluid.layers.concat(decoded_boxes, axis=1)
    all_scores = fluid.layers.concat(decoded_scores, axis=2)
    pred = fluid.layers.multiclass_nms(bboxes=all_boxes,
                                       scores=all_scores,
                                       score_threshold=train_params['valid_thresh'],
                                       nms_top_k=train_params['nms_top_k'],
                                       keep_top_k=train_params['nms_pos_k'],
                                       nms_threshold=train_params['nms_thresh'],
                                       background_label=-1,
                                       name="multiclass_nms")

    freeze_program = fluid.default_main_program()

    # Load the trained weights first, then switch the program to test mode.
    fluid.io.load_persistables(exe, checkpoint_dir, freeze_program)
    freeze_program = freeze_program.clone(for_test=True)
    print("freeze out: {0}, pred layout: {1}".format(train_params['freeze_dir'], pred))
    # Save the inference model
    fluid.io.save_inference_model(train_params['freeze_dir'],
                                  ['image', 'image_shape'],
                                  pred, exe, freeze_program)
    print("freeze end")
1079 | 
1080 | 
# Script entry point: export the frozen inference model when run directly.
if __name__ == '__main__':
    freeze_model()
1083 | 
1084 | 
1085 | # 预测
1086 | import codecs
1087 | import sys
1088 | import numpy as np
1089 | import time
1090 | import paddle
1091 | import paddle.fluid as fluid
1092 | import math
1093 | import functools
1094 | 
1095 | from IPython.display import display
1096 | from PIL import Image
1097 | from PIL import ImageFont
1098 | from PIL import ImageDraw
1099 | from collections import namedtuple
1100 | 
# Populate train_params, then select the configuration of the YOLO variant in use.
init_train_parameters()
ues_tiny = train_params['use_tiny']
if ues_tiny:
    yolo_config = train_params['yolo_tiny_cfg']
else:
    yolo_config = train_params['yolo_cfg']

# Module-level settings taken from the selected configuration and train_params.
target_size = yolo_config['input_size']
anchors = yolo_config['anchors']
anchor_mask = yolo_config['anchor_mask']
label_dict = train_params['num_dict']
class_dim = train_params['class_dim']
print("label_dict:{} class dim:{}".format(label_dict, class_dim))

# Run on the first GPU when train_params enables it, otherwise on the CPU.
if train_params['use_gpu']:
    place = fluid.CUDAPlace(0)
else:
    place = fluid.CPUPlace()
exe = fluid.Executor(place)

# Load the frozen inference model from the freeze directory.
path = train_params['freeze_dir']
inference_program, feed_target_names, fetch_targets = fluid.io.load_inference_model(
    dirname=path, executor=exe)
1117 | 
1118 | 
# Draw the bounding rectangles on the image
def draw_bbox_image(img, boxes, labels, save_name):
    """
    Draw every box and its class name on the image, then save and display it.

    A non-RGB image is converted to RGB before drawing: the text fill below is
    an RGB tuple, and the caller in this file saves the result as ``.jpg``,
    which cannot hold images with an alpha channel or a palette.
    :param img: PIL image to annotate (an RGB image is modified in place)
    :param boxes: sequence of boxes, each indexed as (xmin, ymin, xmax, ymax)
    :param labels: sequence of class ids, one per box, looked up in ``label_dict``
    :param save_name: path the annotated image is written to
    :return: None
    """
    if img.mode != 'RGB':
        img = img.convert('RGB')

    draw = ImageDraw.Draw(img)  # drawing context bound to the image
    for box, label in zip(boxes, labels):
        xmin, ymin, xmax, ymax = box[0], box[1], box[2], box[3]
        draw.rectangle((xmin, ymin, xmax, ymax), None, 'red')  # box outline
        draw.text((xmin, ymin), label_dict[int(label)], (255, 255, 0))  # class name
    img.save(save_name)
    display(img)
1130 | 
1131 | 
def resize_img(img, target_size):
    """
    Resize the image to the network input size; the aspect ratio is not kept.

    ``target_size`` is laid out as (channels, height, width), while PIL's
    ``resize`` expects (width, height), so the two spatial entries are swapped
    before the call. For square input sizes the result is unchanged.
    :param img: PIL image to resize
    :param target_size: network input size as (channels, height, width)
    :return: the resized PIL image
    """
    height, width = target_size[1], target_size[2]
    return img.resize((width, height), Image.BILINEAR)
1141 | 
1142 | 
def read_image(img_path):
    """
    Open an image file and turn it into a network input tensor.

    :param img_path: path of the image file to open
    :return: tuple of (the opened PIL image, a float32 array with a leading
             batch axis in CHW layout, a copy of the resized PIL image)
    """
    origin = Image.open(img_path)
    scaled = resize_img(origin, target_size)
    # keep a copy of the resized picture as it was before any mode conversion
    resized_img = scaled.copy()
    rgb = scaled if scaled.mode == 'RGB' else scaled.convert('RGB')
    chw = np.array(rgb).astype('float32').transpose((2, 0, 1))  # HWC to CHW
    # shift by 127.5 and scale by 0.007843 (about 1/127.5)
    chw -= 127.5
    chw *= 0.007843
    batch = chw[np.newaxis, :]
    return origin, batch, resized_img
1159 | 
1160 | 
def infer(image_path):
    """
    Run detection on one image and save the annotated result next to it.

    The result is written to ``<image path without extension>-result.jpg``.
    The extension is removed with ``os.path.splitext`` so that paths without
    an extension, or with a dot only in a directory name, are handled.
    :param image_path: path of the image to run detection on
    :return: None
    """
    origin, tensor_img, _ = read_image(image_path)
    input_w, input_h = origin.size[0], origin.size[1]
    # the model is fed the original image size as (height, width)
    image_shape = np.array([input_h, input_w], dtype='int32')

    t1 = time.time()
    # run the prediction
    batch_outputs = exe.run(inference_program,
                            feed={feed_target_names[0]: tensor_img,
                                  feed_target_names[1]: image_shape[np.newaxis, :]},
                            fetch_list=fetch_targets,
                            return_numpy=False)
    period = time.time() - t1
    print("predict cost time:{0}".format("%2.2f sec" % period))
    bboxes = np.array(batch_outputs[0])  # prediction result

    # each detection row is read below as (label, score, 4 box values);
    # any other layout is treated as "nothing detected"
    if bboxes.ndim != 2 or bboxes.shape[1] != 6:
        print("No object found in {}".format(image_path))
        return
    labels = bboxes[:, 0].astype('int32')  # class ids
    boxes = bboxes[:, 2:].astype('float32')  # box coordinates

    out_path = os.path.splitext(image_path)[0] + '-result.jpg'
    draw_bbox_image(origin, boxes, labels, out_path)
1195 | 
1196 | 
# Script entry point: run detection on one fixed sample image.
if __name__ == '__main__':
    # The command-line variant below is disabled; a hard-coded path is used instead.
    #image_name = sys.argv[1]
    #image_path = image_name
    image_path = "data/data6045/lslm_test/23.jpg"
    infer(image_path)


--------------------------------------------------------------------------------
/output/classes/0.0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/imajiayu/object_detection_based-on-yolov3/de6ceb99541ed9544d593675872fac3c35f18deb/output/classes/0.0.png


--------------------------------------------------------------------------------
/output/classes/1.0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/imajiayu/object_detection_based-on-yolov3/de6ceb99541ed9544d593675872fac3c35f18deb/output/classes/1.0.png


--------------------------------------------------------------------------------
/output/classes/2.0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/imajiayu/object_detection_based-on-yolov3/de6ceb99541ed9544d593675872fac3c35f18deb/output/classes/2.0.png


--------------------------------------------------------------------------------
/output/detection-results-info.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/imajiayu/object_detection_based-on-yolov3/de6ceb99541ed9544d593675872fac3c35f18deb/output/detection-results-info.png


--------------------------------------------------------------------------------
/output/ground-truth-info.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/imajiayu/object_detection_based-on-yolov3/de6ceb99541ed9544d593675872fac3c35f18deb/output/ground-truth-info.png


--------------------------------------------------------------------------------
/output/lamr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/imajiayu/object_detection_based-on-yolov3/de6ceb99541ed9544d593675872fac3c35f18deb/output/lamr.png


--------------------------------------------------------------------------------
/output/mAP.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/imajiayu/object_detection_based-on-yolov3/de6ceb99541ed9544d593675872fac3c35f18deb/output/mAP.png


--------------------------------------------------------------------------------
/output/output.txt:
--------------------------------------------------------------------------------
 1 | # AP and precision/recall per class
 2 | 47.18% = 0.0 AP 
 3 |  Precision: ['1.00', '1.00', '1.00', '0.75', '0.60', '0.67', '0.71', '0.75', '0.78', '0.80', '0.82', '0.83', '0.85', '0.86', '0.87', '0.81', '0.76', '0.78', '0.79', '0.80', '0.81', '0.77', '0.78', '0.79', '0.76', '0.73', '0.70', '0.68', '0.69', '0.70', '0.68', '0.69', '0.67', '0.65', '0.66', '0.64', '0.62', '0.61', '0.62', '0.60', '0.61', '0.62', '0.60', '0.59', '0.58', '0.57', '0.55', '0.54', '0.53', '0.52', '0.51', '0.50', '0.49', '0.48', '0.47', '0.46', '0.46', '0.45', '0.44', '0.43', '0.43', '0.42', '0.41', '0.41', '0.40', '0.39', '0.39', '0.38', '0.38', '0.37', '0.37', '0.36', '0.36', '0.35', '0.35', '0.34', '0.34', '0.33', '0.33', '0.33', '0.32', '0.32', '0.31', '0.31', '0.31', '0.31', '0.31', '0.31', '0.30', '0.30', '0.30', '0.29', '0.29', '0.30', '0.29', '0.29', '0.29', '0.29', '0.28', '0.28', '0.28', '0.27', '0.27', '0.27', '0.27', '0.26']
 4 |  Recall :['0.02', '0.04', '0.07', '0.07', '0.07', '0.09', '0.11', '0.13', '0.15', '0.17', '0.20', '0.22', '0.24', '0.26', '0.28', '0.28', '0.28', '0.30', '0.33', '0.35', '0.37', '0.37', '0.39', '0.41', '0.41', '0.41', '0.41', '0.41', '0.43', '0.46', '0.46', '0.48', '0.48', '0.48', '0.50', '0.50', '0.50', '0.50', '0.52', '0.52', '0.54', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.57', '0.59', '0.59', '0.59', '0.59', '0.59', '0.59', '0.59', '0.59', '0.61', '0.61', '0.61', '0.61', '0.61', '0.61', '0.61', '0.61', '0.61', '0.61', '0.61', '0.61', '0.61']
 5 | 
 6 | 80.87% = 1.0 AP 
 7 |  Precision: ['1.00', '1.00', '1.00', '1.00', '1.00', '1.00', '1.00', '1.00', '1.00', '1.00', '1.00', '1.00', '1.00', '0.93', '0.87', '0.88', '0.82', '0.78', '0.74', '0.70', '0.67', '0.68', '0.65', '0.62', '0.60', '0.58', '0.56', '0.54', '0.52', '0.50', '0.48', '0.47', '0.45', '0.44', '0.43', '0.42', '0.41', '0.39', '0.38', '0.38']
 8 |  Recall :['0.06', '0.11', '0.17', '0.22', '0.28', '0.33', '0.39', '0.44', '0.50', '0.56', '0.61', '0.67', '0.72', '0.72', '0.72', '0.78', '0.78', '0.78', '0.78', '0.78', '0.78', '0.83', '0.83', '0.83', '0.83', '0.83', '0.83', '0.83', '0.83', '0.83', '0.83', '0.83', '0.83', '0.83', '0.83', '0.83', '0.83', '0.83', '0.83', '0.83']
 9 | 
10 | 64.01% = 2.0 AP 
11 |  Precision: ['1.00', '1.00', '1.00', '1.00', '0.80', '0.67', '0.71', '0.75', '0.67', '0.70', '0.73', '0.75', '0.69', '0.71', '0.73', '0.69', '0.71', '0.67', '0.68', '0.70', '0.67', '0.64', '0.65', '0.67', '0.68', '0.65', '0.63', '0.61', '0.59', '0.57', '0.55', '0.53', '0.52', '0.50', '0.51', '0.50', '0.51', '0.53', '0.51', '0.50', '0.49', '0.48', '0.47', '0.45', '0.44', '0.43', '0.43', '0.42', '0.41', '0.42', '0.41', '0.40', '0.40', '0.39', '0.38', '0.38', '0.37', '0.36', '0.36', '0.35', '0.34', '0.34', '0.33', '0.33', '0.32', '0.32', '0.31', '0.31', '0.30', '0.30', '0.30', '0.29', '0.29', '0.28', '0.28', '0.28', '0.27', '0.27', '0.27', '0.26', '0.26', '0.26', '0.25', '0.25', '0.25', '0.24', '0.24', '0.24', '0.24', '0.23', '0.23', '0.23', '0.23']
12 |  Recall :['0.04', '0.08', '0.12', '0.17', '0.17', '0.17', '0.21', '0.25', '0.25', '0.29', '0.33', '0.38', '0.38', '0.42', '0.46', '0.46', '0.50', '0.50', '0.54', '0.58', '0.58', '0.58', '0.62', '0.67', '0.71', '0.71', '0.71', '0.71', '0.71', '0.71', '0.71', '0.71', '0.71', '0.71', '0.75', '0.75', '0.79', '0.83', '0.83', '0.83', '0.83', '0.83', '0.83', '0.83', '0.83', '0.83', '0.83', '0.83', '0.83', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88', '0.88']
13 | 
14 | 
15 | # mAP of all classes
16 | mAP = 64.02%
17 | 
18 | # Number of ground-truth objects per class
19 | 0.0: 46
20 | 1.0: 18
21 | 2.0: 24
22 | 
23 | # Number of detected objects per class
24 | 0.0: 106 (tp:28, fp:78)
25 | 1.0: 40 (tp:15, fp:25)
26 | 2.0: 93 (tp:21, fp:72)
27 | 


--------------------------------------------------------------------------------