├── .idea
├── .gitignore
├── inspectionProfiles
│ └── profiles_settings.xml
├── misc.xml
├── modules.xml
├── retinanet-pytorch.iml
└── vcs.xml
├── Configs.py
├── Data
├── Dataloader.py
├── Dataset_VOC.py
├── Transfroms.py
├── Transfroms_utils.py
├── __init__.py
└── __pycache__
│ ├── Dataloader.cpython-37.pyc
│ ├── Dataset_VOC.cpython-37.pyc
│ ├── Transfroms.cpython-37.pyc
│ ├── Transfroms_utils.cpython-37.pyc
│ └── __init__.cpython-37.pyc
├── Demo_detect_one_image.py
├── Demo_detect_video.py
├── Demo_eval.py
├── Demo_train.py
├── Model
├── __init__.py
├── __pycache__
│ ├── __init__.cpython-37.pyc
│ └── retainnet.cpython-37.pyc
├── base_models
│ ├── Resnet.py
│ ├── __init__.py
│ └── __pycache__
│ │ ├── Resnet.cpython-37.pyc
│ │ └── __init__.cpython-37.pyc
├── evaler.py
├── retainnet.py
├── struct
│ ├── Anchors.py
│ ├── Focal_Loss.py
│ ├── Fpn.py
│ ├── MultiBoxLoss.py
│ ├── PostProcess.py
│ ├── Predictor.py
│ ├── __init__.py
│ └── __pycache__
│ │ ├── Anchors.cpython-37.pyc
│ │ ├── Focal_Loss.cpython-37.pyc
│ │ ├── Fpn.cpython-37.pyc
│ │ ├── PostProcess.cpython-37.pyc
│ │ ├── Predictor.cpython-37.pyc
│ │ └── __init__.cpython-37.pyc
└── trainer.py
├── README.md
├── Utils
├── Boxs_op.py
├── Cal_mean_std.py
├── Hash.py
├── __init__.py
├── __pycache__
│ ├── Boxs_op.cpython-37.pyc
│ ├── Hash.cpython-37.pyc
│ ├── __init__.cpython-37.pyc
│ └── voc_cal_ap.cpython-37.pyc
├── utils.py
├── visdom_op.py
└── voc_cal_ap.py
└── __pycache__
└── Configs.cpython-37.pyc
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /workspace.xml
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/retinanet-pytorch.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/Configs.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author : LG
3 | from yacs.config import CfgNode as CN
4 | import os
5 |
6 | project_root = os.getcwd()
7 |
8 | _C = CN()
9 |
10 |
11 | _C.FILE = CN()
12 |
13 | _C.FILE.PRETRAIN_WEIGHT_ROOT = project_root+'/Weights/pretrained' # 会使用到的预训练模型
14 | _C.FILE.MODEL_SAVE_ROOT = project_root+'/Weights/trained' # 训练模型的保存
15 | # _C.FILE.VGG16_WEIGHT = 'vgg16_reducedfc.pth' # vgg预训练模型
16 |
17 | _C.DEVICE = CN()
18 |
19 | _C.DEVICE.MAINDEVICE = 'cuda:0' # 主gpu 主GPU会占用内存稍大一丁点
20 | _C.DEVICE.TRAIN_DEVICES = [0, 1] # 训练gpu 0代表第一块gpu, 1 代表第二块gpu, 你可以随意更改. 你可以通过 nvidim-smi 来查看gpu编号及占用情况, 同样的,你可以[0,1,2,3,4,5,6,7]来指定八块gpu 或[0,2,4] 来指定其中的任意三块gpu
21 | _C.DEVICE.TEST_DEVICES = [0, 1] # 检测gpu
22 |
23 | _C.MODEL = CN()
24 | _C.MODEL.BASEMODEL = 'resnet50' # 现支持 resnet18, resnet34, resnet50, resnet101, resnet152
25 |
26 | _C.MODEL.INPUT = CN()
27 | _C.MODEL.INPUT.IMAGE_SIZE = 600 # 模型输入尺寸
28 |
29 | _C.MODEL.ANCHORS = CN()
30 | _C.MODEL.ANCHORS.FEATURE_MAPS = [(75, 75), (38, 38), (19, 19), (10, 10), (5, 5)] # fpn输出的特征图大小 # [(IMAGE_SIZE/2/2/2, ), (IMAGE_SIZE/2/2/2/2, ), (IMAGE_SIZE/2/2/2/2/2)] 这里都向上取整
31 | _C.MODEL.ANCHORS.SIZES = [32, 64, 128, 256, 512] # 每层特征图上anchor的真实尺寸
32 | _C.MODEL.ANCHORS.NUMS = 9 # 每个特征点上anchor的数量, 与_C.MODEL.ANCHORS.RATIOS 相关联
33 | _C.MODEL.ANCHORS.RATIOS = [0.5, 1, 2] # 不同特征图上检测框绘制比例
34 | _C.MODEL.ANCHORS.SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] # 不同特征图上检测框绘制比例
35 | _C.MODEL.ANCHORS.CLIP = True # 越界检测框截断,0~1
36 | _C.MODEL.ANCHORS.THRESHOLD = 0.5 # 交并比阈值
37 | _C.MODEL.ANCHORS.CENTER_VARIANCE = 0.1 # 解码
38 | _C.MODEL.ANCHORS.SIZE_VARIANCE = 0.2 # 解码
39 |
40 | _C.TRAIN = CN()
41 |
42 | _C.TRAIN.NEG_POS_RATIO = 3 # 负正样本比例,每张图中负样本比例(背景类)会占大多数,通过这个来对负样本进行抑制,只取3倍正样本数量的负样本进行训练,而不至于导致正负样本严重失衡
43 | _C.TRAIN.MAX_ITER = 120000 # 训练轮数
44 | _C.TRAIN.BATCH_SIZE = 20 # 训练批次, 如果内存小,可以调小。如果使用多块gpu,请使用整数倍gpu数量的批次数
45 |
46 | _C.MULTIBOXLOSS = CN()
47 | _C.MULTIBOXLOSS.ALPHA = 0.25 # focal loss 阿尔法参数,用于调节背景与目标比例,这里与 _C.TRAIN.NEG_POS_RATIO 目的相同,但原理不同,_C.TRAIN.NEG_POS_RATIO直接减少负样本数量,_C.MULTIBOXLOSS.ALPHA 减小负样本对损失的影响比重
48 | _C.MULTIBOXLOSS.GAMMA = 2 # focal loss 伽马参数 ,用于调节难易样本影响,一般为2即可
49 |
50 | _C.OPTIM = CN()
51 |
52 | _C.OPTIM.LR = 1e-3 # 初始学习率.默认优化器为SGD # 如需修改优化器,可以代码中进行修改 Model/trainer.py -> set_optimizer
53 | _C.OPTIM.MOMENTUM = 0.9 # 优化器动量.默认优化器为SGD
54 | _C.OPTIM.WEIGHT_DECAY = 5e-4 # 权重衰减,L2正则化.默认优化器为SGD
55 |
56 | _C.OPTIM.SCHEDULER = CN() # 默认使用MultiStepLR
57 | _C.OPTIM.SCHEDULER.GAMMA = 0.1 # 学习率衰减率
58 | _C.OPTIM.SCHEDULER.LR_STEPS = [80000, 100000]
59 |
60 |
61 | _C.MODEL.TEST = CN()
62 |
63 | _C.MODEL.TEST.NMS_THRESHOLD = 0.45 # 非极大抑制阈值
64 | _C.MODEL.TEST.CONFIDENCE_THRESHOLD = 0.1 # 分数阈值,
65 | _C.MODEL.TEST.MAX_PER_IMAGE = 100 # 预测结果最大保留数量
66 | _C.MODEL.TEST.MAX_PER_CLASS = -1 # 测试时,top-N
67 |
68 |
69 | _C.DATA = CN()
70 |
71 | # 由于在使用时,是自己的数据集.所以这里,并没有写0712合并的数据集格式,这里以VOC2007为例
72 | _C.DATA.DATASET = CN()
73 | _C.DATA.DATASET.NUM_CLASSES =21
74 | _C.DATA.DATASET.CLASS_NAME = ('__background__', 'aeroplane', 'bicycle', 'bird', 'boat',
75 | 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable',
76 | 'dog', 'horse', 'motorbike', 'person', 'pottedplant',
77 | 'sheep', 'sofa', 'train', 'tvmonitor')
78 |
79 |
80 | _C.DATA.DATASET.DATA_DIR = '/home/XXX/VOC_det/VOCdevkit/VOC2007' # 数据集voc格式,根目录 请更改为自己的目录
81 | _C.DATA.DATASET.TRAIN_SPLIT = 'train' # 训练集,对应于 /VOCdevkit/VOC2007/ImageSets/Main/train.txt'
82 | _C.DATA.DATASET.TEST_SPLIT = 'val' # 测试集,对应于 /VOCdevkit/VOC2007/ImageSets/Main/val.txt'
83 | _C.DATA.PIXEL_MEAN = [0, 0, 0] #数据集均值 用于数据增强部分,依数据集修改即可
84 | _C.DATA.PIXEL_STD = [1, 1, 1] # 数据集方差
85 |
86 | _C.DATA.DATALOADER = CN()
87 |
88 |
89 | _C.STEP = CN()
90 | _C.STEP.VIS_STEP = 10 # visdom可视化训练过程,打印步长
91 | _C.STEP.MODEL_SAVE_STEP = 1000 # 训练过程中,模型保存步长
92 | _C.STEP.EVAL_STEP = 1000 # 在训练过程中,并没有进行检测流程,建议保存模型后另外检测
93 |
--------------------------------------------------------------------------------
/Data/Dataloader.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author : LG
3 | from torch._six import int_classes as _int_classes
4 | from torch.utils.data import DataLoader
5 | from torch.utils.data.sampler import RandomSampler, SequentialSampler
6 | from torch.utils.data import Sampler
7 | from torch.utils.data.dataloader import default_collate # 这个不用管,只是显示问题,实际可以使用
8 |
9 | __all__ = ['our_dataloader', 'our_dataloader_test']
10 |
11 | class BatchSampler_Our(Sampler):
12 | """
13 | 重新定义了 批采样类 ,实现按指定迭代数进行批次提取,
14 | 在取完一批次后没达到指定迭代数会进行循环,直到输出指定的批次数量。
15 | """
16 |
17 | def __init__(self, sampler, batch_size, max_iteration=100000000, drop_last=True):
18 | """
19 | 数据加载,默认循环加载1亿次,几近无限迭代.
20 | 每次迭代输出一个批次的数据.
21 | :param sampler: 采样器,传入 不同采样器 实现 不同的采样策略, RandomSampler随机采样,SequentialSampler顺序采样
22 | :param batch_size: 批次大小
23 | :param max_iteration: 迭代次数
24 | :param drop_last: 是否弃掉最后的不够一批次的数据。True则弃掉;False保留,并返回,但是这一批次会小于指定批次大小。
25 | """
26 | if not isinstance(sampler, Sampler):
27 | raise ValueError("sampler should be an instance of "
28 | "torch.utils.data.Sampler, but got sampler={}"
29 | .format(sampler))
30 | if not isinstance(batch_size, _int_classes) or isinstance(batch_size, bool) or \
31 | batch_size <= 0:
32 | raise ValueError("batch_size should be a positive integer value, "
33 | "but got batch_size={}".format(batch_size))
34 | if not isinstance(max_iteration, _int_classes) or isinstance(max_iteration, bool) or \
35 | max_iteration <= 0:
36 | raise ValueError("max_iter should be a positive integer value, "
37 | "but got max_iter={}".format(max_iteration))
38 |
39 | if not isinstance(drop_last, bool):
40 | raise ValueError("drop_last should be a boolean value, but got "
41 | "drop_last={}".format(drop_last))
42 | self.sampler = sampler
43 | self.batch_size = batch_size
44 | self.max_iteration = max_iteration
45 | self.drop_last = drop_last
46 |
47 | def __iter__(self):
48 | iteration = 0
49 |
50 | while iteration <= self.max_iteration:
51 | batch = []
52 | for idx in self.sampler:
53 | batch.append(idx)
54 |
55 | if len(batch) == self.batch_size:
56 | iteration += 1
57 | yield batch
58 | batch = []
59 |
60 | if iteration > self.max_iteration:
61 | break
62 |
63 | if len(batch) > 0 and not self.drop_last:
64 | iteration += 1
65 | yield batch
66 |
67 | if iteration > self.max_iteration:
68 | break
69 |
70 | def __len__(self):
71 | if self.drop_last:
72 | return self.max_iteration
73 | else:
74 | return self.max_iteration
75 |
76 |
77 | class BatchCollator:
78 | def __init__(self, is_train=True):
79 | self.is_train = is_train
80 |
81 | def __call__(self, batch):
82 | transposed_batch = list(zip(*batch))
83 | images = default_collate(transposed_batch[0])
84 | img_ids = default_collate(transposed_batch[3])
85 |
86 | if self.is_train:
87 | boxes = default_collate(transposed_batch[1])
88 | labels = default_collate(transposed_batch[2])
89 | else:
90 | boxes = None
91 | labels = None
92 | return images, boxes, labels, img_ids
93 |
94 |
95 | def our_dataloader(dataset,batch_size,shuffle=True,num_workers=2,drop_last=True,max_iteration=100000000):
96 | """
97 | 几近无限迭代器,迭代次数为1亿次,每次迭代输出一个批次的数据.
98 | :param dataset: 数据集
99 | :param batch_size: 批次数
100 | :param max_iteration: 迭代的总次数,默认1亿次,具体迭代次数,在取数据时进行判断会更为灵活
101 | :param shuffle:
102 | :param num_workers:
103 | :param drop_last:
104 | :return:
105 | """
106 | if shuffle:
107 | sampler = RandomSampler(dataset) # 随机采样器
108 | else:
109 | sampler = SequentialSampler(dataset) # 顺序采样器
110 | batch_sampler = BatchSampler_Our(sampler=sampler,
111 | batch_size=batch_size,
112 | max_iteration=max_iteration,
113 | drop_last=drop_last)
114 | loader = DataLoader(dataset=dataset,batch_sampler=batch_sampler,num_workers=num_workers,collate_fn=BatchCollator(is_train=dataset.is_train))
115 | return loader
116 |
117 | def our_dataloader_test(dataset,batch_size,shuffle=False,get_box_label=True,num_workers=2,drop_last=False):
118 |
119 | loader = DataLoader(dataset=dataset,batch_size=batch_size,shuffle=shuffle,num_workers=num_workers,
120 | collate_fn=BatchCollator(is_train=get_box_label),drop_last=drop_last)
121 | return loader
--------------------------------------------------------------------------------
/Data/Dataset_VOC.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author : LG
3 | import os
4 | import torch.utils.data
5 | import numpy as np
6 | import xml.etree.ElementTree as ET
7 | from PIL import Image
8 |
9 | __all__ = ['vocdataset']
10 |
11 | class vocdataset(torch.utils.data.Dataset):
12 |
13 | def __init__(self, cfg, is_train=True, data_dir=None, transform=None, target_transform=None, keep_difficult=False):
14 | """VOC格式数据集
15 | Args:
16 | data_dir: VOC格式数据集根目录,该目录下包含:
17 | Annotations, ImageSets, JPEGImages, SegmentationClass, SegmentationObject.
18 | split: train、test 或者 eval, 对应于 ImageSets/Main/train.txt,eval.txt
19 | """
20 | # 类别
21 | self.class_names = cfg.DATA.DATASET.CLASS_NAME
22 | self.data_dir = cfg.DATA.DATASET.DATA_DIR
23 | self.is_train = is_train
24 | if data_dir:
25 | self.data_dir = data_dir
26 | self.split = cfg.DATA.DATASET.TRAIN_SPLIT # train 对应于ImageSets/Main/train.txt
27 | if not self.is_train:
28 | self.split = cfg.DATA.DATASET.TEST_SPLIT # test 对应于ImageSets/Main/test.txt
29 | self.transform = transform
30 | self.target_transform = target_transform
31 | image_sets_file = os.path.join(self.data_dir, "ImageSets", "Main", "{}.txt".format(self.split))
32 | # 从train.txt 文件中读取图片 id 返回ids列表
33 | self.ids = self._read_image_ids(image_sets_file)
34 | self.keep_difficult = keep_difficult
35 | self.class_dict = {class_name: i for i, class_name in enumerate(self.class_names)}
36 |
37 | def __getitem__(self, index):
38 | image_name = self.ids[index]
39 | # 解析Annotations/id.xml 读取id图片对应的 boxes, labels, is_difficult 均为列表
40 | boxes, labels, is_difficult = self._get_annotation(image_name)
41 | if not self.keep_difficult:
42 | boxes = boxes[is_difficult == 0]
43 | labels = labels[is_difficult == 0]
44 | # 读取 JPEGImages/id.jpg 返回Image.Image
45 | image = self._read_image(image_name)
46 | if self.transform:
47 | image, boxes, labels = self.transform(image, boxes, labels)
48 | if self.target_transform:
49 | boxes, labels = self.target_transform(boxes, labels)
50 |
51 | return image, boxes, labels, image_name
52 |
53 | # 返回 id, boxes, labels, is_difficult
54 | def get_annotation(self, index):
55 | image_id = self.ids[index]
56 | return image_id, self._get_annotation(image_id)
57 |
58 | def __len__(self):
59 | return len(self.ids)
60 |
61 | @staticmethod
62 | def _read_image_ids(image_sets_file):
63 | ids = []
64 | with open(image_sets_file) as f:
65 | for line in f:
66 | ids.append(line.rstrip())
67 | return ids
68 |
69 | # 解析xml,返回 boxes, labels, is_difficult numpy.array格式
70 | def _get_annotation(self, image_name):
71 | annotation_file = os.path.join(self.data_dir, "Annotations", "{}.xml".format(image_name))
72 | objects = ET.parse(annotation_file).findall("object")
73 | boxes = []
74 | labels = []
75 | is_difficult = []
76 | for obj in objects: # .encode('utf-8').decode('UTF-8-sig') 解决Windows下中文编码问题
77 | class_name = obj.find('name').text.encode('utf-8').decode('UTF-8-sig').lower().strip()
78 | bbox = obj.find('bndbox')
79 | # VOC dataset format follows Matlab, in which indexes start from 0
80 | x1 = float(bbox.find('xmin').text.encode('utf-8').decode('UTF-8-sig')) - 1
81 | y1 = float(bbox.find('ymin').text.encode('utf-8').decode('UTF-8-sig')) - 1
82 | x2 = float(bbox.find('xmax').text.encode('utf-8').decode('UTF-8-sig')) - 1
83 | y2 = float(bbox.find('ymax').text.encode('utf-8').decode('UTF-8-sig')) - 1
84 | boxes.append([x1, y1, x2, y2])
85 | labels.append(self.class_dict[class_name])
86 | is_difficult_str = obj.find('difficult').text
87 | is_difficult.append(int(is_difficult_str) if is_difficult_str else 0)
88 |
89 | return (np.array(boxes, dtype=np.float32),
90 | np.array(labels, dtype=np.int64),
91 | np.array(is_difficult, dtype=np.uint8))
92 |
93 | # 获取图片尺寸信息,返回字典 {'height': , 'width': }
94 | def get_img_size(self, img_name):
95 | annotation_file = os.path.join(self.data_dir, "Annotations", "{}.xml".format(img_name))
96 | anno = ET.parse(annotation_file).getroot()
97 | size = anno.find("size")
98 | im_info = tuple(map(int, (size.find("height").text, size.find("width").text)))
99 | return {"height": im_info[0], "width": im_info[1]}
100 |
101 | # 读取图片数据,返回Image.Image
102 | def _read_image(self, image_id):
103 | image_file = os.path.join(self.data_dir, "JPEGImages", "{}.jpg".format(image_id))
104 | image = Image.open(image_file).convert("RGB")
105 | image = np.array(image)
106 | return image
107 |
108 | def get_one_image(self,image_name = None):
109 | import random
110 |
111 | if not image_name:
112 | image_name = random.choice(self.ids)
113 | # 解析Annotations/id.xml 读取id图片对应的 boxes, labels, is_difficult 均为列表
114 | boxes, labels, is_difficult = self._get_annotation(image_name)
115 | if not self.keep_difficult:
116 | boxes = boxes[is_difficult == 0]
117 | labels = labels[is_difficult == 0]
118 | # 读取 JPEGImages/id.jpg 返回Image.Image
119 | image = self._read_image(image_name)
120 | image_after_transfrom = None
121 | if self.transform:
122 | image_after_transfrom, boxes, labels = self.transform(image, boxes, labels)
123 | if self.target_transform:
124 | boxes, labels = self.target_transform(boxes, labels)
125 |
126 | return image, image_after_transfrom, boxes, labels, image_name
--------------------------------------------------------------------------------
/Data/Transfroms.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author : LG
3 | from Utils.Boxs_op import center_form_to_corner_form, assign_priors,\
4 | corner_form_to_center_form, convert_boxes_to_locations
5 | from Data.Transfroms_utils import *
6 |
7 | __all__ = ['transfrom', 'targettransform']
8 |
9 | class transfrom:
10 | """
11 | transfroms
12 | eg:
13 | transform = Tramsfrom(cfg,is_train=True)
14 | """
15 | def __init__(self,cfg, is_train):
16 | if is_train:
17 | self.transforms = [
18 | ConvertFromInts(), # 图像数据转float32
19 | PhotometricDistort(), # 光度畸变,对比度,亮度,光噪声,色调,饱和等(详情看函数,有详细备注.)
20 | SubtractMeans(cfg.DATA.PIXEL_MEAN), # 减均值
21 | DivideStds(cfg.DATA.PIXEL_STD), # 除方差
22 | Expand(), # 随机扩充
23 | RandomSampleCrop(), # 随机交兵比裁剪
24 | RandomMirror(), # 随机镜像
25 | ToPercentCoords(), # boxes 坐标转百分比制
26 | Resize(cfg.MODEL.INPUT.IMAGE_SIZE),
27 |
28 | ToTensor(),
29 | ]
30 | else:
31 | self.transforms = [
32 | Resize(cfg.MODEL.INPUT.IMAGE_SIZE),
33 | SubtractMeans(cfg.DATA.PIXEL_MEAN), # 减均值
34 | DivideStds(cfg.DATA.PIXEL_STD), # 除方差
35 | ToTensor()
36 | ]
37 |
38 | def __call__(self, img, boxes=None, labels=None):
39 | for t in self.transforms:
40 | img, boxes, labels = t(img, boxes, labels)
41 | return img, boxes, labels
42 |
43 |
44 | class targettransform:
45 | """
46 | targets_transfroms
47 | eg:
48 | transform = TargetTransform(cfg)
49 | """
50 |
51 | def __init__(self, cfg):
52 | from Model.struct import priorbox # 避免循环导入.(模型中detect方法会使用transfrom,而targettransfrom会使用到priorbox, 这样写可以避免循环导入)
53 |
54 | self.center_form_priors = priorbox(cfg)()
55 | self.corner_form_priors = center_form_to_corner_form(self.center_form_priors)
56 | self.center_variance = cfg.MODEL.ANCHORS.CENTER_VARIANCE
57 | self.size_variance = cfg.MODEL.ANCHORS.SIZE_VARIANCE
58 | self.iou_threshold = cfg.MODEL.ANCHORS.THRESHOLD
59 |
60 | def __call__(self, gt_boxes, gt_labels):
61 | if type(gt_boxes) is np.ndarray:
62 | gt_boxes = torch.from_numpy(gt_boxes)
63 | if type(gt_labels) is np.ndarray:
64 | gt_labels = torch.from_numpy(gt_labels)
65 | boxes, labels = assign_priors(gt_boxes, gt_labels,
66 | self.corner_form_priors,
67 | self.iou_threshold)
68 | boxes = corner_form_to_center_form(boxes)
69 | locations = convert_boxes_to_locations(boxes,
70 | self.center_form_priors,
71 | self.center_variance,
72 | self.size_variance)
73 | return locations, labels
74 |
--------------------------------------------------------------------------------
/Data/Transfroms_utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author : LG
3 | import torch
4 | from torchvision import transforms
5 | import cv2
6 | import numpy as np
7 | import types
8 | from numpy import random
9 |
10 |
11 | def intersect(box_a, box_b):
12 | max_xy = np.minimum(box_a[:, 2:], box_b[2:])
13 | min_xy = np.maximum(box_a[:, :2], box_b[:2])
14 | inter = np.clip((max_xy - min_xy), a_min=0, a_max=np.inf)
15 | return inter[:, 0] * inter[:, 1]
16 |
17 |
18 | def jaccard_numpy(box_a, box_b):
19 | """Compute the jaccard overlap of two sets of boxes. The jaccard overlap
20 | is simply the intersection over union of two boxes.
21 | E.g.:
22 | A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
23 | Args:
24 | box_a: Multiple bounding boxes, Shape: [num_boxes,4]
25 | box_b: Single bounding box, Shape: [4]
26 | Return:
27 | jaccard overlap: Shape: [box_a.shape[0], box_a.shape[1]]
28 | """
29 | inter = intersect(box_a, box_b)
30 | area_a = ((box_a[:, 2] - box_a[:, 0]) *
31 | (box_a[:, 3] - box_a[:, 1])) # [A,B]
32 | area_b = ((box_b[2] - box_b[0]) *
33 | (box_b[3] - box_b[1])) # [A,B]
34 | union = area_a + area_b - inter
35 | return inter / union # [A,B]
36 |
37 |
38 | class Compose(object):
39 | """Composes several augmentations together.
40 | Args:
41 | transforms (List[Transform]): list of transforms to compose.
42 | Example:
43 | >>> Compose([
44 | >>> transforms.CenterCrop(10),
45 | >>> transforms.ToTensor(),
46 | >>> ])
47 | """
48 |
49 | def __init__(self, transforms):
50 | self.transforms = transforms
51 |
52 | def __call__(self, img, boxes=None, labels=None):
53 | for t in self.transforms:
54 | img, boxes, labels = t(img, boxes, labels)
55 | return img, boxes, labels
56 |
57 |
58 | class Lambda(object):
59 | """Applies a lambda as a transform."""
60 |
61 | def __init__(self, lambd):
62 | assert isinstance(lambd, types.LambdaType)
63 | self.lambd = lambd
64 |
65 | def __call__(self, img, boxes=None, labels=None):
66 | return self.lambd(img, boxes, labels)
67 |
68 |
69 | class ConvertFromInts(object):
70 | def __call__(self, image, boxes=None, labels=None):
71 | return image.astype(np.float32), boxes, labels
72 |
73 |
74 | class SubtractMeans(object):
75 | def __init__(self, mean):
76 | self.mean = np.array(mean, dtype=np.float32)
77 |
78 | def __call__(self, image, boxes=None, labels=None):
79 | image = image.astype(np.float32)
80 | image -= self.mean
81 | return image.astype(np.float32), boxes, labels
82 |
83 | class DivideStds(object):
84 | def __init__(self, std):
85 | self.std = np.array(std, dtype=np.float32)
86 |
87 | def __call__(self, image, boxes=None, labels=None):
88 | image = image.astype(np.float32)
89 | image /= self.std
90 | return image.astype(np.float32), boxes, labels
91 |
92 | class ToAbsoluteCoords(object):
93 | def __call__(self, image, boxes=None, labels=None):
94 | height, width, channels = image.shape
95 | boxes[:, 0] *= width
96 | boxes[:, 2] *= width
97 | boxes[:, 1] *= height
98 | boxes[:, 3] *= height
99 |
100 | return image, boxes, labels
101 |
102 |
103 | class ToPercentCoords(object):
104 | def __call__(self, image, boxes=None, labels=None):
105 | height, width, channels = image.shape
106 | boxes[:, 0] /= width
107 | boxes[:, 2] /= width
108 | boxes[:, 1] /= height
109 | boxes[:, 3] /= height
110 |
111 | return image, boxes, labels
112 |
113 |
114 | class Resize(object):
115 | def __init__(self, size=600):
116 | self.size = size
117 |
118 | def __call__(self, image, boxes=None, labels=None):
119 | image = cv2.resize(image, (self.size,
120 | self.size))
121 | return image, boxes, labels
122 |
123 |
124 | class RandomSaturation(object):
125 | def __init__(self, lower=0.5, upper=1.5):
126 | self.lower = lower
127 | self.upper = upper
128 | assert self.upper >= self.lower, "contrast upper must be >= lower."
129 | assert self.lower >= 0, "contrast lower must be non-negative."
130 |
131 | def __call__(self, image, boxes=None, labels=None):
132 | if random.randint(2):
133 | image[:, :, 1] *= random.uniform(self.lower, self.upper)
134 |
135 | return image, boxes, labels
136 |
137 |
138 | class RandomHue(object):
139 | def __init__(self, delta=18.0):
140 | assert delta >= 0.0 and delta <= 360.0
141 | self.delta = delta
142 |
143 | def __call__(self, image, boxes=None, labels=None):
144 | if random.randint(2):
145 | image[:, :, 0] += random.uniform(-self.delta, self.delta)
146 | image[:, :, 0][image[:, :, 0] > 360.0] -= 360.0
147 | image[:, :, 0][image[:, :, 0] < 0.0] += 360.0
148 | return image, boxes, labels
149 |
150 |
151 | class RandomLightingNoise(object):
152 | def __init__(self):
153 | self.perms = ((0, 1, 2), (0, 2, 1),
154 | (1, 0, 2), (1, 2, 0),
155 | (2, 0, 1), (2, 1, 0))
156 |
157 | def __call__(self, image, boxes=None, labels=None):
158 | if random.randint(2):
159 | swap = self.perms[random.randint(len(self.perms))]
160 | shuffle = SwapChannels(swap) # shuffle channels
161 | image = shuffle(image)
162 | return image, boxes, labels
163 |
164 |
165 | class ConvertColor(object):
166 | '''
167 | H色调用角度度量,取值范围为0°~360°.从红色开始按逆时针方向计算,红色为0°,绿色为120°,蓝色为240°.它们的补色是:黄色为60°,青色为180°,品红为300°;
168 | S饱和度表示颜色接近光谱色的程度.一种颜色,可以看成是某种光谱色与白色混合的结果.其中光谱色所占的比例愈大,颜色接近光谱色的程度就愈高,颜色的饱和度也就愈高;
169 | 明度表示颜色明亮的程度,对于光源色,明度值与发光体的光亮度有关;对于物体色,此值和物体的透射比或反射比有关。通常取值范围为0%(黑)到100%(白)。
170 | '''
171 | def __init__(self, current, transform):
172 | self.transform = transform
173 | self.current = current
174 |
175 | def __call__(self, image, boxes=None, labels=None):
176 | if self.current == 'BGR' and self.transform == 'HSV':
177 | image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
178 | elif self.current == 'RGB' and self.transform == 'HSV':
179 | image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
180 | elif self.current == 'BGR' and self.transform == 'RGB':
181 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
182 | elif self.current == 'HSV' and self.transform == 'BGR':
183 | image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
184 | elif self.current == 'HSV' and self.transform == "RGB":
185 | image = cv2.cvtColor(image, cv2.COLOR_HSV2RGB)
186 | else:
187 | raise NotImplementedError
188 | return image, boxes, labels
189 |
190 |
191 | class RandomContrast(object):
192 | def __init__(self, lower=0.5, upper=1.5):
193 | self.lower = lower
194 | self.upper = upper
195 | assert self.upper >= self.lower, "contrast upper must be >= lower."
196 | assert self.lower >= 0, "contrast lower must be non-negative."
197 |
198 | # expects float image
199 | def __call__(self, image, boxes=None, labels=None):
200 | if random.randint(2):
201 | alpha = random.uniform(self.lower, self.upper)
202 | image *= alpha
203 | return image, boxes, labels
204 |
205 |
206 | class RandomBrightness(object):
207 | def __init__(self, delta=32):
208 | assert delta >= 0.0
209 | assert delta <= 255.0
210 | self.delta = delta
211 |
212 | def __call__(self, image, boxes=None, labels=None):
213 | if random.randint(2):
214 | delta = random.uniform(-self.delta, self.delta)
215 | image += delta
216 | return image, boxes, labels
217 |
218 |
219 | class ToCV2Image(object):
220 | def __call__(self, tensor, boxes=None, labels=None):
221 | return tensor.cpu().numpy().astype(np.float32).transpose((1, 2, 0)), boxes, labels
222 |
223 |
224 | class ToTensor(object):
225 | def __call__(self, cvimage, boxes=None, labels=None):
226 | return torch.from_numpy(cvimage.astype(np.float32)).permute(2, 0, 1), boxes, labels
227 |
228 |
229 | class RandomSampleCrop(object):
230 | """Crop
231 | Arguments:
232 | img (Image): the image being input during training
233 | boxes (Tensor): the original bounding boxes in pt form
234 | labels (Tensor): the class labels for each bbox
235 | mode (float tuple): the min and max jaccard overlaps
236 | Return:
237 | (img, boxes, classes)
238 | img (Image): the cropped image
239 | boxes (Tensor): the adjusted bounding boxes in pt form
240 | labels (Tensor): the class labels for each bbox
241 | """
242 |
243 | def __init__(self):
244 | self.sample_options = (
245 | None, # 直接返回,不裁剪
246 | # IOU裁剪, (最小iou, +无穷) , 这里的IOU为 所有 标注框 与 裁剪框的IOU,
247 | # 因而,只决定 裁剪框所包含多目标部分的比例, 并不是直接与单个标注框进行IOU裁剪
248 | (0.1, None),
249 | (0.3, None),
250 | (0.7, None),
251 | (0.9, None),
252 | # 不限定iou裁剪,(-无穷, +无穷)
253 | (None, None),
254 | )
255 |
256 | def __call__(self, image, boxes=None, labels=None):
257 | # guard against no boxes
258 | if boxes is not None and boxes.shape[0] == 0:
259 | return image, boxes, labels
260 | height, width, _ = image.shape
261 | while True:
262 | # randomly choose a mode
263 | mode = random.choice(self.sample_options)
264 | if mode is None:
265 | return image, boxes, labels
266 |
267 | min_iou, max_iou = mode
268 | if min_iou is None:
269 | min_iou = float('-inf') # 负无穷
270 | if max_iou is None:
271 | max_iou = float('inf') # 正无穷
272 |
273 | # 尝试50次,每次随机裁剪不一定符合情况,会进行尝试.
274 | for _ in range(50):
275 | current_image = image
276 |
277 | w = random.uniform(0.3 * width, width) # 框尺寸,0.3~1 倍 图像尺寸
278 | h = random.uniform(0.3 * height, height)
279 |
280 | # 限制 框宽高,不要特别狭长的框体,不然会出现resize后的严重变形.
281 | if h / w < 0.5 or h / w > 2:
282 | continue
283 |
284 | left = random.uniform(width - w)
285 | top = random.uniform(height - h)
286 |
287 | # 截取框 (l, t, r, d)
288 | rect = np.array([int(left), int(top), int(left + w), int(top + h)])
289 |
290 | # 计算 截取框与 标注框s的 交并比, boxes为单图对应的多个标注框,rect是当前的截取框
291 | overlap = jaccard_numpy(boxes, rect)
292 |
293 | # IOU限制
294 | if overlap.max() < min_iou or overlap.min() > max_iou:
295 | continue
296 |
297 | # 标注框s的 中心 (x, y)
298 | centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0
299 |
300 | # 截取框 左上角 处于标注框中心 左上 ( 与下面两行 共同 确定 是否截取框 包含了所有的标注框中心)
301 | m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1])
302 |
303 | # 截取框 右下角 处于标注框中心 右下
304 | m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1])
305 |
306 | mask = m1 * m2
307 |
308 | # 如果截取框没有包含至少一个标注框的中心, 则 重新尝试
309 | if not mask.any():
310 | continue
311 |
312 | # 截取图片
313 | current_image = current_image[rect[1]:rect[3], rect[0]:rect[2], :]
314 |
315 | # 这里只选择 中心处于截取框内的 目标
316 | current_boxes = boxes[mask, :].copy()
317 |
318 | # 标签同上
319 | current_labels = labels[mask]
320 |
321 | # should we use the box left and top corner or the crop's
322 | current_boxes[:, :2] = np.maximum(current_boxes[:, :2],
323 | rect[:2])
324 | # adjust to crop (by substracting crop's left,top)
325 | current_boxes[:, :2] -= rect[:2]
326 |
327 | current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:],
328 | rect[2:])
329 | # adjust to crop (by substracting crop's left,top)
330 | current_boxes[:, 2:] -= rect[:2]
331 |
332 | return current_image, current_boxes, current_labels
333 |
334 |
335 | class Expand(object):
336 | def __call__(self, image, boxes, labels):
337 | if random.randint(2):
338 | return image, boxes, labels
339 |
340 | height, width, depth = image.shape
341 | ratio = random.uniform(1, 4) # (1,4)
342 | left = random.uniform(0, width * ratio - width)
343 | top = random.uniform(0, height * ratio - height)
344 |
345 | expand_image = np.zeros(
346 | (int(height * ratio), int(width * ratio), depth),
347 | dtype=image.dtype)
348 | expand_image[int(top):int(top + height),
349 | int(left):int(left + width)] = image
350 | image = expand_image
351 |
352 | boxes = boxes.copy()
353 | boxes[:, :2] += (int(left), int(top))
354 | boxes[:, 2:] += (int(left), int(top))
355 |
356 | return image, boxes, labels
357 |
358 |
359 | class RandomMirror(object):
360 | def __call__(self, image, boxes, classes):
361 | _, width, _ = image.shape
362 | if random.randint(2):
363 | image = image[:, ::-1]
364 | boxes = boxes.copy()
365 | boxes[:, 0::2] = width - boxes[:, 2::-2]
366 | return image, boxes, classes
367 |
368 |
369 | class SwapChannels(object):
370 | """Transforms a tensorized image by swapping the channels in the order
371 | specified in the swap tuple.
372 | Args:
373 | swaps (int triple): final order of channels
374 | eg: (2, 1, 0)
375 | """
376 |
377 | def __init__(self, swaps):
378 | self.swaps = swaps
379 |
380 | def __call__(self, image):
381 | """
382 | Args:
383 | image (Tensor): image tensor to be transformed
384 | Return:
385 | a tensor with channels swapped according to swap
386 | """
387 | # if torch.is_tensor(image):
388 | # image = image.data.cpu().numpy()
389 | # else:
390 | # image = np.array(image)
391 | image = image[:, :, self.swaps]
392 | return image
393 |
394 |
395 | class PhotometricDistort(object):
396 | def __init__(self):
397 | self.pd = [
398 | RandomContrast(), # RGB 随机对比度
399 | ConvertColor(current="RGB", transform='HSV'), # HSV HSV同RGB也是一种颜色表示. H色调,S饱和度,V明度,详情看函数备注
400 | RandomSaturation(), # HSV 随机饱和
401 | RandomHue(), # HSV 随机色调
402 | ConvertColor(current='HSV', transform='RGB'), # RGB
403 | RandomContrast() # 随机对比度
404 | ]
405 | self.rand_brightness = RandomBrightness() # 随机亮度
406 | self.rand_light_noise = RandomLightingNoise() # 随机光噪声
407 |
408 | def __call__(self, image, boxes, labels):
409 | im = image.copy()
410 | im, boxes, labels = self.rand_brightness(im, boxes, labels)
411 | if random.randint(2):
412 | distort = Compose(self.pd[:-1]) # 先对比度调整
413 | else:
414 | distort = Compose(self.pd[1:]) # 后对比度调整
415 | im, boxes, labels = distort(im, boxes, labels)
416 | return self.rand_light_noise(im, boxes, labels)
417 |
418 |
419 |
420 |
--------------------------------------------------------------------------------
/Data/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author : LG
3 | from .Dataset_VOC import vocdataset
4 | from .Dataloader import our_dataloader, our_dataloader_test
5 | from .Transfroms import transfrom, targettransform
6 |
--------------------------------------------------------------------------------
/Data/__pycache__/Dataloader.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Data/__pycache__/Dataloader.cpython-37.pyc
--------------------------------------------------------------------------------
/Data/__pycache__/Dataset_VOC.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Data/__pycache__/Dataset_VOC.cpython-37.pyc
--------------------------------------------------------------------------------
/Data/__pycache__/Transfroms.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Data/__pycache__/Transfroms.cpython-37.pyc
--------------------------------------------------------------------------------
/Data/__pycache__/Transfroms_utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Data/__pycache__/Transfroms_utils.cpython-37.pyc
--------------------------------------------------------------------------------
/Data/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Data/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/Demo_detect_one_image.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author : LG
3 | from Model import RetainNet
4 | from Configs import _C as cfg
5 | from PIL import Image
6 | import matplotlib.pyplot as plt
7 | # 实例化模型
8 | net = RetainNet(cfg)
9 | # 使用cpu或gpu
10 | net.to('cuda')
11 | # 模型从权重文件中加载权重
12 | net.load_pretrained_weight('/home/super/PycharmProjects/Retinanet-Pytorch/Weights/trained/model_35.pkl')
13 | # 打开图片
14 | image = Image.open("/home/super/VOC_det/VOCdevkit/VOC2007/JPEGImages/000009.jpg")
15 | # 进行检测, 分别返回 绘制了检测框的图片数据/回归框/标签/分数.
16 | drawn_image, boxes, labels, scores = net.Detect_single_img(image=image,score_threshold=0.5)
17 |
18 | plt.imsave('XXX_det.jpg',drawn_image)
19 | plt.imshow(drawn_image)
20 | plt.show()
--------------------------------------------------------------------------------
/Demo_detect_video.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author : LG
3 | from Model import RetainNet
4 | from Configs import _C as cfg
5 |
6 | # 实例化模型
7 | net = RetainNet(cfg)
8 | # 使用cpu或gpu
9 | net.to('cuda')
10 | # 模型从权重文件中加载权重
11 | net.load_pretrained_weight('XXX.pkl')
12 |
13 | video_path = 'XXX.mp4'
14 |
15 | # 进行检测,
16 | # if save_video_path不为None,则不保存视频,如需保存视频save_video_path=XXX.mp4 ,
17 | # show=True,实时显示检测结果
18 | net.Detect_video(video_path=video_path, score_threshold=0.02, save_video_path=None, show=True)
19 |
--------------------------------------------------------------------------------
/Demo_eval.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author : LG
3 |
4 | from Model import RetainNet, Evaler
5 | from Data import vocdataset
6 | from Configs import _C as cfg
7 | from Data import transfrom,targettransform
8 |
9 |
10 | # 训练数据集,VOC格式数据集, 训练数据取自 ImageSets/Main/train.txt'
11 | train_dataset=vocdataset(cfg, is_train=True, transform=transfrom(cfg,is_train=True),
12 | target_transform=targettransform(cfg))
13 |
14 | # 测试数据集,VOC格式数据集, 测试数据取自 ImageSets/Main/eval.txt'
15 | test_dataset = vocdataset(cfg=cfg, is_train=False,
16 | transform=transfrom(cfg=cfg, is_train=False),
17 | target_transform=targettransform(cfg))
18 |
19 | if __name__ == '__main__':
20 | # 模型测试只支持GPU单卡或多卡,不支持cpu
21 | net = RetainNet(cfg)
22 | # 将模型移动到gpu上,cfg.DEVICE.MAINDEVICE定义了模型所使用的主GPU
23 | net.to(cfg.DEVICE.MAINDEVICE)
24 | # 模型从权重文件中加载权重
25 | net.load_pretrained_weight('XXX.pkl')
26 | # 初始化验证器,验证器参数通过cfg进行配置;也可传入参数进行配置,但不建议
27 | evaler = Evaler(cfg, eval_devices=None)
28 | # 验证器开始在数据集上验证模型
29 | ap, map = evaler(model=net,
30 | test_dataset=test_dataset)
31 | print(ap)
32 | print(map)
--------------------------------------------------------------------------------
/Demo_train.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author : LG
3 |
4 | from Model import RetainNet, Trainer
5 | from Data import vocdataset
6 | from Configs import _C as cfg
7 | from Data import transfrom,targettransform
8 |
9 |
10 | # 训练数据集,VOC格式数据集, 训练数据取自 ImageSets/Main/train.txt'
11 | train_dataset=vocdataset(cfg, is_train=True, transform=transfrom(cfg,is_train=True),
12 | target_transform=targettransform(cfg))
13 |
14 | # 测试数据集,VOC格式数据集, 测试数据取自 ImageSets/Main/eval.txt'
15 | test_dataset = vocdataset(cfg=cfg, is_train=False,
16 | transform=transfrom(cfg=cfg, is_train=False),
17 | target_transform=targettransform(cfg))
18 |
19 | if __name__ == '__main__':
20 | """
21 | 使用时,请先打开visdom
22 |
23 | 命令行 输入 pip install visdom 进行安装
24 | 输入 python -m visdom.server' 启动
25 | """
26 |
27 | # 首次调用会下载resnet预训练模型
28 |
29 | # 实例化模型. 模型的具体各种参数在Config文件中进行配置
30 | net = RetainNet(cfg)
31 | # 将模型移动到gpu上,cfg.DEVICE.MAINDEVICE定义了模型所使用的主GPU
32 | net.to(cfg.DEVICE.MAINDEVICE)
33 | # 初始化训练器,训练器参数通过cfg进行配置;也可传入参数进行配置,但不建议
34 | trainer = Trainer(cfg)
35 | # 训练器开始在 数据集上训练模型
36 | trainer(net, train_dataset)
37 |
--------------------------------------------------------------------------------
/Model/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author : LG
3 | from .retainnet import RetainNet
4 | from .trainer import Trainer
5 | from .evaler import Evaler
--------------------------------------------------------------------------------
/Model/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Model/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/Model/__pycache__/retainnet.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Model/__pycache__/retainnet.cpython-37.pyc
--------------------------------------------------------------------------------
/Model/base_models/Resnet.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author : LG
3 | import torch.nn as nn
4 | import torch
5 | from torch.nn import functional as F
6 | import wget
7 | import os
8 | from Configs import _C as cfg
9 |
10 | __all__ = ['build_resnet']
11 |
12 | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
13 | """3x3 convolution with padding"""
14 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
15 | padding=dilation, groups=groups, bias=False, dilation=dilation)
16 |
17 |
18 | def conv1x1(in_planes, out_planes, stride=1):
19 | """1x1 convolution"""
20 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
21 |
22 |
23 | class BasicBlock(nn.Module):
24 | expansion = 1
25 |
26 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
27 | base_width=64, dilation=1, norm_layer=None):
28 | super(BasicBlock, self).__init__()
29 | if norm_layer is None:
30 | norm_layer = nn.BatchNorm2d
31 | if groups != 1 or base_width != 64:
32 | raise ValueError('BasicBlock only supports groups=1 and base_width=64')
33 | if dilation > 1:
34 | raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
35 | # Both self.conv1 and self.downsample layers downsample the input when stride != 1
36 | self.conv1 = conv3x3(inplanes, planes, stride)
37 | self.bn1 = norm_layer(planes)
38 | self.relu = nn.ReLU(inplace=True)
39 | self.conv2 = conv3x3(planes, planes)
40 | self.bn2 = norm_layer(planes)
41 | self.downsample = downsample
42 | self.stride = stride
43 |
44 | def forward(self, x):
45 | identity = x
46 |
47 | out = self.conv1(x)
48 | out = self.bn1(out)
49 | out = self.relu(out)
50 |
51 | out = self.conv2(out)
52 | out = self.bn2(out)
53 |
54 | if self.downsample is not None:
55 | identity = self.downsample(x)
56 |
57 | out += identity
58 | out = self.relu(out)
59 |
60 | return out
61 |
62 |
63 | class Bottleneck(nn.Module):
64 | expansion = 4
65 |
66 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
67 | base_width=64, dilation=1, norm_layer=None):
68 | super(Bottleneck, self).__init__()
69 | if norm_layer is None:
70 | norm_layer = nn.BatchNorm2d
71 | width = int(planes * (base_width / 64.)) * groups
72 | # Both self.conv2 and self.downsample layers downsample the input when stride != 1
73 | self.conv1 = conv1x1(inplanes, width)
74 | self.bn1 = norm_layer(width)
75 | self.conv2 = conv3x3(width, width, stride, groups, dilation)
76 | self.bn2 = norm_layer(width)
77 | self.conv3 = conv1x1(width, planes * self.expansion)
78 | self.bn3 = norm_layer(planes * self.expansion)
79 | self.relu = nn.ReLU(inplace=True)
80 | self.downsample = downsample
81 | self.stride = stride
82 |
83 | def forward(self, x):
84 | identity = x
85 |
86 | out = self.conv1(x)
87 | out = self.bn1(out)
88 | out = self.relu(out)
89 |
90 | out = self.conv2(out)
91 | out = self.bn2(out)
92 | out = self.relu(out)
93 |
94 | out = self.conv3(out)
95 | out = self.bn3(out)
96 |
97 | if self.downsample is not None:
98 | identity = self.downsample(x)
99 |
100 | out += identity
101 | out = self.relu(out)
102 |
103 | return out
104 |
105 |
106 | class ResNet(nn.Module):
107 |
108 | def __init__(self, arch, zero_init_residual=False,
109 | groups=1, width_per_group=64, replace_stride_with_dilation=None,
110 | norm_layer=None):
111 | super(ResNet, self).__init__()
112 | resnets = {
113 | 'resnet18': [BasicBlock, [2, 2, 2, 2]],
114 | 'resnet34': [BasicBlock, [3, 4, 6, 3]],
115 | 'resnet50': [Bottleneck, [3, 4, 6, 3]],
116 | 'resnet101': [Bottleneck, [3, 4, 23, 3]],
117 | 'resnet152': [Bottleneck, [3, 8, 36, 3]],
118 | }
119 | block = resnets[arch][0]
120 | layers = resnets[arch][1]
121 |
122 | self.arch = arch
123 | if norm_layer is None:
124 | norm_layer = nn.BatchNorm2d
125 | self._norm_layer = norm_layer
126 |
127 | self.inplanes = 64
128 | self.dilation = 1
129 | if replace_stride_with_dilation is None:
130 | # each element in the tuple indicates if we should replace
131 | # the 2x2 stride with a dilated convolution instead
132 | replace_stride_with_dilation = [False, False, False]
133 | if len(replace_stride_with_dilation) != 3:
134 | raise ValueError("replace_stride_with_dilation should be None "
135 | "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
136 | self.groups = groups
137 | self.base_width = width_per_group
138 |
139 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
140 | bias=False)
141 | self.bn1 = norm_layer(self.inplanes)
142 | self.relu = nn.ReLU(inplace=True)
143 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
144 | self.layer1 = self._make_layer(block, 64, layers[0])
145 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
146 | dilate=replace_stride_with_dilation[0])
147 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
148 | dilate=replace_stride_with_dilation[1])
149 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
150 | dilate=replace_stride_with_dilation[2])
151 | # extra 额外层,用于在c5基础上输出p6,p7
152 | self.conv6 = nn.Conv2d(512*block.expansion, 256, kernel_size=3, stride=2, padding=1)
153 | self.conv7 = nn.Conv2d(256, 256, kernel_size=3, stride=2, padding=1)
154 |
155 | for m in self.modules():
156 | if isinstance(m, nn.Conv2d):
157 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
158 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
159 | nn.init.constant_(m.weight, 1)
160 | nn.init.constant_(m.bias, 0)
161 |
162 | # Zero-initialize the last BN in each residual branch,
163 | # so that the residual branch starts with zeros, and each residual block behaves like an identity.
164 | # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
165 | if zero_init_residual:
166 | for m in self.modules():
167 | if isinstance(m, Bottleneck):
168 | nn.init.constant_(m.bn3.weight, 0)
169 | elif isinstance(m, BasicBlock):
170 | nn.init.constant_(m.bn2.weight, 0)
171 |
172 | def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
173 | norm_layer = self._norm_layer
174 | downsample = None
175 | previous_dilation = self.dilation
176 | if dilate:
177 | self.dilation *= stride
178 | stride = 1
179 | if stride != 1 or self.inplanes != planes * block.expansion:
180 | downsample = nn.Sequential(
181 | conv1x1(self.inplanes, planes * block.expansion, stride),
182 | norm_layer(planes * block.expansion),
183 | )
184 |
185 | layers = []
186 | layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
187 | self.base_width, previous_dilation, norm_layer))
188 | self.inplanes = planes * block.expansion
189 | for _ in range(1, blocks):
190 | layers.append(block(self.inplanes, planes, groups=self.groups,
191 | base_width=self.base_width, dilation=self.dilation,
192 | norm_layer=norm_layer))
193 |
194 | return nn.Sequential(*layers)
195 |
196 | def forward(self, x):
197 | c1 = self.conv1(x)
198 | c1 = self.bn1(c1)
199 | c1 = self.relu(c1)
200 | c1 = self.maxpool(c1)
201 |
202 | c2 = self.layer1(c1)
203 | c3 = self.layer2(c2)
204 | c4 = self.layer3(c3)
205 | c5 = self.layer4(c4)
206 |
207 | p6 = self.conv6(c5)
208 | p7 = self.conv7(F.relu(p6))
209 | return c3, c4, c5, p6, p7
210 |
211 | def load_weights(self):
212 | model_urls = {
213 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
214 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
215 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
216 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
217 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
218 | }
219 |
220 | url = model_urls[self.arch]
221 | weight_name = url.split('/')[-1]
222 | weight_path = cfg.FILE.PRETRAIN_WEIGHT_ROOT
223 | weight_file = os.path.join(weight_path, weight_name)
224 |
225 | if not os.path.exists(weight_file):
226 | if not os.path.exists(weight_path):
227 | os.makedirs(weight_path)
228 |
229 | print(' {} no exist ,downloading .....'.format(weight_name))
230 | wget.download(url=url, out=weight_file)
231 |
232 | print(' --- donwload to {} finish --- '.format(weight_file))
233 | self.load_state_dict(torch.load(weight_file), strict=False)
234 | print(' --- load weight finish --- ')
235 |
236 |
237 | def build_resnet(arch, pretrained=True, **kwargs):
238 | assert arch in ['resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152']
239 | model = ResNet(arch, **kwargs)
240 | if pretrained:
241 | model.load_weights()
242 | return model
243 |
244 |
245 | if __name__ == '__main__':
246 | import torch
247 | net = build_resnet('resnet18',pretrained=False)
248 | c3,c4,c5,p6,p7=net(torch.ones((1,3,600,600)))
249 | print(c3.size())
250 | print(c4.size())
251 | print(c5.size())
252 | print(p6.size())
253 | print(p7.size())
254 |
--------------------------------------------------------------------------------
/Model/base_models/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author : LG
3 | from .Resnet import build_resnet
--------------------------------------------------------------------------------
/Model/base_models/__pycache__/Resnet.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Model/base_models/__pycache__/Resnet.cpython-37.pyc
--------------------------------------------------------------------------------
/Model/base_models/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Model/base_models/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/Model/evaler.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author : LG
3 | import torch
4 | from Configs import _C as cfg
5 | from Utils import eval_detection_voc
6 | from tqdm import tqdm
7 | from torch.nn import DataParallel
8 | from torch import nn
9 | from Data import our_dataloader_test
10 | from .struct import postprocessor
11 |
12 |
13 | class Evaler(object):
14 | """
15 | 模型测试器,不指定参数时,均默认使用Configs中配置的参数
16 | *** 推荐使用Configs文件管理参数, 不推荐在函数中进行参数指定, 只是为了扩展 ***
17 |
18 | 模型在测试时,会使用DataParallel进行包装,以便于在多GPU上进行测试
19 | 本测试器只支持GPU训练,单机单卡与单机单卡均可,但不支持cpu,不支持多机多卡(别问为啥不支持多机多卡.穷!!!)
20 |
21 | eg:
22 | evaler = Evaler(cfg,eval_devices=[0,1]) # 使用俩块GPU进行测试,使用时请指定需使用的gpu编号,终端运行nvidia-smi进行查看
23 | ap, map = evaler(net,test_dataset=test_dataset)
24 | """
25 | def __init__(self, cfg, eval_devices=None):
26 | self.cfg = cfg
27 | self.postprocessor = postprocessor(cfg)
28 |
29 | self.eval_devices = self.cfg.DEVICE.TEST_DEVICES
30 | if eval_devices:
31 | self.eval_devices = eval_devices
32 |
33 | def __call__(self, model, test_dataset):
34 | model.eval()
35 | if not isinstance(model, nn.DataParallel):
36 | model = DataParallel(model, device_ids=self.eval_devices)
37 | else:
38 | model = DataParallel(model.module, device_ids=self.eval_devices)
39 | test_loader = our_dataloader_test(dataset=test_dataset, batch_size=20)
40 | results_dict = self.eval_model_inference(model, data_loader=test_loader)
41 | result = cal_ap_map(results_dict, test_dataset=test_loader.dataset)
42 | ap, map = result['ap'], result['map']
43 | return ap, map
44 |
45 | def eval_model_inference(self, model, data_loader):
46 | with torch.no_grad():
47 | results_dict = {}
48 | print(' Evaluating...... use GPU : {}'.format(self.eval_devices))
49 | for images, boxes, labels, image_names in tqdm(data_loader):
50 | cls_logits, bbox_pred = model(images)
51 | results = self.postprocessor(cls_logits, bbox_pred)
52 | for image_name, result in zip(image_names, results):
53 | pred_boxes, pred_labels, pred_scores = result
54 | pred_boxes, pred_labels, pred_scores = pred_boxes.to('cpu').numpy(), \
55 | pred_labels.to('cpu').numpy(), \
56 | pred_scores.to('cpu').numpy()
57 | results_dict.update({image_name: {'pred_boxes': pred_boxes,
58 | 'pred_labels': pred_labels,
59 | 'pred_scores': pred_scores}})
60 | return results_dict
61 |
62 |
63 | def cal_ap_map(results_dict,test_dataset):
64 | pred_boxes_list = []
65 | pred_labels_list = []
66 | pred_scores_list = []
67 | gt_boxs_list = []
68 | gt_labels_list = []
69 | gt_difficult_list = []
70 | for img_name in results_dict:
71 | gt_boxs, gt_labels, gt_difficult = test_dataset._get_annotation(img_name)
72 | size = test_dataset.get_img_size(img_name)
73 | w, h = size['width'],size['height']
74 | pred_boxes, pred_labels, pred_scores= results_dict[img_name]['pred_boxes'],results_dict[img_name]['pred_labels'],results_dict[img_name]['pred_scores']
75 | pred_boxes[:, 0::2] *= (w / cfg.MODEL.INPUT.IMAGE_SIZE)
76 | pred_boxes[:, 1::2] *= (h / cfg.MODEL.INPUT.IMAGE_SIZE)
77 | pred_boxes_list.append(pred_boxes)
78 | pred_labels_list.append(pred_labels)
79 | pred_scores_list.append(pred_scores)
80 | gt_boxs_list.append(gt_boxs)
81 | gt_labels_list.append(gt_labels)
82 | gt_difficult_list.append(gt_difficult)
83 | result = eval_detection_voc(pred_bboxes=pred_boxes_list,
84 | pred_labels=pred_labels_list,
85 | pred_scores=pred_scores_list,
86 | gt_bboxes=gt_boxs_list,
87 | gt_labels=gt_labels_list,
88 | gt_difficults=gt_difficult_list)
89 | return result
90 |
--------------------------------------------------------------------------------
/Model/retainnet.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author : LG
3 |
4 | from .base_models import build_resnet
5 | from .struct import fpn, predictor, postprocessor
6 | from torch import nn
7 | from Data.Transfroms import transfrom
8 | from vizer.draw import draw_boxes
9 | import torch
10 | from PIL import Image
11 | import numpy as np
12 | import time
13 |
14 | class RetainNet(nn.Module):
15 | """
16 | :return cls_logits, torch.Size([C, 67995, num_classes])
17 | bbox_pred, torch.Size([C, 67995, 4])
18 | """
19 | def __init__(self,cfg=None, resnet=None):
20 | super(RetainNet,self).__init__()
21 | self.resnet = 'resnet50'
22 | self.num_classes = 21
23 | self.num_anchors = 9
24 | self.cfg = cfg
25 | if cfg:
26 | self.resnet = cfg.MODEL.BASEMODEL
27 | self.num_classes = cfg.DATA.DATASET.NUM_CLASSES
28 | self.num_anchors = cfg.MODEL.ANCHORS.NUMS
29 | if resnet:
30 | self.resnet = resnet
31 | expansion_list={
32 | 'resnet18': 1,
33 | 'resnet34': 1,
34 | 'resnet50': 4,
35 | 'resnet101': 4,
36 | 'resnet152': 4,
37 | }
38 | assert self.resnet in expansion_list
39 |
40 | self.backbone = build_resnet(self.resnet, pretrained=True)
41 | expansion = expansion_list[self.resnet]
42 | self.fpn = fpn(channels_of_fetures=[128*expansion, 256*expansion, 512*expansion])
43 | self.predictor = predictor(num_anchors=self.num_anchors, num_classes=self.num_classes) # num_anchors 默认为9,与anchor生成相对应
44 | self.postprocessor = postprocessor(cfg)
45 |
46 | def load_pretrained_weight(self, weight_pkl):
47 | self.load_state_dict(torch.load(weight_pkl))
48 |
49 | def forward(self, x):
50 | c3, c4, c5, p6, p7 = self.backbone(x) # resnet输出五层特征图
51 | p3, p4, p5 = self.fpn([c3, c4, c5]) # 前三层特征图进FPN
52 | features = [p3, p4, p5, p6, p7]
53 | cls_logits, bbox_pred = self.predictor(features)
54 | return cls_logits, bbox_pred
55 |
56 | def forward_with_postprocess(self, images):
57 | """
58 | 前向传播并后处理
59 | :param images:
60 | :return:
61 | """
62 | cls_logits, bbox_pred = self.forward(images)
63 | detections = self.postprocessor(cls_logits, bbox_pred)
64 | return detections
65 |
66 | @torch.no_grad()
67 | def Detect_single_img(self, image, score_threshold=0.7, device='cuda'):
68 | """
69 | 检测单张照片
70 | eg:
71 | image, boxes, labels, scores= net.Detect_single_img(img)
72 | plt.imshow(image)
73 | plt.show()
74 |
75 | :param image: 图片,PIL.Image.Image
76 | :param score_threshold: 阈值
77 | :param device: 检测时所用设备,默认'cuda'
78 | :return: 添加回归框的图片(np.array),回归框,标签,分数
79 | """
80 | self.eval()
81 | assert isinstance(image, Image.Image)
82 | w, h = image.width, image.height
83 | images_tensor = transfrom(self.cfg, is_train=False)(np.array(image))[0].unsqueeze(0)
84 |
85 | self.to(device)
86 | images_tensor = images_tensor.to(device)
87 | time1 = time.time()
88 | detections = self.forward_with_postprocess(images_tensor)[0]
89 | boxes, labels, scores = detections
90 | boxes, labels, scores = boxes.to('cpu').numpy(), labels.to('cpu').numpy(), scores.to('cpu').numpy()
91 | boxes[:, 0::2] *= (w / self.cfg.MODEL.INPUT.IMAGE_SIZE)
92 | boxes[:, 1::2] *= (h / self.cfg.MODEL.INPUT.IMAGE_SIZE)
93 |
94 | indices = scores > score_threshold
95 | boxes = boxes[indices]
96 | labels = labels[indices]
97 | scores = scores[indices]
98 | print("Detect {} object, inference cost {:.2f} ms".format(len(scores), (time.time() - time1) * 1000))
99 | # 图像数据加框
100 | drawn_image = draw_boxes(image=image, boxes=boxes, labels=labels,
101 | scores=scores, class_name_map=self.cfg.DATA.DATASET.CLASS_NAME).astype(np.uint8)
102 | return drawn_image, boxes, labels, scores
103 |
104 | @torch.no_grad()
105 | def Detect_video(self, video_path, score_threshold=0.5, save_video_path=None, show=True):
106 | """
107 |         Detect objects in a video.
108 |         :param video_path: path to the video, eg: /XXX/aaa.mp4
109 |         :param score_threshold: score threshold for keeping detections
110 |         :param save_video_path: output path; if not given, the result video is not saved
111 |         :param show: show frames while detecting (may stutter, limited by inference speed)
112 | :return:
113 | """
114 | import cv2
115 | cap = cv2.VideoCapture(video_path)
116 | fourcc = cv2.VideoWriter_fourcc(*'MJPG')
117 |         width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
118 |         height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
119 |         if save_video_path:
120 |             out = cv2.VideoWriter(save_video_path, fourcc, cap.get(cv2.CAP_PROP_FPS), (width, height))
121 |         while cap.isOpened():
122 |             ret, frame = cap.read()
123 |             if ret:
124 | image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
125 | drawn_image, boxes, labels, scores = self.Detect_single_img(image=image,
126 | device='cuda:0',
127 | score_threshold=score_threshold)
128 | frame = cv2.cvtColor(np.asarray(drawn_image), cv2.COLOR_RGB2BGR)
129 | if show:
130 | cv2.imshow('frame', frame)
131 | if save_video_path:
132 | out.write(frame)
133 | if cv2.waitKey(1) & 0xFF == ord('q'):
134 | break
135 | else:
136 | break
137 | cap.release()
138 | if save_video_path:
139 | out.release()
140 | cv2.destroyAllWindows()
141 | return True
142 |
--------------------------------------------------------------------------------
/Model/struct/Anchors.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author : LG
3 | from torch import nn
4 | import numpy as np
5 | import torch
6 | from Utils import corner_form_to_center_form, center_form_to_corner_form
7 |
8 | class priorbox:
9 | """
10 |     RetinaNet anchors; the generation strategy differs from SSD
11 | """
12 | def __init__(self,cfg=None):
13 | self.features_maps = [(75, 75), (38, 38), (19, 19), (10, 10), (5, 5)]
14 | self.anchor_sizes = [32, 64, 128, 256, 512]
15 | self.ratios = np.array([0.5, 1, 2])
16 | self.scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)])
17 | self.image_size = 600
18 | self.clip = True
19 | if cfg:
20 | self.features_maps = cfg.MODEL.ANCHORS.FEATURE_MAPS
21 | self.anchor_sizes = cfg.MODEL.ANCHORS.SIZES
22 | self.ratios = np.array(cfg.MODEL.ANCHORS.RATIOS)
23 | self.scales = np.array(cfg.MODEL.ANCHORS.SCALES)
24 | self.image_size = cfg.MODEL.INPUT.IMAGE_SIZE
25 | self.clip = cfg.MODEL.ANCHORS.CLIP
26 |
27 | def __call__(self):
28 | priors = []
29 | for k , (feature_map_w, feature_map_h) in enumerate(self.features_maps):
30 | for i in range(feature_map_w):
31 | for j in range(feature_map_h):
32 | cx = (j + 0.5) / feature_map_w
33 | cy = (i + 0.5) / feature_map_h
34 |
35 |                     size = self.anchor_sizes[k]/self.image_size  # convert the anchor size to a fraction of the image size
36 | 
37 |                     sides_square = self.scales * size  # side lengths of the square anchors
38 |                     for side_square in sides_square:
39 |                         priors.append([cx, cy, side_square, side_square])  # add the square anchors
40 | 
41 |                     sides_long = sides_square*2**(1/2)  # long sides of the rectangular anchors
42 |                     for side_long in sides_long:
43 |                         priors.append([cx, cy, side_long, side_long/2])  # add the rectangular anchors; the short side is half the long side
44 |                         priors.append([cx, cy, side_long/2, side_long])
45 |
46 | priors = torch.tensor(priors)
47 |         if self.clip:  # clip anchors that fall outside the image
48 |             priors = center_form_to_corner_form(priors)  # convert to [xmin, ymin, xmax, ymax] form before clipping
49 |             priors.clamp_(max=1, min=0)
50 |             priors = corner_form_to_center_form(priors)  # convert back to [x, y, w, h] form
51 | return priors
52 |
53 | if __name__ == '__main__':
54 | anchors = priorbox()()
55 | print(anchors[-10:])
56 | print(len(anchors))
57 |
--------------------------------------------------------------------------------
/Model/struct/Focal_Loss.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author : LG
3 | from torch import nn
4 | import torch
5 | from torch.nn import functional as F
6 |
7 | class focal_loss(nn.Module):
8 | def __init__(self, alpha=0.25, gamma=2, num_classes = 3, reduction='mean'):
9 | """
10 |         focal_loss, -α(1-yi)**γ *ce_loss(xi,yi). Can be used on its own as a replacement for cross_entropy.
11 |         A step-by-step implementation of the focal loss.
12 |         :param alpha: class weight α. When α is a list it gives per-class weights; when it is a scalar the weights become [α, 1-α, 1-α, ....], which is commonly used in detection to suppress the background class. RetinaNet uses 0.25.
13 |         :param gamma: γ, modulating factor for hard/easy examples. RetinaNet uses 2.
14 |         :param num_classes: number of classes
15 |         :param reduction: how the loss is reduced, 'mean' by default
16 | """
17 | super(focal_loss,self).__init__()
18 | self.reduction = reduction
19 | if isinstance(alpha,list):
20 |             assert len(alpha)==num_classes   # α can be given as a list of size [num_classes] to weight each class individually
21 |             print("Focal_loss alpha = {}, per-class weights will be applied".format(alpha))
22 | self.alpha = torch.Tensor(alpha)
23 | else:
24 |             assert alpha<1   # if α is a scalar, down-weight the first class (the background class in detection)
25 |             print(" --- Focal_loss alpha = {}, the background class will be down-weighted; intended for detection tasks --- ".format(alpha))
26 |             self.alpha = torch.zeros(num_classes)
27 |             self.alpha[0] += alpha
28 |             self.alpha[1:] += (1-alpha)  # α finally becomes [α, 1-α, 1-α, 1-α, ...], size: [num_classes]
29 |
30 | self.gamma = gamma
31 |
32 | def forward(self, preds, labels):
33 | """
34 |         Compute the focal loss
35 |         :param preds: predicted class scores. size: [B,N,C]
36 |         :param labels: ground-truth classes. size: [B,N]
37 | :return:
38 | """
39 | # assert preds.dim()==2 and labels.dim()==1
40 | preds = preds.view(-1,preds.size(-1))
41 | self.alpha = self.alpha.to(preds.device)
42 |         preds_softmax = F.softmax(preds, dim=1)  # log_softmax is not used directly here because the softmax output itself is needed below (alternatively, use log_softmax and then exp)
43 |         preds_logsoft = torch.log(preds_softmax)
44 | 
45 |         preds_softmax = preds_softmax.gather(1,labels.view(-1,1))   # this implements nll_loss (cross_entropy = log_softmax + nll_loss)
46 |         preds_logsoft = preds_logsoft.gather(1,labels.view(-1,1))
47 |         self.alpha = self.alpha.gather(0,labels.view(-1))
48 |         loss = -torch.mul(torch.pow((1-preds_softmax), self.gamma), preds_logsoft)  # torch.pow((1-preds_softmax), self.gamma) is the (1-pt)**γ term of the focal loss
49 |
50 | loss = torch.mul(self.alpha, loss.t())
51 | if self.reduction== 'mean':
52 | loss = loss.mean()
53 | elif self.reduction== 'sum':
54 | loss = loss.sum()
55 | return loss
56 |
57 |
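58 | if __name__ == '__main__':
59 |     # A minimal usage sketch (not part of the training pipeline): focal_loss used as a
60 |     # drop-in replacement for cross_entropy on random predictions. The shapes and the
61 |     # class count below are illustrative only.
62 |     loss_fn = focal_loss(alpha=0.25, gamma=2, num_classes=21, reduction='mean')
63 |     preds = torch.randn(2, 100, 21)            # [B, N, C] predicted logits
64 |     labels = torch.randint(0, 21, (2, 100))    # [B, N] ground-truth classes
65 |     print(loss_fn(preds, labels))              # scalar loss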
--------------------------------------------------------------------------------
/Model/struct/Fpn.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author : LG
3 |
4 | from torch import nn
5 | from torch.nn import functional as F
6 |
7 | class fpn(nn.Module):
8 | def __init__(self,channels_of_fetures, channel_out=256):
9 | """
10 |         fpn, feature pyramid network
11 |         :param channels_of_fetures: list of channel counts of the input feature maps; must match the inputs
12 |         :param channel_out: number of output channels of every pyramid level
13 | """
14 | super(fpn,self).__init__()
15 | self.channels_of_fetures = channels_of_fetures
16 |
17 | self.lateral_conv1 = nn.Conv2d(channels_of_fetures[2], channel_out, kernel_size=1, stride=1, padding=0)
18 | self.lateral_conv2 = nn.Conv2d(channels_of_fetures[1], channel_out, kernel_size=1, stride=1, padding=0)
19 | self.lateral_conv3 = nn.Conv2d(channels_of_fetures[0], channel_out, kernel_size=1, stride=1, padding=0)
20 |
21 | self.top_down_conv1 = nn.Conv2d(channel_out, channel_out, kernel_size=3, stride=1, padding=1)
22 | self.top_down_conv2 = nn.Conv2d(channel_out, channel_out, kernel_size=3, stride=1, padding=1)
23 | self.top_down_conv3 = nn.Conv2d(channel_out, channel_out, kernel_size=3, stride=1, padding=1)
24 |
25 | def forward(self, features):
26 | """
27 |         Top-down pathway with lateral connections.
28 |         :param features: [c3, c4, c5] feature maps from the backbone
29 |         :return: p3, p4, p5 pyramid feature maps, each with channel_out channels
30 | """
31 | c3, c4, c5 = features
32 |
33 | p5 = self.lateral_conv1(c5) # 19
34 | p4 = self.lateral_conv2(c4) # 38
35 | p3 = self.lateral_conv3(c3) # 75
36 |
37 | p4 = F.interpolate(input=p5, size=(p4.size(2),p4.size(3)), mode="nearest") + p4
38 | p3 = F.interpolate(input=p4, size=(p3.size(2),p3.size(3)), mode="nearest") + p3
39 |
40 | p5 = self.top_down_conv1(p5)
41 | p4 = self.top_down_conv2(p4)
42 | p3 = self.top_down_conv3(p3)
43 |
44 | return p3, p4, p5
45 |
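46 | if __name__ == '__main__':
47 |     # A minimal shape-check sketch (not part of the original file). The channel counts
48 |     # below assume a resnet50 backbone (expansion 4) and a 600x600 input; the spatial
49 |     # sizes match the default feature map sizes used by the anchor generator.
50 |     import torch
51 |     net = fpn(channels_of_fetures=[512, 1024, 2048], channel_out=256)
52 |     c3 = torch.randn(1, 512, 75, 75)
53 |     c4 = torch.randn(1, 1024, 38, 38)
54 |     c5 = torch.randn(1, 2048, 19, 19)
55 |     p3, p4, p5 = net([c3, c4, c5])
56 |     print(p3.shape, p4.shape, p5.shape)  # all have 256 channels, spatial sizes 75/38/19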
--------------------------------------------------------------------------------
/Model/struct/MultiBoxLoss.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author : LG
3 | import torch.nn as nn
4 | from .Focal_Loss import focal_loss
5 | import torch
6 | import torch.nn.functional as F
7 | import math
8 |
9 | __all__ = ['multiboxloss']
10 |
11 | class multiboxloss(nn.Module):
12 | def __init__(self, cfg=None, alpha=None, gamma=None, num_classes=None, neg_pos_ratio=None):
13 | """
14 |         RetinaNet loss, made up of a classification loss (focal loss)
15 |         and a box regression loss (smooth_l1_loss).
16 |         The two losses are returned separately so they can be analysed during training.
17 | """
18 | super(multiboxloss, self).__init__()
19 | if cfg:
20 | self.alpha = cfg.MULTIBOXLOSS.ALPHA
21 | self.gamma = cfg.MULTIBOXLOSS.GAMMA
22 | self.num_classes = cfg.DATA.DATASET.NUM_CLASSES
23 | self.neg_pos_ratio = cfg.TRAIN.NEG_POS_RATIO
24 | if alpha:
25 | self.alpha = alpha
26 | if gamma:
27 | self.gamma = gamma
28 | if num_classes:
29 | self.num_classes = num_classes
30 | if neg_pos_ratio:
31 | self.neg_pos_ratio = neg_pos_ratio
32 |
33 | self.loc_loss_fn = nn.SmoothL1Loss(reduction='sum')
34 |         self.cls_loss_fn = focal_loss(alpha=self.alpha, gamma=self.gamma, num_classes=self.num_classes, reduction='sum')  # the classification loss is a focal loss
35 | print(" --- Multiboxloss : α={} γ={} num_classes={}".format(self.alpha, self.gamma, self.num_classes))
36 |
37 | def forward(self, confidence, predicted_locations, labels, gt_locations):
38 | """
39 |         Compute the classification loss and the box regression loss
40 |         Args:
41 |             confidence (batch_size, num_priors, num_classes): predicted classes
42 |             predicted_locations (batch_size, num_priors, 4): predicted locations
43 |             labels (batch_size, num_priors): ground-truth classes of all priors
44 |             gt_locations (batch_size, num_priors, 4): ground-truth locations of all priors
45 | """
46 | num_classes = confidence.size(2)
47 |
48 | with torch.no_grad():
49 | loss = -F.log_softmax(confidence, dim=2)[:, :, 0]
50 | mask = hard_negative_mining(loss, labels, self.neg_pos_ratio)
51 |
52 |
53 | classification_loss = self.cls_loss_fn(confidence[mask, :], labels[mask])
54 |
55 |         # regression loss, smooth L1
56 | pos_mask = labels > 0
57 | predicted_locations = predicted_locations[pos_mask, :].view(-1, 4)
58 | gt_locations = gt_locations[pos_mask, :].view(-1, 4)
59 | smooth_l1_loss = self.loc_loss_fn(predicted_locations, gt_locations)
60 | num_pos = gt_locations.size(0)
61 | return smooth_l1_loss / num_pos, classification_loss / (num_pos * self.neg_pos_ratio)
62 |
63 | def hard_negative_mining(loss, labels, neg_pos_ratio=3):
64 | """
65 |     Limits the ratio of negative to positive examples during training. By default there are three times as many negatives as positives.
66 |     Args:
67 |         loss (N, num_priors): the loss for each example.
68 |         labels (N, num_priors): the labels.
69 |         neg_pos_ratio: ratio of negatives to positives: num_negatives / num_positives
70 | """
71 | pos_mask = labels > 0
72 | num_pos = pos_mask.long().sum(dim=1, keepdim=True)
73 | num_neg = num_pos * neg_pos_ratio
74 |
75 |     loss[pos_mask] = -math.inf  # set positives to -inf so they rank last
76 |     # two sorts give each element's rank in the descending loss ordering
77 |     _, indexes = loss.sort(dim=1, descending=True)  # descending sort, returns (values, indices)
78 |     _, orders = indexes.sort(dim=1)
79 |     neg_mask = orders < num_neg  # keep the num_neg background priors with the highest loss
80 |     return pos_mask | neg_mask  # foreground plus the selected background
81 |
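82 | if __name__ == '__main__':
83 |     # A minimal usage sketch (not part of the original file): random predictions and
84 |     # targets, purely to show the expected shapes; the class count is illustrative.
85 |     criterion = multiboxloss(alpha=0.25, gamma=2, num_classes=21, neg_pos_ratio=3)
86 |     confidence = torch.randn(2, 100, 21)       # [B, num_priors, num_classes]
87 |     locations = torch.randn(2, 100, 4)         # [B, num_priors, 4]
88 |     labels = torch.randint(0, 21, (2, 100))    # [B, num_priors]
89 |     gt_locations = torch.randn(2, 100, 4)      # [B, num_priors, 4]
90 |     reg_loss, cls_loss = criterion(confidence, locations, labels, gt_locations)
91 |     print(reg_loss.item(), cls_loss.item())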
--------------------------------------------------------------------------------
/Model/struct/PostProcess.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author : LG
3 | import torch
4 | import torchvision
5 | from Utils.Boxs_op import center_form_to_corner_form, convert_locations_to_boxes
6 | from .Anchors import priorbox
7 | import torch.nn.functional as F
8 |
9 | __all__ = ['postprocessor']
10 |
11 | class postprocessor:
12 | def __init__(self, cfg):
13 | super().__init__()
14 | self.cfg = cfg
15 | self.width = cfg.MODEL.INPUT.IMAGE_SIZE
16 | self.height = cfg.MODEL.INPUT.IMAGE_SIZE
17 |
18 | def __call__(self, cls_logits, bbox_pred):
19 | priors = priorbox(self.cfg)().to(cls_logits.device)
20 | batches_scores = F.softmax(cls_logits, dim=2)
21 | boxes = convert_locations_to_boxes(
22 |             bbox_pred, priors, self.cfg.MODEL.ANCHORS.CENTER_VARIANCE, self.cfg.MODEL.ANCHORS.SIZE_VARIANCE
23 | )
24 | batches_boxes = center_form_to_corner_form(boxes)
25 |
26 | device = batches_scores.device
27 | batch_size = batches_scores.size(0)
28 | results = []
29 | for batch_id in range(batch_size):
30 | processed_boxes = []
31 | processed_scores = []
32 | processed_labels = []
33 |
34 | per_img_scores, per_img_boxes = batches_scores[batch_id], batches_boxes[batch_id] # (N, #CLS) (N, 4)
35 | for class_id in range(1, per_img_scores.size(1)): # skip background
36 | scores = per_img_scores[:, class_id]
37 | mask = scores > self.cfg.MODEL.TEST.CONFIDENCE_THRESHOLD
38 | scores = scores[mask]
39 | if scores.size(0) == 0:
40 | continue
41 | boxes = per_img_boxes[mask, :]
42 | boxes[:, 0::2] *= self.width
43 | boxes[:, 1::2] *= self.height
44 |
45 | keep = boxes_nms(boxes, scores, self.cfg.MODEL.TEST.NMS_THRESHOLD, self.cfg.MODEL.TEST.MAX_PER_CLASS)
46 |
47 | nmsed_boxes = boxes[keep, :]
48 | nmsed_labels = torch.tensor([class_id] * keep.size(0), device=device)
49 | nmsed_scores = scores[keep]
50 |
51 | processed_boxes.append(nmsed_boxes)
52 | processed_scores.append(nmsed_scores)
53 | processed_labels.append(nmsed_labels)
54 |
55 | if len(processed_boxes) == 0:
56 | processed_boxes = torch.empty(0, 4)
57 | processed_labels = torch.empty(0)
58 | processed_scores = torch.empty(0)
59 | else:
60 | processed_boxes = torch.cat(processed_boxes, 0)
61 | processed_labels = torch.cat(processed_labels, 0)
62 | processed_scores = torch.cat(processed_scores, 0)
63 |
64 | if processed_boxes.size(0) > self.cfg.MODEL.TEST.MAX_PER_IMAGE > 0:
65 | processed_scores, keep = torch.topk(processed_scores, k=self.cfg.MODEL.TEST.MAX_PER_IMAGE)
66 | processed_boxes = processed_boxes[keep, :]
67 | processed_labels = processed_labels[keep]
68 | results.append([processed_boxes, processed_labels, processed_scores])
69 | return results
70 |
71 | def boxes_nms(boxes, scores, nms_thresh, max_count=-1):
72 | """ Performs non-maximum suppression, run on GPU or CPU according to
73 | boxes's device.
74 | Args:
75 | boxes(Tensor): `xyxy` mode boxes, use absolute coordinates(or relative coordinates), shape is (n, 4)
76 | scores(Tensor): scores, shape is (n, )
77 | nms_thresh(float): thresh
78 | max_count (int): if > 0, then only the top max_proposals are kept after non-maximum suppression
79 | Returns:
80 | indices kept.
81 | """
82 | keep = torchvision.ops.nms(boxes, scores, nms_thresh)
83 | if max_count > 0:
84 | keep = keep[:max_count]
85 | return keep
86 |
87 |
--------------------------------------------------------------------------------
/Model/struct/Predictor.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author : LG
3 | from torch import nn
4 | import torch
5 |
6 | class predictor(nn.Module):
7 | """
8 |     RetinaNet classification (cls) and regression (reg) heads
9 | """
10 | def __init__(self, num_anchors, num_classes):
11 | super().__init__()
12 | self.num_classes = num_classes
13 | self.num_anchors = num_anchors
14 | self.make_headers()
15 | self.reset_parameters()
16 |
17 | def forward(self, features):
18 | """
19 |         Classify and regress every feature point of the input feature maps
20 |         :param features: feature maps output by the FPN
21 |         :return: class predictions and box-regression predictions for every feature point
22 | """
23 | cls_logits = []
24 | bbox_pred = []
25 | batch_size = features[0].size(0)
26 | for feature in features:
27 |             cls_logit = self.cls_headers(feature)
28 |             cls_logits.append(cls_logit.permute(0, 2, 3, 1).contiguous().view(batch_size,-1,self.num_classes))
29 |             bbox_pred.append(self.reg_headers(feature).permute(0, 2, 3, 1).contiguous().view(batch_size,-1,4))
30 |
31 | cls_logits = torch.cat(cls_logits, dim=1)
32 | bbox_pred = torch.cat(bbox_pred, dim=1)
33 |
34 | return cls_logits, bbox_pred
35 |
36 | def make_headers(self):
37 | cls_headers = []
38 | reg_headers = []
39 |
40 | for _ in range(4):
41 | cls_headers.append(nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1))
42 | cls_headers.append(nn.ReLU(inplace=True))
43 |
44 | reg_headers.append(nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1))
45 | reg_headers.append(nn.ReLU(inplace=True))
46 |
47 | cls_headers.append(nn.Conv2d(256, self.num_anchors * self.num_classes, kernel_size=3, stride=1, padding=1))
48 | reg_headers.append(nn.Conv2d(256, self.num_anchors * 4, kernel_size=3, stride=1, padding=1))
49 |
50 | self.cls_headers = nn.Sequential(*cls_headers)
51 | self.reg_headers = nn.Sequential(*reg_headers)
52 |
53 | def reset_parameters(self):
54 | for m in self.modules():
55 | if isinstance(m, nn.Conv2d):
56 | nn.init.xavier_uniform_(m.weight)
57 | nn.init.zeros_(m.bias)
58 |
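59 | if __name__ == '__main__':
60 |     # A minimal shape-check sketch (not part of the original file). The spatial sizes
61 |     # below are arbitrary; only the 256 input channels are required by the heads.
62 |     net = predictor(num_anchors=9, num_classes=21)
63 |     feats = [torch.randn(1, 256, s, s) for s in (8, 4, 2)]
64 |     cls_logits, bbox_pred = net(feats)
65 |     print(cls_logits.shape)  # [1, 9*(8*8+4*4+2*2), 21] = [1, 756, 21]
66 |     print(bbox_pred.shape)   # [1, 756, 4]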
--------------------------------------------------------------------------------
/Model/struct/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author : LG
3 | from .Anchors import priorbox
4 | from .Focal_Loss import focal_loss
5 | from .Fpn import fpn
6 | from .MultiBoxLoss import multiboxloss
7 | from .PostProcess import postprocessor
8 | from .Predictor import predictor
--------------------------------------------------------------------------------
/Model/struct/__pycache__/Anchors.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Model/struct/__pycache__/Anchors.cpython-37.pyc
--------------------------------------------------------------------------------
/Model/struct/__pycache__/Focal_Loss.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Model/struct/__pycache__/Focal_Loss.cpython-37.pyc
--------------------------------------------------------------------------------
/Model/struct/__pycache__/Fpn.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Model/struct/__pycache__/Fpn.cpython-37.pyc
--------------------------------------------------------------------------------
/Model/struct/__pycache__/PostProcess.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Model/struct/__pycache__/PostProcess.cpython-37.pyc
--------------------------------------------------------------------------------
/Model/struct/__pycache__/Predictor.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Model/struct/__pycache__/Predictor.cpython-37.pyc
--------------------------------------------------------------------------------
/Model/struct/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Model/struct/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/Model/trainer.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author : LG
3 | import torch
4 | from torch.optim.lr_scheduler import MultiStepLR
5 | from Data import our_dataloader
6 | from .struct import multiboxloss
7 | from Utils.visdom_op import visdom_line, setup_visdom, visdom_bar
8 | from torch import nn
9 | from torch.nn import DataParallel
10 | import os
11 |
12 | __all__ = ['Trainer']
13 |
14 | class Trainer(object):
15 | """
16 |     Model trainer. Any argument left unspecified falls back to the value configured in Configs.
17 |     *** Managing parameters through the Configs file is recommended; passing them directly is only kept for extensibility ***
18 |     *** SGD is used by default; to use another optimizer, subclass this class and override the set_optimizer method ***
19 | 
20 |     During training the model is wrapped with DataParallel so that it can train on multiple GPUs.
21 |     This trainer only supports GPU training, either single-GPU or multi-GPU on a single machine; CPU and multi-machine training are not supported (don't ask why there is no multi-machine support: can't afford it!!!)
22 | 
23 |     eg:
24 |         trainer = Trainer(cfg)  # instantiate the trainer
25 |         trainer(net,train_dataset)  # train the model net on train_dataset
26 | """
27 | def __init__(self, cfg, max_iter=None, batch_size=None, train_devices=None,
28 | model_save_step=None, model_save_root=None, vis = None, vis_step=None):
29 | """
30 |         Trainer initialisation.
31 |         Parameters left as None use the defaults already set in the configuration file.
32 |         If you need to change a parameter, changing it in the Configs file is recommended;
33 |         passing parameters directly is discouraged and only kept for extensibility.
34 | 
35 |         :param cfg: configuration
36 |         :param max_iter: maximum number of training iterations
37 |         :param batch_size: batch size
38 |         :param train_devices: training devices, a list, eg: [0,1] uses GPUs 0 and 1 (GPU ids, see nvidia-smi); defaults to the value in the configuration file
39 |         :param vis: visdom.Visdom(), used to visualise training; plots the loss curves and the learning rate
40 |         :param model_save_step: how often (in iterations) the model is saved
41 |         :param vis_step: how often (in iterations) visdom is updated
42 | """
43 | self.cfg = cfg
44 |
45 | self.iterations = self.cfg.TRAIN.MAX_ITER
46 | if max_iter:
47 | self.iterations = max_iter
48 |
49 | self.batch_size = cfg.TRAIN.BATCH_SIZE
50 | if batch_size:
51 | self.batch_size = batch_size
52 |
53 | self.train_devices = cfg.DEVICE.TRAIN_DEVICES
54 | if train_devices:
55 | self.train_devices = train_devices
56 |
57 | self.model_save_root = cfg.FILE.MODEL_SAVE_ROOT
58 | if model_save_root:
59 | self.model_save_root = model_save_root
60 |
61 | if not os.path.exists(self.model_save_root):
62 | os.mkdir(self.model_save_root)
63 | self.model_save_step = self.cfg.STEP.MODEL_SAVE_STEP
64 | if model_save_step:
65 | self.model_save_step = model_save_step
66 |
67 | self.vis = setup_visdom()
68 | if vis:
69 | self.vis = vis
70 | self.vis_step = self.cfg.STEP.VIS_STEP
71 | if vis_step:
72 | self.vis_step = vis_step
73 |
74 | self.model = None
75 | self.loss_func = None
76 | self.optimizer = None
77 | self.scheduler = None
78 |
79 | def __call__(self, model, dataset):
80 | """
81 |         Run the trainer: pass in the model and the dataset.
82 | :param model:
83 | :param dataset:
84 | :return:
85 | """
86 | if not isinstance(model, nn.DataParallel):
87 |             # raise TypeError('Please wrap the model with DataParallel. eg: model = DataParallel(model, device_ids=[0,1,2]); device_ids selects the GPUs to use')
88 | model = DataParallel(model, device_ids=self.train_devices)
89 | self.model = model
90 | data_loader = our_dataloader(dataset, batch_size=self.batch_size, shuffle=True)
91 | print(' Max_iter = {}, Batch_size = {}'.format(self.iterations, self.batch_size))
92 | print(' Model will train on cuda:{}'.format(self.train_devices))
93 |
94 | num_gpu_use = len(self.train_devices)
95 | if (self.batch_size % num_gpu_use) != 0:
96 | raise ValueError(
97 |                 'You use {} gpus to train, but batch_size={} is not divisible by the number of gpus'.format(num_gpu_use, data_loader.batch_size))
98 |
99 | self.set_lossfunc()
100 | self.set_optimizer()
101 | self.set_scheduler()
102 |
103 | print("Set optimizer : {}".format(self.optimizer))
104 | print("Set scheduler : {}".format(self.scheduler))
105 | print("Set lossfunc : {}".format(self.loss_func))
106 |
107 |
108 | print(' Start Train......')
109 | print(' -------' * 20)
110 |
111 | for iteration, (images, boxes, labels, image_names) in enumerate(data_loader):
112 | iteration+=1
113 | boxes, labels = boxes.to('cuda'), labels.to('cuda')
114 | cls_logits, bbox_preds = self.model(images)
115 | reg_loss, cls_loss = self.loss_func(cls_logits, bbox_preds, labels, boxes)
116 |
117 | reg_loss = reg_loss.mean()
118 | cls_loss = cls_loss.mean()
119 | loss = reg_loss + cls_loss
120 |
121 | self.optimizer.zero_grad()
122 | loss.backward()
123 | self.optimizer.step()
124 | self.scheduler.step()
125 |
126 | lr = self.optimizer.param_groups[0]['lr']
127 |
128 | if iteration % 10 == 0:
129 | print('Iter : {}/{} | Lr : {} | Loss : {:.4f} | cls_loss : {:.4f} | reg_loss : {:.4f}'.format(iteration, self.iterations, lr, loss.item(), cls_loss.item(), reg_loss.item()))
130 |
131 | if self.vis and iteration % self.vis_step == 0:
132 | visdom_line(self.vis, y=[loss], x=iteration, win_name='loss')
133 | visdom_line(self.vis, y=[reg_loss], x=iteration, win_name='reg_loss')
134 | visdom_line(self.vis, y=[cls_loss], x=iteration, win_name='cls_loss')
135 | visdom_line(self.vis, y=[lr], x=iteration, win_name='lr')
136 |
137 | if iteration % self.model_save_step == 0:
138 | torch.save(model.module.state_dict(), '{}/model_{}.pkl'.format(self.model_save_root, iteration))
139 |
140 | if iteration > self.iterations:
141 | break
142 | return True
143 |
144 | def set_optimizer(self, lr=None, momentum=None, weight_decay=None):
145 | """
146 |         Configure the optimizer
147 |         :param lr: initial learning rate, default 0.001
148 |         :param momentum: momentum, default 0.9
149 |         :param weight_decay: weight decay (L2), default 5e-4
150 | :return:
151 | """
152 | if not lr:
153 | lr= self.cfg.OPTIM.LR
154 | if not momentum:
155 | momentum = self.cfg.OPTIM.MOMENTUM
156 | if not weight_decay:
157 | weight_decay = self.cfg.OPTIM.WEIGHT_DECAY
158 |
159 | self.optimizer = torch.optim.SGD(self.model.parameters(),
160 | lr=lr,
161 | momentum=momentum,
162 | weight_decay=weight_decay)
163 |
164 | def set_lossfunc(self, neg_pos_ratio=None):
165 | """
166 |         Configure the loss function
167 |         :param neg_pos_ratio: ratio of negatives to positives, default 3 (three negatives for every positive)
168 | :return:
169 | """
170 | if not neg_pos_ratio:
171 | neg_pos_ratio = self.cfg.TRAIN.NEG_POS_RATIO
172 | self.loss_func = multiboxloss(self.cfg)
173 | # print(' Trainer set loss_func : {}, neg_pos_ratio = {}'.format('multiboxloss', neg_pos_ratio))
174 |
175 | def set_scheduler(self, lr_steps=None, gamma=None):
176 | """
177 |         Configure the learning-rate schedule
178 |         :param lr_steps: default [80000, 10000]; when training reaches these iterations the learning rate is multiplied by gamma
179 |         :param gamma: default 0.1, i.e. the learning rate drops by a factor of 10
180 | :return:
181 | """
182 | if not lr_steps:
183 | lr_steps = self.cfg.OPTIM.SCHEDULER.LR_STEPS
184 | if not gamma:
185 | gamma = self.cfg.OPTIM.SCHEDULER.GAMMA
186 | self.scheduler = MultiStepLR(optimizer=self.optimizer,
187 | milestones=lr_steps,
188 | gamma=gamma)
189 | # print(' Trainer set scheduler : {}, lr_steps={}, gamma={}'.format('MultiStepLR', lr_steps, gamma))
190 |
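191 | # A minimal sketch (not part of the original file): as the class docstring notes, a
192 | # different optimizer can be used by subclassing Trainer and overriding set_optimizer.
193 | # The learning rate below still comes from the existing cfg.OPTIM.LR entry.
194 | class AdamTrainer(Trainer):
195 |     def set_optimizer(self, lr=None, momentum=None, weight_decay=None):
196 |         if not lr:
197 |             lr = self.cfg.OPTIM.LR
198 |         self.optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)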
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | GitHub usage tips:
2 | 
3 | **1. Fork the project if you want a copy of it under your own account.**
4 | 
5 | **2. Star it to keep following project updates.**
6 | 
7 | **3. Watch it to receive e-mail notifications.**
8 |
9 | ---
10 |
11 | # Retinanet-Pytorch
12 |
13 | A PyTorch implementation of the [Retinanet](https://arxiv.org/abs/1708.02002) object detection algorithm.
14 | 
15 | **This project is not an exact reproduction of the paper** (many parameters and implementation details differ from the original; questions are welcome in the issues).
16 | 
17 | For various reasons training has been tested but never run to completion, so no pretrained model is uploaded.
18 | 
19 | The project code itself has been verified. (You will still need to adapt it for your own use; not recommended for beginners.)
20 | ***
21 | The project follows an architecture similar to [SSD-Pytorch](https://github.com/yatengLG/SSD-Pytorch).
22 | 
23 | A large amount of code from [SSD-Pytorch](https://github.com/yatengLG/SSD-Pytorch), such as the trainer and the evaluator, is reused.
24 | 
25 | ***
26 | 
27 | **Single-machine multi-GPU support is implemented with torch.nn.DataParallel, which wraps the single-machine setup uniformly. Single-GPU and multi-GPU training and testing with user-selected GPUs are supported; multi-machine and CPU training/testing are not.
28 | Detection itself can run on any device (CPU or GPU).**
29 |
30 | ***
31 |
32 | # Requirements
33 |
34 |
35 | 1. pytorch
36 | 2. opencv-python
37 | 3. torchvision >= 0.3.0
38 | 4. Vizer
39 | 5. visdom
40 |
41 | (all of these can be installed with pip)
42 |
43 | ## 项目结构
44 |
45 | | Folder | File | Description |
46 | |:-------:|:-------:|:------:|
47 | | **Data** | | data-related code |
48 | | | Dataloader| data loader classes 'Our_Dataloader', 'Our_Dataloader_test'|
49 | | | Dataset_VOC|VOC-format dataset class|
50 | | | Transfroms|data transforms|
51 | | | Transfroms_utils|helper functions used by the transforms|
52 | | **Model**| | model-related code|
53 | | | base_models/Resnet|supports resnet18, 34, 50, 101, 152|
54 | | | structs/Anchors|default retinanet anchor generator|
55 | | | structs/MutiBoxLoss|combined loss function|
56 | | | structs/Focal_Loss|focal loss|
57 | | | structs/Fpn|feature pyramid network|
58 | | | structs/PostProcess|post-processing|
59 | | | structs/Predictor|classification and regression heads|
60 | | | evaler |evaluator, runs the model on a dataset and computes ap and map |
61 | | | retainnet|the Retinanet model class |
62 | | | trainer|trainer, trains a model on a dataset |
63 | | **Utils**| | assorted utilities|
64 | | |boxs_op |box operations: encoding/decoding, IoU computation, box format conversion, etc.|
65 | | **Weights**| | model weight storage|
66 | | | pretrained|pretrained weights; the model in this project was never trained to completion, so no trained weights are uploaded, but the training procedure has been verified|
67 | | | trained |default location for models saved during training|
68 | | ---- | Configs.py|configuration file with all parameters for the model, data, training and testing; back it up before modifying|
69 | | ---- | Demo_train.py| training example; models produced during training are saved to Weights/Our/ |
70 | | ---- | Demo_eval.py| evaluation example; computes the model's ap and map |
71 | | ---- | Demo_detect_one_image.py|single-image detection example|
72 | | ---- | Demo_detect_video.py|video detection example; takes a video and runs detection on it|
73 |
74 |
75 | # Demo
76 |
77 | The project ships with training, evaluation and detection code. All demos have been tested and can be run directly.
78 | 
79 | ## Train
80 | 
81 | As in [the SSD implementation for single-machine multi-GPU environments (Single Shot MultiBox Detector)](https://ptorch.com/news/252.html), the project **uses visdom to visualise the training process**. Please install and start visdom before running.
82 | 
83 | Likewise, training only supports single-GPU or multi-GPU single-machine environments; CPU training is not supported.
84 |
85 | ```python
86 |
87 | # -*- coding: utf-8 -*-
88 | # @Author : LG
89 |
90 | from Model import RetainNet, Trainer
91 | from Data import vocdataset
92 | from Configs import _C as cfg
93 | from Data import transfrom,targettransform
94 |
95 |
96 | # Training set, VOC-format dataset; training samples are taken from ImageSets/Main/train.txt
97 | train_dataset=vocdataset(cfg, is_train=True, transform=transfrom(cfg,is_train=True),
98 |                          target_transform=targettransform(cfg))
99 | 
100 | # Test set, VOC-format dataset; test samples are taken from ImageSets/Main/eval.txt
101 | test_dataset = vocdataset(cfg=cfg, is_train=False,
102 |                           transform=transfrom(cfg=cfg, is_train=False),
103 |                           target_transform=targettransform(cfg))
104 | 
105 | if __name__ == '__main__':
106 |     """
107 |     Please start visdom before running.
108 | 
109 |     Install it with        pip install visdom
110 |     Start the server with  python -m visdom.server
111 |     """
112 | 
113 |     # the resnet pretrained weights are downloaded on first use
114 | 
115 |     # instantiate the model; all model parameters are configured in the Configs file
116 |     net = RetainNet(cfg)
117 |     # move the model to the GPU; cfg.DEVICE.MAINDEVICE defines the main GPU used by the model
118 |     net.to(cfg.DEVICE.MAINDEVICE)
119 |     # initialise the trainer; its parameters are configured through cfg (passing them directly is possible but discouraged)
120 |     trainer = Trainer(cfg)
121 |     # the trainer trains the model on the dataset
122 |     trainer(net, train_dataset)
123 | ```
124 |
125 | ## Eval
126 | Evaluation supports single-GPU and multi-GPU single-machine setups; CPU is not supported.
127 |
128 | ```python
129 | # -*- coding: utf-8 -*-
130 | # @Author : LG
131 |
132 | from Model import RetainNet, Evaler
133 | from Data import vocdataset
134 | from Configs import _C as cfg
135 | from Data import transfrom,targettransform
136 |
137 |
138 | # Training set, VOC-format dataset; training samples are taken from ImageSets/Main/train.txt
139 | train_dataset=vocdataset(cfg, is_train=True, transform=transfrom(cfg,is_train=True),
140 |                          target_transform=targettransform(cfg))
141 | 
142 | # Test set, VOC-format dataset; test samples are taken from ImageSets/Main/eval.txt
143 | test_dataset = vocdataset(cfg=cfg, is_train=False,
144 |                           transform=transfrom(cfg=cfg, is_train=False),
145 |                           target_transform=targettransform(cfg))
146 | 
147 | if __name__ == '__main__':
148 |     # model evaluation only supports single- or multi-GPU, not CPU
149 |     net = RetainNet(cfg)
150 |     # move the model to the GPU; cfg.DEVICE.MAINDEVICE defines the main GPU used by the model
151 |     net.to(cfg.DEVICE.MAINDEVICE)
152 |     # load weights from a weight file
153 |     net.load_pretrained_weight('XXX.pkl')
154 |     # initialise the evaluator; its parameters are configured through cfg (passing them directly is possible but discouraged)
155 |     evaler = Evaler(cfg, eval_devices=None)
156 |     # the evaluator evaluates the model on the dataset
157 | ap, map = evaler(model=net,
158 | test_dataset=test_dataset)
159 | print(ap)
160 | print(map)
161 | ```
162 |
163 | ## Detect
164 | 
165 | Detection supports single-machine single-GPU as well as CPU.
166 | 
167 | ### Single-image detection
168 |
169 | ```python
170 | # -*- coding: utf-8 -*-
171 | # @Author : LG
172 | from Model import RetainNet
173 | from Configs import _C as cfg
174 | from PIL import Image
175 | from matplotlib import pyplot as plt
176 |
177 | # instantiate the model
178 | net = RetainNet(cfg)
179 | # use the cpu or a gpu
180 | net.to('cuda')
181 | # load weights from a weight file
182 | net.load_pretrained_weight('XXX.pkl')
183 | # open the image
184 | image = Image.open("XXX.jpg")
185 | # run detection; returns the image with boxes drawn, the boxes, the labels and the scores
186 | drawn_image, boxes, labels, scores = net.Detect_single_img(image=image,score_threshold=0.5)
187 |
188 | plt.imsave('XXX_det.jpg',drawn_image)
189 | plt.imshow(drawn_image)
190 | plt.show()
191 | ```
192 |
193 | ### Video detection
194 |
195 | ```python
196 | # -*- coding: utf-8 -*-
197 | # @Author : LG
198 | from Model import RetainNet
199 | from Configs import _C as cfg
200 |
201 | # instantiate the model
202 | net = RetainNet(cfg)
203 | # use the cpu or a gpu
204 | net.to('cuda')
205 | # load weights from a weight file
206 | net.load_pretrained_weight('XXX.pkl')
207 | 
208 | video_path = 'XXX.mp4'
209 | 
210 | # run detection
211 | # if save_video_path is None the result video is not saved; to save it, set save_video_path='XXX.mp4'
212 | # show=True displays the detection results in real time
213 | net.Detect_video(video_path=video_path, score_threshold=0.02, save_video_path=None, show=True)
214 |
215 | ```
216 |
217 | ---
218 |
219 | Supported by **JetBrains**.
220 |
221 |
222 |
223 | https://www.jetbrains.com/?from=SSD-Pytorch
224 |
225 | ---
226 |
--------------------------------------------------------------------------------
/Utils/Boxs_op.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author : LG
3 | import torch
4 | import math
5 |
6 | # decode: regression offsets -> boxes
7 | def convert_locations_to_boxes(locations, priors, center_variance,
8 | size_variance):
9 |     """Convert regressional location results of SSD into boxes in the form of (center_x, center_y, w, h).
10 |
11 | The conversion:
12 | $$predicted\_center * center_variance = \frac {real\_center - prior\_center} {prior\_hw}$$
13 | $$exp(predicted\_hw * size_variance) = \frac {real\_hw} {prior\_hw}$$
14 | We do it in the inverse direction here.
15 | Args:
16 | locations (batch_size, num_priors, 4): the regression output of SSD. It will contain the outputs as well.
17 | priors (num_priors, 4) or (batch_size/1, num_priors, 4): prior boxes.
18 | center_variance: a float used to change the scale of center.
19 | size_variance: a float used to change of scale of size.
20 | Returns:
21 | boxes: priors: [[center_x, center_y, w, h]]. All the values
22 | are relative to the image size.
23 | """
24 | # priors can have one dimension less.
25 |
26 | if priors.dim() + 1 == locations.dim():
27 | priors = priors.unsqueeze(0)
28 | return torch.cat([
29 | locations[..., :2] * center_variance * priors[..., 2:] + priors[..., :2],
30 | torch.exp(locations[..., 2:] * size_variance) * priors[..., 2:]
31 | ], dim=locations.dim() - 1)
32 |
33 |
34 | # encode: boxes -> regression offsets
35 | def convert_boxes_to_locations(center_form_boxes, center_form_priors, center_variance, size_variance):
36 | # priors can have one dimension less
37 | if center_form_priors.dim() + 1 == center_form_boxes.dim():
38 | center_form_priors = center_form_priors.unsqueeze(0)
39 | return torch.cat([
40 | (center_form_boxes[..., :2] - center_form_priors[..., :2]) / center_form_priors[..., 2:] / center_variance,
41 | torch.log(center_form_boxes[..., 2:] / center_form_priors[..., 2:]) / size_variance
42 | ], dim=center_form_boxes.dim() - 1)
43 |
44 |
45 | def area_of(left_top, right_bottom) -> torch.Tensor:
46 | """Compute the areas of rectangles given two corners.
47 |
48 | Args:
49 | left_top (N, 2): left top corner.
50 | right_bottom (N, 2): right bottom corner.
51 |
52 | Returns:
53 | area (N): return the area.
54 | """
55 | hw = torch.clamp(right_bottom - left_top, min=0.0)
56 | return hw[..., 0] * hw[..., 1]
57 |
58 |
59 | def iou_of(boxes0, boxes1, eps=1e-5):
60 | """Return intersection-over-union (Jaccard index) of boxes.
61 |
62 | Args:
63 | boxes0 (N, 4): ground truth boxes.
64 | boxes1 (N or 1, 4): predicted boxes.
65 | eps: a small number to avoid 0 as denominator.
66 | Returns:
67 | iou (N): IoU values.
68 | """
69 | overlap_left_top = torch.max(boxes0[..., :2], boxes1[..., :2])
70 | overlap_right_bottom = torch.min(boxes0[..., 2:], boxes1[..., 2:])
71 |
72 | overlap_area = area_of(overlap_left_top, overlap_right_bottom)
73 | area0 = area_of(boxes0[..., :2], boxes0[..., 2:])
74 | area1 = area_of(boxes1[..., :2], boxes1[..., 2:])
75 | return overlap_area / (area0 + area1 - overlap_area + eps)
76 |
77 |
78 | def assign_priors(gt_boxes, gt_labels, corner_form_priors,
79 | iou_threshold):
80 | """Assign ground truth boxes and targets to priors.
81 |
82 | Args:
83 | gt_boxes (num_targets, 4): ground truth boxes.
84 | gt_labels (num_targets): labels of targets.
85 | priors (num_priors, 4): corner form priors
86 | Returns:
87 | boxes (num_priors, 4): real values for priors.
88 | labels (num_priros): labels for priors.
89 | """
90 | # size: num_priors x num_targets
91 | ious = iou_of(gt_boxes.unsqueeze(0), corner_form_priors.unsqueeze(1))
92 | # size: num_priors
93 | best_target_per_prior, best_target_per_prior_index = ious.max(1)
94 | # size: num_targets
95 | best_prior_per_target, best_prior_per_target_index = ious.max(0)
96 |
97 | for target_index, prior_index in enumerate(best_prior_per_target_index):
98 | best_target_per_prior_index[prior_index] = target_index
99 | # 2.0 is used to make sure every target has a prior assigned
100 | best_target_per_prior.index_fill_(0, best_prior_per_target_index, 2)
101 | # size: num_priors
102 | labels = gt_labels[best_target_per_prior_index]
103 |     labels[best_target_per_prior < iou_threshold] = 0  # the background id
104 | boxes = gt_boxes[best_target_per_prior_index]
105 | return boxes, labels
106 |
107 |
108 | def hard_negative_mining(loss, labels, neg_pos_ratio):
109 | """
110 | It used to suppress the presence of a large number of negative prediction.
111 | It works on image level not batch level.
112 | For any example/image, it keeps all the positive predictions and
113 | cut the number of negative predictions to make sure the ratio
114 | between the negative examples and positive examples is no more
115 | the given ratio for an image.
116 |
117 | Args:
118 | loss (N, num_priors): the loss for each example.
119 | labels (N, num_priors): the labels.
120 | neg_pos_ratio: the ratio between the negative examples and positive examples.
121 | """
122 | pos_mask = labels > 0
123 | num_pos = pos_mask.long().sum(dim=1, keepdim=True)
124 | num_neg = num_pos * neg_pos_ratio
125 |
126 | loss[pos_mask] = -math.inf
127 | _, indexes = loss.sort(dim=1, descending=True)
128 | _, orders = indexes.sort(dim=1)
129 | neg_mask = orders < num_neg
130 | return pos_mask | neg_mask
131 |
132 | # [x, y, w, h] to [xmin, ymin, xmax, ymax]
133 | def center_form_to_corner_form(locations):
134 | return torch.cat([locations[..., :2] - locations[..., 2:] / 2,
135 | locations[..., :2] + locations[..., 2:] / 2], locations.dim() - 1)
136 |
137 | # [xmin, ymin, xmax, ymax] to [x, y, w, h]
138 | def corner_form_to_center_form(boxes):
139 | return torch.cat([
140 | (boxes[..., :2] + boxes[..., 2:]) / 2,
141 | boxes[..., 2:] - boxes[..., :2]
142 | ], boxes.dim() - 1)
143 |
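144 | if __name__ == '__main__':
145 |     # A minimal round-trip sketch (not part of the original file): encoding boxes against
146 |     # priors and decoding them again should recover the original boxes. The variances
147 |     # 0.1 and 0.2 are the usual SSD-style values and are illustrative here.
148 |     priors = torch.tensor([[0.5, 0.5, 0.2, 0.2], [0.3, 0.7, 0.1, 0.3]])
149 |     boxes = torch.tensor([[0.52, 0.48, 0.25, 0.18], [0.28, 0.72, 0.12, 0.28]])
150 |     locations = convert_boxes_to_locations(boxes, priors, 0.1, 0.2)
151 |     decoded = convert_locations_to_boxes(locations, priors, 0.1, 0.2)
152 |     print(torch.allclose(decoded, boxes))  # True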
--------------------------------------------------------------------------------
/Utils/Cal_mean_std.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author : LG
3 | # Compute the mean and std of a dataset; if you are using your own dataset, this code can compute them for you
4 | import numpy as np
5 | import os
6 | from PIL import Image
7 |
8 | def get_mean_std(img_root):
9 |
10 | means = 0
11 | stds = 0
12 | img_list = os.listdir(img_root)
13 | num = len(img_list)
14 | for i, img in enumerate(img_list):
15 | i +=1
16 | img = os.path.join(img_root,img)
17 | img = np.array(Image.open(img))
18 | mean = np.mean(img, axis=(0,1))
19 | std = np.std(img, axis=(0,1))
20 |
21 | means += mean
22 | stds += std
23 | print(' {}/{} , mean : [{:.2f}, {:.2f}, {:.2f}], std : [{:.2f}, {:.2f}, {:.2f}]'.format(i, num, means[0]/i, means[1]/i, means[2]/i, stds[0]/i, stds[1]/i, stds[2]/i))
24 | mean = means / i
25 | std = stds / i
26 | return mean, std
27 |
--------------------------------------------------------------------------------
/Utils/Hash.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author : LG
3 | import hashlib
4 | import os
5 |
6 | def GetFileMd5(filename):
7 | if not os.path.isfile(filename):
8 | return
9 | myHash = hashlib.md5()
10 | f = open(filename,'rb')
11 | while True:
12 | b = f.read(8096)
13 | if not b :
14 | break
15 | myHash.update(b)
16 | f.close()
17 | return myHash.hexdigest()
18 |
19 |
--------------------------------------------------------------------------------
/Utils/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author : LG
3 | from .voc_cal_ap import eval_detection_voc
4 | from .Hash import GetFileMd5
5 | from .Boxs_op import center_form_to_corner_form, corner_form_to_center_form
--------------------------------------------------------------------------------
/Utils/__pycache__/Boxs_op.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Utils/__pycache__/Boxs_op.cpython-37.pyc
--------------------------------------------------------------------------------
/Utils/__pycache__/Hash.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Utils/__pycache__/Hash.cpython-37.pyc
--------------------------------------------------------------------------------
/Utils/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Utils/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/Utils/__pycache__/voc_cal_ap.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Utils/__pycache__/voc_cal_ap.cpython-37.pyc
--------------------------------------------------------------------------------
/Utils/utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author : LG
3 | import numpy as np
4 | import cv2
5 | from tqdm import tqdm
6 | import json
7 | import os
8 | import shutil
9 |
10 | def json_to_txt(json_file, root):
11 | """
12 |     Parse a json annotation file into per-image txt files.
13 |     Each output file is named after its image,
14 |     and stores the data as  label1, x1, y1, w1, h1
15 |                             label2, x2, y2, w2, h2
16 |     one box per line.
17 | eg:
18 | json_to_txt('/home/super/guangdong1_round1_train1_20190809/Annotations/gt_result.json',
19 | '/home/super/guangdong1_round1_train1_20190809/Our')
20 |
21 | :param json_file:
22 | :param root:
23 | :return:
24 | """
25 | if os.path.exists(root):
26 | shutil.rmtree(root)
27 | os.mkdir(root)
28 |
29 | with open(json_file, 'r')as f:
30 | json_dict_list = json.load(f)
31 | for json_dict in json_dict_list:
32 | name = json_dict['name']
33 | defect_name = json_dict['defect_name']
34 | bbox = json_dict['bbox']
35 | content = [defect_name]
36 | for xywh in bbox:
37 | content.append(str(xywh))
38 | content = ','.join(content)
39 |
40 | with open(os.path.join(root, name.split('.')[0] + '.txt'), 'a')as f:
41 | f.write(content + '\n')
42 | return True
43 |
44 |
45 | def cal_mean_std(images_dir):
46 | """
47 |     Given the root directory of the images, compute their overall mean and std
48 | :param images_dir:
49 | :return:
50 | """
51 | img_filenames = os.listdir(images_dir)
52 | m_list, s_list = [], []
53 | for img_filename in tqdm(img_filenames):
54 | img = cv2.imread(images_dir + '/' + img_filename)
55 | img = img / 255.0
56 | m, s = cv2.meanStdDev(img)
57 |
58 | m_list.append(m.reshape((3,)))
59 | s_list.append(s.reshape((3,)))
60 | print(m_list)
61 | m_array = np.array(m_list)
62 | s_array = np.array(s_list)
63 | m = m_array.mean(axis=0, keepdims=True)
64 | s = s_array.mean(axis=0, keepdims=True)
65 | print('mean: ',m[0][::-1])
66 | print('std: ',s[0][::-1])
67 | return m
68 |
--------------------------------------------------------------------------------
/Utils/visdom_op.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author : LG
3 | import visdom
4 | import torch
5 | import numpy as np
6 |
7 | """
8 | Using visdom:
9 | first install it with  pip install visdom
10 | start the server with  python -m visdom.server
11 | it serves http://localhost:8097/ by default
12 | """
13 |
14 | """
15 | """
16 |
17 | def setup_visdom(**kwargs):
18 |
19 | """
20 | eg :
21 | vis_eval = setup_visdom(env='SSD_eval')
22 |
23 | :param kwargs:
24 | :return:
25 | """
26 | vis = visdom.Visdom(**kwargs)
27 | return vis
28 |
29 |
30 | def visdom_line(vis, y, x, win_name, update='append'):
31 |
32 | """
33 | eg :
34 | visdom_line(vis_train, y=[loss], x=iteration, win_name='loss')
35 |
36 |     :param vis: created by the setup_visdom function
37 |     :param y: Y-axis data; a sequence, several series can be passed at once. eg: [loss1, loss2]
38 |     :param x: X-axis value, same format as Y
39 |     :param win_name: name of the plot window; must be given, otherwise a new window is created on every call
40 |     :param update: plotting mode. Defaults to 'append' so the curve keeps growing, e.g. for recording the loss
41 | :return:
42 | """
43 | if not isinstance(y,torch.Tensor):
44 | y=torch.Tensor(y)
45 | y = y.unsqueeze(0)
46 | x = torch.Tensor(y.size()).fill_(x)
47 | vis.line(Y=y, X=x, win=win_name, update=update, opts={'title':win_name})
48 | return True
49 |
50 |
51 | def visdom_images(vis, images,win_name,num_show=None,nrow=None):
52 | """
53 | eg:
54 | visdom_images(vis_train, images, num_show=3, nrow=3, win_name='Image')
55 |
56 |     Show images in visdom; by default at most 6 images are shown, 3 per row.
57 | 
58 |     :param vis: created by the setup_visdom function
59 |     :param images: tensor of several images, shape: [B,N,W,H]
60 |     :param win_name: name of the plot window; must be given, otherwise a new window is created on every call
61 |     :param num_show: number of images to show, default 6
62 |     :param nrow: number of images per row, default 3
63 | :return:
64 | """
65 | if not num_show:
66 | num_show = 6
67 | if not nrow:
68 | nrow = 3
69 | num = images.size(0)
70 | if num > num_show:
71 | images = images [:num_show]
72 | vis.images(tensor=images,nrow=nrow,win=win_name)
73 | return True
74 |
75 |
76 | def visdom_image(vis, image,win_name):
77 | """
78 | eg :
79 | visdom_image(vis=vis, image=drawn_image, win_name='image')
80 |
81 |     :param vis: created by the setup_visdom function
82 |     :param image: a single image tensor, shape: [n,w,h]
83 |     :param win_name: name of the plot window; must be given, otherwise a new window is created on every call
84 | :return:
85 | """
86 | vis.image(img=image, win=win_name)
87 | return True
88 |
89 | def visdom_bar(vis, X, Y, win_name):
90 | """
91 |     Draw a bar chart
92 | eg:
93 | visdom_bar(vis_train, X=cfg.DATASETS.CLASS_NAME, Y=ap, win_name='ap', title='ap')
94 |
95 | :param vis:
96 |     :param X: category names
97 |     :param Y: values
98 |     :param win_name: name of the plot window; must be given, otherwise a new window is created on every call
99 | :return:
100 | """
101 | dic = dict(zip(X,Y))
102 | del_list = []
103 | for val in dic:
104 | if np.isnan(dic[val]):
105 | del_list.append(val)
106 |
107 | for val in del_list:
108 | del dic[val]
109 |
110 | vis.bar(X=list(dic.values()),Y=list(dic.keys()),win=win_name, opts={'title':win_name})
111 | return True
--------------------------------------------------------------------------------
/Utils/voc_cal_ap.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author : LG
3 | from __future__ import division
4 | from collections import defaultdict
5 | import itertools
6 | import numpy as np
7 | import six
8 |
9 | __all__ = ['eval_detection_voc']
10 |
11 | def bbox_iou(bbox_a, bbox_b):
12 | """Calculate the Intersection of Unions (IoUs) between bounding boxes.
13 | IoU is calculated as a ratio of area of the intersection
14 | and area of the union.
15 | This function accepts both :obj:`numpy.ndarray` and :obj:`cupy.ndarray` as
16 | inputs. Please note that both :obj:`bbox_a` and :obj:`bbox_b` need to be
17 | same type.
18 | The output is same type as the type of the inputs.
19 | Args:
20 | bbox_a (array): An array whose shape is :math:`(N, 4)`.
21 | :math:`N` is the number of bounding boxes.
22 | The dtype should be :obj:`numpy.float32`.
23 | bbox_b (array): An array similar to :obj:`bbox_a`,
24 | whose shape is :math:`(K, 4)`.
25 | The dtype should be :obj:`numpy.float32`.
26 | Returns:
27 | array:
28 | An array whose shape is :math:`(N, K)`. \
29 | An element at index :math:`(n, k)` contains IoUs between \
30 | :math:`n` th bounding box in :obj:`bbox_a` and :math:`k` th bounding \
31 | box in :obj:`bbox_b`.
32 | """
33 | if bbox_a.shape[1] != 4 or bbox_b.shape[1] != 4:
34 | raise IndexError
35 |
36 | # top left
37 | tl = np.maximum(bbox_a[:, None, :2], bbox_b[:, :2])
38 | # bottom right
39 | br = np.minimum(bbox_a[:, None, 2:], bbox_b[:, 2:])
40 |
41 | area_i = np.prod(br - tl, axis=2) * (tl < br).all(axis=2)
42 | area_a = np.prod(bbox_a[:, 2:] - bbox_a[:, :2], axis=1)
43 | area_b = np.prod(bbox_b[:, 2:] - bbox_b[:, :2], axis=1)
44 | return area_i / (area_a[:, None] + area_b - area_i)
45 |
46 |
47 | def eval_detection_voc(
48 | pred_bboxes,
49 | pred_labels,
50 | pred_scores,
51 | gt_bboxes,
52 | gt_labels,
53 | gt_difficults=None,
54 | iou_thresh=0.5,
55 | use_07_metric=False):
56 | """Calculate average precisions based on evaluation code of PASCAL VOC.
57 |
58 | This function evaluates predicted bounding boxes obtained from a dataset
59 | which has :math:`N` images by using average precision for each class.
60 | The code is based on the evaluation code used in PASCAL VOC Challenge.
61 |
62 | Args:
63 | pred_bboxes (iterable of numpy.ndarray): An iterable of :math:`N`
64 | sets of bounding boxes.
65 | Its index corresponds to an index for the base dataset.
66 | Each element of :obj:`pred_bboxes` is a set of coordinates
67 | of bounding boxes. This is an array whose shape is :math:`(R, 4)`,
68 | where :math:`R` corresponds
69 | to the number of bounding boxes, which may vary among boxes.
70 | The second axis corresponds to
71 | :math:`y_{min}, x_{min}, y_{max}, x_{max}` of a bounding box.
72 | pred_labels (iterable of numpy.ndarray): An iterable of labels.
73 | Similar to :obj:`pred_bboxes`, its index corresponds to an
74 | index for the base dataset. Its length is :math:`N`.
75 | pred_scores (iterable of numpy.ndarray): An iterable of confidence
76 | scores for predicted bounding boxes. Similar to :obj:`pred_bboxes`,
77 | its index corresponds to an index for the base dataset.
78 | Its length is :math:`N`.
79 | gt_bboxes (iterable of numpy.ndarray): An iterable of ground truth
80 | bounding boxes
81 | whose length is :math:`N`. An element of :obj:`gt_bboxes` is a
82 | bounding box whose shape is :math:`(R, 4)`. Note that the number of
83 | bounding boxes in each image does not need to be same as the number
84 | of corresponding predicted boxes.
85 | gt_labels (iterable of numpy.ndarray): An iterable of ground truth
86 | labels which are organized similarly to :obj:`gt_bboxes`.
87 | gt_difficults (iterable of numpy.ndarray): An iterable of boolean
88 | arrays which is organized similarly to :obj:`gt_bboxes`.
89 | This tells whether the
90 | corresponding ground truth bounding box is difficult or not.
91 | By default, this is :obj:`None`. In that case, this function
92 | considers all bounding boxes to be not difficult.
93 | iou_thresh (float): A prediction is correct if its Intersection over
94 | Union with the ground truth is above this value.
95 | use_07_metric (bool): Whether to use PASCAL VOC 2007 evaluation metric
96 | for calculating average precision. The default value is
97 | :obj:`False`.
98 |
99 | Returns:
100 | dict:
101 |
102 | The keys, value-types and the description of the values are listed
103 | below.
104 |
105 | * **ap** (*numpy.ndarray*): An array of average precisions. \
106 | The :math:`l`-th value corresponds to the average precision \
107 | for class :math:`l`. If class :math:`l` does not exist in \
108 | either :obj:`pred_labels` or :obj:`gt_labels`, the corresponding \
109 | value is set to :obj:`numpy.nan`.
110 | * **map** (*float*): The average of Average Precisions over classes.
111 |
112 | """
113 |
114 | prec, rec = calc_detection_voc_prec_rec(pred_bboxes,
115 | pred_labels,
116 | pred_scores,
117 | gt_bboxes,
118 | gt_labels,
119 | gt_difficults,
120 | iou_thresh=iou_thresh)
121 |
122 | ap = calc_detection_voc_ap(prec, rec, use_07_metric=use_07_metric)
123 |
124 | return {'ap': ap, 'map': np.nanmean(ap)}
125 |
126 |
127 | def calc_detection_voc_prec_rec(
128 | pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels,
129 | gt_difficults=None,
130 | iou_thresh=0.5):
131 | """Calculate precision and recall based on evaluation code of PASCAL VOC.
132 |
133 | This function calculates precision and recall of
134 | predicted bounding boxes obtained from a dataset which has :math:`N`
135 | images.
136 | The code is based on the evaluation code used in PASCAL VOC Challenge.
137 |
138 | Args:
139 | pred_bboxes (iterable of numpy.ndarray): An iterable of :math:`N`
140 | sets of bounding boxes.
141 | Its index corresponds to an index for the base dataset.
142 | Each element of :obj:`pred_bboxes` is a set of coordinates
143 | of bounding boxes. This is an array whose shape is :math:`(R, 4)`,
144 | where :math:`R` corresponds
145 | to the number of bounding boxes, which may vary among boxes.
146 | The second axis corresponds to
147 | :math:`y_{min}, x_{min}, y_{max}, x_{max}` of a bounding box.
148 | pred_labels (iterable of numpy.ndarray): An iterable of labels.
149 | Similar to :obj:`pred_bboxes`, its index corresponds to an
150 | index for the base dataset. Its length is :math:`N`.
151 | pred_scores (iterable of numpy.ndarray): An iterable of confidence
152 | scores for predicted bounding boxes. Similar to :obj:`pred_bboxes`,
153 | its index corresponds to an index for the base dataset.
154 | Its length is :math:`N`.
155 | gt_bboxes (iterable of numpy.ndarray): An iterable of ground truth
156 | bounding boxes
157 | whose length is :math:`N`. An element of :obj:`gt_bboxes` is a
158 | bounding box whose shape is :math:`(R, 4)`. Note that the number of
159 | bounding boxes in each image does not need to be same as the number
160 | of corresponding predicted boxes.
161 | gt_labels (iterable of numpy.ndarray): An iterable of ground truth
162 | labels which are organized similarly to :obj:`gt_bboxes`.
163 | gt_difficults (iterable of numpy.ndarray): An iterable of boolean
164 | arrays which is organized similarly to :obj:`gt_bboxes`.
165 | This tells whether the
166 | corresponding ground truth bounding box is difficult or not.
167 | By default, this is :obj:`None`. In that case, this function
168 | considers all bounding boxes to be not difficult.
169 | iou_thresh (float): A prediction is correct if its Intersection over
170 | Union with the ground truth is above this value..
171 |
172 | Returns:
173 | tuple of two lists:
174 | This function returns two lists: :obj:`prec` and :obj:`rec`.
175 |
176 | * :obj:`prec`: A list of arrays. :obj:`prec[l]` is precision \
177 | for class :math:`l`. If class :math:`l` does not exist in \
178 | either :obj:`pred_labels` or :obj:`gt_labels`, :obj:`prec[l]` is \
179 | set to :obj:`None`.
180 | * :obj:`rec`: A list of arrays. :obj:`rec[l]` is recall \
181 | for class :math:`l`. If class :math:`l` that is not marked as \
182 | difficult does not exist in \
183 | :obj:`gt_labels`, :obj:`rec[l]` is \
184 | set to :obj:`None`.
185 |
186 | """
187 |
188 | pred_bboxes = iter(pred_bboxes)
189 | pred_labels = iter(pred_labels)
190 | pred_scores = iter(pred_scores)
191 | gt_bboxes = iter(gt_bboxes)
192 | gt_labels = iter(gt_labels)
193 | if gt_difficults is None:
194 | gt_difficults = itertools.repeat(None)
195 | else:
196 | gt_difficults = iter(gt_difficults)
197 |
198 | n_pos = defaultdict(int)
199 | score = defaultdict(list)
200 | match = defaultdict(list)
201 |
202 | for pred_bbox, pred_label, pred_score, gt_bbox, gt_label, gt_difficult in \
203 | six.moves.zip(
204 | pred_bboxes, pred_labels, pred_scores,
205 | gt_bboxes, gt_labels, gt_difficults):
206 |
207 | if gt_difficult is None:
208 | gt_difficult = np.zeros(gt_bbox.shape[0], dtype=bool)
209 |
210 | for l in np.unique(np.concatenate((pred_label, gt_label)).astype(int)):
211 | pred_mask_l = pred_label == l
212 | pred_bbox_l = pred_bbox[pred_mask_l]
213 | pred_score_l = pred_score[pred_mask_l]
214 | # sort by score
215 | order = pred_score_l.argsort()[::-1]
216 | pred_bbox_l = pred_bbox_l[order]
217 | pred_score_l = pred_score_l[order]
218 |
219 | gt_mask_l = gt_label == l
220 | gt_bbox_l = gt_bbox[gt_mask_l]
221 | gt_difficult_l = gt_difficult[gt_mask_l]
222 |
223 | n_pos[l] += np.logical_not(gt_difficult_l).sum()
224 | score[l].extend(pred_score_l)
225 |
226 | if len(pred_bbox_l) == 0:
227 | continue
228 | if len(gt_bbox_l) == 0:
229 | match[l].extend((0,) * pred_bbox_l.shape[0])
230 | continue
231 |
232 | # VOC evaluation follows integer typed bounding boxes.
233 | pred_bbox_l = pred_bbox_l.copy()
234 | pred_bbox_l[:, 2:] += 1
235 | gt_bbox_l = gt_bbox_l.copy()
236 | gt_bbox_l[:, 2:] += 1
237 |
238 | iou = bbox_iou(pred_bbox_l, gt_bbox_l)
239 | gt_index = iou.argmax(axis=1)
240 | # set -1 if there is no matching ground truth
241 | gt_index[iou.max(axis=1) < iou_thresh] = -1
242 | del iou
243 |
244 | selec = np.zeros(gt_bbox_l.shape[0], dtype=bool)
245 | for gt_idx in gt_index:
246 | if gt_idx >= 0:
247 | if gt_difficult_l[gt_idx]:
248 | match[l].append(-1)
249 | else:
250 | if not selec[gt_idx]:
251 | match[l].append(1)
252 | else:
253 | match[l].append(0)
254 | selec[gt_idx] = True
255 | else:
256 | match[l].append(0)
257 |
258 | for iter_ in (
259 | pred_bboxes, pred_labels, pred_scores,
260 | gt_bboxes, gt_labels, gt_difficults):
261 | if next(iter_, None) is not None:
262 | raise ValueError('Length of input iterables need to be same.')
263 |
264 | n_fg_class = max(n_pos.keys()) + 1
265 | prec = [None] * n_fg_class
266 | rec = [None] * n_fg_class
267 |
268 | for l in n_pos.keys():
269 | score_l = np.array(score[l])
270 | match_l = np.array(match[l], dtype=np.int8)
271 |
272 | order = score_l.argsort()[::-1]
273 | match_l = match_l[order]
274 |
275 | tp = np.cumsum(match_l == 1)
276 | fp = np.cumsum(match_l == 0)
277 |
278 | # If an element of fp + tp is 0,
279 | # the corresponding element of prec[l] is nan.
280 | prec[l] = tp / (fp + tp)
281 | # If n_pos[l] is 0, rec[l] is None.
282 | if n_pos[l] > 0:
283 | rec[l] = tp / n_pos[l]
284 |
285 | return prec, rec
286 |
287 |
288 | def calc_detection_voc_ap(prec, rec, use_07_metric=False):
289 | """Calculate average precisions based on evaluation code of PASCAL VOC.
290 |
291 | This function calculates average precisions
292 | from given precisions and recalls.
293 | The code is based on the evaluation code used in PASCAL VOC Challenge.
294 |
295 | Args:
296 | prec (list of numpy.array): A list of arrays.
297 | :obj:`prec[l]` indicates precision for class :math:`l`.
298 | If :obj:`prec[l]` is :obj:`None`, this function returns
299 | :obj:`numpy.nan` for class :math:`l`.
300 | rec (list of numpy.array): A list of arrays.
301 | :obj:`rec[l]` indicates recall for class :math:`l`.
302 | If :obj:`rec[l]` is :obj:`None`, this function returns
303 | :obj:`numpy.nan` for class :math:`l`.
304 | use_07_metric (bool): Whether to use PASCAL VOC 2007 evaluation metric
305 | for calculating average precision. The default value is
306 | :obj:`False`.
307 |
308 | Returns:
309 | ~numpy.ndarray:
310 | This function returns an array of average precisions.
311 | The :math:`l`-th value corresponds to the average precision
312 | for class :math:`l`. If :obj:`prec[l]` or :obj:`rec[l]` is
313 | :obj:`None`, the corresponding value is set to :obj:`numpy.nan`.
314 |
315 | """
316 |
317 | n_fg_class = len(prec)
318 | ap = np.empty(n_fg_class)
319 | for l in six.moves.range(n_fg_class):
320 | if prec[l] is None or rec[l] is None:
321 | ap[l] = np.nan
322 | continue
323 |
324 | if use_07_metric:
325 | # 11 point metric
326 | ap[l] = 0
327 | for t in np.arange(0., 1.1, 0.1):
328 | if np.sum(rec[l] >= t) == 0:
329 | p = 0
330 | else:
331 | p = np.max(np.nan_to_num(prec[l])[rec[l] >= t])
332 | ap[l] += p / 11
333 | else:
334 | # correct AP calculation
335 | # first append sentinel values at the end
336 | mpre = np.concatenate(([0], np.nan_to_num(prec[l]), [0]))
337 | mrec = np.concatenate(([0], rec[l], [1]))
338 |
339 | mpre = np.maximum.accumulate(mpre[::-1])[::-1]
340 |
341 | # to calculate area under PR curve, look for points
342 | # where X axis (recall) changes value
343 | i = np.where(mrec[1:] != mrec[:-1])[0]
344 |
345 | # and sum (\Delta recall) * prec
346 | ap[l] = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
347 |
348 | return ap
349 |
--------------------------------------------------------------------------------
/__pycache__/Configs.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/__pycache__/Configs.cpython-37.pyc
--------------------------------------------------------------------------------