├── .idea ├── .gitignore ├── inspectionProfiles │ └── profiles_settings.xml ├── misc.xml ├── modules.xml ├── retinanet-pytorch.iml └── vcs.xml ├── Configs.py ├── Data ├── Dataloader.py ├── Dataset_VOC.py ├── Transfroms.py ├── Transfroms_utils.py ├── __init__.py └── __pycache__ │ ├── Dataloader.cpython-37.pyc │ ├── Dataset_VOC.cpython-37.pyc │ ├── Transfroms.cpython-37.pyc │ ├── Transfroms_utils.cpython-37.pyc │ └── __init__.cpython-37.pyc ├── Demo_detect_one_image.py ├── Demo_detect_video.py ├── Demo_eval.py ├── Demo_train.py ├── Model ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-37.pyc │ └── retainnet.cpython-37.pyc ├── base_models │ ├── Resnet.py │ ├── __init__.py │ └── __pycache__ │ │ ├── Resnet.cpython-37.pyc │ │ └── __init__.cpython-37.pyc ├── evaler.py ├── retainnet.py ├── struct │ ├── Anchors.py │ ├── Focal_Loss.py │ ├── Fpn.py │ ├── MultiBoxLoss.py │ ├── PostProcess.py │ ├── Predictor.py │ ├── __init__.py │ └── __pycache__ │ │ ├── Anchors.cpython-37.pyc │ │ ├── Focal_Loss.cpython-37.pyc │ │ ├── Fpn.cpython-37.pyc │ │ ├── PostProcess.cpython-37.pyc │ │ ├── Predictor.cpython-37.pyc │ │ └── __init__.cpython-37.pyc └── trainer.py ├── README.md ├── Utils ├── Boxs_op.py ├── Cal_mean_std.py ├── Hash.py ├── __init__.py ├── __pycache__ │ ├── Boxs_op.cpython-37.pyc │ ├── Hash.cpython-37.pyc │ ├── __init__.cpython-37.pyc │ └── voc_cal_ap.cpython-37.pyc ├── utils.py ├── visdom_op.py └── voc_cal_ap.py └── __pycache__ └── Configs.cpython-37.pyc /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /workspace.xml -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/retinanet-pytorch.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /Configs.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author : LG 3 | from yacs.config import CfgNode as CN 4 | import os 5 | 6 | project_root = os.getcwd() 7 | 8 | _C = CN() 9 | 10 | 11 | _C.FILE = CN() 12 | 13 | _C.FILE.PRETRAIN_WEIGHT_ROOT = project_root+'/Weights/pretrained' # 会使用到的预训练模型 14 | _C.FILE.MODEL_SAVE_ROOT = project_root+'/Weights/trained' # 训练模型的保存 15 | # _C.FILE.VGG16_WEIGHT = 'vgg16_reducedfc.pth' # vgg预训练模型 16 | 17 | _C.DEVICE = CN() 18 | 19 | _C.DEVICE.MAINDEVICE = 'cuda:0' # 主gpu 主GPU会占用内存稍大一丁点 20 | _C.DEVICE.TRAIN_DEVICES = [0, 1] # 训练gpu 0代表第一块gpu, 1 代表第二块gpu, 你可以随意更改. 
你可以通过 nvidim-smi 来查看gpu编号及占用情况, 同样的,你可以[0,1,2,3,4,5,6,7]来指定八块gpu 或[0,2,4] 来指定其中的任意三块gpu 21 | _C.DEVICE.TEST_DEVICES = [0, 1] # 检测gpu 22 | 23 | _C.MODEL = CN() 24 | _C.MODEL.BASEMODEL = 'resnet50' # 现支持 resnet18, resnet34, resnet50, resnet101, resnet152 25 | 26 | _C.MODEL.INPUT = CN() 27 | _C.MODEL.INPUT.IMAGE_SIZE = 600 # 模型输入尺寸 28 | 29 | _C.MODEL.ANCHORS = CN() 30 | _C.MODEL.ANCHORS.FEATURE_MAPS = [(75, 75), (38, 38), (19, 19), (10, 10), (5, 5)] # fpn输出的特征图大小 # [(IMAGE_SIZE/2/2/2, ), (IMAGE_SIZE/2/2/2/2, ), (IMAGE_SIZE/2/2/2/2/2)] 这里都向上取整 31 | _C.MODEL.ANCHORS.SIZES = [32, 64, 128, 256, 512] # 每层特征图上anchor的真实尺寸 32 | _C.MODEL.ANCHORS.NUMS = 9 # 每个特征点上anchor的数量, 与_C.MODEL.ANCHORS.RATIOS 相关联 33 | _C.MODEL.ANCHORS.RATIOS = [0.5, 1, 2] # 不同特征图上检测框绘制比例 34 | _C.MODEL.ANCHORS.SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] # 不同特征图上检测框绘制比例 35 | _C.MODEL.ANCHORS.CLIP = True # 越界检测框截断,0~1 36 | _C.MODEL.ANCHORS.THRESHOLD = 0.5 # 交并比阈值 37 | _C.MODEL.ANCHORS.CENTER_VARIANCE = 0.1 # 解码 38 | _C.MODEL.ANCHORS.SIZE_VARIANCE = 0.2 # 解码 39 | 40 | _C.TRAIN = CN() 41 | 42 | _C.TRAIN.NEG_POS_RATIO = 3 # 负正样本比例,每张图中负样本比例(背景类)会占大多数,通过这个来对负样本进行抑制,只取3倍正样本数量的负样本进行训练,而不至于导致正负样本严重失衡 43 | _C.TRAIN.MAX_ITER = 120000 # 训练轮数 44 | _C.TRAIN.BATCH_SIZE = 20 # 训练批次, 如果内存小,可以调小。如果使用多块gpu,请使用整数倍gpu数量的批次数 45 | 46 | _C.MULTIBOXLOSS = CN() 47 | _C.MULTIBOXLOSS.ALPHA = 0.25 # focal loss 阿尔法参数,用于调节背景与目标比例,这里与 _C.TRAIN.NEG_POS_RATIO 目的相同,但原理不同,_C.TRAIN.NEG_POS_RATIO直接减少负样本数量,_C.MULTIBOXLOSS.ALPHA 减小负样本对损失的影响比重 48 | _C.MULTIBOXLOSS.GAMMA = 2 # focal loss 伽马参数 ,用于调节难易样本影响,一般为2即可 49 | 50 | _C.OPTIM = CN() 51 | 52 | _C.OPTIM.LR = 1e-3 # 初始学习率.默认优化器为SGD # 如需修改优化器,可以代码中进行修改 Model/trainer.py -> set_optimizer 53 | _C.OPTIM.MOMENTUM = 0.9 # 优化器动量.默认优化器为SGD 54 | _C.OPTIM.WEIGHT_DECAY = 5e-4 # 权重衰减,L2正则化.默认优化器为SGD 55 | 56 | _C.OPTIM.SCHEDULER = CN() # 默认使用MultiStepLR 57 | _C.OPTIM.SCHEDULER.GAMMA = 0.1 # 学习率衰减率 58 | _C.OPTIM.SCHEDULER.LR_STEPS = [80000, 100000] 59 | 60 | 61 | _C.MODEL.TEST = CN() 62 | 63 | _C.MODEL.TEST.NMS_THRESHOLD = 0.45 # 非极大抑制阈值 64 | _C.MODEL.TEST.CONFIDENCE_THRESHOLD = 0.1 # 分数阈值, 65 | _C.MODEL.TEST.MAX_PER_IMAGE = 100 # 预测结果最大保留数量 66 | _C.MODEL.TEST.MAX_PER_CLASS = -1 # 测试时,top-N 67 | 68 | 69 | _C.DATA = CN() 70 | 71 | # 由于在使用时,是自己的数据集.所以这里,并没有写0712合并的数据集格式,这里以VOC2007为例 72 | _C.DATA.DATASET = CN() 73 | _C.DATA.DATASET.NUM_CLASSES =21 74 | _C.DATA.DATASET.CLASS_NAME = ('__background__', 'aeroplane', 'bicycle', 'bird', 'boat', 75 | 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 76 | 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 77 | 'sheep', 'sofa', 'train', 'tvmonitor') 78 | 79 | 80 | _C.DATA.DATASET.DATA_DIR = '/home/XXX/VOC_det/VOCdevkit/VOC2007' # 数据集voc格式,根目录 请更改为自己的目录 81 | _C.DATA.DATASET.TRAIN_SPLIT = 'train' # 训练集,对应于 /VOCdevkit/VOC2007/ImageSets/Main/train.txt' 82 | _C.DATA.DATASET.TEST_SPLIT = 'val' # 测试集,对应于 /VOCdevkit/VOC2007/ImageSets/Main/val.txt' 83 | _C.DATA.PIXEL_MEAN = [0, 0, 0] #数据集均值 用于数据增强部分,依数据集修改即可 84 | _C.DATA.PIXEL_STD = [1, 1, 1] # 数据集方差 85 | 86 | _C.DATA.DATALOADER = CN() 87 | 88 | 89 | _C.STEP = CN() 90 | _C.STEP.VIS_STEP = 10 # visdom可视化训练过程,打印步长 91 | _C.STEP.MODEL_SAVE_STEP = 1000 # 训练过程中,模型保存步长 92 | _C.STEP.EVAL_STEP = 1000 # 在训练过程中,并没有进行检测流程,建议保存模型后另外检测 93 | -------------------------------------------------------------------------------- /Data/Dataloader.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author : LG 3 | from torch._six import int_classes as _int_classes 4 | from torch.utils.data 
import DataLoader 5 | from torch.utils.data.sampler import RandomSampler, SequentialSampler 6 | from torch.utils.data import Sampler 7 | from torch.utils.data.dataloader import default_collate # 这个不用管,只是显示问题,实际可以使用 8 | 9 | __all__ = ['our_dataloader', 'our_dataloader_test'] 10 | 11 | class BatchSampler_Our(Sampler): 12 | """ 13 | 重新定义了 批采样类 ,实现按指定迭代数进行批次提取, 14 | 在取完一批次后没达到指定迭代数会进行循环,直到输出指定的批次数量。 15 | """ 16 | 17 | def __init__(self, sampler, batch_size, max_iteration=100000000, drop_last=True): 18 | """ 19 | 数据加载,默认循环加载1亿次,几近无限迭代. 20 | 每次迭代输出一个批次的数据. 21 | :param sampler: 采样器,传入 不同采样器 实现 不同的采样策略, RandomSampler随机采样,SequentialSampler顺序采样 22 | :param batch_size: 批次大小 23 | :param max_iteration: 迭代次数 24 | :param drop_last: 是否弃掉最后的不够一批次的数据。True则弃掉;False保留,并返回,但是这一批次会小于指定批次大小。 25 | """ 26 | if not isinstance(sampler, Sampler): 27 | raise ValueError("sampler should be an instance of " 28 | "torch.utils.data.Sampler, but got sampler={}" 29 | .format(sampler)) 30 | if not isinstance(batch_size, _int_classes) or isinstance(batch_size, bool) or \ 31 | batch_size <= 0: 32 | raise ValueError("batch_size should be a positive integer value, " 33 | "but got batch_size={}".format(batch_size)) 34 | if not isinstance(max_iteration, _int_classes) or isinstance(max_iteration, bool) or \ 35 | max_iteration <= 0: 36 | raise ValueError("max_iter should be a positive integer value, " 37 | "but got max_iter={}".format(max_iteration)) 38 | 39 | if not isinstance(drop_last, bool): 40 | raise ValueError("drop_last should be a boolean value, but got " 41 | "drop_last={}".format(drop_last)) 42 | self.sampler = sampler 43 | self.batch_size = batch_size 44 | self.max_iteration = max_iteration 45 | self.drop_last = drop_last 46 | 47 | def __iter__(self): 48 | iteration = 0 49 | 50 | while iteration <= self.max_iteration: 51 | batch = [] 52 | for idx in self.sampler: 53 | batch.append(idx) 54 | 55 | if len(batch) == self.batch_size: 56 | iteration += 1 57 | yield batch 58 | batch = [] 59 | 60 | if iteration > self.max_iteration: 61 | break 62 | 63 | if len(batch) > 0 and not self.drop_last: 64 | iteration += 1 65 | yield batch 66 | 67 | if iteration > self.max_iteration: 68 | break 69 | 70 | def __len__(self): 71 | if self.drop_last: 72 | return self.max_iteration 73 | else: 74 | return self.max_iteration 75 | 76 | 77 | class BatchCollator: 78 | def __init__(self, is_train=True): 79 | self.is_train = is_train 80 | 81 | def __call__(self, batch): 82 | transposed_batch = list(zip(*batch)) 83 | images = default_collate(transposed_batch[0]) 84 | img_ids = default_collate(transposed_batch[3]) 85 | 86 | if self.is_train: 87 | boxes = default_collate(transposed_batch[1]) 88 | labels = default_collate(transposed_batch[2]) 89 | else: 90 | boxes = None 91 | labels = None 92 | return images, boxes, labels, img_ids 93 | 94 | 95 | def our_dataloader(dataset,batch_size,shuffle=True,num_workers=2,drop_last=True,max_iteration=100000000): 96 | """ 97 | 几近无限迭代器,迭代次数为1亿次,每次迭代输出一个批次的数据. 
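    eg (a usage sketch, not taken from the repo itself; the names follow Demo_train.py and the BatchCollator above):
        train_loader = our_dataloader(train_dataset, batch_size=cfg.TRAIN.BATCH_SIZE, shuffle=True)
        for iteration, (images, boxes, labels, image_names) in enumerate(train_loader, 1):
            ...  # the loader cycles almost indefinitely, so stop on your own condition, e.g. iteration == cfg.TRAIN.MAX_ITER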
98 | :param dataset: 数据集 99 | :param batch_size: 批次数 100 | :param max_iteration: 迭代的总次数,默认1亿次,具体迭代次数,在取数据时进行判断会更为灵活 101 | :param shuffle: 102 | :param num_workers: 103 | :param drop_last: 104 | :return: 105 | """ 106 | if shuffle: 107 | sampler = RandomSampler(dataset) # 随机采样器 108 | else: 109 | sampler = SequentialSampler(dataset) # 顺序采样器 110 | batch_sampler = BatchSampler_Our(sampler=sampler, 111 | batch_size=batch_size, 112 | max_iteration=max_iteration, 113 | drop_last=drop_last) 114 | loader = DataLoader(dataset=dataset,batch_sampler=batch_sampler,num_workers=num_workers,collate_fn=BatchCollator(is_train=dataset.is_train)) 115 | return loader 116 | 117 | def our_dataloader_test(dataset,batch_size,shuffle=False,get_box_label=True,num_workers=2,drop_last=False): 118 | 119 | loader = DataLoader(dataset=dataset,batch_size=batch_size,shuffle=shuffle,num_workers=num_workers, 120 | collate_fn=BatchCollator(is_train=get_box_label),drop_last=drop_last) 121 | return loader -------------------------------------------------------------------------------- /Data/Dataset_VOC.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author : LG 3 | import os 4 | import torch.utils.data 5 | import numpy as np 6 | import xml.etree.ElementTree as ET 7 | from PIL import Image 8 | 9 | __all__ = ['vocdataset'] 10 | 11 | class vocdataset(torch.utils.data.Dataset): 12 | 13 | def __init__(self, cfg, is_train=True, data_dir=None, transform=None, target_transform=None, keep_difficult=False): 14 | """VOC格式数据集 15 | Args: 16 | data_dir: VOC格式数据集根目录,该目录下包含: 17 | Annotations, ImageSets, JPEGImages, SegmentationClass, SegmentationObject. 18 | split: train、test 或者 eval, 对应于 ImageSets/Main/train.txt,eval.txt 19 | """ 20 | # 类别 21 | self.class_names = cfg.DATA.DATASET.CLASS_NAME 22 | self.data_dir = cfg.DATA.DATASET.DATA_DIR 23 | self.is_train = is_train 24 | if data_dir: 25 | self.data_dir = data_dir 26 | self.split = cfg.DATA.DATASET.TRAIN_SPLIT # train 对应于ImageSets/Main/train.txt 27 | if not self.is_train: 28 | self.split = cfg.DATA.DATASET.TEST_SPLIT # test 对应于ImageSets/Main/test.txt 29 | self.transform = transform 30 | self.target_transform = target_transform 31 | image_sets_file = os.path.join(self.data_dir, "ImageSets", "Main", "{}.txt".format(self.split)) 32 | # 从train.txt 文件中读取图片 id 返回ids列表 33 | self.ids = self._read_image_ids(image_sets_file) 34 | self.keep_difficult = keep_difficult 35 | self.class_dict = {class_name: i for i, class_name in enumerate(self.class_names)} 36 | 37 | def __getitem__(self, index): 38 | image_name = self.ids[index] 39 | # 解析Annotations/id.xml 读取id图片对应的 boxes, labels, is_difficult 均为列表 40 | boxes, labels, is_difficult = self._get_annotation(image_name) 41 | if not self.keep_difficult: 42 | boxes = boxes[is_difficult == 0] 43 | labels = labels[is_difficult == 0] 44 | # 读取 JPEGImages/id.jpg 返回Image.Image 45 | image = self._read_image(image_name) 46 | if self.transform: 47 | image, boxes, labels = self.transform(image, boxes, labels) 48 | if self.target_transform: 49 | boxes, labels = self.target_transform(boxes, labels) 50 | 51 | return image, boxes, labels, image_name 52 | 53 | # 返回 id, boxes, labels, is_difficult 54 | def get_annotation(self, index): 55 | image_id = self.ids[index] 56 | return image_id, self._get_annotation(image_id) 57 | 58 | def __len__(self): 59 | return len(self.ids) 60 | 61 | @staticmethod 62 | def _read_image_ids(image_sets_file): 63 | ids = [] 64 | with open(image_sets_file) as f: 65 | for line in f: 
66 | ids.append(line.rstrip()) 67 | return ids 68 | 69 | # 解析xml,返回 boxes, labels, is_difficult numpy.array格式 70 | def _get_annotation(self, image_name): 71 | annotation_file = os.path.join(self.data_dir, "Annotations", "{}.xml".format(image_name)) 72 | objects = ET.parse(annotation_file).findall("object") 73 | boxes = [] 74 | labels = [] 75 | is_difficult = [] 76 | for obj in objects: # .encode('utf-8').decode('UTF-8-sig') 解决Windows下中文编码问题 77 | class_name = obj.find('name').text.encode('utf-8').decode('UTF-8-sig').lower().strip() 78 | bbox = obj.find('bndbox') 79 | # VOC dataset format follows Matlab, in which indexes start from 0 80 | x1 = float(bbox.find('xmin').text.encode('utf-8').decode('UTF-8-sig')) - 1 81 | y1 = float(bbox.find('ymin').text.encode('utf-8').decode('UTF-8-sig')) - 1 82 | x2 = float(bbox.find('xmax').text.encode('utf-8').decode('UTF-8-sig')) - 1 83 | y2 = float(bbox.find('ymax').text.encode('utf-8').decode('UTF-8-sig')) - 1 84 | boxes.append([x1, y1, x2, y2]) 85 | labels.append(self.class_dict[class_name]) 86 | is_difficult_str = obj.find('difficult').text 87 | is_difficult.append(int(is_difficult_str) if is_difficult_str else 0) 88 | 89 | return (np.array(boxes, dtype=np.float32), 90 | np.array(labels, dtype=np.int64), 91 | np.array(is_difficult, dtype=np.uint8)) 92 | 93 | # 获取图片尺寸信息,返回字典 {'height': , 'width': } 94 | def get_img_size(self, img_name): 95 | annotation_file = os.path.join(self.data_dir, "Annotations", "{}.xml".format(img_name)) 96 | anno = ET.parse(annotation_file).getroot() 97 | size = anno.find("size") 98 | im_info = tuple(map(int, (size.find("height").text, size.find("width").text))) 99 | return {"height": im_info[0], "width": im_info[1]} 100 | 101 | # 读取图片数据,返回Image.Image 102 | def _read_image(self, image_id): 103 | image_file = os.path.join(self.data_dir, "JPEGImages", "{}.jpg".format(image_id)) 104 | image = Image.open(image_file).convert("RGB") 105 | image = np.array(image) 106 | return image 107 | 108 | def get_one_image(self,image_name = None): 109 | import random 110 | 111 | if not image_name: 112 | image_name = random.choice(self.ids) 113 | # 解析Annotations/id.xml 读取id图片对应的 boxes, labels, is_difficult 均为列表 114 | boxes, labels, is_difficult = self._get_annotation(image_name) 115 | if not self.keep_difficult: 116 | boxes = boxes[is_difficult == 0] 117 | labels = labels[is_difficult == 0] 118 | # 读取 JPEGImages/id.jpg 返回Image.Image 119 | image = self._read_image(image_name) 120 | image_after_transfrom = None 121 | if self.transform: 122 | image_after_transfrom, boxes, labels = self.transform(image, boxes, labels) 123 | if self.target_transform: 124 | boxes, labels = self.target_transform(boxes, labels) 125 | 126 | return image, image_after_transfrom, boxes, labels, image_name -------------------------------------------------------------------------------- /Data/Transfroms.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author : LG 3 | from Utils.Boxs_op import center_form_to_corner_form, assign_priors,\ 4 | corner_form_to_center_form, convert_boxes_to_locations 5 | from Data.Transfroms_utils import * 6 | 7 | __all__ = ['transfrom', 'targettransform'] 8 | 9 | class transfrom: 10 | """ 11 | transfroms 12 | eg: 13 | transform = Tramsfrom(cfg,is_train=True) 14 | """ 15 | def __init__(self,cfg, is_train): 16 | if is_train: 17 | self.transforms = [ 18 | ConvertFromInts(), # 图像数据转float32 19 | PhotometricDistort(), # 光度畸变,对比度,亮度,光噪声,色调,饱和等(详情看函数,有详细备注.) 
20 | SubtractMeans(cfg.DATA.PIXEL_MEAN), # 减均值 21 | DivideStds(cfg.DATA.PIXEL_STD), # 除方差 22 | Expand(), # 随机扩充 23 | RandomSampleCrop(), # 随机交兵比裁剪 24 | RandomMirror(), # 随机镜像 25 | ToPercentCoords(), # boxes 坐标转百分比制 26 | Resize(cfg.MODEL.INPUT.IMAGE_SIZE), 27 | 28 | ToTensor(), 29 | ] 30 | else: 31 | self.transforms = [ 32 | Resize(cfg.MODEL.INPUT.IMAGE_SIZE), 33 | SubtractMeans(cfg.DATA.PIXEL_MEAN), # 减均值 34 | DivideStds(cfg.DATA.PIXEL_STD), # 除方差 35 | ToTensor() 36 | ] 37 | 38 | def __call__(self, img, boxes=None, labels=None): 39 | for t in self.transforms: 40 | img, boxes, labels = t(img, boxes, labels) 41 | return img, boxes, labels 42 | 43 | 44 | class targettransform: 45 | """ 46 | targets_transfroms 47 | eg: 48 | transform = TargetTransform(cfg) 49 | """ 50 | 51 | def __init__(self, cfg): 52 | from Model.struct import priorbox # 避免循环导入.(模型中detect方法会使用transfrom,而targettransfrom会使用到priorbox, 这样写可以避免循环导入) 53 | 54 | self.center_form_priors = priorbox(cfg)() 55 | self.corner_form_priors = center_form_to_corner_form(self.center_form_priors) 56 | self.center_variance = cfg.MODEL.ANCHORS.CENTER_VARIANCE 57 | self.size_variance = cfg.MODEL.ANCHORS.SIZE_VARIANCE 58 | self.iou_threshold = cfg.MODEL.ANCHORS.THRESHOLD 59 | 60 | def __call__(self, gt_boxes, gt_labels): 61 | if type(gt_boxes) is np.ndarray: 62 | gt_boxes = torch.from_numpy(gt_boxes) 63 | if type(gt_labels) is np.ndarray: 64 | gt_labels = torch.from_numpy(gt_labels) 65 | boxes, labels = assign_priors(gt_boxes, gt_labels, 66 | self.corner_form_priors, 67 | self.iou_threshold) 68 | boxes = corner_form_to_center_form(boxes) 69 | locations = convert_boxes_to_locations(boxes, 70 | self.center_form_priors, 71 | self.center_variance, 72 | self.size_variance) 73 | return locations, labels 74 | -------------------------------------------------------------------------------- /Data/Transfroms_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author : LG 3 | import torch 4 | from torchvision import transforms 5 | import cv2 6 | import numpy as np 7 | import types 8 | from numpy import random 9 | 10 | 11 | def intersect(box_a, box_b): 12 | max_xy = np.minimum(box_a[:, 2:], box_b[2:]) 13 | min_xy = np.maximum(box_a[:, :2], box_b[:2]) 14 | inter = np.clip((max_xy - min_xy), a_min=0, a_max=np.inf) 15 | return inter[:, 0] * inter[:, 1] 16 | 17 | 18 | def jaccard_numpy(box_a, box_b): 19 | """Compute the jaccard overlap of two sets of boxes. The jaccard overlap 20 | is simply the intersection over union of two boxes. 21 | E.g.: 22 | A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) 23 | Args: 24 | box_a: Multiple bounding boxes, Shape: [num_boxes,4] 25 | box_b: Single bounding box, Shape: [4] 26 | Return: 27 | jaccard overlap: Shape: [box_a.shape[0], box_a.shape[1]] 28 | """ 29 | inter = intersect(box_a, box_b) 30 | area_a = ((box_a[:, 2] - box_a[:, 0]) * 31 | (box_a[:, 3] - box_a[:, 1])) # [A,B] 32 | area_b = ((box_b[2] - box_b[0]) * 33 | (box_b[3] - box_b[1])) # [A,B] 34 | union = area_a + area_b - inter 35 | return inter / union # [A,B] 36 | 37 | 38 | class Compose(object): 39 | """Composes several augmentations together. 40 | Args: 41 | transforms (List[Transform]): list of transforms to compose. 
42 | Example: 43 | >>> Compose([ 44 | >>> transforms.CenterCrop(10), 45 | >>> transforms.ToTensor(), 46 | >>> ]) 47 | """ 48 | 49 | def __init__(self, transforms): 50 | self.transforms = transforms 51 | 52 | def __call__(self, img, boxes=None, labels=None): 53 | for t in self.transforms: 54 | img, boxes, labels = t(img, boxes, labels) 55 | return img, boxes, labels 56 | 57 | 58 | class Lambda(object): 59 | """Applies a lambda as a transform.""" 60 | 61 | def __init__(self, lambd): 62 | assert isinstance(lambd, types.LambdaType) 63 | self.lambd = lambd 64 | 65 | def __call__(self, img, boxes=None, labels=None): 66 | return self.lambd(img, boxes, labels) 67 | 68 | 69 | class ConvertFromInts(object): 70 | def __call__(self, image, boxes=None, labels=None): 71 | return image.astype(np.float32), boxes, labels 72 | 73 | 74 | class SubtractMeans(object): 75 | def __init__(self, mean): 76 | self.mean = np.array(mean, dtype=np.float32) 77 | 78 | def __call__(self, image, boxes=None, labels=None): 79 | image = image.astype(np.float32) 80 | image -= self.mean 81 | return image.astype(np.float32), boxes, labels 82 | 83 | class DivideStds(object): 84 | def __init__(self, std): 85 | self.std = np.array(std, dtype=np.float32) 86 | 87 | def __call__(self, image, boxes=None, labels=None): 88 | image = image.astype(np.float32) 89 | image /= self.std 90 | return image.astype(np.float32), boxes, labels 91 | 92 | class ToAbsoluteCoords(object): 93 | def __call__(self, image, boxes=None, labels=None): 94 | height, width, channels = image.shape 95 | boxes[:, 0] *= width 96 | boxes[:, 2] *= width 97 | boxes[:, 1] *= height 98 | boxes[:, 3] *= height 99 | 100 | return image, boxes, labels 101 | 102 | 103 | class ToPercentCoords(object): 104 | def __call__(self, image, boxes=None, labels=None): 105 | height, width, channels = image.shape 106 | boxes[:, 0] /= width 107 | boxes[:, 2] /= width 108 | boxes[:, 1] /= height 109 | boxes[:, 3] /= height 110 | 111 | return image, boxes, labels 112 | 113 | 114 | class Resize(object): 115 | def __init__(self, size=600): 116 | self.size = size 117 | 118 | def __call__(self, image, boxes=None, labels=None): 119 | image = cv2.resize(image, (self.size, 120 | self.size)) 121 | return image, boxes, labels 122 | 123 | 124 | class RandomSaturation(object): 125 | def __init__(self, lower=0.5, upper=1.5): 126 | self.lower = lower 127 | self.upper = upper 128 | assert self.upper >= self.lower, "contrast upper must be >= lower." 129 | assert self.lower >= 0, "contrast lower must be non-negative." 
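    # Note (added description, inferred from __call__ below): this transform assumes an HSV float image;
    # with probability 1/2 it scales channel 1 (saturation) by a random factor drawn from [lower, upper].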
130 | 131 | def __call__(self, image, boxes=None, labels=None): 132 | if random.randint(2): 133 | image[:, :, 1] *= random.uniform(self.lower, self.upper) 134 | 135 | return image, boxes, labels 136 | 137 | 138 | class RandomHue(object): 139 | def __init__(self, delta=18.0): 140 | assert delta >= 0.0 and delta <= 360.0 141 | self.delta = delta 142 | 143 | def __call__(self, image, boxes=None, labels=None): 144 | if random.randint(2): 145 | image[:, :, 0] += random.uniform(-self.delta, self.delta) 146 | image[:, :, 0][image[:, :, 0] > 360.0] -= 360.0 147 | image[:, :, 0][image[:, :, 0] < 0.0] += 360.0 148 | return image, boxes, labels 149 | 150 | 151 | class RandomLightingNoise(object): 152 | def __init__(self): 153 | self.perms = ((0, 1, 2), (0, 2, 1), 154 | (1, 0, 2), (1, 2, 0), 155 | (2, 0, 1), (2, 1, 0)) 156 | 157 | def __call__(self, image, boxes=None, labels=None): 158 | if random.randint(2): 159 | swap = self.perms[random.randint(len(self.perms))] 160 | shuffle = SwapChannels(swap) # shuffle channels 161 | image = shuffle(image) 162 | return image, boxes, labels 163 | 164 | 165 | class ConvertColor(object): 166 | ''' 167 | H色调用角度度量,取值范围为0°~360°.从红色开始按逆时针方向计算,红色为0°,绿色为120°,蓝色为240°.它们的补色是:黄色为60°,青色为180°,品红为300°; 168 | S饱和度表示颜色接近光谱色的程度.一种颜色,可以看成是某种光谱色与白色混合的结果.其中光谱色所占的比例愈大,颜色接近光谱色的程度就愈高,颜色的饱和度也就愈高; 169 | 明度表示颜色明亮的程度,对于光源色,明度值与发光体的光亮度有关;对于物体色,此值和物体的透射比或反射比有关。通常取值范围为0%(黑)到100%(白)。 170 | ''' 171 | def __init__(self, current, transform): 172 | self.transform = transform 173 | self.current = current 174 | 175 | def __call__(self, image, boxes=None, labels=None): 176 | if self.current == 'BGR' and self.transform == 'HSV': 177 | image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) 178 | elif self.current == 'RGB' and self.transform == 'HSV': 179 | image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV) 180 | elif self.current == 'BGR' and self.transform == 'RGB': 181 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 182 | elif self.current == 'HSV' and self.transform == 'BGR': 183 | image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR) 184 | elif self.current == 'HSV' and self.transform == "RGB": 185 | image = cv2.cvtColor(image, cv2.COLOR_HSV2RGB) 186 | else: 187 | raise NotImplementedError 188 | return image, boxes, labels 189 | 190 | 191 | class RandomContrast(object): 192 | def __init__(self, lower=0.5, upper=1.5): 193 | self.lower = lower 194 | self.upper = upper 195 | assert self.upper >= self.lower, "contrast upper must be >= lower." 196 | assert self.lower >= 0, "contrast lower must be non-negative." 
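    # Note (added description): with probability 1/2 the whole image is multiplied by a factor alpha
    # drawn from [lower, upper] (contrast jitter, applied in RGB space inside PhotometricDistort).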
197 | 198 | # expects float image 199 | def __call__(self, image, boxes=None, labels=None): 200 | if random.randint(2): 201 | alpha = random.uniform(self.lower, self.upper) 202 | image *= alpha 203 | return image, boxes, labels 204 | 205 | 206 | class RandomBrightness(object): 207 | def __init__(self, delta=32): 208 | assert delta >= 0.0 209 | assert delta <= 255.0 210 | self.delta = delta 211 | 212 | def __call__(self, image, boxes=None, labels=None): 213 | if random.randint(2): 214 | delta = random.uniform(-self.delta, self.delta) 215 | image += delta 216 | return image, boxes, labels 217 | 218 | 219 | class ToCV2Image(object): 220 | def __call__(self, tensor, boxes=None, labels=None): 221 | return tensor.cpu().numpy().astype(np.float32).transpose((1, 2, 0)), boxes, labels 222 | 223 | 224 | class ToTensor(object): 225 | def __call__(self, cvimage, boxes=None, labels=None): 226 | return torch.from_numpy(cvimage.astype(np.float32)).permute(2, 0, 1), boxes, labels 227 | 228 | 229 | class RandomSampleCrop(object): 230 | """Crop 231 | Arguments: 232 | img (Image): the image being input during training 233 | boxes (Tensor): the original bounding boxes in pt form 234 | labels (Tensor): the class labels for each bbox 235 | mode (float tuple): the min and max jaccard overlaps 236 | Return: 237 | (img, boxes, classes) 238 | img (Image): the cropped image 239 | boxes (Tensor): the adjusted bounding boxes in pt form 240 | labels (Tensor): the class labels for each bbox 241 | """ 242 | 243 | def __init__(self): 244 | self.sample_options = ( 245 | None, # 直接返回,不裁剪 246 | # IOU裁剪, (最小iou, +无穷) , 这里的IOU为 所有 标注框 与 裁剪框的IOU, 247 | # 因而,只决定 裁剪框所包含多目标部分的比例, 并不是直接与单个标注框进行IOU裁剪 248 | (0.1, None), 249 | (0.3, None), 250 | (0.7, None), 251 | (0.9, None), 252 | # 不限定iou裁剪,(-无穷, +无穷) 253 | (None, None), 254 | ) 255 | 256 | def __call__(self, image, boxes=None, labels=None): 257 | # guard against no boxes 258 | if boxes is not None and boxes.shape[0] == 0: 259 | return image, boxes, labels 260 | height, width, _ = image.shape 261 | while True: 262 | # randomly choose a mode 263 | mode = random.choice(self.sample_options) 264 | if mode is None: 265 | return image, boxes, labels 266 | 267 | min_iou, max_iou = mode 268 | if min_iou is None: 269 | min_iou = float('-inf') # 负无穷 270 | if max_iou is None: 271 | max_iou = float('inf') # 正无穷 272 | 273 | # 尝试50次,每次随机裁剪不一定符合情况,会进行尝试. 274 | for _ in range(50): 275 | current_image = image 276 | 277 | w = random.uniform(0.3 * width, width) # 框尺寸,0.3~1 倍 图像尺寸 278 | h = random.uniform(0.3 * height, height) 279 | 280 | # 限制 框宽高,不要特别狭长的框体,不然会出现resize后的严重变形. 
281 | if h / w < 0.5 or h / w > 2: 282 | continue 283 | 284 | left = random.uniform(width - w) 285 | top = random.uniform(height - h) 286 | 287 | # 截取框 (l, t, r, d) 288 | rect = np.array([int(left), int(top), int(left + w), int(top + h)]) 289 | 290 | # 计算 截取框与 标注框s的 交并比, boxes为单图对应的多个标注框,rect是当前的截取框 291 | overlap = jaccard_numpy(boxes, rect) 292 | 293 | # IOU限制 294 | if overlap.max() < min_iou or overlap.min() > max_iou: 295 | continue 296 | 297 | # 标注框s的 中心 (x, y) 298 | centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0 299 | 300 | # 截取框 左上角 处于标注框中心 左上 ( 与下面两行 共同 确定 是否截取框 包含了所有的标注框中心) 301 | m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1]) 302 | 303 | # 截取框 右下角 处于标注框中心 右下 304 | m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1]) 305 | 306 | mask = m1 * m2 307 | 308 | # 如果截取框没有包含至少一个标注框的中心, 则 重新尝试 309 | if not mask.any(): 310 | continue 311 | 312 | # 截取图片 313 | current_image = current_image[rect[1]:rect[3], rect[0]:rect[2], :] 314 | 315 | # 这里只选择 中心处于截取框内的 目标 316 | current_boxes = boxes[mask, :].copy() 317 | 318 | # 标签同上 319 | current_labels = labels[mask] 320 | 321 | # should we use the box left and top corner or the crop's 322 | current_boxes[:, :2] = np.maximum(current_boxes[:, :2], 323 | rect[:2]) 324 | # adjust to crop (by substracting crop's left,top) 325 | current_boxes[:, :2] -= rect[:2] 326 | 327 | current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:], 328 | rect[2:]) 329 | # adjust to crop (by substracting crop's left,top) 330 | current_boxes[:, 2:] -= rect[:2] 331 | 332 | return current_image, current_boxes, current_labels 333 | 334 | 335 | class Expand(object): 336 | def __call__(self, image, boxes, labels): 337 | if random.randint(2): 338 | return image, boxes, labels 339 | 340 | height, width, depth = image.shape 341 | ratio = random.uniform(1, 4) # (1,4) 342 | left = random.uniform(0, width * ratio - width) 343 | top = random.uniform(0, height * ratio - height) 344 | 345 | expand_image = np.zeros( 346 | (int(height * ratio), int(width * ratio), depth), 347 | dtype=image.dtype) 348 | expand_image[int(top):int(top + height), 349 | int(left):int(left + width)] = image 350 | image = expand_image 351 | 352 | boxes = boxes.copy() 353 | boxes[:, :2] += (int(left), int(top)) 354 | boxes[:, 2:] += (int(left), int(top)) 355 | 356 | return image, boxes, labels 357 | 358 | 359 | class RandomMirror(object): 360 | def __call__(self, image, boxes, classes): 361 | _, width, _ = image.shape 362 | if random.randint(2): 363 | image = image[:, ::-1] 364 | boxes = boxes.copy() 365 | boxes[:, 0::2] = width - boxes[:, 2::-2] 366 | return image, boxes, classes 367 | 368 | 369 | class SwapChannels(object): 370 | """Transforms a tensorized image by swapping the channels in the order 371 | specified in the swap tuple. 372 | Args: 373 | swaps (int triple): final order of channels 374 | eg: (2, 1, 0) 375 | """ 376 | 377 | def __init__(self, swaps): 378 | self.swaps = swaps 379 | 380 | def __call__(self, image): 381 | """ 382 | Args: 383 | image (Tensor): image tensor to be transformed 384 | Return: 385 | a tensor with channels swapped according to swap 386 | """ 387 | # if torch.is_tensor(image): 388 | # image = image.data.cpu().numpy() 389 | # else: 390 | # image = np.array(image) 391 | image = image[:, :, self.swaps] 392 | return image 393 | 394 | 395 | class PhotometricDistort(object): 396 | def __init__(self): 397 | self.pd = [ 398 | RandomContrast(), # RGB 随机对比度 399 | ConvertColor(current="RGB", transform='HSV'), # HSV HSV同RGB也是一种颜色表示. 
H色调,S饱和度,V明度,详情看函数备注 400 | RandomSaturation(), # HSV 随机饱和 401 | RandomHue(), # HSV 随机色调 402 | ConvertColor(current='HSV', transform='RGB'), # RGB 403 | RandomContrast() # 随机对比度 404 | ] 405 | self.rand_brightness = RandomBrightness() # 随机亮度 406 | self.rand_light_noise = RandomLightingNoise() # 随机光噪声 407 | 408 | def __call__(self, image, boxes, labels): 409 | im = image.copy() 410 | im, boxes, labels = self.rand_brightness(im, boxes, labels) 411 | if random.randint(2): 412 | distort = Compose(self.pd[:-1]) # 先对比度调整 413 | else: 414 | distort = Compose(self.pd[1:]) # 后对比度调整 415 | im, boxes, labels = distort(im, boxes, labels) 416 | return self.rand_light_noise(im, boxes, labels) 417 | 418 | 419 | 420 | -------------------------------------------------------------------------------- /Data/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author : LG 3 | from .Dataset_VOC import vocdataset 4 | from .Dataloader import our_dataloader, our_dataloader_test 5 | from .Transfroms import transfrom, targettransform 6 | -------------------------------------------------------------------------------- /Data/__pycache__/Dataloader.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Data/__pycache__/Dataloader.cpython-37.pyc -------------------------------------------------------------------------------- /Data/__pycache__/Dataset_VOC.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Data/__pycache__/Dataset_VOC.cpython-37.pyc -------------------------------------------------------------------------------- /Data/__pycache__/Transfroms.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Data/__pycache__/Transfroms.cpython-37.pyc -------------------------------------------------------------------------------- /Data/__pycache__/Transfroms_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Data/__pycache__/Transfroms_utils.cpython-37.pyc -------------------------------------------------------------------------------- /Data/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Data/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Demo_detect_one_image.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author : LG 3 | from Model import RetainNet 4 | from Configs import _C as cfg 5 | from PIL import Image 6 | import matplotlib.pyplot as plt 7 | # 实例化模型 8 | net = RetainNet(cfg) 9 | # 使用cpu或gpu 10 | net.to('cuda') 11 | # 模型从权重文件中加载权重 12 | net.load_pretrained_weight('/home/super/PycharmProjects/Retinanet-Pytorch/Weights/trained/model_35.pkl') 13 | # 打开图片 14 | image = Image.open("/home/super/VOC_det/VOCdevkit/VOC2007/JPEGImages/000009.jpg") 15 | # 进行检测, 分别返回 
绘制了检测框的图片数据/回归框/标签/分数. 16 | drawn_image, boxes, labels, scores = net.Detect_single_img(image=image,score_threshold=0.5) 17 | 18 | plt.imsave('XXX_det.jpg',drawn_image) 19 | plt.imshow(drawn_image) 20 | plt.show() -------------------------------------------------------------------------------- /Demo_detect_video.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author : LG 3 | from Model import RetainNet 4 | from Configs import _C as cfg 5 | 6 | # 实例化模型 7 | net = RetainNet(cfg) 8 | # 使用cpu或gpu 9 | net.to('cuda') 10 | # 模型从权重文件中加载权重 11 | net.load_pretrained_weight('XXX.pkl') 12 | 13 | video_path = 'XXX.mp4' 14 | 15 | # 进行检测, 16 | # if save_video_path不为None,则不保存视频,如需保存视频save_video_path=XXX.mp4 , 17 | # show=True,实时显示检测结果 18 | net.Detect_video(video_path=video_path, score_threshold=0.02, save_video_path=None, show=True) 19 | -------------------------------------------------------------------------------- /Demo_eval.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author : LG 3 | 4 | from Model import RetainNet, Evaler 5 | from Data import vocdataset 6 | from Configs import _C as cfg 7 | from Data import transfrom,targettransform 8 | 9 | 10 | # 训练数据集,VOC格式数据集, 训练数据取自 ImageSets/Main/train.txt' 11 | train_dataset=vocdataset(cfg, is_train=True, transform=transfrom(cfg,is_train=True), 12 | target_transform=targettransform(cfg)) 13 | 14 | # 测试数据集,VOC格式数据集, 测试数据取自 ImageSets/Main/eval.txt' 15 | test_dataset = vocdataset(cfg=cfg, is_train=False, 16 | transform=transfrom(cfg=cfg, is_train=False), 17 | target_transform=targettransform(cfg)) 18 | 19 | if __name__ == '__main__': 20 | # 模型测试只支持GPU单卡或多卡,不支持cpu 21 | net = RetainNet(cfg) 22 | # 将模型移动到gpu上,cfg.DEVICE.MAINDEVICE定义了模型所使用的主GPU 23 | net.to(cfg.DEVICE.MAINDEVICE) 24 | # 模型从权重文件中加载权重 25 | net.load_pretrained_weight('XXX.pkl') 26 | # 初始化验证器,验证器参数通过cfg进行配置;也可传入参数进行配置,但不建议 27 | evaler = Evaler(cfg, eval_devices=None) 28 | # 验证器开始在数据集上验证模型 29 | ap, map = evaler(model=net, 30 | test_dataset=test_dataset) 31 | print(ap) 32 | print(map) -------------------------------------------------------------------------------- /Demo_train.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author : LG 3 | 4 | from Model import RetainNet, Trainer 5 | from Data import vocdataset 6 | from Configs import _C as cfg 7 | from Data import transfrom,targettransform 8 | 9 | 10 | # 训练数据集,VOC格式数据集, 训练数据取自 ImageSets/Main/train.txt' 11 | train_dataset=vocdataset(cfg, is_train=True, transform=transfrom(cfg,is_train=True), 12 | target_transform=targettransform(cfg)) 13 | 14 | # 测试数据集,VOC格式数据集, 测试数据取自 ImageSets/Main/eval.txt' 15 | test_dataset = vocdataset(cfg=cfg, is_train=False, 16 | transform=transfrom(cfg=cfg, is_train=False), 17 | target_transform=targettransform(cfg)) 18 | 19 | if __name__ == '__main__': 20 | """ 21 | 使用时,请先打开visdom 22 | 23 | 命令行 输入 pip install visdom 进行安装 24 | 输入 python -m visdom.server' 启动 25 | """ 26 | 27 | # 首次调用会下载resnet预训练模型 28 | 29 | # 实例化模型. 
模型的具体各种参数在Config文件中进行配置 30 | net = RetainNet(cfg) 31 | # 将模型移动到gpu上,cfg.DEVICE.MAINDEVICE定义了模型所使用的主GPU 32 | net.to(cfg.DEVICE.MAINDEVICE) 33 | # 初始化训练器,训练器参数通过cfg进行配置;也可传入参数进行配置,但不建议 34 | trainer = Trainer(cfg) 35 | # 训练器开始在 数据集上训练模型 36 | trainer(net, train_dataset) 37 | -------------------------------------------------------------------------------- /Model/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author : LG 3 | from .retainnet import RetainNet 4 | from .trainer import Trainer 5 | from .evaler import Evaler -------------------------------------------------------------------------------- /Model/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Model/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Model/__pycache__/retainnet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Model/__pycache__/retainnet.cpython-37.pyc -------------------------------------------------------------------------------- /Model/base_models/Resnet.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author : LG 3 | import torch.nn as nn 4 | import torch 5 | from torch.nn import functional as F 6 | import wget 7 | import os 8 | from Configs import _C as cfg 9 | 10 | __all__ = ['build_resnet'] 11 | 12 | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): 13 | """3x3 convolution with padding""" 14 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 15 | padding=dilation, groups=groups, bias=False, dilation=dilation) 16 | 17 | 18 | def conv1x1(in_planes, out_planes, stride=1): 19 | """1x1 convolution""" 20 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 21 | 22 | 23 | class BasicBlock(nn.Module): 24 | expansion = 1 25 | 26 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, 27 | base_width=64, dilation=1, norm_layer=None): 28 | super(BasicBlock, self).__init__() 29 | if norm_layer is None: 30 | norm_layer = nn.BatchNorm2d 31 | if groups != 1 or base_width != 64: 32 | raise ValueError('BasicBlock only supports groups=1 and base_width=64') 33 | if dilation > 1: 34 | raise NotImplementedError("Dilation > 1 not supported in BasicBlock") 35 | # Both self.conv1 and self.downsample layers downsample the input when stride != 1 36 | self.conv1 = conv3x3(inplanes, planes, stride) 37 | self.bn1 = norm_layer(planes) 38 | self.relu = nn.ReLU(inplace=True) 39 | self.conv2 = conv3x3(planes, planes) 40 | self.bn2 = norm_layer(planes) 41 | self.downsample = downsample 42 | self.stride = stride 43 | 44 | def forward(self, x): 45 | identity = x 46 | 47 | out = self.conv1(x) 48 | out = self.bn1(out) 49 | out = self.relu(out) 50 | 51 | out = self.conv2(out) 52 | out = self.bn2(out) 53 | 54 | if self.downsample is not None: 55 | identity = self.downsample(x) 56 | 57 | out += identity 58 | out = self.relu(out) 59 | 60 | return out 61 | 62 | 63 | class Bottleneck(nn.Module): 64 | expansion = 4 65 | 66 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, 67 | base_width=64, dilation=1, 
norm_layer=None): 68 | super(Bottleneck, self).__init__() 69 | if norm_layer is None: 70 | norm_layer = nn.BatchNorm2d 71 | width = int(planes * (base_width / 64.)) * groups 72 | # Both self.conv2 and self.downsample layers downsample the input when stride != 1 73 | self.conv1 = conv1x1(inplanes, width) 74 | self.bn1 = norm_layer(width) 75 | self.conv2 = conv3x3(width, width, stride, groups, dilation) 76 | self.bn2 = norm_layer(width) 77 | self.conv3 = conv1x1(width, planes * self.expansion) 78 | self.bn3 = norm_layer(planes * self.expansion) 79 | self.relu = nn.ReLU(inplace=True) 80 | self.downsample = downsample 81 | self.stride = stride 82 | 83 | def forward(self, x): 84 | identity = x 85 | 86 | out = self.conv1(x) 87 | out = self.bn1(out) 88 | out = self.relu(out) 89 | 90 | out = self.conv2(out) 91 | out = self.bn2(out) 92 | out = self.relu(out) 93 | 94 | out = self.conv3(out) 95 | out = self.bn3(out) 96 | 97 | if self.downsample is not None: 98 | identity = self.downsample(x) 99 | 100 | out += identity 101 | out = self.relu(out) 102 | 103 | return out 104 | 105 | 106 | class ResNet(nn.Module): 107 | 108 | def __init__(self, arch, zero_init_residual=False, 109 | groups=1, width_per_group=64, replace_stride_with_dilation=None, 110 | norm_layer=None): 111 | super(ResNet, self).__init__() 112 | resnets = { 113 | 'resnet18': [BasicBlock, [2, 2, 2, 2]], 114 | 'resnet34': [BasicBlock, [3, 4, 6, 3]], 115 | 'resnet50': [Bottleneck, [3, 4, 6, 3]], 116 | 'resnet101': [Bottleneck, [3, 4, 23, 3]], 117 | 'resnet152': [Bottleneck, [3, 8, 36, 3]], 118 | } 119 | block = resnets[arch][0] 120 | layers = resnets[arch][1] 121 | 122 | self.arch = arch 123 | if norm_layer is None: 124 | norm_layer = nn.BatchNorm2d 125 | self._norm_layer = norm_layer 126 | 127 | self.inplanes = 64 128 | self.dilation = 1 129 | if replace_stride_with_dilation is None: 130 | # each element in the tuple indicates if we should replace 131 | # the 2x2 stride with a dilated convolution instead 132 | replace_stride_with_dilation = [False, False, False] 133 | if len(replace_stride_with_dilation) != 3: 134 | raise ValueError("replace_stride_with_dilation should be None " 135 | "or a 3-element tuple, got {}".format(replace_stride_with_dilation)) 136 | self.groups = groups 137 | self.base_width = width_per_group 138 | 139 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, 140 | bias=False) 141 | self.bn1 = norm_layer(self.inplanes) 142 | self.relu = nn.ReLU(inplace=True) 143 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 144 | self.layer1 = self._make_layer(block, 64, layers[0]) 145 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, 146 | dilate=replace_stride_with_dilation[0]) 147 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, 148 | dilate=replace_stride_with_dilation[1]) 149 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2, 150 | dilate=replace_stride_with_dilation[2]) 151 | # extra 额外层,用于在c5基础上输出p6,p7 152 | self.conv6 = nn.Conv2d(512*block.expansion, 256, kernel_size=3, stride=2, padding=1) 153 | self.conv7 = nn.Conv2d(256, 256, kernel_size=3, stride=2, padding=1) 154 | 155 | for m in self.modules(): 156 | if isinstance(m, nn.Conv2d): 157 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 158 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 159 | nn.init.constant_(m.weight, 1) 160 | nn.init.constant_(m.bias, 0) 161 | 162 | # Zero-initialize the last BN in each residual branch, 163 | # so that the residual 
branch starts with zeros, and each residual block behaves like an identity. 164 | # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 165 | if zero_init_residual: 166 | for m in self.modules(): 167 | if isinstance(m, Bottleneck): 168 | nn.init.constant_(m.bn3.weight, 0) 169 | elif isinstance(m, BasicBlock): 170 | nn.init.constant_(m.bn2.weight, 0) 171 | 172 | def _make_layer(self, block, planes, blocks, stride=1, dilate=False): 173 | norm_layer = self._norm_layer 174 | downsample = None 175 | previous_dilation = self.dilation 176 | if dilate: 177 | self.dilation *= stride 178 | stride = 1 179 | if stride != 1 or self.inplanes != planes * block.expansion: 180 | downsample = nn.Sequential( 181 | conv1x1(self.inplanes, planes * block.expansion, stride), 182 | norm_layer(planes * block.expansion), 183 | ) 184 | 185 | layers = [] 186 | layers.append(block(self.inplanes, planes, stride, downsample, self.groups, 187 | self.base_width, previous_dilation, norm_layer)) 188 | self.inplanes = planes * block.expansion 189 | for _ in range(1, blocks): 190 | layers.append(block(self.inplanes, planes, groups=self.groups, 191 | base_width=self.base_width, dilation=self.dilation, 192 | norm_layer=norm_layer)) 193 | 194 | return nn.Sequential(*layers) 195 | 196 | def forward(self, x): 197 | c1 = self.conv1(x) 198 | c1 = self.bn1(c1) 199 | c1 = self.relu(c1) 200 | c1 = self.maxpool(c1) 201 | 202 | c2 = self.layer1(c1) 203 | c3 = self.layer2(c2) 204 | c4 = self.layer3(c3) 205 | c5 = self.layer4(c4) 206 | 207 | p6 = self.conv6(c5) 208 | p7 = self.conv7(F.relu(p6)) 209 | return c3, c4, c5, p6, p7 210 | 211 | def load_weights(self): 212 | model_urls = { 213 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 214 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 215 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 216 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 217 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 218 | } 219 | 220 | url = model_urls[self.arch] 221 | weight_name = url.split('/')[-1] 222 | weight_path = cfg.FILE.PRETRAIN_WEIGHT_ROOT 223 | weight_file = os.path.join(weight_path, weight_name) 224 | 225 | if not os.path.exists(weight_file): 226 | if not os.path.exists(weight_path): 227 | os.makedirs(weight_path) 228 | 229 | print(' {} no exist ,downloading .....'.format(weight_name)) 230 | wget.download(url=url, out=weight_file) 231 | 232 | print(' --- donwload to {} finish --- '.format(weight_file)) 233 | self.load_state_dict(torch.load(weight_file), strict=False) 234 | print(' --- load weight finish --- ') 235 | 236 | 237 | def build_resnet(arch, pretrained=True, **kwargs): 238 | assert arch in ['resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152'] 239 | model = ResNet(arch, **kwargs) 240 | if pretrained: 241 | model.load_weights() 242 | return model 243 | 244 | 245 | if __name__ == '__main__': 246 | import torch 247 | net = build_resnet('resnet18',pretrained=False) 248 | c3,c4,c5,p6,p7=net(torch.ones((1,3,600,600))) 249 | print(c3.size()) 250 | print(c4.size()) 251 | print(c5.size()) 252 | print(p6.size()) 253 | print(p7.size()) 254 | -------------------------------------------------------------------------------- /Model/base_models/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author : LG 3 | from .Resnet import build_resnet 
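# --- A minimal usage sketch (not a file in the repo): how the backbone outputs line up with the FPN ---
# For 'resnet50' the Bottleneck expansion is 4, so c3/c4/c5 carry 128*4=512, 256*4=1024 and 512*4=2048
# channels -- exactly the channels_of_fetures list that Model/retainnet.py passes to fpn(); the extra
# p6/p7 convolutions already output 256 channels and bypass the FPN.
import torch
from Model.base_models import build_resnet

backbone = build_resnet('resnet50', pretrained=False)
c3, c4, c5, p6, p7 = backbone(torch.ones((1, 3, 600, 600)))
# With the default 600x600 input the spatial sizes are 75, 38, 19, 10 and 5,
# matching cfg.MODEL.ANCHORS.FEATURE_MAPS in Configs.py:
# c3 [1,512,75,75], c4 [1,1024,38,38], c5 [1,2048,19,19], p6 [1,256,10,10], p7 [1,256,5,5]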
-------------------------------------------------------------------------------- /Model/base_models/__pycache__/Resnet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Model/base_models/__pycache__/Resnet.cpython-37.pyc -------------------------------------------------------------------------------- /Model/base_models/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Model/base_models/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Model/evaler.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author : LG 3 | import torch 4 | from Configs import _C as cfg 5 | from Utils import eval_detection_voc 6 | from tqdm import tqdm 7 | from torch.nn import DataParallel 8 | from torch import nn 9 | from Data import our_dataloader_test 10 | from .struct import postprocessor 11 | 12 | 13 | class Evaler(object): 14 | """ 15 | 模型测试器,不指定参数时,均默认使用Configs中配置的参数 16 | *** 推荐使用Configs文件管理参数, 不推荐在函数中进行参数指定, 只是为了扩展 *** 17 | 18 | 模型在测试时,会使用DataParallel进行包装,以便于在多GPU上进行测试 19 | 本测试器只支持GPU训练,单机单卡与单机单卡均可,但不支持cpu,不支持多机多卡(别问为啥不支持多机多卡.穷!!!) 20 | 21 | eg: 22 | evaler = Evaler(cfg,eval_devices=[0,1]) # 使用俩块GPU进行测试,使用时请指定需使用的gpu编号,终端运行nvidia-smi进行查看 23 | ap, map = evaler(net,test_dataset=test_dataset) 24 | """ 25 | def __init__(self, cfg, eval_devices=None): 26 | self.cfg = cfg 27 | self.postprocessor = postprocessor(cfg) 28 | 29 | self.eval_devices = self.cfg.DEVICE.TEST_DEVICES 30 | if eval_devices: 31 | self.eval_devices = eval_devices 32 | 33 | def __call__(self, model, test_dataset): 34 | model.eval() 35 | if not isinstance(model, nn.DataParallel): 36 | model = DataParallel(model, device_ids=self.eval_devices) 37 | else: 38 | model = DataParallel(model.module, device_ids=self.eval_devices) 39 | test_loader = our_dataloader_test(dataset=test_dataset, batch_size=20) 40 | results_dict = self.eval_model_inference(model, data_loader=test_loader) 41 | result = cal_ap_map(results_dict, test_dataset=test_loader.dataset) 42 | ap, map = result['ap'], result['map'] 43 | return ap, map 44 | 45 | def eval_model_inference(self, model, data_loader): 46 | with torch.no_grad(): 47 | results_dict = {} 48 | print(' Evaluating...... 
use GPU : {}'.format(self.eval_devices)) 49 | for images, boxes, labels, image_names in tqdm(data_loader): 50 | cls_logits, bbox_pred = model(images) 51 | results = self.postprocessor(cls_logits, bbox_pred) 52 | for image_name, result in zip(image_names, results): 53 | pred_boxes, pred_labels, pred_scores = result 54 | pred_boxes, pred_labels, pred_scores = pred_boxes.to('cpu').numpy(), \ 55 | pred_labels.to('cpu').numpy(), \ 56 | pred_scores.to('cpu').numpy() 57 | results_dict.update({image_name: {'pred_boxes': pred_boxes, 58 | 'pred_labels': pred_labels, 59 | 'pred_scores': pred_scores}}) 60 | return results_dict 61 | 62 | 63 | def cal_ap_map(results_dict,test_dataset): 64 | pred_boxes_list = [] 65 | pred_labels_list = [] 66 | pred_scores_list = [] 67 | gt_boxs_list = [] 68 | gt_labels_list = [] 69 | gt_difficult_list = [] 70 | for img_name in results_dict: 71 | gt_boxs, gt_labels, gt_difficult = test_dataset._get_annotation(img_name) 72 | size = test_dataset.get_img_size(img_name) 73 | w, h = size['width'],size['height'] 74 | pred_boxes, pred_labels, pred_scores= results_dict[img_name]['pred_boxes'],results_dict[img_name]['pred_labels'],results_dict[img_name]['pred_scores'] 75 | pred_boxes[:, 0::2] *= (w / cfg.MODEL.INPUT.IMAGE_SIZE) 76 | pred_boxes[:, 1::2] *= (h / cfg.MODEL.INPUT.IMAGE_SIZE) 77 | pred_boxes_list.append(pred_boxes) 78 | pred_labels_list.append(pred_labels) 79 | pred_scores_list.append(pred_scores) 80 | gt_boxs_list.append(gt_boxs) 81 | gt_labels_list.append(gt_labels) 82 | gt_difficult_list.append(gt_difficult) 83 | result = eval_detection_voc(pred_bboxes=pred_boxes_list, 84 | pred_labels=pred_labels_list, 85 | pred_scores=pred_scores_list, 86 | gt_bboxes=gt_boxs_list, 87 | gt_labels=gt_labels_list, 88 | gt_difficults=gt_difficult_list) 89 | return result 90 | -------------------------------------------------------------------------------- /Model/retainnet.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author : LG 3 | 4 | from .base_models import build_resnet 5 | from .struct import fpn, predictor, postprocessor 6 | from torch import nn 7 | from Data.Transfroms import transfrom 8 | from vizer.draw import draw_boxes 9 | import torch 10 | from PIL import Image 11 | import numpy as np 12 | import time 13 | 14 | class RetainNet(nn.Module): 15 | """ 16 | :return cls_logits, torch.Size([C, 67995, num_classes]) 17 | bbox_pred, torch.Size([C, 67995, 4]) 18 | """ 19 | def __init__(self,cfg=None, resnet=None): 20 | super(RetainNet,self).__init__() 21 | self.resnet = 'resnet50' 22 | self.num_classes = 21 23 | self.num_anchors = 9 24 | self.cfg = cfg 25 | if cfg: 26 | self.resnet = cfg.MODEL.BASEMODEL 27 | self.num_classes = cfg.DATA.DATASET.NUM_CLASSES 28 | self.num_anchors = cfg.MODEL.ANCHORS.NUMS 29 | if resnet: 30 | self.resnet = resnet 31 | expansion_list={ 32 | 'resnet18': 1, 33 | 'resnet34': 1, 34 | 'resnet50': 4, 35 | 'resnet101': 4, 36 | 'resnet152': 4, 37 | } 38 | assert self.resnet in expansion_list 39 | 40 | self.backbone = build_resnet(self.resnet, pretrained=True) 41 | expansion = expansion_list[self.resnet] 42 | self.fpn = fpn(channels_of_fetures=[128*expansion, 256*expansion, 512*expansion]) 43 | self.predictor = predictor(num_anchors=self.num_anchors, num_classes=self.num_classes) # num_anchors 默认为9,与anchor生成相对应 44 | self.postprocessor = postprocessor(cfg) 45 | 46 | def load_pretrained_weight(self, weight_pkl): 47 | self.load_state_dict(torch.load(weight_pkl)) 48 | 49 | def forward(self, x): 
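        # Shape walk-through (a sketch, assuming the defaults in Configs.py: IMAGE_SIZE=600, 9 anchors per location):
        # x [B,3,600,600] -> backbone -> c3,c4,c5 plus the extra p6,p7; the FPN refines c3..c5 into p3..p5.
        # The five feature maps are (75,75),(38,38),(19,19),(10,10),(5,5), so the predictor emits
        # 9*(75*75 + 38*38 + 19*19 + 10*10 + 5*5) = 9*7555 = 67995 anchors per image, i.e.
        # cls_logits [B,67995,num_classes] and bbox_pred [B,67995,4], as stated in the class docstring.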
50 | c3, c4, c5, p6, p7 = self.backbone(x) # resnet输出五层特征图 51 | p3, p4, p5 = self.fpn([c3, c4, c5]) # 前三层特征图进FPN 52 | features = [p3, p4, p5, p6, p7] 53 | cls_logits, bbox_pred = self.predictor(features) 54 | return cls_logits, bbox_pred 55 | 56 | def forward_with_postprocess(self, images): 57 | """ 58 | 前向传播并后处理 59 | :param images: 60 | :return: 61 | """ 62 | cls_logits, bbox_pred = self.forward(images) 63 | detections = self.postprocessor(cls_logits, bbox_pred) 64 | return detections 65 | 66 | @torch.no_grad() 67 | def Detect_single_img(self, image, score_threshold=0.7, device='cuda'): 68 | """ 69 | 检测单张照片 70 | eg: 71 | image, boxes, labels, scores= net.Detect_single_img(img) 72 | plt.imshow(image) 73 | plt.show() 74 | 75 | :param image: 图片,PIL.Image.Image 76 | :param score_threshold: 阈值 77 | :param device: 检测时所用设备,默认'cuda' 78 | :return: 添加回归框的图片(np.array),回归框,标签,分数 79 | """ 80 | self.eval() 81 | assert isinstance(image, Image.Image) 82 | w, h = image.width, image.height 83 | images_tensor = transfrom(self.cfg, is_train=False)(np.array(image))[0].unsqueeze(0) 84 | 85 | self.to(device) 86 | images_tensor = images_tensor.to(device) 87 | time1 = time.time() 88 | detections = self.forward_with_postprocess(images_tensor)[0] 89 | boxes, labels, scores = detections 90 | boxes, labels, scores = boxes.to('cpu').numpy(), labels.to('cpu').numpy(), scores.to('cpu').numpy() 91 | boxes[:, 0::2] *= (w / self.cfg.MODEL.INPUT.IMAGE_SIZE) 92 | boxes[:, 1::2] *= (h / self.cfg.MODEL.INPUT.IMAGE_SIZE) 93 | 94 | indices = scores > score_threshold 95 | boxes = boxes[indices] 96 | labels = labels[indices] 97 | scores = scores[indices] 98 | print("Detect {} object, inference cost {:.2f} ms".format(len(scores), (time.time() - time1) * 1000)) 99 | # 图像数据加框 100 | drawn_image = draw_boxes(image=image, boxes=boxes, labels=labels, 101 | scores=scores, class_name_map=self.cfg.DATA.DATASET.CLASS_NAME).astype(np.uint8) 102 | return drawn_image, boxes, labels, scores 103 | 104 | @torch.no_grad() 105 | def Detect_video(self, video_path, score_threshold=0.5, save_video_path=None, show=True): 106 | """ 107 | 检测视频 108 | :param video_path: 视频路径 eg: /XXX/aaa.mp4 109 | :param score_threshold: 110 | :param save_video_path: 保存路径,不指定则不保存 111 | :param show: 在检测过程中实时显示,(会存在卡顿现象,受检测效率影响) 112 | :return: 113 | """ 114 | import cv2 115 | cap = cv2.VideoCapture(video_path) 116 | fourcc = cv2.VideoWriter_fourcc(*'MJPG') 117 | weight = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 118 | height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 119 | if save_video_path: 120 | out = cv2.VideoWriter(save_video_path, fourcc, cap.get(5), (weight, height)) 121 | while (cap.isOpened()): 122 | ret, frame = cap.read() 123 | if ret == True: 124 | image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) 125 | drawn_image, boxes, labels, scores = self.Detect_single_img(image=image, 126 | device='cuda:0', 127 | score_threshold=score_threshold) 128 | frame = cv2.cvtColor(np.asarray(drawn_image), cv2.COLOR_RGB2BGR) 129 | if show: 130 | cv2.imshow('frame', frame) 131 | if save_video_path: 132 | out.write(frame) 133 | if cv2.waitKey(1) & 0xFF == ord('q'): 134 | break 135 | else: 136 | break 137 | cap.release() 138 | if save_video_path: 139 | out.release() 140 | cv2.destroyAllWindows() 141 | return True 142 | -------------------------------------------------------------------------------- /Model/struct/Anchors.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author : LG 3 | from torch import nn 4 
| import numpy as np 5 | import torch 6 | from Utils import corner_form_to_center_form, center_form_to_corner_form 7 | 8 | class priorbox: 9 | """ 10 | Retainnet anchors, 生成策略与SSD不同 11 | """ 12 | def __init__(self,cfg=None): 13 | self.features_maps = [(75, 75), (38, 38), (19, 19), (10, 10), (5, 5)] 14 | self.anchor_sizes = [32, 64, 128, 256, 512] 15 | self.ratios = np.array([0.5, 1, 2]) 16 | self.scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]) 17 | self.image_size = 600 18 | self.clip = True 19 | if cfg: 20 | self.features_maps = cfg.MODEL.ANCHORS.FEATURE_MAPS 21 | self.anchor_sizes = cfg.MODEL.ANCHORS.SIZES 22 | self.ratios = np.array(cfg.MODEL.ANCHORS.RATIOS) 23 | self.scales = np.array(cfg.MODEL.ANCHORS.SCALES) 24 | self.image_size = cfg.MODEL.INPUT.IMAGE_SIZE 25 | self.clip = cfg.MODEL.ANCHORS.CLIP 26 | 27 | def __call__(self): 28 | priors = [] 29 | for k , (feature_map_w, feature_map_h) in enumerate(self.features_maps): 30 | for i in range(feature_map_w): 31 | for j in range(feature_map_h): 32 | cx = (j + 0.5) / feature_map_w 33 | cy = (i + 0.5) / feature_map_h 34 | 35 | size = self.anchor_sizes[k]/self.image_size # 将框体长宽转为 比例形式 36 | 37 | sides_square = self.scales * size # 计算方形检测框边长 38 | for side_square in sides_square: 39 | priors.append([cx, cy, side_square, side_square]) # 添加方形检测框 40 | 41 | sides_long = sides_square*2**(1/2) # 计算长形检测框长边 42 | for side_long in sides_long: 43 | priors.append([cx, cy, side_long, side_long/2]) # 添加长形检测框,短边为长边的一半 44 | priors.append([cx, cy, side_long/2, side_long]) 45 | 46 | priors = torch.tensor(priors) 47 | if self.clip: # 对超出图像范围的框体进行截断 48 | priors = center_form_to_corner_form(priors) # 截断时,先转为 [xmin, ymin, xmin, xmax]形式 49 | priors.clamp_(max=1, min=0) 50 | priors = corner_form_to_center_form(priors) # 转回 [x, y, w, h]形式 51 | return priors 52 | 53 | if __name__ == '__main__': 54 | anchors = priorbox()() 55 | print(anchors[-10:]) 56 | print(len(anchors)) 57 | -------------------------------------------------------------------------------- /Model/struct/Focal_Loss.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author : LG 3 | from torch import nn 4 | import torch 5 | from torch.nn import functional as F 6 | 7 | class focal_loss(nn.Module): 8 | def __init__(self, alpha=0.25, gamma=2, num_classes = 3, reduction='mean'): 9 | """ 10 | focal_loss损失函数, -α(1-yi)**γ *ce_loss(xi,yi) 可以单独拎出来用,替代cross_entropy 11 | 步骤详细的实现了 focal_loss损失函数. 12 | :param alpha: 阿尔法α,类别权重. 当α是列表时,为各类别权重,当α为常数时,类别权重为[α, 1-α, 1-α, ....],常用于 目标检测算法中抑制背景类 , retainnet中设置为0.25 13 | :param gamma: 伽马γ,难易样本调节参数. retainnet中设置为2 14 | :param num_classes: 类别数量 15 | :param size_average: 损失计算方式,默认取均值 16 | """ 17 | super(focal_loss,self).__init__() 18 | self.reduction = reduction 19 | if isinstance(alpha,list): 20 | assert len(alpha)==num_classes # α可以以list方式输入,size:[num_classes] 用于对不同类别精细地赋予权重 21 | print("Focal_loss alpha = {}, 将对每一类权重进行精细化赋值".format(alpha)) 22 | self.alpha = torch.Tensor(alpha) 23 | else: 24 | assert alpha<1 #如果α为一个常数,则降低第一类的影响,在目标检测中为第一类 25 | print(" --- Focal_loss alpha = {} ,将对背景类进行衰减,请在目标检测任务中使用 --- ".format(alpha)) 26 | self.alpha = torch.zeros(num_classes) 27 | self.alpha[0] += alpha 28 | self.alpha[1:] += (1-alpha) # α 最终为 [ α, 1-α, 1-α, 1-α, 1-α, ...] size:[num_classes] 29 | 30 | self.gamma = gamma 31 | 32 | def forward(self, preds, labels): 33 | """ 34 | focal_loss损失计算 35 | :param preds: 预测类别. size:[B,N,C] 36 | :param labels: 实际类别. 
size:[B,N] 37 | :return: 38 | """ 39 | # assert preds.dim()==2 and labels.dim()==1 40 | preds = preds.view(-1,preds.size(-1)) 41 | self.alpha = self.alpha.to(preds.device) 42 | preds_softmax = F.softmax(preds, dim=1) # 这里并没有直接使用log_softmax, 因为后面会用到softmax的结果(当然你也可以使用log_softmax,然后进行exp操作) 43 | preds_logsoft = torch.log(preds_softmax) 44 | 45 | preds_softmax = preds_softmax.gather(1,labels.view(-1,1)) # 这部分实现nll_loss ( crossempty = log_softmax + nll ) 46 | preds_logsoft = preds_logsoft.gather(1,labels.view(-1,1)) 47 | self.alpha = self.alpha.gather(0,labels.view(-1)) 48 | loss = -torch.mul(torch.pow((1-preds_softmax), self.gamma), preds_logsoft) # torch.pow((1-preds_softmax), self.gamma) 为focal loss中 (1-pt)**γ 49 | 50 | loss = torch.mul(self.alpha, loss.t()) 51 | if self.reduction== 'mean': 52 | loss = loss.mean() 53 | elif self.reduction== 'sum': 54 | loss = loss.sum() 55 | return loss 56 | 57 | -------------------------------------------------------------------------------- /Model/struct/Fpn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author : LG 3 | 4 | from torch import nn 5 | from torch.nn import functional as F 6 | 7 | class fpn(nn.Module): 8 | def __init__(self,channels_of_fetures, channel_out=256): 9 | """ 10 | fpn,特征金字塔 11 | :param channels_of_fetures: list,输入层的通道数,必须与输入特征图相对应 12 | :param channel_out: 13 | """ 14 | super(fpn,self).__init__() 15 | self.channels_of_fetures = channels_of_fetures 16 | 17 | self.lateral_conv1 = nn.Conv2d(channels_of_fetures[2], channel_out, kernel_size=1, stride=1, padding=0) 18 | self.lateral_conv2 = nn.Conv2d(channels_of_fetures[1], channel_out, kernel_size=1, stride=1, padding=0) 19 | self.lateral_conv3 = nn.Conv2d(channels_of_fetures[0], channel_out, kernel_size=1, stride=1, padding=0) 20 | 21 | self.top_down_conv1 = nn.Conv2d(channel_out, channel_out, kernel_size=3, stride=1, padding=1) 22 | self.top_down_conv2 = nn.Conv2d(channel_out, channel_out, kernel_size=3, stride=1, padding=1) 23 | self.top_down_conv3 = nn.Conv2d(channel_out, channel_out, kernel_size=3, stride=1, padding=1) 24 | 25 | def forward(self, features): 26 | """ 27 | 28 | :param features: 29 | :return: 30 | """ 31 | c3, c4, c5 = features 32 | 33 | p5 = self.lateral_conv1(c5) # 19 34 | p4 = self.lateral_conv2(c4) # 38 35 | p3 = self.lateral_conv3(c3) # 75 36 | 37 | p4 = F.interpolate(input=p5, size=(p4.size(2),p4.size(3)), mode="nearest") + p4 38 | p3 = F.interpolate(input=p4, size=(p3.size(2),p3.size(3)), mode="nearest") + p3 39 | 40 | p5 = self.top_down_conv1(p5) 41 | p4 = self.top_down_conv2(p4) 42 | p3 = self.top_down_conv3(p3) 43 | 44 | return p3, p4, p5 45 | -------------------------------------------------------------------------------- /Model/struct/MultiBoxLoss.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author : LG 3 | import torch.nn as nn 4 | from .Focal_Loss import focal_loss 5 | import torch 6 | import torch.nn.functional as F 7 | import math 8 | 9 | __all__ = ['multiboxloss'] 10 | 11 | class multiboxloss(nn.Module): 12 | def __init__(self, cfg=None, alpha=None, gamma=None, num_classes=None, neg_pos_ratio=None): 13 | """ 14 | retainnet损失函数,分为类别损失(focal loss) 15 | 框体回归损失(smooth_l1_loss) 16 | 采用分别返回的方式返回.便于训练过程中分析处理 17 | """ 18 | super(multiboxloss, self).__init__() 19 | if cfg: 20 | self.alpha = cfg.MULTIBOXLOSS.ALPHA 21 | self.gamma = cfg.MULTIBOXLOSS.GAMMA 22 | self.num_classes = cfg.DATA.DATASET.NUM_CLASSES 23 | 
self.neg_pos_ratio = cfg.TRAIN.NEG_POS_RATIO 24 | if alpha: 25 | self.alpha = alpha 26 | if gamma: 27 | self.gamma = gamma 28 | if num_classes: 29 | self.num_classes = num_classes 30 | if neg_pos_ratio: 31 | self.neg_pos_ratio = neg_pos_ratio 32 | 33 | self.loc_loss_fn = nn.SmoothL1Loss(reduction='sum') 34 | self.cls_loss_fn = focal_loss(alpha=self.alpha, gamma=self.gamma, num_classes=self.num_classes, reduction='sum') # 类别损失为focal loss 35 | print(" --- Multiboxloss : α={} γ={} num_classes={}".format(self.alpha, self.gamma, self.num_classes)) 36 | 37 | def forward(self, confidence, predicted_locations, labels, gt_locations): 38 | """ 39 | 计算类别损失和框体回归损失 40 | Args: 41 | confidence (batch_size, num_priors, num_classes): 预测类别 42 | predicted_locations (batch_size, num_priors, 4): 预测位置 43 | labels (batch_size, num_priors): 所有框的真实类别 44 | gt_locations (batch_size, num_priors, 4): 所有框真实的位置 45 | """ 46 | num_classes = confidence.size(2) 47 | 48 | with torch.no_grad(): 49 | loss = -F.log_softmax(confidence, dim=2)[:, :, 0] 50 | mask = hard_negative_mining(loss, labels, self.neg_pos_ratio) 51 | 52 | 53 | classification_loss = self.cls_loss_fn(confidence[mask, :], labels[mask]) 54 | 55 | # 回归损失,smooth_l1 56 | pos_mask = labels > 0 57 | predicted_locations = predicted_locations[pos_mask, :].view(-1, 4) 58 | gt_locations = gt_locations[pos_mask, :].view(-1, 4) 59 | smooth_l1_loss = self.loc_loss_fn(predicted_locations, gt_locations) 60 | num_pos = gt_locations.size(0) 61 | return smooth_l1_loss / num_pos, classification_loss / (num_pos * self.neg_pos_ratio) 62 | 63 | def hard_negative_mining(loss, labels, neg_pos_ratio=3): 64 | """ 65 | 用于训练过程中正负例比例的限制.默认在训练时,负例数量是正例数量的三倍 66 | Args: 67 | loss (N, num_priors): the loss for each example. 68 | labels (N, num_priors): the labels. 
69 | neg_pos_ratio: 正负例比例: 负例数量/正例数量 70 | """ 71 | pos_mask = labels > 0 72 | num_pos = pos_mask.long().sum(dim=1, keepdim=True) 73 | num_neg = num_pos * neg_pos_ratio 74 | 75 | loss[pos_mask] = -math.inf # 无穷 76 | # 两次sort 找出元素在排序中的位置 77 | _, indexes = loss.sort(dim=1, descending=True) # descending 降序 ,返回 value,index 78 | _, orders = indexes.sort(dim=1) 79 | neg_mask = orders < num_neg # loss 降序排, 背景为-无穷, 选择排前num_neg的 负无穷,也就是 背景 80 | return pos_mask | neg_mask # 目标 或 背景 81 | -------------------------------------------------------------------------------- /Model/struct/PostProcess.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author : LG 3 | import torch 4 | import torchvision 5 | from Utils.Boxs_op import center_form_to_corner_form, convert_locations_to_boxes 6 | from .Anchors import priorbox 7 | import torch.nn.functional as F 8 | 9 | __all__ = ['postprocessor'] 10 | 11 | class postprocessor: 12 | def __init__(self, cfg): 13 | super().__init__() 14 | self.cfg = cfg 15 | self.width = cfg.MODEL.INPUT.IMAGE_SIZE 16 | self.height = cfg.MODEL.INPUT.IMAGE_SIZE 17 | 18 | def __call__(self, cls_logits, bbox_pred): 19 | priors = priorbox(self.cfg)().to(cls_logits.device) 20 | batches_scores = F.softmax(cls_logits, dim=2) 21 | boxes = convert_locations_to_boxes( 22 | bbox_pred, priors, self.cfg.MODEL.ANCHORS.CENTER_VARIANCE, self.cfg.MODEL.ANCHORS.CENTER_VARIANCE 23 | ) 24 | batches_boxes = center_form_to_corner_form(boxes) 25 | 26 | device = batches_scores.device 27 | batch_size = batches_scores.size(0) 28 | results = [] 29 | for batch_id in range(batch_size): 30 | processed_boxes = [] 31 | processed_scores = [] 32 | processed_labels = [] 33 | 34 | per_img_scores, per_img_boxes = batches_scores[batch_id], batches_boxes[batch_id] # (N, #CLS) (N, 4) 35 | for class_id in range(1, per_img_scores.size(1)): # skip background 36 | scores = per_img_scores[:, class_id] 37 | mask = scores > self.cfg.MODEL.TEST.CONFIDENCE_THRESHOLD 38 | scores = scores[mask] 39 | if scores.size(0) == 0: 40 | continue 41 | boxes = per_img_boxes[mask, :] 42 | boxes[:, 0::2] *= self.width 43 | boxes[:, 1::2] *= self.height 44 | 45 | keep = boxes_nms(boxes, scores, self.cfg.MODEL.TEST.NMS_THRESHOLD, self.cfg.MODEL.TEST.MAX_PER_CLASS) 46 | 47 | nmsed_boxes = boxes[keep, :] 48 | nmsed_labels = torch.tensor([class_id] * keep.size(0), device=device) 49 | nmsed_scores = scores[keep] 50 | 51 | processed_boxes.append(nmsed_boxes) 52 | processed_scores.append(nmsed_scores) 53 | processed_labels.append(nmsed_labels) 54 | 55 | if len(processed_boxes) == 0: 56 | processed_boxes = torch.empty(0, 4) 57 | processed_labels = torch.empty(0) 58 | processed_scores = torch.empty(0) 59 | else: 60 | processed_boxes = torch.cat(processed_boxes, 0) 61 | processed_labels = torch.cat(processed_labels, 0) 62 | processed_scores = torch.cat(processed_scores, 0) 63 | 64 | if processed_boxes.size(0) > self.cfg.MODEL.TEST.MAX_PER_IMAGE > 0: 65 | processed_scores, keep = torch.topk(processed_scores, k=self.cfg.MODEL.TEST.MAX_PER_IMAGE) 66 | processed_boxes = processed_boxes[keep, :] 67 | processed_labels = processed_labels[keep] 68 | results.append([processed_boxes, processed_labels, processed_scores]) 69 | return results 70 | 71 | def boxes_nms(boxes, scores, nms_thresh, max_count=-1): 72 | """ Performs non-maximum suppression, run on GPU or CPU according to 73 | boxes's device. 
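    A minimal usage sketch (this mirrors how postprocessor.__call__ above uses it;
    0.45 is only the cfg.MODEL.TEST.NMS_THRESHOLD default from Configs.py, and
    torchvision.ops.nms returns the kept indices sorted by descending score):
        keep = boxes_nms(boxes, scores, nms_thresh=0.45)
        boxes, scores = boxes[keep, :], scores[keep]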
74 | Args: 75 | boxes(Tensor): `xyxy` mode boxes, use absolute coordinates(or relative coordinates), shape is (n, 4) 76 | scores(Tensor): scores, shape is (n, ) 77 | nms_thresh(float): thresh 78 | max_count (int): if > 0, then only the top max_proposals are kept after non-maximum suppression 79 | Returns: 80 | indices kept. 81 | """ 82 | keep = torchvision.ops.nms(boxes, scores, nms_thresh) 83 | if max_count > 0: 84 | keep = keep[:max_count] 85 | return keep 86 | 87 | -------------------------------------------------------------------------------- /Model/struct/Predictor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author : LG 3 | from torch import nn 4 | import torch 5 | 6 | class predictor(nn.Module): 7 | """ 8 | Retainnet 分类(cls)及回归(reg)网络 9 | """ 10 | def __init__(self, num_anchors, num_classes): 11 | super().__init__() 12 | self.num_classes = num_classes 13 | self.num_anchors = num_anchors 14 | self.make_headers() 15 | self.reset_parameters() 16 | 17 | def forward(self, features): 18 | """ 19 | 对输入的特征图中每个特征点进行分类及回归 20 | :param features: # 经过fpn后 输出的特征图 21 | :return: # 每个特征点的类别预测与回归预测 22 | """ 23 | cls_logits = [] 24 | bbox_pred = [] 25 | batch_size = features[0].size(0) 26 | for feature in features: 27 | cls_logit = self.cls_headers(feature) 28 | cls_logits.append(self.cls_headers(feature).permute(0, 2, 3, 1).contiguous().view(batch_size,-1,self.num_classes)) 29 | bbox_pred.append(self.reg_headers(feature).permute(0, 2, 3, 1).contiguous().view(batch_size,-1,4)) 30 | 31 | cls_logits = torch.cat(cls_logits, dim=1) 32 | bbox_pred = torch.cat(bbox_pred, dim=1) 33 | 34 | return cls_logits, bbox_pred 35 | 36 | def make_headers(self): 37 | cls_headers = [] 38 | reg_headers = [] 39 | 40 | for _ in range(4): 41 | cls_headers.append(nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)) 42 | cls_headers.append(nn.ReLU(inplace=True)) 43 | 44 | reg_headers.append(nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)) 45 | reg_headers.append(nn.ReLU(inplace=True)) 46 | 47 | cls_headers.append(nn.Conv2d(256, self.num_anchors * self.num_classes, kernel_size=3, stride=1, padding=1)) 48 | reg_headers.append(nn.Conv2d(256, self.num_anchors * 4, kernel_size=3, stride=1, padding=1)) 49 | 50 | self.cls_headers = nn.Sequential(*cls_headers) 51 | self.reg_headers = nn.Sequential(*reg_headers) 52 | 53 | def reset_parameters(self): 54 | for m in self.modules(): 55 | if isinstance(m, nn.Conv2d): 56 | nn.init.xavier_uniform_(m.weight) 57 | nn.init.zeros_(m.bias) 58 | -------------------------------------------------------------------------------- /Model/struct/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author : LG 3 | from .Anchors import priorbox 4 | from .Focal_Loss import focal_loss 5 | from .Fpn import fpn 6 | from .MultiBoxLoss import multiboxloss 7 | from .PostProcess import postprocessor 8 | from .Predictor import predictor -------------------------------------------------------------------------------- /Model/struct/__pycache__/Anchors.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Model/struct/__pycache__/Anchors.cpython-37.pyc -------------------------------------------------------------------------------- /Model/struct/__pycache__/Focal_Loss.cpython-37.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Model/struct/__pycache__/Focal_Loss.cpython-37.pyc -------------------------------------------------------------------------------- /Model/struct/__pycache__/Fpn.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Model/struct/__pycache__/Fpn.cpython-37.pyc -------------------------------------------------------------------------------- /Model/struct/__pycache__/PostProcess.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Model/struct/__pycache__/PostProcess.cpython-37.pyc -------------------------------------------------------------------------------- /Model/struct/__pycache__/Predictor.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Model/struct/__pycache__/Predictor.cpython-37.pyc -------------------------------------------------------------------------------- /Model/struct/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Model/struct/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Model/trainer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author : LG 3 | import torch 4 | from torch.optim.lr_scheduler import MultiStepLR 5 | from Data import our_dataloader 6 | from .struct import multiboxloss 7 | from Utils.visdom_op import visdom_line, setup_visdom, visdom_bar 8 | from torch import nn 9 | from torch.nn import DataParallel 10 | import os 11 | 12 | __all__ = ['Trainer'] 13 | 14 | class Trainer(object): 15 | """ 16 | 模型训练器,不指定参数时,均默认使用Configs中配置的参数 17 | *** 推荐使用Configs文件管理参数, 不推荐在函数中进行参数指定, 只是为了扩展 *** 18 | *** 默认使用 SGD 优化器, 如需使用其他优化器, 继承该类,对build_optimizer方法进行重写即可*** 19 | 20 | 模型在训练时,会使用DataParallel进行包装,以便于在多GPU上进行训练 21 | 本训练器只支持GPU训练,单机单卡与单机单卡均可,但不支持cpu,不支持多机多卡(别问为啥不用多机多卡.穷!!!) 22 | 23 | eg: 24 | trainer = Trainer(cfg) # 实例化训练器 25 | trainer(net,train_dataset) # 在train_dataset数据集上训练模型net 26 | """ 27 | def __init__(self, cfg, max_iter=None, batch_size=None, train_devices=None, 28 | model_save_step=None, model_save_root=None, vis = None, vis_step=None): 29 | """ 30 | 训练器初始化 31 | 值为None的参数项不指定时为默认,已在配置文件中设置. 32 | 如需更改参数建议在Configs配置文件中进行更改 33 | 不建议直接指定参数,只留做扩展用. 
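        Note: batch_size should be an integer multiple of the number of GPUs listed in
        train_devices, otherwise __call__ raises a ValueError (see the check below).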
34 | 35 | :param cfg: 配置 36 | :param max_iter: 最大训练轮数 37 | :param batch_size: 批次数, 38 | :param train_devices: 训练设备,列表,eg:[0,1],使用0,1俩个GPU,这里0,1为gpu编号,可用nvidia-smi查看.,不指定时为默认,已在配置文件中设置 39 | :param vis: visdom.Visdom(),用于训练过程可视化.绘制损失曲线已经学习率 40 | :param model_save_step: 模型保存步长 41 | :param vis_step: visdom可视化步长 42 | """ 43 | self.cfg = cfg 44 | 45 | self.iterations = self.cfg.TRAIN.MAX_ITER 46 | if max_iter: 47 | self.iterations = max_iter 48 | 49 | self.batch_size = cfg.TRAIN.BATCH_SIZE 50 | if batch_size: 51 | self.batch_size = batch_size 52 | 53 | self.train_devices = cfg.DEVICE.TRAIN_DEVICES 54 | if train_devices: 55 | self.train_devices = train_devices 56 | 57 | self.model_save_root = cfg.FILE.MODEL_SAVE_ROOT 58 | if model_save_root: 59 | self.model_save_root = model_save_root 60 | 61 | if not os.path.exists(self.model_save_root): 62 | os.mkdir(self.model_save_root) 63 | self.model_save_step = self.cfg.STEP.MODEL_SAVE_STEP 64 | if model_save_step: 65 | self.model_save_step = model_save_step 66 | 67 | self.vis = setup_visdom() 68 | if vis: 69 | self.vis = vis 70 | self.vis_step = self.cfg.STEP.VIS_STEP 71 | if vis_step: 72 | self.vis_step = vis_step 73 | 74 | self.model = None 75 | self.loss_func = None 76 | self.optimizer = None 77 | self.scheduler = None 78 | 79 | def __call__(self, model, dataset): 80 | """ 81 | 训练器使用, 传入 模型 与数据集. 82 | :param model: 83 | :param dataset: 84 | :return: 85 | """ 86 | if not isinstance(model, nn.DataParallel): 87 | # raise TypeError('请用 DataParallel 包装模型. eg: model = DataParallel(model, device_ids=[0,1,2]),使用device_ids指定需要使用的gpu') 88 | model = DataParallel(model, device_ids=self.train_devices) 89 | self.model = model 90 | data_loader = our_dataloader(dataset, batch_size=self.batch_size, shuffle=True) 91 | print(' Max_iter = {}, Batch_size = {}'.format(self.iterations, self.batch_size)) 92 | print(' Model will train on cuda:{}'.format(self.train_devices)) 93 | 94 | num_gpu_use = len(self.train_devices) 95 | if (self.batch_size % num_gpu_use) != 0: 96 | raise ValueError( 97 | 'You use {} gpu to train , but set batch_size={}'.format(num_gpu_use, data_loader.batch_size)) 98 | 99 | self.set_lossfunc() 100 | self.set_optimizer() 101 | self.set_scheduler() 102 | 103 | print("Set optimizer : {}".format(self.optimizer)) 104 | print("Set scheduler : {}".format(self.scheduler)) 105 | print("Set lossfunc : {}".format(self.loss_func)) 106 | 107 | 108 | print(' Start Train......') 109 | print(' -------' * 20) 110 | 111 | for iteration, (images, boxes, labels, image_names) in enumerate(data_loader): 112 | iteration+=1 113 | boxes, labels = boxes.to('cuda'), labels.to('cuda') 114 | cls_logits, bbox_preds = self.model(images) 115 | reg_loss, cls_loss = self.loss_func(cls_logits, bbox_preds, labels, boxes) 116 | 117 | reg_loss = reg_loss.mean() 118 | cls_loss = cls_loss.mean() 119 | loss = reg_loss + cls_loss 120 | 121 | self.optimizer.zero_grad() 122 | loss.backward() 123 | self.optimizer.step() 124 | self.scheduler.step() 125 | 126 | lr = self.optimizer.param_groups[0]['lr'] 127 | 128 | if iteration % 10 == 0: 129 | print('Iter : {}/{} | Lr : {} | Loss : {:.4f} | cls_loss : {:.4f} | reg_loss : {:.4f}'.format(iteration, self.iterations, lr, loss.item(), cls_loss.item(), reg_loss.item())) 130 | 131 | if self.vis and iteration % self.vis_step == 0: 132 | visdom_line(self.vis, y=[loss], x=iteration, win_name='loss') 133 | visdom_line(self.vis, y=[reg_loss], x=iteration, win_name='reg_loss') 134 | visdom_line(self.vis, y=[cls_loss], x=iteration, win_name='cls_loss') 135 | 
visdom_line(self.vis, y=[lr], x=iteration, win_name='lr') 136 | 137 | if iteration % self.model_save_step == 0: 138 | torch.save(model.module.state_dict(), '{}/model_{}.pkl'.format(self.model_save_root, iteration)) 139 | 140 | if iteration > self.iterations: 141 | break 142 | return True 143 | 144 | def set_optimizer(self, lr=None, momentum=None, weight_decay=None): 145 | """ 146 | 配置优化器 147 | :param lr: 初始学习率, 默认0.001 148 | :param momentum: 动量, 默认 0.9 149 | :param weight_decay: 权重衰减,L2, 默认 5e-4 150 | :return: 151 | """ 152 | if not lr: 153 | lr= self.cfg.OPTIM.LR 154 | if not momentum: 155 | momentum = self.cfg.OPTIM.MOMENTUM 156 | if not weight_decay: 157 | weight_decay = self.cfg.OPTIM.WEIGHT_DECAY 158 | 159 | self.optimizer = torch.optim.SGD(self.model.parameters(), 160 | lr=lr, 161 | momentum=momentum, 162 | weight_decay=weight_decay) 163 | 164 | def set_lossfunc(self, neg_pos_ratio=None): 165 | """ 166 | 配置损失函数 167 | :param neg_pos_ratio: 负正例 比例,默认3, 负例数量是正例的三倍 168 | :return: 169 | """ 170 | if not neg_pos_ratio: 171 | neg_pos_ratio = self.cfg.TRAIN.NEG_POS_RATIO 172 | self.loss_func = multiboxloss(self.cfg) 173 | # print(' Trainer set loss_func : {}, neg_pos_ratio = {}'.format('multiboxloss', neg_pos_ratio)) 174 | 175 | def set_scheduler(self, lr_steps=None, gamma=None): 176 | """ 177 | 配置学习率衰减策略 178 | :param lr_steps: 默认 [80000, 10000],当训练到这些轮次时,学习率*gamma 179 | :param gamma: 默认 0.1,学习率下降10倍 180 | :return: 181 | """ 182 | if not lr_steps: 183 | lr_steps = self.cfg.OPTIM.SCHEDULER.LR_STEPS 184 | if not gamma: 185 | gamma = self.cfg.OPTIM.SCHEDULER.GAMMA 186 | self.scheduler = MultiStepLR(optimizer=self.optimizer, 187 | milestones=lr_steps, 188 | gamma=gamma) 189 | # print(' Trainer set scheduler : {}, lr_steps={}, gamma={}'.format('MultiStepLR', lr_steps, gamma)) 190 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | GIthub使用指北: 2 | 3 | **1.想将项目拷贝到自己帐号下就fork一下.** 4 | 5 | **2.持续关注项目更新就star一下** 6 | 7 | **3.watch是设置接收邮件提醒的.** 8 | 9 | --- 10 | 11 | # Retinanet-Pytorch 12 | 13 | [Retinanet](https://arxiv.org/abs/1708.02002)目标检测算法pytorch实现, 14 | 15 | **本项目不是完全的复现论文**(很多参数以及实现方式上与原论文存在部分差异,有疑问欢迎issues) 16 | 17 | 由于一些原因,训练已经过测试,但是并没有训练完毕,所以不会上传预训练模型. 18 | 19 | 但项目代码验证无误.(但在使用时需要自己进行调整。不建议新手进行尝试。) 20 | *** 21 | 项目在架构上与 [SSD-Pytorch](https://github.com/yatengLG/SSD-Pytorch) 采用了相似的结构. 22 | 23 | 重用了大量[SSD-Pytorch](https://github.com/yatengLG/SSD-Pytorch)中代码,如训练器,测试器等. 24 | 25 | *** 26 | 27 | **本项目单机多卡,通过torch.nn.DataParallel实现,将单机环境统一包装.支持单机单卡,单机多卡,指定gpu训练及测试,但不支持多机多卡和cpu训练和测试. 28 | 不限定检测时的设备(cpu,gpu均可).** 29 | 30 | *** 31 | 32 | # Requirements 33 | 34 | 35 | 1. pytorch 36 | 2. opencv-python 37 | 3. torchvision >= 0.3.0 38 | 4. Vizer 39 | 5. 
visdom 40 | 41 | (均可pip安装) 42 | 43 | ## 项目结构 44 | 45 | | 文件夹| 文件 |说明 | 46 | |:-------:|:-------:|:------:| 47 | | **Data** | 数据相关 | 48 | | | Dataloader| 数据加载器类'Our_Dataloader', 'Our_Dataloader_test'| 49 | | | Dataset_VOC|VOC格式数据集类| 50 | | | Transfroms|数据Transfroms| 51 | | | Transfroms_tuils|Transfroms子方法| 52 | | **Model**| 模型相关| 53 | | | base_models/Resnet|支持resnet18,34,50,101,152| 54 | | | structs/Anchors|retinanet默认检测框生成器| 55 | | | structs/MutiBoxLoss|损失函数| 56 | | | structs/Focal_Loss|focal_loss损失函数| 57 | | | structs/Fpn|特征金字塔结构| 58 | | | structs/PostProcess|后处理| 59 | | | structs/Predictor|分类及回归网络| 60 | | | evaler |验证器,用于在数据集上对模型进行验证(测试),计算ap,map | 61 | | | retainnet|Retinanet模型类 | 62 | | | trainer|训练器,用于在数据集上训练模型 | 63 | | **Utils**|各种工具| 64 | | |boxs_op |各种框体操作,编码解码,IOU计算,框体格式转换等| 65 | | **Weights**| 模型权重存放处| 66 | | | pretrained|预训练模型权重存放处,本项目模型并没有训练完毕,因而没有上传训练好的模型,但是训练过程已经过验证| 67 | | | trained |训练过程中默认模型存放处| 68 | | ---- | Configs.py|配置文件,包含了模型定义,数据以及训练过程,测试过程等的全部参数,建议备份一份再进行修改| 69 | | ---- | Demo_train.py| 模型训练的例子,训练过程中的模型会保存在Weights/Our/ | 70 | | ---- | Demo_eval.py| 模型测试的例子,计算模型ap,map | 71 | | ---- | Demo_detect_one_image.py|检测单张图片例子| 72 | | ---- | Demo_detect_video.py|视频检测例子,传入一个视频,进行检测| 73 | 74 | 75 | # Demo 76 | 77 | 本项目配有训练,验证,检测部分的代码,所有Demo均经过测试,可直接运行. 78 | 79 | ## 训练train 80 | 81 | 同[针对单机多卡环境的SSD目标检测算法实现(Single Shot MultiBox Detector)(简单,明了,易用,中文注释)](https://ptorch.com/news/252.html)一样,项目**使用visdom进行训练过程可视化**.在运行前请安装并运行visdom. 82 | 83 | 同样的,训练过程也只支持单机单卡或单机多卡环境,不支持cpu训练. 84 | 85 | ```python 86 | 87 | # -*- coding: utf-8 -*- 88 | # @Author : LG 89 | 90 | from Model import RetainNet, Trainer 91 | from Data import vocdataset 92 | from Configs import _C as cfg 93 | from Data import transfrom,targettransform 94 | 95 | 96 | # 训练数据集,VOC格式数据集, 训练数据取自 ImageSets/Main/train.txt' 97 | train_dataset=vocdataset(cfg, is_train=True, transform=transfrom(cfg,is_train=True), 98 | target_transform=targettransform(cfg)) 99 | 100 | # 测试数据集,VOC格式数据集, 测试数据取自 ImageSets/Main/eval.txt' 101 | test_dataset = vocdataset(cfg=cfg, is_train=False, 102 | transform=transfrom(cfg=cfg, is_train=False), 103 | target_transform=targettransform(cfg)) 104 | 105 | if __name__ == '__main__': 106 | """ 107 | 使用时,请先打开visdom 108 | 109 | 命令行 输入 pip install visdom 进行安装 110 | 输入 python -m visdom.server' 启动 111 | """ 112 | 113 | # 首次调用会下载resnet预训练模型 114 | 115 | # 实例化模型. 模型的具体各种参数在Config文件中进行配置 116 | net = RetainNet(cfg) 117 | # 将模型移动到gpu上,cfg.DEVICE.MAINDEVICE定义了模型所使用的主GPU 118 | net.to(cfg.DEVICE.MAINDEVICE) 119 | # 初始化训练器,训练器参数通过cfg进行配置;也可传入参数进行配置,但不建议 120 | trainer = Trainer(cfg) 121 | # 训练器开始在 数据集上训练模型 122 | trainer(net, train_dataset) 123 | ``` 124 | 125 | ## 验证eval 126 | 验证过程支持单机多卡,单机单卡,不支持cpu. 
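The `'XXX.pkl'` placeholder below refers to a checkpoint written by the trainer, which by default saves weights to `Weights/trained/model_<iteration>.pkl` (controlled by `cfg.FILE.MODEL_SAVE_ROOT` and `cfg.STEP.MODEL_SAVE_STEP`).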
127 | 128 | ```python 129 | # -*- coding: utf-8 -*- 130 | # @Author : LG 131 | 132 | from Model import RetainNet, Evaler 133 | from Data import vocdataset 134 | from Configs import _C as cfg 135 | from Data import transfrom,targettransform 136 | 137 | 138 | # 训练数据集,VOC格式数据集, 训练数据取自 ImageSets/Main/train.txt' 139 | train_dataset=vocdataset(cfg, is_train=True, transform=transfrom(cfg,is_train=True), 140 | target_transform=targettransform(cfg)) 141 | 142 | # 测试数据集,VOC格式数据集, 测试数据取自 ImageSets/Main/eval.txt' 143 | test_dataset = vocdataset(cfg=cfg, is_train=False, 144 | transform=transfrom(cfg=cfg, is_train=False), 145 | target_transform=targettransform(cfg)) 146 | 147 | if __name__ == '__main__': 148 | # 模型测试只支持GPU单卡或多卡,不支持cpu 149 | net = RetainNet(cfg) 150 | # 将模型移动到gpu上,cfg.DEVICE.MAINDEVICE定义了模型所使用的主GPU 151 | net.to(cfg.DEVICE.MAINDEVICE) 152 | # 模型从权重文件中加载权重 153 | net.load_pretrained_weight('XXX.pkl') 154 | # 初始化验证器,验证器参数通过cfg进行配置;也可传入参数进行配置,但不建议 155 | evaler = Evaler(cfg, eval_devices=None) 156 | # 验证器开始在数据集上验证模型 157 | ap, map = evaler(model=net, 158 | test_dataset=test_dataset) 159 | print(ap) 160 | print(map) 161 | ``` 162 | 163 | ## 检测Detect 164 | 165 | 单次检测过程支持单机单卡,cpu. 166 | 167 | ### 单张图片检测 168 | 169 | ```python 170 | # -*- coding: utf-8 -*- 171 | # @Author : LG 172 | from Model import RetainNet 173 | from Configs import _C as cfg 174 | from PIL import Image 175 | from matplotlib import pyplot as plt 176 | 177 | # 实例化模型 178 | net = RetainNet(cfg) 179 | # 使用cpu或gpu 180 | net.to('cuda') 181 | # 模型从权重文件中加载权重 182 | net.load_pretrained_weight('XXX.pkl') 183 | # 打开图片 184 | image = Image.open("XXX.jpg") 185 | # 进行检测, 分别返回 绘制了检测框的图片数据/回归框/标签/分数. 186 | drawn_image, boxes, labels, scores = net.Detect_single_img(image=image,score_threshold=0.5) 187 | 188 | plt.imsave('XXX_det.jpg',drawn_image) 189 | plt.imshow(drawn_image) 190 | plt.show() 191 | ``` 192 | 193 | ### 视频检测 194 | 195 | ```python 196 | # -*- coding: utf-8 -*- 197 | # @Author : LG 198 | from Model import RetainNet 199 | from Configs import _C as cfg 200 | 201 | # 实例化模型 202 | net = RetainNet(cfg) 203 | # 使用cpu或gpu 204 | net.to('cuda') 205 | # 模型从权重文件中加载权重 206 | net.load_pretrained_weight('XXX.pkl') 207 | 208 | video_path = 'XXX.mp4' 209 | 210 | # 进行检测, 211 | # if save_video_path不为None,则不保存视频,如需保存视频save_video_path=XXX.mp4 , 212 | # show=True,实时显示检测结果 213 | net.Detect_video(video_path=video_path, score_threshold=0.02, save_video_path=None, show=True) 214 | 215 | ``` 216 | 217 | --- 218 | 219 | support by **jetbrains**. 220 | 221 | Jetbrains 222 | 223 | https://www.jetbrains.com/?from=SSD-Pytorch 224 | 225 | --- 226 | -------------------------------------------------------------------------------- /Utils/Boxs_op.py: -------------------------------------------------------------------------------- 1 | import torch 2 | # -*- coding: utf-8 -*- 3 | # @Author : LG 4 | import math 5 | 6 | # 解码 7 | def convert_locations_to_boxes(locations, priors, center_variance, 8 | size_variance): 9 | """Convert regressional location results of SSD into boxes in the form of (center_x, center_y, h, w). 10 | 11 | The conversion: 12 | $$predicted\_center * center_variance = \frac {real\_center - prior\_center} {prior\_hw}$$ 13 | $$exp(predicted\_hw * size_variance) = \frac {real\_hw} {prior\_hw}$$ 14 | We do it in the inverse direction here. 15 | Args: 16 | locations (batch_size, num_priors, 4): the regression output of SSD. It will contain the outputs as well. 17 | priors (num_priors, 4) or (batch_size/1, num_priors, 4): prior boxes. 
18 | center_variance: a float used to change the scale of center. 19 | size_variance: a float used to change of scale of size. 20 | Returns: 21 | boxes: priors: [[center_x, center_y, w, h]]. All the values 22 | are relative to the image size. 23 | """ 24 | # priors can have one dimension less. 25 | 26 | if priors.dim() + 1 == locations.dim(): 27 | priors = priors.unsqueeze(0) 28 | return torch.cat([ 29 | locations[..., :2] * center_variance * priors[..., 2:] + priors[..., :2], 30 | torch.exp(locations[..., 2:] * size_variance) * priors[..., 2:] 31 | ], dim=locations.dim() - 1) 32 | 33 | 34 | # 编码 35 | def convert_boxes_to_locations(center_form_boxes, center_form_priors, center_variance, size_variance): 36 | # priors can have one dimension less 37 | if center_form_priors.dim() + 1 == center_form_boxes.dim(): 38 | center_form_priors = center_form_priors.unsqueeze(0) 39 | return torch.cat([ 40 | (center_form_boxes[..., :2] - center_form_priors[..., :2]) / center_form_priors[..., 2:] / center_variance, 41 | torch.log(center_form_boxes[..., 2:] / center_form_priors[..., 2:]) / size_variance 42 | ], dim=center_form_boxes.dim() - 1) 43 | 44 | 45 | def area_of(left_top, right_bottom) -> torch.Tensor: 46 | """Compute the areas of rectangles given two corners. 47 | 48 | Args: 49 | left_top (N, 2): left top corner. 50 | right_bottom (N, 2): right bottom corner. 51 | 52 | Returns: 53 | area (N): return the area. 54 | """ 55 | hw = torch.clamp(right_bottom - left_top, min=0.0) 56 | return hw[..., 0] * hw[..., 1] 57 | 58 | 59 | def iou_of(boxes0, boxes1, eps=1e-5): 60 | """Return intersection-over-union (Jaccard index) of boxes. 61 | 62 | Args: 63 | boxes0 (N, 4): ground truth boxes. 64 | boxes1 (N or 1, 4): predicted boxes. 65 | eps: a small number to avoid 0 as denominator. 66 | Returns: 67 | iou (N): IoU values. 68 | """ 69 | overlap_left_top = torch.max(boxes0[..., :2], boxes1[..., :2]) 70 | overlap_right_bottom = torch.min(boxes0[..., 2:], boxes1[..., 2:]) 71 | 72 | overlap_area = area_of(overlap_left_top, overlap_right_bottom) 73 | area0 = area_of(boxes0[..., :2], boxes0[..., 2:]) 74 | area1 = area_of(boxes1[..., :2], boxes1[..., 2:]) 75 | return overlap_area / (area0 + area1 - overlap_area + eps) 76 | 77 | 78 | def assign_priors(gt_boxes, gt_labels, corner_form_priors, 79 | iou_threshold): 80 | """Assign ground truth boxes and targets to priors. 81 | 82 | Args: 83 | gt_boxes (num_targets, 4): ground truth boxes. 84 | gt_labels (num_targets): labels of targets. 85 | priors (num_priors, 4): corner form priors 86 | Returns: 87 | boxes (num_priors, 4): real values for priors. 88 | labels (num_priros): labels for priors. 
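        Matching strategy (as implemented below): each prior is assigned the ground
        truth box with which it has the highest IoU; priors whose best IoU is below
        iou_threshold are labelled 0 (background), and every ground truth box is
        additionally forced onto its best-matching prior so that no target is left
        without a prior.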
89 | """ 90 | # size: num_priors x num_targets 91 | ious = iou_of(gt_boxes.unsqueeze(0), corner_form_priors.unsqueeze(1)) 92 | # size: num_priors 93 | best_target_per_prior, best_target_per_prior_index = ious.max(1) 94 | # size: num_targets 95 | best_prior_per_target, best_prior_per_target_index = ious.max(0) 96 | 97 | for target_index, prior_index in enumerate(best_prior_per_target_index): 98 | best_target_per_prior_index[prior_index] = target_index 99 | # 2.0 is used to make sure every target has a prior assigned 100 | best_target_per_prior.index_fill_(0, best_prior_per_target_index, 2) 101 | # size: num_priors 102 | labels = gt_labels[best_target_per_prior_index] 103 | labels[best_target_per_prior < iou_threshold] = 0 # the backgournd id 104 | boxes = gt_boxes[best_target_per_prior_index] 105 | return boxes, labels 106 | 107 | 108 | def hard_negative_mining(loss, labels, neg_pos_ratio): 109 | """ 110 | It used to suppress the presence of a large number of negative prediction. 111 | It works on image level not batch level. 112 | For any example/image, it keeps all the positive predictions and 113 | cut the number of negative predictions to make sure the ratio 114 | between the negative examples and positive examples is no more 115 | the given ratio for an image. 116 | 117 | Args: 118 | loss (N, num_priors): the loss for each example. 119 | labels (N, num_priors): the labels. 120 | neg_pos_ratio: the ratio between the negative examples and positive examples. 121 | """ 122 | pos_mask = labels > 0 123 | num_pos = pos_mask.long().sum(dim=1, keepdim=True) 124 | num_neg = num_pos * neg_pos_ratio 125 | 126 | loss[pos_mask] = -math.inf 127 | _, indexes = loss.sort(dim=1, descending=True) 128 | _, orders = indexes.sort(dim=1) 129 | neg_mask = orders < num_neg 130 | return pos_mask | neg_mask 131 | 132 | # [x, y, w, h] to [xmin, ymin, xmax, ymax] 133 | def center_form_to_corner_form(locations): 134 | return torch.cat([locations[..., :2] - locations[..., 2:] / 2, 135 | locations[..., :2] + locations[..., 2:] / 2], locations.dim() - 1) 136 | 137 | # [xmin, ymin, xmax, ymax] to [x, y, w, h] 138 | def corner_form_to_center_form(boxes): 139 | return torch.cat([ 140 | (boxes[..., :2] + boxes[..., 2:]) / 2, 141 | boxes[..., 2:] - boxes[..., :2] 142 | ], boxes.dim() - 1) 143 | -------------------------------------------------------------------------------- /Utils/Cal_mean_std.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author : LG 3 | # 计算数据集均值方差,如果你是自己的数据集,可以使用这部分代码计算数据集的均值方差 4 | import numpy as np 5 | import os 6 | from PIL import Image 7 | 8 | def get_mean_std(img_root): 9 | 10 | means = 0 11 | stds = 0 12 | img_list = os.listdir(img_root) 13 | num = len(img_list) 14 | for i, img in enumerate(img_list): 15 | i +=1 16 | img = os.path.join(img_root,img) 17 | img = np.array(Image.open(img)) 18 | mean = np.mean(img, axis=(0,1)) 19 | std = np.std(img, axis=(0,1)) 20 | 21 | means += mean 22 | stds += std 23 | print(' {}/{} , mean : [{:.2f}, {:.2f}, {:.2f}], std : [{:.2f}, {:.2f}, {:.2f}]'.format(i, num, means[0]/i, means[1]/i, means[2]/i, stds[0]/i, stds[1]/i, stds[2]/i)) 24 | mean = means / i 25 | std = stds / i 26 | return mean, std 27 | -------------------------------------------------------------------------------- /Utils/Hash.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author : LG 3 | import hashlib 4 | import os 5 | 6 | def GetFileMd5(filename): 7 | if 
not os.path.isfile(filename): 8 | return 9 | myHash = hashlib.md5() 10 | f = open(filename,'rb') 11 | while True: 12 | b = f.read(8096) 13 | if not b : 14 | break 15 | myHash.update(b) 16 | f.close() 17 | return myHash.hexdigest() 18 | 19 | -------------------------------------------------------------------------------- /Utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author : LG 3 | from .voc_cal_ap import eval_detection_voc 4 | from .Hash import GetFileMd5 5 | from .Boxs_op import center_form_to_corner_form, corner_form_to_center_form -------------------------------------------------------------------------------- /Utils/__pycache__/Boxs_op.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Utils/__pycache__/Boxs_op.cpython-37.pyc -------------------------------------------------------------------------------- /Utils/__pycache__/Hash.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Utils/__pycache__/Hash.cpython-37.pyc -------------------------------------------------------------------------------- /Utils/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Utils/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Utils/__pycache__/voc_cal_ap.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/Utils/__pycache__/voc_cal_ap.cpython-37.pyc -------------------------------------------------------------------------------- /Utils/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author : LG 3 | import numpy as np 4 | import cv2 5 | from tqdm import tqdm 6 | import json 7 | import os 8 | import shutil 9 | 10 | def json_to_txt(json_file, root): 11 | """ 12 | 解析json文件 13 | 文件名是图片名, 14 | 数据以 label1, x1, y1, w1, h1 15 | label2, x2, y2, w2, h2 16 | 格式存放 17 | eg: 18 | json_to_txt('/home/super/guangdong1_round1_train1_20190809/Annotations/gt_result.json', 19 | '/home/super/guangdong1_round1_train1_20190809/Our') 20 | 21 | :param json_file: 22 | :param root: 23 | :return: 24 | """ 25 | if os.path.exists(root): 26 | shutil.rmtree(root) 27 | os.mkdir(root) 28 | 29 | with open(json_file, 'r')as f: 30 | json_dict_list = json.load(f) 31 | for json_dict in json_dict_list: 32 | name = json_dict['name'] 33 | defect_name = json_dict['defect_name'] 34 | bbox = json_dict['bbox'] 35 | content = [defect_name] 36 | for xywh in bbox: 37 | content.append(str(xywh)) 38 | content = ','.join(content) 39 | 40 | with open(os.path.join(root, name.split('.')[0] + '.txt'), 'a')as f: 41 | f.write(content + '\n') 42 | return True 43 | 44 | 45 | def cal_mean_std(images_dir): 46 | """ 47 | 给定数据图片根目录,计算图片整体均值与方差 48 | :param images_dir: 49 | :return: 50 | """ 51 | img_filenames = os.listdir(images_dir) 52 | m_list, s_list = [], [] 53 | for img_filename in tqdm(img_filenames): 54 | img = cv2.imread(images_dir 
+ '/' + img_filename) 55 | img = img / 255.0 56 | m, s = cv2.meanStdDev(img) 57 | 58 | m_list.append(m.reshape((3,))) 59 | s_list.append(s.reshape((3,))) 60 | print(m_list) 61 | m_array = np.array(m_list) 62 | s_array = np.array(s_list) 63 | m = m_array.mean(axis=0, keepdims=True) 64 | s = s_array.mean(axis=0, keepdims=True) 65 | print('mean: ',m[0][::-1]) 66 | print('std: ',s[0][::-1]) 67 | return m 68 | -------------------------------------------------------------------------------- /Utils/visdom_op.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author : LG 3 | import visdom 4 | import torch 5 | import numpy as np 6 | 7 | """ 8 | visdom使用, 9 | 首先先安装visdom pip install visdom 10 | 启动 visdom服务器,python -m visdom.server 11 | 默认为 http://localhost:8097/ 12 | """ 13 | 14 | """ 15 | """ 16 | 17 | def setup_visdom(**kwargs): 18 | 19 | """ 20 | eg : 21 | vis_eval = setup_visdom(env='SSD_eval') 22 | 23 | :param kwargs: 24 | :return: 25 | """ 26 | vis = visdom.Visdom(**kwargs) 27 | return vis 28 | 29 | 30 | def visdom_line(vis, y, x, win_name, update='append'): 31 | 32 | """ 33 | eg : 34 | visdom_line(vis_train, y=[loss], x=iteration, win_name='loss') 35 | 36 | :param vis: 由 setup_visdom 函数创建 37 | :param y: Y轴数据,为一系列数据,可同时传入多种数据。 eg : [loss1, loss2] 38 | :param x: X轴,格式同Y 39 | :param win_name: 绘图窗口名称,必须指定,不然会一直创建窗口 40 | :param update: 绘图方式。 这里默认append连续绘图,用于记录损失变化曲线 41 | :return: 42 | """ 43 | if not isinstance(y,torch.Tensor): 44 | y=torch.Tensor(y) 45 | y = y.unsqueeze(0) 46 | x = torch.Tensor(y.size()).fill_(x) 47 | vis.line(Y=y, X=x, win=win_name, update=update, opts={'title':win_name}) 48 | return True 49 | 50 | 51 | def visdom_images(vis, images,win_name,num_show=None,nrow=None): 52 | """ 53 | eg: 54 | visdom_images(vis_train, images, num_show=3, nrow=3, win_name='Image') 55 | 56 | visdom 展示图片,默认只展示6张,每行3张。 57 | 58 | :param vis: 由 setup_visdom 函数创建 59 | :param images: 多幅图片张量,shape:[B,N,W,H] 60 | :param win_name: 绘图窗口名称,必须指定,不然会一直创建窗口 61 | :param num_show: 展示的图片数量,默认六张 62 | :param nrow: 每行展示的图片数量,默认三张 63 | :return: 64 | """ 65 | if not num_show: 66 | num_show = 6 67 | if not nrow: 68 | nrow = 3 69 | num = images.size(0) 70 | if num > num_show: 71 | images = images [:num_show] 72 | vis.images(tensor=images,nrow=nrow,win=win_name) 73 | return True 74 | 75 | 76 | def visdom_image(vis, image,win_name): 77 | """ 78 | eg : 79 | visdom_image(vis=vis, image=drawn_image, win_name='image') 80 | 81 | :param vis: 由 setup_visdom 函数创建 82 | :param image: 单幅图片张量,shape:[n,w,h] 83 | :param win_name: 绘图窗口名称,必须指定,不然会一直创建窗口 84 | :return: 85 | """ 86 | vis.image(img=image, win=win_name) 87 | return True 88 | 89 | def visdom_bar(vis, X, Y, win_name): 90 | """ 91 | 绘制柱形图 92 | eg: 93 | visdom_bar(vis_train, X=cfg.DATASETS.CLASS_NAME, Y=ap, win_name='ap', title='ap') 94 | 95 | :param vis: 96 | :param X: 类别 97 | :param Y: 数值 98 | :param win_name: 绘图窗口名称,必须指定,不然会一直创建窗口 99 | :return: 100 | """ 101 | dic = dict(zip(X,Y)) 102 | del_list = [] 103 | for val in dic: 104 | if np.isnan(dic[val]): 105 | del_list.append(val) 106 | 107 | for val in del_list: 108 | del dic[val] 109 | 110 | vis.bar(X=list(dic.values()),Y=list(dic.keys()),win=win_name, opts={'title':win_name}) 111 | return True -------------------------------------------------------------------------------- /Utils/voc_cal_ap.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author : LG 3 | from __future__ import division 4 | from 
collections import defaultdict 5 | import itertools 6 | import numpy as np 7 | import six 8 | 9 | __all__ = ['eval_detection_voc'] 10 | 11 | def bbox_iou(bbox_a, bbox_b): 12 | """Calculate the Intersection of Unions (IoUs) between bounding boxes. 13 | IoU is calculated as a ratio of area of the intersection 14 | and area of the union. 15 | This function accepts both :obj:`numpy.ndarray` and :obj:`cupy.ndarray` as 16 | inputs. Please note that both :obj:`bbox_a` and :obj:`bbox_b` need to be 17 | same type. 18 | The output is same type as the type of the inputs. 19 | Args: 20 | bbox_a (array): An array whose shape is :math:`(N, 4)`. 21 | :math:`N` is the number of bounding boxes. 22 | The dtype should be :obj:`numpy.float32`. 23 | bbox_b (array): An array similar to :obj:`bbox_a`, 24 | whose shape is :math:`(K, 4)`. 25 | The dtype should be :obj:`numpy.float32`. 26 | Returns: 27 | array: 28 | An array whose shape is :math:`(N, K)`. \ 29 | An element at index :math:`(n, k)` contains IoUs between \ 30 | :math:`n` th bounding box in :obj:`bbox_a` and :math:`k` th bounding \ 31 | box in :obj:`bbox_b`. 32 | """ 33 | if bbox_a.shape[1] != 4 or bbox_b.shape[1] != 4: 34 | raise IndexError 35 | 36 | # top left 37 | tl = np.maximum(bbox_a[:, None, :2], bbox_b[:, :2]) 38 | # bottom right 39 | br = np.minimum(bbox_a[:, None, 2:], bbox_b[:, 2:]) 40 | 41 | area_i = np.prod(br - tl, axis=2) * (tl < br).all(axis=2) 42 | area_a = np.prod(bbox_a[:, 2:] - bbox_a[:, :2], axis=1) 43 | area_b = np.prod(bbox_b[:, 2:] - bbox_b[:, :2], axis=1) 44 | return area_i / (area_a[:, None] + area_b - area_i) 45 | 46 | 47 | def eval_detection_voc( 48 | pred_bboxes, 49 | pred_labels, 50 | pred_scores, 51 | gt_bboxes, 52 | gt_labels, 53 | gt_difficults=None, 54 | iou_thresh=0.5, 55 | use_07_metric=False): 56 | """Calculate average precisions based on evaluation code of PASCAL VOC. 57 | 58 | This function evaluates predicted bounding boxes obtained from a dataset 59 | which has :math:`N` images by using average precision for each class. 60 | The code is based on the evaluation code used in PASCAL VOC Challenge. 61 | 62 | Args: 63 | pred_bboxes (iterable of numpy.ndarray): An iterable of :math:`N` 64 | sets of bounding boxes. 65 | Its index corresponds to an index for the base dataset. 66 | Each element of :obj:`pred_bboxes` is a set of coordinates 67 | of bounding boxes. This is an array whose shape is :math:`(R, 4)`, 68 | where :math:`R` corresponds 69 | to the number of bounding boxes, which may vary among boxes. 70 | The second axis corresponds to 71 | :math:`y_{min}, x_{min}, y_{max}, x_{max}` of a bounding box. 72 | pred_labels (iterable of numpy.ndarray): An iterable of labels. 73 | Similar to :obj:`pred_bboxes`, its index corresponds to an 74 | index for the base dataset. Its length is :math:`N`. 75 | pred_scores (iterable of numpy.ndarray): An iterable of confidence 76 | scores for predicted bounding boxes. Similar to :obj:`pred_bboxes`, 77 | its index corresponds to an index for the base dataset. 78 | Its length is :math:`N`. 79 | gt_bboxes (iterable of numpy.ndarray): An iterable of ground truth 80 | bounding boxes 81 | whose length is :math:`N`. An element of :obj:`gt_bboxes` is a 82 | bounding box whose shape is :math:`(R, 4)`. Note that the number of 83 | bounding boxes in each image does not need to be same as the number 84 | of corresponding predicted boxes. 85 | gt_labels (iterable of numpy.ndarray): An iterable of ground truth 86 | labels which are organized similarly to :obj:`gt_bboxes`. 
87 | gt_difficults (iterable of numpy.ndarray): An iterable of boolean 88 | arrays which is organized similarly to :obj:`gt_bboxes`. 89 | This tells whether the 90 | corresponding ground truth bounding box is difficult or not. 91 | By default, this is :obj:`None`. In that case, this function 92 | considers all bounding boxes to be not difficult. 93 | iou_thresh (float): A prediction is correct if its Intersection over 94 | Union with the ground truth is above this value. 95 | use_07_metric (bool): Whether to use PASCAL VOC 2007 evaluation metric 96 | for calculating average precision. The default value is 97 | :obj:`False`. 98 | 99 | Returns: 100 | dict: 101 | 102 | The keys, value-types and the description of the values are listed 103 | below. 104 | 105 | * **ap** (*numpy.ndarray*): An array of average precisions. \ 106 | The :math:`l`-th value corresponds to the average precision \ 107 | for class :math:`l`. If class :math:`l` does not exist in \ 108 | either :obj:`pred_labels` or :obj:`gt_labels`, the corresponding \ 109 | value is set to :obj:`numpy.nan`. 110 | * **map** (*float*): The average of Average Precisions over classes. 111 | 112 | """ 113 | 114 | prec, rec = calc_detection_voc_prec_rec(pred_bboxes, 115 | pred_labels, 116 | pred_scores, 117 | gt_bboxes, 118 | gt_labels, 119 | gt_difficults, 120 | iou_thresh=iou_thresh) 121 | 122 | ap = calc_detection_voc_ap(prec, rec, use_07_metric=use_07_metric) 123 | 124 | return {'ap': ap, 'map': np.nanmean(ap)} 125 | 126 | 127 | def calc_detection_voc_prec_rec( 128 | pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels, 129 | gt_difficults=None, 130 | iou_thresh=0.5): 131 | """Calculate precision and recall based on evaluation code of PASCAL VOC. 132 | 133 | This function calculates precision and recall of 134 | predicted bounding boxes obtained from a dataset which has :math:`N` 135 | images. 136 | The code is based on the evaluation code used in PASCAL VOC Challenge. 137 | 138 | Args: 139 | pred_bboxes (iterable of numpy.ndarray): An iterable of :math:`N` 140 | sets of bounding boxes. 141 | Its index corresponds to an index for the base dataset. 142 | Each element of :obj:`pred_bboxes` is a set of coordinates 143 | of bounding boxes. This is an array whose shape is :math:`(R, 4)`, 144 | where :math:`R` corresponds 145 | to the number of bounding boxes, which may vary among boxes. 146 | The second axis corresponds to 147 | :math:`y_{min}, x_{min}, y_{max}, x_{max}` of a bounding box. 148 | pred_labels (iterable of numpy.ndarray): An iterable of labels. 149 | Similar to :obj:`pred_bboxes`, its index corresponds to an 150 | index for the base dataset. Its length is :math:`N`. 151 | pred_scores (iterable of numpy.ndarray): An iterable of confidence 152 | scores for predicted bounding boxes. Similar to :obj:`pred_bboxes`, 153 | its index corresponds to an index for the base dataset. 154 | Its length is :math:`N`. 155 | gt_bboxes (iterable of numpy.ndarray): An iterable of ground truth 156 | bounding boxes 157 | whose length is :math:`N`. An element of :obj:`gt_bboxes` is a 158 | bounding box whose shape is :math:`(R, 4)`. Note that the number of 159 | bounding boxes in each image does not need to be same as the number 160 | of corresponding predicted boxes. 161 | gt_labels (iterable of numpy.ndarray): An iterable of ground truth 162 | labels which are organized similarly to :obj:`gt_bboxes`. 163 | gt_difficults (iterable of numpy.ndarray): An iterable of boolean 164 | arrays which is organized similarly to :obj:`gt_bboxes`. 
165 | This tells whether the 166 | corresponding ground truth bounding box is difficult or not. 167 | By default, this is :obj:`None`. In that case, this function 168 | considers all bounding boxes to be not difficult. 169 | iou_thresh (float): A prediction is correct if its Intersection over 170 | Union with the ground truth is above this value.. 171 | 172 | Returns: 173 | tuple of two lists: 174 | This function returns two lists: :obj:`prec` and :obj:`rec`. 175 | 176 | * :obj:`prec`: A list of arrays. :obj:`prec[l]` is precision \ 177 | for class :math:`l`. If class :math:`l` does not exist in \ 178 | either :obj:`pred_labels` or :obj:`gt_labels`, :obj:`prec[l]` is \ 179 | set to :obj:`None`. 180 | * :obj:`rec`: A list of arrays. :obj:`rec[l]` is recall \ 181 | for class :math:`l`. If class :math:`l` that is not marked as \ 182 | difficult does not exist in \ 183 | :obj:`gt_labels`, :obj:`rec[l]` is \ 184 | set to :obj:`None`. 185 | 186 | """ 187 | 188 | pred_bboxes = iter(pred_bboxes) 189 | pred_labels = iter(pred_labels) 190 | pred_scores = iter(pred_scores) 191 | gt_bboxes = iter(gt_bboxes) 192 | gt_labels = iter(gt_labels) 193 | if gt_difficults is None: 194 | gt_difficults = itertools.repeat(None) 195 | else: 196 | gt_difficults = iter(gt_difficults) 197 | 198 | n_pos = defaultdict(int) 199 | score = defaultdict(list) 200 | match = defaultdict(list) 201 | 202 | for pred_bbox, pred_label, pred_score, gt_bbox, gt_label, gt_difficult in \ 203 | six.moves.zip( 204 | pred_bboxes, pred_labels, pred_scores, 205 | gt_bboxes, gt_labels, gt_difficults): 206 | 207 | if gt_difficult is None: 208 | gt_difficult = np.zeros(gt_bbox.shape[0], dtype=bool) 209 | 210 | for l in np.unique(np.concatenate((pred_label, gt_label)).astype(int)): 211 | pred_mask_l = pred_label == l 212 | pred_bbox_l = pred_bbox[pred_mask_l] 213 | pred_score_l = pred_score[pred_mask_l] 214 | # sort by score 215 | order = pred_score_l.argsort()[::-1] 216 | pred_bbox_l = pred_bbox_l[order] 217 | pred_score_l = pred_score_l[order] 218 | 219 | gt_mask_l = gt_label == l 220 | gt_bbox_l = gt_bbox[gt_mask_l] 221 | gt_difficult_l = gt_difficult[gt_mask_l] 222 | 223 | n_pos[l] += np.logical_not(gt_difficult_l).sum() 224 | score[l].extend(pred_score_l) 225 | 226 | if len(pred_bbox_l) == 0: 227 | continue 228 | if len(gt_bbox_l) == 0: 229 | match[l].extend((0,) * pred_bbox_l.shape[0]) 230 | continue 231 | 232 | # VOC evaluation follows integer typed bounding boxes. 
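            # (the +1 below makes the max coordinates inclusive: the VOC devkit treats
            # boxes as integer pixel coordinates and computes extents as max - min + 1)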
233 | pred_bbox_l = pred_bbox_l.copy() 234 | pred_bbox_l[:, 2:] += 1 235 | gt_bbox_l = gt_bbox_l.copy() 236 | gt_bbox_l[:, 2:] += 1 237 | 238 | iou = bbox_iou(pred_bbox_l, gt_bbox_l) 239 | gt_index = iou.argmax(axis=1) 240 | # set -1 if there is no matching ground truth 241 | gt_index[iou.max(axis=1) < iou_thresh] = -1 242 | del iou 243 | 244 | selec = np.zeros(gt_bbox_l.shape[0], dtype=bool) 245 | for gt_idx in gt_index: 246 | if gt_idx >= 0: 247 | if gt_difficult_l[gt_idx]: 248 | match[l].append(-1) 249 | else: 250 | if not selec[gt_idx]: 251 | match[l].append(1) 252 | else: 253 | match[l].append(0) 254 | selec[gt_idx] = True 255 | else: 256 | match[l].append(0) 257 | 258 | for iter_ in ( 259 | pred_bboxes, pred_labels, pred_scores, 260 | gt_bboxes, gt_labels, gt_difficults): 261 | if next(iter_, None) is not None: 262 | raise ValueError('Length of input iterables need to be same.') 263 | 264 | n_fg_class = max(n_pos.keys()) + 1 265 | prec = [None] * n_fg_class 266 | rec = [None] * n_fg_class 267 | 268 | for l in n_pos.keys(): 269 | score_l = np.array(score[l]) 270 | match_l = np.array(match[l], dtype=np.int8) 271 | 272 | order = score_l.argsort()[::-1] 273 | match_l = match_l[order] 274 | 275 | tp = np.cumsum(match_l == 1) 276 | fp = np.cumsum(match_l == 0) 277 | 278 | # If an element of fp + tp is 0, 279 | # the corresponding element of prec[l] is nan. 280 | prec[l] = tp / (fp + tp) 281 | # If n_pos[l] is 0, rec[l] is None. 282 | if n_pos[l] > 0: 283 | rec[l] = tp / n_pos[l] 284 | 285 | return prec, rec 286 | 287 | 288 | def calc_detection_voc_ap(prec, rec, use_07_metric=False): 289 | """Calculate average precisions based on evaluation code of PASCAL VOC. 290 | 291 | This function calculates average precisions 292 | from given precisions and recalls. 293 | The code is based on the evaluation code used in PASCAL VOC Challenge. 294 | 295 | Args: 296 | prec (list of numpy.array): A list of arrays. 297 | :obj:`prec[l]` indicates precision for class :math:`l`. 298 | If :obj:`prec[l]` is :obj:`None`, this function returns 299 | :obj:`numpy.nan` for class :math:`l`. 300 | rec (list of numpy.array): A list of arrays. 301 | :obj:`rec[l]` indicates recall for class :math:`l`. 302 | If :obj:`rec[l]` is :obj:`None`, this function returns 303 | :obj:`numpy.nan` for class :math:`l`. 304 | use_07_metric (bool): Whether to use PASCAL VOC 2007 evaluation metric 305 | for calculating average precision. The default value is 306 | :obj:`False`. 307 | 308 | Returns: 309 | ~numpy.ndarray: 310 | This function returns an array of average precisions. 311 | The :math:`l`-th value corresponds to the average precision 312 | for class :math:`l`. If :obj:`prec[l]` or :obj:`rec[l]` is 313 | :obj:`None`, the corresponding value is set to :obj:`numpy.nan`. 
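        With use_07_metric=True, AP is the mean of the interpolated precision sampled
        at the 11 recall points 0.0, 0.1, ..., 1.0 (PASCAL VOC 2007 protocol); otherwise
        it is the exact area under the precision/recall curve after making precision
        monotonically decreasing.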
314 | 315 | """ 316 | 317 | n_fg_class = len(prec) 318 | ap = np.empty(n_fg_class) 319 | for l in six.moves.range(n_fg_class): 320 | if prec[l] is None or rec[l] is None: 321 | ap[l] = np.nan 322 | continue 323 | 324 | if use_07_metric: 325 | # 11 point metric 326 | ap[l] = 0 327 | for t in np.arange(0., 1.1, 0.1): 328 | if np.sum(rec[l] >= t) == 0: 329 | p = 0 330 | else: 331 | p = np.max(np.nan_to_num(prec[l])[rec[l] >= t]) 332 | ap[l] += p / 11 333 | else: 334 | # correct AP calculation 335 | # first append sentinel values at the end 336 | mpre = np.concatenate(([0], np.nan_to_num(prec[l]), [0])) 337 | mrec = np.concatenate(([0], rec[l], [1])) 338 | 339 | mpre = np.maximum.accumulate(mpre[::-1])[::-1] 340 | 341 | # to calculate area under PR curve, look for points 342 | # where X axis (recall) changes value 343 | i = np.where(mrec[1:] != mrec[:-1])[0] 344 | 345 | # and sum (\Delta recall) * prec 346 | ap[l] = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 347 | 348 | return ap 349 | -------------------------------------------------------------------------------- /__pycache__/Configs.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yatengLG/Retinanet-Pytorch/b1ab0ce1c36958c255391a41c935a76f86651442/__pycache__/Configs.cpython-37.pyc --------------------------------------------------------------------------------