├── quarkdet
│   ├── data
│   │   ├── transform
│   │   │   ├── __init__.py
│   │   │   ├── pipeline.py
│   │   │   ├── color.py
│   │   │   └── warp.py
│   │   ├── dataset
│   │   │   ├── __init__.py
│   │   │   └── base.py
│   │   └── collate.py
│   ├── util
│   │   ├── path.py
│   │   ├── rank_filter.py
│   │   ├── distributed_data_parallel.py
│   │   ├── __init__.py
│   │   ├── config.py
│   │   ├── check_point.py
│   │   ├── box_transform.py
│   │   ├── scatter_gather.py
│   │   ├── logger.py
│   │   ├── util_mixins.py
│   │   └── data_parallel.py
│   ├── model
│   │   ├── head
│   │   │   ├── assigner
│   │   │   │   ├── base_assigner.py
│   │   │   │   └── atss_assigner.py
│   │   │   ├── __init__.py
│   │   │   ├── sampler
│   │   │   │   ├── pseudo_sampler.py
│   │   │   │   ├── base_sampler.py
│   │   │   │   └── sampling_result.py
│   │   │   ├── anchor
│   │   │   │   ├── anchor_target.py
│   │   │   │   ├── anchor_generator.py
│   │   │   │   └── base_anchor_head.py
│   │   │   └── quarkdet_head.py
│   │   ├── detector
│   │   │   ├── __init__.py
│   │   │   ├── gfl.py
│   │   │   └── one_stage.py
│   │   ├── module
│   │   │   ├── scale.py
│   │   │   ├── activation.py
│   │   │   ├── init_weights.py
│   │   │   ├── norm.py
│   │   │   └── nms.py
│   │   ├── neck
│   │   │   ├── __init__.py
│   │   │   ├── pan_slim.py
│   │   │   ├── fpn_slim.py
│   │   │   ├── fpn.py
│   │   │   └── pan.py
│   │   ├── backbone
│   │   │   ├── __init__.py
│   │   │   ├── mobilenetv2.py
│   │   │   ├── shufflenetv2.py
│   │   │   └── vovnet.py
│   │   └── loss
│   │       ├── utils.py
│   │       └── varifocal_loss.py
│   ├── evaluator
│   │   ├── __init__.py
│   │   └── coco_detection.py
│   └── trainer
│       ├── __init__.py
│       └── dist_trainer.py
├── requirements.txt
├── tools
│   ├── flops.py
│   ├── export.py
│   ├── statistics.py
│   ├── inference.py
│   ├── test.py
│   └── train.py
├── config
│   ├── shufflenetv2_0.5x.yml
│   ├── mobilenetv3.yml
│   ├── nanodet.yml
│   ├── shufflenet.yml
│   ├── test.yml
│   ├── efficientdet.yml
│   ├── ghostnet_full.yml
│   ├── ghostnet_full_bifpn.yml
│   ├── ghostnet_slim640.yml
│   ├── quarkdet.yml
│   └── ghostnet_slim.yml
└── demo
    └── demo.py

/quarkdet/data/transform/__init__.py:
--------------------------------------------------------------------------------
from .pipeline import Pipeline

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
Cython
termcolor
numpy
torchvision
tensorboard
pycocotools
matplotlib
pyaml
opencv-python
tqdm
torch-summary

--------------------------------------------------------------------------------
/quarkdet/util/path.py:
--------------------------------------------------------------------------------
import os
from .rank_filter import rank_filter


@rank_filter
def mkdir(path):
    if not os.path.exists(path):
        os.makedirs(path)

--------------------------------------------------------------------------------
/quarkdet/util/rank_filter.py:
--------------------------------------------------------------------------------
def rank_filter(func):
    def func_filter(local_rank=-1, *args, **kwargs):
        if local_rank < 1:
            return func(*args, **kwargs)
        else:
            pass
    return func_filter

--------------------------------------------------------------------------------
/quarkdet/model/head/assigner/base_assigner.py:
--------------------------------------------------------------------------------
from abc import ABCMeta, abstractmethod


class BaseAssigner(metaclass=ABCMeta):

    @abstractmethod
    def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
        pass

--------------------------------------------------------------------------------
/quarkdet/evaluator/__init__.py:
--------------------------------------------------------------------------------
from .coco_detection import
CocoDetectionEvaluator 2 | 3 | 4 | def build_evaluator(cfg, dataset): 5 | if cfg.evaluator.name == 'CocoDetectionEvaluator': 6 | return CocoDetectionEvaluator(dataset) 7 | else: 8 | raise NotImplementedError 9 | -------------------------------------------------------------------------------- /quarkdet/data/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from .coco import CocoDataset 3 | 4 | 5 | def build_dataset(cfg, mode): 6 | dataset_cfg = copy.deepcopy(cfg) 7 | if dataset_cfg['name'] == 'coco': 8 | dataset_cfg.pop('name') 9 | return CocoDataset(mode=mode, **dataset_cfg) 10 | -------------------------------------------------------------------------------- /quarkdet/model/detector/__init__.py: -------------------------------------------------------------------------------- 1 | from .gfl import GFL 2 | 3 | 4 | def build_model(model_cfg): 5 | if model_cfg.detector.name == 'GFL': 6 | model = GFL(model_cfg.detector.backbone, model_cfg.detector.neck, model_cfg.detector.head) 7 | else: 8 | raise NotImplementedError 9 | return model 10 | -------------------------------------------------------------------------------- /quarkdet/model/module/scale.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class Scale(nn.Module): 6 | """ 7 | A learnable scale parameter 8 | """ 9 | 10 | def __init__(self, scale=1.0): 11 | super(Scale, self).__init__() 12 | self.scale = nn.Parameter(torch.tensor(scale, dtype=torch.float)) 13 | 14 | def forward(self, x): 15 | return x * self.scale 16 | -------------------------------------------------------------------------------- /quarkdet/model/head/__init__.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from .gfl_headv2 import GFLHeadV2 3 | from .quarkdet_head import QuarkDetHead 4 | 5 | 6 | def build_head(cfg): 7 | head_cfg = copy.deepcopy(cfg) 8 | name = head_cfg.pop('name') 9 | if name == 'GFLHeadV2': 10 | return GFLHeadV2(**head_cfg) 11 | elif name == 'QuarkDetHead': 12 | return QuarkDetHead(**head_cfg) 13 | else: 14 | raise NotImplementedError -------------------------------------------------------------------------------- /quarkdet/util/distributed_data_parallel.py: -------------------------------------------------------------------------------- 1 | from torch.nn.parallel import DistributedDataParallel 2 | from .scatter_gather import scatter_kwargs 3 | 4 | 5 | class DDP(DistributedDataParallel): 6 | 7 | def __init__(self, batchsize, **kwargs): 8 | self.batchsize = batchsize 9 | super(DDP, self).__init__(**kwargs) 10 | 11 | def scatter(self, inputs, kwargs, device_ids): 12 | return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim, chunk_sizes=[self.batchsize]) -------------------------------------------------------------------------------- /quarkdet/model/detector/gfl.py: -------------------------------------------------------------------------------- 1 | from .one_stage import OneStage 2 | 3 | 4 | class GFL(OneStage): 5 | def __init__(self, 6 | backbone_cfg, 7 | fpn_cfg, 8 | head_cfg, ): 9 | super(GFL, self).__init__(backbone_cfg, 10 | fpn_cfg, 11 | head_cfg) 12 | 13 | def forward(self, x): 14 | x = self.backbone(x) 15 | x = self.fpn(x) 16 | x = self.head(x) 17 | return x 18 | -------------------------------------------------------------------------------- /quarkdet/util/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .rank_filter import rank_filter 2 | from .path import mkdir 3 | from .logger import Logger, MovingAverage, AverageMeter 4 | from .data_parallel import DataParallel 5 | from .distributed_data_parallel import DDP 6 | from .check_point import load_model_weight, save_model 7 | from .config import cfg, load_config 8 | from .box_transform import * 9 | from .util_mixins import NiceRepr 10 | from .visualization import Visualizer, overlay_bbox_cv 11 | from .flops_counter import get_model_complexity_info 12 | -------------------------------------------------------------------------------- /quarkdet/model/module/activation.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | activations = {'ReLU': nn.ReLU, 4 | 'LeakyReLU': nn.LeakyReLU, 5 | 'ReLU6': nn.ReLU6, 6 | 'SELU': nn.SELU, 7 | 'ELU': nn.ELU, 8 | None: nn.Identity 9 | } 10 | 11 | 12 | def act_layers(name): 13 | assert name in activations.keys() 14 | if name == 'LeakyReLU': 15 | return nn.LeakyReLU(negative_slope=0.1, inplace=True) 16 | else: 17 | return activations[name](inplace=True) 18 | -------------------------------------------------------------------------------- /quarkdet/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from .trainer import Trainer 3 | from .dist_trainer import DistTrainer 4 | 5 | 6 | def build_trainer(rank, cfg, model, logger): 7 | if len(cfg.device.gpu_ids) > 1: 8 | trainer = DistTrainer(rank, cfg, model, logger) 9 | trainer.set_device(cfg.device.batchsize_per_gpu, rank, device=torch.device('cuda')) # TODO: device 10 | else: 11 | trainer = Trainer(rank, cfg, model, logger) 12 | trainer.set_device(cfg.device.batchsize_per_gpu, cfg.device.gpu_ids, device=torch.device('cuda')) 13 | return trainer 14 | 15 | -------------------------------------------------------------------------------- /quarkdet/model/neck/__init__.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from .fpn import FPN 3 | from .pan import PAN 4 | from .bifpn import BiFPN 5 | from .fpn_slim import FPN_Slim 6 | from .pan_slim import PAN_Slim 7 | 8 | 9 | def build_fpn(cfg): 10 | fpn_cfg = copy.deepcopy(cfg) 11 | name = fpn_cfg.pop('name') 12 | if name == 'FPN': 13 | return FPN(**fpn_cfg) 14 | elif name == 'PAN': 15 | return PAN(**fpn_cfg) 16 | elif name == 'BiFPN': 17 | return BiFPN(**fpn_cfg) 18 | elif name == 'FPN_Slim': 19 | return FPN_Slim(**fpn_cfg) 20 | elif name == 'PAN_Slim': 21 | return PAN_Slim(**fpn_cfg) 22 | else: 23 | raise NotImplementedError -------------------------------------------------------------------------------- /tools/flops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import sys 3 | sys.path.append("./") 4 | from quarkdet.model.detector import build_model 5 | from quarkdet.util import cfg, load_config, get_model_complexity_info 6 | 7 | 8 | def main(config, input_shape=(3, 320, 320)): 9 | model = build_model(config.model) 10 | #flops, params = get_model_complexity_info(model, input_shape) 11 | 12 | macs, params = get_model_complexity_info(model, input_shape, as_strings=True, 13 | print_per_layer_stat=True) 14 | print('{:<30} {:<8}'.format('Computational complexity: ', macs)) 15 | print('{:<30} {:<8}'.format('Number of parameters: ', params)) 16 | 17 | 18 | if __name__ == '__main__': 19 | 
cfg_path = r"config/ghostnet_slim.yml" 20 | load_config(cfg, cfg_path) 21 | main(config=cfg, 22 | input_shape=(3, 320, 320) 23 | ) 24 | 25 | 26 | -------------------------------------------------------------------------------- /quarkdet/model/head/sampler/pseudo_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .base_sampler import BaseSampler 4 | from .sampling_result import SamplingResult 5 | 6 | 7 | class PseudoSampler(BaseSampler): 8 | 9 | def __init__(self, **kwargs): 10 | pass 11 | 12 | def _sample_pos(self, **kwargs): 13 | raise NotImplementedError 14 | 15 | def _sample_neg(self, **kwargs): 16 | raise NotImplementedError 17 | 18 | def sample(self, assign_result, bboxes, gt_bboxes, **kwargs): 19 | pos_inds = torch.nonzero( 20 | assign_result.gt_inds > 0, as_tuple=False).squeeze(-1).unique() 21 | neg_inds = torch.nonzero( 22 | assign_result.gt_inds == 0, as_tuple=False).squeeze(-1).unique() 23 | gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8) 24 | sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 25 | assign_result, gt_flags) 26 | return sampling_result 27 | -------------------------------------------------------------------------------- /tools/export.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import sys 4 | sys.path.append("./") 5 | from quarkdet.model.detector import build_model 6 | from quarkdet.util import Logger, cfg, load_config, load_model_weight 7 | 8 | def main(config, model_path, output_path, input_shape=(320, 320)): 9 | logger = Logger(-1, config.save_dir, False) 10 | model = build_model(config.model) 11 | checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage) 12 | load_model_weight(model, checkpoint, logger) 13 | dummy_input = torch.autograd.Variable(torch.randn(1, 3, input_shape[0], input_shape[1])) 14 | torch.onnx.export(model, dummy_input, output_path, verbose=True, keep_initializers_as_inputs=True, opset_version=11) 15 | print('finished exporting onnx ') 16 | 17 | if __name__ == '__main__': 18 | cfg_path = r"config/quarkdet.yml" 19 | model_path = r"quarkdet.pth" 20 | out_path = r'output.onnx' 21 | load_config(cfg, cfg_path) 22 | main(cfg, model_path, out_path, input_shape=(320, 320)) -------------------------------------------------------------------------------- /quarkdet/util/config.py: -------------------------------------------------------------------------------- 1 | from .yacs import CfgNode 2 | 3 | cfg = CfgNode(new_allowed=True) 4 | cfg.save_dir = './' 5 | # common params for NETWORK 6 | cfg.model = CfgNode() 7 | cfg.model.detector = CfgNode(new_allowed=True) 8 | cfg.model.detector.backbone = CfgNode(new_allowed=True) 9 | cfg.model.detector.neck = CfgNode(new_allowed=True) 10 | cfg.model.detector.head = CfgNode(new_allowed=True) 11 | 12 | # DATASET related params 13 | cfg.data = CfgNode(new_allowed=True) 14 | cfg.data.train = CfgNode(new_allowed=True) 15 | cfg.data.val = CfgNode(new_allowed=True) 16 | cfg.device = CfgNode(new_allowed=True) 17 | # train 18 | cfg.schedule = CfgNode(new_allowed=True) 19 | 20 | # logger 21 | cfg.log = CfgNode() 22 | cfg.log.interval = 50 23 | 24 | # testing 25 | cfg.test = CfgNode() 26 | # size of images for each device 27 | 28 | 29 | def load_config(cfg, args_cfg): 30 | cfg.defrost() 31 | cfg.merge_from_file(args_cfg) 32 | cfg.freeze() 33 | 34 | 35 | if __name__ == '__main__': 36 | import sys 37 | 38 | with 
open(sys.argv[1], 'w') as f:
        print(cfg, file=f)

--------------------------------------------------------------------------------
/quarkdet/model/backbone/__init__.py:
--------------------------------------------------------------------------------
import copy
from .ghostnet import GhostNet_slim, GhostNet_full
from .shufflenetv2 import ShuffleNetV2
from .mobilenetv2 import MobileNetV2
from .mobilenetv3 import MobileNetV3_Small
from .efficientnet import EfficientNet


def build_backbone(cfg):
    backbone_cfg = copy.deepcopy(cfg)
    name = backbone_cfg.pop('name')
    if name == 'MicroNet':
        pass
    elif name == 'VovNetV2':
        pass
    elif name == 'ShuffleNetV2':
        return ShuffleNetV2(**backbone_cfg)
    elif name == 'GhostNet_slim':
        return GhostNet_slim(**backbone_cfg)
    elif name == 'GhostNet_full':
        return GhostNet_full(**backbone_cfg)
    elif name == 'MobileNetV2':
        return MobileNetV2(**backbone_cfg)
    elif name == 'MobileNetV3_Small':
        return MobileNetV3_Small(**backbone_cfg)
    elif name == 'EfficientNet':
        return EfficientNet(**backbone_cfg)
    else:
        raise NotImplementedError

--------------------------------------------------------------------------------
/quarkdet/data/transform/pipeline.py:
--------------------------------------------------------------------------------
from .warp import warp_and_resize
from .color import color_aug_and_norm
import functools


class Pipeline:
    def __init__(self,
                 cfg,
                 keep_ratio):
        self.warp = functools.partial(warp_and_resize,
                                      warp_kwargs=cfg,
                                      keep_ratio=keep_ratio)
        self.color = functools.partial(color_aug_and_norm,
                                       kwargs=cfg)

    def __call__(self, meta, dst_shape):
        meta = self.warp(meta=meta, dst_shape=dst_shape)
        meta = self.color(meta=meta)
        return meta


# Two augmentation stages are applied in sequence (warp, then color). What matters is
# whether a parameter is configured: if a key is present in the config the corresponding
# augmentation is enabled, otherwise it is skipped; each transform first checks whether
# the parameter exists.
# functools.partial(func, /, *args, **keywords)
# Return a new partial object which when called will behave like func called with the positional arguments args and keyword arguments keywords.
# If more arguments are supplied to the call, they are appended to args.
# If additional keyword arguments are supplied, they extend and override keywords.
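Usage sketch (not a file in the repository): the Predictor in tools/inference.py, shown further down, builds this Pipeline from the data.val section of a config and calls it with a meta dict. The snippet below does the same with a random image; the config path and image are illustrative, and it assumes the full QuarkDet package (including modules not reproduced here) is importable from the repo root.

import sys
sys.path.append("./")

import numpy as np
from quarkdet.data.transform import Pipeline
from quarkdet.util import cfg, load_config

load_config(cfg, "config/shufflenetv2_0.5x.yml")

# data.val.pipeline only carries a 'normalize' entry, so every optional
# warp/color augmentation that is not configured is simply skipped.
pipeline = Pipeline(cfg.data.val.pipeline, cfg.data.val.keep_ratio)

img = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
meta = dict(img_info={'file_name': None, 'height': 480, 'width': 640},
            raw_img=img,
            img=img)
meta = pipeline(meta, cfg.data.val.input_size)  # dst_shape is [w, h], here [320, 320]
print(meta['img'].shape, meta['img'].dtype)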
-------------------------------------------------------------------------------- /quarkdet/model/module/init_weights.py: -------------------------------------------------------------------------------- 1 | 2 | import torch.nn as nn 3 | 4 | 5 | def kaiming_init(module, 6 | a=0, 7 | mode='fan_out', 8 | nonlinearity='relu', 9 | bias=0, 10 | distribution='normal'): 11 | assert distribution in ['uniform', 'normal'] 12 | if distribution == 'uniform': 13 | nn.init.kaiming_uniform_( 14 | module.weight, a=a, mode=mode, nonlinearity=nonlinearity) 15 | else: 16 | nn.init.kaiming_normal_( 17 | module.weight, a=a, mode=mode, nonlinearity=nonlinearity) 18 | if hasattr(module, 'bias') and module.bias is not None: 19 | nn.init.constant_(module.bias, bias) 20 | 21 | 22 | def xavier_init(module, gain=1, bias=0, distribution='normal'): 23 | assert distribution in ['uniform', 'normal'] 24 | if distribution == 'uniform': 25 | nn.init.xavier_uniform_(module.weight, gain=gain) 26 | else: 27 | nn.init.xavier_normal_(module.weight, gain=gain) 28 | if hasattr(module, 'bias') and module.bias is not None: 29 | nn.init.constant_(module.bias, bias) 30 | 31 | 32 | def normal_init(module, mean=0, std=1, bias=0): 33 | nn.init.normal_(module.weight, mean, std) 34 | if hasattr(module, 'bias') and module.bias is not None: 35 | nn.init.constant_(module.bias, bias) 36 | 37 | 38 | def constant_init(module, val, bias=0): 39 | if hasattr(module, 'weight') and module.weight is not None: 40 | nn.init.constant_(module.weight, val) 41 | if hasattr(module, 'bias') and module.bias is not None: 42 | nn.init.constant_(module.bias, bias) -------------------------------------------------------------------------------- /quarkdet/util/check_point.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from .rank_filter import rank_filter 3 | 4 | def load_model_weight(model, checkpoint, logger): 5 | state_dict = checkpoint['state_dict'] 6 | # strip prefix of state_dict 7 | if list(state_dict.keys())[0].startswith('module.'): 8 | state_dict = {k[7:]: v for k, v in checkpoint['state_dict'].items()} 9 | 10 | model_state_dict = model.module.state_dict() if hasattr(model, 'module') else model.state_dict() 11 | 12 | # check loaded parameters and created model parameters 13 | for k in state_dict: 14 | if k in model_state_dict: 15 | if state_dict[k].shape != model_state_dict[k].shape: 16 | logger.log('Skip loading parameter {}, required shape{}, loaded shape{}.'.format( 17 | k, model_state_dict[k].shape, state_dict[k].shape)) 18 | state_dict[k] = model_state_dict[k] 19 | else: 20 | logger.log('Drop parameter {}.'.format(k)) 21 | for k in model_state_dict: 22 | if not (k in state_dict): 23 | logger.log('No param {}.'.format(k)) 24 | state_dict[k] = model_state_dict[k] 25 | model.load_state_dict(state_dict, strict=False) 26 | 27 | 28 | @rank_filter 29 | def save_model(model, path, epoch, iter, optimizer=None): 30 | model_state_dict = model.module.state_dict() if hasattr(model, 'module') else model.state_dict() 31 | data = {'epoch': epoch, 32 | 'state_dict': model_state_dict, 33 | 'iter': iter} 34 | if optimizer is not None: 35 | data['optimizer'] = optimizer.state_dict() 36 | 37 | torch.save(data, path) 38 | -------------------------------------------------------------------------------- /tools/statistics.py: -------------------------------------------------------------------------------- 1 | from pycocotools.coco import COCO 2 | # img_path: /media/ubuntu/data/dataset/COCOv1/2017/train2017 3 | # 
ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/ 4 | dataDir='/media/ubuntu/data/dataset/COCOv1/2017/' 5 | dataType='train2017' 6 | annFile='{}/annotations/instances_{}.json'.format(dataDir,dataType) 7 | 8 | # initialize COCO api for instance annotations 9 | coco=COCO(annFile) 10 | 11 | # display COCO categories and supercategories 12 | cats = coco.loadCats(coco.getCatIds()) # 类别 13 | cat_nms=[cat['name'] for cat in cats] #cat_nms是list类型 14 | #print(type(cat_nms)) 15 | #print('COCO categories: \n{}\n'.format(' '.join(cat_nms))) 16 | #print(len(cats)) 17 | 18 | # 错误的方式 19 | # catId = coco.getCatIds(catNms=cat_name) 20 | # 应把cat_name 变成 [cat_name] 21 | # 统计各类的图片数量和GT框数量 22 | for cat_name in cat_nms: 23 | #print("type(cat_name):",type(cat_name)) #test cat_name是str类型 24 | catId = coco.getCatIds(catNms=[cat_name]) 25 | #print("type(catId):",type(catId)) #test catId是list所以可以直接传参 26 | imgId = coco.getImgIds(catIds=catId) 27 | annId = coco.getAnnIds(imgIds=imgId, catIds=catId, iscrowd=None) 28 | 29 | 30 | #下面这段代码是测试,如果输出后面的注释的数字例如[3, 57]表示统计存在错误。 31 | # 如果输出一个数字表示正确 32 | #car & carrot 33 | #ear & teddy bear 34 | #dog & hot dog 35 | #----------------------------------------------------------------------- 36 | # if cat_name == "carrot": 37 | # print(catId) #[3, 57] 38 | 39 | # if cat_name == "teddy bear": 40 | # print(catId) #[23, 88] 41 | 42 | # if cat_name == "hot dog": 43 | # print(catId) #[18, 58] 44 | #----------------------------------------------------------------------- 45 | 46 | print("{:<15} {:<6d} {:<10d}".format(cat_name, len(imgId), len(annId))) -------------------------------------------------------------------------------- /tools/inference.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import time 4 | import torch 5 | import sys 6 | sys.path.append("./") 7 | from quarkdet.model.detector import build_model 8 | from quarkdet.util import load_model_weight 9 | from quarkdet.data.transform import Pipeline 10 | 11 | 12 | class Predictor(object): 13 | def __init__(self, cfg, model_path, logger, device='cuda:0'): 14 | self.cfg = cfg 15 | self.device = device 16 | model = build_model(cfg.model) 17 | ckpt = torch.load(model_path, map_location=lambda storage, loc: storage) 18 | load_model_weight(model, ckpt, logger) 19 | self.model = model.to(device).eval() 20 | self.pipeline = Pipeline(cfg.data.val.pipeline, cfg.data.val.keep_ratio) 21 | 22 | def inference(self, img): 23 | img_info = {} 24 | if isinstance(img, str): 25 | img_info['file_name'] = os.path.basename(img) 26 | img = cv2.imread(img) 27 | else: 28 | img_info['file_name'] = None 29 | 30 | height, width = img.shape[:2] 31 | img_info['height'] = height 32 | img_info['width'] = width 33 | meta = dict(img_info=img_info, 34 | raw_img=img, 35 | img=img) 36 | meta = self.pipeline(meta, self.cfg.data.val.input_size) 37 | meta['img'] = torch.from_numpy(meta['img'].transpose(2, 0, 1)).unsqueeze(0).to(self.device) 38 | with torch.no_grad(): 39 | results = self.model.inference(meta) 40 | return meta, results 41 | 42 | def visualize(self, dets, meta, class_names, score_thres, wait=0): 43 | time1 = time.time() 44 | self.model.head.show_result(meta['raw_img'], dets, class_names, score_thres=score_thres, show=True) 45 | print('viz time: {:.3f}s'.format(time.time()-time1)) 46 | -------------------------------------------------------------------------------- /quarkdet/model/detector/one_stage.py: 
-------------------------------------------------------------------------------- 1 | import time 2 | import torch 3 | import torch.nn as nn 4 | from ..backbone import build_backbone 5 | from ..neck import build_fpn 6 | from ..head import build_head 7 | 8 | 9 | class OneStage(nn.Module): 10 | def __init__(self, 11 | backbone_cfg, 12 | fpn_cfg=None, 13 | head_cfg=None,): 14 | super(OneStage, self).__init__() 15 | self.backbone = build_backbone(backbone_cfg) 16 | if fpn_cfg is not None: 17 | self.fpn = build_fpn(fpn_cfg) 18 | if head_cfg is not None: 19 | self.head = build_head(head_cfg) 20 | 21 | def forward(self, x): 22 | x = self.backbone(x) 23 | if hasattr(self, 'neck') and self.fpn is not None: 24 | x = self.fpn(x) 25 | if hasattr(self, 'head'): 26 | out = [] 27 | for xx in x: 28 | out.append(self.head(xx)) 29 | x = tuple(out) 30 | return x 31 | 32 | def inference(self, meta): 33 | with torch.no_grad(): 34 | torch.cuda.synchronize() 35 | time1 = time.time() 36 | preds = self(meta['img']) 37 | torch.cuda.synchronize() 38 | time2 = time.time() 39 | print('forward time: {:.3f}s'.format((time2 - time1)), end=' | ') 40 | results = self.head.post_process(preds, meta) 41 | torch.cuda.synchronize() 42 | print('decode time: {:.3f}s'.format((time.time() - time2)), end=' | ') 43 | return results 44 | 45 | def forward_train(self, gt_meta): 46 | preds = self(gt_meta['img']) 47 | loss, loss_states = self.head.loss(preds, gt_meta) 48 | 49 | #print("forward_train:",loss,type(loss)) 50 | 51 | return preds, loss, loss_states 52 | -------------------------------------------------------------------------------- /quarkdet/data/transform/color.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import random 4 | 5 | 6 | def random_brightness(img, delta): 7 | img += random.uniform(-delta, delta) 8 | return img 9 | 10 | 11 | def random_contrast(img, alpha_low, alpha_up): 12 | img *= random.uniform(alpha_low, alpha_up) 13 | return img 14 | 15 | 16 | def random_saturation(img, alpha_low, alpha_up): 17 | 18 | hsv_img = cv2.cvtColor(img.astype(np.float32), cv2.COLOR_BGR2HSV) 19 | hsv_img[..., 1] *= random.uniform(alpha_low, alpha_up) 20 | img = cv2.cvtColor(hsv_img, cv2.COLOR_HSV2BGR) 21 | 22 | return img 23 | 24 | 25 | def normalize(meta, mean, std): 26 | img = meta['img'].astype(np.float32) 27 | mean = np.array(mean, dtype=np.float64).reshape(1, -1) 28 | stdinv = 1 / np.array(std, dtype=np.float64).reshape(1, -1) 29 | cv2.subtract(img, mean, img) 30 | cv2.multiply(img, stdinv, img) 31 | meta['img'] = img 32 | return meta 33 | 34 | 35 | def _normalize(img, mean, std): 36 | mean = np.array(mean, dtype=np.float32).reshape(1, 1, 3) / 255 37 | std = np.array(std, dtype=np.float32).reshape(1, 1, 3) / 255 38 | img = (img - mean) / std 39 | return img 40 | 41 | 42 | def color_aug_and_norm(meta, kwargs): 43 | img = meta['img'].astype(np.float32) / 255 44 | 45 | if 'brightness' in kwargs and random.randint(0, 1): 46 | img = random_brightness(img, kwargs['brightness']) 47 | 48 | if 'contrast' in kwargs and random.randint(0, 1): 49 | img = random_contrast(img, *kwargs['contrast']) 50 | 51 | if 'saturation' in kwargs and random.randint(0, 1): 52 | img = random_saturation(img, *kwargs['saturation']) 53 | # cv2.imshow('trans', img) 54 | # cv2.waitKey(0) 55 | img = _normalize(img, *kwargs['normalize']) 56 | meta['img'] = img 57 | return meta 58 | 59 | 60 | -------------------------------------------------------------------------------- 
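Quick check of the color stage just shown (again, not a repository file): functools.partial freezes the kwargs dict, exactly as Pipeline.__init__ does, so later calls only supply the sample dict. The keys mirror the pipeline section of config/shufflenetv2_0.5x.yml; the mean/std are in BGR order because images are read with cv2.imread. The dummy image is illustrative, and the quarkdet package is assumed to be importable from the repo root.

import functools
import numpy as np
from quarkdet.data.transform.color import color_aug_and_norm

aug_cfg = {
    'brightness': 0.2,
    'contrast': [0.8, 1.2],
    'saturation': [0.8, 1.2],
    'normalize': [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]],
}

# Same binding as Pipeline.__init__: kwargs is fixed here, meta is supplied per call.
# brightness/contrast/saturation each fire with probability 0.5; 'normalize' always runs.
color = functools.partial(color_aug_and_norm, kwargs=aug_cfg)

meta = {'img': np.random.randint(0, 255, (320, 320, 3), dtype=np.uint8)}
out = color(meta=meta)
print(out['img'].shape, out['img'].dtype)  # (320, 320, 3) float32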
/quarkdet/model/head/anchor/anchor_target.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from functools import partial 3 | 4 | 5 | def multi_apply(func, *args, **kwargs): 6 | pfunc = partial(func, **kwargs) if kwargs else func 7 | map_results = map(pfunc, *args) 8 | return tuple(map(list, zip(*map_results))) 9 | 10 | 11 | def images_to_levels(target, num_level_anchors): 12 | """Convert targets by image to targets by feature level. 13 | 14 | [target_img0, target_img1] -> [target_level0, target_level1, ...] 15 | """ 16 | target = torch.stack(target, 0) 17 | level_targets = [] 18 | start = 0 19 | for n in num_level_anchors: 20 | end = start + n 21 | level_targets.append(target[:, start:end].squeeze(0)) 22 | start = end 23 | return level_targets 24 | 25 | 26 | def anchor_inside_flags(flat_anchors, 27 | valid_flags, 28 | img_shape, 29 | allowed_border=0): 30 | img_h, img_w = img_shape 31 | if allowed_border >= 0: 32 | inside_flags = valid_flags & \ 33 | (flat_anchors[:, 0] >= -allowed_border) & \ 34 | (flat_anchors[:, 1] >= -allowed_border) & \ 35 | (flat_anchors[:, 2] < img_w + allowed_border) & \ 36 | (flat_anchors[:, 3] < img_h + allowed_border) 37 | else: 38 | inside_flags = valid_flags 39 | return inside_flags 40 | 41 | 42 | def unmap(data, count, inds, fill=0): 43 | """ Unmap a subset of item (data) back to the original set of items (of 44 | size count) """ 45 | if data.dim() == 1: 46 | ret = data.new_full((count, ), fill) 47 | ret[inds.type(torch.bool)] = data 48 | else: 49 | new_size = (count, ) + data.size()[1:] 50 | ret = data.new_full(new_size, fill) 51 | ret[inds.type(torch.bool), :] = data 52 | return ret 53 | -------------------------------------------------------------------------------- /quarkdet/util/box_transform.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def distance2bbox(points, distance, max_shape=None): 5 | """Decode distance prediction to bounding box. 6 | 7 | Args: 8 | points (Tensor): Shape (n, 2), [x, y]. 9 | distance (Tensor): Distance from the given point to 4 10 | boundaries (left, top, right, bottom). 11 | max_shape (tuple): Shape of the image. 12 | 13 | Returns: 14 | Tensor: Decoded bboxes. 15 | """ 16 | x1 = points[:, 0] - distance[:, 0] 17 | y1 = points[:, 1] - distance[:, 1] 18 | x2 = points[:, 0] + distance[:, 2] 19 | y2 = points[:, 1] + distance[:, 3] 20 | if max_shape is not None: 21 | x1 = x1.clamp(min=0, max=max_shape[1]) 22 | y1 = y1.clamp(min=0, max=max_shape[0]) 23 | x2 = x2.clamp(min=0, max=max_shape[1]) 24 | y2 = y2.clamp(min=0, max=max_shape[0]) 25 | return torch.stack([x1, y1, x2, y2], -1) 26 | 27 | 28 | def bbox2distance(points, bbox, max_dis=None, eps=0.1): 29 | """Decode bounding box based on distances. 30 | 31 | Args: 32 | points (Tensor): Shape (n, 2), [x, y]. 33 | bbox (Tensor): Shape (n, 4), "xyxy" format 34 | max_dis (float): Upper bound of the distance. 35 | eps (float): a small value to ensure target < max_dis, instead <= 36 | 37 | Returns: 38 | Tensor: Decoded distances. 
39 | """ 40 | left = points[:, 0] - bbox[:, 0] 41 | top = points[:, 1] - bbox[:, 1] 42 | right = bbox[:, 2] - points[:, 0] 43 | bottom = bbox[:, 3] - points[:, 1] 44 | if max_dis is not None: 45 | left = left.clamp(min=0, max=max_dis - eps) 46 | top = top.clamp(min=0, max=max_dis - eps) 47 | right = right.clamp(min=0, max=max_dis - eps) 48 | bottom = bottom.clamp(min=0, max=max_dis - eps) 49 | return torch.stack([left, top, right, bottom], -1) -------------------------------------------------------------------------------- /quarkdet/util/scatter_gather.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | from torch.nn.parallel._functions import Scatter 4 | 5 | 6 | def list_scatter(input, target_gpus, chunk_sizes): 7 | ret = [] 8 | for idx, size in enumerate(chunk_sizes): 9 | ret.append(input[:size]) 10 | del input[:size] 11 | return tuple(ret) 12 | 13 | def scatter(inputs, target_gpus, dim=0, chunk_sizes=None): 14 | """ 15 | Slices variables into approximately equal chunks and 16 | distributes them across given GPUs. Duplicates 17 | references to objects that are not variables. Does not 18 | support Tensors. 19 | """ 20 | def scatter_map(obj): 21 | if isinstance(obj, Variable): 22 | return Scatter.apply(target_gpus, chunk_sizes, dim, obj) 23 | assert not torch.is_tensor(obj), "Tensors not supported in scatter." 24 | if isinstance(obj, list): 25 | return list_scatter(obj, target_gpus, chunk_sizes) 26 | if isinstance(obj, tuple): 27 | return list(zip(*map(scatter_map, obj))) 28 | if isinstance(obj, dict): 29 | return list(map(type(obj), zip(*map(scatter_map, obj.items())))) 30 | return [obj for targets in target_gpus] 31 | 32 | return scatter_map(inputs) 33 | 34 | 35 | def scatter_kwargs(inputs, kwargs, target_gpus, dim=0, chunk_sizes=None): 36 | r"""Scatter with support for kwargs dictionary""" 37 | inputs = scatter(inputs, target_gpus, dim, chunk_sizes) if inputs else [] 38 | kwargs = scatter(kwargs, target_gpus, dim, chunk_sizes) if kwargs else [] 39 | if len(inputs) < len(kwargs): 40 | inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) 41 | elif len(kwargs) < len(inputs): 42 | kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) 43 | inputs = tuple(inputs) 44 | kwargs = tuple(kwargs) 45 | return inputs, kwargs -------------------------------------------------------------------------------- /quarkdet/model/module/norm.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | norm_cfg = { 4 | # format: layer_type: (abbreviation, module) 5 | 'BN': ('bn', nn.BatchNorm2d), 6 | 'SyncBN': ('bn', nn.SyncBatchNorm), 7 | 'GN': ('gn', nn.GroupNorm), 8 | # and potentially 'SN' 9 | } 10 | 11 | 12 | def build_norm_layer(cfg, num_features, postfix=''): 13 | """ Build normalization layer 14 | 15 | Args: 16 | cfg (dict): cfg should contain: 17 | type (str): identify norm layer type. 18 | layer args: args needed to instantiate a norm layer. 19 | requires_grad (bool): [optional] whether stop gradient updates 20 | num_features (int): number of channels from input. 21 | postfix (int, str): appended into norm abbreviation to 22 | create named layer. 
23 | 24 | Returns: 25 | name (str): abbreviation + postfix 26 | layer (nn.Module): created norm layer 27 | """ 28 | assert isinstance(cfg, dict) and 'type' in cfg 29 | cfg_ = cfg.copy() 30 | 31 | layer_type = cfg_.pop('type') 32 | if layer_type not in norm_cfg: 33 | raise KeyError('Unrecognized norm type {}'.format(layer_type)) 34 | else: 35 | abbr, norm_layer = norm_cfg[layer_type] 36 | if norm_layer is None: 37 | raise NotImplementedError 38 | 39 | assert isinstance(postfix, (int, str)) 40 | name = abbr + str(postfix) 41 | 42 | requires_grad = cfg_.pop('requires_grad', True) 43 | cfg_.setdefault('eps', 1e-5) 44 | if layer_type != 'GN': 45 | layer = norm_layer(num_features, **cfg_) 46 | if layer_type == 'SyncBN': 47 | layer._specify_ddp_gpu_num(1) 48 | else: 49 | assert 'num_groups' in cfg_ 50 | layer = norm_layer(num_channels=num_features, **cfg_) 51 | 52 | for param in layer.parameters(): 53 | param.requires_grad = requires_grad 54 | 55 | return name, layer 56 | -------------------------------------------------------------------------------- /quarkdet/model/neck/pan_slim.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from .fpn_slim import FPN_Slim 4 | 5 | 6 | 7 | class PAN_Slim(FPN_Slim): 8 | def __init__(self, 9 | in_channels, 10 | out_channels, 11 | num_outs, 12 | start_level=0, 13 | end_level=-1, 14 | conv_cfg=None, 15 | norm_cfg=None, 16 | activation=None): 17 | super(PAN_Slim, 18 | self).__init__(in_channels, out_channels, num_outs, start_level, 19 | end_level, conv_cfg, norm_cfg, activation) 20 | self.init_weights() 21 | 22 | def forward(self, inputs): 23 | """Forward function.""" 24 | assert len(inputs) == len(self.in_channels) 25 | 26 | # build laterals 27 | laterals = [ 28 | lateral_conv(inputs[i + self.start_level]) 29 | for i, lateral_conv in enumerate(self.lateral_convs) 30 | ] 31 | 32 | # build top-down path 33 | used_backbone_levels = len(laterals) 34 | for i in range(used_backbone_levels - 1, 0, -1): 35 | prev_shape = laterals[i - 1].shape[2:] 36 | laterals[i - 1] += F.interpolate( 37 | laterals[i], size=prev_shape, mode='bilinear') 38 | 39 | # build outputs 40 | # part 1: from original levels 41 | inter_outs = [ 42 | laterals[i] for i in range(used_backbone_levels) 43 | ] 44 | 45 | # part 2: add bottom-up path 46 | for i in range(0, used_backbone_levels - 1): 47 | prev_shape = inter_outs[i + 1].shape[2:] 48 | inter_outs[i + 1] += F.interpolate(inter_outs[i], size=prev_shape, mode='bilinear') 49 | 50 | outs = [] 51 | outs.append(inter_outs[0]) 52 | outs.extend([ 53 | inter_outs[i] for i in range(1, used_backbone_levels) 54 | ]) 55 | return tuple(outs) 56 | -------------------------------------------------------------------------------- /quarkdet/trainer/dist_trainer.py: -------------------------------------------------------------------------------- 1 | import torch.distributed as dist 2 | from .trainer import Trainer 3 | from ..util import DDP 4 | import torch 5 | 6 | 7 | def average_gradients(model): 8 | """ Gradient averaging. """ 9 | size = float(dist.get_world_size()) 10 | for param in model.parameters(): 11 | if param.grad is not None: 12 | dist.all_reduce(param.grad.data, op=dist.ReduceOp.SUM) 13 | param.grad.data /= size 14 | 15 | 16 | 17 | class DistTrainer(Trainer): 18 | """ 19 | Distributed trainer for multi-gpu training. 
(not finish yet) 20 | """ 21 | def run_step(self, model, batch, mode='train'): 22 | output, loss, loss_stats = model.module.forward_train(batch) 23 | loss = loss.mean() 24 | loss.requires_grad_() 25 | 26 | #----------------------------------------------------------------------- 27 | # # #santiago 28 | # grad_params = torch.autograd.grad(loss, model.parameters(), create_graph=True,allow_unused=True) 29 | # # torch.autograd.grad does not accumuate the gradients into the .grad attributes 30 | # # It instead returns the gradients as Variable tuples. 31 | 32 | # # now compute the 2-norm of the grad_params 33 | # grad_norm = 0 34 | # for grad in grad_params: 35 | # grad_norm += (grad * grad).sum() 36 | # grad_norm = grad_norm.sqrt() 37 | # print("grad_norm:",grad_norm) 38 | 39 | # # take the gradients wrt grad_norm. backward() will accumulate 40 | # # the gradients into the .grad attributes 41 | # grad_norm.backward() 42 | #----------------------------------------------------------------------- 43 | 44 | 45 | if mode == 'train': 46 | self.optimizer.zero_grad() 47 | loss.backward() 48 | average_gradients(model) 49 | self.optimizer.step() 50 | return output, loss, loss_stats 51 | 52 | def set_device(self, batch_per_gpu, rank, device): 53 | """ 54 | Set model device for Distributed-Data-Parallel 55 | :param batch_per_gpu: batch size of each gpu 56 | :param rank: distributed training process rank 57 | :param device: cuda 58 | """ 59 | self.rank = rank 60 | self.model = DDP(batch_per_gpu, module=self.model.cuda(), device_ids=[rank], output_device=rank) 61 | 62 | 63 | -------------------------------------------------------------------------------- /quarkdet/evaluator/coco_detection.py: -------------------------------------------------------------------------------- 1 | import pycocotools.coco as coco 2 | from pycocotools.cocoeval import COCOeval 3 | import json 4 | import os 5 | import copy 6 | 7 | 8 | def xyxy2xywh(bbox): 9 | """ 10 | change bbox to coco format 11 | :param bbox: [x1, y1, x2, y2] 12 | :return: [x, y, w, h] 13 | """ 14 | return [ 15 | bbox[0], 16 | bbox[1], 17 | bbox[2] - bbox[0], 18 | bbox[3] - bbox[1], 19 | ] 20 | 21 | 22 | class CocoDetectionEvaluator: 23 | def __init__(self, dataset): 24 | assert hasattr(dataset, 'coco_api') 25 | self.coco_api = dataset.coco_api 26 | self.cat_ids = dataset.cat_ids 27 | self.metric_names = ['mAP', 'AP_50', 'AP_75', 'AP_small', 'AP_m', 'AP_l'] 28 | 29 | def results2json(self, results): 30 | """ 31 | results: {image_id: {label: [bboxes...] 
} } 32 | :return coco json format: {image_id: 33 | category_id: 34 | bbox: 35 | score: } 36 | """ 37 | json_results = [] 38 | for image_id, dets in results.items(): 39 | for label, bboxes in dets.items(): 40 | category_id = self.cat_ids[label] 41 | for bbox in bboxes: 42 | score = float(bbox[4]) 43 | detection = dict( 44 | image_id=int(image_id), 45 | category_id=int(category_id), 46 | bbox=xyxy2xywh(bbox), 47 | score=score) 48 | json_results.append(detection) 49 | return json_results 50 | 51 | def evaluate(self, results, save_dir, epoch, logger, rank=-1): 52 | results_json = self.results2json(results) 53 | json_path = os.path.join(save_dir, 'results{}.json'.format(rank)) 54 | json.dump(results_json, open(json_path, 'w')) 55 | coco_dets = self.coco_api.loadRes(json_path) 56 | coco_eval = COCOeval(copy.deepcopy(self.coco_api), copy.deepcopy(coco_dets), "bbox") 57 | coco_eval.evaluate() 58 | coco_eval.accumulate() 59 | coco_eval.summarize() 60 | aps = coco_eval.stats[:6] 61 | eval_results = {} 62 | for k, v in zip(self.metric_names, aps): 63 | eval_results[k] = v 64 | logger.scalar_summary('Val_coco_bbox/' + k, 'val', v, epoch) 65 | return eval_results 66 | -------------------------------------------------------------------------------- /quarkdet/data/collate.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | import re 5 | from torch._six import container_abcs, string_classes, int_classes 6 | 7 | 8 | np_str_obj_array_pattern = re.compile(r'[SaUO]') 9 | 10 | 11 | default_collate_err_msg_format = ( 12 | "default_collate: batch must contain tensors, numpy arrays, numbers, " 13 | "dicts or lists; found {}") 14 | 15 | 16 | def custom_collate_function(batch): 17 | r"""Puts each data field into a tensor with outer dimension batch size""" 18 | 19 | elem = batch[0] 20 | elem_type = type(elem) 21 | if isinstance(elem, torch.Tensor): 22 | out = None 23 | 24 | # if torch.utils.data.get_worker_info() is not None: 25 | # # # If we're in a background process, concatenate directly into a 26 | # # # shared memory tensor to avoid an extra copy 27 | # numel = sum([x.numel() for x in batch]) 28 | # storage = elem.storage()._new_shared(numel) 29 | # out = elem.new(storage) 30 | return torch.stack(batch, 0, out=out) 31 | elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \ 32 | and elem_type.__name__ != 'string_': 33 | elem = batch[0] 34 | if elem_type.__name__ == 'ndarray': 35 | # array of string classes and object 36 | if np_str_obj_array_pattern.search(elem.dtype.str) is not None: 37 | raise TypeError(default_collate_err_msg_format.format(elem.dtype)) 38 | 39 | # return custom_collate_function([torch.as_tensor(b) for b in batch]) 40 | return batch 41 | elif elem.shape == (): # scalars 42 | # return torch.as_tensor(batch) 43 | return batch 44 | elif isinstance(elem, float): 45 | return torch.tensor(batch, dtype=torch.float64) 46 | elif isinstance(elem, int_classes): 47 | return torch.tensor(batch) 48 | elif isinstance(elem, string_classes): 49 | return batch 50 | elif isinstance(elem, container_abcs.Mapping): 51 | return {key: custom_collate_function([d[key] for d in batch]) for key in elem} 52 | elif isinstance(elem, tuple) and hasattr(elem, '_fields'): # namedtuple 53 | return elem_type(*(custom_collate_function(samples) for samples in zip(*batch))) 54 | elif isinstance(elem, container_abcs.Sequence): 55 | transposed = zip(*batch) 56 | return [custom_collate_function(samples) for samples in 
transposed] 57 | 58 | raise TypeError(default_collate_err_msg_format.format(elem_type)) 59 | -------------------------------------------------------------------------------- /tools/test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import json 4 | import datetime 5 | import argparse 6 | import sys 7 | sys.path.append("./") 8 | from quarkdet.util import mkdir, Logger, cfg, load_config 9 | from quarkdet.trainer import build_trainer 10 | from quarkdet.data.collate import collate_function 11 | from quarkdet.data.dataset import build_dataset 12 | from quarkdet.model.detector import build_model 13 | from quarkdet.evaluator import build_evaluator 14 | 15 | 16 | def parse_args(): 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument('config', help='model config file path') 19 | parser.add_argument('--task', default='val', help='task to run, test or val') 20 | parser.add_argument('--save_result', action='store_true', default=True, help='save val results to txt') 21 | args = parser.parse_args() 22 | return args 23 | 24 | 25 | def main(args): 26 | load_config(cfg, args.config) 27 | local_rank = -1 28 | torch.backends.cudnn.enabled = True 29 | torch.backends.cudnn.benchmark = True 30 | cfg.defrost() 31 | timestr = datetime.datetime.now().__format__('%Y%m%d%H%M%S') 32 | cfg.save_dir = os.path.join(cfg.save_dir, timestr) 33 | cfg.freeze() 34 | mkdir(local_rank, cfg.save_dir) 35 | logger = Logger(local_rank, cfg.save_dir) 36 | 37 | logger.log('Creating model...') 38 | model = build_model(cfg.model) 39 | 40 | logger.log('Setting up data...') 41 | val_dataset = build_dataset(cfg.data.val, args.task) 42 | val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=1, shuffle=False, num_workers=1, 43 | pin_memory=True, collate_fn=collate_function, drop_last=True) 44 | trainer = build_trainer(local_rank, cfg, model, logger) 45 | if 'load_model' in cfg.schedule: 46 | trainer.load_model(cfg) 47 | evaluator = build_evaluator(cfg, val_dataset) 48 | logger.log('Starting testing...') 49 | with torch.no_grad(): 50 | results, val_loss_dict,_ = trainer.run_epoch(0, val_dataloader, mode=args.task) 51 | if args.task == 'test': 52 | res_json = evaluator.results2json(results) 53 | json_path = os.path.join(cfg.save_dir, 'results{}.json'.format(timestr)) 54 | json.dump(res_json, open(json_path, 'w')) 55 | elif args.task == 'val': 56 | eval_results = evaluator.evaluate(results, cfg.save_dir, 0, logger, rank=local_rank) 57 | if args.save_result: 58 | txt_path = os.path.join(cfg.save_dir, "eval_results{}.txt".format(timestr)) 59 | with open(txt_path, "a") as f: 60 | for k, v in eval_results.items(): 61 | f.write("{}: {}\n".format(k, v)) 62 | 63 | 64 | if __name__ == '__main__': 65 | args = parse_args() 66 | main(args) 67 | -------------------------------------------------------------------------------- /quarkdet/util/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import torch 4 | import numpy as np 5 | from termcolor import colored 6 | from .rank_filter import rank_filter 7 | from .path import mkdir 8 | 9 | 10 | class Logger: 11 | def __init__(self, local_rank, save_dir='./', use_tensorboard=True): 12 | mkdir(local_rank, save_dir) 13 | self.rank = local_rank 14 | fmt = colored('[%(name)s]', 'magenta', attrs=['bold']) + colored('[%(asctime)s]', 'blue') + \ 15 | colored('%(levelname)s:', 'green') + colored('%(message)s', 'white') 16 | 
logging.basicConfig(level=logging.INFO, 17 | filename=os.path.join(save_dir, 'logs.txt'), 18 | filemode='w') 19 | self.log_dir = os.path.join(save_dir, 'logs') 20 | console = logging.StreamHandler() 21 | console.setLevel(logging.INFO) 22 | formatter = logging.Formatter(fmt, datefmt="%m-%d %H:%M:%S") 23 | console.setFormatter(formatter) 24 | logging.getLogger().addHandler(console) 25 | if use_tensorboard: 26 | try: 27 | from torch.utils.tensorboard import SummaryWriter 28 | except ImportError: 29 | raise ImportError( 30 | 'Please run "pip install future tensorboard" to install ' 31 | 'the dependencies to use torch.utils.tensorboard ' 32 | '(applicable to PyTorch 1.1 or higher)') 33 | if self.rank < 1: 34 | logging.info('Using Tensorboard, logs will be saved in {}'.format(self.log_dir)) 35 | self.writer = SummaryWriter(log_dir=self.log_dir) 36 | 37 | def log(self, string): 38 | if self.rank < 1: 39 | logging.info(string) 40 | 41 | def scalar_summary(self, tag, phase, value, step): 42 | if self.rank < 1: 43 | self.writer.add_scalars(tag, {phase: value}, step) 44 | 45 | 46 | class MovingAverage(object): 47 | def __init__(self, val, window_size=50): 48 | self.window_size = window_size 49 | self.reset() 50 | self.push(val) 51 | 52 | def reset(self): 53 | self.queue = [] 54 | 55 | def push(self, val): 56 | self.queue.append(val) 57 | if len(self.queue) > self.window_size: 58 | self.queue.pop(0) 59 | 60 | def avg(self): 61 | return np.mean(self.queue) 62 | 63 | 64 | class AverageMeter(object): 65 | """Computes and stores the average and current value""" 66 | 67 | def __init__(self, val): 68 | self.reset() 69 | self.update(val) 70 | 71 | def reset(self): 72 | self.val = 0 73 | self.avg = 0 74 | self.sum = 0 75 | self.count = 0 76 | 77 | def update(self, val, n=1): 78 | self.val = val 79 | self.sum += val * n 80 | self.count += n 81 | if self.count > 0: 82 | self.avg = self.sum / self.count 83 | -------------------------------------------------------------------------------- /quarkdet/model/neck/fpn_slim.py: -------------------------------------------------------------------------------- 1 | 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from ..module.conv import ConvModule 5 | from ..module.init_weights import xavier_init 6 | 7 | 8 | 9 | 10 | 11 | class FPN_Slim(nn.Module): 12 | 13 | def __init__(self, 14 | in_channels, 15 | out_channels, 16 | num_outs, 17 | start_level=0, 18 | end_level=-1, 19 | conv_cfg=None, 20 | norm_cfg=None, 21 | activation=None 22 | ): 23 | super(FPN_Slim, self).__init__() 24 | assert isinstance(in_channels, list) 25 | self.in_channels = in_channels 26 | self.out_channels = out_channels 27 | self.num_ins = len(in_channels) 28 | self.num_outs = num_outs 29 | self.fp16_enabled = False 30 | 31 | if end_level == -1: 32 | self.backbone_end_level = self.num_ins 33 | assert num_outs >= self.num_ins - start_level 34 | else: 35 | # if end_level < inputs, no extra level is allowed 36 | self.backbone_end_level = end_level 37 | assert end_level <= len(in_channels) 38 | assert num_outs == end_level - start_level 39 | self.start_level = start_level 40 | self.end_level = end_level 41 | self.lateral_convs = nn.ModuleList() 42 | 43 | for i in range(self.start_level, self.backbone_end_level): 44 | l_conv = ConvModule( 45 | in_channels[i], 46 | out_channels, 47 | 1, 48 | conv_cfg=conv_cfg, 49 | norm_cfg=norm_cfg, 50 | activation=activation, 51 | inplace=False) 52 | 53 | self.lateral_convs.append(l_conv) 54 | self.init_weights() 55 | 56 | # default init_weights for conv(msra) 
and norm in ConvModule 57 | def init_weights(self): 58 | for m in self.modules(): 59 | if isinstance(m, nn.Conv2d): 60 | xavier_init(m, distribution='uniform') 61 | 62 | def forward(self, inputs): 63 | assert len(inputs) == len(self.in_channels) 64 | 65 | # build laterals 66 | laterals = [ 67 | lateral_conv(inputs[i + self.start_level]) 68 | for i, lateral_conv in enumerate(self.lateral_convs) 69 | ] 70 | 71 | # build top-down path 72 | used_backbone_levels = len(laterals) 73 | for i in range(used_backbone_levels - 1, 0, -1): 74 | prev_shape = laterals[i - 1].shape[2:] 75 | laterals[i - 1] += F.interpolate( 76 | laterals[i], size=prev_shape, mode='bilinear') 77 | 78 | # build outputs 79 | outs = [ 80 | # self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels) 81 | laterals[i] for i in range(used_backbone_levels) 82 | ] 83 | return tuple(outs) 84 | 85 | 86 | # if __name__ == '__main__': 87 | -------------------------------------------------------------------------------- /quarkdet/data/dataset/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | import torch 3 | import numpy as np 4 | from torch.utils.data import Dataset 5 | from ..transform import Pipeline 6 | 7 | 8 | class BaseDataset(Dataset, metaclass=ABCMeta): 9 | """ 10 | A dataset should have images, annotations and preprocessing pipelines 11 | QuarkDet use [xmin, ymin, xmax, ymax] format for box and 12 | [[x0,y0], [x1,y1] ... [xn,yn]] format for key points. 13 | instance masks should decode into binary masks for each instance like 14 | { 15 | 'bbox': [xmin,ymin,xmax,ymax], 16 | 'mask': mask 17 | } 18 | segmentation mask should decode into binary masks for each class. 19 | 20 | :param img_path: image data folder 21 | :param ann_path: annotation file path or folder 22 | :param use_instance_mask: load instance segmentation data 23 | :param use_seg_mask: load semantic segmentation data 24 | :param use_keypoint: load pose keypoint data 25 | :param load_mosaic: using mosaic data augmentation from yolov4 26 | :param mode: train or val or test 27 | :param mosaic_image_size: image size Dynamic segmentation 例如图像大小640则会分成4个320×320的图像,以配置文件为准 28 | """ 29 | def __init__(self, 30 | img_path, 31 | ann_path, 32 | input_size, 33 | pipeline, 34 | keep_ratio=True, 35 | use_instance_mask=False, 36 | use_seg_mask=False, 37 | use_keypoint=False, 38 | load_mosaic=False, 39 | mosaic_probability= 0.3, 40 | mosaic_area =9, 41 | mosaic_image_size=320, 42 | mode='train' 43 | ): 44 | self.img_path = img_path 45 | self.ann_path = ann_path 46 | self.input_size = input_size 47 | self.pipeline = Pipeline(pipeline, keep_ratio) 48 | self.keep_ratio = keep_ratio 49 | self.use_instance_mask = use_instance_mask 50 | self.use_seg_mask = use_seg_mask 51 | self.use_keypoint = use_keypoint 52 | self.load_mosaic = load_mosaic 53 | self.mosaic_probability=mosaic_probability 54 | self.mosaic_area=mosaic_area 55 | self.mosaic_image_size=mosaic_image_size 56 | self.mode = mode 57 | 58 | self.data_info = self.get_data_info(ann_path) 59 | 60 | def __len__(self): 61 | return len(self.data_info) 62 | 63 | def __getitem__(self, idx): 64 | if self.mode == 'val' or self.mode == 'test': 65 | return self.get_val_data(idx) 66 | else: 67 | while True: 68 | data = self.get_train_data(idx) 69 | if data is None: 70 | idx = self.get_another_id() 71 | continue 72 | return data 73 | 74 | @abstractmethod 75 | def get_data_info(self, ann_path): 76 | pass 77 | 78 | @abstractmethod 79 | def 
get_train_data(self, idx): 80 | pass 81 | 82 | @abstractmethod 83 | def get_val_data(self, idx): 84 | pass 85 | 86 | def get_another_id(self): 87 | return np.random.random_integers(0, len(self.data_info)-1) 88 | 89 | 90 | 91 | 92 | -------------------------------------------------------------------------------- /quarkdet/model/head/sampler/base_sampler.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | import torch 4 | 5 | from .sampling_result import SamplingResult 6 | 7 | 8 | class BaseSampler(metaclass=ABCMeta): 9 | 10 | def __init__(self, 11 | num, 12 | pos_fraction, 13 | neg_pos_ub=-1, 14 | add_gt_as_proposals=True, 15 | **kwargs): 16 | self.num = num 17 | self.pos_fraction = pos_fraction 18 | self.neg_pos_ub = neg_pos_ub 19 | self.add_gt_as_proposals = add_gt_as_proposals 20 | self.pos_sampler = self 21 | self.neg_sampler = self 22 | 23 | @abstractmethod 24 | def _sample_pos(self, assign_result, num_expected, **kwargs): 25 | pass 26 | 27 | @abstractmethod 28 | def _sample_neg(self, assign_result, num_expected, **kwargs): 29 | pass 30 | 31 | def sample(self, 32 | assign_result, 33 | bboxes, 34 | gt_bboxes, 35 | gt_labels=None, 36 | **kwargs): 37 | """Sample positive and negative bboxes. 38 | 39 | This is a simple implementation of bbox sampling given candidates, 40 | assigning results and ground truth bboxes. 41 | 42 | Args: 43 | assign_result (:obj:`AssignResult`): Bbox assigning results. 44 | bboxes (Tensor): Boxes to be sampled from. 45 | gt_bboxes (Tensor): Ground truth bboxes. 46 | gt_labels (Tensor, optional): Class labels of ground truth bboxes. 47 | 48 | Returns: 49 | :obj:`SamplingResult`: Sampling result. 50 | 51 | """ 52 | if len(bboxes.shape) < 2: 53 | bboxes = bboxes[None, :] 54 | 55 | bboxes = bboxes[:, :4] 56 | 57 | gt_flags = bboxes.new_zeros((bboxes.shape[0],), dtype=torch.uint8) 58 | if self.add_gt_as_proposals and len(gt_bboxes) > 0: 59 | if gt_labels is None: 60 | raise ValueError( 61 | 'gt_labels must be given when add_gt_as_proposals is True') 62 | bboxes = torch.cat([gt_bboxes, bboxes], dim=0) 63 | assign_result.add_gt_(gt_labels) 64 | gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.uint8) 65 | gt_flags = torch.cat([gt_ones, gt_flags]) 66 | 67 | num_expected_pos = int(self.num * self.pos_fraction) 68 | pos_inds = self.pos_sampler._sample_pos( 69 | assign_result, num_expected_pos, bboxes=bboxes, **kwargs) 70 | # We found that sampled indices have duplicated items occasionally. 
71 | # (may be a bug of PyTorch) 72 | pos_inds = pos_inds.unique() 73 | num_sampled_pos = pos_inds.numel() 74 | num_expected_neg = self.num - num_sampled_pos 75 | if self.neg_pos_ub >= 0: 76 | _pos = max(1, num_sampled_pos) 77 | neg_upper_bound = int(self.neg_pos_ub * _pos) 78 | if num_expected_neg > neg_upper_bound: 79 | num_expected_neg = neg_upper_bound 80 | neg_inds = self.neg_sampler._sample_neg( 81 | assign_result, num_expected_neg, bboxes=bboxes, **kwargs) 82 | neg_inds = neg_inds.unique() 83 | 84 | sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 85 | assign_result, gt_flags) 86 | return sampling_result 87 | -------------------------------------------------------------------------------- /quarkdet/model/head/sampler/sampling_result.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from quarkdet.util import util_mixins 4 | 5 | 6 | class SamplingResult(util_mixins.NiceRepr): 7 | """ 8 | Example: 9 | >>> # xdoctest: +IGNORE_WANT 10 | >>> self = SamplingResult.random(rng=10) 11 | >>> print('self = {}'.format(self)) 12 | self = 21 | """ 22 | 23 | def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result, 24 | gt_flags): 25 | self.pos_inds = pos_inds 26 | self.neg_inds = neg_inds 27 | self.pos_bboxes = bboxes[pos_inds] 28 | self.neg_bboxes = bboxes[neg_inds] 29 | self.pos_is_gt = gt_flags[pos_inds] 30 | 31 | self.num_gts = gt_bboxes.shape[0] 32 | self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1 33 | 34 | if gt_bboxes.numel() == 0: 35 | # hack for index error case 36 | assert self.pos_assigned_gt_inds.numel() == 0 37 | self.pos_gt_bboxes = torch.empty_like(gt_bboxes).view(-1, 4) 38 | else: 39 | if len(gt_bboxes.shape) < 2: 40 | gt_bboxes = gt_bboxes.view(-1, 4) 41 | 42 | self.pos_gt_bboxes = gt_bboxes[self.pos_assigned_gt_inds, :] 43 | 44 | if assign_result.labels is not None: 45 | self.pos_gt_labels = assign_result.labels[pos_inds] 46 | else: 47 | self.pos_gt_labels = None 48 | 49 | @property 50 | def bboxes(self): 51 | return torch.cat([self.pos_bboxes, self.neg_bboxes]) 52 | 53 | def to(self, device): 54 | """ 55 | Change the device of the data inplace. 
56 | 57 | Example: 58 | >>> self = SamplingResult.random() 59 | >>> print('self = {}'.format(self.to(None))) 60 | >>> # xdoctest: +REQUIRES(--gpu) 61 | >>> print('self = {}'.format(self.to(0))) 62 | """ 63 | _dict = self.__dict__ 64 | for key, value in _dict.items(): 65 | if isinstance(value, torch.Tensor): 66 | _dict[key] = value.to(device) 67 | return self 68 | 69 | def __nice__(self): 70 | data = self.info.copy() 71 | data['pos_bboxes'] = data.pop('pos_bboxes').shape 72 | data['neg_bboxes'] = data.pop('neg_bboxes').shape 73 | parts = ['\'{}\': {!r}'.format(k, v) for k, v in sorted(data.items())] 74 | body = ' ' + ',\n '.join(parts) 75 | return '{\n' + body + '\n}' 76 | 77 | @property 78 | def info(self): 79 | """ 80 | Returns a dictionary of info about the object 81 | """ 82 | return { 83 | 'pos_inds': self.pos_inds, 84 | 'neg_inds': self.neg_inds, 85 | 'pos_bboxes': self.pos_bboxes, 86 | 'neg_bboxes': self.neg_bboxes, 87 | 'pos_is_gt': self.pos_is_gt, 88 | 'num_gts': self.num_gts, 89 | 'pos_assigned_gt_inds': self.pos_assigned_gt_inds, 90 | } 91 | -------------------------------------------------------------------------------- /quarkdet/model/loss/utils.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import torch.nn.functional as F 4 | 5 | 6 | def reduce_loss(loss, reduction): 7 | """Reduce loss as specified. 8 | 9 | Args: 10 | loss (Tensor): Elementwise loss tensor. 11 | reduction (str): Options are "none", "mean" and "sum". 12 | 13 | Return: 14 | Tensor: Reduced loss tensor. 15 | """ 16 | reduction_enum = F._Reduction.get_enum(reduction) 17 | # none: 0, elementwise_mean:1, sum: 2 18 | if reduction_enum == 0: 19 | return loss 20 | elif reduction_enum == 1: 21 | return loss.mean() 22 | elif reduction_enum == 2: 23 | return loss.sum() 24 | 25 | 26 | def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None): 27 | """Apply element-wise weight and reduce loss. 28 | 29 | Args: 30 | loss (Tensor): Element-wise loss. 31 | weight (Tensor): Element-wise weights. 32 | reduction (str): Same as built-in losses of PyTorch. 33 | avg_factor (float): Avarage factor when computing the mean of losses. 34 | 35 | Returns: 36 | Tensor: Processed loss values. 37 | """ 38 | # if weight is specified, apply element-wise weight 39 | if weight is not None: 40 | loss = loss * weight 41 | 42 | # if avg_factor is not specified, just reduce the loss 43 | if avg_factor is None: 44 | loss = reduce_loss(loss, reduction) 45 | else: 46 | # if reduction is mean, then average the loss by avg_factor 47 | if reduction == 'mean': 48 | loss = loss.sum() / avg_factor 49 | # if reduction is 'none', then do nothing, otherwise raise an error 50 | elif reduction != 'none': 51 | raise ValueError('avg_factor can not be used with reduction="sum"') 52 | return loss 53 | 54 | 55 | def weighted_loss(loss_func): 56 | """Create a weighted version of a given loss function. 57 | 58 | To use this decorator, the loss function must have the signature like 59 | `loss_func(pred, target, **kwargs)`. The function only needs to compute 60 | element-wise loss without any reduction. This decorator will add weight 61 | and reduction arguments to the function. The decorated function will have 62 | the signature like `loss_func(pred, target, weight=None, reduction='mean', 63 | avg_factor=None, **kwargs)`. 
64 | 65 | :Example: 66 | 67 | >>> import torch 68 | >>> @weighted_loss 69 | >>> def l1_loss(pred, target): 70 | >>> return (pred - target).abs() 71 | 72 | >>> pred = torch.Tensor([0, 2, 3]) 73 | >>> target = torch.Tensor([1, 1, 1]) 74 | >>> weight = torch.Tensor([1, 0, 1]) 75 | 76 | >>> l1_loss(pred, target) 77 | tensor(1.3333) 78 | >>> l1_loss(pred, target, weight) 79 | tensor(1.) 80 | >>> l1_loss(pred, target, reduction='none') 81 | tensor([1., 1., 2.]) 82 | >>> l1_loss(pred, target, weight, avg_factor=2) 83 | tensor(1.5000) 84 | """ 85 | 86 | @functools.wraps(loss_func) 87 | def wrapper(pred, 88 | target, 89 | weight=None, 90 | reduction='mean', 91 | avg_factor=None, 92 | **kwargs): 93 | # get element-wise loss 94 | loss = loss_func(pred, target, **kwargs) 95 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 96 | return loss 97 | 98 | return wrapper 99 | -------------------------------------------------------------------------------- /config/shufflenetv2_0.5x.yml: -------------------------------------------------------------------------------- 1 | #Config File example 2 | save_dir: workspace/shufflenetv2_05x 3 | model: 4 | detector: 5 | name: GFL 6 | backbone: 7 | name: ShuffleNetV2 8 | out_stages: [2,3,4] 9 | activation: LeakyReLU 10 | model_size: 0.5x 11 | neck: 12 | name: PAN 13 | in_channels: [48, 96, 192] 14 | out_channels: 96 15 | start_level: 0 16 | num_outs: 3 17 | head: 18 | name: QuarkDetHead 19 | num_classes: 80 # 80 20 | input_channel: 96 21 | feat_channels: 96 22 | stacked_convs: 2 23 | share_cls_reg: True #True 24 | octave_base_scale: 5 25 | scales_per_octave: 1 26 | strides: [8, 16, 32] 27 | reg_max: 7 #16 #7 28 | norm_cfg: 29 | type: BN 30 | loss: 31 | loss_qfl: 32 | name: QualityFocalLoss 33 | use_sigmoid: False #True 34 | beta: 2.0 35 | loss_weight: 1.0 36 | loss_dfl: 37 | name: DistributionFocalLoss 38 | loss_weight: 0.25 39 | loss_bbox: 40 | name: GIoULoss 41 | loss_weight: 2.0 42 | data: 43 | train: 44 | name: coco 45 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/train2017 46 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_train2017.json 47 | input_size: [320,320] #[w,h] 48 | keep_ratio: True 49 | pipeline: 50 | perspective: 0.0 51 | scale: [0.6, 1.4] 52 | stretch: [[1, 1], [1, 1]] 53 | rotation: 0 54 | shear: 0 55 | translate: 0 56 | flip: 0.5 57 | brightness: 0.2 58 | contrast: [0.8, 1.2] 59 | saturation: [0.8, 1.2] 60 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 61 | val: 62 | name: coco 63 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/val2017 64 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_val2017.json 65 | input_size: [320,320] #[w,h] 66 | keep_ratio: True 67 | pipeline: 68 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 69 | device: 70 | gpu_ids: [0,1] 71 | workers_per_gpu: 8 72 | batchsize_per_gpu: 80 # santiago test 73 | schedule: 74 | resume: False 75 | load_model: ./workspace/shufflenetv2_05x/model_last.pth 76 | 77 | optimizer: 78 | name: SGD 79 | lr: 0.14 80 | momentum: 0.9 81 | weight_decay: 0.0001 82 | warmup: 83 | name: linear 84 | steps: 300 85 | ratio: 0.1 86 | total_epochs: 160 #70 87 | 88 | lr_schedule: 89 | name: ReduceLROnPlateau 90 | mode: min 91 | factor: 0.1 92 | patience: 3 #15 93 | verbose: True 94 | threshold: 0.00001 95 | threshold_mode: rel 96 | cooldown: 0 97 | min_lr: 0 98 | eps: 0.000000001 #1e-08 99 | val_intervals: 5 #5 100 | evaluator: 101 | name: CocoDetectionEvaluator 102 | save_key: mAP 103 | 104 | log: 105 | 
interval: 10 106 | 107 | class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 108 | 'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant', 109 | 'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog', 110 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 111 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 112 | 'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat', 113 | 'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket', 114 | 'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 115 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 116 | 'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 117 | 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop', 118 | 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave', 119 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 120 | 'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush'] 121 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /quarkdet/util/util_mixins.py: -------------------------------------------------------------------------------- 1 | """This module defines the :class:`NiceRepr` mixin class, which defines a 2 | ``__repr__`` and ``__str__`` method that only depend on a custom ``__nice__`` 3 | method, which you must define. This means you only have to overload one 4 | function instead of two. Furthermore, if the object defines a ``__len__`` 5 | method, then the ``__nice__`` method defaults to something sensible, otherwise 6 | it is treated as abstract and raises ``NotImplementedError``. 7 | 8 | To use simply have your object inherit from :class:`NiceRepr` 9 | (multi-inheritance should be ok). 10 | 11 | This code was copied from the ubelt library: https://github.com/Erotemic/ubelt 12 | 13 | Example: 14 | >>> # Objects that define __nice__ have a default __str__ and __repr__ 15 | >>> class Student(NiceRepr): 16 | ... def __init__(self, name): 17 | ... self.name = name 18 | ... def __nice__(self): 19 | ... return self.name 20 | >>> s1 = Student('Alice') 21 | >>> s2 = Student('Bob') 22 | >>> print(f's1 = {s1}') 23 | >>> print(f's2 = {s2}') 24 | s1 = 25 | s2 = 26 | 27 | Example: 28 | >>> # Objects that define __len__ have a default __nice__ 29 | >>> class Group(NiceRepr): 30 | ... def __init__(self, data): 31 | ... self.data = data 32 | ... def __len__(self): 33 | ... return len(self.data) 34 | >>> g = Group([1, 2, 3]) 35 | >>> print(f'g = {g}') 36 | g = 37 | """ 38 | import warnings 39 | 40 | 41 | class NiceRepr(object): 42 | """Inherit from this class and define ``__nice__`` to "nicely" print your 43 | objects. 44 | 45 | Defines ``__str__`` and ``__repr__`` in terms of ``__nice__`` function 46 | Classes that inherit from :class:`NiceRepr` should redefine ``__nice__``. 47 | If the inheriting class has a ``__len__``, method then the default 48 | ``__nice__`` method will return its length. 49 | 50 | Example: 51 | >>> class Foo(NiceRepr): 52 | ... def __nice__(self): 53 | ... return 'info' 54 | >>> foo = Foo() 55 | >>> assert str(foo) == '' 56 | >>> assert repr(foo).startswith('>> class Bar(NiceRepr): 60 | ... pass 61 | >>> bar = Bar() 62 | >>> import pytest 63 | >>> with pytest.warns(None) as record: 64 | >>> assert 'object at' in str(bar) 65 | >>> assert 'object at' in repr(bar) 66 | 67 | Example: 68 | >>> class Baz(NiceRepr): 69 | ... def __len__(self): 70 | ... 
return 5 71 | >>> baz = Baz() 72 | >>> assert str(baz) == '' 73 | """ 74 | 75 | def __nice__(self): 76 | """str: a "nice" summary string describing this module""" 77 | if hasattr(self, '__len__'): 78 | # It is a common pattern for objects to use __len__ in __nice__ 79 | # As a convenience we define a default __nice__ for these objects 80 | return str(len(self)) 81 | else: 82 | # In all other cases force the subclass to overload __nice__ 83 | raise NotImplementedError( 84 | f'Define the __nice__ method for {self.__class__!r}') 85 | 86 | def __repr__(self): 87 | """str: the string of the module""" 88 | try: 89 | nice = self.__nice__() 90 | classname = self.__class__.__name__ 91 | return f'<{classname}({nice}) at {hex(id(self))}>' 92 | except NotImplementedError as ex: 93 | warnings.warn(str(ex), category=RuntimeWarning) 94 | return object.__repr__(self) 95 | 96 | def __str__(self): 97 | """str: the string of the module""" 98 | try: 99 | classname = self.__class__.__name__ 100 | nice = self.__nice__() 101 | return f'<{classname}({nice})>' 102 | except NotImplementedError as ex: 103 | warnings.warn(str(ex), category=RuntimeWarning) 104 | return object.__repr__(self) 105 | -------------------------------------------------------------------------------- /quarkdet/model/neck/fpn.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from ..module.conv import ConvModule 6 | from ..module.init_weights import xavier_init 7 | 8 | 9 | class FPN(nn.Module): 10 | 11 | def __init__(self, 12 | in_channels, 13 | out_channels, 14 | num_outs, 15 | start_level=0, 16 | end_level=-1, 17 | conv_cfg=None, 18 | norm_cfg=None, 19 | activation=None 20 | ): 21 | super(FPN, self).__init__() 22 | assert isinstance(in_channels, list) 23 | self.in_channels = in_channels 24 | self.out_channels = out_channels 25 | self.num_ins = len(in_channels) 26 | self.num_outs = num_outs 27 | self.fp16_enabled = False 28 | 29 | if end_level == -1: 30 | self.backbone_end_level = self.num_ins 31 | assert num_outs >= self.num_ins - start_level 32 | else: 33 | # if end_level < inputs, no extra level is allowed 34 | self.backbone_end_level = end_level 35 | assert end_level <= len(in_channels) 36 | assert num_outs == end_level - start_level 37 | self.start_level = start_level 38 | self.end_level = end_level 39 | self.lateral_convs = nn.ModuleList() 40 | self.fpn_convs = nn.ModuleList() 41 | 42 | # for i in range(self.start_level, self.backbone_end_level): 43 | # l_conv = ConvModule( 44 | # in_channels[i], 45 | # out_channels, 46 | # 1, 47 | # conv_cfg=conv_cfg, 48 | # norm_cfg=norm_cfg, 49 | # activation=activation, 50 | # inplace=False) 51 | 52 | # self.lateral_convs.append(l_conv) 53 | 54 | for i in range(self.start_level, self.backbone_end_level): 55 | l_conv = ConvModule( 56 | in_channels[i], 57 | out_channels, 58 | 1, 59 | conv_cfg=conv_cfg, 60 | norm_cfg=norm_cfg, 61 | #act_cfg=act_cfg, 62 | activation=activation, 63 | inplace=False) 64 | fpn_conv = ConvModule( 65 | out_channels, 66 | out_channels, 67 | 3, 68 | padding=1, 69 | conv_cfg=conv_cfg, 70 | norm_cfg=norm_cfg, 71 | activation=activation, 72 | inplace=False) 73 | 74 | self.lateral_convs.append(l_conv) 75 | self.fpn_convs.append(fpn_conv) 76 | print("FPN:",self.lateral_convs) 77 | self.init_weights() 78 | 79 | # default init_weights for conv(msra) and norm in ConvModule 80 | def init_weights(self): 81 | for m in self.modules(): 82 | if isinstance(m, nn.Conv2d): 83 | 
xavier_init(m, distribution='uniform') 84 | 85 | def forward(self, inputs): 86 | assert len(inputs) == len(self.in_channels) 87 | 88 | # build laterals 89 | laterals = [ 90 | lateral_conv(inputs[i + self.start_level]) 91 | for i, lateral_conv in enumerate(self.lateral_convs) 92 | ] 93 | 94 | # build top-down path 95 | used_backbone_levels = len(laterals) 96 | for i in range(used_backbone_levels - 1, 0, -1): 97 | prev_shape = laterals[i - 1].shape[2:] 98 | laterals[i - 1] += F.interpolate( 99 | laterals[i], size=prev_shape, mode='bilinear', align_corners=True) 100 | 101 | # build outputs 102 | # outs = [ 103 | # # self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels) 104 | # laterals[i] for i in range(used_backbone_levels) 105 | # ] 106 | outs = [ 107 | self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels) 108 | ] 109 | # The two fpn_convs levels with stride=(2, 2) are removed here; lateral_convs and fpn_convs both have three levels 110 | return tuple(outs) 111 | 112 | 113 | # if __name__ == '__main__': 114 | -------------------------------------------------------------------------------- /tools/train.py: -------------------------------------------------------------------------------- 1 | from torchsummary import summary 2 | import sys 3 | sys.path.append("./") 4 | from quarkdet.evaluator import build_evaluator 5 | from quarkdet.model.detector import build_model 6 | from quarkdet.data.dataset import build_dataset 7 | from quarkdet.data.collate import custom_collate_function 8 | from quarkdet.trainer import build_trainer 9 | from quarkdet.util import mkdir, Logger, cfg, load_config 10 | import os 11 | import torch 12 | import logging 13 | import argparse 14 | import numpy as np 15 | import torch.distributed as dist 16 | 17 | 18 | 19 | 20 | def parse_args(): 21 | parser = argparse.ArgumentParser() 22 | parser.add_argument('config', help='train config file path') 23 | parser.add_argument('--local_rank', default=-1, type=int, 24 | help='node rank for distributed training') 25 | parser.add_argument('--seed', type=int, default=None, 26 | help='random seed') 27 | args = parser.parse_args() 28 | return args 29 | 30 | 31 | def init_seeds(seed=0): 32 | """ 33 | manually set a random seed for numpy, torch and cuda 34 | :param seed: random seed 35 | """ 36 | torch.manual_seed(seed) 37 | np.random.seed(seed) 38 | torch.cuda.manual_seed(seed) 39 | torch.cuda.manual_seed_all(seed) 40 | if seed == 0: 41 | torch.backends.cudnn.deterministic = True 42 | torch.backends.cudnn.benchmark = False 43 | 44 | def collate_fn_coco(batch): 45 | return tuple(zip(*batch)) 46 | def main(args): 47 | load_config(cfg, args.config) 48 | local_rank = int(args.local_rank) 49 | torch.backends.cudnn.enabled = True 50 | torch.backends.cudnn.benchmark = True 51 | mkdir(local_rank, cfg.save_dir) 52 | logger = Logger(local_rank, cfg.save_dir) 53 | if args.seed is not None: 54 | logger.log('Set random seed to {}'.format(args.seed)) 55 | init_seeds(args.seed) 56 | 57 | logger.log('Creating model...') 58 | model = build_model(cfg.model) 59 | 60 | print("model:", model) 61 | 62 | # pre_dict = model.state_dict() # load the model parameters into pre_dict as key-value pairs 63 | # for k, v in pre_dict.items(): # print the model parameters 64 | # for k, v in pre_dict.items(): # print the name of every model layer 65 | # print ('%-50s%s' %(k,v.shape)) 66 | 67 | #summary(model, (3, 320, 320)) 68 | 69 | logger.log('Setting up data...') 70 | train_dataset = build_dataset(cfg.data.train, 'train') 71 | val_dataset = build_dataset(cfg.data.val, 'test') 72 | 73 | if len(cfg.device.gpu_ids) > 1: 74 | print('rank = ', local_rank) 75 | num_gpus =
torch.cuda.device_count() 76 | torch.cuda.set_device(local_rank % num_gpus) 77 | dist.init_process_group(backend='nccl') 78 | train_sampler = torch.utils.data.distributed.DistributedSampler( 79 | train_dataset) 80 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=cfg.device.batchsize_per_gpu, 81 | num_workers=cfg.device.workers_per_gpu, pin_memory=True, 82 | collate_fn=custom_collate_function, sampler=train_sampler, 83 | drop_last=True) 84 | else: 85 | 86 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=cfg.device.batchsize_per_gpu, 87 | shuffle=True,collate_fn=custom_collate_function, 88 | 89 | pin_memory=True, drop_last=True) 90 | 91 | 92 | val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=1, shuffle=False, num_workers=1, 93 | pin_memory=True, collate_fn=custom_collate_function, drop_last=True) 94 | 95 | trainer = build_trainer(local_rank, cfg, model, logger) 96 | 97 | if cfg.schedule.resume: 98 | trainer.resume(cfg) 99 | if 'load_model' in cfg.schedule: 100 | trainer.load_model(cfg) 101 | 102 | evaluator = build_evaluator(cfg, val_dataset) 103 | 104 | logger.log('Starting training...') 105 | trainer.run(train_dataloader, val_dataloader, evaluator) 106 | 107 | 108 | if __name__ == '__main__': 109 | args = parse_args() 110 | main(args) 111 | -------------------------------------------------------------------------------- /demo/demo.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import os 3 | import time 4 | import torch 5 | import argparse 6 | import sys 7 | sys.path.append("./") 8 | from quarkdet.util import cfg, load_config, Logger 9 | from quarkdet.model.detector import build_model 10 | from quarkdet.util import load_model_weight 11 | from quarkdet.data.transform import Pipeline 12 | 13 | 14 | image_ext = ['.jpg', '.jpeg', '.webp', '.bmp', '.png'] 15 | video_ext = ['mp4', 'mov', 'avi', 'mkv'] 16 | 17 | 18 | def parse_args(): 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument('demo', default='image', help='demo type, eg. 
image, video and webcam') 21 | parser.add_argument('--config', help='model config file path') 22 | parser.add_argument('--model', help='model file path') 23 | parser.add_argument('--path', default='./demo', help='path to images or video') 24 | parser.add_argument('--camid', type=int, default=0, help='webcam demo camera id') 25 | args = parser.parse_args() 26 | return args 27 | 28 | 29 | class Predictor(object): 30 | def __init__(self, cfg, model_path, logger, device='cuda:0'): 31 | self.cfg = cfg 32 | self.device = device 33 | model = build_model(cfg.model) 34 | ckpt = torch.load(model_path, map_location=lambda storage, loc: storage) 35 | load_model_weight(model, ckpt, logger) 36 | self.model = model.to(device).eval() 37 | self.pipeline = Pipeline(cfg.data.val.pipeline, cfg.data.val.keep_ratio) 38 | 39 | def inference(self, img): 40 | img_info = {} 41 | if isinstance(img, str): 42 | img_info['file_name'] = os.path.basename(img) 43 | img = cv2.imread(img) 44 | else: 45 | img_info['file_name'] = None 46 | 47 | height, width = img.shape[:2] 48 | img_info['height'] = height 49 | img_info['width'] = width 50 | meta = dict(img_info=img_info, 51 | raw_img=img, 52 | img=img) 53 | meta = self.pipeline(meta, self.cfg.data.val.input_size) 54 | meta['img'] = torch.from_numpy(meta['img'].transpose(2, 0, 1)).unsqueeze(0).to(self.device) 55 | with torch.no_grad(): 56 | results = self.model.inference(meta) 57 | return meta, results 58 | 59 | def visualize(self, dets, meta, class_names, score_thres, wait=0): 60 | time1 = time.time() 61 | self.model.head.show_result(meta['raw_img'], dets, class_names, score_thres=score_thres, show=True) 62 | print('viz time: {:.3f}s'.format(time.time()-time1)) 63 | 64 | 65 | def get_image_list(path): 66 | image_names = [] 67 | for maindir, subdir, file_name_list in os.walk(path): 68 | for filename in file_name_list: 69 | apath = os.path.join(maindir, filename) 70 | ext = os.path.splitext(apath)[1] 71 | if ext in image_ext: 72 | image_names.append(apath) 73 | return image_names 74 | 75 | 76 | def main(): 77 | args = parse_args() 78 | torch.backends.cudnn.deterministic = True 79 | torch.backends.cudnn.benchmark = False 80 | 81 | load_config(cfg, args.config) 82 | logger = Logger(-1, use_tensorboard=False) 83 | predictor = Predictor(cfg, args.model, logger, device='cuda:0') 84 | logger.log('Press "Esc", "q" or "Q" to exit.') 85 | if args.demo == 'image': 86 | if os.path.isdir(args.path): 87 | files = get_image_list(args.path) 88 | else: 89 | files = [args.path] 90 | files.sort() 91 | for image_name in files: 92 | meta, res = predictor.inference(image_name) 93 | predictor.visualize(res, meta, cfg.class_names, 0.35) 94 | ch = cv2.waitKey(0) 95 | if ch == 27 or ch == ord('q') or ch == ord('Q'): 96 | break 97 | elif args.demo == 'video' or args.demo == 'webcam': 98 | cap = cv2.VideoCapture(args.path if args.demo == 'video' else args.camid) 99 | while True: 100 | ret_val, frame = cap.read() 101 | meta, res = predictor.inference(frame) 102 | predictor.visualize(res, meta, cfg.class_names, 0.35) 103 | ch = cv2.waitKey(1) 104 | if ch == 27 or ch == ord('q') or ch == ord('Q'): 105 | break 106 | 107 | 108 | if __name__ == '__main__': 109 | main() 110 | -------------------------------------------------------------------------------- /quarkdet/model/backbone/mobilenetv2.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import 
torch 6 | import torch.nn as nn 7 | from ..module.activation import act_layers 8 | 9 | 10 | class ConvBNReLU(nn.Sequential): 11 | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1, act='ReLU'): 12 | padding = (kernel_size - 1) // 2 13 | super(ConvBNReLU, self).__init__( 14 | nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False), 15 | nn.BatchNorm2d(out_planes), 16 | act_layers(act) 17 | ) 18 | 19 | 20 | class InvertedResidual(nn.Module): 21 | def __init__(self, inp, oup, stride, expand_ratio, act='ReLU'): 22 | super(InvertedResidual, self).__init__() 23 | self.stride = stride 24 | assert stride in [1, 2] 25 | 26 | hidden_dim = int(round(inp * expand_ratio)) 27 | self.use_res_connect = self.stride == 1 and inp == oup 28 | 29 | layers = [] 30 | if expand_ratio != 1: 31 | # pw 32 | layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1, act=act)) 33 | layers.extend([ 34 | # dw 35 | ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim, act=act), 36 | # pw-linear 37 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 38 | nn.BatchNorm2d(oup), 39 | ]) 40 | self.conv = nn.Sequential(*layers) 41 | 42 | def forward(self, x): 43 | if self.use_res_connect: 44 | return x + self.conv(x) 45 | else: 46 | return self.conv(x) 47 | 48 | 49 | class MobileNetV2(nn.Module): 50 | def __init__(self, width_mult=1., out_stages=(1, 2, 4, 6), last_channel=1280, act='ReLU'): 51 | super(MobileNetV2, self).__init__() 52 | self.width_mult = width_mult 53 | self.out_stages = out_stages 54 | input_channel = 32 55 | self.last_channel = last_channel 56 | self.act = act 57 | self.interverted_residual_setting = [ 58 | # t, c, n, s 59 | [1, 16, 1, 1], 60 | [6, 24, 2, 2], 61 | [6, 32, 3, 2], 62 | [6, 64, 4, 2], 63 | [6, 96, 3, 1], 64 | [6, 160, 3, 2], 65 | [6, 320, 1, 1], 66 | ] 67 | 68 | # building first layer 69 | self.input_channel = int(input_channel * width_mult) 70 | self.first_layer = ConvBNReLU(3, input_channel, stride=2, act=self.act) 71 | # building inverted residual blocks 72 | for i in range(7): 73 | name = 'stage{}'.format(i) 74 | setattr(self, name, self.build_mobilenet_stage(stage_num=i)) 75 | 76 | def build_mobilenet_stage(self, stage_num): 77 | stage = [] 78 | t, c, n, s = self.interverted_residual_setting[stage_num] 79 | output_channel = int(c * self.width_mult) 80 | for i in range(n): 81 | if i == 0: 82 | stage.append(InvertedResidual(self.input_channel, output_channel, s, expand_ratio=t, act=self.act)) 83 | else: 84 | stage.append(InvertedResidual(self.input_channel, output_channel, 1, expand_ratio=t, act=self.act)) 85 | self.input_channel = output_channel 86 | if stage_num == 6: 87 | last_layer = ConvBNReLU(self.input_channel, self.last_channel, kernel_size=1, act=self.act) 88 | stage.append(last_layer) 89 | stage = nn.Sequential(*stage) 90 | return stage 91 | 92 | def forward(self, x): 93 | x = self.first_layer(x) 94 | output = [] 95 | for i in range(0, 7): 96 | stage = getattr(self, 'stage{}'.format(i)) 97 | x = stage(x) 98 | if i in self.out_stages: 99 | output.append(x) 100 | 101 | return tuple(output) 102 | 103 | def init_weights(self): 104 | for m in self.modules(): 105 | if isinstance(m, nn.Conv2d): 106 | nn.init.normal_(m.weight, std=0.001) 107 | if m.bias is not None: 108 | m.bias.data.zero_() 109 | elif isinstance(m, nn.BatchNorm2d): 110 | m.weight.data.fill_(1) 111 | m.bias.data.zero_() 112 | 113 | -------------------------------------------------------------------------------- 
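A minimal smoke test for the MobileNetV2 backbone above (an illustrative sketch, not part of the repository; the import path follows the project tree, and out_stages=(2, 4, 6) is an assumed choice that yields stride-8/16/32 features matching the head strides used in the configs):

import torch
from quarkdet.model.backbone.mobilenetv2 import MobileNetV2

backbone = MobileNetV2(width_mult=1.0, out_stages=(2, 4, 6))
backbone.init_weights()
dummy = torch.randn(1, 3, 320, 320)  # same input size as the 320x320 configs
features = backbone(dummy)
for feat in features:
    print(feat.shape)
# Expected shapes for a 320x320 input (strides 8, 16 and 32):
# torch.Size([1, 32, 40, 40]), torch.Size([1, 96, 20, 20]), torch.Size([1, 1280, 10, 10])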
/quarkdet/model/head/anchor/anchor_generator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class AnchorGenerator(object): 5 | """ 6 | Examples: 7 | >>> self = AnchorGenerator(9, [1.], [1.]) 8 | >>> all_anchors = self.grid_anchors((2, 2), device='cpu') 9 | >>> print(all_anchors) 10 | tensor([[ 0., 0., 8., 8.], 11 | [16., 0., 24., 8.], 12 | [ 0., 16., 8., 24.], 13 | [16., 16., 24., 24.]]) 14 | """ 15 | 16 | def __init__(self, base_size, scales, ratios, scale_major=True, ctr=None): 17 | self.base_size = base_size 18 | self.scales = torch.Tensor(scales) 19 | self.ratios = torch.Tensor(ratios) 20 | self.scale_major = scale_major 21 | self.ctr = ctr 22 | self.base_anchors = self.gen_base_anchors() 23 | 24 | @property 25 | def num_base_anchors(self): 26 | return self.base_anchors.size(0) 27 | 28 | def gen_base_anchors(self): 29 | w = self.base_size 30 | h = self.base_size 31 | if self.ctr is None: 32 | x_ctr = 0.5 * (w - 1) 33 | y_ctr = 0.5 * (h - 1) 34 | else: 35 | x_ctr, y_ctr = self.ctr 36 | 37 | h_ratios = torch.sqrt(self.ratios) 38 | w_ratios = 1 / h_ratios 39 | if self.scale_major: 40 | ws = (w * w_ratios[:, None] * self.scales[None, :]).view(-1) 41 | hs = (h * h_ratios[:, None] * self.scales[None, :]).view(-1) 42 | else: 43 | ws = (w * self.scales[:, None] * w_ratios[None, :]).view(-1) 44 | hs = (h * self.scales[:, None] * h_ratios[None, :]).view(-1) 45 | 46 | # yapf: disable 47 | base_anchors = torch.stack( 48 | [ 49 | x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1), 50 | x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1) 51 | ], 52 | dim=-1).round() 53 | # yapf: enable 54 | #print("base_size:{%s}, scales:{%s}, ratios:{%s}"%(self.base_size, self.scales, self.ratios)) 55 | #print("base_anchors",base_anchors) 56 | 57 | return base_anchors 58 | 59 | def _meshgrid(self, x, y, row_major=True): 60 | xx = x.repeat(len(y)) 61 | yy = y.view(-1, 1).repeat(1, len(x)).view(-1) 62 | if row_major: 63 | return xx, yy 64 | else: 65 | return yy, xx 66 | 67 | def grid_anchors(self, featmap_size, stride=16, device='cuda'): 68 | base_anchors = self.base_anchors.to(device) 69 | # print("grid_anchors base_size:{%s}, scales:{%s}, ratios:{%s}"%(self.base_size, self.scales, self.ratios)) 70 | # print("grid_anchors base_anchors",base_anchors) 71 | 72 | feat_h, feat_w = featmap_size 73 | #print(feat_h,feat_w) 74 | shift_x = torch.arange(0, feat_w, device=device) * stride 75 | shift_y = torch.arange(0, feat_h, device=device) * stride 76 | # print("shift_x:",shift_x) 77 | # print("shift_y:",shift_y) 78 | shift_xx, shift_yy = self._meshgrid(shift_x, shift_y) 79 | shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1) 80 | shifts = shifts.type_as(base_anchors) 81 | # first feat_w elements correspond to the first row of shifts 82 | # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get 83 | # shifted anchors (K, A, 4), reshape to (K*A, 4) 84 | 85 | all_anchors = base_anchors[None, :, :] + shifts[:, None, :] 86 | all_anchors = all_anchors.view(-1, 4) 87 | # first A rows correspond to A anchors of (0, 0) in feature map, 88 | # then (0, 1), (0, 2), ... 
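# Worked example of the broadcast above: with a single base anchor (A = 1)
# on a 2x2 feature map (K = 4) and the default stride of 16, the sum of
# (1, A, 4) and (K, 1, 4) broadcasts to (K, A, 4); view(-1, 4) then gives the
# 4x4 tensor shown in the class docstring.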
89 | return all_anchors 90 | 91 | def valid_flags(self, featmap_size, valid_size, device='cuda'): 92 | feat_h, feat_w = featmap_size 93 | valid_h, valid_w = valid_size 94 | # print("valid_flags featmap_size:",featmap_size) 95 | # print("valid_flags valid_size:",valid_size) 96 | 97 | assert valid_h <= feat_h and valid_w <= feat_w 98 | valid_x = torch.zeros(feat_w, dtype=torch.bool, device=device) 99 | valid_y = torch.zeros(feat_h, dtype=torch.bool, device=device) 100 | valid_x[:valid_w] = 1 101 | valid_y[:valid_h] = 1 102 | valid_xx, valid_yy = self._meshgrid(valid_x, valid_y) 103 | #print("valid_xx, valid_yy:",valid_xx, valid_yy) 104 | valid = valid_xx & valid_yy 105 | valid = valid[:, None].expand(valid.size(0), 106 | self.num_base_anchors).contiguous().view(-1) 107 | #print("valid_flags valid:",valid) 108 | return valid 109 | -------------------------------------------------------------------------------- /quarkdet/model/neck/pan.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from ..module.conv import ConvModule 4 | from .fpn import FPN 5 | import numpy as np 6 | 7 | 8 | class PAN(FPN): 9 | """Path Aggregation Network for Instance Segmentation. 10 | 11 | This is an implementation of the `PAN in Path Aggregation Network 12 | `_. 13 | 14 | Args: 15 | in_channels (List[int]): Number of input channels per scale. 16 | out_channels (int): Number of output channels (used at each scale) 17 | num_outs (int): Number of output scales. 18 | start_level (int): Index of the start input backbone level used to 19 | build the feature pyramid. Default: 0. 20 | end_level (int): Index of the end input backbone level (exclusive) to 21 | build the feature pyramid. Default: -1, which means the last level. 22 | add_extra_convs (bool): Whether to add conv layers on top of the 23 | original feature maps. Default: False. 24 | extra_convs_on_inputs (bool): Whether to apply extra conv on 25 | the original feature from the backbone. Default: False. 26 | relu_before_extra_convs (bool): Whether to apply relu before the extra 27 | conv. Default: False. 28 | no_norm_on_lateral (bool): Whether to apply norm on lateral. 29 | Default: False. 30 | conv_cfg (dict): Config dict for convolution layer. Default: None. 31 | norm_cfg (dict): Config dict for normalization layer. Default: None. 32 | act_cfg (str): Config dict for activation layer in ConvModule. 33 | Default: None. 
34 | """ 35 | 36 | def __init__(self, 37 | in_channels, 38 | out_channels, 39 | num_outs, 40 | start_level=0, 41 | end_level=-1, 42 | conv_cfg=None, 43 | norm_cfg=None, 44 | activation=None): 45 | super(PAN, 46 | self).__init__(in_channels, out_channels, num_outs, start_level, 47 | end_level, conv_cfg, norm_cfg, activation) 48 | #显示调用基类的__init__方法,Python不会自动执行这些初始化操作。 49 | print("PAN:",in_channels, out_channels, num_outs, start_level,end_level, conv_cfg, norm_cfg, activation) 50 | self.init_weights() 51 | # add extra bottom up pathway 52 | self.downsample_convs = nn.ModuleList() 53 | self.pan_convs = nn.ModuleList() 54 | 55 | for i in range(self.start_level + 1, self.backbone_end_level): 56 | d_conv = ConvModule( 57 | out_channels, 58 | out_channels, 59 | 3, 60 | stride=2, 61 | padding=1, 62 | conv_cfg=conv_cfg, 63 | norm_cfg=norm_cfg, 64 | activation=None, 65 | inplace=False) 66 | pafpn_conv = ConvModule( 67 | out_channels, 68 | out_channels, 69 | 3, 70 | padding=1, 71 | conv_cfg=conv_cfg, 72 | norm_cfg=norm_cfg, 73 | activation=None, 74 | inplace=False) 75 | self.downsample_convs.append(d_conv) 76 | self.pan_convs.append(pafpn_conv) 77 | 78 | def forward(self, inputs): 79 | """Forward function.""" 80 | assert len(inputs) == len(self.in_channels) 81 | #print("PAN forward:",self.in_channels, self.out_channels, self.num_outs, self.start_level,self.end_level) 82 | 83 | 84 | # build laterals 85 | laterals = [ 86 | lateral_conv(inputs[i + self.start_level]) 87 | for i, lateral_conv in enumerate(self.lateral_convs) 88 | ] 89 | 90 | # build top-down path 91 | used_backbone_levels = len(laterals) 92 | for i in range(used_backbone_levels - 1, 0, -1): #i=[2,1] 93 | prev_shape = laterals[i - 1].shape[2:] 94 | laterals[i - 1] += F.interpolate( 95 | laterals[i], size=prev_shape, mode='bilinear',align_corners=True) 96 | 97 | # part 1: from original levels 98 | 99 | inter_outs = [ 100 | self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels) 101 | ] 102 | 103 | 104 | # part 2: add bottom-up path 105 | for i in range(0, used_backbone_levels - 1): 106 | inter_outs[i + 1] += self.downsample_convs[i](inter_outs[i]) 107 | 108 | outs = [] 109 | outs.append(inter_outs[0]) 110 | outs.extend([ 111 | self.pan_convs[i - 1](inter_outs[i]) 112 | for i in range(1, used_backbone_levels) 113 | ]) 114 | 115 | 116 | 117 | 118 | return tuple(outs) 119 | -------------------------------------------------------------------------------- /quarkdet/util/data_parallel.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from torch.nn.modules import Module 4 | from torch.nn.parallel.scatter_gather import gather 5 | from torch.nn.parallel.replicate import replicate 6 | from torch.nn.parallel.parallel_apply import parallel_apply 7 | 8 | from .scatter_gather import scatter_kwargs 9 | 10 | class DataParallel(Module): 11 | r"""Implements data parallelism at the module level. 12 | 13 | This container parallelizes the application of the given module by 14 | splitting the input across the specified devices by chunking in the batch 15 | dimension. In the forward pass, the module is replicated on each device, 16 | and each replica handles a portion of the input. During the backwards 17 | pass, gradients from each replica are summed into the original module. 18 | 19 | The batch size should be larger than the number of GPUs used. 
It should 20 | also be an integer multiple of the number of GPUs so that each chunk is the 21 | same size (so that each GPU processes the same number of samples). 22 | 23 | See also: :ref:`cuda-nn-dataparallel-instead` 24 | 25 | Arbitrary positional and keyword inputs are allowed to be passed into 26 | DataParallel EXCEPT Tensors. All variables will be scattered on dim 27 | specified (default 0). Primitive types will be broadcasted, but all 28 | other types will be a shallow copy and can be corrupted if written to in 29 | the model's forward pass. 30 | 31 | Args: 32 | module: module to be parallelized 33 | device_ids: CUDA devices (default: all devices) 34 | output_device: device location of output (default: device_ids[0]) 35 | 36 | Example:: 37 | 38 | >>> net = torch.nn.DataParallel(model, device_ids=[0, 1, 2]) 39 | >>> output = net(input_var) 40 | """ 41 | 42 | def __init__(self, module, device_ids=None, output_device=None, dim=0, chunk_sizes=None): 43 | super(DataParallel, self).__init__() 44 | 45 | if not torch.cuda.is_available(): 46 | self.module = module 47 | self.device_ids = [] 48 | return 49 | 50 | if device_ids is None: 51 | device_ids = list(range(torch.cuda.device_count())) 52 | if output_device is None: 53 | output_device = device_ids[0] 54 | self.dim = dim 55 | self.module = module 56 | self.device_ids = device_ids 57 | self.chunk_sizes = chunk_sizes 58 | self.output_device = output_device 59 | if len(self.device_ids) == 1: 60 | self.module.cuda(device_ids[0]) 61 | 62 | def forward(self, *inputs, **kwargs): 63 | if not self.device_ids: 64 | return self.module(*inputs, **kwargs) 65 | inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes) 66 | if len(self.device_ids) == 1: 67 | return self.module(*inputs[0], **kwargs[0]) 68 | replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) 69 | outputs = self.parallel_apply(replicas, inputs, kwargs) 70 | return self.gather(outputs, self.output_device) 71 | 72 | def replicate(self, module, device_ids): 73 | return replicate(module, device_ids) 74 | 75 | def scatter(self, inputs, kwargs, device_ids, chunk_sizes): 76 | return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim, chunk_sizes=self.chunk_sizes) 77 | 78 | def parallel_apply(self, replicas, inputs, kwargs): 79 | return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)]) 80 | 81 | def gather(self, outputs, output_device): 82 | return gather(outputs, output_device, dim=self.dim) 83 | 84 | 85 | # TODO: remove this 86 | def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None): 87 | r"""Evaluates module(input) in parallel across the GPUs given in device_ids. 88 | 89 | This is the functional version of the DataParallel module. 90 | 91 | Args: 92 | module: the module to evaluate in parallel 93 | inputs: inputs to the module 94 | device_ids: GPU ids on which to replicate module 95 | output_device: GPU location of the output Use -1 to indicate the CPU. 
96 | (default: device_ids[0]) 97 | Returns: 98 | a Variable containing the result of module(input) located on 99 | output_device 100 | """ 101 | if not isinstance(inputs, tuple): 102 | inputs = (inputs,) 103 | 104 | if device_ids is None: 105 | device_ids = list(range(torch.cuda.device_count())) 106 | 107 | if output_device is None: 108 | output_device = device_ids[0] 109 | 110 | inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim) 111 | if len(device_ids) == 1: 112 | return module(*inputs[0], **module_kwargs[0]) 113 | used_device_ids = device_ids[:len(inputs)] 114 | replicas = replicate(module, used_device_ids) 115 | outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids) 116 | return gather(outputs, output_device, dim) 117 | 118 | -------------------------------------------------------------------------------- /quarkdet/model/loss/varifocal_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from .utils import weight_reduce_loss 4 | 5 | 6 | def varifocal_loss(pred, 7 | target, 8 | weight=None, 9 | alpha=0.75, 10 | gamma=2.0, 11 | iou_weighted=True, 12 | reduction='mean', 13 | avg_factor=None): 14 | """`Varifocal Loss `_ 15 | 16 | Args: 17 | pred (torch.Tensor): The prediction with shape (N, C), C is the 18 | number of classes 19 | target (torch.Tensor): The learning target of the iou-aware 20 | classification score with shape (N, C), C is the number of classes. 21 | weight (torch.Tensor, optional): The weight of loss for each 22 | prediction. Defaults to None. 23 | alpha (float, optional): A balance factor for the negative part of 24 | Varifocal Loss, which is different from the alpha of Focal Loss. 25 | Defaults to 0.75. 26 | gamma (float, optional): The gamma for calculating the modulating 27 | factor. Defaults to 2.0. 28 | iou_weighted (bool, optional): Whether to weight the loss of the 29 | positive example with the iou target. Defaults to True. 30 | reduction (str, optional): The method used to reduce the loss into 31 | a scalar. Defaults to 'mean'. Options are "none", "mean" and 32 | "sum". 33 | avg_factor (int, optional): Average factor that is used to average 34 | the loss. Defaults to None. 35 | """ 36 | # pred and target should be of the same size 37 | assert pred.size() == target.size() 38 | pred_sigmoid = pred.sigmoid() 39 | target = target.type_as(pred) 40 | if iou_weighted: 41 | focal_weight = target * (target > 0.0).float() + \ 42 | alpha * (pred_sigmoid - target).abs().pow(gamma) * \ 43 | (target <= 0.0).float() 44 | else: 45 | focal_weight = (target > 0.0).float() + \ 46 | alpha * (pred_sigmoid - target).abs().pow(gamma) * \ 47 | (target <= 0.0).float() 48 | loss = F.binary_cross_entropy_with_logits( 49 | pred, target, reduction='none') * focal_weight 50 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 51 | return loss 52 | 53 | 54 | class VarifocalLoss(nn.Module): 55 | 56 | def __init__(self, 57 | use_sigmoid=True, 58 | alpha=0.75, 59 | gamma=2.0, 60 | iou_weighted=True, 61 | reduction='mean', 62 | loss_weight=1.0): 63 | """`Varifocal Loss `_ 64 | 65 | Args: 66 | use_sigmoid (bool, optional): Whether the prediction is 67 | used for sigmoid or softmax. Defaults to True. 68 | alpha (float, optional): A balance factor for the negative part of 69 | Varifocal Loss, which is different from the alpha of Focal 70 | Loss. Defaults to 0.75. 
71 | gamma (float, optional): The gamma for calculating the modulating 72 | factor. Defaults to 2.0. 73 | iou_weighted (bool, optional): Whether to weight the loss of the 74 | positive examples with the iou target. Defaults to True. 75 | reduction (str, optional): The method used to reduce the loss into 76 | a scalar. Defaults to 'mean'. Options are "none", "mean" and 77 | "sum". 78 | loss_weight (float, optional): Weight of loss. Defaults to 1.0. 79 | """ 80 | super(VarifocalLoss, self).__init__() 81 | assert use_sigmoid is True, \ 82 | 'Only sigmoid varifocal loss supported now.' 83 | assert alpha >= 0.0 84 | self.use_sigmoid = use_sigmoid 85 | self.alpha = alpha 86 | self.gamma = gamma 87 | self.iou_weighted = iou_weighted 88 | self.reduction = reduction 89 | self.loss_weight = loss_weight 90 | 91 | def forward(self, 92 | pred, 93 | target, 94 | weight=None, 95 | avg_factor=None, 96 | reduction_override=None): 97 | """Forward function. 98 | 99 | Args: 100 | pred (torch.Tensor): The prediction. 101 | target (torch.Tensor): The learning target of the prediction. 102 | weight (torch.Tensor, optional): The weight of loss for each 103 | prediction. Defaults to None. 104 | avg_factor (int, optional): Average factor that is used to average 105 | the loss. Defaults to None. 106 | reduction_override (str, optional): The reduction method used to 107 | override the original reduction method of the loss. 108 | Options are "none", "mean" and "sum". 109 | 110 | Returns: 111 | torch.Tensor: The calculated loss 112 | """ 113 | assert reduction_override in (None, 'none', 'mean', 'sum') 114 | reduction = ( 115 | reduction_override if reduction_override else self.reduction) 116 | if self.use_sigmoid: 117 | loss_cls = self.loss_weight * varifocal_loss( 118 | pred, 119 | target, 120 | weight, 121 | alpha=self.alpha, 122 | gamma=self.gamma, 123 | iou_weighted=self.iou_weighted, 124 | reduction=reduction, 125 | avg_factor=avg_factor) 126 | else: 127 | raise NotImplementedError 128 | return loss_cls 129 | -------------------------------------------------------------------------------- /quarkdet/model/module/nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision.ops import nms 3 | 4 | 5 | def multiclass_nms(multi_bboxes, 6 | multi_scores, 7 | score_thr, 8 | nms_cfg, 9 | max_num=-1, 10 | score_factors=None): 11 | """NMS for multi-class bboxes. 12 | 13 | Args: 14 | multi_bboxes (Tensor): shape (n, #class*4) or (n, 4) 15 | multi_scores (Tensor): shape (n, #class), where the last column 16 | contains scores of the background class, but this will be ignored. 17 | score_thr (float): bbox threshold, bboxes with scores lower than it 18 | will not be considered. 19 | nms_thr (float): NMS IoU threshold 20 | max_num (int): if there are more than max_num bboxes after NMS, 21 | only top max_num will be kept. 22 | score_factors (Tensor): The factors multiplied to scores before 23 | applying NMS 24 | 25 | Returns: 26 | tuple: (bboxes, labels), tensors of shape (k, 5) and (k, 1). Labels \ 27 | are 0-based. 
28 | """ 29 | num_classes = multi_scores.size(1) - 1 30 | # exclude background category 31 | if multi_bboxes.shape[1] > 4: 32 | bboxes = multi_bboxes.view(multi_scores.size(0), -1, 4) 33 | else: 34 | bboxes = multi_bboxes[:, None].expand( 35 | multi_scores.size(0), num_classes, 4) 36 | scores = multi_scores[:, :-1] 37 | 38 | # filter out boxes with low scores 39 | valid_mask = scores > score_thr 40 | 41 | # We use masked_select for ONNX exporting purpose, 42 | # which is equivalent to bboxes = bboxes[valid_mask] 43 | # (TODO): as ONNX does not support repeat now, 44 | # we have to use this ugly code 45 | bboxes = torch.masked_select( 46 | bboxes, 47 | torch.stack((valid_mask, valid_mask, valid_mask, valid_mask), 48 | -1)).view(-1, 4) 49 | if score_factors is not None: 50 | scores = scores * score_factors[:, None] 51 | scores = torch.masked_select(scores, valid_mask) 52 | labels = valid_mask.nonzero(as_tuple=False)[:, 1] 53 | 54 | if bboxes.numel() == 0: 55 | bboxes = multi_bboxes.new_zeros((0, 5)) 56 | labels = multi_bboxes.new_zeros((0, ), dtype=torch.long) 57 | 58 | if torch.onnx.is_in_onnx_export(): 59 | raise RuntimeError('[ONNX Error] Can not record NMS ' 60 | 'as it has not been executed this time') 61 | return bboxes, labels 62 | 63 | dets, keep = batched_nms(bboxes, scores, labels, nms_cfg) 64 | 65 | if max_num > 0: 66 | dets = dets[:max_num] 67 | keep = keep[:max_num] 68 | 69 | return dets, labels[keep] 70 | 71 | 72 | def batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic=False): 73 | """Performs non-maximum suppression in a batched fashion. 74 | Modified from https://github.com/pytorch/vision/blob 75 | /505cd6957711af790211896d32b40291bea1bc21/torchvision/ops/boxes.py#L39. 76 | In order to perform NMS independently per class, we add an offset to all 77 | the boxes. The offset is dependent only on the class idx, and is large 78 | enough so that boxes from different classes do not overlap. 79 | Arguments: 80 | boxes (torch.Tensor): boxes in shape (N, 4). 81 | scores (torch.Tensor): scores in shape (N, ). 82 | idxs (torch.Tensor): each index value correspond to a bbox cluster, 83 | and NMS will not be applied between elements of different idxs, 84 | shape (N, ). 85 | nms_cfg (dict): specify nms type and other parameters like iou_thr. 86 | Possible keys includes the following. 87 | - iou_thr (float): IoU threshold used for NMS. 88 | - split_thr (float): threshold number of boxes. In some cases the 89 | number of boxes is large (e.g., 200k). To avoid OOM during 90 | training, the users could set `split_thr` to a small value. 91 | If the number of boxes is greater than the threshold, it will 92 | perform NMS on each group of boxes separately and sequentially. 93 | Defaults to 10000. 94 | class_agnostic (bool): if true, nms is class agnostic, 95 | i.e. IoU thresholding happens over all boxes, 96 | regardless of the predicted class. 97 | Returns: 98 | tuple: kept dets and indice. 
99 | """ 100 | nms_cfg_ = nms_cfg.copy() 101 | class_agnostic = nms_cfg_.pop('class_agnostic', class_agnostic) 102 | if class_agnostic: 103 | boxes_for_nms = boxes 104 | else: 105 | max_coordinate = boxes.max() 106 | offsets = idxs.to(boxes) * (max_coordinate + 1) 107 | boxes_for_nms = boxes + offsets[:, None] 108 | 109 | nms_type = nms_cfg_.pop('type', 'nms') 110 | # nms_op = eval(nms_type) 111 | 112 | split_thr = nms_cfg_.pop('split_thr', 10000) 113 | if len(boxes_for_nms) < split_thr: 114 | # dets, keep = nms_op(boxes_for_nms, scores, **nms_cfg_) 115 | keep = nms(boxes_for_nms, scores, **nms_cfg_) 116 | boxes = boxes[keep] 117 | # scores = dets[:, -1] 118 | scores = scores[keep] 119 | else: 120 | total_mask = scores.new_zeros(scores.size(), dtype=torch.bool) 121 | for id in torch.unique(idxs): 122 | mask = (idxs == id).nonzero(as_tuple=False).view(-1) 123 | # dets, keep = nms_op(boxes_for_nms[mask], scores[mask], **nms_cfg_) 124 | keep = nms(boxes_for_nms[mask], scores[mask], **nms_cfg_) 125 | total_mask[mask[keep]] = True 126 | 127 | keep = total_mask.nonzero(as_tuple=False).view(-1) 128 | keep = keep[scores[keep].argsort(descending=True)] 129 | boxes = boxes[keep] 130 | scores = scores[keep] 131 | 132 | return torch.cat([boxes, scores[:, None]], -1), keep -------------------------------------------------------------------------------- /config/mobilenetv3.yml: -------------------------------------------------------------------------------- 1 | #Config File example 2 | save_dir: workspace/mobilenet 3 | model: 4 | detector: 5 | name: GFL 6 | backbone: 7 | name: MobileNetV3_Small 8 | out_stages: [2,6] 9 | neck: 10 | name: PAN 11 | in_channels: [24, 48, 576] 12 | out_channels: 96 13 | start_level: 0 14 | num_outs: 3 15 | head: 16 | name: QuarkDetHead 17 | num_classes: 80 # 80 18 | input_channel: 96 19 | feat_channels: 96 20 | stacked_convs: 2 21 | share_cls_reg: True #True 22 | octave_base_scale: 5 23 | scales_per_octave: 1 24 | strides: [8, 16, 32] 25 | reg_max: 7 #16 #7 26 | norm_cfg: 27 | type: BN 28 | loss: 29 | loss_qfl: 30 | name: QualityFocalLoss 31 | use_sigmoid: False #True 32 | beta: 2.0 33 | loss_weight: 1.0 34 | loss_dfl: 35 | name: DistributionFocalLoss 36 | loss_weight: 0.25 37 | loss_bbox: 38 | name: GIoULoss 39 | loss_weight: 2.0 40 | data: 41 | train: 42 | name: coco 43 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/train2017 44 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_train2017.json 45 | input_size: [320,320] #[w,h] 46 | keep_ratio: True 47 | pipeline: 48 | perspective: 0.0 49 | scale: [0.6, 1.4] 50 | stretch: [[1, 1], [1, 1]] 51 | rotation: 0 52 | shear: 0 53 | translate: 0 54 | flip: 0.5 55 | brightness: 0.2 56 | contrast: [0.8, 1.2] 57 | saturation: [0.8, 1.2] 58 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 59 | val: 60 | name: coco 61 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/val2017 62 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_val2017.json 63 | input_size: [320,320] #[w,h] 64 | keep_ratio: True 65 | pipeline: 66 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 67 | device: 68 | gpu_ids: [1] 69 | workers_per_gpu: 8 70 | batchsize_per_gpu: 80 # 80 #40 #160 santiago test 71 | schedule: 72 | resume: False 73 | load_model: ./workspace/mobilenet/model_last.pth 74 | 75 | optimizer: 76 | name: SGD 77 | lr: 0.14 78 | momentum: 0.9 79 | weight_decay: 0.0001 80 | warmup: 81 | name: linear 82 | steps: 300 #santiago test 83 | ratio: 0.1 84 | total_epochs: 160 #70 85 | 
lr_schedule: 86 | name: MultiStepLR 87 | milestones: [130,160,150,155] 88 | gamma: 0.1 89 | val_intervals: 5 #5 90 | evaluator: 91 | name: CocoDetectionEvaluator 92 | save_key: mAP 93 | 94 | log: 95 | interval: 10 96 | 97 | class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 98 | 'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant', 99 | 'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog', 100 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 101 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 102 | 'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat', 103 | 'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket', 104 | 'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 105 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 106 | 'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 107 | 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop', 108 | 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave', 109 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 110 | 'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush'] 111 | 112 | 113 | 114 | # { 115 | # 'img': tensor([ 116 | # [ 117 | # [ 118 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 119 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 120 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 121 | # ..., [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 122 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 123 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667] 124 | # ], 125 | 126 | # [ 127 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 128 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 129 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 130 | # ..., [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 131 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 132 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013] 133 | # ], 134 | 135 | # [ 136 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 137 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 138 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 139 | # ..., [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 140 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 141 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668] 142 | # ] 143 | # ] 144 | # ]), 145 | # 'img_info': { 146 | # 'license': tensor([2]), 147 | # 'file_name': ['000000007616.jpg'], 148 | # 'coco_url': ['http://images.cocodataset.org/train2017/000000007616.jpg'], 149 | # 'height': tensor([375]), 150 | # 'width': tensor([500]), 151 | # 'date_captured': ['2013-11-16 19:22:23'], 152 | # 'flickr_url': ['http://farm1.staticflickr.com/3/6939216_ea2aca1399_z.jpg'], 153 | # 'id': tensor([7616]) 154 | # }, 155 | # 'gt_bboxes': [array([ 156 | # [193.312, 153.37599, 216.5952, 175.8784], 157 | # [110.0224, 135.4624, 208.1792, 215.2832], 158 | # [160.1216, 85.7984, 168.64641, 110.976], 159 | # [204.7232, 93.6704, 212.2048, 108.3904], 160 | # [85.414406, 148.8192, 111.8976, 167.5584], 161 | # [236.0832, 155.96161, 267.5264, 166.3424], 162 | # [1.0816001, 231.6224, 320., 277.568], 163 | # [85.4656, 148.3776, 112.22401, 168.096], 164 | # [40.7232, 109.024, 44.607998, 121.5552] 165 | # ], 166 | # dtype = float32)], 167 | # 'gt_labels': [array([2, 7, 9, 
9, 2, 2, 2, 7, 9])], 168 | # 'warp_matrix': [array([ 169 | # [0.64, 0., 0.], 170 | # [0., 0.64, 40.], 171 | # [0., 0., 1.] 172 | # ])] 173 | # } 174 | -------------------------------------------------------------------------------- /config/nanodet.yml: -------------------------------------------------------------------------------- 1 | #Config File example 2 | save_dir: workspace/nanodet 3 | model: 4 | detector: 5 | name: GFL 6 | backbone: 7 | name: ShuffleNetV2 8 | out_stages: [2,3,4] 9 | activation: LeakyReLU 10 | model_size: 1.0x 11 | neck: 12 | name: PAN_Slim 13 | in_channels: [116, 232, 464] 14 | out_channels: 96 15 | start_level: 0 16 | num_outs: 3 17 | head: 18 | name: QuarkDetHead 19 | num_classes: 80 # 80 20 | input_channel: 96 21 | feat_channels: 96 22 | stacked_convs: 2 23 | share_cls_reg: True #True 24 | octave_base_scale: 5 25 | scales_per_octave: 1 26 | strides: [8, 16, 32] 27 | reg_max: 7 #16 #7 28 | norm_cfg: 29 | type: BN 30 | loss: 31 | loss_qfl: 32 | name: QualityFocalLoss 33 | use_sigmoid: False #True 34 | beta: 2.0 35 | loss_weight: 1.0 36 | loss_dfl: 37 | name: DistributionFocalLoss 38 | loss_weight: 0.25 39 | loss_bbox: 40 | name: GIoULoss 41 | loss_weight: 2.0 42 | data: 43 | train: 44 | name: coco 45 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/train2017 46 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_train2017.json 47 | input_size: [320,320] #[w,h] 48 | keep_ratio: True 49 | pipeline: 50 | perspective: 0.0 51 | scale: [0.6, 1.4] 52 | stretch: [[1, 1], [1, 1]] 53 | rotation: 0 54 | shear: 0 55 | translate: 0 56 | flip: 0.5 57 | brightness: 0.2 58 | contrast: [0.8, 1.2] 59 | saturation: [0.8, 1.2] 60 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 61 | val: 62 | name: coco 63 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/val2017 64 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_val2017.json 65 | input_size: [320,320] #[w,h] 66 | keep_ratio: True 67 | pipeline: 68 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 69 | device: 70 | gpu_ids: [0] 71 | workers_per_gpu: 8 72 | batchsize_per_gpu: 80 # 73 | schedule: 74 | resume: False 75 | load_model: ./workspace/nanodet/model_last.pth 76 | 77 | optimizer: 78 | name: SGD 79 | lr: 0.14 80 | momentum: 0.9 81 | weight_decay: 0.0001 82 | warmup: 83 | name: linear 84 | steps: 3 85 | ratio: 0.1 86 | total_epochs: 160 #70 87 | lr_schedule: 88 | name: MultiStepLR 89 | milestones: [40,130,160,150,155] 90 | gamma: 0.1 91 | val_intervals: 5 #5 92 | evaluator: 93 | name: CocoDetectionEvaluator 94 | save_key: mAP 95 | 96 | log: 97 | interval: 10 98 | 99 | class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 100 | 'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant', 101 | 'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog', 102 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 103 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 104 | 'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat', 105 | 'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket', 106 | 'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 107 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 108 | 'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 109 | 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop', 110 | 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave', 111 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 112 | 'vase', 
'scissors', 'teddy_bear', 'hair_drier', 'toothbrush'] 113 | 114 | 115 | 116 | # { 117 | # 'img': tensor([ 118 | # [ 119 | # [ 120 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 121 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 122 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 123 | # ..., [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 124 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 125 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667] 126 | # ], 127 | 128 | # [ 129 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 130 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 131 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 132 | # ..., [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 133 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 134 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013] 135 | # ], 136 | 137 | # [ 138 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 139 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 140 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 141 | # ..., [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 142 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 143 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668] 144 | # ] 145 | # ] 146 | # ]), 147 | # 'img_info': { 148 | # 'license': tensor([2]), 149 | # 'file_name': ['000000007616.jpg'], 150 | # 'coco_url': ['http://images.cocodataset.org/train2017/000000007616.jpg'], 151 | # 'height': tensor([375]), 152 | # 'width': tensor([500]), 153 | # 'date_captured': ['2013-11-16 19:22:23'], 154 | # 'flickr_url': ['http://farm1.staticflickr.com/3/6939216_ea2aca1399_z.jpg'], 155 | # 'id': tensor([7616]) 156 | # }, 157 | # 'gt_bboxes': [array([ 158 | # [193.312, 153.37599, 216.5952, 175.8784], 159 | # [110.0224, 135.4624, 208.1792, 215.2832], 160 | # [160.1216, 85.7984, 168.64641, 110.976], 161 | # [204.7232, 93.6704, 212.2048, 108.3904], 162 | # [85.414406, 148.8192, 111.8976, 167.5584], 163 | # [236.0832, 155.96161, 267.5264, 166.3424], 164 | # [1.0816001, 231.6224, 320., 277.568], 165 | # [85.4656, 148.3776, 112.22401, 168.096], 166 | # [40.7232, 109.024, 44.607998, 121.5552] 167 | # ], 168 | # dtype = float32)], 169 | # 'gt_labels': [array([2, 7, 9, 9, 2, 2, 2, 7, 9])], 170 | # 'warp_matrix': [array([ 171 | # [0.64, 0., 0.], 172 | # [0., 0.64, 40.], 173 | # [0., 0., 1.] 174 | # ])] 175 | # } 176 | -------------------------------------------------------------------------------- /quarkdet/model/head/anchor/base_anchor_head.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from quarkdet.model.module.init_weights import normal_init 5 | 6 | from .anchor_generator import AnchorGenerator 7 | from .anchor_target import multi_apply 8 | 9 | 10 | class AnchorHead(nn.Module): 11 | """Anchor-based head (RPN, RetinaNet, SSD, etc.). 12 | 13 | Args: 14 | num_classes (int): Number of categories including the background 15 | category. 16 | in_channels (int): Number of channels in the input feature map. 17 | feat_channels (int): Number of hidden channels. Used in child classes. 18 | anchor_scales (Iterable): Anchor scales. 19 | anchor_ratios (Iterable): Anchor aspect ratios. 20 | anchor_strides (Iterable): Anchor strides. 
21 | anchor_base_sizes (Iterable): Anchor base sizes. 22 | target_means (Iterable): Mean values of regression targets. 23 | target_stds (Iterable): Std values of regression targets. 24 | loss_cls (dict): Config of classification loss. 25 | loss_bbox (dict): Config of localization loss. 26 | """ # noqa: W605 27 | 28 | def __init__(self, 29 | num_classes, 30 | loss, 31 | use_sigmoid, 32 | input_channel, 33 | feat_channels=256, 34 | anchor_scales=[8], 35 | anchor_ratios=[1.0], 36 | strides=[8, 16, 32], 37 | anchor_base_sizes=None, 38 | target_means=(.0, .0, .0, .0), 39 | target_stds=(0.1, 0.1, 0.2, 0.2), 40 | ): 41 | super(AnchorHead, self).__init__() 42 | self.in_channels = input_channel 43 | self.num_classes = num_classes 44 | self.loss_cfg = loss 45 | self.feat_channels = feat_channels 46 | self.anchor_scales = anchor_scales 47 | self.anchor_ratios = anchor_ratios 48 | self.anchor_strides = strides 49 | self.anchor_base_sizes = list( 50 | strides) if anchor_base_sizes is None else anchor_base_sizes 51 | self.target_means = target_means 52 | self.target_stds = target_stds 53 | 54 | self.use_sigmoid_cls = use_sigmoid #loss.get('use_sigmoid', False) #use_sigmoid 55 | #self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False) 56 | # self.sampling = self.loss_cfg.loss_cls['name'] not in ['FocalLoss', 'GHMC'] 57 | if self.use_sigmoid_cls: 58 | self.cls_out_channels = num_classes 59 | else: 60 | self.cls_out_channels = num_classes + 1 61 | 62 | 63 | 64 | print("self.cls_out_channels:",self.cls_out_channels) 65 | if self.cls_out_channels <= 0: 66 | raise ValueError('num_classes={} is too small'.format(num_classes)) 67 | 68 | # self.loss_cls = build_loss(loss_cls) 69 | # self.loss_bbox = build_loss(loss_bbox) 70 | self.fp16_enabled = False 71 | 72 | self.anchor_generators = [] 73 | for anchor_base in self.anchor_base_sizes: 74 | self.anchor_generators.append( 75 | AnchorGenerator(anchor_base, anchor_scales, anchor_ratios)) 76 | 77 | self.num_anchors = len(self.anchor_ratios) * len(self.anchor_scales) 78 | self._init_layers() 79 | 80 | def _init_layers(self): 81 | self.conv_cls = nn.Conv2d(self.in_channels, 82 | self.num_anchors * self.cls_out_channels, 1) 83 | self.conv_reg = nn.Conv2d(self.in_channels, self.num_anchors * 4, 1) 84 | 85 | def init_weights(self): 86 | normal_init(self.conv_cls, std=0.01) 87 | normal_init(self.conv_reg, std=0.01) 88 | 89 | def forward_single(self, x): 90 | cls_score = self.conv_cls(x) 91 | bbox_pred = self.conv_reg(x) 92 | return cls_score, bbox_pred 93 | 94 | def forward(self, feats): 95 | return multi_apply(self.forward_single, feats) 96 | 97 | def get_anchors(self, featmap_sizes, img_shapes, device='cuda'): # checked! 98 | """Get anchors according to feature map sizes. 99 | 100 | Args: 101 | featmap_sizes (list[tuple]): Multi-level feature map sizes. 102 | img_shapes (h,w): Image meta info. 
103 | device (torch.device | str): device for returned tensors 104 | 105 | Returns: 106 | tuple: anchors of each image, valid flags of each image 107 | """ 108 | num_imgs = len(img_shapes) 109 | num_levels = len(featmap_sizes) 110 | 111 | # print("num_imgs",num_imgs) 112 | # print("num_levels:",num_levels) 113 | # print("featmap_sizes",featmap_sizes) 114 | # print("img_shapes",img_shapes) 115 | 116 | # since feature map sizes of all images are the same, we only compute 117 | # anchors for one time 118 | multi_level_anchors = [] 119 | for i in range(num_levels): 120 | anchors = self.anchor_generators[i].grid_anchors( 121 | featmap_sizes[i], self.anchor_strides[i], device=device) 122 | #print(":featmap_sizes:",featmap_sizes[i],":anchor_strides:",self.anchor_strides[i]) 123 | multi_level_anchors.append(anchors) 124 | #print("multi_level_anchors:",multi_level_anchors) 125 | anchor_list = [multi_level_anchors for _ in range(num_imgs)] 126 | #print("anchor_list:",anchor_list) 127 | #print("for i in range(num_levels)") 128 | 129 | # for each image, we compute valid flags of multi level anchors 130 | valid_flag_list = [] 131 | for img_id, img_shape in enumerate(img_shapes): 132 | multi_level_flags = [] 133 | for i in range(num_levels): 134 | anchor_stride = self.anchor_strides[i] 135 | feat_h, feat_w = featmap_sizes[i] 136 | h, w = img_shape 137 | valid_feat_h = min(int(np.ceil(h / anchor_stride)), feat_h) 138 | valid_feat_w = min(int(np.ceil(w / anchor_stride)), feat_w) 139 | flags = self.anchor_generators[i].valid_flags( 140 | (feat_h, feat_w), (valid_feat_h, valid_feat_w), 141 | device=device) 142 | multi_level_flags.append(flags) 143 | valid_flag_list.append(multi_level_flags) 144 | 145 | return anchor_list, valid_flag_list 146 | -------------------------------------------------------------------------------- /config/shufflenet.yml: -------------------------------------------------------------------------------- 1 | #Config File example 2 | save_dir: workspace/shufflenet 3 | model: 4 | detector: 5 | name: GFL 6 | backbone: 7 | name: ShuffleNetV2 8 | out_stages: [2,3,4] 9 | activation: LeakyReLU 10 | model_size: 1.0x 11 | neck: 12 | name: PAN 13 | in_channels: [116, 232, 464] 14 | out_channels: 96 15 | start_level: 0 16 | num_outs: 3 17 | head: 18 | name: QuarkDetHead 19 | num_classes: 80 # 80 20 | input_channel: 96 21 | feat_channels: 96 22 | stacked_convs: 2 23 | share_cls_reg: True #True 24 | octave_base_scale: 5 25 | scales_per_octave: 1 26 | strides: [8, 16, 32] 27 | reg_max: 7 #16 #7 28 | norm_cfg: 29 | type: BN 30 | loss: 31 | loss_qfl: 32 | name: QualityFocalLoss 33 | use_sigmoid: False #True 34 | beta: 2.0 35 | loss_weight: 1.0 36 | loss_dfl: 37 | name: DistributionFocalLoss 38 | loss_weight: 0.25 39 | loss_bbox: 40 | name: GIoULoss 41 | loss_weight: 2.0 42 | data: 43 | train: 44 | name: coco 45 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/train2017 46 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_train2017.json 47 | input_size: [320,320] #[w,h] 48 | keep_ratio: True 49 | pipeline: 50 | perspective: 0.0 51 | scale: [0.6, 1.4] 52 | stretch: [[1, 1], [1, 1]] 53 | rotation: 0 54 | shear: 0 55 | translate: 0 56 | flip: 0.5 57 | brightness: 0.2 58 | contrast: [0.8, 1.2] 59 | saturation: [0.8, 1.2] 60 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 61 | val: 62 | name: coco 63 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/val2017 64 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_val2017.json 65 | input_size: 
[320,320] #[w,h] 66 | keep_ratio: True 67 | pipeline: 68 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 69 | device: 70 | gpu_ids: [0,1] 71 | workers_per_gpu: 8 72 | batchsize_per_gpu: 80 # 80 #40 #160 santiago test 73 | schedule: 74 | resume: True 75 | load_model: ./workspace/shufflenet/model_last.pth 76 | 77 | optimizer: 78 | name: SGD 79 | lr: 0.14 80 | momentum: 0.9 81 | weight_decay: 0.0001 82 | warmup: 83 | name: linear 84 | steps: 300 #santiago test 85 | ratio: 0.1 86 | total_epochs: 160 #70 87 | lr_schedule: 88 | name: MultiStepLR 89 | milestones: [130,160,150,155] 90 | gamma: 0.1 91 | val_intervals: 5 #5 92 | evaluator: 93 | name: CocoDetectionEvaluator 94 | save_key: mAP 95 | 96 | log: 97 | interval: 10 98 | 99 | class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 100 | 'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant', 101 | 'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog', 102 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 103 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 104 | 'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat', 105 | 'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket', 106 | 'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 107 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 108 | 'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 109 | 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop', 110 | 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave', 111 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 112 | 'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush'] 113 | 114 | 115 | 116 | # { 117 | # 'img': tensor([ 118 | # [ 119 | # [ 120 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 121 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 122 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 123 | # ..., [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 124 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 125 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667] 126 | # ], 127 | 128 | # [ 129 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 130 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 131 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 132 | # ..., [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 133 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 134 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013] 135 | # ], 136 | 137 | # [ 138 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 139 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 140 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 141 | # ..., [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 142 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 143 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668] 144 | # ] 145 | # ] 146 | # ]), 147 | # 'img_info': { 148 | # 'license': tensor([2]), 149 | # 'file_name': ['000000007616.jpg'], 150 | # 'coco_url': ['http://images.cocodataset.org/train2017/000000007616.jpg'], 151 | # 'height': tensor([375]), 152 | # 'width': tensor([500]), 153 | # 'date_captured': ['2013-11-16 19:22:23'], 154 | # 'flickr_url': ['http://farm1.staticflickr.com/3/6939216_ea2aca1399_z.jpg'], 155 | # 'id': tensor([7616]) 156 | # }, 157 | # 
'gt_bboxes': [array([ 158 | # [193.312, 153.37599, 216.5952, 175.8784], 159 | # [110.0224, 135.4624, 208.1792, 215.2832], 160 | # [160.1216, 85.7984, 168.64641, 110.976], 161 | # [204.7232, 93.6704, 212.2048, 108.3904], 162 | # [85.414406, 148.8192, 111.8976, 167.5584], 163 | # [236.0832, 155.96161, 267.5264, 166.3424], 164 | # [1.0816001, 231.6224, 320., 277.568], 165 | # [85.4656, 148.3776, 112.22401, 168.096], 166 | # [40.7232, 109.024, 44.607998, 121.5552] 167 | # ], 168 | # dtype = float32)], 169 | # 'gt_labels': [array([2, 7, 9, 9, 2, 2, 2, 7, 9])], 170 | # 'warp_matrix': [array([ 171 | # [0.64, 0., 0.], 172 | # [0., 0.64, 40.], 173 | # [0., 0., 1.] 174 | # ])] 175 | # } 176 | -------------------------------------------------------------------------------- /quarkdet/model/head/quarkdet_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from ..module.conv import ConvModule, DepthwiseConvModule 5 | from ..module.init_weights import normal_init 6 | #from .gfl_head import GFLHead 7 | from .gfl_headv2 import GFLHeadV2 8 | from .gfl_head import GFLHead 9 | from .anchor.anchor_target import multi_apply 10 | 11 | 12 | class QuarkDetHead(GFLHead): # 可以直接将GFLHead替换成 GFLHeadV2 13 | """ 14 | Modified from GFL, use same loss functions but much lightweight convolution heads 15 | """ 16 | 17 | def __init__(self, 18 | num_classes, 19 | loss, 20 | input_channel, 21 | stacked_convs=2, 22 | octave_base_scale=5, 23 | scales_per_octave=1, 24 | conv_cfg=None, 25 | norm_cfg=dict(type='BN'), 26 | reg_max=16, 27 | share_cls_reg=False, 28 | activation='LeakyReLU', 29 | **kwargs): 30 | self.share_cls_reg = share_cls_reg 31 | self.activation = activation 32 | super(QuarkDetHead, self).__init__(num_classes, 33 | loss, 34 | input_channel, 35 | stacked_convs, 36 | octave_base_scale, 37 | scales_per_octave, 38 | conv_cfg, 39 | norm_cfg, 40 | reg_max, 41 | **kwargs) 42 | 43 | def _init_layers(self): 44 | self.cls_convs = nn.ModuleList() 45 | self.reg_convs = nn.ModuleList() 46 | for _ in self.anchor_strides: 47 | cls_convs, reg_convs = self._buid_not_shared_head() 48 | self.cls_convs.append(cls_convs) 49 | self.reg_convs.append(reg_convs) 50 | 51 | self.gfl_cls = nn.ModuleList([nn.Conv2d(self.feat_channels, 52 | self.cls_out_channels + 53 | 4 * (self.reg_max + 1) if self.share_cls_reg else self.cls_out_channels, 54 | 1, 55 | padding=0) for _ in self.anchor_strides]) 56 | # TODO: if 57 | self.gfl_reg = nn.ModuleList([nn.Conv2d(self.feat_channels, 58 | 4 * (self.reg_max + 1), 59 | 1, 60 | padding=0) for _ in self.anchor_strides]) 61 | 62 | def _buid_not_shared_head(self): 63 | cls_convs = nn.ModuleList() 64 | reg_convs = nn.ModuleList() 65 | # print("cls_convs before:",cls_convs) 66 | # print("reg_convs before:",reg_convs) 67 | # print("self.stacked_convs:",self.stacked_convs) 68 | for i in range(self.stacked_convs): 69 | chn = self.in_channels if i == 0 else self.feat_channels 70 | cls_convs.append( 71 | DepthwiseConvModule(chn, 72 | self.feat_channels, 73 | 3, 74 | stride=1, 75 | padding=1, 76 | norm_cfg=self.norm_cfg, 77 | bias=self.norm_cfg is None, 78 | activation=self.activation)) 79 | if not self.share_cls_reg: 80 | reg_convs.append( 81 | DepthwiseConvModule(chn, 82 | self.feat_channels, 83 | 3, 84 | stride=1, 85 | padding=1, 86 | norm_cfg=self.norm_cfg, 87 | bias=self.norm_cfg is None, 88 | activation=self.activation)) 89 | 90 | # print("cls_convs after:",cls_convs) 91 | # print("reg_convs after:",reg_convs) 92 | return 
cls_convs, reg_convs 93 | 94 | def init_weights(self): 95 | for seq in self.cls_convs: 96 | for m in seq: 97 | normal_init(m.depthwise, std=0.01) 98 | normal_init(m.pointwise, std=0.01) 99 | for seq in self.reg_convs: 100 | for m in seq: 101 | normal_init(m.depthwise, std=0.01) 102 | normal_init(m.pointwise, std=0.01) 103 | bias_cls = -4.595 # 用0.01的置信度初始化 104 | for i in range(len(self.anchor_strides)): 105 | normal_init(self.gfl_cls[i], std=0.01, bias=bias_cls) 106 | normal_init(self.gfl_reg[i], std=0.01) 107 | print('Finish initialize Lite quarkdet Head.') 108 | 109 | def forward(self, feats): 110 | return multi_apply(self.forward_single, 111 | feats, 112 | self.cls_convs, 113 | self.reg_convs, 114 | self.gfl_cls, 115 | self.gfl_reg, 116 | ) 117 | 118 | def forward_single(self, x, cls_convs, reg_convs, gfl_cls, gfl_reg): 119 | cls_feat = x 120 | reg_feat = x 121 | for cls_conv in cls_convs: 122 | cls_feat = cls_conv(cls_feat) 123 | for reg_conv in reg_convs: 124 | reg_feat = reg_conv(reg_feat) 125 | if self.share_cls_reg: 126 | feat = gfl_cls(cls_feat) 127 | # print("feat:",feat.shape) 128 | # print("cls_feat:",cls_feat.shape) 129 | # print("self.cls_out_channels:",self.cls_out_channels) 130 | cls_score, bbox_pred = torch.split(feat, [self.cls_out_channels, 4 * (self.reg_max + 1)], dim=1) 131 | # print("cls_score:",cls_score.shape) 132 | # print("bbox_pred:",bbox_pred.shape) 133 | else: 134 | cls_score = gfl_cls(cls_feat) 135 | bbox_pred = gfl_reg(reg_feat) 136 | 137 | if torch.onnx.is_in_onnx_export(): 138 | cls_score = torch.sigmoid(cls_score).reshape(1, self.num_classes, -1).permute(0, 2, 1) 139 | bbox_pred = bbox_pred.reshape(1, (self.reg_max+1)*4, -1).permute(0, 2, 1) 140 | return cls_score, bbox_pred 141 | 142 | 143 | -------------------------------------------------------------------------------- /config/test.yml: -------------------------------------------------------------------------------- 1 | #Config File example 2 | save_dir: workspace/test 3 | model: 4 | detector: 5 | name: GFL 6 | backbone: 7 | name: ShuffleNetV2 8 | out_stages: [2,3,4] 9 | activation: LeakyReLU 10 | model_size: 1.0x 11 | 12 | # name: GhostNet 13 | # width_mult: 1.0 14 | # out_stages: [4, 6, 9] 15 | # act: ReLU6 16 | 17 | neck: 18 | name: PAN 19 | in_channels: [116, 232, 464] #[40, 112, 960] 20 | out_channels: 96 21 | start_level: 0 22 | num_outs: 3 23 | head: 24 | name: QuarkDetHead 25 | num_classes: 80 # 80 26 | input_channel: 96 27 | feat_channels: 96 28 | stacked_convs: 2 29 | share_cls_reg: True #True 30 | octave_base_scale: 5 31 | scales_per_octave: 1 32 | strides: [8, 16, 32] 33 | reg_max: 7 #16 #7 34 | norm_cfg: 35 | type: BN 36 | loss: 37 | loss_qfl: 38 | name: QualityFocalLoss 39 | use_sigmoid: False #True 40 | beta: 2.0 41 | loss_weight: 1.0 42 | loss_dfl: 43 | name: DistributionFocalLoss 44 | loss_weight: 0.25 45 | loss_bbox: 46 | name: GIoULoss 47 | loss_weight: 2.0 48 | data: 49 | train: 50 | name: coco 51 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/train2017 52 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_train2017.json 53 | input_size: [320,320] #[w,h] 54 | keep_ratio: True 55 | pipeline: 56 | perspective: 0.0 57 | scale: [0.6, 1.4] 58 | stretch: [[1, 1], [1, 1]] 59 | rotation: 0 60 | shear: 0 61 | translate: 0 62 | flip: 0.5 63 | brightness: 0.2 64 | contrast: [0.8, 1.2] 65 | saturation: [0.8, 1.2] 66 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 67 | val: 68 | name: coco 69 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/val2017 70 
| ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_val2017.json 71 | input_size: [320,320] #[w,h] 72 | keep_ratio: True 73 | pipeline: 74 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 75 | device: 76 | gpu_ids: [1] 77 | workers_per_gpu: 1 78 | batchsize_per_gpu: 1 # 80 #40 #160 santiago test 79 | schedule: 80 | resume: False 81 | load_model: ./workspace/test/model_last.pth 82 | 83 | optimizer: 84 | name: SGD 85 | lr: 0.14 86 | momentum: 0.9 87 | weight_decay: 0.0001 88 | warmup: 89 | name: linear 90 | steps: 1 #santiago test 91 | ratio: 0.1 92 | total_epochs: 200 #70 93 | lr_schedule: 94 | name: MultiStepLR 95 | milestones: [130,160,150,155] 96 | gamma: 0.1 97 | val_intervals: 5 #5 98 | evaluator: 99 | name: CocoDetectionEvaluator 100 | save_key: mAP 101 | 102 | log: 103 | interval: 1 104 | 105 | class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 106 | 'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant', 107 | 'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog', 108 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 109 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 110 | 'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat', 111 | 'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket', 112 | 'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 113 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 114 | 'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 115 | 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop', 116 | 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave', 117 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 118 | 'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush'] 119 | 120 | 121 | 122 | # { 123 | # 'img': tensor([ 124 | # [ 125 | # [ 126 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 127 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 128 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 129 | # ..., [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 130 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 131 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667] 132 | # ], 133 | 134 | # [ 135 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 136 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 137 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 138 | # ..., [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 139 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 140 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013] 141 | # ], 142 | 143 | # [ 144 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 145 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 146 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 147 | # ..., [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 148 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 149 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668] 150 | # ] 151 | # ] 152 | # ]), 153 | # 'img_info': { 154 | # 'license': tensor([2]), 155 | # 'file_name': ['000000007616.jpg'], 156 | # 'coco_url': ['http://images.cocodataset.org/train2017/000000007616.jpg'], 157 | # 'height': tensor([375]), 158 | # 'width': tensor([500]), 159 | # 'date_captured': ['2013-11-16 19:22:23'], 160 | # 'flickr_url': 
['http://farm1.staticflickr.com/3/6939216_ea2aca1399_z.jpg'], 161 | # 'id': tensor([7616]) 162 | # }, 163 | # 'gt_bboxes': [array([ 164 | # [193.312, 153.37599, 216.5952, 175.8784], 165 | # [110.0224, 135.4624, 208.1792, 215.2832], 166 | # [160.1216, 85.7984, 168.64641, 110.976], 167 | # [204.7232, 93.6704, 212.2048, 108.3904], 168 | # [85.414406, 148.8192, 111.8976, 167.5584], 169 | # [236.0832, 155.96161, 267.5264, 166.3424], 170 | # [1.0816001, 231.6224, 320., 277.568], 171 | # [85.4656, 148.3776, 112.22401, 168.096], 172 | # [40.7232, 109.024, 44.607998, 121.5552] 173 | # ], 174 | # dtype = float32)], 175 | # 'gt_labels': [array([2, 7, 9, 9, 2, 2, 2, 7, 9])], 176 | # 'warp_matrix': [array([ 177 | # [0.64, 0., 0.], 178 | # [0., 0.64, 40.], 179 | # [0., 0., 1.] 180 | # ])] 181 | # } 182 | -------------------------------------------------------------------------------- /config/efficientdet.yml: -------------------------------------------------------------------------------- 1 | #Config File example 2 | save_dir: workspace/efficientdet 3 | model: 4 | detector: 5 | name: GFL 6 | backbone: 7 | 8 | name: EfficientNet 9 | arch: efficientnet-b2 10 | out_levels: [3, 4, 5] 11 | norm_eval: False 12 | 13 | neck: 14 | name: BiFPN 15 | in_channels: [48, 120, 352] #如果是b3可以采用配置 [48, 136, 384], 16 | out_channels: 112 17 | num_outs: 3 18 | start_level: 0 19 | end_level: -1 20 | stack: 3 21 | head: 22 | name: QuarkDetHead 23 | num_classes: 80 # 80 24 | input_channel: 112 25 | feat_channels: 112 26 | stacked_convs: 2 27 | share_cls_reg: True #True 28 | octave_base_scale: 5 29 | scales_per_octave: 1 30 | strides: [8, 16, 32] 31 | reg_max: 7 #16 #7 32 | norm_cfg: 33 | type: BN 34 | loss: 35 | loss_qfl: 36 | name: QualityFocalLoss 37 | use_sigmoid: True 38 | beta: 2.0 39 | loss_weight: 1.0 40 | loss_dfl: 41 | name: DistributionFocalLoss 42 | loss_weight: 0.25 43 | loss_bbox: 44 | name: GIoULoss 45 | loss_weight: 2.0 46 | data: 47 | train: 48 | name: coco 49 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/train2017 50 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_train2017.json 51 | input_size: [320,320] #[w,h] 52 | keep_ratio: True 53 | pipeline: 54 | perspective: 0.0 55 | scale: [0.6, 1.4] 56 | stretch: [[1, 1], [1, 1]] 57 | rotation: 0 58 | shear: 0 59 | translate: 0 60 | flip: 0.5 61 | brightness: 0.2 62 | contrast: [0.8, 1.2] 63 | saturation: [0.8, 1.2] 64 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 65 | val: 66 | name: coco 67 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/val2017 68 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_val2017.json 69 | input_size: [320,320] #[w,h] 70 | keep_ratio: True 71 | pipeline: 72 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 73 | device: 74 | gpu_ids: [1] 75 | workers_per_gpu: 8 76 | batchsize_per_gpu: 20 # 80 #40 #160 santiago test 77 | schedule: 78 | resume: False 79 | load_model: ./workspace/efficientdet/model_last.pth 80 | 81 | optimizer: 82 | name: SGD 83 | lr: 0.14 84 | momentum: 0.9 85 | weight_decay: 0.0001 86 | warmup: 87 | name: linear 88 | steps: 1 #santiago test 89 | ratio: 0.1 90 | total_epochs: 90 91 | 92 | lr_schedule: 93 | name: ReduceLROnPlateau 94 | mode: min 95 | factor: 0.1 96 | patience: 2 #15 97 | verbose: True 98 | threshold: 0.00001 99 | threshold_mode: rel 100 | cooldown: 0 101 | min_lr: 0 102 | eps: 0.000000001 #1e-08 103 | 104 | val_intervals: 10 105 | evaluator: 106 | name: CocoDetectionEvaluator 107 | save_key: mAP 108 | 109 | log: 110 | 
interval: 10 #10 111 | 112 | class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 113 | 'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant', 114 | 'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog', 115 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 116 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 117 | 'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat', 118 | 'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket', 119 | 'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 120 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 121 | 'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 122 | 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop', 123 | 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave', 124 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 125 | 'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush'] 126 | 127 | 128 | 129 | # { 130 | # 'img': tensor([ 131 | # [ 132 | # [ 133 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 134 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 135 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 136 | # ..., [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 137 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 138 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667] 139 | # ], 140 | 141 | # [ 142 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 143 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 144 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 145 | # ..., [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 146 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 147 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013] 148 | # ], 149 | 150 | # [ 151 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 152 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 153 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 154 | # ..., [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 155 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 156 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668] 157 | # ] 158 | # ] 159 | # ]), 160 | # 'img_info': { 161 | # 'license': tensor([2]), 162 | # 'file_name': ['000000007616.jpg'], 163 | # 'coco_url': ['http://images.cocodataset.org/train2017/000000007616.jpg'], 164 | # 'height': tensor([375]), 165 | # 'width': tensor([500]), 166 | # 'date_captured': ['2013-11-16 19:22:23'], 167 | # 'flickr_url': ['http://farm1.staticflickr.com/3/6939216_ea2aca1399_z.jpg'], 168 | # 'id': tensor([7616]) 169 | # }, 170 | # 'gt_bboxes': [array([ 171 | # [193.312, 153.37599, 216.5952, 175.8784], 172 | # [110.0224, 135.4624, 208.1792, 215.2832], 173 | # [160.1216, 85.7984, 168.64641, 110.976], 174 | # [204.7232, 93.6704, 212.2048, 108.3904], 175 | # [85.414406, 148.8192, 111.8976, 167.5584], 176 | # [236.0832, 155.96161, 267.5264, 166.3424], 177 | # [1.0816001, 231.6224, 320., 277.568], 178 | # [85.4656, 148.3776, 112.22401, 168.096], 179 | # [40.7232, 109.024, 44.607998, 121.5552] 180 | # ], 181 | # dtype = float32)], 182 | # 'gt_labels': [array([2, 7, 9, 9, 2, 2, 2, 7, 9])], 183 | # 'warp_matrix': [array([ 184 | # [0.64, 0., 0.], 185 | # [0., 0.64, 40.], 186 | # [0., 0., 1.] 
187 | # ])] 188 | # } 189 | -------------------------------------------------------------------------------- /config/ghostnet_full.yml: -------------------------------------------------------------------------------- 1 | #Config File example 2 | save_dir: workspace/ghostnet_full 3 | model: 4 | detector: 5 | name: GFL 6 | backbone: 7 | 8 | name: GhostNet_full 9 | width_mult: 1.0 10 | out_stages: [4, 6, 9] 11 | act: ReLU 12 | 13 | neck: 14 | name: PAN 15 | in_channels: [40, 112, 960] 16 | out_channels: 96 17 | start_level: 0 18 | num_outs: 3 19 | head: 20 | name: QuarkDetHead 21 | num_classes: 80 # 80 22 | input_channel: 96 23 | feat_channels: 96 24 | stacked_convs: 2 25 | share_cls_reg: True #True 26 | octave_base_scale: 5 27 | scales_per_octave: 1 28 | strides: [8, 16, 32] 29 | reg_max: 7 #16 #7 30 | norm_cfg: 31 | type: BN 32 | loss: 33 | loss_qfl: 34 | name: QualityFocalLoss 35 | use_sigmoid: True 36 | beta: 2.0 37 | loss_weight: 1.0 38 | loss_dfl: 39 | name: DistributionFocalLoss 40 | loss_weight: 0.25 41 | loss_bbox: 42 | name: GIoULoss 43 | loss_weight: 2.0 44 | data: 45 | train: 46 | name: coco 47 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/train2017 48 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_train2017.json 49 | input_size: [320,320] #[w,h] 50 | keep_ratio: True 51 | pipeline: 52 | perspective: 0.0 53 | scale: [0.6, 1.4] 54 | stretch: [[1, 1], [1, 1]] 55 | rotation: 0 56 | shear: 0 57 | translate: 0 58 | flip: 0.5 59 | brightness: 0.2 60 | contrast: [0.8, 1.2] 61 | saturation: [0.8, 1.2] 62 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 63 | val: 64 | name: coco 65 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/val2017 66 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_val2017.json 67 | input_size: [320,320] #[w,h] 68 | keep_ratio: True 69 | pipeline: 70 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 71 | device: 72 | gpu_ids: [0] 73 | workers_per_gpu: 8 74 | batchsize_per_gpu: 80 #40 #160 santiago test 75 | schedule: 76 | resume: True 77 | load_model: ./workspace/ghostnet_full/model_last.pth 78 | 79 | optimizer: 80 | name: SGD 81 | lr: 0.14 82 | momentum: 0.9 83 | weight_decay: 0.0001 84 | warmup: 85 | name: linear 86 | steps: 300 #300 #santiago test 87 | ratio: 0.1 88 | total_epochs: 90 89 | 90 | lr_schedule: 91 | name: ReduceLROnPlateau 92 | mode: min 93 | factor: 0.1 94 | patience: 3 95 | verbose: True 96 | threshold: 0.00001 97 | threshold_mode: rel 98 | cooldown: 0 99 | min_lr: 0 100 | eps: 0.000000001 #1e-08 101 | 102 | val_intervals: 10 103 | evaluator: 104 | name: CocoDetectionEvaluator 105 | save_key: mAP 106 | 107 | log: 108 | interval: 10 #10 109 | 110 | class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 111 | 'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant', 112 | 'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog', 113 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 114 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 115 | 'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat', 116 | 'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket', 117 | 'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 118 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 119 | 'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 120 | 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop', 121 | 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave', 122 | 
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 123 | 'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush'] 124 | 125 | 126 | 127 | # { 128 | # 'img': tensor([ 129 | # [ 130 | # [ 131 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 132 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 133 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 134 | # ..., [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 135 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 136 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667] 137 | # ], 138 | 139 | # [ 140 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 141 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 142 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 143 | # ..., [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 144 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 145 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013] 146 | # ], 147 | 148 | # [ 149 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 150 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 151 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 152 | # ..., [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 153 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 154 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668] 155 | # ] 156 | # ] 157 | # ]), 158 | # 'img_info': { 159 | # 'license': tensor([2]), 160 | # 'file_name': ['000000007616.jpg'], 161 | # 'coco_url': ['http://images.cocodataset.org/train2017/000000007616.jpg'], 162 | # 'height': tensor([375]), 163 | # 'width': tensor([500]), 164 | # 'date_captured': ['2013-11-16 19:22:23'], 165 | # 'flickr_url': ['http://farm1.staticflickr.com/3/6939216_ea2aca1399_z.jpg'], 166 | # 'id': tensor([7616]) 167 | # }, 168 | # 'gt_bboxes': [array([ 169 | # [193.312, 153.37599, 216.5952, 175.8784], 170 | # [110.0224, 135.4624, 208.1792, 215.2832], 171 | # [160.1216, 85.7984, 168.64641, 110.976], 172 | # [204.7232, 93.6704, 212.2048, 108.3904], 173 | # [85.414406, 148.8192, 111.8976, 167.5584], 174 | # [236.0832, 155.96161, 267.5264, 166.3424], 175 | # [1.0816001, 231.6224, 320., 277.568], 176 | # [85.4656, 148.3776, 112.22401, 168.096], 177 | # [40.7232, 109.024, 44.607998, 121.5552] 178 | # ], 179 | # dtype = float32)], 180 | # 'gt_labels': [array([2, 7, 9, 9, 2, 2, 2, 7, 9])], 181 | # 'warp_matrix': [array([ 182 | # [0.64, 0., 0.], 183 | # [0., 0.64, 40.], 184 | # [0., 0., 1.] 
185 | # ])] 186 | # } 187 | 188 | 189 | 190 | # ghostnet精简版本 191 | # 对GhostNet做了以下精简 192 | # 取出stage5中expansion size等于960的所有层,去除的层还包括 193 | # Conv2d 1×1 the number of output channels等于960和1280的层,平均池化层和最后的全连接层 194 | -------------------------------------------------------------------------------- /config/ghostnet_full_bifpn.yml: -------------------------------------------------------------------------------- 1 | #Config File example 2 | save_dir: workspace/ghostnet_full_bifpn 3 | model: 4 | detector: 5 | name: GFL 6 | backbone: 7 | 8 | name: GhostNet_full 9 | width_mult: 1.0 10 | out_stages: [4, 6, 9] 11 | act: ReLU 12 | 13 | neck: 14 | name: BiFPN 15 | in_channels: [40, 112, 960] 16 | out_channels: 96 17 | num_outs: 3 18 | start_level: 0 19 | end_level: -1 20 | stack: 3 21 | head: 22 | name: QuarkDetHead 23 | num_classes: 80 # 80 24 | input_channel: 96 25 | feat_channels: 96 26 | stacked_convs: 2 27 | share_cls_reg: True #True 28 | octave_base_scale: 5 29 | scales_per_octave: 1 30 | strides: [8, 16, 32] 31 | reg_max: 7 #16 #7 32 | norm_cfg: 33 | type: BN 34 | loss: 35 | loss_qfl: 36 | name: QualityFocalLoss 37 | use_sigmoid: True 38 | beta: 2.0 39 | loss_weight: 1.0 40 | loss_dfl: 41 | name: DistributionFocalLoss 42 | loss_weight: 0.25 43 | loss_bbox: 44 | name: GIoULoss 45 | loss_weight: 2.0 46 | data: 47 | train: 48 | name: coco 49 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/train2017 50 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_train2017.json 51 | input_size: [320,320] #[w,h] 52 | keep_ratio: True 53 | pipeline: 54 | perspective: 0.0 55 | scale: [0.6, 1.4] 56 | stretch: [[1, 1], [1, 1]] 57 | rotation: 0 58 | shear: 0 59 | translate: 0 60 | flip: 0.5 61 | brightness: 0.2 62 | contrast: [0.8, 1.2] 63 | saturation: [0.8, 1.2] 64 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 65 | val: 66 | name: coco 67 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/val2017 68 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_val2017.json 69 | input_size: [320,320] #[w,h] 70 | keep_ratio: True 71 | pipeline: 72 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 73 | device: 74 | gpu_ids: [1] 75 | workers_per_gpu: 8 76 | batchsize_per_gpu: 80 # 80 #40 #160 santiago test 77 | schedule: 78 | resume: False 79 | load_model: ./workspace/ghostnet_full_bifpn/model_last.pth 80 | 81 | optimizer: 82 | name: SGD 83 | lr: 0.14 84 | momentum: 0.9 85 | weight_decay: 0.0001 86 | warmup: 87 | name: linear 88 | steps: 300 #santiago test 89 | ratio: 0.1 90 | total_epochs: 90 91 | 92 | lr_schedule: 93 | name: ReduceLROnPlateau 94 | mode: min 95 | factor: 0.1 96 | patience: 3 #15 97 | verbose: True 98 | threshold: 0.00001 99 | threshold_mode: rel 100 | cooldown: 0 101 | min_lr: 0 102 | eps: 0.000000001 103 | 104 | val_intervals: 10 105 | evaluator: 106 | name: CocoDetectionEvaluator 107 | save_key: mAP 108 | 109 | log: 110 | interval: 10 #10 111 | 112 | class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 113 | 'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant', 114 | 'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog', 115 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 116 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 117 | 'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat', 118 | 'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket', 119 | 'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 120 | 'banana', 'apple', 
'sandwich', 'orange', 'broccoli', 'carrot', 121 | 'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 122 | 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop', 123 | 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave', 124 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 125 | 'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush'] 126 | 127 | 128 | 129 | # { 130 | # 'img': tensor([ 131 | # [ 132 | # [ 133 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 134 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 135 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 136 | # ..., [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 137 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 138 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667] 139 | # ], 140 | 141 | # [ 142 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 143 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 144 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 145 | # ..., [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 146 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 147 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013] 148 | # ], 149 | 150 | # [ 151 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 152 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 153 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 154 | # ..., [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 155 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 156 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668] 157 | # ] 158 | # ] 159 | # ]), 160 | # 'img_info': { 161 | # 'license': tensor([2]), 162 | # 'file_name': ['000000007616.jpg'], 163 | # 'coco_url': ['http://images.cocodataset.org/train2017/000000007616.jpg'], 164 | # 'height': tensor([375]), 165 | # 'width': tensor([500]), 166 | # 'date_captured': ['2013-11-16 19:22:23'], 167 | # 'flickr_url': ['http://farm1.staticflickr.com/3/6939216_ea2aca1399_z.jpg'], 168 | # 'id': tensor([7616]) 169 | # }, 170 | # 'gt_bboxes': [array([ 171 | # [193.312, 153.37599, 216.5952, 175.8784], 172 | # [110.0224, 135.4624, 208.1792, 215.2832], 173 | # [160.1216, 85.7984, 168.64641, 110.976], 174 | # [204.7232, 93.6704, 212.2048, 108.3904], 175 | # [85.414406, 148.8192, 111.8976, 167.5584], 176 | # [236.0832, 155.96161, 267.5264, 166.3424], 177 | # [1.0816001, 231.6224, 320., 277.568], 178 | # [85.4656, 148.3776, 112.22401, 168.096], 179 | # [40.7232, 109.024, 44.607998, 121.5552] 180 | # ], 181 | # dtype = float32)], 182 | # 'gt_labels': [array([2, 7, 9, 9, 2, 2, 2, 7, 9])], 183 | # 'warp_matrix': [array([ 184 | # [0.64, 0., 0.], 185 | # [0., 0.64, 40.], 186 | # [0., 0., 1.] 
187 | # ])] 188 | # } 189 | 190 | 191 | 192 | # ghostnet精简版本 193 | # 对GhostNet做了以下精简 194 | # 取出stage5中expansion size等于960的所有层,去除的层还包括 195 | # Conv2d 1×1 the number of output channels等于960和1280的层,平均池化层和最后的全连接层 196 | -------------------------------------------------------------------------------- /quarkdet/data/transform/warp.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | import cv2 4 | import math 5 | 6 | #PyTorch已经有现成的函数来替代 7 | def get_flip_matrix(prob=0.5): 8 | F = np.eye(3) 9 | if random.random() < prob: 10 | F[0, 0] = -1 11 | return F 12 | 13 | def get_perspective_matrix(perspective=0): 14 | """ 15 | 16 | :param perspective: 17 | :return: 18 | """ 19 | P = np.eye(3) 20 | P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y) 21 | P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x) 22 | return P 23 | 24 | 25 | def get_rotation_matrix(degree=0): 26 | """ 27 | 28 | :param degree: 29 | :return: 30 | """ 31 | R = np.eye(3) 32 | a = random.uniform(-degree, degree) 33 | R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=1) 34 | return R 35 | 36 | 37 | def get_scale_matrix(ratio=(1, 1)): 38 | """ 39 | 40 | :param width_ratio: 41 | :param height_ratio: 42 | """ 43 | Scl = np.eye(3) 44 | scale = random.uniform(*ratio) 45 | Scl[0, 0] *= scale 46 | Scl[1, 1] *= scale 47 | return Scl 48 | 49 | 50 | def get_stretch_matrix(width_ratio=(1, 1), height_ratio=(1, 1)): 51 | """ 52 | 53 | :param width_ratio: 54 | :param height_ratio: 55 | """ 56 | Str = np.eye(3) 57 | Str[0, 0] *= random.uniform(*width_ratio) 58 | Str[1, 1] *= random.uniform(*height_ratio) 59 | return Str 60 | 61 | 62 | def get_shear_matrix(degree): 63 | """ 64 | 65 | :param degree: 66 | :return: 67 | """ 68 | Sh = np.eye(3) 69 | Sh[0, 1] = math.tan(random.uniform(-degree, degree) * math.pi / 180) # x shear (deg) 70 | Sh[1, 0] = math.tan(random.uniform(-degree, degree) * math.pi / 180) # y shear (deg) 71 | return Sh 72 | 73 | 74 | def get_translate_matrix(translate, width, height): 75 | """ 76 | 77 | :param translate: 78 | :return: 79 | """ 80 | T = np.eye(3) 81 | T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width # x translation 82 | T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height # y translation 83 | return T 84 | 85 | 86 | def get_resize_matrix(raw_shape, dst_shape, keep_ratio): 87 | """ 88 | Get resize matrix for resizing raw img to input size 89 | :param raw_shape: (width, height) of raw image 90 | :param dst_shape: (width, height) of input image 91 | :param keep_ratio: whether keep original ratio 92 | :return: 3x3 Matrix 93 | """ 94 | r_w, r_h = raw_shape 95 | d_w, d_h = dst_shape 96 | Rs = np.eye(3) 97 | if keep_ratio: 98 | C = np.eye(3) 99 | C[0, 2] = - r_w / 2 100 | C[1, 2] = - r_h / 2 101 | 102 | if r_w / r_h < d_w / d_h: 103 | ratio = d_h / r_h 104 | else: 105 | ratio = d_w / r_w 106 | Rs[0, 0] *= ratio 107 | Rs[1, 1] *= ratio 108 | 109 | T = np.eye(3) 110 | T[0, 2] = 0.5 * d_w 111 | T[1, 2] = 0.5 * d_h 112 | return T @ Rs @ C 113 | else: 114 | Rs[0, 0] *= d_w / r_w 115 | Rs[1, 1] *= d_h / r_h 116 | return Rs 117 | 118 | def warp_and_resize(meta, warp_kwargs, dst_shape, keep_ratio=True): 119 | # TODO: background, type 120 | raw_img = meta['img'] 121 | height = raw_img.shape[0] # shape(h,w,c) 122 | width = raw_img.shape[1] 123 | 124 | # center 125 | C = np.eye(3) 126 | C[0, 2] = - width / 2 127 | C[1, 2] = - height / 2 128 | 129 | # do not change the 
order of mat mul 130 | if 'perspective' in warp_kwargs and random.randint(0, 1): 131 | P = get_perspective_matrix(warp_kwargs['perspective']) 132 | C = P @ C 133 | if 'scale' in warp_kwargs and random.randint(0, 1): 134 | Scl = get_scale_matrix(warp_kwargs['scale']) 135 | C = Scl @ C 136 | if 'stretch' in warp_kwargs and random.randint(0, 1): 137 | Str = get_stretch_matrix(*warp_kwargs['stretch']) 138 | C = Str @ C 139 | if 'rotation' in warp_kwargs and random.randint(0, 1): 140 | R = get_rotation_matrix(warp_kwargs['rotation']) 141 | C = R @ C 142 | if 'shear' in warp_kwargs and random.randint(0, 1): 143 | Sh = get_shear_matrix(warp_kwargs['shear']) 144 | C = Sh @ C 145 | if 'flip' in warp_kwargs: 146 | F = get_flip_matrix(warp_kwargs['flip']) 147 | C = F @ C 148 | if 'translate' in warp_kwargs and random.randint(0, 1): 149 | T = get_translate_matrix(warp_kwargs['translate'], width, height) 150 | else: 151 | T = get_translate_matrix(0, width, height) 152 | M = T @ C 153 | # M = T @ Sh @ R @ Str @ P @ C 154 | ResizeM = get_resize_matrix((width, height), dst_shape, keep_ratio) 155 | M = ResizeM @ M 156 | img = cv2.warpPerspective(raw_img, M, dsize=tuple(dst_shape)) 157 | meta['img'] = img 158 | meta['warp_matrix'] = M 159 | if 'gt_bboxes' in meta: 160 | boxes = meta['gt_bboxes'] 161 | meta['gt_bboxes'] = warp_boxes(boxes, M, dst_shape[0], dst_shape[1]) 162 | if 'gt_masks' in meta: 163 | for i, mask in enumerate(meta['gt_masks']): 164 | meta['gt_masks'][i] = cv2.warpPerspective(mask, M, dsize=tuple(dst_shape)) 165 | 166 | # TODO: keypoints 167 | # if 'gt_keypoints' in meta: 168 | 169 | return meta 170 | 171 | 172 | def warp_boxes(boxes, M, width, height): 173 | n = len(boxes) 174 | if n: 175 | # warp points 176 | xy = np.ones((n * 4, 3)) 177 | xy[:, :2] = boxes[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1 178 | xy = xy @ M.T # transform 179 | xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8) # rescale 180 | # create new boxes 181 | x = xy[:, [0, 2, 4, 6]] 182 | y = xy[:, [1, 3, 5, 7]] 183 | xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T 184 | # clip boxes 185 | xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width) 186 | xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height) 187 | return xy.astype(np.float32) 188 | else: 189 | return boxes 190 | 191 | # def warp_keypoints(keypoints, M, width, height): 192 | # n = len(keypoints) 193 | # if n: 194 | # 195 | # # warp points 196 | # xy = np.ones((n * 4, 3)) 197 | # xy[:, :2] = boxes[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1 198 | # xy = xy @ M.T # transform 199 | # xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8) # rescale 200 | # # create new boxes 201 | # x = xy[:, [0, 2, 4, 6]] 202 | # y = xy[:, [1, 3, 5, 7]] 203 | # xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T 204 | # # clip boxes 205 | # xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width) 206 | # xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height) 207 | # return xy 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | -------------------------------------------------------------------------------- /config/ghostnet_slim640.yml: -------------------------------------------------------------------------------- 1 | #Config File example 2 | save_dir: workspace/GhostNet_slim640 3 | model: 4 | detector: 5 | name: GFL 6 | backbone: 7 | 8 | name: GhostNet_slim 9 | width_mult: 1.0 10 | #out_stages: [4, 6, 9] 11 | out_stages: [4, 6, 7] 12 | act: ReLU 13 | 14 | neck: 15 | name: PAN 16 | 
#in_channels: [40, 112, 960] 17 | in_channels: [40, 112, 160] 18 | out_channels: 96 19 | start_level: 0 20 | num_outs: 3 21 | head: 22 | name: QuarkDetHead 23 | num_classes: 80 # 80 24 | input_channel: 96 25 | feat_channels: 96 26 | stacked_convs: 2 27 | share_cls_reg: True #True 28 | octave_base_scale: 5 29 | scales_per_octave: 1 30 | strides: [8, 16, 32] 31 | reg_max: 7 #16 #7 32 | norm_cfg: 33 | type: BN 34 | loss: 35 | loss_qfl: 36 | name: QualityFocalLoss 37 | use_sigmoid: True 38 | beta: 2.0 39 | loss_weight: 1.0 40 | loss_dfl: 41 | name: DistributionFocalLoss 42 | loss_weight: 0.25 43 | loss_bbox: 44 | name: GIoULoss 45 | loss_weight: 2.0 46 | 47 | 48 | data: 49 | train: 50 | name: coco 51 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/train2017 52 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_train2017.json 53 | input_size: [640,640] #[320,320] #[w,h] 54 | keep_ratio: True 55 | pipeline: 56 | perspective: 0.0 57 | scale: [0.6, 1.4] 58 | stretch: [[1, 1], [1, 1]] 59 | rotation: 0 60 | shear: 0 61 | translate: 0 62 | flip: 0.5 63 | brightness: 0.2 64 | contrast: [0.8, 1.2] 65 | saturation: [0.8, 1.2] 66 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 67 | 68 | load_mosaic: True 69 | mosaic_probability: 0.3 70 | mosaic_area: 9 71 | mosaic_image_size: 640 72 | 73 | val: 74 | name: coco 75 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/val2017 76 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_val2017.json 77 | input_size: [640,640] #[320,320] #[w,h] 78 | keep_ratio: True 79 | pipeline: 80 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 81 | device: 82 | gpu_ids: [1] 83 | workers_per_gpu: 8 #8 84 | batchsize_per_gpu: 10 #160 santiago test 85 | schedule: 86 | resume: False 87 | load_model: ./workspace/GhostNet_slim640/model_last.pth 88 | 89 | optimizer: 90 | name: SGD 91 | lr: 0.14 92 | momentum: 0.9 93 | weight_decay: 0.0001 94 | warmup: 95 | name: linear 96 | steps: 300 #300 #santiago test 97 | ratio: 0.1 98 | total_epochs: 300 #70 99 | # lr_schedule: 100 | # name: MultiStepLR 101 | # milestones: [30,80,130,150,155,160] 102 | # gamma: 0.1 103 | 104 | lr_schedule: 105 | name: ReduceLROnPlateau 106 | mode: 'min' 107 | factor: 0.1 108 | patience: 3 #15 109 | verbose: True 110 | threshold: 0.00001 111 | threshold_mode: 'rel' 112 | cooldown: 0 113 | min_lr: 0 114 | eps: 0.000000001 #1e-08 115 | 116 | val_intervals: 5 117 | evaluator: 118 | name: CocoDetectionEvaluator 119 | save_key: mAP 120 | 121 | log: 122 | interval: 5 #10 123 | 124 | class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 125 | 'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant', 126 | 'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog', 127 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 128 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 129 | 'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat', 130 | 'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket', 131 | 'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 132 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 133 | 'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 134 | 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop', 135 | 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave', 136 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 137 | 'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush'] 138 | 139 | 140 | 141 | 
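# A minimal, self-contained sketch (not code from this repo) of how the optimizer and
# lr_schedule blocks above presumably map onto the PyTorch API: the 'name' key selects the
# class and the remaining keys are passed through as keyword arguments. The nn.Linear model
# below is a stand-in for illustration only.
#
#   import torch
#   from torch import nn, optim
#
#   model = nn.Linear(1, 1)  # placeholder model; the real detector is built from cfg.model
#   optimizer = optim.SGD(model.parameters(), lr=0.14, momentum=0.9, weight_decay=0.0001)
#   lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
#       optimizer, mode='min', factor=0.1, patience=3, verbose=True,
#       threshold=0.00001, threshold_mode='rel', cooldown=0, min_lr=0, eps=0.000000001)
#
#   # ReduceLROnPlateau is metric-driven: call lr_scheduler.step(val_metric) after each
#   # validation round (here 'mode: min', e.g. a loss), unlike MultiStepLR in the other
#   # configs, which is stepped once per epoch with lr_scheduler.step().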
# { 142 | # 'img': tensor([ 143 | # [ 144 | # [ 145 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 146 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 147 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 148 | # ..., [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 149 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 150 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667] 151 | # ], 152 | 153 | # [ 154 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 155 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 156 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 157 | # ..., [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 158 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 159 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013] 160 | # ], 161 | 162 | # [ 163 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 164 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 165 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 166 | # ..., [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 167 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 168 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668] 169 | # ] 170 | # ] 171 | # ]), 172 | # 'img_info': { 173 | # 'license': tensor([2]), 174 | # 'file_name': ['000000007616.jpg'], 175 | # 'coco_url': ['http://images.cocodataset.org/train2017/000000007616.jpg'], 176 | # 'height': tensor([375]), 177 | # 'width': tensor([500]), 178 | # 'date_captured': ['2013-11-16 19:22:23'], 179 | # 'flickr_url': ['http://farm1.staticflickr.com/3/6939216_ea2aca1399_z.jpg'], 180 | # 'id': tensor([7616]) 181 | # }, 182 | # 'gt_bboxes': [array([ 183 | # [193.312, 153.37599, 216.5952, 175.8784], 184 | # [110.0224, 135.4624, 208.1792, 215.2832], 185 | # [160.1216, 85.7984, 168.64641, 110.976], 186 | # [204.7232, 93.6704, 212.2048, 108.3904], 187 | # [85.414406, 148.8192, 111.8976, 167.5584], 188 | # [236.0832, 155.96161, 267.5264, 166.3424], 189 | # [1.0816001, 231.6224, 320., 277.568], 190 | # [85.4656, 148.3776, 112.22401, 168.096], 191 | # [40.7232, 109.024, 44.607998, 121.5552] 192 | # ], 193 | # dtype = float32)], 194 | # 'gt_labels': [array([2, 7, 9, 9, 2, 2, 2, 7, 9])], 195 | # 'warp_matrix': [array([ 196 | # [0.64, 0., 0.], 197 | # [0., 0.64, 40.], 198 | # [0., 0., 1.] 199 | # ])] 200 | # } 201 | 202 | 203 | 204 | 205 | -------------------------------------------------------------------------------- /quarkdet/model/head/assigner/atss_assigner.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ...loss.iou_loss import bbox_overlaps 4 | from .base_assigner import BaseAssigner 5 | from .assign_result import AssignResult 6 | 7 | 8 | class ATSSAssigner(BaseAssigner): 9 | """Assign a corresponding gt bbox or background to each bbox. 10 | 11 | Each proposals will be assigned with `0` or a positive integer 12 | indicating the ground truth index. 
13 | 14 | - 0: negative sample, no assigned gt 15 | - positive integer: positive sample, index (1-based) of assigned gt 16 | 17 | Args: 18 | topk (float): number of bbox selected in each level 19 | """ 20 | 21 | def __init__(self, topk): 22 | self.topk = topk 23 | 24 | # https://github.com/sfzhang15/ATSS/blob/master/atss_core/modeling/rpn/atss/loss.py 25 | 26 | def assign(self, 27 | bboxes, 28 | num_level_bboxes, 29 | gt_bboxes, 30 | gt_bboxes_ignore=None, 31 | gt_labels=None): 32 | """Assign gt to bboxes. 33 | 34 | The assignment is done in following steps 35 | 36 | 1. compute iou between all bbox (bbox of all pyramid levels) and gt 37 | 2. compute center distance between all bbox and gt 38 | 3. on each pyramid level, for each gt, select k bbox whose center 39 | are closest to the gt center, so we total select k*l bbox as 40 | candidates for each gt 41 | 4. get corresponding iou for the these candidates, and compute the 42 | mean and std, set mean + std as the iou threshold 43 | 5. select these candidates whose iou are greater than or equal to 44 | the threshold as postive 45 | 6. limit the positive sample's center in gt 46 | 47 | 48 | Args: 49 | bboxes (Tensor): Bounding boxes to be assigned, shape(n, 4). 50 | num_level_bboxes (List): num of bboxes in each level 51 | gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4). 52 | gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are 53 | labelled as `ignored`, e.g., crowd boxes in COCO. 54 | gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ). 55 | 56 | Returns: 57 | :obj:`AssignResult`: The assign result. 58 | """ 59 | INF = 100000000 60 | bboxes = bboxes[:, :4] 61 | num_gt, num_bboxes = gt_bboxes.size(0), bboxes.size(0) 62 | 63 | # compute iou between all bbox and gt 64 | overlaps = bbox_overlaps(bboxes, gt_bboxes) 65 | 66 | # assign 0 by default 67 | assigned_gt_inds = overlaps.new_full((num_bboxes,), 68 | 0, 69 | dtype=torch.long) 70 | 71 | if num_gt == 0 or num_bboxes == 0: 72 | # No ground truth or boxes, return empty assignment 73 | max_overlaps = overlaps.new_zeros((num_bboxes,)) 74 | if num_gt == 0: 75 | # No truth, assign everything to background 76 | assigned_gt_inds[:] = 0 77 | if gt_labels is None: 78 | assigned_labels = None 79 | else: 80 | assigned_labels = overlaps.new_full((num_bboxes,), 81 | -1, 82 | dtype=torch.long) 83 | return AssignResult( 84 | num_gt, assigned_gt_inds, max_overlaps, labels=assigned_labels) 85 | 86 | # compute center distance between all bbox and gt 87 | gt_cx = (gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2.0 88 | gt_cy = (gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2.0 89 | gt_points = torch.stack((gt_cx, gt_cy), dim=1) 90 | 91 | bboxes_cx = (bboxes[:, 0] + bboxes[:, 2]) / 2.0 92 | bboxes_cy = (bboxes[:, 1] + bboxes[:, 3]) / 2.0 93 | bboxes_points = torch.stack((bboxes_cx, bboxes_cy), dim=1) 94 | 95 | distances = (bboxes_points[:, None, :] - 96 | gt_points[None, :, :]).pow(2).sum(-1).sqrt() 97 | 98 | # Selecting candidates based on the center distance 99 | candidate_idxs = [] 100 | start_idx = 0 101 | for level, bboxes_per_level in enumerate(num_level_bboxes): 102 | # on each pyramid level, for each gt, 103 | # select k bbox whose center are closest to the gt center 104 | end_idx = start_idx + bboxes_per_level 105 | distances_per_level = distances[start_idx:end_idx, :] 106 | selectable_k = min(self.topk, bboxes_per_level) 107 | _, topk_idxs_per_level = distances_per_level.topk( 108 | selectable_k, dim=0, largest=False) 109 | candidate_idxs.append(topk_idxs_per_level + start_idx) 110 | start_idx 
= end_idx 111 | candidate_idxs = torch.cat(candidate_idxs, dim=0) 112 | 113 | # get corresponding iou for the these candidates, and compute the 114 | # mean and std, set mean + std as the iou threshold 115 | candidate_overlaps = overlaps[candidate_idxs, torch.arange(num_gt)] 116 | overlaps_mean_per_gt = candidate_overlaps.mean(0) 117 | overlaps_std_per_gt = candidate_overlaps.std(0) 118 | overlaps_thr_per_gt = overlaps_mean_per_gt + overlaps_std_per_gt 119 | 120 | is_pos = candidate_overlaps >= overlaps_thr_per_gt[None, :] 121 | 122 | # limit the positive sample's center in gt 123 | for gt_idx in range(num_gt): 124 | candidate_idxs[:, gt_idx] += gt_idx * num_bboxes 125 | ep_bboxes_cx = bboxes_cx.view(1, -1).expand( 126 | num_gt, num_bboxes).contiguous().view(-1) 127 | ep_bboxes_cy = bboxes_cy.view(1, -1).expand( 128 | num_gt, num_bboxes).contiguous().view(-1) 129 | candidate_idxs = candidate_idxs.view(-1) 130 | 131 | # calculate the left, top, right, bottom distance between positive 132 | # bbox center and gt side 133 | l_ = ep_bboxes_cx[candidate_idxs].view(-1, num_gt) - gt_bboxes[:, 0] 134 | t_ = ep_bboxes_cy[candidate_idxs].view(-1, num_gt) - gt_bboxes[:, 1] 135 | r_ = gt_bboxes[:, 2] - ep_bboxes_cx[candidate_idxs].view(-1, num_gt) 136 | b_ = gt_bboxes[:, 3] - ep_bboxes_cy[candidate_idxs].view(-1, num_gt) 137 | is_in_gts = torch.stack([l_, t_, r_, b_], dim=1).min(dim=1)[0] > 0.01 138 | is_pos = is_pos & is_in_gts 139 | 140 | # if an anchor box is assigned to multiple gts, 141 | # the one with the highest IoU will be selected. 142 | overlaps_inf = torch.full_like(overlaps, 143 | -INF).t().contiguous().view(-1) 144 | index = candidate_idxs.view(-1)[is_pos.view(-1)] 145 | overlaps_inf[index] = overlaps.t().contiguous().view(-1)[index] 146 | overlaps_inf = overlaps_inf.view(num_gt, -1).t() 147 | 148 | max_overlaps, argmax_overlaps = overlaps_inf.max(dim=1) 149 | assigned_gt_inds[ 150 | max_overlaps != -INF] = argmax_overlaps[max_overlaps != -INF] + 1 151 | 152 | if gt_labels is not None: 153 | assigned_labels = assigned_gt_inds.new_full((num_bboxes,), -1) 154 | pos_inds = torch.nonzero( 155 | assigned_gt_inds > 0, as_tuple=False).squeeze() 156 | if pos_inds.numel() > 0: 157 | assigned_labels[pos_inds] = gt_labels[ 158 | assigned_gt_inds[pos_inds] - 1] 159 | else: 160 | assigned_labels = None 161 | return AssignResult( 162 | num_gt, assigned_gt_inds, max_overlaps, labels=assigned_labels) 163 | -------------------------------------------------------------------------------- /config/quarkdet.yml: -------------------------------------------------------------------------------- 1 | #Config File example 2 | save_dir: workspace/ghostnet 3 | model: 4 | detector: 5 | name: GFL 6 | backbone: 7 | 8 | name: GhostNet 9 | width_mult: 1.0 10 | #out_stages: [4, 6, 9] 11 | out_stages: [4, 6, 7] 12 | act: ReLU 13 | 14 | neck: 15 | name: PAN 16 | #in_channels: [40, 112, 960] 17 | in_channels: [40, 112, 160] 18 | out_channels: 96 19 | start_level: 0 20 | num_outs: 3 21 | head: 22 | name: QuarkDetHead 23 | num_classes: 80 # 80 24 | input_channel: 96 25 | feat_channels: 96 26 | stacked_convs: 2 27 | share_cls_reg: True #True 28 | octave_base_scale: 5 29 | scales_per_octave: 1 30 | strides: [8, 16, 32] 31 | reg_max: 7 #16 #7 32 | norm_cfg: 33 | type: BN 34 | loss: 35 | loss_qfl: 36 | name: QualityFocalLoss 37 | use_sigmoid: False #True 38 | beta: 2.0 39 | loss_weight: 1.0 40 | loss_dfl: 41 | name: DistributionFocalLoss 42 | loss_weight: 0.25 43 | loss_bbox: 44 | name: GIoULoss 45 | loss_weight: 2.0 46 | data: 47 
| train: 48 | name: coco 49 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/train2017 50 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_train2017.json 51 | input_size: [320,320] #[w,h] 52 | keep_ratio: True 53 | pipeline: 54 | perspective: 0.0 55 | scale: [0.6, 1.4] 56 | stretch: [[1, 1], [1, 1]] 57 | rotation: 0 58 | shear: 0 59 | translate: 0 60 | flip: 0.5 61 | brightness: 0.2 62 | contrast: [0.8, 1.2] 63 | saturation: [0.8, 1.2] 64 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 65 | val: 66 | name: coco 67 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/val2017 68 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_val2017.json 69 | input_size: [320,320] #[w,h] 70 | keep_ratio: True 71 | pipeline: 72 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 73 | device: 74 | gpu_ids: [0,1] 75 | workers_per_gpu: 16 #8 76 | batchsize_per_gpu: 80 # 80 #40 #160 santiago test 77 | schedule: 78 | resume: False 79 | load_model: ./workspace/ghostnet/model_last.pth 80 | 81 | optimizer: 82 | name: SGD 83 | lr: 0.14 84 | momentum: 0.9 85 | weight_decay: 0.0001 86 | warmup: 87 | name: linear 88 | steps: 100 #300 #santiago test 89 | ratio: 0.1 90 | total_epochs: 160 #70 91 | lr_schedule: 92 | name: MultiStepLR 93 | milestones: [30,80,130,150,155,160] 94 | gamma: 0.1 95 | val_intervals: 5 96 | evaluator: 97 | name: CocoDetectionEvaluator 98 | save_key: mAP 99 | 100 | log: 101 | interval: 10 #10 102 | 103 | class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 104 | 'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant', 105 | 'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog', 106 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 107 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 108 | 'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat', 109 | 'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket', 110 | 'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 111 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 112 | 'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 113 | 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop', 114 | 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave', 115 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 116 | 'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush'] 117 | 118 | 119 | 120 | # { 121 | # 'img': tensor([ 122 | # [ 123 | # [ 124 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 125 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 126 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 127 | # ..., [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 128 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 129 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667] 130 | # ], 131 | 132 | # [ 133 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 134 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 135 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 136 | # ..., [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 137 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 138 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013] 139 | # ], 140 | 141 | # [ 142 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 143 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 
144 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 145 | # ..., [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 146 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 147 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668] 148 | # ] 149 | # ] 150 | # ]), 151 | # 'img_info': { 152 | # 'license': tensor([2]), 153 | # 'file_name': ['000000007616.jpg'], 154 | # 'coco_url': ['http://images.cocodataset.org/train2017/000000007616.jpg'], 155 | # 'height': tensor([375]), 156 | # 'width': tensor([500]), 157 | # 'date_captured': ['2013-11-16 19:22:23'], 158 | # 'flickr_url': ['http://farm1.staticflickr.com/3/6939216_ea2aca1399_z.jpg'], 159 | # 'id': tensor([7616]) 160 | # }, 161 | # 'gt_bboxes': [array([ 162 | # [193.312, 153.37599, 216.5952, 175.8784], 163 | # [110.0224, 135.4624, 208.1792, 215.2832], 164 | # [160.1216, 85.7984, 168.64641, 110.976], 165 | # [204.7232, 93.6704, 212.2048, 108.3904], 166 | # [85.414406, 148.8192, 111.8976, 167.5584], 167 | # [236.0832, 155.96161, 267.5264, 166.3424], 168 | # [1.0816001, 231.6224, 320., 277.568], 169 | # [85.4656, 148.3776, 112.22401, 168.096], 170 | # [40.7232, 109.024, 44.607998, 121.5552] 171 | # ], 172 | # dtype = float32)], 173 | # 'gt_labels': [array([2, 7, 9, 9, 2, 2, 2, 7, 9])], 174 | # 'warp_matrix': [array([ 175 | # [0.64, 0., 0.], 176 | # [0., 0.64, 40.], 177 | # [0., 0., 1.] 178 | # ])] 179 | # } 180 | 181 | 182 | 183 | # ghostnet精简版本 184 | # 对GhostNet做了以下精简 185 | # 取出stage5中expansion size等于960的所有层,去除的层还包括 186 | # Conv2d 1×1 the number of output channels等于960和1280的层,平均池化层和最后的全连接层 187 | 188 | # Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.198 189 | # Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.339 190 | # Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.198 191 | # Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.059 192 | # Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.197 193 | # Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.323 194 | # Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.211 195 | # Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.340 196 | # Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.362 197 | # Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.105 198 | # Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.410 199 | # Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.583 -------------------------------------------------------------------------------- /config/ghostnet_slim.yml: -------------------------------------------------------------------------------- 1 | #Config File example 2 | save_dir: workspace/GhostNet_slim 3 | model: 4 | detector: 5 | name: GFL 6 | backbone: 7 | 8 | name: GhostNet_slim 9 | width_mult: 1.0 10 | #out_stages: [4, 6, 9] 11 | out_stages: [4, 6, 7] 12 | act: ReLU 13 | 14 | neck: 15 | name: PAN 16 | #in_channels: [40, 112, 960] 17 | in_channels: [40, 112, 160] 18 | out_channels: 96 19 | start_level: 0 20 | num_outs: 3 21 | head: 22 | name: QuarkDetHead 23 | num_classes: 80 # 80 24 | input_channel: 96 25 | feat_channels: 96 26 | stacked_convs: 2 27 | share_cls_reg: True #True 28 | octave_base_scale: 5 29 | scales_per_octave: 1 30 | strides: [8, 16, 32] 31 | reg_max: 7 #16 #7 32 | norm_cfg: 33 | type: BN 34 | loss: 35 | loss_qfl: 36 | name: QualityFocalLoss 37 | use_sigmoid: 
False #True 38 | beta: 2.0 39 | loss_weight: 1.0 40 | loss_dfl: 41 | name: DistributionFocalLoss 42 | loss_weight: 0.25 43 | loss_bbox: 44 | name: GIoULoss 45 | loss_weight: 2.0 46 | data: 47 | train: 48 | name: coco 49 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/train2017 50 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_train2017.json 51 | input_size: [320,320] #[w,h] 52 | keep_ratio: True 53 | pipeline: 54 | perspective: 0.0 55 | scale: [0.6, 1.4] 56 | stretch: [[1, 1], [1, 1]] 57 | rotation: 0 58 | shear: 0 59 | translate: 0 60 | flip: 0.5 61 | brightness: 0.2 62 | contrast: [0.8, 1.2] 63 | saturation: [0.8, 1.2] 64 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 65 | val: 66 | name: coco 67 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/val2017 68 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_val2017.json 69 | input_size: [320,320] #[w,h] 70 | keep_ratio: True 71 | pipeline: 72 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 73 | device: 74 | gpu_ids: [1] 75 | workers_per_gpu: 16 #8 76 | batchsize_per_gpu: 80 # 80 #40 #160 santiago test 77 | schedule: 78 | resume: False 79 | load_model: ./workspace/GhostNet_slim/model_last.pth 80 | 81 | optimizer: 82 | name: SGD 83 | lr: 0.14 84 | momentum: 0.9 85 | weight_decay: 0.0001 86 | warmup: 87 | name: linear 88 | steps: 100 #300 #santiago test 89 | ratio: 0.1 90 | total_epochs: 160 #70 91 | lr_schedule: 92 | name: MultiStepLR 93 | milestones: [30,80,130,150,155,160] 94 | gamma: 0.1 95 | val_intervals: 5 96 | evaluator: 97 | name: CocoDetectionEvaluator 98 | save_key: mAP 99 | 100 | log: 101 | interval: 10 #10 102 | 103 | class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 104 | 'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant', 105 | 'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog', 106 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 107 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 108 | 'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat', 109 | 'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket', 110 | 'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 111 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 112 | 'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 113 | 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop', 114 | 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave', 115 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 116 | 'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush'] 117 | 118 | 119 | 120 | # { 121 | # 'img': tensor([ 122 | # [ 123 | # [ 124 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 125 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 126 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 127 | # ..., [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 128 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 129 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667] 130 | # ], 131 | 132 | # [ 133 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 134 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 135 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 136 | # ..., [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 137 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 138 | # [-2.8013, -2.8013, -2.8013, 
..., -2.8013, -2.8013, -2.8013] 139 | # ], 140 | 141 | # [ 142 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 143 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 144 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 145 | # ..., [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 146 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 147 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668] 148 | # ] 149 | # ] 150 | # ]), 151 | # 'img_info': { 152 | # 'license': tensor([2]), 153 | # 'file_name': ['000000007616.jpg'], 154 | # 'coco_url': ['http://images.cocodataset.org/train2017/000000007616.jpg'], 155 | # 'height': tensor([375]), 156 | # 'width': tensor([500]), 157 | # 'date_captured': ['2013-11-16 19:22:23'], 158 | # 'flickr_url': ['http://farm1.staticflickr.com/3/6939216_ea2aca1399_z.jpg'], 159 | # 'id': tensor([7616]) 160 | # }, 161 | # 'gt_bboxes': [array([ 162 | # [193.312, 153.37599, 216.5952, 175.8784], 163 | # [110.0224, 135.4624, 208.1792, 215.2832], 164 | # [160.1216, 85.7984, 168.64641, 110.976], 165 | # [204.7232, 93.6704, 212.2048, 108.3904], 166 | # [85.414406, 148.8192, 111.8976, 167.5584], 167 | # [236.0832, 155.96161, 267.5264, 166.3424], 168 | # [1.0816001, 231.6224, 320., 277.568], 169 | # [85.4656, 148.3776, 112.22401, 168.096], 170 | # [40.7232, 109.024, 44.607998, 121.5552] 171 | # ], 172 | # dtype = float32)], 173 | # 'gt_labels': [array([2, 7, 9, 9, 2, 2, 2, 7, 9])], 174 | # 'warp_matrix': [array([ 175 | # [0.64, 0., 0.], 176 | # [0., 0.64, 40.], 177 | # [0., 0., 1.] 178 | # ])] 179 | # } 180 | 181 | 182 | 183 | # ghostnet精简版本 184 | # 对GhostNet做了以下精简 185 | # 取出stage5中expansion size等于960的所有层,去除的层还包括 186 | # Conv2d 1×1 the number of output channels等于960和1280的层,平均池化层和最后的全连接层 187 | 188 | # Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.198 189 | # Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.339 190 | # Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.198 191 | # Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.059 192 | # Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.197 193 | # Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.323 194 | # Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.211 195 | # Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.340 196 | # Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.362 197 | # Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.105 198 | # Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.410 199 | # Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.583 -------------------------------------------------------------------------------- /quarkdet/model/backbone/shufflenetv2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.utils.model_zoo as model_zoo 4 | from ..module.activation import act_layers 5 | 6 | model_urls = { 7 | 'shufflenetv2_0.5x': 'https://download.pytorch.org/models/shufflenetv2_x0.5-f707e7126e.pth', 8 | 'shufflenetv2_1.0x': 'https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth', 9 | 'shufflenetv2_1.5x': None, 10 | 'shufflenetv2_2.0x': None, 11 | } 12 | 13 | 14 | def channel_shuffle(x, groups): 15 | # type: (torch.Tensor, int) -> torch.Tensor 16 | batchsize, 
num_channels, height, width = x.data.size() 17 | channels_per_group = num_channels // groups 18 | 19 | # reshape 20 | x = x.view(batchsize, groups, 21 | channels_per_group, height, width) 22 | 23 | x = torch.transpose(x, 1, 2).contiguous() 24 | 25 | # flatten 26 | x = x.view(batchsize, -1, height, width) 27 | 28 | return x 29 | 30 | 31 | class ShuffleV2Block(nn.Module): 32 | def __init__(self, inp, oup, stride, activation='ReLU'): 33 | super(ShuffleV2Block, self).__init__() 34 | 35 | if not (1 <= stride <= 3): 36 | raise ValueError('illegal stride value') 37 | self.stride = stride 38 | 39 | branch_features = oup // 2 40 | assert (self.stride != 1) or (inp == branch_features << 1) 41 | 42 | if self.stride > 1: 43 | self.branch1 = nn.Sequential( 44 | self.depthwise_conv(inp, inp, kernel_size=3, stride=self.stride, padding=1), 45 | nn.BatchNorm2d(inp), 46 | nn.Conv2d(inp, branch_features, kernel_size=1, stride=1, padding=0, bias=False), 47 | nn.BatchNorm2d(branch_features), 48 | act_layers(activation), 49 | ) 50 | else: 51 | self.branch1 = nn.Sequential() 52 | 53 | self.branch2 = nn.Sequential( 54 | nn.Conv2d(inp if (self.stride > 1) else branch_features, 55 | branch_features, kernel_size=1, stride=1, padding=0, bias=False), 56 | nn.BatchNorm2d(branch_features), 57 | act_layers(activation), 58 | self.depthwise_conv(branch_features, branch_features, kernel_size=3, stride=self.stride, padding=1), 59 | nn.BatchNorm2d(branch_features), 60 | nn.Conv2d(branch_features, branch_features, kernel_size=1, stride=1, padding=0, bias=False), 61 | nn.BatchNorm2d(branch_features), 62 | act_layers(activation), 63 | ) 64 | 65 | @staticmethod 66 | def depthwise_conv(i, o, kernel_size, stride=1, padding=0, bias=False): 67 | return nn.Conv2d(i, o, kernel_size, stride, padding, bias=bias, groups=i) 68 | 69 | def forward(self, x): 70 | if self.stride == 1: 71 | x1, x2 = x.chunk(2, dim=1) 72 | out = torch.cat((x1, self.branch2(x2)), dim=1) 73 | else: 74 | out = torch.cat((self.branch1(x), self.branch2(x)), dim=1) 75 | 76 | out = channel_shuffle(out, 2) 77 | 78 | return out 79 | 80 | 81 | class ShuffleNetV2(nn.Module): 82 | def __init__(self, 83 | model_size='1.5x', 84 | out_stages=(2, 3, 4), 85 | with_last_conv=False, 86 | kernal_size=3, 87 | activation='ReLU'): 88 | super(ShuffleNetV2, self).__init__() 89 | print('model size is ', model_size) 90 | 91 | self.stage_repeats = [4, 8, 4] 92 | self.model_size = model_size 93 | self.out_stages = out_stages 94 | self.with_last_conv = with_last_conv 95 | self.kernal_size = kernal_size 96 | self.activation = activation 97 | if model_size == '0.5x': 98 | self._stage_out_channels = [24, 48, 96, 192, 1024] 99 | elif model_size == '1.0x': 100 | self._stage_out_channels = [24, 116, 232, 464, 1024] 101 | elif model_size == '1.5x': 102 | self._stage_out_channels = [24, 176, 352, 704, 1024] 103 | elif model_size == '2.0x': 104 | self._stage_out_channels = [24, 244, 488, 976, 2048] 105 | else: 106 | raise NotImplementedError 107 | 108 | # building first layer 109 | input_channels = 3 110 | output_channels = self._stage_out_channels[0] 111 | self.conv1 = nn.Sequential( 112 | nn.Conv2d(input_channels, output_channels, 3, 2, 1, bias=False), 113 | nn.BatchNorm2d(output_channels), 114 | act_layers(activation), 115 | ) 116 | input_channels = output_channels 117 | 118 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 119 | 120 | stage_names = ['stage{}'.format(i) for i in [2, 3, 4]] 121 | for name, repeats, output_channels in zip( 122 | stage_names, self.stage_repeats, 
self._stage_out_channels[1:]): 123 | seq = [ShuffleV2Block(input_channels, output_channels, 2, activation=activation)] 124 | for i in range(repeats - 1): 125 | seq.append(ShuffleV2Block(output_channels, output_channels, 1, activation=activation)) 126 | setattr(self, name, nn.Sequential(*seq)) 127 | input_channels = output_channels 128 | output_channels = self._stage_out_channels[-1] 129 | if self.with_last_conv: 130 | self.conv5 = nn.Sequential( 131 | nn.Conv2d(input_channels, output_channels, 1, 1, 0, bias=False), 132 | nn.BatchNorm2d(output_channels), 133 | act_layers(activation), 134 | ) 135 | self.stage4.add_module('conv5', self.conv5) 136 | self._initialize_weights() 137 | 138 | def forward(self, x): 139 | x = self.conv1(x) 140 | x = self.maxpool(x) 141 | output = [] 142 | for i in range(2, 5): 143 | stage = getattr(self, 'stage{}'.format(i)) 144 | x = stage(x) 145 | if i in self.out_stages: 146 | output.append(x) 147 | return tuple(output) 148 | 149 | def _initialize_weights(self, pretrain=True): 150 | print('init weights...') 151 | for name, m in self.named_modules(): 152 | if isinstance(m, nn.Conv2d): 153 | if 'first' in name: 154 | nn.init.normal_(m.weight, 0, 0.01) 155 | else: 156 | nn.init.normal_(m.weight, 0, 1.0 / m.weight.shape[1]) 157 | if m.bias is not None: 158 | nn.init.constant_(m.bias, 0) 159 | elif isinstance(m, nn.BatchNorm2d): 160 | nn.init.constant_(m.weight, 1) 161 | if m.bias is not None: 162 | nn.init.constant_(m.bias, 0.0001) 163 | nn.init.constant_(m.running_mean, 0) 164 | elif isinstance(m, nn.BatchNorm1d): 165 | nn.init.constant_(m.weight, 1) 166 | if m.bias is not None: 167 | nn.init.constant_(m.bias, 0.0001) 168 | nn.init.constant_(m.running_mean, 0) 169 | elif isinstance(m, nn.Linear): 170 | nn.init.normal_(m.weight, 0, 0.01) 171 | if m.bias is not None: 172 | nn.init.constant_(m.bias, 0) 173 | if pretrain: 174 | url = model_urls['shufflenetv2_{}'.format(self.model_size)] 175 | if url is not None: 176 | pretrained_state_dict = model_zoo.load_url(url) 177 | print('=> loading pretrained model {}'.format(url)) 178 | self.load_state_dict(pretrained_state_dict, strict=False) 179 | 180 | 181 | if __name__ == "__main__": 182 | model = ShuffleNetV2(model_size='1.0x', ) 183 | print(model) 184 | test_data = torch.rand(5, 3, 320, 320) 185 | test_outputs = model(test_data) 186 | for out in test_outputs: 187 | print(out.size()) 188 | -------------------------------------------------------------------------------- /quarkdet/model/backbone/vovnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from collections import OrderedDict 5 | #https://github.com/youngwanLEE/vovnet-detectron2 6 | #https://arxiv.org/pdf/1904.09730v1.pdf 7 | #https://arxiv.org/pdf/1911.06667.pdf 8 | __all__ = ['VoVNet', 'vovnet27_slim', 'vovnet39', 'vovnet57'] 9 | 10 | 11 | model_urls = { 12 | 'vovnet39': 'https://dl.dropbox.com/s/1lnzsgnixd8gjra/vovnet39_torchvision.pth?dl=1', 13 | 'vovnet57': 'https://dl.dropbox.com/s/6bfu9gstbwfw31m/vovnet57_torchvision.pth?dl=1' 14 | } 15 | 16 | 17 | def conv3x3(in_channels, out_channels, module_name, postfix, 18 | stride=1, groups=1, kernel_size=3, padding=1): 19 | """3x3 convolution with padding""" 20 | return [ 21 | ('{}_{}/conv'.format(module_name, postfix), 22 | nn.Conv2d(in_channels, out_channels, 23 | kernel_size=kernel_size, 24 | stride=stride, 25 | padding=padding, 26 | groups=groups, 27 | bias=False)), 28 | 
('{}_{}/norm'.format(module_name, postfix), 29 | nn.BatchNorm2d(out_channels)), 30 | ('{}_{}/relu'.format(module_name, postfix), 31 | nn.ReLU(inplace=True)), 32 | ] 33 | 34 | 35 | def conv1x1(in_channels, out_channels, module_name, postfix, 36 | stride=1, groups=1, kernel_size=1, padding=0): 37 | """1x1 convolution""" 38 | return [ 39 | ('{}_{}/conv'.format(module_name, postfix), 40 | nn.Conv2d(in_channels, out_channels, 41 | kernel_size=kernel_size, 42 | stride=stride, 43 | padding=padding, 44 | groups=groups, 45 | bias=False)), 46 | ('{}_{}/norm'.format(module_name, postfix), 47 | nn.BatchNorm2d(out_channels)), 48 | ('{}_{}/relu'.format(module_name, postfix), 49 | nn.ReLU(inplace=True)), 50 | ] 51 | 52 | 53 | class _OSA_module(nn.Module): 54 | def __init__(self, 55 | in_ch, 56 | stage_ch, 57 | concat_ch, 58 | layer_per_block, 59 | module_name, 60 | identity=False): 61 | super(_OSA_module, self).__init__() 62 | 63 | self.identity = identity 64 | self.layers = nn.ModuleList() 65 | in_channel = in_ch 66 | for i in range(layer_per_block): 67 | self.layers.append(nn.Sequential( 68 | OrderedDict(conv3x3(in_channel, stage_ch, module_name, i)))) 69 | in_channel = stage_ch 70 | 71 | in_channel = in_ch + layer_per_block * stage_ch  # feature aggregation 72 | self.concat = nn.Sequential( 73 | OrderedDict(conv1x1(in_channel, concat_ch, module_name, 'concat'))) 74 | 75 | def forward(self, x): 76 | identity_feat = x 77 | output = [] 78 | output.append(x) 79 | for layer in self.layers: 80 | x = layer(x) 81 | output.append(x) 82 | 83 | x = torch.cat(output, dim=1) 84 | xt = self.concat(x) 85 | 86 | if self.identity: 87 | xt = xt + identity_feat 88 | 89 | return xt 90 | 91 | 92 | class _OSA_stage(nn.Sequential): 93 | def __init__(self, 94 | in_ch, 95 | stage_ch, 96 | concat_ch, 97 | block_per_stage, 98 | layer_per_block, 99 | stage_num): 100 | super(_OSA_stage, self).__init__() 101 | 102 | if not stage_num == 2: 103 | self.add_module('Pooling', 104 | nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)) 105 | 106 | module_name = f'OSA{stage_num}_1' 107 | self.add_module(module_name, 108 | _OSA_module(in_ch, 109 | stage_ch, 110 | concat_ch, 111 | layer_per_block, 112 | module_name)) 113 | for i in range(block_per_stage-1): 114 | module_name = f'OSA{stage_num}_{i+2}' 115 | self.add_module(module_name, 116 | _OSA_module(concat_ch, 117 | stage_ch, 118 | concat_ch, 119 | layer_per_block, 120 | module_name, 121 | identity=True)) 122 | 123 | 124 | class VoVNet(nn.Module): 125 | def __init__(self, 126 | config_stage_ch, 127 | config_concat_ch, 128 | block_per_stage, 129 | layer_per_block, 130 | num_classes=2): 131 | super(VoVNet, self).__init__() 132 | 133 | stem = conv3x3(3, 64, 'stem', '1', 2)  # Stem module 134 | stem += conv3x3(64, 64, 'stem', '2', 1) 135 | stem += conv3x3(64, 128, 'stem', '3', 2) 136 | self.add_module('stem', nn.Sequential(OrderedDict(stem))) 137 | 138 | stem_out_ch = [128] 139 | in_ch_list = stem_out_ch + config_concat_ch[:-1] 140 | self.stage_names = [] 141 | for i in range(4):  # num_stages 142 | name = 'stage%d' % (i+2); self.stage_names.append(name) 143 | self.add_module(name, 144 | _OSA_stage(in_ch_list[i], 145 | config_stage_ch[i], 146 | config_concat_ch[i], 147 | block_per_stage[i], 148 | layer_per_block, 149 | i+2)) 150 | 151 | self.feature_layer = nn.Linear(config_concat_ch[-1], 128) 152 | self.classifier = nn.Linear(128, num_classes) 153 | 154 | for m in self.modules(): 155 | if isinstance(m, nn.Conv2d): 156 | nn.init.kaiming_normal_(m.weight) 157 | elif isinstance(m,
(nn.BatchNorm2d, nn.GroupNorm)): 158 | nn.init.constant_(m.weight, 1) 159 | nn.init.constant_(m.bias, 0) 160 | elif isinstance(m, nn.Linear): 161 | nn.init.constant_(m.bias, 0) 162 | 163 | def forward(self, x): 164 | x = self.stem(x) 165 | for name in self.stage_names: 166 | x = getattr(self, name)(x) 167 | x = F.adaptive_avg_pool2d(x, (1, 1)).view(x.size(0), -1) 168 | features = self.feature_layer(x) 169 | x = self.classifier(features) 170 | return features, x 171 | 172 | 173 | def _vovnet(arch, 174 | config_stage_ch, 175 | config_concat_ch, 176 | block_per_stage, 177 | layer_per_block, 178 | pretrained, 179 | progress, 180 | **kwargs): 181 | model = VoVNet(config_stage_ch, config_concat_ch, 182 | block_per_stage, layer_per_block, 183 | **kwargs) 184 | if pretrained: 185 | # use torch.hub here: load_state_dict_from_url is never imported in this file 186 | state_dict = torch.hub.load_state_dict_from_url(model_urls[arch], progress=progress) 187 | model.load_state_dict(state_dict) 188 | return model 189 | 190 | 191 | def vovnet57(pretrained=False, progress=True, **kwargs): 192 | return _vovnet('vovnet57', [128, 160, 192, 224], [256, 512, 768, 1024], 193 | [1,1,4,3], 5, pretrained, progress, **kwargs) 194 | 195 | 196 | def vovnet39(pretrained=False, progress=True, **kwargs): 197 | return _vovnet('vovnet39', [128, 160, 192, 224], [256, 512, 768, 1024], 198 | [1,1,2,2], 5, pretrained, progress, **kwargs) 199 | 200 | 201 | def vovnet27_slim(pretrained=False, progress=True, **kwargs): 202 | return _vovnet('vovnet27_slim', [64, 80, 96, 112], [128, 256, 384, 512], 203 | [1,1,1,1], 5, pretrained, progress, **kwargs) --------------------------------------------------------------------------------
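A quick smoke test for the VoVNet factories above (a usage sketch, not part of the repo). Note that `model_urls` only registers weights for vovnet39 and vovnet57, so the slim variant is built with pretrained=False:

import torch
from quarkdet.model.backbone.vovnet import vovnet27_slim

# Build the slim classification-style VoVNet and run a dummy forward pass.
model = vovnet27_slim(pretrained=False, num_classes=2)
model.eval()
with torch.no_grad():
    features, logits = model(torch.rand(1, 3, 224, 224))
print(features.shape)  # torch.Size([1, 128]) -- output of feature_layer
print(logits.shape)    # torch.Size([1, 2])   -- output of classifier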