├── quarkdet
│   ├── data
│   │   ├── transform
│   │   │   ├── __init__.py
│   │   │   ├── pipeline.py
│   │   │   ├── color.py
│   │   │   └── warp.py
│   │   ├── dataset
│   │   │   ├── __init__.py
│   │   │   └── base.py
│   │   └── collate.py
│   ├── util
│   │   ├── path.py
│   │   ├── rank_filter.py
│   │   ├── distributed_data_parallel.py
│   │   ├── __init__.py
│   │   ├── config.py
│   │   ├── check_point.py
│   │   ├── box_transform.py
│   │   ├── scatter_gather.py
│   │   ├── logger.py
│   │   ├── util_mixins.py
│   │   └── data_parallel.py
│   ├── model
│   │   ├── head
│   │   │   ├── assigner
│   │   │   │   ├── base_assigner.py
│   │   │   │   └── atss_assigner.py
│   │   │   ├── __init__.py
│   │   │   ├── sampler
│   │   │   │   ├── pseudo_sampler.py
│   │   │   │   ├── base_sampler.py
│   │   │   │   └── sampling_result.py
│   │   │   ├── anchor
│   │   │   │   ├── anchor_target.py
│   │   │   │   ├── anchor_generator.py
│   │   │   │   └── base_anchor_head.py
│   │   │   └── quarkdet_head.py
│   │   ├── detector
│   │   │   ├── __init__.py
│   │   │   ├── gfl.py
│   │   │   └── one_stage.py
│   │   ├── module
│   │   │   ├── scale.py
│   │   │   ├── activation.py
│   │   │   ├── init_weights.py
│   │   │   ├── norm.py
│   │   │   └── nms.py
│   │   ├── neck
│   │   │   ├── __init__.py
│   │   │   ├── pan_slim.py
│   │   │   ├── fpn_slim.py
│   │   │   ├── fpn.py
│   │   │   └── pan.py
│   │   ├── backbone
│   │   │   ├── __init__.py
│   │   │   ├── mobilenetv2.py
│   │   │   ├── shufflenetv2.py
│   │   │   └── vovnet.py
│   │   └── loss
│   │       ├── utils.py
│   │       └── varifocal_loss.py
│   ├── evaluator
│   │   ├── __init__.py
│   │   └── coco_detection.py
│   └── trainer
│       ├── __init__.py
│       └── dist_trainer.py
├── requirements.txt
├── tools
│   ├── flops.py
│   ├── export.py
│   ├── statistics.py
│   ├── inference.py
│   ├── test.py
│   └── train.py
├── config
│   ├── shufflenetv2_0.5x.yml
│   ├── mobilenetv3.yml
│   ├── nanodet.yml
│   ├── shufflenet.yml
│   ├── test.yml
│   ├── efficientdet.yml
│   ├── ghostnet_full.yml
│   ├── ghostnet_full_bifpn.yml
│   ├── ghostnet_slim640.yml
│   ├── quarkdet.yml
│   └── ghostnet_slim.yml
└── demo
    └── demo.py

/quarkdet/data/transform/__init__.py:
--------------------------------------------------------------------------------
from .pipeline import Pipeline

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
Cython
termcolor
numpy
torchvision
tensorboard
pycocotools
matplotlib
pyaml
opencv-python
tqdm
torch-summary

--------------------------------------------------------------------------------
/quarkdet/util/path.py:
--------------------------------------------------------------------------------
import os
from .rank_filter import rank_filter


@rank_filter
def mkdir(path):
    if not os.path.exists(path):
        os.makedirs(path)

--------------------------------------------------------------------------------
/quarkdet/util/rank_filter.py:
--------------------------------------------------------------------------------
def rank_filter(func):
    def func_filter(local_rank=-1, *args, **kwargs):
        if local_rank < 1:
            return func(*args, **kwargs)
        else:
            pass
    return func_filter

--------------------------------------------------------------------------------
/quarkdet/model/head/assigner/base_assigner.py:
--------------------------------------------------------------------------------
from abc import ABCMeta, abstractmethod


class BaseAssigner(metaclass=ABCMeta):

    @abstractmethod
    def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
        pass

--------------------------------------------------------------------------------
/quarkdet/evaluator/__init__.py:
--------------------------------------------------------------------------------
from .coco_detection import
CocoDetectionEvaluator 2 | 3 | 4 | def build_evaluator(cfg, dataset): 5 | if cfg.evaluator.name == 'CocoDetectionEvaluator': 6 | return CocoDetectionEvaluator(dataset) 7 | else: 8 | raise NotImplementedError 9 | -------------------------------------------------------------------------------- /quarkdet/data/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from .coco import CocoDataset 3 | 4 | 5 | def build_dataset(cfg, mode): 6 | dataset_cfg = copy.deepcopy(cfg) 7 | if dataset_cfg['name'] == 'coco': 8 | dataset_cfg.pop('name') 9 | return CocoDataset(mode=mode, **dataset_cfg) 10 | -------------------------------------------------------------------------------- /quarkdet/model/detector/__init__.py: -------------------------------------------------------------------------------- 1 | from .gfl import GFL 2 | 3 | 4 | def build_model(model_cfg): 5 | if model_cfg.detector.name == 'GFL': 6 | model = GFL(model_cfg.detector.backbone, model_cfg.detector.neck, model_cfg.detector.head) 7 | else: 8 | raise NotImplementedError 9 | return model 10 | -------------------------------------------------------------------------------- /quarkdet/model/module/scale.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class Scale(nn.Module): 6 | """ 7 | A learnable scale parameter 8 | """ 9 | 10 | def __init__(self, scale=1.0): 11 | super(Scale, self).__init__() 12 | self.scale = nn.Parameter(torch.tensor(scale, dtype=torch.float)) 13 | 14 | def forward(self, x): 15 | return x * self.scale 16 | -------------------------------------------------------------------------------- /quarkdet/model/head/__init__.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from .gfl_headv2 import GFLHeadV2 3 | from .quarkdet_head import QuarkDetHead 4 | 5 | 6 | def build_head(cfg): 7 | head_cfg = copy.deepcopy(cfg) 8 | name = head_cfg.pop('name') 9 | if name == 'GFLHeadV2': 10 | return GFLHeadV2(**head_cfg) 11 | elif name == 'QuarkDetHead': 12 | return QuarkDetHead(**head_cfg) 13 | else: 14 | raise NotImplementedError -------------------------------------------------------------------------------- /quarkdet/util/distributed_data_parallel.py: -------------------------------------------------------------------------------- 1 | from torch.nn.parallel import DistributedDataParallel 2 | from .scatter_gather import scatter_kwargs 3 | 4 | 5 | class DDP(DistributedDataParallel): 6 | 7 | def __init__(self, batchsize, **kwargs): 8 | self.batchsize = batchsize 9 | super(DDP, self).__init__(**kwargs) 10 | 11 | def scatter(self, inputs, kwargs, device_ids): 12 | return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim, chunk_sizes=[self.batchsize]) -------------------------------------------------------------------------------- /quarkdet/model/detector/gfl.py: -------------------------------------------------------------------------------- 1 | from .one_stage import OneStage 2 | 3 | 4 | class GFL(OneStage): 5 | def __init__(self, 6 | backbone_cfg, 7 | fpn_cfg, 8 | head_cfg, ): 9 | super(GFL, self).__init__(backbone_cfg, 10 | fpn_cfg, 11 | head_cfg) 12 | 13 | def forward(self, x): 14 | x = self.backbone(x) 15 | x = self.fpn(x) 16 | x = self.head(x) 17 | return x 18 | -------------------------------------------------------------------------------- /quarkdet/util/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .rank_filter import rank_filter 2 | from .path import mkdir 3 | from .logger import Logger, MovingAverage, AverageMeter 4 | from .data_parallel import DataParallel 5 | from .distributed_data_parallel import DDP 6 | from .check_point import load_model_weight, save_model 7 | from .config import cfg, load_config 8 | from .box_transform import * 9 | from .util_mixins import NiceRepr 10 | from .visualization import Visualizer, overlay_bbox_cv 11 | from .flops_counter import get_model_complexity_info 12 | -------------------------------------------------------------------------------- /quarkdet/model/module/activation.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | activations = {'ReLU': nn.ReLU, 4 | 'LeakyReLU': nn.LeakyReLU, 5 | 'ReLU6': nn.ReLU6, 6 | 'SELU': nn.SELU, 7 | 'ELU': nn.ELU, 8 | None: nn.Identity 9 | } 10 | 11 | 12 | def act_layers(name): 13 | assert name in activations.keys() 14 | if name == 'LeakyReLU': 15 | return nn.LeakyReLU(negative_slope=0.1, inplace=True) 16 | else: 17 | return activations[name](inplace=True) 18 | -------------------------------------------------------------------------------- /quarkdet/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from .trainer import Trainer 3 | from .dist_trainer import DistTrainer 4 | 5 | 6 | def build_trainer(rank, cfg, model, logger): 7 | if len(cfg.device.gpu_ids) > 1: 8 | trainer = DistTrainer(rank, cfg, model, logger) 9 | trainer.set_device(cfg.device.batchsize_per_gpu, rank, device=torch.device('cuda')) # TODO: device 10 | else: 11 | trainer = Trainer(rank, cfg, model, logger) 12 | trainer.set_device(cfg.device.batchsize_per_gpu, cfg.device.gpu_ids, device=torch.device('cuda')) 13 | return trainer 14 | 15 | -------------------------------------------------------------------------------- /quarkdet/model/neck/__init__.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from .fpn import FPN 3 | from .pan import PAN 4 | from .bifpn import BiFPN 5 | from .fpn_slim import FPN_Slim 6 | from .pan_slim import PAN_Slim 7 | 8 | 9 | def build_fpn(cfg): 10 | fpn_cfg = copy.deepcopy(cfg) 11 | name = fpn_cfg.pop('name') 12 | if name == 'FPN': 13 | return FPN(**fpn_cfg) 14 | elif name == 'PAN': 15 | return PAN(**fpn_cfg) 16 | elif name == 'BiFPN': 17 | return BiFPN(**fpn_cfg) 18 | elif name == 'FPN_Slim': 19 | return FPN_Slim(**fpn_cfg) 20 | elif name == 'PAN_Slim': 21 | return PAN_Slim(**fpn_cfg) 22 | else: 23 | raise NotImplementedError -------------------------------------------------------------------------------- /tools/flops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import sys 3 | sys.path.append("./") 4 | from quarkdet.model.detector import build_model 5 | from quarkdet.util import cfg, load_config, get_model_complexity_info 6 | 7 | 8 | def main(config, input_shape=(3, 320, 320)): 9 | model = build_model(config.model) 10 | #flops, params = get_model_complexity_info(model, input_shape) 11 | 12 | macs, params = get_model_complexity_info(model, input_shape, as_strings=True, 13 | print_per_layer_stat=True) 14 | print('{:<30} {:<8}'.format('Computational complexity: ', macs)) 15 | print('{:<30} {:<8}'.format('Number of parameters: ', params)) 16 | 17 | 18 | if __name__ == '__main__': 19 | 
cfg_path = r"config/ghostnet_slim.yml" 20 | load_config(cfg, cfg_path) 21 | main(config=cfg, 22 | input_shape=(3, 320, 320) 23 | ) 24 | 25 | 26 | -------------------------------------------------------------------------------- /quarkdet/model/head/sampler/pseudo_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .base_sampler import BaseSampler 4 | from .sampling_result import SamplingResult 5 | 6 | 7 | class PseudoSampler(BaseSampler): 8 | 9 | def __init__(self, **kwargs): 10 | pass 11 | 12 | def _sample_pos(self, **kwargs): 13 | raise NotImplementedError 14 | 15 | def _sample_neg(self, **kwargs): 16 | raise NotImplementedError 17 | 18 | def sample(self, assign_result, bboxes, gt_bboxes, **kwargs): 19 | pos_inds = torch.nonzero( 20 | assign_result.gt_inds > 0, as_tuple=False).squeeze(-1).unique() 21 | neg_inds = torch.nonzero( 22 | assign_result.gt_inds == 0, as_tuple=False).squeeze(-1).unique() 23 | gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8) 24 | sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 25 | assign_result, gt_flags) 26 | return sampling_result 27 | -------------------------------------------------------------------------------- /tools/export.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import sys 4 | sys.path.append("./") 5 | from quarkdet.model.detector import build_model 6 | from quarkdet.util import Logger, cfg, load_config, load_model_weight 7 | 8 | def main(config, model_path, output_path, input_shape=(320, 320)): 9 | logger = Logger(-1, config.save_dir, False) 10 | model = build_model(config.model) 11 | checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage) 12 | load_model_weight(model, checkpoint, logger) 13 | dummy_input = torch.autograd.Variable(torch.randn(1, 3, input_shape[0], input_shape[1])) 14 | torch.onnx.export(model, dummy_input, output_path, verbose=True, keep_initializers_as_inputs=True, opset_version=11) 15 | print('finished exporting onnx ') 16 | 17 | if __name__ == '__main__': 18 | cfg_path = r"config/quarkdet.yml" 19 | model_path = r"quarkdet.pth" 20 | out_path = r'output.onnx' 21 | load_config(cfg, cfg_path) 22 | main(cfg, model_path, out_path, input_shape=(320, 320)) -------------------------------------------------------------------------------- /quarkdet/util/config.py: -------------------------------------------------------------------------------- 1 | from .yacs import CfgNode 2 | 3 | cfg = CfgNode(new_allowed=True) 4 | cfg.save_dir = './' 5 | # common params for NETWORK 6 | cfg.model = CfgNode() 7 | cfg.model.detector = CfgNode(new_allowed=True) 8 | cfg.model.detector.backbone = CfgNode(new_allowed=True) 9 | cfg.model.detector.neck = CfgNode(new_allowed=True) 10 | cfg.model.detector.head = CfgNode(new_allowed=True) 11 | 12 | # DATASET related params 13 | cfg.data = CfgNode(new_allowed=True) 14 | cfg.data.train = CfgNode(new_allowed=True) 15 | cfg.data.val = CfgNode(new_allowed=True) 16 | cfg.device = CfgNode(new_allowed=True) 17 | # train 18 | cfg.schedule = CfgNode(new_allowed=True) 19 | 20 | # logger 21 | cfg.log = CfgNode() 22 | cfg.log.interval = 50 23 | 24 | # testing 25 | cfg.test = CfgNode() 26 | # size of images for each device 27 | 28 | 29 | def load_config(cfg, args_cfg): 30 | cfg.defrost() 31 | cfg.merge_from_file(args_cfg) 32 | cfg.freeze() 33 | 34 | 35 | if __name__ == '__main__': 36 | import sys 37 | 38 | with 
open(sys.argv[1], 'w') as f:
        print(cfg, file=f)

--------------------------------------------------------------------------------
/quarkdet/model/backbone/__init__.py:
--------------------------------------------------------------------------------
import copy
from .ghostnet import GhostNet_slim, GhostNet_full
from .shufflenetv2 import ShuffleNetV2
from .mobilenetv2 import MobileNetV2
from .mobilenetv3 import MobileNetV3_Small
from .efficientnet import EfficientNet


def build_backbone(cfg):
    backbone_cfg = copy.deepcopy(cfg)
    name = backbone_cfg.pop('name')
    if name == 'MicroNet':
        pass
    elif name == 'VovNetV2':
        pass
    elif name == 'ShuffleNetV2':
        return ShuffleNetV2(**backbone_cfg)
    elif name == 'GhostNet_slim':
        return GhostNet_slim(**backbone_cfg)
    elif name == 'GhostNet_full':
        return GhostNet_full(**backbone_cfg)
    elif name == 'MobileNetV2':
        return MobileNetV2(**backbone_cfg)
    elif name == 'MobileNetV3_Small':
        return MobileNetV3_Small(**backbone_cfg)
    elif name == 'EfficientNet':
        return EfficientNet(**backbone_cfg)
    else:
        raise NotImplementedError

--------------------------------------------------------------------------------
/quarkdet/data/transform/pipeline.py:
--------------------------------------------------------------------------------
from .warp import warp_and_resize
from .color import color_aug_and_norm
import functools


class Pipeline:
    def __init__(self,
                 cfg,
                 keep_ratio):
        self.warp = functools.partial(warp_and_resize,
                                      warp_kwargs=cfg,
                                      keep_ratio=keep_ratio)
        self.color = functools.partial(color_aug_and_norm,
                                       kwargs=cfg)

    def __call__(self, meta, dst_shape):
        meta = self.warp(meta=meta, dst_shape=dst_shape)
        meta = self.color(meta=meta)
        return meta


# Two augmentation stages are applied in sequence (warp, then color). What matters is
# whether a parameter is configured: if a key is present in the config the corresponding
# augmentation is enabled, otherwise it is skipped; each transform first checks whether
# the parameter exists.
# functools.partial(func, /, *args, **keywords)
# Return a new partial object which when called will behave like func called with the positional arguments args and keyword arguments keywords.
# If more arguments are supplied to the call, they are appended to args.
# If additional keyword arguments are supplied, they extend and override keywords.
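Usage sketch (not a file in the repository): the Predictor in tools/inference.py, shown further down, builds this Pipeline from the data.val section of a config and calls it with a meta dict. The snippet below does the same with a random image; the config path and image are illustrative, and it assumes the full QuarkDet package (including modules not reproduced here) is importable from the repo root.

import sys
sys.path.append("./")

import numpy as np
from quarkdet.data.transform import Pipeline
from quarkdet.util import cfg, load_config

load_config(cfg, "config/shufflenetv2_0.5x.yml")

# data.val.pipeline only carries a 'normalize' entry, so every optional
# warp/color augmentation that is not configured is simply skipped.
pipeline = Pipeline(cfg.data.val.pipeline, cfg.data.val.keep_ratio)

img = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
meta = dict(img_info={'file_name': None, 'height': 480, 'width': 640},
            raw_img=img,
            img=img)
meta = pipeline(meta, cfg.data.val.input_size)  # dst_shape is [w, h], here [320, 320]
print(meta['img'].shape, meta['img'].dtype)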
-------------------------------------------------------------------------------- /quarkdet/model/module/init_weights.py: -------------------------------------------------------------------------------- 1 | 2 | import torch.nn as nn 3 | 4 | 5 | def kaiming_init(module, 6 | a=0, 7 | mode='fan_out', 8 | nonlinearity='relu', 9 | bias=0, 10 | distribution='normal'): 11 | assert distribution in ['uniform', 'normal'] 12 | if distribution == 'uniform': 13 | nn.init.kaiming_uniform_( 14 | module.weight, a=a, mode=mode, nonlinearity=nonlinearity) 15 | else: 16 | nn.init.kaiming_normal_( 17 | module.weight, a=a, mode=mode, nonlinearity=nonlinearity) 18 | if hasattr(module, 'bias') and module.bias is not None: 19 | nn.init.constant_(module.bias, bias) 20 | 21 | 22 | def xavier_init(module, gain=1, bias=0, distribution='normal'): 23 | assert distribution in ['uniform', 'normal'] 24 | if distribution == 'uniform': 25 | nn.init.xavier_uniform_(module.weight, gain=gain) 26 | else: 27 | nn.init.xavier_normal_(module.weight, gain=gain) 28 | if hasattr(module, 'bias') and module.bias is not None: 29 | nn.init.constant_(module.bias, bias) 30 | 31 | 32 | def normal_init(module, mean=0, std=1, bias=0): 33 | nn.init.normal_(module.weight, mean, std) 34 | if hasattr(module, 'bias') and module.bias is not None: 35 | nn.init.constant_(module.bias, bias) 36 | 37 | 38 | def constant_init(module, val, bias=0): 39 | if hasattr(module, 'weight') and module.weight is not None: 40 | nn.init.constant_(module.weight, val) 41 | if hasattr(module, 'bias') and module.bias is not None: 42 | nn.init.constant_(module.bias, bias) -------------------------------------------------------------------------------- /quarkdet/util/check_point.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from .rank_filter import rank_filter 3 | 4 | def load_model_weight(model, checkpoint, logger): 5 | state_dict = checkpoint['state_dict'] 6 | # strip prefix of state_dict 7 | if list(state_dict.keys())[0].startswith('module.'): 8 | state_dict = {k[7:]: v for k, v in checkpoint['state_dict'].items()} 9 | 10 | model_state_dict = model.module.state_dict() if hasattr(model, 'module') else model.state_dict() 11 | 12 | # check loaded parameters and created model parameters 13 | for k in state_dict: 14 | if k in model_state_dict: 15 | if state_dict[k].shape != model_state_dict[k].shape: 16 | logger.log('Skip loading parameter {}, required shape{}, loaded shape{}.'.format( 17 | k, model_state_dict[k].shape, state_dict[k].shape)) 18 | state_dict[k] = model_state_dict[k] 19 | else: 20 | logger.log('Drop parameter {}.'.format(k)) 21 | for k in model_state_dict: 22 | if not (k in state_dict): 23 | logger.log('No param {}.'.format(k)) 24 | state_dict[k] = model_state_dict[k] 25 | model.load_state_dict(state_dict, strict=False) 26 | 27 | 28 | @rank_filter 29 | def save_model(model, path, epoch, iter, optimizer=None): 30 | model_state_dict = model.module.state_dict() if hasattr(model, 'module') else model.state_dict() 31 | data = {'epoch': epoch, 32 | 'state_dict': model_state_dict, 33 | 'iter': iter} 34 | if optimizer is not None: 35 | data['optimizer'] = optimizer.state_dict() 36 | 37 | torch.save(data, path) 38 | -------------------------------------------------------------------------------- /tools/statistics.py: -------------------------------------------------------------------------------- 1 | from pycocotools.coco import COCO 2 | # img_path: /media/ubuntu/data/dataset/COCOv1/2017/train2017 3 | # 
ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/ 4 | dataDir='/media/ubuntu/data/dataset/COCOv1/2017/' 5 | dataType='train2017' 6 | annFile='{}/annotations/instances_{}.json'.format(dataDir,dataType) 7 | 8 | # initialize COCO api for instance annotations 9 | coco=COCO(annFile) 10 | 11 | # display COCO categories and supercategories 12 | cats = coco.loadCats(coco.getCatIds()) # 类别 13 | cat_nms=[cat['name'] for cat in cats] #cat_nms是list类型 14 | #print(type(cat_nms)) 15 | #print('COCO categories: \n{}\n'.format(' '.join(cat_nms))) 16 | #print(len(cats)) 17 | 18 | # 错误的方式 19 | # catId = coco.getCatIds(catNms=cat_name) 20 | # 应把cat_name 变成 [cat_name] 21 | # 统计各类的图片数量和GT框数量 22 | for cat_name in cat_nms: 23 | #print("type(cat_name):",type(cat_name)) #test cat_name是str类型 24 | catId = coco.getCatIds(catNms=[cat_name]) 25 | #print("type(catId):",type(catId)) #test catId是list所以可以直接传参 26 | imgId = coco.getImgIds(catIds=catId) 27 | annId = coco.getAnnIds(imgIds=imgId, catIds=catId, iscrowd=None) 28 | 29 | 30 | #下面这段代码是测试,如果输出后面的注释的数字例如[3, 57]表示统计存在错误。 31 | # 如果输出一个数字表示正确 32 | #car & carrot 33 | #ear & teddy bear 34 | #dog & hot dog 35 | #----------------------------------------------------------------------- 36 | # if cat_name == "carrot": 37 | # print(catId) #[3, 57] 38 | 39 | # if cat_name == "teddy bear": 40 | # print(catId) #[23, 88] 41 | 42 | # if cat_name == "hot dog": 43 | # print(catId) #[18, 58] 44 | #----------------------------------------------------------------------- 45 | 46 | print("{:<15} {:<6d} {:<10d}".format(cat_name, len(imgId), len(annId))) -------------------------------------------------------------------------------- /tools/inference.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import time 4 | import torch 5 | import sys 6 | sys.path.append("./") 7 | from quarkdet.model.detector import build_model 8 | from quarkdet.util import load_model_weight 9 | from quarkdet.data.transform import Pipeline 10 | 11 | 12 | class Predictor(object): 13 | def __init__(self, cfg, model_path, logger, device='cuda:0'): 14 | self.cfg = cfg 15 | self.device = device 16 | model = build_model(cfg.model) 17 | ckpt = torch.load(model_path, map_location=lambda storage, loc: storage) 18 | load_model_weight(model, ckpt, logger) 19 | self.model = model.to(device).eval() 20 | self.pipeline = Pipeline(cfg.data.val.pipeline, cfg.data.val.keep_ratio) 21 | 22 | def inference(self, img): 23 | img_info = {} 24 | if isinstance(img, str): 25 | img_info['file_name'] = os.path.basename(img) 26 | img = cv2.imread(img) 27 | else: 28 | img_info['file_name'] = None 29 | 30 | height, width = img.shape[:2] 31 | img_info['height'] = height 32 | img_info['width'] = width 33 | meta = dict(img_info=img_info, 34 | raw_img=img, 35 | img=img) 36 | meta = self.pipeline(meta, self.cfg.data.val.input_size) 37 | meta['img'] = torch.from_numpy(meta['img'].transpose(2, 0, 1)).unsqueeze(0).to(self.device) 38 | with torch.no_grad(): 39 | results = self.model.inference(meta) 40 | return meta, results 41 | 42 | def visualize(self, dets, meta, class_names, score_thres, wait=0): 43 | time1 = time.time() 44 | self.model.head.show_result(meta['raw_img'], dets, class_names, score_thres=score_thres, show=True) 45 | print('viz time: {:.3f}s'.format(time.time()-time1)) 46 | -------------------------------------------------------------------------------- /quarkdet/model/detector/one_stage.py: 
-------------------------------------------------------------------------------- 1 | import time 2 | import torch 3 | import torch.nn as nn 4 | from ..backbone import build_backbone 5 | from ..neck import build_fpn 6 | from ..head import build_head 7 | 8 | 9 | class OneStage(nn.Module): 10 | def __init__(self, 11 | backbone_cfg, 12 | fpn_cfg=None, 13 | head_cfg=None,): 14 | super(OneStage, self).__init__() 15 | self.backbone = build_backbone(backbone_cfg) 16 | if fpn_cfg is not None: 17 | self.fpn = build_fpn(fpn_cfg) 18 | if head_cfg is not None: 19 | self.head = build_head(head_cfg) 20 | 21 | def forward(self, x): 22 | x = self.backbone(x) 23 | if hasattr(self, 'neck') and self.fpn is not None: 24 | x = self.fpn(x) 25 | if hasattr(self, 'head'): 26 | out = [] 27 | for xx in x: 28 | out.append(self.head(xx)) 29 | x = tuple(out) 30 | return x 31 | 32 | def inference(self, meta): 33 | with torch.no_grad(): 34 | torch.cuda.synchronize() 35 | time1 = time.time() 36 | preds = self(meta['img']) 37 | torch.cuda.synchronize() 38 | time2 = time.time() 39 | print('forward time: {:.3f}s'.format((time2 - time1)), end=' | ') 40 | results = self.head.post_process(preds, meta) 41 | torch.cuda.synchronize() 42 | print('decode time: {:.3f}s'.format((time.time() - time2)), end=' | ') 43 | return results 44 | 45 | def forward_train(self, gt_meta): 46 | preds = self(gt_meta['img']) 47 | loss, loss_states = self.head.loss(preds, gt_meta) 48 | 49 | #print("forward_train:",loss,type(loss)) 50 | 51 | return preds, loss, loss_states 52 | -------------------------------------------------------------------------------- /quarkdet/data/transform/color.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import random 4 | 5 | 6 | def random_brightness(img, delta): 7 | img += random.uniform(-delta, delta) 8 | return img 9 | 10 | 11 | def random_contrast(img, alpha_low, alpha_up): 12 | img *= random.uniform(alpha_low, alpha_up) 13 | return img 14 | 15 | 16 | def random_saturation(img, alpha_low, alpha_up): 17 | 18 | hsv_img = cv2.cvtColor(img.astype(np.float32), cv2.COLOR_BGR2HSV) 19 | hsv_img[..., 1] *= random.uniform(alpha_low, alpha_up) 20 | img = cv2.cvtColor(hsv_img, cv2.COLOR_HSV2BGR) 21 | 22 | return img 23 | 24 | 25 | def normalize(meta, mean, std): 26 | img = meta['img'].astype(np.float32) 27 | mean = np.array(mean, dtype=np.float64).reshape(1, -1) 28 | stdinv = 1 / np.array(std, dtype=np.float64).reshape(1, -1) 29 | cv2.subtract(img, mean, img) 30 | cv2.multiply(img, stdinv, img) 31 | meta['img'] = img 32 | return meta 33 | 34 | 35 | def _normalize(img, mean, std): 36 | mean = np.array(mean, dtype=np.float32).reshape(1, 1, 3) / 255 37 | std = np.array(std, dtype=np.float32).reshape(1, 1, 3) / 255 38 | img = (img - mean) / std 39 | return img 40 | 41 | 42 | def color_aug_and_norm(meta, kwargs): 43 | img = meta['img'].astype(np.float32) / 255 44 | 45 | if 'brightness' in kwargs and random.randint(0, 1): 46 | img = random_brightness(img, kwargs['brightness']) 47 | 48 | if 'contrast' in kwargs and random.randint(0, 1): 49 | img = random_contrast(img, *kwargs['contrast']) 50 | 51 | if 'saturation' in kwargs and random.randint(0, 1): 52 | img = random_saturation(img, *kwargs['saturation']) 53 | # cv2.imshow('trans', img) 54 | # cv2.waitKey(0) 55 | img = _normalize(img, *kwargs['normalize']) 56 | meta['img'] = img 57 | return meta 58 | 59 | 60 | -------------------------------------------------------------------------------- 
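Quick check of the color stage just shown (again, not a repository file): functools.partial freezes the kwargs dict, exactly as Pipeline.__init__ does, so later calls only supply the sample dict. The keys mirror the pipeline section of config/shufflenetv2_0.5x.yml; the mean/std are in BGR order because images are read with cv2.imread. The dummy image is illustrative, and the quarkdet package is assumed to be importable from the repo root.

import functools
import numpy as np
from quarkdet.data.transform.color import color_aug_and_norm

aug_cfg = {
    'brightness': 0.2,
    'contrast': [0.8, 1.2],
    'saturation': [0.8, 1.2],
    'normalize': [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]],
}

# Same binding as Pipeline.__init__: kwargs is fixed here, meta is supplied per call.
# brightness/contrast/saturation each fire with probability 0.5; 'normalize' always runs.
color = functools.partial(color_aug_and_norm, kwargs=aug_cfg)

meta = {'img': np.random.randint(0, 255, (320, 320, 3), dtype=np.uint8)}
out = color(meta=meta)
print(out['img'].shape, out['img'].dtype)  # (320, 320, 3) float32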
/quarkdet/model/head/anchor/anchor_target.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from functools import partial 3 | 4 | 5 | def multi_apply(func, *args, **kwargs): 6 | pfunc = partial(func, **kwargs) if kwargs else func 7 | map_results = map(pfunc, *args) 8 | return tuple(map(list, zip(*map_results))) 9 | 10 | 11 | def images_to_levels(target, num_level_anchors): 12 | """Convert targets by image to targets by feature level. 13 | 14 | [target_img0, target_img1] -> [target_level0, target_level1, ...] 15 | """ 16 | target = torch.stack(target, 0) 17 | level_targets = [] 18 | start = 0 19 | for n in num_level_anchors: 20 | end = start + n 21 | level_targets.append(target[:, start:end].squeeze(0)) 22 | start = end 23 | return level_targets 24 | 25 | 26 | def anchor_inside_flags(flat_anchors, 27 | valid_flags, 28 | img_shape, 29 | allowed_border=0): 30 | img_h, img_w = img_shape 31 | if allowed_border >= 0: 32 | inside_flags = valid_flags & \ 33 | (flat_anchors[:, 0] >= -allowed_border) & \ 34 | (flat_anchors[:, 1] >= -allowed_border) & \ 35 | (flat_anchors[:, 2] < img_w + allowed_border) & \ 36 | (flat_anchors[:, 3] < img_h + allowed_border) 37 | else: 38 | inside_flags = valid_flags 39 | return inside_flags 40 | 41 | 42 | def unmap(data, count, inds, fill=0): 43 | """ Unmap a subset of item (data) back to the original set of items (of 44 | size count) """ 45 | if data.dim() == 1: 46 | ret = data.new_full((count, ), fill) 47 | ret[inds.type(torch.bool)] = data 48 | else: 49 | new_size = (count, ) + data.size()[1:] 50 | ret = data.new_full(new_size, fill) 51 | ret[inds.type(torch.bool), :] = data 52 | return ret 53 | -------------------------------------------------------------------------------- /quarkdet/util/box_transform.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def distance2bbox(points, distance, max_shape=None): 5 | """Decode distance prediction to bounding box. 6 | 7 | Args: 8 | points (Tensor): Shape (n, 2), [x, y]. 9 | distance (Tensor): Distance from the given point to 4 10 | boundaries (left, top, right, bottom). 11 | max_shape (tuple): Shape of the image. 12 | 13 | Returns: 14 | Tensor: Decoded bboxes. 15 | """ 16 | x1 = points[:, 0] - distance[:, 0] 17 | y1 = points[:, 1] - distance[:, 1] 18 | x2 = points[:, 0] + distance[:, 2] 19 | y2 = points[:, 1] + distance[:, 3] 20 | if max_shape is not None: 21 | x1 = x1.clamp(min=0, max=max_shape[1]) 22 | y1 = y1.clamp(min=0, max=max_shape[0]) 23 | x2 = x2.clamp(min=0, max=max_shape[1]) 24 | y2 = y2.clamp(min=0, max=max_shape[0]) 25 | return torch.stack([x1, y1, x2, y2], -1) 26 | 27 | 28 | def bbox2distance(points, bbox, max_dis=None, eps=0.1): 29 | """Decode bounding box based on distances. 30 | 31 | Args: 32 | points (Tensor): Shape (n, 2), [x, y]. 33 | bbox (Tensor): Shape (n, 4), "xyxy" format 34 | max_dis (float): Upper bound of the distance. 35 | eps (float): a small value to ensure target < max_dis, instead <= 36 | 37 | Returns: 38 | Tensor: Decoded distances. 
39 | """ 40 | left = points[:, 0] - bbox[:, 0] 41 | top = points[:, 1] - bbox[:, 1] 42 | right = bbox[:, 2] - points[:, 0] 43 | bottom = bbox[:, 3] - points[:, 1] 44 | if max_dis is not None: 45 | left = left.clamp(min=0, max=max_dis - eps) 46 | top = top.clamp(min=0, max=max_dis - eps) 47 | right = right.clamp(min=0, max=max_dis - eps) 48 | bottom = bottom.clamp(min=0, max=max_dis - eps) 49 | return torch.stack([left, top, right, bottom], -1) -------------------------------------------------------------------------------- /quarkdet/util/scatter_gather.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | from torch.nn.parallel._functions import Scatter 4 | 5 | 6 | def list_scatter(input, target_gpus, chunk_sizes): 7 | ret = [] 8 | for idx, size in enumerate(chunk_sizes): 9 | ret.append(input[:size]) 10 | del input[:size] 11 | return tuple(ret) 12 | 13 | def scatter(inputs, target_gpus, dim=0, chunk_sizes=None): 14 | """ 15 | Slices variables into approximately equal chunks and 16 | distributes them across given GPUs. Duplicates 17 | references to objects that are not variables. Does not 18 | support Tensors. 19 | """ 20 | def scatter_map(obj): 21 | if isinstance(obj, Variable): 22 | return Scatter.apply(target_gpus, chunk_sizes, dim, obj) 23 | assert not torch.is_tensor(obj), "Tensors not supported in scatter." 24 | if isinstance(obj, list): 25 | return list_scatter(obj, target_gpus, chunk_sizes) 26 | if isinstance(obj, tuple): 27 | return list(zip(*map(scatter_map, obj))) 28 | if isinstance(obj, dict): 29 | return list(map(type(obj), zip(*map(scatter_map, obj.items())))) 30 | return [obj for targets in target_gpus] 31 | 32 | return scatter_map(inputs) 33 | 34 | 35 | def scatter_kwargs(inputs, kwargs, target_gpus, dim=0, chunk_sizes=None): 36 | r"""Scatter with support for kwargs dictionary""" 37 | inputs = scatter(inputs, target_gpus, dim, chunk_sizes) if inputs else [] 38 | kwargs = scatter(kwargs, target_gpus, dim, chunk_sizes) if kwargs else [] 39 | if len(inputs) < len(kwargs): 40 | inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) 41 | elif len(kwargs) < len(inputs): 42 | kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) 43 | inputs = tuple(inputs) 44 | kwargs = tuple(kwargs) 45 | return inputs, kwargs -------------------------------------------------------------------------------- /quarkdet/model/module/norm.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | norm_cfg = { 4 | # format: layer_type: (abbreviation, module) 5 | 'BN': ('bn', nn.BatchNorm2d), 6 | 'SyncBN': ('bn', nn.SyncBatchNorm), 7 | 'GN': ('gn', nn.GroupNorm), 8 | # and potentially 'SN' 9 | } 10 | 11 | 12 | def build_norm_layer(cfg, num_features, postfix=''): 13 | """ Build normalization layer 14 | 15 | Args: 16 | cfg (dict): cfg should contain: 17 | type (str): identify norm layer type. 18 | layer args: args needed to instantiate a norm layer. 19 | requires_grad (bool): [optional] whether stop gradient updates 20 | num_features (int): number of channels from input. 21 | postfix (int, str): appended into norm abbreviation to 22 | create named layer. 
23 | 24 | Returns: 25 | name (str): abbreviation + postfix 26 | layer (nn.Module): created norm layer 27 | """ 28 | assert isinstance(cfg, dict) and 'type' in cfg 29 | cfg_ = cfg.copy() 30 | 31 | layer_type = cfg_.pop('type') 32 | if layer_type not in norm_cfg: 33 | raise KeyError('Unrecognized norm type {}'.format(layer_type)) 34 | else: 35 | abbr, norm_layer = norm_cfg[layer_type] 36 | if norm_layer is None: 37 | raise NotImplementedError 38 | 39 | assert isinstance(postfix, (int, str)) 40 | name = abbr + str(postfix) 41 | 42 | requires_grad = cfg_.pop('requires_grad', True) 43 | cfg_.setdefault('eps', 1e-5) 44 | if layer_type != 'GN': 45 | layer = norm_layer(num_features, **cfg_) 46 | if layer_type == 'SyncBN': 47 | layer._specify_ddp_gpu_num(1) 48 | else: 49 | assert 'num_groups' in cfg_ 50 | layer = norm_layer(num_channels=num_features, **cfg_) 51 | 52 | for param in layer.parameters(): 53 | param.requires_grad = requires_grad 54 | 55 | return name, layer 56 | -------------------------------------------------------------------------------- /quarkdet/model/neck/pan_slim.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from .fpn_slim import FPN_Slim 4 | 5 | 6 | 7 | class PAN_Slim(FPN_Slim): 8 | def __init__(self, 9 | in_channels, 10 | out_channels, 11 | num_outs, 12 | start_level=0, 13 | end_level=-1, 14 | conv_cfg=None, 15 | norm_cfg=None, 16 | activation=None): 17 | super(PAN_Slim, 18 | self).__init__(in_channels, out_channels, num_outs, start_level, 19 | end_level, conv_cfg, norm_cfg, activation) 20 | self.init_weights() 21 | 22 | def forward(self, inputs): 23 | """Forward function.""" 24 | assert len(inputs) == len(self.in_channels) 25 | 26 | # build laterals 27 | laterals = [ 28 | lateral_conv(inputs[i + self.start_level]) 29 | for i, lateral_conv in enumerate(self.lateral_convs) 30 | ] 31 | 32 | # build top-down path 33 | used_backbone_levels = len(laterals) 34 | for i in range(used_backbone_levels - 1, 0, -1): 35 | prev_shape = laterals[i - 1].shape[2:] 36 | laterals[i - 1] += F.interpolate( 37 | laterals[i], size=prev_shape, mode='bilinear') 38 | 39 | # build outputs 40 | # part 1: from original levels 41 | inter_outs = [ 42 | laterals[i] for i in range(used_backbone_levels) 43 | ] 44 | 45 | # part 2: add bottom-up path 46 | for i in range(0, used_backbone_levels - 1): 47 | prev_shape = inter_outs[i + 1].shape[2:] 48 | inter_outs[i + 1] += F.interpolate(inter_outs[i], size=prev_shape, mode='bilinear') 49 | 50 | outs = [] 51 | outs.append(inter_outs[0]) 52 | outs.extend([ 53 | inter_outs[i] for i in range(1, used_backbone_levels) 54 | ]) 55 | return tuple(outs) 56 | -------------------------------------------------------------------------------- /quarkdet/trainer/dist_trainer.py: -------------------------------------------------------------------------------- 1 | import torch.distributed as dist 2 | from .trainer import Trainer 3 | from ..util import DDP 4 | import torch 5 | 6 | 7 | def average_gradients(model): 8 | """ Gradient averaging. """ 9 | size = float(dist.get_world_size()) 10 | for param in model.parameters(): 11 | if param.grad is not None: 12 | dist.all_reduce(param.grad.data, op=dist.ReduceOp.SUM) 13 | param.grad.data /= size 14 | 15 | 16 | 17 | class DistTrainer(Trainer): 18 | """ 19 | Distributed trainer for multi-gpu training. 
(not finish yet) 20 | """ 21 | def run_step(self, model, batch, mode='train'): 22 | output, loss, loss_stats = model.module.forward_train(batch) 23 | loss = loss.mean() 24 | loss.requires_grad_() 25 | 26 | #----------------------------------------------------------------------- 27 | # # #santiago 28 | # grad_params = torch.autograd.grad(loss, model.parameters(), create_graph=True,allow_unused=True) 29 | # # torch.autograd.grad does not accumuate the gradients into the .grad attributes 30 | # # It instead returns the gradients as Variable tuples. 31 | 32 | # # now compute the 2-norm of the grad_params 33 | # grad_norm = 0 34 | # for grad in grad_params: 35 | # grad_norm += (grad * grad).sum() 36 | # grad_norm = grad_norm.sqrt() 37 | # print("grad_norm:",grad_norm) 38 | 39 | # # take the gradients wrt grad_norm. backward() will accumulate 40 | # # the gradients into the .grad attributes 41 | # grad_norm.backward() 42 | #----------------------------------------------------------------------- 43 | 44 | 45 | if mode == 'train': 46 | self.optimizer.zero_grad() 47 | loss.backward() 48 | average_gradients(model) 49 | self.optimizer.step() 50 | return output, loss, loss_stats 51 | 52 | def set_device(self, batch_per_gpu, rank, device): 53 | """ 54 | Set model device for Distributed-Data-Parallel 55 | :param batch_per_gpu: batch size of each gpu 56 | :param rank: distributed training process rank 57 | :param device: cuda 58 | """ 59 | self.rank = rank 60 | self.model = DDP(batch_per_gpu, module=self.model.cuda(), device_ids=[rank], output_device=rank) 61 | 62 | 63 | -------------------------------------------------------------------------------- /quarkdet/evaluator/coco_detection.py: -------------------------------------------------------------------------------- 1 | import pycocotools.coco as coco 2 | from pycocotools.cocoeval import COCOeval 3 | import json 4 | import os 5 | import copy 6 | 7 | 8 | def xyxy2xywh(bbox): 9 | """ 10 | change bbox to coco format 11 | :param bbox: [x1, y1, x2, y2] 12 | :return: [x, y, w, h] 13 | """ 14 | return [ 15 | bbox[0], 16 | bbox[1], 17 | bbox[2] - bbox[0], 18 | bbox[3] - bbox[1], 19 | ] 20 | 21 | 22 | class CocoDetectionEvaluator: 23 | def __init__(self, dataset): 24 | assert hasattr(dataset, 'coco_api') 25 | self.coco_api = dataset.coco_api 26 | self.cat_ids = dataset.cat_ids 27 | self.metric_names = ['mAP', 'AP_50', 'AP_75', 'AP_small', 'AP_m', 'AP_l'] 28 | 29 | def results2json(self, results): 30 | """ 31 | results: {image_id: {label: [bboxes...] 
} } 32 | :return coco json format: {image_id: 33 | category_id: 34 | bbox: 35 | score: } 36 | """ 37 | json_results = [] 38 | for image_id, dets in results.items(): 39 | for label, bboxes in dets.items(): 40 | category_id = self.cat_ids[label] 41 | for bbox in bboxes: 42 | score = float(bbox[4]) 43 | detection = dict( 44 | image_id=int(image_id), 45 | category_id=int(category_id), 46 | bbox=xyxy2xywh(bbox), 47 | score=score) 48 | json_results.append(detection) 49 | return json_results 50 | 51 | def evaluate(self, results, save_dir, epoch, logger, rank=-1): 52 | results_json = self.results2json(results) 53 | json_path = os.path.join(save_dir, 'results{}.json'.format(rank)) 54 | json.dump(results_json, open(json_path, 'w')) 55 | coco_dets = self.coco_api.loadRes(json_path) 56 | coco_eval = COCOeval(copy.deepcopy(self.coco_api), copy.deepcopy(coco_dets), "bbox") 57 | coco_eval.evaluate() 58 | coco_eval.accumulate() 59 | coco_eval.summarize() 60 | aps = coco_eval.stats[:6] 61 | eval_results = {} 62 | for k, v in zip(self.metric_names, aps): 63 | eval_results[k] = v 64 | logger.scalar_summary('Val_coco_bbox/' + k, 'val', v, epoch) 65 | return eval_results 66 | -------------------------------------------------------------------------------- /quarkdet/data/collate.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | import re 5 | from torch._six import container_abcs, string_classes, int_classes 6 | 7 | 8 | np_str_obj_array_pattern = re.compile(r'[SaUO]') 9 | 10 | 11 | default_collate_err_msg_format = ( 12 | "default_collate: batch must contain tensors, numpy arrays, numbers, " 13 | "dicts or lists; found {}") 14 | 15 | 16 | def custom_collate_function(batch): 17 | r"""Puts each data field into a tensor with outer dimension batch size""" 18 | 19 | elem = batch[0] 20 | elem_type = type(elem) 21 | if isinstance(elem, torch.Tensor): 22 | out = None 23 | 24 | # if torch.utils.data.get_worker_info() is not None: 25 | # # # If we're in a background process, concatenate directly into a 26 | # # # shared memory tensor to avoid an extra copy 27 | # numel = sum([x.numel() for x in batch]) 28 | # storage = elem.storage()._new_shared(numel) 29 | # out = elem.new(storage) 30 | return torch.stack(batch, 0, out=out) 31 | elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \ 32 | and elem_type.__name__ != 'string_': 33 | elem = batch[0] 34 | if elem_type.__name__ == 'ndarray': 35 | # array of string classes and object 36 | if np_str_obj_array_pattern.search(elem.dtype.str) is not None: 37 | raise TypeError(default_collate_err_msg_format.format(elem.dtype)) 38 | 39 | # return custom_collate_function([torch.as_tensor(b) for b in batch]) 40 | return batch 41 | elif elem.shape == (): # scalars 42 | # return torch.as_tensor(batch) 43 | return batch 44 | elif isinstance(elem, float): 45 | return torch.tensor(batch, dtype=torch.float64) 46 | elif isinstance(elem, int_classes): 47 | return torch.tensor(batch) 48 | elif isinstance(elem, string_classes): 49 | return batch 50 | elif isinstance(elem, container_abcs.Mapping): 51 | return {key: custom_collate_function([d[key] for d in batch]) for key in elem} 52 | elif isinstance(elem, tuple) and hasattr(elem, '_fields'): # namedtuple 53 | return elem_type(*(custom_collate_function(samples) for samples in zip(*batch))) 54 | elif isinstance(elem, container_abcs.Sequence): 55 | transposed = zip(*batch) 56 | return [custom_collate_function(samples) for samples in 
transposed] 57 | 58 | raise TypeError(default_collate_err_msg_format.format(elem_type)) 59 | -------------------------------------------------------------------------------- /tools/test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import json 4 | import datetime 5 | import argparse 6 | import sys 7 | sys.path.append("./") 8 | from quarkdet.util import mkdir, Logger, cfg, load_config 9 | from quarkdet.trainer import build_trainer 10 | from quarkdet.data.collate import collate_function 11 | from quarkdet.data.dataset import build_dataset 12 | from quarkdet.model.detector import build_model 13 | from quarkdet.evaluator import build_evaluator 14 | 15 | 16 | def parse_args(): 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument('config', help='model config file path') 19 | parser.add_argument('--task', default='val', help='task to run, test or val') 20 | parser.add_argument('--save_result', action='store_true', default=True, help='save val results to txt') 21 | args = parser.parse_args() 22 | return args 23 | 24 | 25 | def main(args): 26 | load_config(cfg, args.config) 27 | local_rank = -1 28 | torch.backends.cudnn.enabled = True 29 | torch.backends.cudnn.benchmark = True 30 | cfg.defrost() 31 | timestr = datetime.datetime.now().__format__('%Y%m%d%H%M%S') 32 | cfg.save_dir = os.path.join(cfg.save_dir, timestr) 33 | cfg.freeze() 34 | mkdir(local_rank, cfg.save_dir) 35 | logger = Logger(local_rank, cfg.save_dir) 36 | 37 | logger.log('Creating model...') 38 | model = build_model(cfg.model) 39 | 40 | logger.log('Setting up data...') 41 | val_dataset = build_dataset(cfg.data.val, args.task) 42 | val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=1, shuffle=False, num_workers=1, 43 | pin_memory=True, collate_fn=collate_function, drop_last=True) 44 | trainer = build_trainer(local_rank, cfg, model, logger) 45 | if 'load_model' in cfg.schedule: 46 | trainer.load_model(cfg) 47 | evaluator = build_evaluator(cfg, val_dataset) 48 | logger.log('Starting testing...') 49 | with torch.no_grad(): 50 | results, val_loss_dict,_ = trainer.run_epoch(0, val_dataloader, mode=args.task) 51 | if args.task == 'test': 52 | res_json = evaluator.results2json(results) 53 | json_path = os.path.join(cfg.save_dir, 'results{}.json'.format(timestr)) 54 | json.dump(res_json, open(json_path, 'w')) 55 | elif args.task == 'val': 56 | eval_results = evaluator.evaluate(results, cfg.save_dir, 0, logger, rank=local_rank) 57 | if args.save_result: 58 | txt_path = os.path.join(cfg.save_dir, "eval_results{}.txt".format(timestr)) 59 | with open(txt_path, "a") as f: 60 | for k, v in eval_results.items(): 61 | f.write("{}: {}\n".format(k, v)) 62 | 63 | 64 | if __name__ == '__main__': 65 | args = parse_args() 66 | main(args) 67 | -------------------------------------------------------------------------------- /quarkdet/util/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import torch 4 | import numpy as np 5 | from termcolor import colored 6 | from .rank_filter import rank_filter 7 | from .path import mkdir 8 | 9 | 10 | class Logger: 11 | def __init__(self, local_rank, save_dir='./', use_tensorboard=True): 12 | mkdir(local_rank, save_dir) 13 | self.rank = local_rank 14 | fmt = colored('[%(name)s]', 'magenta', attrs=['bold']) + colored('[%(asctime)s]', 'blue') + \ 15 | colored('%(levelname)s:', 'green') + colored('%(message)s', 'white') 16 | 
logging.basicConfig(level=logging.INFO, 17 | filename=os.path.join(save_dir, 'logs.txt'), 18 | filemode='w') 19 | self.log_dir = os.path.join(save_dir, 'logs') 20 | console = logging.StreamHandler() 21 | console.setLevel(logging.INFO) 22 | formatter = logging.Formatter(fmt, datefmt="%m-%d %H:%M:%S") 23 | console.setFormatter(formatter) 24 | logging.getLogger().addHandler(console) 25 | if use_tensorboard: 26 | try: 27 | from torch.utils.tensorboard import SummaryWriter 28 | except ImportError: 29 | raise ImportError( 30 | 'Please run "pip install future tensorboard" to install ' 31 | 'the dependencies to use torch.utils.tensorboard ' 32 | '(applicable to PyTorch 1.1 or higher)') 33 | if self.rank < 1: 34 | logging.info('Using Tensorboard, logs will be saved in {}'.format(self.log_dir)) 35 | self.writer = SummaryWriter(log_dir=self.log_dir) 36 | 37 | def log(self, string): 38 | if self.rank < 1: 39 | logging.info(string) 40 | 41 | def scalar_summary(self, tag, phase, value, step): 42 | if self.rank < 1: 43 | self.writer.add_scalars(tag, {phase: value}, step) 44 | 45 | 46 | class MovingAverage(object): 47 | def __init__(self, val, window_size=50): 48 | self.window_size = window_size 49 | self.reset() 50 | self.push(val) 51 | 52 | def reset(self): 53 | self.queue = [] 54 | 55 | def push(self, val): 56 | self.queue.append(val) 57 | if len(self.queue) > self.window_size: 58 | self.queue.pop(0) 59 | 60 | def avg(self): 61 | return np.mean(self.queue) 62 | 63 | 64 | class AverageMeter(object): 65 | """Computes and stores the average and current value""" 66 | 67 | def __init__(self, val): 68 | self.reset() 69 | self.update(val) 70 | 71 | def reset(self): 72 | self.val = 0 73 | self.avg = 0 74 | self.sum = 0 75 | self.count = 0 76 | 77 | def update(self, val, n=1): 78 | self.val = val 79 | self.sum += val * n 80 | self.count += n 81 | if self.count > 0: 82 | self.avg = self.sum / self.count 83 | -------------------------------------------------------------------------------- /quarkdet/model/neck/fpn_slim.py: -------------------------------------------------------------------------------- 1 | 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from ..module.conv import ConvModule 5 | from ..module.init_weights import xavier_init 6 | 7 | 8 | 9 | 10 | 11 | class FPN_Slim(nn.Module): 12 | 13 | def __init__(self, 14 | in_channels, 15 | out_channels, 16 | num_outs, 17 | start_level=0, 18 | end_level=-1, 19 | conv_cfg=None, 20 | norm_cfg=None, 21 | activation=None 22 | ): 23 | super(FPN_Slim, self).__init__() 24 | assert isinstance(in_channels, list) 25 | self.in_channels = in_channels 26 | self.out_channels = out_channels 27 | self.num_ins = len(in_channels) 28 | self.num_outs = num_outs 29 | self.fp16_enabled = False 30 | 31 | if end_level == -1: 32 | self.backbone_end_level = self.num_ins 33 | assert num_outs >= self.num_ins - start_level 34 | else: 35 | # if end_level < inputs, no extra level is allowed 36 | self.backbone_end_level = end_level 37 | assert end_level <= len(in_channels) 38 | assert num_outs == end_level - start_level 39 | self.start_level = start_level 40 | self.end_level = end_level 41 | self.lateral_convs = nn.ModuleList() 42 | 43 | for i in range(self.start_level, self.backbone_end_level): 44 | l_conv = ConvModule( 45 | in_channels[i], 46 | out_channels, 47 | 1, 48 | conv_cfg=conv_cfg, 49 | norm_cfg=norm_cfg, 50 | activation=activation, 51 | inplace=False) 52 | 53 | self.lateral_convs.append(l_conv) 54 | self.init_weights() 55 | 56 | # default init_weights for conv(msra) 
and norm in ConvModule 57 | def init_weights(self): 58 | for m in self.modules(): 59 | if isinstance(m, nn.Conv2d): 60 | xavier_init(m, distribution='uniform') 61 | 62 | def forward(self, inputs): 63 | assert len(inputs) == len(self.in_channels) 64 | 65 | # build laterals 66 | laterals = [ 67 | lateral_conv(inputs[i + self.start_level]) 68 | for i, lateral_conv in enumerate(self.lateral_convs) 69 | ] 70 | 71 | # build top-down path 72 | used_backbone_levels = len(laterals) 73 | for i in range(used_backbone_levels - 1, 0, -1): 74 | prev_shape = laterals[i - 1].shape[2:] 75 | laterals[i - 1] += F.interpolate( 76 | laterals[i], size=prev_shape, mode='bilinear') 77 | 78 | # build outputs 79 | outs = [ 80 | # self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels) 81 | laterals[i] for i in range(used_backbone_levels) 82 | ] 83 | return tuple(outs) 84 | 85 | 86 | # if __name__ == '__main__': 87 | -------------------------------------------------------------------------------- /quarkdet/data/dataset/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | import torch 3 | import numpy as np 4 | from torch.utils.data import Dataset 5 | from ..transform import Pipeline 6 | 7 | 8 | class BaseDataset(Dataset, metaclass=ABCMeta): 9 | """ 10 | A dataset should have images, annotations and preprocessing pipelines 11 | QuarkDet use [xmin, ymin, xmax, ymax] format for box and 12 | [[x0,y0], [x1,y1] ... [xn,yn]] format for key points. 13 | instance masks should decode into binary masks for each instance like 14 | { 15 | 'bbox': [xmin,ymin,xmax,ymax], 16 | 'mask': mask 17 | } 18 | segmentation mask should decode into binary masks for each class. 19 | 20 | :param img_path: image data folder 21 | :param ann_path: annotation file path or folder 22 | :param use_instance_mask: load instance segmentation data 23 | :param use_seg_mask: load semantic segmentation data 24 | :param use_keypoint: load pose keypoint data 25 | :param load_mosaic: using mosaic data augmentation from yolov4 26 | :param mode: train or val or test 27 | :param mosaic_image_size: image size Dynamic segmentation 例如图像大小640则会分成4个320×320的图像,以配置文件为准 28 | """ 29 | def __init__(self, 30 | img_path, 31 | ann_path, 32 | input_size, 33 | pipeline, 34 | keep_ratio=True, 35 | use_instance_mask=False, 36 | use_seg_mask=False, 37 | use_keypoint=False, 38 | load_mosaic=False, 39 | mosaic_probability= 0.3, 40 | mosaic_area =9, 41 | mosaic_image_size=320, 42 | mode='train' 43 | ): 44 | self.img_path = img_path 45 | self.ann_path = ann_path 46 | self.input_size = input_size 47 | self.pipeline = Pipeline(pipeline, keep_ratio) 48 | self.keep_ratio = keep_ratio 49 | self.use_instance_mask = use_instance_mask 50 | self.use_seg_mask = use_seg_mask 51 | self.use_keypoint = use_keypoint 52 | self.load_mosaic = load_mosaic 53 | self.mosaic_probability=mosaic_probability 54 | self.mosaic_area=mosaic_area 55 | self.mosaic_image_size=mosaic_image_size 56 | self.mode = mode 57 | 58 | self.data_info = self.get_data_info(ann_path) 59 | 60 | def __len__(self): 61 | return len(self.data_info) 62 | 63 | def __getitem__(self, idx): 64 | if self.mode == 'val' or self.mode == 'test': 65 | return self.get_val_data(idx) 66 | else: 67 | while True: 68 | data = self.get_train_data(idx) 69 | if data is None: 70 | idx = self.get_another_id() 71 | continue 72 | return data 73 | 74 | @abstractmethod 75 | def get_data_info(self, ann_path): 76 | pass 77 | 78 | @abstractmethod 79 | def 
get_train_data(self, idx): 80 | pass 81 | 82 | @abstractmethod 83 | def get_val_data(self, idx): 84 | pass 85 | 86 | def get_another_id(self): 87 | return np.random.random_integers(0, len(self.data_info)-1) 88 | 89 | 90 | 91 | 92 | -------------------------------------------------------------------------------- /quarkdet/model/head/sampler/base_sampler.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | import torch 4 | 5 | from .sampling_result import SamplingResult 6 | 7 | 8 | class BaseSampler(metaclass=ABCMeta): 9 | 10 | def __init__(self, 11 | num, 12 | pos_fraction, 13 | neg_pos_ub=-1, 14 | add_gt_as_proposals=True, 15 | **kwargs): 16 | self.num = num 17 | self.pos_fraction = pos_fraction 18 | self.neg_pos_ub = neg_pos_ub 19 | self.add_gt_as_proposals = add_gt_as_proposals 20 | self.pos_sampler = self 21 | self.neg_sampler = self 22 | 23 | @abstractmethod 24 | def _sample_pos(self, assign_result, num_expected, **kwargs): 25 | pass 26 | 27 | @abstractmethod 28 | def _sample_neg(self, assign_result, num_expected, **kwargs): 29 | pass 30 | 31 | def sample(self, 32 | assign_result, 33 | bboxes, 34 | gt_bboxes, 35 | gt_labels=None, 36 | **kwargs): 37 | """Sample positive and negative bboxes. 38 | 39 | This is a simple implementation of bbox sampling given candidates, 40 | assigning results and ground truth bboxes. 41 | 42 | Args: 43 | assign_result (:obj:`AssignResult`): Bbox assigning results. 44 | bboxes (Tensor): Boxes to be sampled from. 45 | gt_bboxes (Tensor): Ground truth bboxes. 46 | gt_labels (Tensor, optional): Class labels of ground truth bboxes. 47 | 48 | Returns: 49 | :obj:`SamplingResult`: Sampling result. 50 | 51 | """ 52 | if len(bboxes.shape) < 2: 53 | bboxes = bboxes[None, :] 54 | 55 | bboxes = bboxes[:, :4] 56 | 57 | gt_flags = bboxes.new_zeros((bboxes.shape[0],), dtype=torch.uint8) 58 | if self.add_gt_as_proposals and len(gt_bboxes) > 0: 59 | if gt_labels is None: 60 | raise ValueError( 61 | 'gt_labels must be given when add_gt_as_proposals is True') 62 | bboxes = torch.cat([gt_bboxes, bboxes], dim=0) 63 | assign_result.add_gt_(gt_labels) 64 | gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.uint8) 65 | gt_flags = torch.cat([gt_ones, gt_flags]) 66 | 67 | num_expected_pos = int(self.num * self.pos_fraction) 68 | pos_inds = self.pos_sampler._sample_pos( 69 | assign_result, num_expected_pos, bboxes=bboxes, **kwargs) 70 | # We found that sampled indices have duplicated items occasionally. 
71 | # (may be a bug of PyTorch) 72 | pos_inds = pos_inds.unique() 73 | num_sampled_pos = pos_inds.numel() 74 | num_expected_neg = self.num - num_sampled_pos 75 | if self.neg_pos_ub >= 0: 76 | _pos = max(1, num_sampled_pos) 77 | neg_upper_bound = int(self.neg_pos_ub * _pos) 78 | if num_expected_neg > neg_upper_bound: 79 | num_expected_neg = neg_upper_bound 80 | neg_inds = self.neg_sampler._sample_neg( 81 | assign_result, num_expected_neg, bboxes=bboxes, **kwargs) 82 | neg_inds = neg_inds.unique() 83 | 84 | sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 85 | assign_result, gt_flags) 86 | return sampling_result 87 | -------------------------------------------------------------------------------- /quarkdet/model/head/sampler/sampling_result.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from quarkdet.util import util_mixins 4 | 5 | 6 | class SamplingResult(util_mixins.NiceRepr): 7 | """ 8 | Example: 9 | >>> # xdoctest: +IGNORE_WANT 10 | >>> self = SamplingResult.random(rng=10) 11 | >>> print('self = {}'.format(self)) 12 | self = 21 | """ 22 | 23 | def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result, 24 | gt_flags): 25 | self.pos_inds = pos_inds 26 | self.neg_inds = neg_inds 27 | self.pos_bboxes = bboxes[pos_inds] 28 | self.neg_bboxes = bboxes[neg_inds] 29 | self.pos_is_gt = gt_flags[pos_inds] 30 | 31 | self.num_gts = gt_bboxes.shape[0] 32 | self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1 33 | 34 | if gt_bboxes.numel() == 0: 35 | # hack for index error case 36 | assert self.pos_assigned_gt_inds.numel() == 0 37 | self.pos_gt_bboxes = torch.empty_like(gt_bboxes).view(-1, 4) 38 | else: 39 | if len(gt_bboxes.shape) < 2: 40 | gt_bboxes = gt_bboxes.view(-1, 4) 41 | 42 | self.pos_gt_bboxes = gt_bboxes[self.pos_assigned_gt_inds, :] 43 | 44 | if assign_result.labels is not None: 45 | self.pos_gt_labels = assign_result.labels[pos_inds] 46 | else: 47 | self.pos_gt_labels = None 48 | 49 | @property 50 | def bboxes(self): 51 | return torch.cat([self.pos_bboxes, self.neg_bboxes]) 52 | 53 | def to(self, device): 54 | """ 55 | Change the device of the data inplace. 
56 | 57 | Example: 58 | >>> self = SamplingResult.random() 59 | >>> print('self = {}'.format(self.to(None))) 60 | >>> # xdoctest: +REQUIRES(--gpu) 61 | >>> print('self = {}'.format(self.to(0))) 62 | """ 63 | _dict = self.__dict__ 64 | for key, value in _dict.items(): 65 | if isinstance(value, torch.Tensor): 66 | _dict[key] = value.to(device) 67 | return self 68 | 69 | def __nice__(self): 70 | data = self.info.copy() 71 | data['pos_bboxes'] = data.pop('pos_bboxes').shape 72 | data['neg_bboxes'] = data.pop('neg_bboxes').shape 73 | parts = ['\'{}\': {!r}'.format(k, v) for k, v in sorted(data.items())] 74 | body = ' ' + ',\n '.join(parts) 75 | return '{\n' + body + '\n}' 76 | 77 | @property 78 | def info(self): 79 | """ 80 | Returns a dictionary of info about the object 81 | """ 82 | return { 83 | 'pos_inds': self.pos_inds, 84 | 'neg_inds': self.neg_inds, 85 | 'pos_bboxes': self.pos_bboxes, 86 | 'neg_bboxes': self.neg_bboxes, 87 | 'pos_is_gt': self.pos_is_gt, 88 | 'num_gts': self.num_gts, 89 | 'pos_assigned_gt_inds': self.pos_assigned_gt_inds, 90 | } 91 | -------------------------------------------------------------------------------- /quarkdet/model/loss/utils.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import torch.nn.functional as F 4 | 5 | 6 | def reduce_loss(loss, reduction): 7 | """Reduce loss as specified. 8 | 9 | Args: 10 | loss (Tensor): Elementwise loss tensor. 11 | reduction (str): Options are "none", "mean" and "sum". 12 | 13 | Return: 14 | Tensor: Reduced loss tensor. 15 | """ 16 | reduction_enum = F._Reduction.get_enum(reduction) 17 | # none: 0, elementwise_mean:1, sum: 2 18 | if reduction_enum == 0: 19 | return loss 20 | elif reduction_enum == 1: 21 | return loss.mean() 22 | elif reduction_enum == 2: 23 | return loss.sum() 24 | 25 | 26 | def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None): 27 | """Apply element-wise weight and reduce loss. 28 | 29 | Args: 30 | loss (Tensor): Element-wise loss. 31 | weight (Tensor): Element-wise weights. 32 | reduction (str): Same as built-in losses of PyTorch. 33 | avg_factor (float): Avarage factor when computing the mean of losses. 34 | 35 | Returns: 36 | Tensor: Processed loss values. 37 | """ 38 | # if weight is specified, apply element-wise weight 39 | if weight is not None: 40 | loss = loss * weight 41 | 42 | # if avg_factor is not specified, just reduce the loss 43 | if avg_factor is None: 44 | loss = reduce_loss(loss, reduction) 45 | else: 46 | # if reduction is mean, then average the loss by avg_factor 47 | if reduction == 'mean': 48 | loss = loss.sum() / avg_factor 49 | # if reduction is 'none', then do nothing, otherwise raise an error 50 | elif reduction != 'none': 51 | raise ValueError('avg_factor can not be used with reduction="sum"') 52 | return loss 53 | 54 | 55 | def weighted_loss(loss_func): 56 | """Create a weighted version of a given loss function. 57 | 58 | To use this decorator, the loss function must have the signature like 59 | `loss_func(pred, target, **kwargs)`. The function only needs to compute 60 | element-wise loss without any reduction. This decorator will add weight 61 | and reduction arguments to the function. The decorated function will have 62 | the signature like `loss_func(pred, target, weight=None, reduction='mean', 63 | avg_factor=None, **kwargs)`. 
64 | 65 | :Example: 66 | 67 | >>> import torch 68 | >>> @weighted_loss 69 | >>> def l1_loss(pred, target): 70 | >>> return (pred - target).abs() 71 | 72 | >>> pred = torch.Tensor([0, 2, 3]) 73 | >>> target = torch.Tensor([1, 1, 1]) 74 | >>> weight = torch.Tensor([1, 0, 1]) 75 | 76 | >>> l1_loss(pred, target) 77 | tensor(1.3333) 78 | >>> l1_loss(pred, target, weight) 79 | tensor(1.) 80 | >>> l1_loss(pred, target, reduction='none') 81 | tensor([1., 1., 2.]) 82 | >>> l1_loss(pred, target, weight, avg_factor=2) 83 | tensor(1.5000) 84 | """ 85 | 86 | @functools.wraps(loss_func) 87 | def wrapper(pred, 88 | target, 89 | weight=None, 90 | reduction='mean', 91 | avg_factor=None, 92 | **kwargs): 93 | # get element-wise loss 94 | loss = loss_func(pred, target, **kwargs) 95 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 96 | return loss 97 | 98 | return wrapper 99 | -------------------------------------------------------------------------------- /config/shufflenetv2_0.5x.yml: -------------------------------------------------------------------------------- 1 | #Config File example 2 | save_dir: workspace/shufflenetv2_05x 3 | model: 4 | detector: 5 | name: GFL 6 | backbone: 7 | name: ShuffleNetV2 8 | out_stages: [2,3,4] 9 | activation: LeakyReLU 10 | model_size: 0.5x 11 | neck: 12 | name: PAN 13 | in_channels: [48, 96, 192] 14 | out_channels: 96 15 | start_level: 0 16 | num_outs: 3 17 | head: 18 | name: QuarkDetHead 19 | num_classes: 80 # 80 20 | input_channel: 96 21 | feat_channels: 96 22 | stacked_convs: 2 23 | share_cls_reg: True #True 24 | octave_base_scale: 5 25 | scales_per_octave: 1 26 | strides: [8, 16, 32] 27 | reg_max: 7 #16 #7 28 | norm_cfg: 29 | type: BN 30 | loss: 31 | loss_qfl: 32 | name: QualityFocalLoss 33 | use_sigmoid: False #True 34 | beta: 2.0 35 | loss_weight: 1.0 36 | loss_dfl: 37 | name: DistributionFocalLoss 38 | loss_weight: 0.25 39 | loss_bbox: 40 | name: GIoULoss 41 | loss_weight: 2.0 42 | data: 43 | train: 44 | name: coco 45 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/train2017 46 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_train2017.json 47 | input_size: [320,320] #[w,h] 48 | keep_ratio: True 49 | pipeline: 50 | perspective: 0.0 51 | scale: [0.6, 1.4] 52 | stretch: [[1, 1], [1, 1]] 53 | rotation: 0 54 | shear: 0 55 | translate: 0 56 | flip: 0.5 57 | brightness: 0.2 58 | contrast: [0.8, 1.2] 59 | saturation: [0.8, 1.2] 60 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 61 | val: 62 | name: coco 63 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/val2017 64 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_val2017.json 65 | input_size: [320,320] #[w,h] 66 | keep_ratio: True 67 | pipeline: 68 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 69 | device: 70 | gpu_ids: [0,1] 71 | workers_per_gpu: 8 72 | batchsize_per_gpu: 80 # santiago test 73 | schedule: 74 | resume: False 75 | load_model: ./workspace/shufflenetv2_05x/model_last.pth 76 | 77 | optimizer: 78 | name: SGD 79 | lr: 0.14 80 | momentum: 0.9 81 | weight_decay: 0.0001 82 | warmup: 83 | name: linear 84 | steps: 300 85 | ratio: 0.1 86 | total_epochs: 160 #70 87 | 88 | lr_schedule: 89 | name: ReduceLROnPlateau 90 | mode: min 91 | factor: 0.1 92 | patience: 3 #15 93 | verbose: True 94 | threshold: 0.00001 95 | threshold_mode: rel 96 | cooldown: 0 97 | min_lr: 0 98 | eps: 0.000000001 #1e-08 99 | val_intervals: 5 #5 100 | evaluator: 101 | name: CocoDetectionEvaluator 102 | save_key: mAP 103 | 104 | log: 105 | 
interval: 10 106 | 107 | class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 108 | 'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant', 109 | 'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog', 110 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 111 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 112 | 'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat', 113 | 'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket', 114 | 'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 115 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 116 | 'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 117 | 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop', 118 | 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave', 119 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 120 | 'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush'] 121 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /quarkdet/util/util_mixins.py: -------------------------------------------------------------------------------- 1 | """This module defines the :class:`NiceRepr` mixin class, which defines a 2 | ``__repr__`` and ``__str__`` method that only depend on a custom ``__nice__`` 3 | method, which you must define. This means you only have to overload one 4 | function instead of two. Furthermore, if the object defines a ``__len__`` 5 | method, then the ``__nice__`` method defaults to something sensible, otherwise 6 | it is treated as abstract and raises ``NotImplementedError``. 7 | 8 | To use simply have your object inherit from :class:`NiceRepr` 9 | (multi-inheritance should be ok). 10 | 11 | This code was copied from the ubelt library: https://github.com/Erotemic/ubelt 12 | 13 | Example: 14 | >>> # Objects that define __nice__ have a default __str__ and __repr__ 15 | >>> class Student(NiceRepr): 16 | ... def __init__(self, name): 17 | ... self.name = name 18 | ... def __nice__(self): 19 | ... return self.name 20 | >>> s1 = Student('Alice') 21 | >>> s2 = Student('Bob') 22 | >>> print(f's1 = {s1}') 23 | >>> print(f's2 = {s2}') 24 | s1 = 25 | s2 = 26 | 27 | Example: 28 | >>> # Objects that define __len__ have a default __nice__ 29 | >>> class Group(NiceRepr): 30 | ... def __init__(self, data): 31 | ... self.data = data 32 | ... def __len__(self): 33 | ... return len(self.data) 34 | >>> g = Group([1, 2, 3]) 35 | >>> print(f'g = {g}') 36 | g = 37 | """ 38 | import warnings 39 | 40 | 41 | class NiceRepr(object): 42 | """Inherit from this class and define ``__nice__`` to "nicely" print your 43 | objects. 44 | 45 | Defines ``__str__`` and ``__repr__`` in terms of ``__nice__`` function 46 | Classes that inherit from :class:`NiceRepr` should redefine ``__nice__``. 47 | If the inheriting class has a ``__len__``, method then the default 48 | ``__nice__`` method will return its length. 49 | 50 | Example: 51 | >>> class Foo(NiceRepr): 52 | ... def __nice__(self): 53 | ... return 'info' 54 | >>> foo = Foo() 55 | >>> assert str(foo) == '' 56 | >>> assert repr(foo).startswith('>> class Bar(NiceRepr): 60 | ... pass 61 | >>> bar = Bar() 62 | >>> import pytest 63 | >>> with pytest.warns(None) as record: 64 | >>> assert 'object at' in str(bar) 65 | >>> assert 'object at' in repr(bar) 66 | 67 | Example: 68 | >>> class Baz(NiceRepr): 69 | ... def __len__(self): 70 | ... 
return 5 71 | >>> baz = Baz() 72 | >>> assert str(baz) == '' 73 | """ 74 | 75 | def __nice__(self): 76 | """str: a "nice" summary string describing this module""" 77 | if hasattr(self, '__len__'): 78 | # It is a common pattern for objects to use __len__ in __nice__ 79 | # As a convenience we define a default __nice__ for these objects 80 | return str(len(self)) 81 | else: 82 | # In all other cases force the subclass to overload __nice__ 83 | raise NotImplementedError( 84 | f'Define the __nice__ method for {self.__class__!r}') 85 | 86 | def __repr__(self): 87 | """str: the string of the module""" 88 | try: 89 | nice = self.__nice__() 90 | classname = self.__class__.__name__ 91 | return f'<{classname}({nice}) at {hex(id(self))}>' 92 | except NotImplementedError as ex: 93 | warnings.warn(str(ex), category=RuntimeWarning) 94 | return object.__repr__(self) 95 | 96 | def __str__(self): 97 | """str: the string of the module""" 98 | try: 99 | classname = self.__class__.__name__ 100 | nice = self.__nice__() 101 | return f'<{classname}({nice})>' 102 | except NotImplementedError as ex: 103 | warnings.warn(str(ex), category=RuntimeWarning) 104 | return object.__repr__(self) 105 | -------------------------------------------------------------------------------- /quarkdet/model/neck/fpn.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from ..module.conv import ConvModule 6 | from ..module.init_weights import xavier_init 7 | 8 | 9 | class FPN(nn.Module): 10 | 11 | def __init__(self, 12 | in_channels, 13 | out_channels, 14 | num_outs, 15 | start_level=0, 16 | end_level=-1, 17 | conv_cfg=None, 18 | norm_cfg=None, 19 | activation=None 20 | ): 21 | super(FPN, self).__init__() 22 | assert isinstance(in_channels, list) 23 | self.in_channels = in_channels 24 | self.out_channels = out_channels 25 | self.num_ins = len(in_channels) 26 | self.num_outs = num_outs 27 | self.fp16_enabled = False 28 | 29 | if end_level == -1: 30 | self.backbone_end_level = self.num_ins 31 | assert num_outs >= self.num_ins - start_level 32 | else: 33 | # if end_level < inputs, no extra level is allowed 34 | self.backbone_end_level = end_level 35 | assert end_level <= len(in_channels) 36 | assert num_outs == end_level - start_level 37 | self.start_level = start_level 38 | self.end_level = end_level 39 | self.lateral_convs = nn.ModuleList() 40 | self.fpn_convs = nn.ModuleList() 41 | 42 | # for i in range(self.start_level, self.backbone_end_level): 43 | # l_conv = ConvModule( 44 | # in_channels[i], 45 | # out_channels, 46 | # 1, 47 | # conv_cfg=conv_cfg, 48 | # norm_cfg=norm_cfg, 49 | # activation=activation, 50 | # inplace=False) 51 | 52 | # self.lateral_convs.append(l_conv) 53 | 54 | for i in range(self.start_level, self.backbone_end_level): 55 | l_conv = ConvModule( 56 | in_channels[i], 57 | out_channels, 58 | 1, 59 | conv_cfg=conv_cfg, 60 | norm_cfg=norm_cfg, 61 | #act_cfg=act_cfg, 62 | activation=activation, 63 | inplace=False) 64 | fpn_conv = ConvModule( 65 | out_channels, 66 | out_channels, 67 | 3, 68 | padding=1, 69 | conv_cfg=conv_cfg, 70 | norm_cfg=norm_cfg, 71 | activation=activation, 72 | inplace=False) 73 | 74 | self.lateral_convs.append(l_conv) 75 | self.fpn_convs.append(fpn_conv) 76 | print("FPN:",self.lateral_convs) 77 | self.init_weights() 78 | 79 | # default init_weights for conv(msra) and norm in ConvModule 80 | def init_weights(self): 81 | for m in self.modules(): 82 | if isinstance(m, nn.Conv2d): 83 | 
xavier_init(m, distribution='uniform') 84 | 85 | def forward(self, inputs): 86 | assert len(inputs) == len(self.in_channels) 87 | 88 | # build laterals 89 | laterals = [ 90 | lateral_conv(inputs[i + self.start_level]) 91 | for i, lateral_conv in enumerate(self.lateral_convs) 92 | ] 93 | 94 | # build top-down path 95 | used_backbone_levels = len(laterals) 96 | for i in range(used_backbone_levels - 1, 0, -1): 97 | prev_shape = laterals[i - 1].shape[2:] 98 | laterals[i - 1] += F.interpolate( 99 | laterals[i], size=prev_shape, mode='bilinear', align_corners=True) 100 | 101 | # build outputs 102 | # outs = [ 103 | # # self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels) 104 | # laterals[i] for i in range(used_backbone_levels) 105 | # ] 106 | outs = [ 107 | self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels) 108 | ] 109 | # The two fpn_convs levels with stride=(2, 2) are removed here; lateral_convs and fpn_convs both have three levels 110 | return tuple(outs) 111 | 112 | 113 | # if __name__ == '__main__': 114 | -------------------------------------------------------------------------------- /tools/train.py: -------------------------------------------------------------------------------- 1 | from torchsummary import summary 2 | import sys 3 | sys.path.append("./") 4 | from quarkdet.evaluator import build_evaluator 5 | from quarkdet.model.detector import build_model 6 | from quarkdet.data.dataset import build_dataset 7 | from quarkdet.data.collate import custom_collate_function 8 | from quarkdet.trainer import build_trainer 9 | from quarkdet.util import mkdir, Logger, cfg, load_config 10 | import os 11 | import torch 12 | import logging 13 | import argparse 14 | import numpy as np 15 | import torch.distributed as dist 16 | 17 | 18 | 19 | 20 | def parse_args(): 21 | parser = argparse.ArgumentParser() 22 | parser.add_argument('config', help='train config file path') 23 | parser.add_argument('--local_rank', default=-1, type=int, 24 | help='node rank for distributed training') 25 | parser.add_argument('--seed', type=int, default=None, 26 | help='random seed') 27 | args = parser.parse_args() 28 | return args 29 | 30 | 31 | def init_seeds(seed=0): 32 | """ 33 | manually set a random seed for numpy, torch and cuda 34 | :param seed: random seed 35 | """ 36 | torch.manual_seed(seed) 37 | np.random.seed(seed) 38 | torch.cuda.manual_seed(seed) 39 | torch.cuda.manual_seed_all(seed) 40 | if seed == 0: 41 | torch.backends.cudnn.deterministic = True 42 | torch.backends.cudnn.benchmark = False 43 | 44 | def collate_fn_coco(batch): 45 | return tuple(zip(*batch)) 46 | def main(args): 47 | load_config(cfg, args.config) 48 | local_rank = int(args.local_rank) 49 | torch.backends.cudnn.enabled = True 50 | torch.backends.cudnn.benchmark = True 51 | mkdir(local_rank, cfg.save_dir) 52 | logger = Logger(local_rank, cfg.save_dir) 53 | if args.seed is not None: 54 | logger.log('Set random seed to {}'.format(args.seed)) 55 | init_seeds(args.seed) 56 | 57 | logger.log('Creating model...') 58 | model = build_model(cfg.model) 59 | 60 | print("model:", model) 61 | 62 | # pre_dict = model.state_dict() # load the model parameters into pre_dict as key-value pairs 63 | # for k, v in pre_dict.items(): # print the model parameters 64 | # for k, v in pre_dict.items(): # print the name of every model layer 65 | # print ('%-50s%s' %(k,v.shape)) 66 | 67 | #summary(model, (3, 320, 320)) 68 | 69 | logger.log('Setting up data...') 70 | train_dataset = build_dataset(cfg.data.train, 'train') 71 | val_dataset = build_dataset(cfg.data.val, 'test') 72 | 73 | if len(cfg.device.gpu_ids) > 1: 74 | print('rank = ', local_rank) 75 | num_gpus =
torch.cuda.device_count() 76 | torch.cuda.set_device(local_rank % num_gpus) 77 | dist.init_process_group(backend='nccl') 78 | train_sampler = torch.utils.data.distributed.DistributedSampler( 79 | train_dataset) 80 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=cfg.device.batchsize_per_gpu, 81 | num_workers=cfg.device.workers_per_gpu, pin_memory=True, 82 | collate_fn=custom_collate_function, sampler=train_sampler, 83 | drop_last=True) 84 | else: 85 | 86 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=cfg.device.batchsize_per_gpu, 87 | shuffle=True,collate_fn=custom_collate_function, 88 | 89 | pin_memory=True, drop_last=True) 90 | 91 | 92 | val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=1, shuffle=False, num_workers=1, 93 | pin_memory=True, collate_fn=custom_collate_function, drop_last=True) 94 | 95 | trainer = build_trainer(local_rank, cfg, model, logger) 96 | 97 | if cfg.schedule.resume: 98 | trainer.resume(cfg) 99 | if 'load_model' in cfg.schedule: 100 | trainer.load_model(cfg) 101 | 102 | evaluator = build_evaluator(cfg, val_dataset) 103 | 104 | logger.log('Starting training...') 105 | trainer.run(train_dataloader, val_dataloader, evaluator) 106 | 107 | 108 | if __name__ == '__main__': 109 | args = parse_args() 110 | main(args) 111 | -------------------------------------------------------------------------------- /demo/demo.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import os 3 | import time 4 | import torch 5 | import argparse 6 | import sys 7 | sys.path.append("./") 8 | from quarkdet.util import cfg, load_config, Logger 9 | from quarkdet.model.detector import build_model 10 | from quarkdet.util import load_model_weight 11 | from quarkdet.data.transform import Pipeline 12 | 13 | 14 | image_ext = ['.jpg', '.jpeg', '.webp', '.bmp', '.png'] 15 | video_ext = ['mp4', 'mov', 'avi', 'mkv'] 16 | 17 | 18 | def parse_args(): 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument('demo', default='image', help='demo type, eg. 
image, video and webcam') 21 | parser.add_argument('--config', help='model config file path') 22 | parser.add_argument('--model', help='model file path') 23 | parser.add_argument('--path', default='./demo', help='path to images or video') 24 | parser.add_argument('--camid', type=int, default=0, help='webcam demo camera id') 25 | args = parser.parse_args() 26 | return args 27 | 28 | 29 | class Predictor(object): 30 | def __init__(self, cfg, model_path, logger, device='cuda:0'): 31 | self.cfg = cfg 32 | self.device = device 33 | model = build_model(cfg.model) 34 | ckpt = torch.load(model_path, map_location=lambda storage, loc: storage) 35 | load_model_weight(model, ckpt, logger) 36 | self.model = model.to(device).eval() 37 | self.pipeline = Pipeline(cfg.data.val.pipeline, cfg.data.val.keep_ratio) 38 | 39 | def inference(self, img): 40 | img_info = {} 41 | if isinstance(img, str): 42 | img_info['file_name'] = os.path.basename(img) 43 | img = cv2.imread(img) 44 | else: 45 | img_info['file_name'] = None 46 | 47 | height, width = img.shape[:2] 48 | img_info['height'] = height 49 | img_info['width'] = width 50 | meta = dict(img_info=img_info, 51 | raw_img=img, 52 | img=img) 53 | meta = self.pipeline(meta, self.cfg.data.val.input_size) 54 | meta['img'] = torch.from_numpy(meta['img'].transpose(2, 0, 1)).unsqueeze(0).to(self.device) 55 | with torch.no_grad(): 56 | results = self.model.inference(meta) 57 | return meta, results 58 | 59 | def visualize(self, dets, meta, class_names, score_thres, wait=0): 60 | time1 = time.time() 61 | self.model.head.show_result(meta['raw_img'], dets, class_names, score_thres=score_thres, show=True) 62 | print('viz time: {:.3f}s'.format(time.time()-time1)) 63 | 64 | 65 | def get_image_list(path): 66 | image_names = [] 67 | for maindir, subdir, file_name_list in os.walk(path): 68 | for filename in file_name_list: 69 | apath = os.path.join(maindir, filename) 70 | ext = os.path.splitext(apath)[1] 71 | if ext in image_ext: 72 | image_names.append(apath) 73 | return image_names 74 | 75 | 76 | def main(): 77 | args = parse_args() 78 | torch.backends.cudnn.deterministic = True 79 | torch.backends.cudnn.benchmark = False 80 | 81 | load_config(cfg, args.config) 82 | logger = Logger(-1, use_tensorboard=False) 83 | predictor = Predictor(cfg, args.model, logger, device='cuda:0') 84 | logger.log('Press "Esc", "q" or "Q" to exit.') 85 | if args.demo == 'image': 86 | if os.path.isdir(args.path): 87 | files = get_image_list(args.path) 88 | else: 89 | files = [args.path] 90 | files.sort() 91 | for image_name in files: 92 | meta, res = predictor.inference(image_name) 93 | predictor.visualize(res, meta, cfg.class_names, 0.35) 94 | ch = cv2.waitKey(0) 95 | if ch == 27 or ch == ord('q') or ch == ord('Q'): 96 | break 97 | elif args.demo == 'video' or args.demo == 'webcam': 98 | cap = cv2.VideoCapture(args.path if args.demo == 'video' else args.camid) 99 | while True: 100 | ret_val, frame = cap.read() 101 | meta, res = predictor.inference(frame) 102 | predictor.visualize(res, meta, cfg.class_names, 0.35) 103 | ch = cv2.waitKey(1) 104 | if ch == 27 or ch == ord('q') or ch == ord('Q'): 105 | break 106 | 107 | 108 | if __name__ == '__main__': 109 | main() 110 | -------------------------------------------------------------------------------- /quarkdet/model/backbone/mobilenetv2.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import 
torch 6 | import torch.nn as nn 7 | from ..module.activation import act_layers 8 | 9 | 10 | class ConvBNReLU(nn.Sequential): 11 | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1, act='ReLU'): 12 | padding = (kernel_size - 1) // 2 13 | super(ConvBNReLU, self).__init__( 14 | nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False), 15 | nn.BatchNorm2d(out_planes), 16 | act_layers(act) 17 | ) 18 | 19 | 20 | class InvertedResidual(nn.Module): 21 | def __init__(self, inp, oup, stride, expand_ratio, act='ReLU'): 22 | super(InvertedResidual, self).__init__() 23 | self.stride = stride 24 | assert stride in [1, 2] 25 | 26 | hidden_dim = int(round(inp * expand_ratio)) 27 | self.use_res_connect = self.stride == 1 and inp == oup 28 | 29 | layers = [] 30 | if expand_ratio != 1: 31 | # pw 32 | layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1, act=act)) 33 | layers.extend([ 34 | # dw 35 | ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim, act=act), 36 | # pw-linear 37 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 38 | nn.BatchNorm2d(oup), 39 | ]) 40 | self.conv = nn.Sequential(*layers) 41 | 42 | def forward(self, x): 43 | if self.use_res_connect: 44 | return x + self.conv(x) 45 | else: 46 | return self.conv(x) 47 | 48 | 49 | class MobileNetV2(nn.Module): 50 | def __init__(self, width_mult=1., out_stages=(1, 2, 4, 6), last_channel=1280, act='ReLU'): 51 | super(MobileNetV2, self).__init__() 52 | self.width_mult = width_mult 53 | self.out_stages = out_stages 54 | input_channel = 32 55 | self.last_channel = last_channel 56 | self.act = act 57 | self.interverted_residual_setting = [ 58 | # t, c, n, s 59 | [1, 16, 1, 1], 60 | [6, 24, 2, 2], 61 | [6, 32, 3, 2], 62 | [6, 64, 4, 2], 63 | [6, 96, 3, 1], 64 | [6, 160, 3, 2], 65 | [6, 320, 1, 1], 66 | ] 67 | 68 | # building first layer 69 | self.input_channel = int(input_channel * width_mult) 70 | self.first_layer = ConvBNReLU(3, input_channel, stride=2, act=self.act) 71 | # building inverted residual blocks 72 | for i in range(7): 73 | name = 'stage{}'.format(i) 74 | setattr(self, name, self.build_mobilenet_stage(stage_num=i)) 75 | 76 | def build_mobilenet_stage(self, stage_num): 77 | stage = [] 78 | t, c, n, s = self.interverted_residual_setting[stage_num] 79 | output_channel = int(c * self.width_mult) 80 | for i in range(n): 81 | if i == 0: 82 | stage.append(InvertedResidual(self.input_channel, output_channel, s, expand_ratio=t, act=self.act)) 83 | else: 84 | stage.append(InvertedResidual(self.input_channel, output_channel, 1, expand_ratio=t, act=self.act)) 85 | self.input_channel = output_channel 86 | if stage_num == 6: 87 | last_layer = ConvBNReLU(self.input_channel, self.last_channel, kernel_size=1, act=self.act) 88 | stage.append(last_layer) 89 | stage = nn.Sequential(*stage) 90 | return stage 91 | 92 | def forward(self, x): 93 | x = self.first_layer(x) 94 | output = [] 95 | for i in range(0, 7): 96 | stage = getattr(self, 'stage{}'.format(i)) 97 | x = stage(x) 98 | if i in self.out_stages: 99 | output.append(x) 100 | 101 | return tuple(output) 102 | 103 | def init_weights(self): 104 | for m in self.modules(): 105 | if isinstance(m, nn.Conv2d): 106 | nn.init.normal_(m.weight, std=0.001) 107 | if m.bias is not None: 108 | m.bias.data.zero_() 109 | elif isinstance(m, nn.BatchNorm2d): 110 | m.weight.data.fill_(1) 111 | m.bias.data.zero_() 112 | 113 | -------------------------------------------------------------------------------- 
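A minimal smoke test for the MobileNetV2 backbone above (an illustrative sketch, not part of the repository; the import path follows the project tree, and out_stages=(2, 4, 6) is an assumed choice that yields stride-8/16/32 features matching the head strides used in the configs):

import torch
from quarkdet.model.backbone.mobilenetv2 import MobileNetV2

backbone = MobileNetV2(width_mult=1.0, out_stages=(2, 4, 6))
backbone.init_weights()
dummy = torch.randn(1, 3, 320, 320)  # same input size as the 320x320 configs
features = backbone(dummy)
for feat in features:
    print(feat.shape)
# Expected shapes for a 320x320 input (strides 8, 16 and 32):
# torch.Size([1, 32, 40, 40]), torch.Size([1, 96, 20, 20]), torch.Size([1, 1280, 10, 10])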
/quarkdet/model/head/anchor/anchor_generator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class AnchorGenerator(object): 5 | """ 6 | Examples: 7 | >>> self = AnchorGenerator(9, [1.], [1.]) 8 | >>> all_anchors = self.grid_anchors((2, 2), device='cpu') 9 | >>> print(all_anchors) 10 | tensor([[ 0., 0., 8., 8.], 11 | [16., 0., 24., 8.], 12 | [ 0., 16., 8., 24.], 13 | [16., 16., 24., 24.]]) 14 | """ 15 | 16 | def __init__(self, base_size, scales, ratios, scale_major=True, ctr=None): 17 | self.base_size = base_size 18 | self.scales = torch.Tensor(scales) 19 | self.ratios = torch.Tensor(ratios) 20 | self.scale_major = scale_major 21 | self.ctr = ctr 22 | self.base_anchors = self.gen_base_anchors() 23 | 24 | @property 25 | def num_base_anchors(self): 26 | return self.base_anchors.size(0) 27 | 28 | def gen_base_anchors(self): 29 | w = self.base_size 30 | h = self.base_size 31 | if self.ctr is None: 32 | x_ctr = 0.5 * (w - 1) 33 | y_ctr = 0.5 * (h - 1) 34 | else: 35 | x_ctr, y_ctr = self.ctr 36 | 37 | h_ratios = torch.sqrt(self.ratios) 38 | w_ratios = 1 / h_ratios 39 | if self.scale_major: 40 | ws = (w * w_ratios[:, None] * self.scales[None, :]).view(-1) 41 | hs = (h * h_ratios[:, None] * self.scales[None, :]).view(-1) 42 | else: 43 | ws = (w * self.scales[:, None] * w_ratios[None, :]).view(-1) 44 | hs = (h * self.scales[:, None] * h_ratios[None, :]).view(-1) 45 | 46 | # yapf: disable 47 | base_anchors = torch.stack( 48 | [ 49 | x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1), 50 | x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1) 51 | ], 52 | dim=-1).round() 53 | # yapf: enable 54 | #print("base_size:{%s}, scales:{%s}, ratios:{%s}"%(self.base_size, self.scales, self.ratios)) 55 | #print("base_anchors",base_anchors) 56 | 57 | return base_anchors 58 | 59 | def _meshgrid(self, x, y, row_major=True): 60 | xx = x.repeat(len(y)) 61 | yy = y.view(-1, 1).repeat(1, len(x)).view(-1) 62 | if row_major: 63 | return xx, yy 64 | else: 65 | return yy, xx 66 | 67 | def grid_anchors(self, featmap_size, stride=16, device='cuda'): 68 | base_anchors = self.base_anchors.to(device) 69 | # print("grid_anchors base_size:{%s}, scales:{%s}, ratios:{%s}"%(self.base_size, self.scales, self.ratios)) 70 | # print("grid_anchors base_anchors",base_anchors) 71 | 72 | feat_h, feat_w = featmap_size 73 | #print(feat_h,feat_w) 74 | shift_x = torch.arange(0, feat_w, device=device) * stride 75 | shift_y = torch.arange(0, feat_h, device=device) * stride 76 | # print("shift_x:",shift_x) 77 | # print("shift_y:",shift_y) 78 | shift_xx, shift_yy = self._meshgrid(shift_x, shift_y) 79 | shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1) 80 | shifts = shifts.type_as(base_anchors) 81 | # first feat_w elements correspond to the first row of shifts 82 | # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get 83 | # shifted anchors (K, A, 4), reshape to (K*A, 4) 84 | 85 | all_anchors = base_anchors[None, :, :] + shifts[:, None, :] 86 | all_anchors = all_anchors.view(-1, 4) 87 | # first A rows correspond to A anchors of (0, 0) in feature map, 88 | # then (0, 1), (0, 2), ... 
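# Worked example of the broadcast above: with a single base anchor (A = 1)
# on a 2x2 feature map (K = 4) and the default stride of 16, the sum of
# (1, A, 4) and (K, 1, 4) broadcasts to (K, A, 4); view(-1, 4) then gives the
# 4x4 tensor shown in the class docstring.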
89 | return all_anchors 90 | 91 | def valid_flags(self, featmap_size, valid_size, device='cuda'): 92 | feat_h, feat_w = featmap_size 93 | valid_h, valid_w = valid_size 94 | # print("valid_flags featmap_size:",featmap_size) 95 | # print("valid_flags valid_size:",valid_size) 96 | 97 | assert valid_h <= feat_h and valid_w <= feat_w 98 | valid_x = torch.zeros(feat_w, dtype=torch.bool, device=device) 99 | valid_y = torch.zeros(feat_h, dtype=torch.bool, device=device) 100 | valid_x[:valid_w] = 1 101 | valid_y[:valid_h] = 1 102 | valid_xx, valid_yy = self._meshgrid(valid_x, valid_y) 103 | #print("valid_xx, valid_yy:",valid_xx, valid_yy) 104 | valid = valid_xx & valid_yy 105 | valid = valid[:, None].expand(valid.size(0), 106 | self.num_base_anchors).contiguous().view(-1) 107 | #print("valid_flags valid:",valid) 108 | return valid 109 | -------------------------------------------------------------------------------- /quarkdet/model/neck/pan.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from ..module.conv import ConvModule 4 | from .fpn import FPN 5 | import numpy as np 6 | 7 | 8 | class PAN(FPN): 9 | """Path Aggregation Network for Instance Segmentation. 10 | 11 | This is an implementation of the `PAN in Path Aggregation Network 12 | `_. 13 | 14 | Args: 15 | in_channels (List[int]): Number of input channels per scale. 16 | out_channels (int): Number of output channels (used at each scale) 17 | num_outs (int): Number of output scales. 18 | start_level (int): Index of the start input backbone level used to 19 | build the feature pyramid. Default: 0. 20 | end_level (int): Index of the end input backbone level (exclusive) to 21 | build the feature pyramid. Default: -1, which means the last level. 22 | add_extra_convs (bool): Whether to add conv layers on top of the 23 | original feature maps. Default: False. 24 | extra_convs_on_inputs (bool): Whether to apply extra conv on 25 | the original feature from the backbone. Default: False. 26 | relu_before_extra_convs (bool): Whether to apply relu before the extra 27 | conv. Default: False. 28 | no_norm_on_lateral (bool): Whether to apply norm on lateral. 29 | Default: False. 30 | conv_cfg (dict): Config dict for convolution layer. Default: None. 31 | norm_cfg (dict): Config dict for normalization layer. Default: None. 32 | act_cfg (str): Config dict for activation layer in ConvModule. 33 | Default: None. 
34 | """ 35 | 36 | def __init__(self, 37 | in_channels, 38 | out_channels, 39 | num_outs, 40 | start_level=0, 41 | end_level=-1, 42 | conv_cfg=None, 43 | norm_cfg=None, 44 | activation=None): 45 | super(PAN, 46 | self).__init__(in_channels, out_channels, num_outs, start_level, 47 | end_level, conv_cfg, norm_cfg, activation) 48 | #显示调用基类的__init__方法,Python不会自动执行这些初始化操作。 49 | print("PAN:",in_channels, out_channels, num_outs, start_level,end_level, conv_cfg, norm_cfg, activation) 50 | self.init_weights() 51 | # add extra bottom up pathway 52 | self.downsample_convs = nn.ModuleList() 53 | self.pan_convs = nn.ModuleList() 54 | 55 | for i in range(self.start_level + 1, self.backbone_end_level): 56 | d_conv = ConvModule( 57 | out_channels, 58 | out_channels, 59 | 3, 60 | stride=2, 61 | padding=1, 62 | conv_cfg=conv_cfg, 63 | norm_cfg=norm_cfg, 64 | activation=None, 65 | inplace=False) 66 | pafpn_conv = ConvModule( 67 | out_channels, 68 | out_channels, 69 | 3, 70 | padding=1, 71 | conv_cfg=conv_cfg, 72 | norm_cfg=norm_cfg, 73 | activation=None, 74 | inplace=False) 75 | self.downsample_convs.append(d_conv) 76 | self.pan_convs.append(pafpn_conv) 77 | 78 | def forward(self, inputs): 79 | """Forward function.""" 80 | assert len(inputs) == len(self.in_channels) 81 | #print("PAN forward:",self.in_channels, self.out_channels, self.num_outs, self.start_level,self.end_level) 82 | 83 | 84 | # build laterals 85 | laterals = [ 86 | lateral_conv(inputs[i + self.start_level]) 87 | for i, lateral_conv in enumerate(self.lateral_convs) 88 | ] 89 | 90 | # build top-down path 91 | used_backbone_levels = len(laterals) 92 | for i in range(used_backbone_levels - 1, 0, -1): #i=[2,1] 93 | prev_shape = laterals[i - 1].shape[2:] 94 | laterals[i - 1] += F.interpolate( 95 | laterals[i], size=prev_shape, mode='bilinear',align_corners=True) 96 | 97 | # part 1: from original levels 98 | 99 | inter_outs = [ 100 | self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels) 101 | ] 102 | 103 | 104 | # part 2: add bottom-up path 105 | for i in range(0, used_backbone_levels - 1): 106 | inter_outs[i + 1] += self.downsample_convs[i](inter_outs[i]) 107 | 108 | outs = [] 109 | outs.append(inter_outs[0]) 110 | outs.extend([ 111 | self.pan_convs[i - 1](inter_outs[i]) 112 | for i in range(1, used_backbone_levels) 113 | ]) 114 | 115 | 116 | 117 | 118 | return tuple(outs) 119 | -------------------------------------------------------------------------------- /quarkdet/util/data_parallel.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from torch.nn.modules import Module 4 | from torch.nn.parallel.scatter_gather import gather 5 | from torch.nn.parallel.replicate import replicate 6 | from torch.nn.parallel.parallel_apply import parallel_apply 7 | 8 | from .scatter_gather import scatter_kwargs 9 | 10 | class DataParallel(Module): 11 | r"""Implements data parallelism at the module level. 12 | 13 | This container parallelizes the application of the given module by 14 | splitting the input across the specified devices by chunking in the batch 15 | dimension. In the forward pass, the module is replicated on each device, 16 | and each replica handles a portion of the input. During the backwards 17 | pass, gradients from each replica are summed into the original module. 18 | 19 | The batch size should be larger than the number of GPUs used. 
It should 20 | also be an integer multiple of the number of GPUs so that each chunk is the 21 | same size (so that each GPU processes the same number of samples). 22 | 23 | See also: :ref:`cuda-nn-dataparallel-instead` 24 | 25 | Arbitrary positional and keyword inputs are allowed to be passed into 26 | DataParallel EXCEPT Tensors. All variables will be scattered on dim 27 | specified (default 0). Primitive types will be broadcasted, but all 28 | other types will be a shallow copy and can be corrupted if written to in 29 | the model's forward pass. 30 | 31 | Args: 32 | module: module to be parallelized 33 | device_ids: CUDA devices (default: all devices) 34 | output_device: device location of output (default: device_ids[0]) 35 | 36 | Example:: 37 | 38 | >>> net = torch.nn.DataParallel(model, device_ids=[0, 1, 2]) 39 | >>> output = net(input_var) 40 | """ 41 | 42 | def __init__(self, module, device_ids=None, output_device=None, dim=0, chunk_sizes=None): 43 | super(DataParallel, self).__init__() 44 | 45 | if not torch.cuda.is_available(): 46 | self.module = module 47 | self.device_ids = [] 48 | return 49 | 50 | if device_ids is None: 51 | device_ids = list(range(torch.cuda.device_count())) 52 | if output_device is None: 53 | output_device = device_ids[0] 54 | self.dim = dim 55 | self.module = module 56 | self.device_ids = device_ids 57 | self.chunk_sizes = chunk_sizes 58 | self.output_device = output_device 59 | if len(self.device_ids) == 1: 60 | self.module.cuda(device_ids[0]) 61 | 62 | def forward(self, *inputs, **kwargs): 63 | if not self.device_ids: 64 | return self.module(*inputs, **kwargs) 65 | inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes) 66 | if len(self.device_ids) == 1: 67 | return self.module(*inputs[0], **kwargs[0]) 68 | replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) 69 | outputs = self.parallel_apply(replicas, inputs, kwargs) 70 | return self.gather(outputs, self.output_device) 71 | 72 | def replicate(self, module, device_ids): 73 | return replicate(module, device_ids) 74 | 75 | def scatter(self, inputs, kwargs, device_ids, chunk_sizes): 76 | return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim, chunk_sizes=self.chunk_sizes) 77 | 78 | def parallel_apply(self, replicas, inputs, kwargs): 79 | return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)]) 80 | 81 | def gather(self, outputs, output_device): 82 | return gather(outputs, output_device, dim=self.dim) 83 | 84 | 85 | # TODO: remove this 86 | def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None): 87 | r"""Evaluates module(input) in parallel across the GPUs given in device_ids. 88 | 89 | This is the functional version of the DataParallel module. 90 | 91 | Args: 92 | module: the module to evaluate in parallel 93 | inputs: inputs to the module 94 | device_ids: GPU ids on which to replicate module 95 | output_device: GPU location of the output Use -1 to indicate the CPU. 
96 | (default: device_ids[0]) 97 | Returns: 98 | a Variable containing the result of module(input) located on 99 | output_device 100 | """ 101 | if not isinstance(inputs, tuple): 102 | inputs = (inputs,) 103 | 104 | if device_ids is None: 105 | device_ids = list(range(torch.cuda.device_count())) 106 | 107 | if output_device is None: 108 | output_device = device_ids[0] 109 | 110 | inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim) 111 | if len(device_ids) == 1: 112 | return module(*inputs[0], **module_kwargs[0]) 113 | used_device_ids = device_ids[:len(inputs)] 114 | replicas = replicate(module, used_device_ids) 115 | outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids) 116 | return gather(outputs, output_device, dim) 117 | 118 | -------------------------------------------------------------------------------- /quarkdet/model/loss/varifocal_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from .utils import weight_reduce_loss 4 | 5 | 6 | def varifocal_loss(pred, 7 | target, 8 | weight=None, 9 | alpha=0.75, 10 | gamma=2.0, 11 | iou_weighted=True, 12 | reduction='mean', 13 | avg_factor=None): 14 | """`Varifocal Loss `_ 15 | 16 | Args: 17 | pred (torch.Tensor): The prediction with shape (N, C), C is the 18 | number of classes 19 | target (torch.Tensor): The learning target of the iou-aware 20 | classification score with shape (N, C), C is the number of classes. 21 | weight (torch.Tensor, optional): The weight of loss for each 22 | prediction. Defaults to None. 23 | alpha (float, optional): A balance factor for the negative part of 24 | Varifocal Loss, which is different from the alpha of Focal Loss. 25 | Defaults to 0.75. 26 | gamma (float, optional): The gamma for calculating the modulating 27 | factor. Defaults to 2.0. 28 | iou_weighted (bool, optional): Whether to weight the loss of the 29 | positive example with the iou target. Defaults to True. 30 | reduction (str, optional): The method used to reduce the loss into 31 | a scalar. Defaults to 'mean'. Options are "none", "mean" and 32 | "sum". 33 | avg_factor (int, optional): Average factor that is used to average 34 | the loss. Defaults to None. 35 | """ 36 | # pred and target should be of the same size 37 | assert pred.size() == target.size() 38 | pred_sigmoid = pred.sigmoid() 39 | target = target.type_as(pred) 40 | if iou_weighted: 41 | focal_weight = target * (target > 0.0).float() + \ 42 | alpha * (pred_sigmoid - target).abs().pow(gamma) * \ 43 | (target <= 0.0).float() 44 | else: 45 | focal_weight = (target > 0.0).float() + \ 46 | alpha * (pred_sigmoid - target).abs().pow(gamma) * \ 47 | (target <= 0.0).float() 48 | loss = F.binary_cross_entropy_with_logits( 49 | pred, target, reduction='none') * focal_weight 50 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 51 | return loss 52 | 53 | 54 | class VarifocalLoss(nn.Module): 55 | 56 | def __init__(self, 57 | use_sigmoid=True, 58 | alpha=0.75, 59 | gamma=2.0, 60 | iou_weighted=True, 61 | reduction='mean', 62 | loss_weight=1.0): 63 | """`Varifocal Loss `_ 64 | 65 | Args: 66 | use_sigmoid (bool, optional): Whether the prediction is 67 | used for sigmoid or softmax. Defaults to True. 68 | alpha (float, optional): A balance factor for the negative part of 69 | Varifocal Loss, which is different from the alpha of Focal 70 | Loss. Defaults to 0.75. 
71 | gamma (float, optional): The gamma for calculating the modulating 72 | factor. Defaults to 2.0. 73 | iou_weighted (bool, optional): Whether to weight the loss of the 74 | positive examples with the iou target. Defaults to True. 75 | reduction (str, optional): The method used to reduce the loss into 76 | a scalar. Defaults to 'mean'. Options are "none", "mean" and 77 | "sum". 78 | loss_weight (float, optional): Weight of loss. Defaults to 1.0. 79 | """ 80 | super(VarifocalLoss, self).__init__() 81 | assert use_sigmoid is True, \ 82 | 'Only sigmoid varifocal loss supported now.' 83 | assert alpha >= 0.0 84 | self.use_sigmoid = use_sigmoid 85 | self.alpha = alpha 86 | self.gamma = gamma 87 | self.iou_weighted = iou_weighted 88 | self.reduction = reduction 89 | self.loss_weight = loss_weight 90 | 91 | def forward(self, 92 | pred, 93 | target, 94 | weight=None, 95 | avg_factor=None, 96 | reduction_override=None): 97 | """Forward function. 98 | 99 | Args: 100 | pred (torch.Tensor): The prediction. 101 | target (torch.Tensor): The learning target of the prediction. 102 | weight (torch.Tensor, optional): The weight of loss for each 103 | prediction. Defaults to None. 104 | avg_factor (int, optional): Average factor that is used to average 105 | the loss. Defaults to None. 106 | reduction_override (str, optional): The reduction method used to 107 | override the original reduction method of the loss. 108 | Options are "none", "mean" and "sum". 109 | 110 | Returns: 111 | torch.Tensor: The calculated loss 112 | """ 113 | assert reduction_override in (None, 'none', 'mean', 'sum') 114 | reduction = ( 115 | reduction_override if reduction_override else self.reduction) 116 | if self.use_sigmoid: 117 | loss_cls = self.loss_weight * varifocal_loss( 118 | pred, 119 | target, 120 | weight, 121 | alpha=self.alpha, 122 | gamma=self.gamma, 123 | iou_weighted=self.iou_weighted, 124 | reduction=reduction, 125 | avg_factor=avg_factor) 126 | else: 127 | raise NotImplementedError 128 | return loss_cls 129 | -------------------------------------------------------------------------------- /quarkdet/model/module/nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision.ops import nms 3 | 4 | 5 | def multiclass_nms(multi_bboxes, 6 | multi_scores, 7 | score_thr, 8 | nms_cfg, 9 | max_num=-1, 10 | score_factors=None): 11 | """NMS for multi-class bboxes. 12 | 13 | Args: 14 | multi_bboxes (Tensor): shape (n, #class*4) or (n, 4) 15 | multi_scores (Tensor): shape (n, #class), where the last column 16 | contains scores of the background class, but this will be ignored. 17 | score_thr (float): bbox threshold, bboxes with scores lower than it 18 | will not be considered. 19 | nms_thr (float): NMS IoU threshold 20 | max_num (int): if there are more than max_num bboxes after NMS, 21 | only top max_num will be kept. 22 | score_factors (Tensor): The factors multiplied to scores before 23 | applying NMS 24 | 25 | Returns: 26 | tuple: (bboxes, labels), tensors of shape (k, 5) and (k, 1). Labels \ 27 | are 0-based. 
28 | """ 29 | num_classes = multi_scores.size(1) - 1 30 | # exclude background category 31 | if multi_bboxes.shape[1] > 4: 32 | bboxes = multi_bboxes.view(multi_scores.size(0), -1, 4) 33 | else: 34 | bboxes = multi_bboxes[:, None].expand( 35 | multi_scores.size(0), num_classes, 4) 36 | scores = multi_scores[:, :-1] 37 | 38 | # filter out boxes with low scores 39 | valid_mask = scores > score_thr 40 | 41 | # We use masked_select for ONNX exporting purpose, 42 | # which is equivalent to bboxes = bboxes[valid_mask] 43 | # (TODO): as ONNX does not support repeat now, 44 | # we have to use this ugly code 45 | bboxes = torch.masked_select( 46 | bboxes, 47 | torch.stack((valid_mask, valid_mask, valid_mask, valid_mask), 48 | -1)).view(-1, 4) 49 | if score_factors is not None: 50 | scores = scores * score_factors[:, None] 51 | scores = torch.masked_select(scores, valid_mask) 52 | labels = valid_mask.nonzero(as_tuple=False)[:, 1] 53 | 54 | if bboxes.numel() == 0: 55 | bboxes = multi_bboxes.new_zeros((0, 5)) 56 | labels = multi_bboxes.new_zeros((0, ), dtype=torch.long) 57 | 58 | if torch.onnx.is_in_onnx_export(): 59 | raise RuntimeError('[ONNX Error] Can not record NMS ' 60 | 'as it has not been executed this time') 61 | return bboxes, labels 62 | 63 | dets, keep = batched_nms(bboxes, scores, labels, nms_cfg) 64 | 65 | if max_num > 0: 66 | dets = dets[:max_num] 67 | keep = keep[:max_num] 68 | 69 | return dets, labels[keep] 70 | 71 | 72 | def batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic=False): 73 | """Performs non-maximum suppression in a batched fashion. 74 | Modified from https://github.com/pytorch/vision/blob 75 | /505cd6957711af790211896d32b40291bea1bc21/torchvision/ops/boxes.py#L39. 76 | In order to perform NMS independently per class, we add an offset to all 77 | the boxes. The offset is dependent only on the class idx, and is large 78 | enough so that boxes from different classes do not overlap. 79 | Arguments: 80 | boxes (torch.Tensor): boxes in shape (N, 4). 81 | scores (torch.Tensor): scores in shape (N, ). 82 | idxs (torch.Tensor): each index value correspond to a bbox cluster, 83 | and NMS will not be applied between elements of different idxs, 84 | shape (N, ). 85 | nms_cfg (dict): specify nms type and other parameters like iou_thr. 86 | Possible keys includes the following. 87 | - iou_thr (float): IoU threshold used for NMS. 88 | - split_thr (float): threshold number of boxes. In some cases the 89 | number of boxes is large (e.g., 200k). To avoid OOM during 90 | training, the users could set `split_thr` to a small value. 91 | If the number of boxes is greater than the threshold, it will 92 | perform NMS on each group of boxes separately and sequentially. 93 | Defaults to 10000. 94 | class_agnostic (bool): if true, nms is class agnostic, 95 | i.e. IoU thresholding happens over all boxes, 96 | regardless of the predicted class. 97 | Returns: 98 | tuple: kept dets and indice. 
99 | """ 100 | nms_cfg_ = nms_cfg.copy() 101 | class_agnostic = nms_cfg_.pop('class_agnostic', class_agnostic) 102 | if class_agnostic: 103 | boxes_for_nms = boxes 104 | else: 105 | max_coordinate = boxes.max() 106 | offsets = idxs.to(boxes) * (max_coordinate + 1) 107 | boxes_for_nms = boxes + offsets[:, None] 108 | 109 | nms_type = nms_cfg_.pop('type', 'nms') 110 | # nms_op = eval(nms_type) 111 | 112 | split_thr = nms_cfg_.pop('split_thr', 10000) 113 | if len(boxes_for_nms) < split_thr: 114 | # dets, keep = nms_op(boxes_for_nms, scores, **nms_cfg_) 115 | keep = nms(boxes_for_nms, scores, **nms_cfg_) 116 | boxes = boxes[keep] 117 | # scores = dets[:, -1] 118 | scores = scores[keep] 119 | else: 120 | total_mask = scores.new_zeros(scores.size(), dtype=torch.bool) 121 | for id in torch.unique(idxs): 122 | mask = (idxs == id).nonzero(as_tuple=False).view(-1) 123 | # dets, keep = nms_op(boxes_for_nms[mask], scores[mask], **nms_cfg_) 124 | keep = nms(boxes_for_nms[mask], scores[mask], **nms_cfg_) 125 | total_mask[mask[keep]] = True 126 | 127 | keep = total_mask.nonzero(as_tuple=False).view(-1) 128 | keep = keep[scores[keep].argsort(descending=True)] 129 | boxes = boxes[keep] 130 | scores = scores[keep] 131 | 132 | return torch.cat([boxes, scores[:, None]], -1), keep -------------------------------------------------------------------------------- /config/mobilenetv3.yml: -------------------------------------------------------------------------------- 1 | #Config File example 2 | save_dir: workspace/mobilenet 3 | model: 4 | detector: 5 | name: GFL 6 | backbone: 7 | name: MobileNetV3_Small 8 | out_stages: [2,6] 9 | neck: 10 | name: PAN 11 | in_channels: [24, 48, 576] 12 | out_channels: 96 13 | start_level: 0 14 | num_outs: 3 15 | head: 16 | name: QuarkDetHead 17 | num_classes: 80 # 80 18 | input_channel: 96 19 | feat_channels: 96 20 | stacked_convs: 2 21 | share_cls_reg: True #True 22 | octave_base_scale: 5 23 | scales_per_octave: 1 24 | strides: [8, 16, 32] 25 | reg_max: 7 #16 #7 26 | norm_cfg: 27 | type: BN 28 | loss: 29 | loss_qfl: 30 | name: QualityFocalLoss 31 | use_sigmoid: False #True 32 | beta: 2.0 33 | loss_weight: 1.0 34 | loss_dfl: 35 | name: DistributionFocalLoss 36 | loss_weight: 0.25 37 | loss_bbox: 38 | name: GIoULoss 39 | loss_weight: 2.0 40 | data: 41 | train: 42 | name: coco 43 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/train2017 44 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_train2017.json 45 | input_size: [320,320] #[w,h] 46 | keep_ratio: True 47 | pipeline: 48 | perspective: 0.0 49 | scale: [0.6, 1.4] 50 | stretch: [[1, 1], [1, 1]] 51 | rotation: 0 52 | shear: 0 53 | translate: 0 54 | flip: 0.5 55 | brightness: 0.2 56 | contrast: [0.8, 1.2] 57 | saturation: [0.8, 1.2] 58 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 59 | val: 60 | name: coco 61 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/val2017 62 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_val2017.json 63 | input_size: [320,320] #[w,h] 64 | keep_ratio: True 65 | pipeline: 66 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 67 | device: 68 | gpu_ids: [1] 69 | workers_per_gpu: 8 70 | batchsize_per_gpu: 80 # 80 #40 #160 santiago test 71 | schedule: 72 | resume: False 73 | load_model: ./workspace/mobilenet/model_last.pth 74 | 75 | optimizer: 76 | name: SGD 77 | lr: 0.14 78 | momentum: 0.9 79 | weight_decay: 0.0001 80 | warmup: 81 | name: linear 82 | steps: 300 #santiago test 83 | ratio: 0.1 84 | total_epochs: 160 #70 85 | 
lr_schedule: 86 | name: MultiStepLR 87 | milestones: [130,160,150,155] 88 | gamma: 0.1 89 | val_intervals: 5 #5 90 | evaluator: 91 | name: CocoDetectionEvaluator 92 | save_key: mAP 93 | 94 | log: 95 | interval: 10 96 | 97 | class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 98 | 'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant', 99 | 'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog', 100 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 101 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 102 | 'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat', 103 | 'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket', 104 | 'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 105 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 106 | 'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 107 | 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop', 108 | 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave', 109 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 110 | 'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush'] 111 | 112 | 113 | 114 | # { 115 | # 'img': tensor([ 116 | # [ 117 | # [ 118 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 119 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 120 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 121 | # ..., [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 122 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 123 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667] 124 | # ], 125 | 126 | # [ 127 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 128 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 129 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 130 | # ..., [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 131 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 132 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013] 133 | # ], 134 | 135 | # [ 136 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 137 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 138 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 139 | # ..., [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 140 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 141 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668] 142 | # ] 143 | # ] 144 | # ]), 145 | # 'img_info': { 146 | # 'license': tensor([2]), 147 | # 'file_name': ['000000007616.jpg'], 148 | # 'coco_url': ['http://images.cocodataset.org/train2017/000000007616.jpg'], 149 | # 'height': tensor([375]), 150 | # 'width': tensor([500]), 151 | # 'date_captured': ['2013-11-16 19:22:23'], 152 | # 'flickr_url': ['http://farm1.staticflickr.com/3/6939216_ea2aca1399_z.jpg'], 153 | # 'id': tensor([7616]) 154 | # }, 155 | # 'gt_bboxes': [array([ 156 | # [193.312, 153.37599, 216.5952, 175.8784], 157 | # [110.0224, 135.4624, 208.1792, 215.2832], 158 | # [160.1216, 85.7984, 168.64641, 110.976], 159 | # [204.7232, 93.6704, 212.2048, 108.3904], 160 | # [85.414406, 148.8192, 111.8976, 167.5584], 161 | # [236.0832, 155.96161, 267.5264, 166.3424], 162 | # [1.0816001, 231.6224, 320., 277.568], 163 | # [85.4656, 148.3776, 112.22401, 168.096], 164 | # [40.7232, 109.024, 44.607998, 121.5552] 165 | # ], 166 | # dtype = float32)], 167 | # 'gt_labels': [array([2, 7, 9, 
9, 2, 2, 2, 7, 9])], 168 | # 'warp_matrix': [array([ 169 | # [0.64, 0., 0.], 170 | # [0., 0.64, 40.], 171 | # [0., 0., 1.] 172 | # ])] 173 | # } 174 | -------------------------------------------------------------------------------- /config/nanodet.yml: -------------------------------------------------------------------------------- 1 | #Config File example 2 | save_dir: workspace/nanodet 3 | model: 4 | detector: 5 | name: GFL 6 | backbone: 7 | name: ShuffleNetV2 8 | out_stages: [2,3,4] 9 | activation: LeakyReLU 10 | model_size: 1.0x 11 | neck: 12 | name: PAN_Slim 13 | in_channels: [116, 232, 464] 14 | out_channels: 96 15 | start_level: 0 16 | num_outs: 3 17 | head: 18 | name: QuarkDetHead 19 | num_classes: 80 # 80 20 | input_channel: 96 21 | feat_channels: 96 22 | stacked_convs: 2 23 | share_cls_reg: True #True 24 | octave_base_scale: 5 25 | scales_per_octave: 1 26 | strides: [8, 16, 32] 27 | reg_max: 7 #16 #7 28 | norm_cfg: 29 | type: BN 30 | loss: 31 | loss_qfl: 32 | name: QualityFocalLoss 33 | use_sigmoid: False #True 34 | beta: 2.0 35 | loss_weight: 1.0 36 | loss_dfl: 37 | name: DistributionFocalLoss 38 | loss_weight: 0.25 39 | loss_bbox: 40 | name: GIoULoss 41 | loss_weight: 2.0 42 | data: 43 | train: 44 | name: coco 45 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/train2017 46 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_train2017.json 47 | input_size: [320,320] #[w,h] 48 | keep_ratio: True 49 | pipeline: 50 | perspective: 0.0 51 | scale: [0.6, 1.4] 52 | stretch: [[1, 1], [1, 1]] 53 | rotation: 0 54 | shear: 0 55 | translate: 0 56 | flip: 0.5 57 | brightness: 0.2 58 | contrast: [0.8, 1.2] 59 | saturation: [0.8, 1.2] 60 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 61 | val: 62 | name: coco 63 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/val2017 64 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_val2017.json 65 | input_size: [320,320] #[w,h] 66 | keep_ratio: True 67 | pipeline: 68 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 69 | device: 70 | gpu_ids: [0] 71 | workers_per_gpu: 8 72 | batchsize_per_gpu: 80 # 73 | schedule: 74 | resume: False 75 | load_model: ./workspace/nanodet/model_last.pth 76 | 77 | optimizer: 78 | name: SGD 79 | lr: 0.14 80 | momentum: 0.9 81 | weight_decay: 0.0001 82 | warmup: 83 | name: linear 84 | steps: 3 85 | ratio: 0.1 86 | total_epochs: 160 #70 87 | lr_schedule: 88 | name: MultiStepLR 89 | milestones: [40,130,160,150,155] 90 | gamma: 0.1 91 | val_intervals: 5 #5 92 | evaluator: 93 | name: CocoDetectionEvaluator 94 | save_key: mAP 95 | 96 | log: 97 | interval: 10 98 | 99 | class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 100 | 'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant', 101 | 'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog', 102 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 103 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 104 | 'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat', 105 | 'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket', 106 | 'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 107 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 108 | 'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 109 | 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop', 110 | 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave', 111 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 112 | 'vase', 
'scissors', 'teddy_bear', 'hair_drier', 'toothbrush'] 113 | 114 | 115 | 116 | # { 117 | # 'img': tensor([ 118 | # [ 119 | # [ 120 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 121 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 122 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 123 | # ..., [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 124 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 125 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667] 126 | # ], 127 | 128 | # [ 129 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 130 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 131 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 132 | # ..., [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 133 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 134 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013] 135 | # ], 136 | 137 | # [ 138 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 139 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 140 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 141 | # ..., [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 142 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 143 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668] 144 | # ] 145 | # ] 146 | # ]), 147 | # 'img_info': { 148 | # 'license': tensor([2]), 149 | # 'file_name': ['000000007616.jpg'], 150 | # 'coco_url': ['http://images.cocodataset.org/train2017/000000007616.jpg'], 151 | # 'height': tensor([375]), 152 | # 'width': tensor([500]), 153 | # 'date_captured': ['2013-11-16 19:22:23'], 154 | # 'flickr_url': ['http://farm1.staticflickr.com/3/6939216_ea2aca1399_z.jpg'], 155 | # 'id': tensor([7616]) 156 | # }, 157 | # 'gt_bboxes': [array([ 158 | # [193.312, 153.37599, 216.5952, 175.8784], 159 | # [110.0224, 135.4624, 208.1792, 215.2832], 160 | # [160.1216, 85.7984, 168.64641, 110.976], 161 | # [204.7232, 93.6704, 212.2048, 108.3904], 162 | # [85.414406, 148.8192, 111.8976, 167.5584], 163 | # [236.0832, 155.96161, 267.5264, 166.3424], 164 | # [1.0816001, 231.6224, 320., 277.568], 165 | # [85.4656, 148.3776, 112.22401, 168.096], 166 | # [40.7232, 109.024, 44.607998, 121.5552] 167 | # ], 168 | # dtype = float32)], 169 | # 'gt_labels': [array([2, 7, 9, 9, 2, 2, 2, 7, 9])], 170 | # 'warp_matrix': [array([ 171 | # [0.64, 0., 0.], 172 | # [0., 0.64, 40.], 173 | # [0., 0., 1.] 174 | # ])] 175 | # } 176 | -------------------------------------------------------------------------------- /quarkdet/model/head/anchor/base_anchor_head.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from quarkdet.model.module.init_weights import normal_init 5 | 6 | from .anchor_generator import AnchorGenerator 7 | from .anchor_target import multi_apply 8 | 9 | 10 | class AnchorHead(nn.Module): 11 | """Anchor-based head (RPN, RetinaNet, SSD, etc.). 12 | 13 | Args: 14 | num_classes (int): Number of categories including the background 15 | category. 16 | in_channels (int): Number of channels in the input feature map. 17 | feat_channels (int): Number of hidden channels. Used in child classes. 18 | anchor_scales (Iterable): Anchor scales. 19 | anchor_ratios (Iterable): Anchor aspect ratios. 20 | anchor_strides (Iterable): Anchor strides. 
21 | anchor_base_sizes (Iterable): Anchor base sizes. 22 | target_means (Iterable): Mean values of regression targets. 23 | target_stds (Iterable): Std values of regression targets. 24 | loss_cls (dict): Config of classification loss. 25 | loss_bbox (dict): Config of localization loss. 26 | """ # noqa: W605 27 | 28 | def __init__(self, 29 | num_classes, 30 | loss, 31 | use_sigmoid, 32 | input_channel, 33 | feat_channels=256, 34 | anchor_scales=[8], 35 | anchor_ratios=[1.0], 36 | strides=[8, 16, 32], 37 | anchor_base_sizes=None, 38 | target_means=(.0, .0, .0, .0), 39 | target_stds=(0.1, 0.1, 0.2, 0.2), 40 | ): 41 | super(AnchorHead, self).__init__() 42 | self.in_channels = input_channel 43 | self.num_classes = num_classes 44 | self.loss_cfg = loss 45 | self.feat_channels = feat_channels 46 | self.anchor_scales = anchor_scales 47 | self.anchor_ratios = anchor_ratios 48 | self.anchor_strides = strides 49 | self.anchor_base_sizes = list( 50 | strides) if anchor_base_sizes is None else anchor_base_sizes 51 | self.target_means = target_means 52 | self.target_stds = target_stds 53 | 54 | self.use_sigmoid_cls = use_sigmoid #loss.get('use_sigmoid', False) #use_sigmoid 55 | #self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False) 56 | # self.sampling = self.loss_cfg.loss_cls['name'] not in ['FocalLoss', 'GHMC'] 57 | if self.use_sigmoid_cls: 58 | self.cls_out_channels = num_classes 59 | else: 60 | self.cls_out_channels = num_classes + 1 61 | 62 | 63 | 64 | print("self.cls_out_channels:",self.cls_out_channels) 65 | if self.cls_out_channels <= 0: 66 | raise ValueError('num_classes={} is too small'.format(num_classes)) 67 | 68 | # self.loss_cls = build_loss(loss_cls) 69 | # self.loss_bbox = build_loss(loss_bbox) 70 | self.fp16_enabled = False 71 | 72 | self.anchor_generators = [] 73 | for anchor_base in self.anchor_base_sizes: 74 | self.anchor_generators.append( 75 | AnchorGenerator(anchor_base, anchor_scales, anchor_ratios)) 76 | 77 | self.num_anchors = len(self.anchor_ratios) * len(self.anchor_scales) 78 | self._init_layers() 79 | 80 | def _init_layers(self): 81 | self.conv_cls = nn.Conv2d(self.in_channels, 82 | self.num_anchors * self.cls_out_channels, 1) 83 | self.conv_reg = nn.Conv2d(self.in_channels, self.num_anchors * 4, 1) 84 | 85 | def init_weights(self): 86 | normal_init(self.conv_cls, std=0.01) 87 | normal_init(self.conv_reg, std=0.01) 88 | 89 | def forward_single(self, x): 90 | cls_score = self.conv_cls(x) 91 | bbox_pred = self.conv_reg(x) 92 | return cls_score, bbox_pred 93 | 94 | def forward(self, feats): 95 | return multi_apply(self.forward_single, feats) 96 | 97 | def get_anchors(self, featmap_sizes, img_shapes, device='cuda'): # checked! 98 | """Get anchors according to feature map sizes. 99 | 100 | Args: 101 | featmap_sizes (list[tuple]): Multi-level feature map sizes. 102 | img_shapes (h,w): Image meta info. 
103 | device (torch.device | str): device for returned tensors 104 | 105 | Returns: 106 | tuple: anchors of each image, valid flags of each image 107 | """ 108 | num_imgs = len(img_shapes) 109 | num_levels = len(featmap_sizes) 110 | 111 | # print("num_imgs",num_imgs) 112 | # print("num_levels:",num_levels) 113 | # print("featmap_sizes",featmap_sizes) 114 | # print("img_shapes",img_shapes) 115 | 116 | # since feature map sizes of all images are the same, we only compute 117 | # anchors for one time 118 | multi_level_anchors = [] 119 | for i in range(num_levels): 120 | anchors = self.anchor_generators[i].grid_anchors( 121 | featmap_sizes[i], self.anchor_strides[i], device=device) 122 | #print(":featmap_sizes:",featmap_sizes[i],":anchor_strides:",self.anchor_strides[i]) 123 | multi_level_anchors.append(anchors) 124 | #print("multi_level_anchors:",multi_level_anchors) 125 | anchor_list = [multi_level_anchors for _ in range(num_imgs)] 126 | #print("anchor_list:",anchor_list) 127 | #print("for i in range(num_levels)") 128 | 129 | # for each image, we compute valid flags of multi level anchors 130 | valid_flag_list = [] 131 | for img_id, img_shape in enumerate(img_shapes): 132 | multi_level_flags = [] 133 | for i in range(num_levels): 134 | anchor_stride = self.anchor_strides[i] 135 | feat_h, feat_w = featmap_sizes[i] 136 | h, w = img_shape 137 | valid_feat_h = min(int(np.ceil(h / anchor_stride)), feat_h) 138 | valid_feat_w = min(int(np.ceil(w / anchor_stride)), feat_w) 139 | flags = self.anchor_generators[i].valid_flags( 140 | (feat_h, feat_w), (valid_feat_h, valid_feat_w), 141 | device=device) 142 | multi_level_flags.append(flags) 143 | valid_flag_list.append(multi_level_flags) 144 | 145 | return anchor_list, valid_flag_list 146 | -------------------------------------------------------------------------------- /config/shufflenet.yml: -------------------------------------------------------------------------------- 1 | #Config File example 2 | save_dir: workspace/shufflenet 3 | model: 4 | detector: 5 | name: GFL 6 | backbone: 7 | name: ShuffleNetV2 8 | out_stages: [2,3,4] 9 | activation: LeakyReLU 10 | model_size: 1.0x 11 | neck: 12 | name: PAN 13 | in_channels: [116, 232, 464] 14 | out_channels: 96 15 | start_level: 0 16 | num_outs: 3 17 | head: 18 | name: QuarkDetHead 19 | num_classes: 80 # 80 20 | input_channel: 96 21 | feat_channels: 96 22 | stacked_convs: 2 23 | share_cls_reg: True #True 24 | octave_base_scale: 5 25 | scales_per_octave: 1 26 | strides: [8, 16, 32] 27 | reg_max: 7 #16 #7 28 | norm_cfg: 29 | type: BN 30 | loss: 31 | loss_qfl: 32 | name: QualityFocalLoss 33 | use_sigmoid: False #True 34 | beta: 2.0 35 | loss_weight: 1.0 36 | loss_dfl: 37 | name: DistributionFocalLoss 38 | loss_weight: 0.25 39 | loss_bbox: 40 | name: GIoULoss 41 | loss_weight: 2.0 42 | data: 43 | train: 44 | name: coco 45 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/train2017 46 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_train2017.json 47 | input_size: [320,320] #[w,h] 48 | keep_ratio: True 49 | pipeline: 50 | perspective: 0.0 51 | scale: [0.6, 1.4] 52 | stretch: [[1, 1], [1, 1]] 53 | rotation: 0 54 | shear: 0 55 | translate: 0 56 | flip: 0.5 57 | brightness: 0.2 58 | contrast: [0.8, 1.2] 59 | saturation: [0.8, 1.2] 60 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 61 | val: 62 | name: coco 63 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/val2017 64 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_val2017.json 65 | input_size: 
[320,320] #[w,h] 66 | keep_ratio: True 67 | pipeline: 68 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 69 | device: 70 | gpu_ids: [0,1] 71 | workers_per_gpu: 8 72 | batchsize_per_gpu: 80 # 80 #40 #160 santiago test 73 | schedule: 74 | resume: True 75 | load_model: ./workspace/shufflenet/model_last.pth 76 | 77 | optimizer: 78 | name: SGD 79 | lr: 0.14 80 | momentum: 0.9 81 | weight_decay: 0.0001 82 | warmup: 83 | name: linear 84 | steps: 300 #santiago test 85 | ratio: 0.1 86 | total_epochs: 160 #70 87 | lr_schedule: 88 | name: MultiStepLR 89 | milestones: [130,160,150,155] 90 | gamma: 0.1 91 | val_intervals: 5 #5 92 | evaluator: 93 | name: CocoDetectionEvaluator 94 | save_key: mAP 95 | 96 | log: 97 | interval: 10 98 | 99 | class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 100 | 'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant', 101 | 'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog', 102 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 103 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 104 | 'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat', 105 | 'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket', 106 | 'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 107 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 108 | 'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 109 | 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop', 110 | 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave', 111 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 112 | 'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush'] 113 | 114 | 115 | 116 | # { 117 | # 'img': tensor([ 118 | # [ 119 | # [ 120 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 121 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 122 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 123 | # ..., [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 124 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 125 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667] 126 | # ], 127 | 128 | # [ 129 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 130 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 131 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 132 | # ..., [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 133 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 134 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013] 135 | # ], 136 | 137 | # [ 138 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 139 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 140 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 141 | # ..., [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 142 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 143 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668] 144 | # ] 145 | # ] 146 | # ]), 147 | # 'img_info': { 148 | # 'license': tensor([2]), 149 | # 'file_name': ['000000007616.jpg'], 150 | # 'coco_url': ['http://images.cocodataset.org/train2017/000000007616.jpg'], 151 | # 'height': tensor([375]), 152 | # 'width': tensor([500]), 153 | # 'date_captured': ['2013-11-16 19:22:23'], 154 | # 'flickr_url': ['http://farm1.staticflickr.com/3/6939216_ea2aca1399_z.jpg'], 155 | # 'id': tensor([7616]) 156 | # }, 157 | # 
'gt_bboxes': [array([ 158 | # [193.312, 153.37599, 216.5952, 175.8784], 159 | # [110.0224, 135.4624, 208.1792, 215.2832], 160 | # [160.1216, 85.7984, 168.64641, 110.976], 161 | # [204.7232, 93.6704, 212.2048, 108.3904], 162 | # [85.414406, 148.8192, 111.8976, 167.5584], 163 | # [236.0832, 155.96161, 267.5264, 166.3424], 164 | # [1.0816001, 231.6224, 320., 277.568], 165 | # [85.4656, 148.3776, 112.22401, 168.096], 166 | # [40.7232, 109.024, 44.607998, 121.5552] 167 | # ], 168 | # dtype = float32)], 169 | # 'gt_labels': [array([2, 7, 9, 9, 2, 2, 2, 7, 9])], 170 | # 'warp_matrix': [array([ 171 | # [0.64, 0., 0.], 172 | # [0., 0.64, 40.], 173 | # [0., 0., 1.] 174 | # ])] 175 | # } 176 | -------------------------------------------------------------------------------- /quarkdet/model/head/quarkdet_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from ..module.conv import ConvModule, DepthwiseConvModule 5 | from ..module.init_weights import normal_init 6 | #from .gfl_head import GFLHead 7 | from .gfl_headv2 import GFLHeadV2 8 | from .gfl_head import GFLHead 9 | from .anchor.anchor_target import multi_apply 10 | 11 | 12 | class QuarkDetHead(GFLHead): # 可以直接将GFLHead替换成 GFLHeadV2 13 | """ 14 | Modified from GFL, use same loss functions but much lightweight convolution heads 15 | """ 16 | 17 | def __init__(self, 18 | num_classes, 19 | loss, 20 | input_channel, 21 | stacked_convs=2, 22 | octave_base_scale=5, 23 | scales_per_octave=1, 24 | conv_cfg=None, 25 | norm_cfg=dict(type='BN'), 26 | reg_max=16, 27 | share_cls_reg=False, 28 | activation='LeakyReLU', 29 | **kwargs): 30 | self.share_cls_reg = share_cls_reg 31 | self.activation = activation 32 | super(QuarkDetHead, self).__init__(num_classes, 33 | loss, 34 | input_channel, 35 | stacked_convs, 36 | octave_base_scale, 37 | scales_per_octave, 38 | conv_cfg, 39 | norm_cfg, 40 | reg_max, 41 | **kwargs) 42 | 43 | def _init_layers(self): 44 | self.cls_convs = nn.ModuleList() 45 | self.reg_convs = nn.ModuleList() 46 | for _ in self.anchor_strides: 47 | cls_convs, reg_convs = self._buid_not_shared_head() 48 | self.cls_convs.append(cls_convs) 49 | self.reg_convs.append(reg_convs) 50 | 51 | self.gfl_cls = nn.ModuleList([nn.Conv2d(self.feat_channels, 52 | self.cls_out_channels + 53 | 4 * (self.reg_max + 1) if self.share_cls_reg else self.cls_out_channels, 54 | 1, 55 | padding=0) for _ in self.anchor_strides]) 56 | # TODO: if 57 | self.gfl_reg = nn.ModuleList([nn.Conv2d(self.feat_channels, 58 | 4 * (self.reg_max + 1), 59 | 1, 60 | padding=0) for _ in self.anchor_strides]) 61 | 62 | def _buid_not_shared_head(self): 63 | cls_convs = nn.ModuleList() 64 | reg_convs = nn.ModuleList() 65 | # print("cls_convs before:",cls_convs) 66 | # print("reg_convs before:",reg_convs) 67 | # print("self.stacked_convs:",self.stacked_convs) 68 | for i in range(self.stacked_convs): 69 | chn = self.in_channels if i == 0 else self.feat_channels 70 | cls_convs.append( 71 | DepthwiseConvModule(chn, 72 | self.feat_channels, 73 | 3, 74 | stride=1, 75 | padding=1, 76 | norm_cfg=self.norm_cfg, 77 | bias=self.norm_cfg is None, 78 | activation=self.activation)) 79 | if not self.share_cls_reg: 80 | reg_convs.append( 81 | DepthwiseConvModule(chn, 82 | self.feat_channels, 83 | 3, 84 | stride=1, 85 | padding=1, 86 | norm_cfg=self.norm_cfg, 87 | bias=self.norm_cfg is None, 88 | activation=self.activation)) 89 | 90 | # print("cls_convs after:",cls_convs) 91 | # print("reg_convs after:",reg_convs) 92 | return 
cls_convs, reg_convs 93 | 94 | def init_weights(self): 95 | for seq in self.cls_convs: 96 | for m in seq: 97 | normal_init(m.depthwise, std=0.01) 98 | normal_init(m.pointwise, std=0.01) 99 | for seq in self.reg_convs: 100 | for m in seq: 101 | normal_init(m.depthwise, std=0.01) 102 | normal_init(m.pointwise, std=0.01) 103 | bias_cls = -4.595 # 用0.01的置信度初始化 104 | for i in range(len(self.anchor_strides)): 105 | normal_init(self.gfl_cls[i], std=0.01, bias=bias_cls) 106 | normal_init(self.gfl_reg[i], std=0.01) 107 | print('Finish initialize Lite quarkdet Head.') 108 | 109 | def forward(self, feats): 110 | return multi_apply(self.forward_single, 111 | feats, 112 | self.cls_convs, 113 | self.reg_convs, 114 | self.gfl_cls, 115 | self.gfl_reg, 116 | ) 117 | 118 | def forward_single(self, x, cls_convs, reg_convs, gfl_cls, gfl_reg): 119 | cls_feat = x 120 | reg_feat = x 121 | for cls_conv in cls_convs: 122 | cls_feat = cls_conv(cls_feat) 123 | for reg_conv in reg_convs: 124 | reg_feat = reg_conv(reg_feat) 125 | if self.share_cls_reg: 126 | feat = gfl_cls(cls_feat) 127 | # print("feat:",feat.shape) 128 | # print("cls_feat:",cls_feat.shape) 129 | # print("self.cls_out_channels:",self.cls_out_channels) 130 | cls_score, bbox_pred = torch.split(feat, [self.cls_out_channels, 4 * (self.reg_max + 1)], dim=1) 131 | # print("cls_score:",cls_score.shape) 132 | # print("bbox_pred:",bbox_pred.shape) 133 | else: 134 | cls_score = gfl_cls(cls_feat) 135 | bbox_pred = gfl_reg(reg_feat) 136 | 137 | if torch.onnx.is_in_onnx_export(): 138 | cls_score = torch.sigmoid(cls_score).reshape(1, self.num_classes, -1).permute(0, 2, 1) 139 | bbox_pred = bbox_pred.reshape(1, (self.reg_max+1)*4, -1).permute(0, 2, 1) 140 | return cls_score, bbox_pred 141 | 142 | 143 | -------------------------------------------------------------------------------- /config/test.yml: -------------------------------------------------------------------------------- 1 | #Config File example 2 | save_dir: workspace/test 3 | model: 4 | detector: 5 | name: GFL 6 | backbone: 7 | name: ShuffleNetV2 8 | out_stages: [2,3,4] 9 | activation: LeakyReLU 10 | model_size: 1.0x 11 | 12 | # name: GhostNet 13 | # width_mult: 1.0 14 | # out_stages: [4, 6, 9] 15 | # act: ReLU6 16 | 17 | neck: 18 | name: PAN 19 | in_channels: [116, 232, 464] #[40, 112, 960] 20 | out_channels: 96 21 | start_level: 0 22 | num_outs: 3 23 | head: 24 | name: QuarkDetHead 25 | num_classes: 80 # 80 26 | input_channel: 96 27 | feat_channels: 96 28 | stacked_convs: 2 29 | share_cls_reg: True #True 30 | octave_base_scale: 5 31 | scales_per_octave: 1 32 | strides: [8, 16, 32] 33 | reg_max: 7 #16 #7 34 | norm_cfg: 35 | type: BN 36 | loss: 37 | loss_qfl: 38 | name: QualityFocalLoss 39 | use_sigmoid: False #True 40 | beta: 2.0 41 | loss_weight: 1.0 42 | loss_dfl: 43 | name: DistributionFocalLoss 44 | loss_weight: 0.25 45 | loss_bbox: 46 | name: GIoULoss 47 | loss_weight: 2.0 48 | data: 49 | train: 50 | name: coco 51 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/train2017 52 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_train2017.json 53 | input_size: [320,320] #[w,h] 54 | keep_ratio: True 55 | pipeline: 56 | perspective: 0.0 57 | scale: [0.6, 1.4] 58 | stretch: [[1, 1], [1, 1]] 59 | rotation: 0 60 | shear: 0 61 | translate: 0 62 | flip: 0.5 63 | brightness: 0.2 64 | contrast: [0.8, 1.2] 65 | saturation: [0.8, 1.2] 66 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 67 | val: 68 | name: coco 69 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/val2017 70 
| ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_val2017.json 71 | input_size: [320,320] #[w,h] 72 | keep_ratio: True 73 | pipeline: 74 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 75 | device: 76 | gpu_ids: [1] 77 | workers_per_gpu: 1 78 | batchsize_per_gpu: 1 # 80 #40 #160 santiago test 79 | schedule: 80 | resume: False 81 | load_model: ./workspace/test/model_last.pth 82 | 83 | optimizer: 84 | name: SGD 85 | lr: 0.14 86 | momentum: 0.9 87 | weight_decay: 0.0001 88 | warmup: 89 | name: linear 90 | steps: 1 #santiago test 91 | ratio: 0.1 92 | total_epochs: 200 #70 93 | lr_schedule: 94 | name: MultiStepLR 95 | milestones: [130,160,150,155] 96 | gamma: 0.1 97 | val_intervals: 5 #5 98 | evaluator: 99 | name: CocoDetectionEvaluator 100 | save_key: mAP 101 | 102 | log: 103 | interval: 1 104 | 105 | class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 106 | 'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant', 107 | 'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog', 108 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 109 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 110 | 'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat', 111 | 'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket', 112 | 'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 113 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 114 | 'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 115 | 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop', 116 | 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave', 117 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 118 | 'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush'] 119 | 120 | 121 | 122 | # { 123 | # 'img': tensor([ 124 | # [ 125 | # [ 126 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 127 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 128 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 129 | # ..., [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 130 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 131 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667] 132 | # ], 133 | 134 | # [ 135 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 136 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 137 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 138 | # ..., [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 139 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 140 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013] 141 | # ], 142 | 143 | # [ 144 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 145 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 146 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 147 | # ..., [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 148 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 149 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668] 150 | # ] 151 | # ] 152 | # ]), 153 | # 'img_info': { 154 | # 'license': tensor([2]), 155 | # 'file_name': ['000000007616.jpg'], 156 | # 'coco_url': ['http://images.cocodataset.org/train2017/000000007616.jpg'], 157 | # 'height': tensor([375]), 158 | # 'width': tensor([500]), 159 | # 'date_captured': ['2013-11-16 19:22:23'], 160 | # 'flickr_url': 
['http://farm1.staticflickr.com/3/6939216_ea2aca1399_z.jpg'], 161 | # 'id': tensor([7616]) 162 | # }, 163 | # 'gt_bboxes': [array([ 164 | # [193.312, 153.37599, 216.5952, 175.8784], 165 | # [110.0224, 135.4624, 208.1792, 215.2832], 166 | # [160.1216, 85.7984, 168.64641, 110.976], 167 | # [204.7232, 93.6704, 212.2048, 108.3904], 168 | # [85.414406, 148.8192, 111.8976, 167.5584], 169 | # [236.0832, 155.96161, 267.5264, 166.3424], 170 | # [1.0816001, 231.6224, 320., 277.568], 171 | # [85.4656, 148.3776, 112.22401, 168.096], 172 | # [40.7232, 109.024, 44.607998, 121.5552] 173 | # ], 174 | # dtype = float32)], 175 | # 'gt_labels': [array([2, 7, 9, 9, 2, 2, 2, 7, 9])], 176 | # 'warp_matrix': [array([ 177 | # [0.64, 0., 0.], 178 | # [0., 0.64, 40.], 179 | # [0., 0., 1.] 180 | # ])] 181 | # } 182 | -------------------------------------------------------------------------------- /config/efficientdet.yml: -------------------------------------------------------------------------------- 1 | #Config File example 2 | save_dir: workspace/efficientdet 3 | model: 4 | detector: 5 | name: GFL 6 | backbone: 7 | 8 | name: EfficientNet 9 | arch: efficientnet-b2 10 | out_levels: [3, 4, 5] 11 | norm_eval: False 12 | 13 | neck: 14 | name: BiFPN 15 | in_channels: [48, 120, 352] #如果是b3可以采用配置 [48, 136, 384], 16 | out_channels: 112 17 | num_outs: 3 18 | start_level: 0 19 | end_level: -1 20 | stack: 3 21 | head: 22 | name: QuarkDetHead 23 | num_classes: 80 # 80 24 | input_channel: 112 25 | feat_channels: 112 26 | stacked_convs: 2 27 | share_cls_reg: True #True 28 | octave_base_scale: 5 29 | scales_per_octave: 1 30 | strides: [8, 16, 32] 31 | reg_max: 7 #16 #7 32 | norm_cfg: 33 | type: BN 34 | loss: 35 | loss_qfl: 36 | name: QualityFocalLoss 37 | use_sigmoid: True 38 | beta: 2.0 39 | loss_weight: 1.0 40 | loss_dfl: 41 | name: DistributionFocalLoss 42 | loss_weight: 0.25 43 | loss_bbox: 44 | name: GIoULoss 45 | loss_weight: 2.0 46 | data: 47 | train: 48 | name: coco 49 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/train2017 50 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_train2017.json 51 | input_size: [320,320] #[w,h] 52 | keep_ratio: True 53 | pipeline: 54 | perspective: 0.0 55 | scale: [0.6, 1.4] 56 | stretch: [[1, 1], [1, 1]] 57 | rotation: 0 58 | shear: 0 59 | translate: 0 60 | flip: 0.5 61 | brightness: 0.2 62 | contrast: [0.8, 1.2] 63 | saturation: [0.8, 1.2] 64 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 65 | val: 66 | name: coco 67 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/val2017 68 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_val2017.json 69 | input_size: [320,320] #[w,h] 70 | keep_ratio: True 71 | pipeline: 72 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 73 | device: 74 | gpu_ids: [1] 75 | workers_per_gpu: 8 76 | batchsize_per_gpu: 20 # 80 #40 #160 santiago test 77 | schedule: 78 | resume: False 79 | load_model: ./workspace/efficientdet/model_last.pth 80 | 81 | optimizer: 82 | name: SGD 83 | lr: 0.14 84 | momentum: 0.9 85 | weight_decay: 0.0001 86 | warmup: 87 | name: linear 88 | steps: 1 #santiago test 89 | ratio: 0.1 90 | total_epochs: 90 91 | 92 | lr_schedule: 93 | name: ReduceLROnPlateau 94 | mode: min 95 | factor: 0.1 96 | patience: 2 #15 97 | verbose: True 98 | threshold: 0.00001 99 | threshold_mode: rel 100 | cooldown: 0 101 | min_lr: 0 102 | eps: 0.000000001 #1e-08 103 | 104 | val_intervals: 10 105 | evaluator: 106 | name: CocoDetectionEvaluator 107 | save_key: mAP 108 | 109 | log: 110 | 
interval: 10 #10 111 | 112 | class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 113 | 'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant', 114 | 'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog', 115 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 116 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 117 | 'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat', 118 | 'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket', 119 | 'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 120 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 121 | 'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 122 | 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop', 123 | 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave', 124 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 125 | 'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush'] 126 | 127 | 128 | 129 | # { 130 | # 'img': tensor([ 131 | # [ 132 | # [ 133 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 134 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 135 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 136 | # ..., [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 137 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 138 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667] 139 | # ], 140 | 141 | # [ 142 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 143 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 144 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 145 | # ..., [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 146 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 147 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013] 148 | # ], 149 | 150 | # [ 151 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 152 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 153 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 154 | # ..., [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 155 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 156 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668] 157 | # ] 158 | # ] 159 | # ]), 160 | # 'img_info': { 161 | # 'license': tensor([2]), 162 | # 'file_name': ['000000007616.jpg'], 163 | # 'coco_url': ['http://images.cocodataset.org/train2017/000000007616.jpg'], 164 | # 'height': tensor([375]), 165 | # 'width': tensor([500]), 166 | # 'date_captured': ['2013-11-16 19:22:23'], 167 | # 'flickr_url': ['http://farm1.staticflickr.com/3/6939216_ea2aca1399_z.jpg'], 168 | # 'id': tensor([7616]) 169 | # }, 170 | # 'gt_bboxes': [array([ 171 | # [193.312, 153.37599, 216.5952, 175.8784], 172 | # [110.0224, 135.4624, 208.1792, 215.2832], 173 | # [160.1216, 85.7984, 168.64641, 110.976], 174 | # [204.7232, 93.6704, 212.2048, 108.3904], 175 | # [85.414406, 148.8192, 111.8976, 167.5584], 176 | # [236.0832, 155.96161, 267.5264, 166.3424], 177 | # [1.0816001, 231.6224, 320., 277.568], 178 | # [85.4656, 148.3776, 112.22401, 168.096], 179 | # [40.7232, 109.024, 44.607998, 121.5552] 180 | # ], 181 | # dtype = float32)], 182 | # 'gt_labels': [array([2, 7, 9, 9, 2, 2, 2, 7, 9])], 183 | # 'warp_matrix': [array([ 184 | # [0.64, 0., 0.], 185 | # [0., 0.64, 40.], 186 | # [0., 0., 1.] 
187 | # ])] 188 | # } 189 | -------------------------------------------------------------------------------- /config/ghostnet_full.yml: -------------------------------------------------------------------------------- 1 | #Config File example 2 | save_dir: workspace/ghostnet_full 3 | model: 4 | detector: 5 | name: GFL 6 | backbone: 7 | 8 | name: GhostNet_full 9 | width_mult: 1.0 10 | out_stages: [4, 6, 9] 11 | act: ReLU 12 | 13 | neck: 14 | name: PAN 15 | in_channels: [40, 112, 960] 16 | out_channels: 96 17 | start_level: 0 18 | num_outs: 3 19 | head: 20 | name: QuarkDetHead 21 | num_classes: 80 # 80 22 | input_channel: 96 23 | feat_channels: 96 24 | stacked_convs: 2 25 | share_cls_reg: True #True 26 | octave_base_scale: 5 27 | scales_per_octave: 1 28 | strides: [8, 16, 32] 29 | reg_max: 7 #16 #7 30 | norm_cfg: 31 | type: BN 32 | loss: 33 | loss_qfl: 34 | name: QualityFocalLoss 35 | use_sigmoid: True 36 | beta: 2.0 37 | loss_weight: 1.0 38 | loss_dfl: 39 | name: DistributionFocalLoss 40 | loss_weight: 0.25 41 | loss_bbox: 42 | name: GIoULoss 43 | loss_weight: 2.0 44 | data: 45 | train: 46 | name: coco 47 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/train2017 48 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_train2017.json 49 | input_size: [320,320] #[w,h] 50 | keep_ratio: True 51 | pipeline: 52 | perspective: 0.0 53 | scale: [0.6, 1.4] 54 | stretch: [[1, 1], [1, 1]] 55 | rotation: 0 56 | shear: 0 57 | translate: 0 58 | flip: 0.5 59 | brightness: 0.2 60 | contrast: [0.8, 1.2] 61 | saturation: [0.8, 1.2] 62 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 63 | val: 64 | name: coco 65 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/val2017 66 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_val2017.json 67 | input_size: [320,320] #[w,h] 68 | keep_ratio: True 69 | pipeline: 70 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 71 | device: 72 | gpu_ids: [0] 73 | workers_per_gpu: 8 74 | batchsize_per_gpu: 80 #40 #160 santiago test 75 | schedule: 76 | resume: True 77 | load_model: ./workspace/ghostnet_full/model_last.pth 78 | 79 | optimizer: 80 | name: SGD 81 | lr: 0.14 82 | momentum: 0.9 83 | weight_decay: 0.0001 84 | warmup: 85 | name: linear 86 | steps: 300 #300 #santiago test 87 | ratio: 0.1 88 | total_epochs: 90 89 | 90 | lr_schedule: 91 | name: ReduceLROnPlateau 92 | mode: min 93 | factor: 0.1 94 | patience: 3 95 | verbose: True 96 | threshold: 0.00001 97 | threshold_mode: rel 98 | cooldown: 0 99 | min_lr: 0 100 | eps: 0.000000001 #1e-08 101 | 102 | val_intervals: 10 103 | evaluator: 104 | name: CocoDetectionEvaluator 105 | save_key: mAP 106 | 107 | log: 108 | interval: 10 #10 109 | 110 | class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 111 | 'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant', 112 | 'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog', 113 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 114 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 115 | 'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat', 116 | 'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket', 117 | 'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 118 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 119 | 'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 120 | 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop', 121 | 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave', 122 | 
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 123 | 'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush'] 124 | 125 | 126 | 127 | # { 128 | # 'img': tensor([ 129 | # [ 130 | # [ 131 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 132 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 133 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 134 | # ..., [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 135 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 136 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667] 137 | # ], 138 | 139 | # [ 140 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 141 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 142 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 143 | # ..., [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 144 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 145 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013] 146 | # ], 147 | 148 | # [ 149 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 150 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 151 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 152 | # ..., [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 153 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 154 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668] 155 | # ] 156 | # ] 157 | # ]), 158 | # 'img_info': { 159 | # 'license': tensor([2]), 160 | # 'file_name': ['000000007616.jpg'], 161 | # 'coco_url': ['http://images.cocodataset.org/train2017/000000007616.jpg'], 162 | # 'height': tensor([375]), 163 | # 'width': tensor([500]), 164 | # 'date_captured': ['2013-11-16 19:22:23'], 165 | # 'flickr_url': ['http://farm1.staticflickr.com/3/6939216_ea2aca1399_z.jpg'], 166 | # 'id': tensor([7616]) 167 | # }, 168 | # 'gt_bboxes': [array([ 169 | # [193.312, 153.37599, 216.5952, 175.8784], 170 | # [110.0224, 135.4624, 208.1792, 215.2832], 171 | # [160.1216, 85.7984, 168.64641, 110.976], 172 | # [204.7232, 93.6704, 212.2048, 108.3904], 173 | # [85.414406, 148.8192, 111.8976, 167.5584], 174 | # [236.0832, 155.96161, 267.5264, 166.3424], 175 | # [1.0816001, 231.6224, 320., 277.568], 176 | # [85.4656, 148.3776, 112.22401, 168.096], 177 | # [40.7232, 109.024, 44.607998, 121.5552] 178 | # ], 179 | # dtype = float32)], 180 | # 'gt_labels': [array([2, 7, 9, 9, 2, 2, 2, 7, 9])], 181 | # 'warp_matrix': [array([ 182 | # [0.64, 0., 0.], 183 | # [0., 0.64, 40.], 184 | # [0., 0., 1.] 
185 | # ])] 186 | # } 187 | 188 | 189 | 190 | # ghostnet精简版本 191 | # 对GhostNet做了以下精简 192 | # 取出stage5中expansion size等于960的所有层,去除的层还包括 193 | # Conv2d 1×1 the number of output channels等于960和1280的层,平均池化层和最后的全连接层 194 | -------------------------------------------------------------------------------- /config/ghostnet_full_bifpn.yml: -------------------------------------------------------------------------------- 1 | #Config File example 2 | save_dir: workspace/ghostnet_full_bifpn 3 | model: 4 | detector: 5 | name: GFL 6 | backbone: 7 | 8 | name: GhostNet_full 9 | width_mult: 1.0 10 | out_stages: [4, 6, 9] 11 | act: ReLU 12 | 13 | neck: 14 | name: BiFPN 15 | in_channels: [40, 112, 960] 16 | out_channels: 96 17 | num_outs: 3 18 | start_level: 0 19 | end_level: -1 20 | stack: 3 21 | head: 22 | name: QuarkDetHead 23 | num_classes: 80 # 80 24 | input_channel: 96 25 | feat_channels: 96 26 | stacked_convs: 2 27 | share_cls_reg: True #True 28 | octave_base_scale: 5 29 | scales_per_octave: 1 30 | strides: [8, 16, 32] 31 | reg_max: 7 #16 #7 32 | norm_cfg: 33 | type: BN 34 | loss: 35 | loss_qfl: 36 | name: QualityFocalLoss 37 | use_sigmoid: True 38 | beta: 2.0 39 | loss_weight: 1.0 40 | loss_dfl: 41 | name: DistributionFocalLoss 42 | loss_weight: 0.25 43 | loss_bbox: 44 | name: GIoULoss 45 | loss_weight: 2.0 46 | data: 47 | train: 48 | name: coco 49 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/train2017 50 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_train2017.json 51 | input_size: [320,320] #[w,h] 52 | keep_ratio: True 53 | pipeline: 54 | perspective: 0.0 55 | scale: [0.6, 1.4] 56 | stretch: [[1, 1], [1, 1]] 57 | rotation: 0 58 | shear: 0 59 | translate: 0 60 | flip: 0.5 61 | brightness: 0.2 62 | contrast: [0.8, 1.2] 63 | saturation: [0.8, 1.2] 64 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 65 | val: 66 | name: coco 67 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/val2017 68 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_val2017.json 69 | input_size: [320,320] #[w,h] 70 | keep_ratio: True 71 | pipeline: 72 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 73 | device: 74 | gpu_ids: [1] 75 | workers_per_gpu: 8 76 | batchsize_per_gpu: 80 # 80 #40 #160 santiago test 77 | schedule: 78 | resume: False 79 | load_model: ./workspace/ghostnet_full_bifpn/model_last.pth 80 | 81 | optimizer: 82 | name: SGD 83 | lr: 0.14 84 | momentum: 0.9 85 | weight_decay: 0.0001 86 | warmup: 87 | name: linear 88 | steps: 300 #santiago test 89 | ratio: 0.1 90 | total_epochs: 90 91 | 92 | lr_schedule: 93 | name: ReduceLROnPlateau 94 | mode: min 95 | factor: 0.1 96 | patience: 3 #15 97 | verbose: True 98 | threshold: 0.00001 99 | threshold_mode: rel 100 | cooldown: 0 101 | min_lr: 0 102 | eps: 0.000000001 103 | 104 | val_intervals: 10 105 | evaluator: 106 | name: CocoDetectionEvaluator 107 | save_key: mAP 108 | 109 | log: 110 | interval: 10 #10 111 | 112 | class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 113 | 'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant', 114 | 'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog', 115 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 116 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 117 | 'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat', 118 | 'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket', 119 | 'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 120 | 'banana', 'apple', 
'sandwich', 'orange', 'broccoli', 'carrot', 121 | 'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 122 | 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop', 123 | 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave', 124 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 125 | 'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush'] 126 | 127 | 128 | 129 | # { 130 | # 'img': tensor([ 131 | # [ 132 | # [ 133 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 134 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 135 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 136 | # ..., [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 137 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 138 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667] 139 | # ], 140 | 141 | # [ 142 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 143 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 144 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 145 | # ..., [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 146 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 147 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013] 148 | # ], 149 | 150 | # [ 151 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 152 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 153 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 154 | # ..., [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 155 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 156 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668] 157 | # ] 158 | # ] 159 | # ]), 160 | # 'img_info': { 161 | # 'license': tensor([2]), 162 | # 'file_name': ['000000007616.jpg'], 163 | # 'coco_url': ['http://images.cocodataset.org/train2017/000000007616.jpg'], 164 | # 'height': tensor([375]), 165 | # 'width': tensor([500]), 166 | # 'date_captured': ['2013-11-16 19:22:23'], 167 | # 'flickr_url': ['http://farm1.staticflickr.com/3/6939216_ea2aca1399_z.jpg'], 168 | # 'id': tensor([7616]) 169 | # }, 170 | # 'gt_bboxes': [array([ 171 | # [193.312, 153.37599, 216.5952, 175.8784], 172 | # [110.0224, 135.4624, 208.1792, 215.2832], 173 | # [160.1216, 85.7984, 168.64641, 110.976], 174 | # [204.7232, 93.6704, 212.2048, 108.3904], 175 | # [85.414406, 148.8192, 111.8976, 167.5584], 176 | # [236.0832, 155.96161, 267.5264, 166.3424], 177 | # [1.0816001, 231.6224, 320., 277.568], 178 | # [85.4656, 148.3776, 112.22401, 168.096], 179 | # [40.7232, 109.024, 44.607998, 121.5552] 180 | # ], 181 | # dtype = float32)], 182 | # 'gt_labels': [array([2, 7, 9, 9, 2, 2, 2, 7, 9])], 183 | # 'warp_matrix': [array([ 184 | # [0.64, 0., 0.], 185 | # [0., 0.64, 40.], 186 | # [0., 0., 1.] 
187 | # ])] 188 | # } 189 | 190 | 191 | 192 | # ghostnet精简版本 193 | # 对GhostNet做了以下精简 194 | # 取出stage5中expansion size等于960的所有层,去除的层还包括 195 | # Conv2d 1×1 the number of output channels等于960和1280的层,平均池化层和最后的全连接层 196 | -------------------------------------------------------------------------------- /quarkdet/data/transform/warp.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | import cv2 4 | import math 5 | 6 | #PyTorch已经有现成的函数来替代 7 | def get_flip_matrix(prob=0.5): 8 | F = np.eye(3) 9 | if random.random() < prob: 10 | F[0, 0] = -1 11 | return F 12 | 13 | def get_perspective_matrix(perspective=0): 14 | """ 15 | 16 | :param perspective: 17 | :return: 18 | """ 19 | P = np.eye(3) 20 | P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y) 21 | P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x) 22 | return P 23 | 24 | 25 | def get_rotation_matrix(degree=0): 26 | """ 27 | 28 | :param degree: 29 | :return: 30 | """ 31 | R = np.eye(3) 32 | a = random.uniform(-degree, degree) 33 | R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=1) 34 | return R 35 | 36 | 37 | def get_scale_matrix(ratio=(1, 1)): 38 | """ 39 | 40 | :param width_ratio: 41 | :param height_ratio: 42 | """ 43 | Scl = np.eye(3) 44 | scale = random.uniform(*ratio) 45 | Scl[0, 0] *= scale 46 | Scl[1, 1] *= scale 47 | return Scl 48 | 49 | 50 | def get_stretch_matrix(width_ratio=(1, 1), height_ratio=(1, 1)): 51 | """ 52 | 53 | :param width_ratio: 54 | :param height_ratio: 55 | """ 56 | Str = np.eye(3) 57 | Str[0, 0] *= random.uniform(*width_ratio) 58 | Str[1, 1] *= random.uniform(*height_ratio) 59 | return Str 60 | 61 | 62 | def get_shear_matrix(degree): 63 | """ 64 | 65 | :param degree: 66 | :return: 67 | """ 68 | Sh = np.eye(3) 69 | Sh[0, 1] = math.tan(random.uniform(-degree, degree) * math.pi / 180) # x shear (deg) 70 | Sh[1, 0] = math.tan(random.uniform(-degree, degree) * math.pi / 180) # y shear (deg) 71 | return Sh 72 | 73 | 74 | def get_translate_matrix(translate, width, height): 75 | """ 76 | 77 | :param translate: 78 | :return: 79 | """ 80 | T = np.eye(3) 81 | T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width # x translation 82 | T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height # y translation 83 | return T 84 | 85 | 86 | def get_resize_matrix(raw_shape, dst_shape, keep_ratio): 87 | """ 88 | Get resize matrix for resizing raw img to input size 89 | :param raw_shape: (width, height) of raw image 90 | :param dst_shape: (width, height) of input image 91 | :param keep_ratio: whether keep original ratio 92 | :return: 3x3 Matrix 93 | """ 94 | r_w, r_h = raw_shape 95 | d_w, d_h = dst_shape 96 | Rs = np.eye(3) 97 | if keep_ratio: 98 | C = np.eye(3) 99 | C[0, 2] = - r_w / 2 100 | C[1, 2] = - r_h / 2 101 | 102 | if r_w / r_h < d_w / d_h: 103 | ratio = d_h / r_h 104 | else: 105 | ratio = d_w / r_w 106 | Rs[0, 0] *= ratio 107 | Rs[1, 1] *= ratio 108 | 109 | T = np.eye(3) 110 | T[0, 2] = 0.5 * d_w 111 | T[1, 2] = 0.5 * d_h 112 | return T @ Rs @ C 113 | else: 114 | Rs[0, 0] *= d_w / r_w 115 | Rs[1, 1] *= d_h / r_h 116 | return Rs 117 | 118 | def warp_and_resize(meta, warp_kwargs, dst_shape, keep_ratio=True): 119 | # TODO: background, type 120 | raw_img = meta['img'] 121 | height = raw_img.shape[0] # shape(h,w,c) 122 | width = raw_img.shape[1] 123 | 124 | # center 125 | C = np.eye(3) 126 | C[0, 2] = - width / 2 127 | C[1, 2] = - height / 2 128 | 129 | # do not change the 
order of mat mul 130 | if 'perspective' in warp_kwargs and random.randint(0, 1): 131 | P = get_perspective_matrix(warp_kwargs['perspective']) 132 | C = P @ C 133 | if 'scale' in warp_kwargs and random.randint(0, 1): 134 | Scl = get_scale_matrix(warp_kwargs['scale']) 135 | C = Scl @ C 136 | if 'stretch' in warp_kwargs and random.randint(0, 1): 137 | Str = get_stretch_matrix(*warp_kwargs['stretch']) 138 | C = Str @ C 139 | if 'rotation' in warp_kwargs and random.randint(0, 1): 140 | R = get_rotation_matrix(warp_kwargs['rotation']) 141 | C = R @ C 142 | if 'shear' in warp_kwargs and random.randint(0, 1): 143 | Sh = get_shear_matrix(warp_kwargs['shear']) 144 | C = Sh @ C 145 | if 'flip' in warp_kwargs: 146 | F = get_flip_matrix(warp_kwargs['flip']) 147 | C = F @ C 148 | if 'translate' in warp_kwargs and random.randint(0, 1): 149 | T = get_translate_matrix(warp_kwargs['translate'], width, height) 150 | else: 151 | T = get_translate_matrix(0, width, height) 152 | M = T @ C 153 | # M = T @ Sh @ R @ Str @ P @ C 154 | ResizeM = get_resize_matrix((width, height), dst_shape, keep_ratio) 155 | M = ResizeM @ M 156 | img = cv2.warpPerspective(raw_img, M, dsize=tuple(dst_shape)) 157 | meta['img'] = img 158 | meta['warp_matrix'] = M 159 | if 'gt_bboxes' in meta: 160 | boxes = meta['gt_bboxes'] 161 | meta['gt_bboxes'] = warp_boxes(boxes, M, dst_shape[0], dst_shape[1]) 162 | if 'gt_masks' in meta: 163 | for i, mask in enumerate(meta['gt_masks']): 164 | meta['gt_masks'][i] = cv2.warpPerspective(mask, M, dsize=tuple(dst_shape)) 165 | 166 | # TODO: keypoints 167 | # if 'gt_keypoints' in meta: 168 | 169 | return meta 170 | 171 | 172 | def warp_boxes(boxes, M, width, height): 173 | n = len(boxes) 174 | if n: 175 | # warp points 176 | xy = np.ones((n * 4, 3)) 177 | xy[:, :2] = boxes[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1 178 | xy = xy @ M.T # transform 179 | xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8) # rescale 180 | # create new boxes 181 | x = xy[:, [0, 2, 4, 6]] 182 | y = xy[:, [1, 3, 5, 7]] 183 | xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T 184 | # clip boxes 185 | xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width) 186 | xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height) 187 | return xy.astype(np.float32) 188 | else: 189 | return boxes 190 | 191 | # def warp_keypoints(keypoints, M, width, height): 192 | # n = len(keypoints) 193 | # if n: 194 | # 195 | # # warp points 196 | # xy = np.ones((n * 4, 3)) 197 | # xy[:, :2] = boxes[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1 198 | # xy = xy @ M.T # transform 199 | # xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8) # rescale 200 | # # create new boxes 201 | # x = xy[:, [0, 2, 4, 6]] 202 | # y = xy[:, [1, 3, 5, 7]] 203 | # xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T 204 | # # clip boxes 205 | # xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width) 206 | # xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height) 207 | # return xy 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | -------------------------------------------------------------------------------- /config/ghostnet_slim640.yml: -------------------------------------------------------------------------------- 1 | #Config File example 2 | save_dir: workspace/GhostNet_slim640 3 | model: 4 | detector: 5 | name: GFL 6 | backbone: 7 | 8 | name: GhostNet_slim 9 | width_mult: 1.0 10 | #out_stages: [4, 6, 9] 11 | out_stages: [4, 6, 7] 12 | act: ReLU 13 | 14 | neck: 15 | name: PAN 16 | 
#in_channels: [40, 112, 960] 17 | in_channels: [40, 112, 160] 18 | out_channels: 96 19 | start_level: 0 20 | num_outs: 3 21 | head: 22 | name: QuarkDetHead 23 | num_classes: 80 # 80 24 | input_channel: 96 25 | feat_channels: 96 26 | stacked_convs: 2 27 | share_cls_reg: True #True 28 | octave_base_scale: 5 29 | scales_per_octave: 1 30 | strides: [8, 16, 32] 31 | reg_max: 7 #16 #7 32 | norm_cfg: 33 | type: BN 34 | loss: 35 | loss_qfl: 36 | name: QualityFocalLoss 37 | use_sigmoid: True 38 | beta: 2.0 39 | loss_weight: 1.0 40 | loss_dfl: 41 | name: DistributionFocalLoss 42 | loss_weight: 0.25 43 | loss_bbox: 44 | name: GIoULoss 45 | loss_weight: 2.0 46 | 47 | 48 | data: 49 | train: 50 | name: coco 51 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/train2017 52 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_train2017.json 53 | input_size: [640,640] #[320,320] #[w,h] 54 | keep_ratio: True 55 | pipeline: 56 | perspective: 0.0 57 | scale: [0.6, 1.4] 58 | stretch: [[1, 1], [1, 1]] 59 | rotation: 0 60 | shear: 0 61 | translate: 0 62 | flip: 0.5 63 | brightness: 0.2 64 | contrast: [0.8, 1.2] 65 | saturation: [0.8, 1.2] 66 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 67 | 68 | load_mosaic: True 69 | mosaic_probability: 0.3 70 | mosaic_area: 9 71 | mosaic_image_size: 640 72 | 73 | val: 74 | name: coco 75 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/val2017 76 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_val2017.json 77 | input_size: [640,640] #[320,320] #[w,h] 78 | keep_ratio: True 79 | pipeline: 80 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 81 | device: 82 | gpu_ids: [1] 83 | workers_per_gpu: 8 #8 84 | batchsize_per_gpu: 10 #160 santiago test 85 | schedule: 86 | resume: False 87 | load_model: ./workspace/GhostNet_slim640/model_last.pth 88 | 89 | optimizer: 90 | name: SGD 91 | lr: 0.14 92 | momentum: 0.9 93 | weight_decay: 0.0001 94 | warmup: 95 | name: linear 96 | steps: 300 #300 #santiago test 97 | ratio: 0.1 98 | total_epochs: 300 #70 99 | # lr_schedule: 100 | # name: MultiStepLR 101 | # milestones: [30,80,130,150,155,160] 102 | # gamma: 0.1 103 | 104 | lr_schedule: 105 | name: ReduceLROnPlateau 106 | mode: 'min' 107 | factor: 0.1 108 | patience: 3 #15 109 | verbose: True 110 | threshold: 0.00001 111 | threshold_mode: 'rel' 112 | cooldown: 0 113 | min_lr: 0 114 | eps: 0.000000001 #1e-08 115 | 116 | val_intervals: 5 117 | evaluator: 118 | name: CocoDetectionEvaluator 119 | save_key: mAP 120 | 121 | log: 122 | interval: 5 #10 123 | 124 | class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 125 | 'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant', 126 | 'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog', 127 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 128 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 129 | 'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat', 130 | 'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket', 131 | 'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 132 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 133 | 'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 134 | 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop', 135 | 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave', 136 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 137 | 'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush'] 138 | 139 | 140 | 141 | 
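# A minimal, self-contained sketch (not code from this repo) of how the optimizer and
# lr_schedule blocks above presumably map onto the PyTorch API: the 'name' key selects the
# class and the remaining keys are passed through as keyword arguments. The nn.Linear model
# below is a stand-in for illustration only.
#
#   import torch
#   from torch import nn, optim
#
#   model = nn.Linear(1, 1)  # placeholder model; the real detector is built from cfg.model
#   optimizer = optim.SGD(model.parameters(), lr=0.14, momentum=0.9, weight_decay=0.0001)
#   lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
#       optimizer, mode='min', factor=0.1, patience=3, verbose=True,
#       threshold=0.00001, threshold_mode='rel', cooldown=0, min_lr=0, eps=0.000000001)
#
#   # ReduceLROnPlateau is metric-driven: call lr_scheduler.step(val_metric) after each
#   # validation round (here 'mode: min', e.g. a loss), unlike MultiStepLR in the other
#   # configs, which is stepped once per epoch with lr_scheduler.step().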
# { 142 | # 'img': tensor([ 143 | # [ 144 | # [ 145 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 146 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 147 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 148 | # ..., [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 149 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 150 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667] 151 | # ], 152 | 153 | # [ 154 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 155 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 156 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 157 | # ..., [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 158 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 159 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013] 160 | # ], 161 | 162 | # [ 163 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 164 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 165 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 166 | # ..., [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 167 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 168 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668] 169 | # ] 170 | # ] 171 | # ]), 172 | # 'img_info': { 173 | # 'license': tensor([2]), 174 | # 'file_name': ['000000007616.jpg'], 175 | # 'coco_url': ['http://images.cocodataset.org/train2017/000000007616.jpg'], 176 | # 'height': tensor([375]), 177 | # 'width': tensor([500]), 178 | # 'date_captured': ['2013-11-16 19:22:23'], 179 | # 'flickr_url': ['http://farm1.staticflickr.com/3/6939216_ea2aca1399_z.jpg'], 180 | # 'id': tensor([7616]) 181 | # }, 182 | # 'gt_bboxes': [array([ 183 | # [193.312, 153.37599, 216.5952, 175.8784], 184 | # [110.0224, 135.4624, 208.1792, 215.2832], 185 | # [160.1216, 85.7984, 168.64641, 110.976], 186 | # [204.7232, 93.6704, 212.2048, 108.3904], 187 | # [85.414406, 148.8192, 111.8976, 167.5584], 188 | # [236.0832, 155.96161, 267.5264, 166.3424], 189 | # [1.0816001, 231.6224, 320., 277.568], 190 | # [85.4656, 148.3776, 112.22401, 168.096], 191 | # [40.7232, 109.024, 44.607998, 121.5552] 192 | # ], 193 | # dtype = float32)], 194 | # 'gt_labels': [array([2, 7, 9, 9, 2, 2, 2, 7, 9])], 195 | # 'warp_matrix': [array([ 196 | # [0.64, 0., 0.], 197 | # [0., 0.64, 40.], 198 | # [0., 0., 1.] 199 | # ])] 200 | # } 201 | 202 | 203 | 204 | 205 | -------------------------------------------------------------------------------- /quarkdet/model/head/assigner/atss_assigner.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ...loss.iou_loss import bbox_overlaps 4 | from .base_assigner import BaseAssigner 5 | from .assign_result import AssignResult 6 | 7 | 8 | class ATSSAssigner(BaseAssigner): 9 | """Assign a corresponding gt bbox or background to each bbox. 10 | 11 | Each proposals will be assigned with `0` or a positive integer 12 | indicating the ground truth index. 
13 | 14 | - 0: negative sample, no assigned gt 15 | - positive integer: positive sample, index (1-based) of assigned gt 16 | 17 | Args: 18 | topk (float): number of bbox selected in each level 19 | """ 20 | 21 | def __init__(self, topk): 22 | self.topk = topk 23 | 24 | # https://github.com/sfzhang15/ATSS/blob/master/atss_core/modeling/rpn/atss/loss.py 25 | 26 | def assign(self, 27 | bboxes, 28 | num_level_bboxes, 29 | gt_bboxes, 30 | gt_bboxes_ignore=None, 31 | gt_labels=None): 32 | """Assign gt to bboxes. 33 | 34 | The assignment is done in following steps 35 | 36 | 1. compute iou between all bbox (bbox of all pyramid levels) and gt 37 | 2. compute center distance between all bbox and gt 38 | 3. on each pyramid level, for each gt, select k bbox whose center 39 | are closest to the gt center, so we total select k*l bbox as 40 | candidates for each gt 41 | 4. get corresponding iou for the these candidates, and compute the 42 | mean and std, set mean + std as the iou threshold 43 | 5. select these candidates whose iou are greater than or equal to 44 | the threshold as postive 45 | 6. limit the positive sample's center in gt 46 | 47 | 48 | Args: 49 | bboxes (Tensor): Bounding boxes to be assigned, shape(n, 4). 50 | num_level_bboxes (List): num of bboxes in each level 51 | gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4). 52 | gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are 53 | labelled as `ignored`, e.g., crowd boxes in COCO. 54 | gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ). 55 | 56 | Returns: 57 | :obj:`AssignResult`: The assign result. 58 | """ 59 | INF = 100000000 60 | bboxes = bboxes[:, :4] 61 | num_gt, num_bboxes = gt_bboxes.size(0), bboxes.size(0) 62 | 63 | # compute iou between all bbox and gt 64 | overlaps = bbox_overlaps(bboxes, gt_bboxes) 65 | 66 | # assign 0 by default 67 | assigned_gt_inds = overlaps.new_full((num_bboxes,), 68 | 0, 69 | dtype=torch.long) 70 | 71 | if num_gt == 0 or num_bboxes == 0: 72 | # No ground truth or boxes, return empty assignment 73 | max_overlaps = overlaps.new_zeros((num_bboxes,)) 74 | if num_gt == 0: 75 | # No truth, assign everything to background 76 | assigned_gt_inds[:] = 0 77 | if gt_labels is None: 78 | assigned_labels = None 79 | else: 80 | assigned_labels = overlaps.new_full((num_bboxes,), 81 | -1, 82 | dtype=torch.long) 83 | return AssignResult( 84 | num_gt, assigned_gt_inds, max_overlaps, labels=assigned_labels) 85 | 86 | # compute center distance between all bbox and gt 87 | gt_cx = (gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2.0 88 | gt_cy = (gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2.0 89 | gt_points = torch.stack((gt_cx, gt_cy), dim=1) 90 | 91 | bboxes_cx = (bboxes[:, 0] + bboxes[:, 2]) / 2.0 92 | bboxes_cy = (bboxes[:, 1] + bboxes[:, 3]) / 2.0 93 | bboxes_points = torch.stack((bboxes_cx, bboxes_cy), dim=1) 94 | 95 | distances = (bboxes_points[:, None, :] - 96 | gt_points[None, :, :]).pow(2).sum(-1).sqrt() 97 | 98 | # Selecting candidates based on the center distance 99 | candidate_idxs = [] 100 | start_idx = 0 101 | for level, bboxes_per_level in enumerate(num_level_bboxes): 102 | # on each pyramid level, for each gt, 103 | # select k bbox whose center are closest to the gt center 104 | end_idx = start_idx + bboxes_per_level 105 | distances_per_level = distances[start_idx:end_idx, :] 106 | selectable_k = min(self.topk, bboxes_per_level) 107 | _, topk_idxs_per_level = distances_per_level.topk( 108 | selectable_k, dim=0, largest=False) 109 | candidate_idxs.append(topk_idxs_per_level + start_idx) 110 | start_idx 
= end_idx 111 | candidate_idxs = torch.cat(candidate_idxs, dim=0) 112 | 113 | # get corresponding iou for the these candidates, and compute the 114 | # mean and std, set mean + std as the iou threshold 115 | candidate_overlaps = overlaps[candidate_idxs, torch.arange(num_gt)] 116 | overlaps_mean_per_gt = candidate_overlaps.mean(0) 117 | overlaps_std_per_gt = candidate_overlaps.std(0) 118 | overlaps_thr_per_gt = overlaps_mean_per_gt + overlaps_std_per_gt 119 | 120 | is_pos = candidate_overlaps >= overlaps_thr_per_gt[None, :] 121 | 122 | # limit the positive sample's center in gt 123 | for gt_idx in range(num_gt): 124 | candidate_idxs[:, gt_idx] += gt_idx * num_bboxes 125 | ep_bboxes_cx = bboxes_cx.view(1, -1).expand( 126 | num_gt, num_bboxes).contiguous().view(-1) 127 | ep_bboxes_cy = bboxes_cy.view(1, -1).expand( 128 | num_gt, num_bboxes).contiguous().view(-1) 129 | candidate_idxs = candidate_idxs.view(-1) 130 | 131 | # calculate the left, top, right, bottom distance between positive 132 | # bbox center and gt side 133 | l_ = ep_bboxes_cx[candidate_idxs].view(-1, num_gt) - gt_bboxes[:, 0] 134 | t_ = ep_bboxes_cy[candidate_idxs].view(-1, num_gt) - gt_bboxes[:, 1] 135 | r_ = gt_bboxes[:, 2] - ep_bboxes_cx[candidate_idxs].view(-1, num_gt) 136 | b_ = gt_bboxes[:, 3] - ep_bboxes_cy[candidate_idxs].view(-1, num_gt) 137 | is_in_gts = torch.stack([l_, t_, r_, b_], dim=1).min(dim=1)[0] > 0.01 138 | is_pos = is_pos & is_in_gts 139 | 140 | # if an anchor box is assigned to multiple gts, 141 | # the one with the highest IoU will be selected. 142 | overlaps_inf = torch.full_like(overlaps, 143 | -INF).t().contiguous().view(-1) 144 | index = candidate_idxs.view(-1)[is_pos.view(-1)] 145 | overlaps_inf[index] = overlaps.t().contiguous().view(-1)[index] 146 | overlaps_inf = overlaps_inf.view(num_gt, -1).t() 147 | 148 | max_overlaps, argmax_overlaps = overlaps_inf.max(dim=1) 149 | assigned_gt_inds[ 150 | max_overlaps != -INF] = argmax_overlaps[max_overlaps != -INF] + 1 151 | 152 | if gt_labels is not None: 153 | assigned_labels = assigned_gt_inds.new_full((num_bboxes,), -1) 154 | pos_inds = torch.nonzero( 155 | assigned_gt_inds > 0, as_tuple=False).squeeze() 156 | if pos_inds.numel() > 0: 157 | assigned_labels[pos_inds] = gt_labels[ 158 | assigned_gt_inds[pos_inds] - 1] 159 | else: 160 | assigned_labels = None 161 | return AssignResult( 162 | num_gt, assigned_gt_inds, max_overlaps, labels=assigned_labels) 163 | -------------------------------------------------------------------------------- /config/quarkdet.yml: -------------------------------------------------------------------------------- 1 | #Config File example 2 | save_dir: workspace/ghostnet 3 | model: 4 | detector: 5 | name: GFL 6 | backbone: 7 | 8 | name: GhostNet 9 | width_mult: 1.0 10 | #out_stages: [4, 6, 9] 11 | out_stages: [4, 6, 7] 12 | act: ReLU 13 | 14 | neck: 15 | name: PAN 16 | #in_channels: [40, 112, 960] 17 | in_channels: [40, 112, 160] 18 | out_channels: 96 19 | start_level: 0 20 | num_outs: 3 21 | head: 22 | name: QuarkDetHead 23 | num_classes: 80 # 80 24 | input_channel: 96 25 | feat_channels: 96 26 | stacked_convs: 2 27 | share_cls_reg: True #True 28 | octave_base_scale: 5 29 | scales_per_octave: 1 30 | strides: [8, 16, 32] 31 | reg_max: 7 #16 #7 32 | norm_cfg: 33 | type: BN 34 | loss: 35 | loss_qfl: 36 | name: QualityFocalLoss 37 | use_sigmoid: False #True 38 | beta: 2.0 39 | loss_weight: 1.0 40 | loss_dfl: 41 | name: DistributionFocalLoss 42 | loss_weight: 0.25 43 | loss_bbox: 44 | name: GIoULoss 45 | loss_weight: 2.0 46 | data: 47 
| train: 48 | name: coco 49 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/train2017 50 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_train2017.json 51 | input_size: [320,320] #[w,h] 52 | keep_ratio: True 53 | pipeline: 54 | perspective: 0.0 55 | scale: [0.6, 1.4] 56 | stretch: [[1, 1], [1, 1]] 57 | rotation: 0 58 | shear: 0 59 | translate: 0 60 | flip: 0.5 61 | brightness: 0.2 62 | contrast: [0.8, 1.2] 63 | saturation: [0.8, 1.2] 64 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 65 | val: 66 | name: coco 67 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/val2017 68 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_val2017.json 69 | input_size: [320,320] #[w,h] 70 | keep_ratio: True 71 | pipeline: 72 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 73 | device: 74 | gpu_ids: [0,1] 75 | workers_per_gpu: 16 #8 76 | batchsize_per_gpu: 80 # 80 #40 #160 santiago test 77 | schedule: 78 | resume: False 79 | load_model: ./workspace/ghostnet/model_last.pth 80 | 81 | optimizer: 82 | name: SGD 83 | lr: 0.14 84 | momentum: 0.9 85 | weight_decay: 0.0001 86 | warmup: 87 | name: linear 88 | steps: 100 #300 #santiago test 89 | ratio: 0.1 90 | total_epochs: 160 #70 91 | lr_schedule: 92 | name: MultiStepLR 93 | milestones: [30,80,130,150,155,160] 94 | gamma: 0.1 95 | val_intervals: 5 96 | evaluator: 97 | name: CocoDetectionEvaluator 98 | save_key: mAP 99 | 100 | log: 101 | interval: 10 #10 102 | 103 | class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 104 | 'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant', 105 | 'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog', 106 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 107 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 108 | 'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat', 109 | 'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket', 110 | 'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 111 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 112 | 'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 113 | 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop', 114 | 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave', 115 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 116 | 'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush'] 117 | 118 | 119 | 120 | # { 121 | # 'img': tensor([ 122 | # [ 123 | # [ 124 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 125 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 126 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 127 | # ..., [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 128 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 129 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667] 130 | # ], 131 | 132 | # [ 133 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 134 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 135 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 136 | # ..., [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 137 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 138 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013] 139 | # ], 140 | 141 | # [ 142 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 143 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 
144 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 145 | # ..., [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 146 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 147 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668] 148 | # ] 149 | # ] 150 | # ]), 151 | # 'img_info': { 152 | # 'license': tensor([2]), 153 | # 'file_name': ['000000007616.jpg'], 154 | # 'coco_url': ['http://images.cocodataset.org/train2017/000000007616.jpg'], 155 | # 'height': tensor([375]), 156 | # 'width': tensor([500]), 157 | # 'date_captured': ['2013-11-16 19:22:23'], 158 | # 'flickr_url': ['http://farm1.staticflickr.com/3/6939216_ea2aca1399_z.jpg'], 159 | # 'id': tensor([7616]) 160 | # }, 161 | # 'gt_bboxes': [array([ 162 | # [193.312, 153.37599, 216.5952, 175.8784], 163 | # [110.0224, 135.4624, 208.1792, 215.2832], 164 | # [160.1216, 85.7984, 168.64641, 110.976], 165 | # [204.7232, 93.6704, 212.2048, 108.3904], 166 | # [85.414406, 148.8192, 111.8976, 167.5584], 167 | # [236.0832, 155.96161, 267.5264, 166.3424], 168 | # [1.0816001, 231.6224, 320., 277.568], 169 | # [85.4656, 148.3776, 112.22401, 168.096], 170 | # [40.7232, 109.024, 44.607998, 121.5552] 171 | # ], 172 | # dtype = float32)], 173 | # 'gt_labels': [array([2, 7, 9, 9, 2, 2, 2, 7, 9])], 174 | # 'warp_matrix': [array([ 175 | # [0.64, 0., 0.], 176 | # [0., 0.64, 40.], 177 | # [0., 0., 1.] 178 | # ])] 179 | # } 180 | 181 | 182 | 183 | # ghostnet精简版本 184 | # 对GhostNet做了以下精简 185 | # 取出stage5中expansion size等于960的所有层,去除的层还包括 186 | # Conv2d 1×1 the number of output channels等于960和1280的层,平均池化层和最后的全连接层 187 | 188 | # Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.198 189 | # Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.339 190 | # Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.198 191 | # Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.059 192 | # Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.197 193 | # Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.323 194 | # Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.211 195 | # Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.340 196 | # Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.362 197 | # Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.105 198 | # Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.410 199 | # Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.583 -------------------------------------------------------------------------------- /config/ghostnet_slim.yml: -------------------------------------------------------------------------------- 1 | #Config File example 2 | save_dir: workspace/GhostNet_slim 3 | model: 4 | detector: 5 | name: GFL 6 | backbone: 7 | 8 | name: GhostNet_slim 9 | width_mult: 1.0 10 | #out_stages: [4, 6, 9] 11 | out_stages: [4, 6, 7] 12 | act: ReLU 13 | 14 | neck: 15 | name: PAN 16 | #in_channels: [40, 112, 960] 17 | in_channels: [40, 112, 160] 18 | out_channels: 96 19 | start_level: 0 20 | num_outs: 3 21 | head: 22 | name: QuarkDetHead 23 | num_classes: 80 # 80 24 | input_channel: 96 25 | feat_channels: 96 26 | stacked_convs: 2 27 | share_cls_reg: True #True 28 | octave_base_scale: 5 29 | scales_per_octave: 1 30 | strides: [8, 16, 32] 31 | reg_max: 7 #16 #7 32 | norm_cfg: 33 | type: BN 34 | loss: 35 | loss_qfl: 36 | name: QualityFocalLoss 37 | use_sigmoid: 
False #True 38 | beta: 2.0 39 | loss_weight: 1.0 40 | loss_dfl: 41 | name: DistributionFocalLoss 42 | loss_weight: 0.25 43 | loss_bbox: 44 | name: GIoULoss 45 | loss_weight: 2.0 46 | data: 47 | train: 48 | name: coco 49 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/train2017 50 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_train2017.json 51 | input_size: [320,320] #[w,h] 52 | keep_ratio: True 53 | pipeline: 54 | perspective: 0.0 55 | scale: [0.6, 1.4] 56 | stretch: [[1, 1], [1, 1]] 57 | rotation: 0 58 | shear: 0 59 | translate: 0 60 | flip: 0.5 61 | brightness: 0.2 62 | contrast: [0.8, 1.2] 63 | saturation: [0.8, 1.2] 64 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 65 | val: 66 | name: coco 67 | img_path: /media/ubuntu/data/dataset/COCOv1/2017/val2017 68 | ann_path: /media/ubuntu/data/dataset/COCOv1/2017/annotations/instances_val2017.json 69 | input_size: [320,320] #[w,h] 70 | keep_ratio: True 71 | pipeline: 72 | normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] 73 | device: 74 | gpu_ids: [1] 75 | workers_per_gpu: 16 #8 76 | batchsize_per_gpu: 80 # 80 #40 #160 santiago test 77 | schedule: 78 | resume: False 79 | load_model: ./workspace/GhostNet_slim/model_last.pth 80 | 81 | optimizer: 82 | name: SGD 83 | lr: 0.14 84 | momentum: 0.9 85 | weight_decay: 0.0001 86 | warmup: 87 | name: linear 88 | steps: 100 #300 #santiago test 89 | ratio: 0.1 90 | total_epochs: 160 #70 91 | lr_schedule: 92 | name: MultiStepLR 93 | milestones: [30,80,130,150,155,160] 94 | gamma: 0.1 95 | val_intervals: 5 96 | evaluator: 97 | name: CocoDetectionEvaluator 98 | save_key: mAP 99 | 100 | log: 101 | interval: 10 #10 102 | 103 | class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 104 | 'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant', 105 | 'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog', 106 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 107 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 108 | 'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat', 109 | 'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket', 110 | 'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 111 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 112 | 'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 113 | 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop', 114 | 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave', 115 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 116 | 'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush'] 117 | 118 | 119 | 120 | # { 121 | # 'img': tensor([ 122 | # [ 123 | # [ 124 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 125 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 126 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 127 | # ..., [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 128 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667], 129 | # [-2.5667, -2.5667, -2.5667, ..., -2.5667, -2.5667, -2.5667] 130 | # ], 131 | 132 | # [ 133 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 134 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 135 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 136 | # ..., [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 137 | # [-2.8013, -2.8013, -2.8013, ..., -2.8013, -2.8013, -2.8013], 138 | # [-2.8013, -2.8013, -2.8013, 
..., -2.8013, -2.8013, -2.8013] 139 | # ], 140 | 141 | # [ 142 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 143 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 144 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 145 | # ..., [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 146 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668], 147 | # [-2.8668, -2.8668, -2.8668, ..., -2.8668, -2.8668, -2.8668] 148 | # ] 149 | # ] 150 | # ]), 151 | # 'img_info': { 152 | # 'license': tensor([2]), 153 | # 'file_name': ['000000007616.jpg'], 154 | # 'coco_url': ['http://images.cocodataset.org/train2017/000000007616.jpg'], 155 | # 'height': tensor([375]), 156 | # 'width': tensor([500]), 157 | # 'date_captured': ['2013-11-16 19:22:23'], 158 | # 'flickr_url': ['http://farm1.staticflickr.com/3/6939216_ea2aca1399_z.jpg'], 159 | # 'id': tensor([7616]) 160 | # }, 161 | # 'gt_bboxes': [array([ 162 | # [193.312, 153.37599, 216.5952, 175.8784], 163 | # [110.0224, 135.4624, 208.1792, 215.2832], 164 | # [160.1216, 85.7984, 168.64641, 110.976], 165 | # [204.7232, 93.6704, 212.2048, 108.3904], 166 | # [85.414406, 148.8192, 111.8976, 167.5584], 167 | # [236.0832, 155.96161, 267.5264, 166.3424], 168 | # [1.0816001, 231.6224, 320., 277.568], 169 | # [85.4656, 148.3776, 112.22401, 168.096], 170 | # [40.7232, 109.024, 44.607998, 121.5552] 171 | # ], 172 | # dtype = float32)], 173 | # 'gt_labels': [array([2, 7, 9, 9, 2, 2, 2, 7, 9])], 174 | # 'warp_matrix': [array([ 175 | # [0.64, 0., 0.], 176 | # [0., 0.64, 40.], 177 | # [0., 0., 1.] 178 | # ])] 179 | # } 180 | 181 | 182 | 183 | # ghostnet精简版本 184 | # 对GhostNet做了以下精简 185 | # 取出stage5中expansion size等于960的所有层,去除的层还包括 186 | # Conv2d 1×1 the number of output channels等于960和1280的层,平均池化层和最后的全连接层 187 | 188 | # Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.198 189 | # Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.339 190 | # Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.198 191 | # Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.059 192 | # Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.197 193 | # Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.323 194 | # Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.211 195 | # Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.340 196 | # Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.362 197 | # Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.105 198 | # Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.410 199 | # Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.583 -------------------------------------------------------------------------------- /quarkdet/model/backbone/shufflenetv2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.utils.model_zoo as model_zoo 4 | from ..module.activation import act_layers 5 | 6 | model_urls = { 7 | 'shufflenetv2_0.5x': 'https://download.pytorch.org/models/shufflenetv2_x0.5-f707e7126e.pth', 8 | 'shufflenetv2_1.0x': 'https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth', 9 | 'shufflenetv2_1.5x': None, 10 | 'shufflenetv2_2.0x': None, 11 | } 12 | 13 | 14 | def channel_shuffle(x, groups): 15 | # type: (torch.Tensor, int) -> torch.Tensor 16 | batchsize, 
num_channels, height, width = x.data.size() 17 | channels_per_group = num_channels // groups 18 | 19 | # reshape 20 | x = x.view(batchsize, groups, 21 | channels_per_group, height, width) 22 | 23 | x = torch.transpose(x, 1, 2).contiguous() 24 | 25 | # flatten 26 | x = x.view(batchsize, -1, height, width) 27 | 28 | return x 29 | 30 | 31 | class ShuffleV2Block(nn.Module): 32 | def __init__(self, inp, oup, stride, activation='ReLU'): 33 | super(ShuffleV2Block, self).__init__() 34 | 35 | if not (1 <= stride <= 3): 36 | raise ValueError('illegal stride value') 37 | self.stride = stride 38 | 39 | branch_features = oup // 2 40 | assert (self.stride != 1) or (inp == branch_features << 1) 41 | 42 | if self.stride > 1: 43 | self.branch1 = nn.Sequential( 44 | self.depthwise_conv(inp, inp, kernel_size=3, stride=self.stride, padding=1), 45 | nn.BatchNorm2d(inp), 46 | nn.Conv2d(inp, branch_features, kernel_size=1, stride=1, padding=0, bias=False), 47 | nn.BatchNorm2d(branch_features), 48 | act_layers(activation), 49 | ) 50 | else: 51 | self.branch1 = nn.Sequential() 52 | 53 | self.branch2 = nn.Sequential( 54 | nn.Conv2d(inp if (self.stride > 1) else branch_features, 55 | branch_features, kernel_size=1, stride=1, padding=0, bias=False), 56 | nn.BatchNorm2d(branch_features), 57 | act_layers(activation), 58 | self.depthwise_conv(branch_features, branch_features, kernel_size=3, stride=self.stride, padding=1), 59 | nn.BatchNorm2d(branch_features), 60 | nn.Conv2d(branch_features, branch_features, kernel_size=1, stride=1, padding=0, bias=False), 61 | nn.BatchNorm2d(branch_features), 62 | act_layers(activation), 63 | ) 64 | 65 | @staticmethod 66 | def depthwise_conv(i, o, kernel_size, stride=1, padding=0, bias=False): 67 | return nn.Conv2d(i, o, kernel_size, stride, padding, bias=bias, groups=i) 68 | 69 | def forward(self, x): 70 | if self.stride == 1: 71 | x1, x2 = x.chunk(2, dim=1) 72 | out = torch.cat((x1, self.branch2(x2)), dim=1) 73 | else: 74 | out = torch.cat((self.branch1(x), self.branch2(x)), dim=1) 75 | 76 | out = channel_shuffle(out, 2) 77 | 78 | return out 79 | 80 | 81 | class ShuffleNetV2(nn.Module): 82 | def __init__(self, 83 | model_size='1.5x', 84 | out_stages=(2, 3, 4), 85 | with_last_conv=False, 86 | kernal_size=3, 87 | activation='ReLU'): 88 | super(ShuffleNetV2, self).__init__() 89 | print('model size is ', model_size) 90 | 91 | self.stage_repeats = [4, 8, 4] 92 | self.model_size = model_size 93 | self.out_stages = out_stages 94 | self.with_last_conv = with_last_conv 95 | self.kernal_size = kernal_size 96 | self.activation = activation 97 | if model_size == '0.5x': 98 | self._stage_out_channels = [24, 48, 96, 192, 1024] 99 | elif model_size == '1.0x': 100 | self._stage_out_channels = [24, 116, 232, 464, 1024] 101 | elif model_size == '1.5x': 102 | self._stage_out_channels = [24, 176, 352, 704, 1024] 103 | elif model_size == '2.0x': 104 | self._stage_out_channels = [24, 244, 488, 976, 2048] 105 | else: 106 | raise NotImplementedError 107 | 108 | # building first layer 109 | input_channels = 3 110 | output_channels = self._stage_out_channels[0] 111 | self.conv1 = nn.Sequential( 112 | nn.Conv2d(input_channels, output_channels, 3, 2, 1, bias=False), 113 | nn.BatchNorm2d(output_channels), 114 | act_layers(activation), 115 | ) 116 | input_channels = output_channels 117 | 118 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 119 | 120 | stage_names = ['stage{}'.format(i) for i in [2, 3, 4]] 121 | for name, repeats, output_channels in zip( 122 | stage_names, self.stage_repeats, 
self._stage_out_channels[1:]): 123 | seq = [ShuffleV2Block(input_channels, output_channels, 2, activation=activation)] 124 | for i in range(repeats - 1): 125 | seq.append(ShuffleV2Block(output_channels, output_channels, 1, activation=activation)) 126 | setattr(self, name, nn.Sequential(*seq)) 127 | input_channels = output_channels 128 | output_channels = self._stage_out_channels[-1] 129 | if self.with_last_conv: 130 | self.conv5 = nn.Sequential( 131 | nn.Conv2d(input_channels, output_channels, 1, 1, 0, bias=False), 132 | nn.BatchNorm2d(output_channels), 133 | act_layers(activation), 134 | ) 135 | self.stage4.add_module('conv5', self.conv5) 136 | self._initialize_weights() 137 | 138 | def forward(self, x): 139 | x = self.conv1(x) 140 | x = self.maxpool(x) 141 | output = [] 142 | for i in range(2, 5): 143 | stage = getattr(self, 'stage{}'.format(i)) 144 | x = stage(x) 145 | if i in self.out_stages: 146 | output.append(x) 147 | return tuple(output) 148 | 149 | def _initialize_weights(self, pretrain=True): 150 | print('init weights...') 151 | for name, m in self.named_modules(): 152 | if isinstance(m, nn.Conv2d): 153 | if 'first' in name: 154 | nn.init.normal_(m.weight, 0, 0.01) 155 | else: 156 | nn.init.normal_(m.weight, 0, 1.0 / m.weight.shape[1]) 157 | if m.bias is not None: 158 | nn.init.constant_(m.bias, 0) 159 | elif isinstance(m, nn.BatchNorm2d): 160 | nn.init.constant_(m.weight, 1) 161 | if m.bias is not None: 162 | nn.init.constant_(m.bias, 0.0001) 163 | nn.init.constant_(m.running_mean, 0) 164 | elif isinstance(m, nn.BatchNorm1d): 165 | nn.init.constant_(m.weight, 1) 166 | if m.bias is not None: 167 | nn.init.constant_(m.bias, 0.0001) 168 | nn.init.constant_(m.running_mean, 0) 169 | elif isinstance(m, nn.Linear): 170 | nn.init.normal_(m.weight, 0, 0.01) 171 | if m.bias is not None: 172 | nn.init.constant_(m.bias, 0) 173 | if pretrain: 174 | url = model_urls['shufflenetv2_{}'.format(self.model_size)] 175 | if url is not None: 176 | pretrained_state_dict = model_zoo.load_url(url) 177 | print('=> loading pretrained model {}'.format(url)) 178 | self.load_state_dict(pretrained_state_dict, strict=False) 179 | 180 | 181 | if __name__ == "__main__": 182 | model = ShuffleNetV2(model_size='1.0x', ) 183 | print(model) 184 | test_data = torch.rand(5, 3, 320, 320) 185 | test_outputs = model(test_data) 186 | for out in test_outputs: 187 | print(out.size()) 188 | -------------------------------------------------------------------------------- /quarkdet/model/backbone/vovnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from collections import OrderedDict 5 | #https://github.com/youngwanLEE/vovnet-detectron2 6 | #https://arxiv.org/pdf/1904.09730v1.pdf 7 | #https://arxiv.org/pdf/1911.06667.pdf 8 | __all__ = ['VoVNet', 'vovnet27_slim', 'vovnet39', 'vovnet57'] 9 | 10 | 11 | model_urls = { 12 | 'vovnet39': 'https://dl.dropbox.com/s/1lnzsgnixd8gjra/vovnet39_torchvision.pth?dl=1', 13 | 'vovnet57': 'https://dl.dropbox.com/s/6bfu9gstbwfw31m/vovnet57_torchvision.pth?dl=1' 14 | } 15 | 16 | 17 | def conv3x3(in_channels, out_channels, module_name, postfix, 18 | stride=1, groups=1, kernel_size=3, padding=1): 19 | """3x3 convolution with padding""" 20 | return [ 21 | ('{}_{}/conv'.format(module_name, postfix), 22 | nn.Conv2d(in_channels, out_channels, 23 | kernel_size=kernel_size, 24 | stride=stride, 25 | padding=padding, 26 | groups=groups, 27 | bias=False)), 28 | 
('{}_{}/norm'.format(module_name, postfix), 29 | nn.BatchNorm2d(out_channels)), 30 | ('{}_{}/relu'.format(module_name, postfix), 31 | nn.ReLU(inplace=True)), 32 | ] 33 | 34 | 35 | def conv1x1(in_channels, out_channels, module_name, postfix, 36 | stride=1, groups=1, kernel_size=1, padding=0): 37 | """1x1 convolution""" 38 | return [ 39 | ('{}_{}/conv'.format(module_name, postfix), 40 | nn.Conv2d(in_channels, out_channels, 41 | kernel_size=kernel_size, 42 | stride=stride, 43 | padding=padding, 44 | groups=groups, 45 | bias=False)), 46 | ('{}_{}/norm'.format(module_name, postfix), 47 | nn.BatchNorm2d(out_channels)), 48 | ('{}_{}/relu'.format(module_name, postfix), 49 | nn.ReLU(inplace=True)), 50 | ] 51 | 52 | 53 | class _OSA_module(nn.Module): 54 | def __init__(self, 55 | in_ch, 56 | stage_ch, 57 | concat_ch, 58 | layer_per_block, 59 | module_name, 60 | identity=False): 61 | super(_OSA_module, self).__init__() 62 | 63 | self.identity = identity 64 | self.layers = nn.ModuleList() 65 | in_channel = in_ch 66 | for i in range(layer_per_block): 67 | self.layers.append(nn.Sequential( 68 | OrderedDict(conv3x3(in_channel, stage_ch, module_name, i)))) 69 | in_channel = stage_ch 70 | 71 | in_channel = in_ch + layer_per_block * stage_ch  # feature aggregation 72 | self.concat = nn.Sequential( 73 | OrderedDict(conv1x1(in_channel, concat_ch, module_name, 'concat'))) 74 | 75 | def forward(self, x): 76 | identity_feat = x 77 | output = [] 78 | output.append(x) 79 | for layer in self.layers: 80 | x = layer(x) 81 | output.append(x) 82 | 83 | x = torch.cat(output, dim=1) 84 | xt = self.concat(x) 85 | 86 | if self.identity: 87 | xt = xt + identity_feat 88 | 89 | return xt 90 | 91 | 92 | class _OSA_stage(nn.Sequential): 93 | def __init__(self, 94 | in_ch, 95 | stage_ch, 96 | concat_ch, 97 | block_per_stage, 98 | layer_per_block, 99 | stage_num): 100 | super(_OSA_stage, self).__init__() 101 | 102 | if not stage_num == 2: 103 | self.add_module('Pooling', 104 | nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)) 105 | 106 | module_name = f'OSA{stage_num}_1' 107 | self.add_module(module_name, 108 | _OSA_module(in_ch, 109 | stage_ch, 110 | concat_ch, 111 | layer_per_block, 112 | module_name)) 113 | for i in range(block_per_stage-1): 114 | module_name = f'OSA{stage_num}_{i+2}' 115 | self.add_module(module_name, 116 | _OSA_module(concat_ch, 117 | stage_ch, 118 | concat_ch, 119 | layer_per_block, 120 | module_name, 121 | identity=True)) 122 | 123 | 124 | class VoVNet(nn.Module): 125 | def __init__(self, 126 | config_stage_ch, 127 | config_concat_ch, 128 | block_per_stage, 129 | layer_per_block, 130 | num_classes=2): 131 | super(VoVNet, self).__init__() 132 | 133 | stem = conv3x3(3, 64, 'stem', '1', 2)  # Stem module 134 | stem += conv3x3(64, 64, 'stem', '2', 1) 135 | stem += conv3x3(64, 128, 'stem', '3', 2) 136 | self.add_module('stem', nn.Sequential(OrderedDict(stem))) 137 | 138 | stem_out_ch = [128] 139 | in_ch_list = stem_out_ch + config_concat_ch[:-1] 140 | self.stage_names = [] 141 | for i in range(4):  # num_stages 142 | name = 'stage%d' % (i+2); self.stage_names.append(name) 143 | self.add_module(name, 144 | _OSA_stage(in_ch_list[i], 145 | config_stage_ch[i], 146 | config_concat_ch[i], 147 | block_per_stage[i], 148 | layer_per_block, 149 | i+2)) 150 | 151 | self.feature_layer = nn.Linear(config_concat_ch[-1], 128) 152 | self.classifier = nn.Linear(128, num_classes) 153 | 154 | for m in self.modules(): 155 | if isinstance(m, nn.Conv2d): 156 | nn.init.kaiming_normal_(m.weight) 157 | elif isinstance(m,
(nn.BatchNorm2d, nn.GroupNorm)): 158 | nn.init.constant_(m.weight, 1) 159 | nn.init.constant_(m.bias, 0) 160 | elif isinstance(m, nn.Linear): 161 | nn.init.constant_(m.bias, 0) 162 | 163 | def forward(self, x): 164 | x = self.stem(x) 165 | for name in self.stage_names: 166 | x = getattr(self, name)(x) 167 | x = F.adaptive_avg_pool2d(x, (1, 1)).view(x.size(0), -1) 168 | features = self.feature_layer(x) 169 | x = self.classifier(features) 170 | return features, x 171 | 172 | 173 | def _vovnet(arch, 174 | config_stage_ch, 175 | config_concat_ch, 176 | block_per_stage, 177 | layer_per_block, 178 | pretrained, 179 | progress, 180 | **kwargs): 181 | model = VoVNet(config_stage_ch, config_concat_ch, 182 | block_per_stage, layer_per_block, 183 | **kwargs) 184 | if pretrained: 185 | # use torch.hub here: load_state_dict_from_url is never imported in this file 186 | state_dict = torch.hub.load_state_dict_from_url(model_urls[arch], progress=progress) 187 | model.load_state_dict(state_dict) 188 | return model 189 | 190 | 191 | def vovnet57(pretrained=False, progress=True, **kwargs): 192 | return _vovnet('vovnet57', [128, 160, 192, 224], [256, 512, 768, 1024], 193 | [1,1,4,3], 5, pretrained, progress, **kwargs) 194 | 195 | 196 | def vovnet39(pretrained=False, progress=True, **kwargs): 197 | return _vovnet('vovnet39', [128, 160, 192, 224], [256, 512, 768, 1024], 198 | [1,1,2,2], 5, pretrained, progress, **kwargs) 199 | 200 | 201 | def vovnet27_slim(pretrained=False, progress=True, **kwargs): 202 | return _vovnet('vovnet27_slim', [64, 80, 96, 112], [128, 256, 384, 512], 203 | [1,1,1,1], 5, pretrained, progress, **kwargs) --------------------------------------------------------------------------------
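A quick smoke test for the VoVNet factories above (a usage sketch, not part of the repo). Note that `model_urls` only registers weights for vovnet39 and vovnet57, so the slim variant is built with pretrained=False:

import torch
from quarkdet.model.backbone.vovnet import vovnet27_slim

# Build the slim classification-style VoVNet and run a dummy forward pass.
model = vovnet27_slim(pretrained=False, num_classes=2)
model.eval()
with torch.no_grad():
    features, logits = model(torch.rand(1, 3, 224, 224))
print(features.shape)  # torch.Size([1, 128]) -- output of feature_layer
print(logits.shape)    # torch.Size([1, 2])   -- output of classifier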