├── easymd ├── utils │ └── __init__.py ├── analysis_tools │ ├── __init__.py │ ├── get_flops.py │ ├── anchor_analyze.py │ ├── browse_dataset.py │ ├── eval_metric.py │ ├── atss_anchor.py │ ├── benchmark.py │ ├── kmean.py │ ├── center_sample_demo.py │ ├── visualize_coco.py │ ├── visualize_panoptic.py │ ├── analyze_logs.py │ └── analyze_results.py ├── models │ ├── losses │ │ ├── __init__.py │ │ └── dice_loss.py │ ├── utils │ │ ├── __init__.py │ │ └── transform.py │ ├── detectors │ │ ├── __init__.py │ │ ├── panseg.py │ │ ├── detr_plus.py │ │ └── single_stage_panoptic_detector.py │ ├── __init__.py │ ├── backbones │ │ └── __init__.py │ └── panformer │ │ └── __init__.py ├── apis │ └── __init__.py ├── core │ ├── bbox │ │ ├── match_costs │ │ │ ├── __init__.py │ │ │ └── match_cost.py │ │ └── __init__.py │ ├── __init__.py │ └── evaluation │ │ ├── __init__.py │ │ └── eval_hooks.py ├── runner │ ├── __init__.py │ ├── hooks │ │ ├── __init__.py │ │ └── grad_check.py │ └── checkpoints.py ├── datasets │ ├── panopticapi │ │ ├── __init__.py │ │ ├── README.md │ │ └── utils.py │ └── __init__.py └── __init__.py ├── figs └── arch.png ├── configs ├── panformer │ ├── panformer_r50_12e_coco_panoptic.py │ ├── panformer_r50_24e_coco_panoptic.py │ ├── panformer_pvtb5_24e_coco_panoptic.py │ ├── panformer_r101_24e_coco_panoptic.py │ └── panformer_swinl_24e_coco_panoptic.py ├── _base_ │ ├── schedules │ │ ├── schedule_1x.py │ │ ├── schedule_20e.py │ │ └── schedule_2x.py │ ├── default_runtime.py │ ├── models │ │ ├── ssd300.py │ │ ├── rpn_r50_caffe_c4.py │ │ ├── retinanet_r50_fpn.py │ │ ├── rpn_r50_fpn.py │ │ ├── fast_rcnn_r50_fpn.py │ │ ├── faster_rcnn_r50_caffe_dc5.py │ │ ├── faster_rcnn_r50_fpn.py │ │ ├── faster_rcnn_r50_caffe_c4.py │ │ ├── mask_rcnn_r50_fpn.py │ │ ├── mask_rcnn_r50_caffe_c4.py │ │ ├── cascade_rcnn_r50_fpn.py │ │ └── cascade_mask_rcnn_r50_fpn.py │ └── datasets │ │ └── coco_panoptic_plus.py └── models │ ├── ssd300.py │ ├── rpn_r50_caffe_c4.py │ ├── retinanet_r50_fpn.py │ ├── rpn_r50_fpn.py │ ├── fast_rcnn_r50_fpn.py │ ├── faster_rcnn_r50_caffe_dc5.py │ ├── faster_rcnn_r50_fpn.py │ ├── faster_rcnn_r50_caffe_c4.py │ ├── mask_rcnn_r50_fpn.py │ ├── mask_rcnn_r50_caffe_c4.py │ ├── cascade_rcnn_r50_fpn.py │ └── cascade_mask_rcnn_r50_fpn.py ├── requirements.txt ├── tools ├── dist_train.sh ├── dist_test.sh ├── ana_tools │ ├── dataset_split.py │ ├── get_flops.py │ ├── ana_query.py │ └── benchmark.py ├── bricks │ └── infererce.py └── convert_panoptic_coco.sh ├── setup.py ├── .gitignore ├── converter └── panoptic_cityscapes_categories.json └── README.md /easymd/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /easymd/analysis_tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /easymd/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .dice_loss import * -------------------------------------------------------------------------------- /easymd/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .test import * 2 | from .train import * -------------------------------------------------------------------------------- /easymd/core/bbox/match_costs/__init__.py: -------------------------------------------------------------------------------- 1 
| from .match_cost import * --------------------------------------------------------------------------------
/easymd/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox import * 2 | from .evaluation import * --------------------------------------------------------------------------------
/easymd/core/bbox/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .match_costs import * 3 | 4 | --------------------------------------------------------------------------------
/easymd/runner/__init__.py: -------------------------------------------------------------------------------- 1 | from .hooks import * 2 | from .checkpoints import * 3 | --------------------------------------------------------------------------------
/easymd/runner/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | from .grad_check import GradChecker, CacheCleaner --------------------------------------------------------------------------------
/easymd/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .eval_hooks import EvalHook_plus,DistEvalHook_plus --------------------------------------------------------------------------------
/easymd/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .visual import * 3 | from .transform import * 4 | --------------------------------------------------------------------------------
/figs/arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiqi-li/Panoptic-SegFormer/HEAD/figs/arch.png --------------------------------------------------------------------------------
/easymd/datasets/panopticapi/__init__.py: -------------------------------------------------------------------------------- 1 | from .evaluation import * 2 | from .converter_2cpng2pan import * --------------------------------------------------------------------------------
/easymd/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .panopticapi import * 2 | from .coco_panoptic import CocoDataset_panoptic 3 | --------------------------------------------------------------------------------
/easymd/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .panseg import * 2 | from .single_stage_panoptic_detector import * 3 | from .detr_plus import DETR_plus --------------------------------------------------------------------------------
/easymd/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbones import * 2 | from .utils import * 3 | from .detectors import * 4 | from .losses import * 5 | from .panformer import * --------------------------------------------------------------------------------
/easymd/datasets/panopticapi/README.md: -------------------------------------------------------------------------------- 1 | We copy-pasted [panopticapi](https://github.com/cocodataset/panopticapi) and modified it to make it compatible with our framework. --------------------------------------------------------------------------------
/easymd/__init__.py: -------------------------------------------------------------------------------- 1 | from .analysis_tools import * 2 | 3 | from .models
import * 4 | from .core import * 5 | from .runner import * 6 | from .utils import * 7 | from .datasets import * -------------------------------------------------------------------------------- /configs/panformer/panformer_r50_12e_coco_panoptic.py: -------------------------------------------------------------------------------- 1 | 2 | _base_ = './base.py' 3 | lr_config = dict(policy='step', step=[8]) 4 | runner = dict(type='EpochBasedRunner', max_epochs=12) 5 | -------------------------------------------------------------------------------- /configs/panformer/panformer_r50_24e_coco_panoptic.py: -------------------------------------------------------------------------------- 1 | 2 | _base_ = './base.py' 3 | 4 | lr_config = dict(policy='step', step=[18]) 5 | runner = dict(type='EpochBasedRunner', max_epochs=24) 6 | -------------------------------------------------------------------------------- /easymd/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .pvt import pvt_small_impr1_peg 2 | from .pvt_v2_ap import pvt_v2_b2_ap,pvt_v2_b0_ap 3 | from .pvt_v2 import pvt_v2_b5,pvt_v2_b2, pvt_v2_b0,pvt_v2_b1 4 | from .swin import SwinTransformer 5 | -------------------------------------------------------------------------------- /easymd/models/panformer/__init__.py: -------------------------------------------------------------------------------- 1 | from .detr_head import DETRHeadv2 2 | from .panformer_head import PanformerHead 3 | from .mask_head import MaskHead 4 | from .deformable_detr import Deformable_Transformer 5 | from .tools import * -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | mmcv-full==1.3.4 2 | mmdet==2.12.0 3 | torchvision==0.8.2 4 | timm==0.4.5 5 | pycocotools 6 | einops==0.3.0 7 | Pillow==8.0.1 8 | opencv-python==4.5.1.48 9 | cityscapesscripts 10 | wandb 11 | seaborn 12 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | set -x 4 | 5 | CONFIG=$1 6 | GPUS=$2 7 | PORT=${PORT:-29503} 8 | 9 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 10 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 11 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} --deterministic 12 | -------------------------------------------------------------------------------- /tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29504} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} --eval panoptic 11 | #bbox segm 12 | # -------------------------------------------------------------------------------- /configs/_base_/schedules/schedule_1x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[8, 11]) 11 | runner 
= dict(type='EpochBasedRunner', max_epochs=12) 12 | --------------------------------------------------------------------------------
/configs/_base_/schedules/schedule_20e.py: --------------------------------------------------------------------------------
1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy
5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[16, 19]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=20) 12 | --------------------------------------------------------------------------------
/configs/_base_/schedules/schedule_2x.py: --------------------------------------------------------------------------------
1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy
5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[16, 22]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=24) 12 | --------------------------------------------------------------------------------
/easymd/models/detectors/panseg.py: --------------------------------------------------------------------------------
1 | #from ..builder import DETECTORS 2 | #from .detr import DETR 3 | 4 | 5 | from mmdet.models.detectors.detr import DETR 6 | from mmdet.models.builder import DETECTORS 7 | from easymd.models.detectors.detr_plus import DETR_plus
8 | @DETECTORS.register_module() 9 | class PanSeg(DETR_plus): 10 | 11 | def __init__(self, *args, **kwargs): 12 | super(DETR_plus, self).__init__(*args, **kwargs) 13 | self.count=0 14 | --------------------------------------------------------------------------------
/tools/ana_tools/dataset_split.py: --------------------------------------------------------------------------------
1 | import json 2 | import random 3 | 4 | file_path = './datasets/cityscapes/cityscapes_panoptic_train_detection_format.json' 5 | 
6 | with open(file_path,'r') as f: 7 | data = json.load(f) 8 | images = data['images'] 9 | len_img = len(images) 10 | print(len_img)
11 | random.shuffle(images) 12 | print(images[:len_img//10]) 13 | data['images'] = data['images'][:len_img//10]
14 | out_path = 'partial_' + file_path.split('/')[-1] # assumed output name; the original script breaks off mid-statement here
15 | with open(out_path, 'w') as f: 16 | json.dump(data, f) --------------------------------------------------------------------------------
/configs/_base_/default_runtime.py: --------------------------------------------------------------------------------
1 | checkpoint_config = dict(interval=1) 2 | # yapf:disable 3 | log_config = dict( 4 | interval=50, 5 | hooks=[ 6 | dict(type='TextLoggerHook'), 7 | #dict(type='WandbLoggerHook',init_kwargs=dict(project="Panoptic-Segformer")) 8 | ]) 9 | # yapf:enable
10 | custom_hooks = [dict(type='NumClassCheckHook')] 11 | 12 | dist_params = dict(backend='nccl') 13 | log_level = 'INFO' 14 | load_from = None 15 | resume_from = None 16 | workflow = [('train', 1)] 17 | 18 | --------------------------------------------------------------------------------
/tools/bricks/infererce.py: --------------------------------------------------------------------------------
1 | from mmdet.apis.inference import init_detector, inference_detector
2 | import easymd  # importing easymd registers PanSeg and the custom heads with mmdet
3 | 
4 | config = 'config.py'
5 | #checkpoints = './checkpoints/pseg_r101_r50_latest.pth'
6 | checkpoints = "path/to/pth"
7 | img = '000000322864.jpg'
8 | 
9 | model = init_detector(config, checkpoint=checkpoints)
10 | 
11 | results = inference_detector(model, './datasets/coco/val2017/'+img)
12 | 
--------------------------------------------------------------------------------
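For PanSeg models the inference call above does not return mmdet's usual per-class list: simple_test in easymd/models/detectors/detr_plus.py packs everything into a dict with 'bbox', 'segm' and 'panoptic' keys. A minimal sketch of unpacking it, assuming inference_detector hands that dict back unchanged and using an arbitrary 0.5 score threshold:

    bbox_results = results['bbox'][0]   # one (n, 5) array of [x1, y1, x2, y2, score] per thing class
    segm_results = results['segm'][0]   # one (n, h, w) mask array per thing class
    for cls_id, (bboxes, masks) in enumerate(zip(bbox_results, segm_results)):
        keep = bboxes[:, -1] > 0.5      # illustrative threshold, not from the repo
        print(cls_id, int(keep.sum()), 'detections kept')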
/configs/panformer/panformer_pvtb5_24e_coco_panoptic.py: --------------------------------------------------------------------------------
1 | 2 | _base_ = './base.py' 3 | 4 | model = dict( 5 | # get pvt_v2_b5_22k 6 | # wget https://github.com/whai362/PVT/releases/download/v2/pvt_v2_b5_22k.pth 7 | pretrained='./checkpoints/pvt_v2_b5_22k.pth',
8 | backbone=dict( 9 | type='pvt_v2_b5', 10 | out_indices=(1, 2, 3), 11 | ), 12 | neck=dict( 13 | type='ChannelMapper', 14 | in_channels=[128, 320, 512], 15 | ), 16 | bbox_head=dict( 17 | quality_threshold_things=0.3, 18 | quality_threshold_stuff=0.3, 19 | ) 20 | ) 21 | --------------------------------------------------------------------------------
/easymd/runner/hooks/grad_check.py: --------------------------------------------------------------------------------
1 | from mmcv.runner.hooks.hook import HOOKS, Hook 2 | import torch 3 | 
4 | @HOOKS.register_module() 5 | class GradChecker(Hook): 6 | def __init__(self) -> None: 7 | super().__init__()
8 | def after_train_iter(self,runner): 9 | for key,val in runner.model.named_parameters(): 10 | if val.grad is None and val.requires_grad: 11 | print('WARNING: {key}\'s parameters are not used!'.format(key=key)) 12 | 13 | 
14 | @HOOKS.register_module() 15 | class CacheCleaner(Hook): 16 | def __init__(self) -> None: 17 | super().__init__() 18 | def after_train_epoch(self,runner): 19 | torch.cuda.empty_cache() 20 | --------------------------------------------------------------------------------
/configs/panformer/panformer_r101_24e_coco_panoptic.py: --------------------------------------------------------------------------------
1 | 2 | _base_ = './base.py' 3 | _dim_ = 256 4 | _num_levels_=4 5 | model = dict( 6 | type='PanSeg', 7 | pretrained='torchvision://resnet101',
8 | backbone=dict( 9 | type='ResNet', 10 | depth=101, 11 | num_stages=4, 12 | out_indices=(1, 2, 3), 13 | frozen_stages=1, 14 | norm_cfg=dict(type='BN', requires_grad=False), 15 | norm_eval=True, 16 | style='pytorch'),
17 | neck=dict( 18 | type='ChannelMapper', 19 | in_channels=[512, 1024, 2048], 20 | kernel_size=1, 21 | out_channels=_dim_, 22 | act_cfg=None, 23 | norm_cfg=dict(type='GN', num_groups=32), 24 | num_outs=_num_levels_), 25 | ) --------------------------------------------------------------------------------
/setup.py: --------------------------------------------------------------------------------
1 | from distutils.core import setup 2 | from setuptools import find_packages 3 | 4 | import os 5 | thelibFolder = os.path.dirname(os.path.realpath(__file__)) 6 | requirementPath = thelibFolder + '/requirements.txt'
7 | install_requires = [] # Examples: ["gunicorn", "docutils>=0.3", "lxml==0.5a7"] 8 | if os.path.isfile(requirementPath): 9 | with open(requirementPath) as f: 10 | install_requires = f.read().splitlines() 11 | 
12 | setup( 13 | name='easymd', 14 | version='0.1', 15 | packages=find_packages(), 16 | url='', 17 | license='Apache', 18 | author='Li Zhiqi', 19 | install_requires=install_requires, 20 | author_email='lzq@smail.nju.edu.cn',
21 | description='This package extends the capabilities of MMDetection' 22 | ) 23 | --------------------------------------------------------------------------------
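Since setup.py reads install_requires straight from requirements.txt, the usual way to set up a working environment is an editable install from the repository root (one possible invocation, not mandated by the repo):

    pip install -v -e .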
/easymd/models/utils/transform.py: --------------------------------------------------------------------------------
1 | import torch 2 | import numpy as np 3 | 
4 | def mask2result(seg, labels, num_classes):
5 | """Convert segmentation results to a list of numpy arrays.
6 | 
7 | Args:
8 | seg (torch.Tensor | np.ndarray): shape (n, h, w)
9 | labels (torch.Tensor | np.ndarray): shape (n, )
10 | num_classes (int): class number, including background class
11 | 
12 | Returns:
13 | list(ndarray): mask results of each class
14 | """
15 | 
16 | if seg.shape[0] == 0: 17 | _,h,w = seg.shape 18 | return [np.zeros((0, h, w), dtype=np.float32) for i in range(num_classes)]
19 | else: 20 | if isinstance(seg, torch.Tensor): 21 | seg = seg.detach().cpu().numpy() 22 | labels = labels.detach().cpu().numpy()
23 | return [seg[labels == i, :] for i in range(num_classes)] --------------------------------------------------------------------------------
/configs/panformer/panformer_swinl_24e_coco_panoptic.py: --------------------------------------------------------------------------------
1 | _base_ = './base.py' 2 | _dim_ = 256 3 | _num_levels_=4 4 | model = dict( 5 | type='PanSeg',
6 | # get swin-large 7 | #import os 8 | #import torch 9 | #os.system('wget -O checkpoints/swinl.pth https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window7_224_22k.pth') 10 | #model = torch.load('checkpoints/swinl.pth') 11 | #torch.save(model['model'], 'checkpoints/swinl.pth') 12 | #print('DONE, swin-large was saved as checkpoints/swinl.pth')
13 | pretrained='./checkpoints/swinl.pth', 14 | backbone=dict( 15 | type='SwinTransformer', 16 | embed_dim=192, 17 | depths=[2, 2, 18, 2], 18 | num_heads=[6, 12, 24, 48], 19 | window_size=7, 20 | mlp_ratio=4., 21 | qkv_bias=True, 22 | qk_scale=None, 23 | drop_rate=0., 24 | attn_drop_rate=0., 25 | drop_path_rate=0.3, 26 | ape=False, 27 | patch_norm=True, 28 | out_indices=(1, 2, 3), 29 | use_checkpoint=False),
30 | neck=dict( 31 | type='ChannelMapper', 32 | in_channels=[384, 768, 1536], 33 | kernel_size=1, 34 | out_channels=_dim_, 35 | act_cfg=None, 36 | norm_cfg=dict(type='GN', num_groups=32), 37 | num_outs=_num_levels_),
38 | bbox_head=dict( 39 | quality_threshold_things=0.3, 40 | quality_threshold_stuff=0.3, 41 | ) 42 | ) 43 | --------------------------------------------------------------------------------
/configs/models/ssd300.py: --------------------------------------------------------------------------------
1 | # model settings 2 | input_size = 300 3 | model = dict( 4 | type='SingleStageDetector', 5 | pretrained='open-mmlab://vgg16_caffe',
6 | backbone=dict( 7 | type='SSDVGG', 8 | input_size=input_size, 9 | depth=16, 10 | with_last_pool=False, 11 | ceil_mode=True, 12 | out_indices=(3, 4), 13 | out_feature_indices=(22, 34), 14 | l2_norm_scale=20),
15 | neck=None, 16 | bbox_head=dict( 17 | type='SSDHead', 18 | in_channels=(512, 1024, 512, 256, 256, 256), 19 | num_classes=80,
20 | anchor_generator=dict( 21 | type='SSDAnchorGenerator', 22 | scale_major=False, 23 | input_size=input_size, 24 | basesize_ratio_range=(0.15, 0.9), 25 | strides=[8, 16, 32, 64, 100, 300], 26 | ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]]),
27 | bbox_coder=dict( 28 | type='DeltaXYWHBBoxCoder', 29 | target_means=[.0, .0, .0, .0], 30 | target_stds=[0.1, 0.1, 0.2, 0.2])),
31 | # model training and testing settings 32 | train_cfg=dict( 33 | assigner=dict( 34 | type='MaxIoUAssigner', 35 | pos_iou_thr=0.5, 36 | neg_iou_thr=0.5, 37 | min_pos_iou=0., 38 | ignore_iof_thr=-1, 39 | gt_max_assign_all=False), 40 | smoothl1_beta=1., 41
| allowed_border=-1, 42 | pos_weight=-1, 43 | neg_pos_ratio=3, 44 | debug=False), 45 | test_cfg=dict( 46 | nms_pre=1000, 47 | nms=dict(type='nms', iou_threshold=0.45), 48 | min_bbox_size=0, 49 | score_thr=0.02, 50 | max_per_img=200)) 51 | cudnn_benchmark = True 52 | -------------------------------------------------------------------------------- /configs/_base_/models/ssd300.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | input_size = 300 3 | model = dict( 4 | type='SingleStageDetector', 5 | pretrained='open-mmlab://vgg16_caffe', 6 | backbone=dict( 7 | type='SSDVGG', 8 | input_size=input_size, 9 | depth=16, 10 | with_last_pool=False, 11 | ceil_mode=True, 12 | out_indices=(3, 4), 13 | out_feature_indices=(22, 34), 14 | l2_norm_scale=20), 15 | neck=None, 16 | bbox_head=dict( 17 | type='SSDHead', 18 | in_channels=(512, 1024, 512, 256, 256, 256), 19 | num_classes=80, 20 | anchor_generator=dict( 21 | type='SSDAnchorGenerator', 22 | scale_major=False, 23 | input_size=input_size, 24 | basesize_ratio_range=(0.15, 0.9), 25 | strides=[8, 16, 32, 64, 100, 300], 26 | ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]]), 27 | bbox_coder=dict( 28 | type='DeltaXYWHBBoxCoder', 29 | target_means=[.0, .0, .0, .0], 30 | target_stds=[0.1, 0.1, 0.2, 0.2])), 31 | # model training and testing settings 32 | train_cfg=dict( 33 | assigner=dict( 34 | type='MaxIoUAssigner', 35 | pos_iou_thr=0.5, 36 | neg_iou_thr=0.5, 37 | min_pos_iou=0., 38 | ignore_iof_thr=-1, 39 | gt_max_assign_all=False), 40 | smoothl1_beta=1., 41 | allowed_border=-1, 42 | pos_weight=-1, 43 | neg_pos_ratio=3, 44 | debug=False), 45 | test_cfg=dict( 46 | nms_pre=1000, 47 | nms=dict(type='nms', iou_threshold=0.45), 48 | min_bbox_size=0, 49 | score_thr=0.02, 50 | max_per_img=200)) 51 | cudnn_benchmark = True 52 | -------------------------------------------------------------------------------- /configs/models/rpn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='open-mmlab://detectron2/resnet50_caffe', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=3, 9 | strides=(1, 2, 2), 10 | dilations=(1, 1, 1), 11 | out_indices=(2, ), 12 | frozen_stages=1, 13 | norm_cfg=dict(type='BN', requires_grad=False), 14 | norm_eval=True, 15 | style='caffe'), 16 | neck=None, 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=1024, 20 | feat_channels=1024, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | # model training and testing settings 34 | train_cfg=dict( 35 | rpn=dict( 36 | assigner=dict( 37 | type='MaxIoUAssigner', 38 | pos_iou_thr=0.7, 39 | neg_iou_thr=0.3, 40 | min_pos_iou=0.3, 41 | ignore_iof_thr=-1), 42 | sampler=dict( 43 | type='RandomSampler', 44 | num=256, 45 | pos_fraction=0.5, 46 | neg_pos_ub=-1, 47 | add_gt_as_proposals=False), 48 | allowed_border=0, 49 | pos_weight=-1, 50 | debug=False)), 51 | test_cfg=dict( 52 | rpn=dict( 53 | nms_pre=12000, 54 | max_per_img=2000, 55 | nms=dict(type='nms', iou_threshold=0.7), 56 | min_bbox_size=0))) 57 | 
-------------------------------------------------------------------------------- /configs/_base_/models/rpn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='open-mmlab://detectron2/resnet50_caffe', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=3, 9 | strides=(1, 2, 2), 10 | dilations=(1, 1, 1), 11 | out_indices=(2, ), 12 | frozen_stages=1, 13 | norm_cfg=dict(type='BN', requires_grad=False), 14 | norm_eval=True, 15 | style='caffe'), 16 | neck=None, 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=1024, 20 | feat_channels=1024, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | # model training and testing settings 34 | train_cfg=dict( 35 | rpn=dict( 36 | assigner=dict( 37 | type='MaxIoUAssigner', 38 | pos_iou_thr=0.7, 39 | neg_iou_thr=0.3, 40 | min_pos_iou=0.3, 41 | ignore_iof_thr=-1), 42 | sampler=dict( 43 | type='RandomSampler', 44 | num=256, 45 | pos_fraction=0.5, 46 | neg_pos_ub=-1, 47 | add_gt_as_proposals=False), 48 | allowed_border=0, 49 | pos_weight=-1, 50 | debug=False)), 51 | test_cfg=dict( 52 | rpn=dict( 53 | nms_pre=12000, 54 | max_per_img=2000, 55 | nms=dict(type='nms', iou_threshold=0.7), 56 | min_bbox_size=0))) 57 | -------------------------------------------------------------------------------- /configs/models/retinanet_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | start_level=1, 19 | add_extra_convs='on_input', 20 | num_outs=5), 21 | bbox_head=dict( 22 | type='RetinaHead', 23 | num_classes=80, 24 | in_channels=256, 25 | stacked_convs=4, 26 | feat_channels=256, 27 | anchor_generator=dict( 28 | type='AnchorGenerator', 29 | octave_base_scale=4, 30 | scales_per_octave=3, 31 | ratios=[0.5, 1.0, 2.0], 32 | strides=[8, 16, 32, 64, 128]), 33 | bbox_coder=dict( 34 | type='DeltaXYWHBBoxCoder', 35 | target_means=[.0, .0, .0, .0], 36 | target_stds=[1.0, 1.0, 1.0, 1.0]), 37 | loss_cls=dict( 38 | type='FocalLoss', 39 | use_sigmoid=True, 40 | gamma=2.0, 41 | alpha=0.25, 42 | loss_weight=1.0), 43 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 44 | # model training and testing settings 45 | train_cfg=dict( 46 | assigner=dict( 47 | type='MaxIoUAssigner', 48 | pos_iou_thr=0.5, 49 | neg_iou_thr=0.4, 50 | min_pos_iou=0, 51 | ignore_iof_thr=-1), 52 | allowed_border=-1, 53 | pos_weight=-1, 54 | debug=False), 55 | test_cfg=dict( 56 | nms_pre=1000, 57 | min_bbox_size=0, 58 | score_thr=0.05, 59 | nms=dict(type='nms', iou_threshold=0.5), 60 | max_per_img=100)) 61 | -------------------------------------------------------------------------------- /configs/_base_/models/retinanet_r50_fpn.py: 
-------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | start_level=1, 19 | add_extra_convs='on_input', 20 | num_outs=5), 21 | bbox_head=dict( 22 | type='RetinaHead', 23 | num_classes=80, 24 | in_channels=256, 25 | stacked_convs=4, 26 | feat_channels=256, 27 | anchor_generator=dict( 28 | type='AnchorGenerator', 29 | octave_base_scale=4, 30 | scales_per_octave=3, 31 | ratios=[0.5, 1.0, 2.0], 32 | strides=[8, 16, 32, 64, 128]), 33 | bbox_coder=dict( 34 | type='DeltaXYWHBBoxCoder', 35 | target_means=[.0, .0, .0, .0], 36 | target_stds=[1.0, 1.0, 1.0, 1.0]), 37 | loss_cls=dict( 38 | type='FocalLoss', 39 | use_sigmoid=True, 40 | gamma=2.0, 41 | alpha=0.25, 42 | loss_weight=1.0), 43 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 44 | # model training and testing settings 45 | train_cfg=dict( 46 | assigner=dict( 47 | type='MaxIoUAssigner', 48 | pos_iou_thr=0.5, 49 | neg_iou_thr=0.4, 50 | min_pos_iou=0, 51 | ignore_iof_thr=-1), 52 | allowed_border=-1, 53 | pos_weight=-1, 54 | debug=False), 55 | test_cfg=dict( 56 | nms_pre=1000, 57 | min_bbox_size=0, 58 | score_thr=0.05, 59 | nms=dict(type='nms', iou_threshold=0.5), 60 | max_per_img=100)) 61 | -------------------------------------------------------------------------------- /configs/models/rpn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | # model training and testing settings 36 | train_cfg=dict( 37 | rpn=dict( 38 | assigner=dict( 39 | type='MaxIoUAssigner', 40 | pos_iou_thr=0.7, 41 | neg_iou_thr=0.3, 42 | min_pos_iou=0.3, 43 | ignore_iof_thr=-1), 44 | sampler=dict( 45 | type='RandomSampler', 46 | num=256, 47 | pos_fraction=0.5, 48 | neg_pos_ub=-1, 49 | add_gt_as_proposals=False), 50 | allowed_border=0, 51 | pos_weight=-1, 52 | debug=False)), 53 | test_cfg=dict( 54 | rpn=dict( 55 | nms_pre=2000, 56 | max_per_img=1000, 57 | nms=dict(type='nms', iou_threshold=0.7), 58 | min_bbox_size=0))) 59 | -------------------------------------------------------------------------------- /configs/_base_/models/rpn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | 
pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | # model training and testing settings 36 | train_cfg=dict( 37 | rpn=dict( 38 | assigner=dict( 39 | type='MaxIoUAssigner', 40 | pos_iou_thr=0.7, 41 | neg_iou_thr=0.3, 42 | min_pos_iou=0.3, 43 | ignore_iof_thr=-1), 44 | sampler=dict( 45 | type='RandomSampler', 46 | num=256, 47 | pos_fraction=0.5, 48 | neg_pos_ub=-1, 49 | add_gt_as_proposals=False), 50 | allowed_border=0, 51 | pos_weight=-1, 52 | debug=False)), 53 | test_cfg=dict( 54 | rpn=dict( 55 | nms_pre=2000, 56 | max_per_img=1000, 57 | nms=dict(type='nms', iou_threshold=0.7), 58 | min_bbox_size=0))) 59 | -------------------------------------------------------------------------------- /configs/models/fast_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FastRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | roi_head=dict( 20 | type='StandardRoIHead', 21 | bbox_roi_extractor=dict( 22 | type='SingleRoIExtractor', 23 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 24 | out_channels=256, 25 | featmap_strides=[4, 8, 16, 32]), 26 | bbox_head=dict( 27 | type='Shared2FCBBoxHead', 28 | in_channels=256, 29 | fc_out_channels=1024, 30 | roi_feat_size=7, 31 | num_classes=80, 32 | bbox_coder=dict( 33 | type='DeltaXYWHBBoxCoder', 34 | target_means=[0., 0., 0., 0.], 35 | target_stds=[0.1, 0.1, 0.2, 0.2]), 36 | reg_class_agnostic=False, 37 | loss_cls=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 39 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 40 | # model training and testing settings 41 | train_cfg=dict( 42 | rcnn=dict( 43 | assigner=dict( 44 | type='MaxIoUAssigner', 45 | pos_iou_thr=0.5, 46 | neg_iou_thr=0.5, 47 | min_pos_iou=0.5, 48 | match_low_quality=False, 49 | ignore_iof_thr=-1), 50 | sampler=dict( 51 | type='RandomSampler', 52 | num=512, 53 | pos_fraction=0.25, 54 | neg_pos_ub=-1, 55 | add_gt_as_proposals=True), 56 | pos_weight=-1, 57 | debug=False)), 58 | test_cfg=dict( 59 | rcnn=dict( 60 | score_thr=0.05, 61 | nms=dict(type='nms', iou_threshold=0.5), 62 | max_per_img=100))) 63 | -------------------------------------------------------------------------------- /configs/_base_/models/fast_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = 
dict( 3 | type='FastRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | roi_head=dict( 20 | type='StandardRoIHead', 21 | bbox_roi_extractor=dict( 22 | type='SingleRoIExtractor', 23 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 24 | out_channels=256, 25 | featmap_strides=[4, 8, 16, 32]), 26 | bbox_head=dict( 27 | type='Shared2FCBBoxHead', 28 | in_channels=256, 29 | fc_out_channels=1024, 30 | roi_feat_size=7, 31 | num_classes=80, 32 | bbox_coder=dict( 33 | type='DeltaXYWHBBoxCoder', 34 | target_means=[0., 0., 0., 0.], 35 | target_stds=[0.1, 0.1, 0.2, 0.2]), 36 | reg_class_agnostic=False, 37 | loss_cls=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 39 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 40 | # model training and testing settings 41 | train_cfg=dict( 42 | rcnn=dict( 43 | assigner=dict( 44 | type='MaxIoUAssigner', 45 | pos_iou_thr=0.5, 46 | neg_iou_thr=0.5, 47 | min_pos_iou=0.5, 48 | match_low_quality=False, 49 | ignore_iof_thr=-1), 50 | sampler=dict( 51 | type='RandomSampler', 52 | num=512, 53 | pos_fraction=0.25, 54 | neg_pos_ub=-1, 55 | add_gt_as_proposals=True), 56 | pos_weight=-1, 57 | debug=False)), 58 | test_cfg=dict( 59 | rcnn=dict( 60 | score_thr=0.05, 61 | nms=dict(type='nms', iou_threshold=0.5), 62 | max_per_img=100))) 63 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.txt 6 | !requirements.txt 7 | !id.txt 8 | # C extensions 9 | *.zip 10 | *.so 11 | *.png 12 | *.jpg 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | val_s/ 27 | wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | *.out 33 | *.npy 34 | *.json 35 | !panoptic_coco_categories.json 36 | !panoptic_cityscapes_categories.json 37 | !panoptic_ade20k_categories.json 38 | !panoptic_mapilarry_categories.json 39 | /wandb 40 | /datasets 41 | /checkpoints 42 | # PyInstaller 43 | # Usually these files are written by a python script from a template 44 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
45 | *.manifest 46 | *.spec 47 | # Installer logs 48 | pip-log.txt 49 | pip-delete-this-directory.txt 50 | 51 | # Unit test / coverage reports 52 | htmlcov/ 53 | .tox/ 54 | .coverage 55 | .coverage.* 56 | .cache 57 | nosetests.xml 58 | coverage.xml 59 | *.cover 60 | .hypothesis/ 61 | .pytest_cache/ 62 | 63 | # Translations 64 | *.mo 65 | *.pot 66 | 67 | # Django stuff: 68 | *.log 69 | local_settings.py 70 | db.sqlite3 71 | 72 | # Flask stuff: 73 | instance/ 74 | .webassets-cache 75 | 76 | # Scrapy stuff: 77 | .scrapy 78 | 79 | # Sphinx documentation 80 | docs/_build/ 81 | 82 | # PyBuilder 83 | target/ 84 | query/ 85 | # Jupyter Notebook 86 | .ipynb_checkpoints 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # celery beat schedule file 92 | celerybeat-schedule 93 | 94 | # SageMath parsed files 95 | *.sage.py 96 | 97 | # Environments 98 | .env 99 | .venv 100 | env/ 101 | venv/ 102 | ENV/ 103 | env.bak/ 104 | venv.bak/ 105 | 106 | # Spyder project settings 107 | .spyderproject 108 | .spyproject 109 | 110 | # Rope project settings 111 | .ropeproject 112 | 113 | # mkdocs documentation 114 | /site 115 | 116 | # mypy 117 | .mypy_cache/ 118 | 119 | data/ 120 | data 121 | .vscode 122 | .idea 123 | .DS_Store 124 | 125 | # custom 126 | *.pdf 127 | *.pkl 128 | *.pkl.json 129 | *.log.json 130 | work_dirs/ 131 | arun_log/ 132 | # Pytorch 133 | 134 | *.pth 135 | *.py~ 136 | *.sh~ 137 | -------------------------------------------------------------------------------- /configs/_base_/datasets/coco_panoptic_plus.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CocoDataset_panoptic' 3 | data_root = 'datasets/coco/' 4 | coco_root = 'datasets/' 5 | img_norm_cfg = dict( 6 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True,with_seg=True), 10 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 11 | dict(type='RandomFlip', flip_ratio=0.5), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size_divisor=32), 14 | dict(type='DefaultFormatBundle'), 15 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks','gt_semantic_seg']), 16 | ] 17 | test_pipeline = [ 18 | dict(type='LoadImageFromFile'), 19 | dict( 20 | type='MultiScaleFlipAug', 21 | img_scale=(1333, 800), 22 | flip=False, 23 | transforms=[ 24 | dict(type='Resize', keep_ratio=True), 25 | dict(type='RandomFlip'), 26 | dict(type='Normalize', **img_norm_cfg), 27 | dict(type='Pad', size_divisor=32), 28 | dict(type='ImageToTensor', keys=['img']), 29 | dict(type='Collect', keys=['img']), 30 | ]) 31 | ] 32 | data = dict( 33 | samples_per_gpu=2, 34 | workers_per_gpu=2, 35 | train=dict( 36 | type=dataset_type, 37 | ann_file= './datasets/annotations/panoptic_train2017_detection_format.json', 38 | img_prefix=data_root + 'train2017/', 39 | pipeline=train_pipeline), 40 | val=dict( 41 | 42 | segmentations_folder='./seg', 43 | gt_json = './datasets/annotations/panoptic_val2017.json', 44 | gt_folder = './datasets/annotations/panoptic_val2017', 45 | type=dataset_type, 46 | ann_file=data_root + 'annotations/instances_val2017.json', 47 | img_prefix=data_root + 'val2017/', 48 | pipeline=test_pipeline), 49 | test=dict( 50 | segmentations_folder='./seg', 51 | gt_json = './datasets/annotations/panoptic_val2017.json', 52 | gt_folder = './datasets/annotations/panoptic_val2017', 53 | type=dataset_type, 54 | #ann_file= 
'./datasets/coco/annotations/image_info_test-dev2017.json', 55 | ann_file=data_root + 'annotations/instances_val2017.json', 56 | #img_prefix=data_root + '/test2017/', 57 | img_prefix=data_root + 'val2017/', 58 | pipeline=test_pipeline) 59 | ) 60 | evaluation = dict(metric=['bbox', 'segm', 'panoptic']) 61 | #evaluation = dict(interval=1, metric='bbox') -------------------------------------------------------------------------------- /easymd/core/evaluation/eval_hooks.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | import torch.distributed as dist 4 | from mmcv.runner import DistEvalHook as BaseDistEvalHook 5 | from mmcv.runner import EvalHook as BaseEvalHook 6 | from torch.nn.modules.batchnorm import _BatchNorm 7 | 8 | 9 | class EvalHook_plus(BaseEvalHook): 10 | 11 | def _do_evaluate(self, runner): 12 | """perform evaluation and save ckpt.""" 13 | if not self._should_evaluate(runner): 14 | return 15 | 16 | from easymd.apis import single_gpu_test_plus 17 | results = single_gpu_test_plus(runner.model, self.dataloader, show=False) 18 | runner.log_buffer.output['eval_iter_num'] = len(self.dataloader) 19 | key_score = self.evaluate(runner, results) 20 | if self.save_best: 21 | self._save_ckpt(runner, key_score) 22 | 23 | class DistEvalHook_plus(BaseDistEvalHook): 24 | def __init__(self,segmentations_folder=None,datasets = 'coco',**kwargs): 25 | self.segmentations_folder = segmentations_folder 26 | self.datasets = datasets 27 | super(DistEvalHook_plus,self).__init__(**kwargs) 28 | def _do_evaluate(self, runner): 29 | """perform evaluation and save ckpt.""" 30 | # Synchronization of BatchNorm's buffer (running_mean 31 | # and running_var) is not supported in the DDP of pytorch, 32 | # which may cause the inconsistent performance of models in 33 | # different ranks, so we broadcast BatchNorm's buffers 34 | # of rank 0 to other ranks to avoid this. 35 | if self.broadcast_bn_buffer: 36 | model = runner.model 37 | for name, module in model.named_modules(): 38 | if isinstance(module, 39 | _BatchNorm) and module.track_running_stats: 40 | dist.broadcast(module.running_var, 0) 41 | dist.broadcast(module.running_mean, 0) 42 | 43 | if not self._should_evaluate(runner): 44 | return 45 | 46 | tmpdir = self.tmpdir 47 | if tmpdir is None: 48 | tmpdir = osp.join(runner.work_dir, '.eval_hook') 49 | 50 | from easymd.apis import multi_gpu_test_plus 51 | results = multi_gpu_test_plus( 52 | runner.model, 53 | self.dataloader, 54 | datasets = self.datasets, 55 | segmentations_folder=self.segmentations_folder, 56 | tmpdir=tmpdir, 57 | gpu_collect=self.gpu_collect) 58 | if runner.rank == 0: 59 | print('\n') 60 | runner.log_buffer.output['eval_iter_num'] = len(self.dataloader) 61 | key_score = self.evaluate(runner, results) 62 | 63 | if self.save_best: 64 | self._save_ckpt(runner, key_score) -------------------------------------------------------------------------------- /easymd/runner/checkpoints.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
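# easymd's replacement for mmcv's load_checkpoint: on top of any caller-supplied
# revise_keys it always remaps legacy parameter names (mask_head ->
# things_mask_head, mask_head2 -> stuff_mask_head, cls_branches2 ->
# cls_thing_branches) so that older Panoptic-SegFormer checkpoints keep loading.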
2 | import io 3 | import os 4 | import os.path as osp 5 | import pkgutil 6 | import re 7 | import time 8 | import warnings 9 | from collections import OrderedDict 10 | from importlib import import_module 11 | from tempfile import TemporaryDirectory 12 | 
13 | import torch 14 | import torchvision 15 | from torch.optim import Optimizer 16 | 17 | import mmcv 18 | 19 | 
20 | from mmcv.runner.checkpoint import _load_checkpoint,load_state_dict 21 | 22 | 
23 | def load_checkpoint(model, 24 | filename, 25 | map_location=None, 26 | strict=False, 27 | logger=None, 28 | revise_keys=[(r'^module\.', '')]):
29 | """Load checkpoint from a file or URI. 30 | 
31 | Args: 32 | model (Module): Module to load checkpoint. 33 | filename (str): Accept local filepath, URL, ``torchvision://xxx``, 34 | ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for 35 | details.
36 | map_location (str): Same as :func:`torch.load`. 37 | strict (bool): Whether to allow different params for the model and 38 | checkpoint. 39 | logger (:mod:`logging.Logger` or None): The logger for error message.
40 | revise_keys (list): A list of customized keywords to modify the 41 | state_dict in checkpoint. Each item is a (pattern, replacement) 42 | pair of the regular expression operations. Default: strip 43 | the prefix 'module.' by [(r'^module\\.', '')]. 44 | 
45 | Returns: 46 | dict or OrderedDict: The loaded checkpoint. 47 | """
48 | default_revise_keys = [ 49 | ('\\.mask_head\\.','.things_mask_head.'), 50 | ('\\.mask_head2\\.','.stuff_mask_head.'), 51 | ('\\.cls_branches2\\.', '.cls_thing_branches.'), 52 | 53 | ]
54 | revise_keys = revise_keys + default_revise_keys # concatenate instead of extend() so the mutable default argument is never modified
55 | checkpoint = _load_checkpoint(filename, map_location, logger) 56 | # OrderedDict is a subclass of dict 57 | if not isinstance(checkpoint, dict): 58 | raise RuntimeError( 59 | f'No state_dict found in checkpoint file {filename}')
60 | # get state_dict from checkpoint 61 | if 'state_dict' in checkpoint: 62 | state_dict = checkpoint['state_dict'] 63 | else: 64 | state_dict = checkpoint 65 | 
66 | # strip prefix of state_dict 67 | metadata = getattr(state_dict, '_metadata', OrderedDict()) 68 | for p, r in revise_keys: 69 | state_dict = OrderedDict( 70 | {re.sub(p, r, k): v 71 | for k, v in state_dict.items()})
72 | # Keep metadata in state_dict 73 | state_dict._metadata = metadata 74 | 75 | # load state_dict 76 | load_state_dict(model, state_dict, strict, logger) 77 | return checkpoint 78 | 79 | --------------------------------------------------------------------------------
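The key revision above is a plain re.sub over every state_dict key. A self-contained sketch of what the default pairs do (hypothetical key name, no mmcv required):

    import re

    revise_keys = [(r'^module\.', ''), ('\\.mask_head\\.', '.things_mask_head.')]
    key = 'module.bbox_head.mask_head.0.weight'
    for pattern, repl in revise_keys:
        key = re.sub(pattern, repl, key)
    print(key)  # bbox_head.things_mask_head.0.weight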
/easymd/analysis_tools/get_flops.py: --------------------------------------------------------------------------------
1 | import argparse 2 | 3 | import torch 4 | from mmcv import Config, DictAction 5 | 6 | from mmdet.models import build_detector 7 | 
8 | try: 9 | from mmcv.cnn import get_model_complexity_info 10 | except ImportError: 11 | raise ImportError('Please upgrade mmcv to >0.6.2') 12 | 13 | 
14 | def parse_args(): 15 | parser = argparse.ArgumentParser(description='Get the FLOPs of a detector') 16 | parser.add_argument('config', help='train config file path')
17 | parser.add_argument( 18 | '--shape', 19 | type=int, 20 | nargs='+', 21 | default=[1280, 800], 22 | help='input image size')
23 | parser.add_argument( 24 | '--cfg-options', 25 | nargs='+', 26 | action=DictAction, 27 | help='override some settings in the used config, the key-value pair ' 28 | 'in xxx=yyy format will be merged into config file. If the value to ' 29 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 30 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 31 | 'Note that the quotation marks are necessary and that no white space ' 32 | 'is allowed.')
33 | args = parser.parse_args() 34 | return args 35 | 36 | 
37 | def main(): 38 | 39 | args = parse_args() 40 | 
41 | if len(args.shape) == 1: 42 | input_shape = (3, args.shape[0], args.shape[0]) 43 | elif len(args.shape) == 2: 44 | input_shape = (3, ) + tuple(args.shape) 45 | else: 46 | raise ValueError('invalid input shape') 47 | 
48 | cfg = Config.fromfile(args.config) 49 | if args.cfg_options is not None: 50 | cfg.merge_from_dict(args.cfg_options) 51 | # import modules from string list. 52 | if cfg.get('custom_imports', None): 53 | from mmcv.utils import import_modules_from_strings 54 | import_modules_from_strings(**cfg['custom_imports']) 55 | 
56 | model = build_detector( 57 | cfg.model, 58 | train_cfg=cfg.get('train_cfg'), 59 | test_cfg=cfg.get('test_cfg')) 60 | if torch.cuda.is_available(): 61 | model.cuda() 62 | model.eval() 63 | 
64 | if hasattr(model, 'forward_dummy'): 65 | model.forward = model.forward_dummy 66 | else: 67 | raise NotImplementedError( 68 | 'FLOPs counter is not currently supported with {}'. 69 | format(model.__class__.__name__)) 70 | 
71 | flops, params = get_model_complexity_info(model, input_shape) 72 | split_line = '=' * 30 73 | print(f'{split_line}\nInput shape: {input_shape}\n' 74 | f'Flops: {flops}\nParams: {params}\n{split_line}')
75 | print('!!!Please be cautious if you use the results in papers. ' 76 | 'You may need to check if all ops are supported and verify that the ' 77 | 'flops computation is correct.') 78 | 79 | 
80 | if __name__ == '__main__': 81 | main() 82 | --------------------------------------------------------------------------------
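get_model_complexity_info is not tied to detectors; it works on any nn.Module with a standard forward. A minimal sketch with a torchvision backbone as a stand-in:

    import torchvision
    from mmcv.cnn import get_model_complexity_info

    model = torchvision.models.resnet50()
    model.eval()
    flops, params = get_model_complexity_info(model, (3, 224, 224))
    print(flops, params)  # human-readable strings by default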
/tools/ana_tools/get_flops.py: --------------------------------------------------------------------------------
1 | import argparse 2 | 3 | import torch 4 | from mmcv import Config, DictAction 5 | 6 | from mmdet.models import build_detector 7 | import easymd
8 | try: 9 | from mmcv.cnn import get_model_complexity_info 10 | except ImportError: 11 | raise ImportError('Please upgrade mmcv to >0.6.2') 12 | 13 | 
14 | def parse_args(): 15 | parser = argparse.ArgumentParser(description='Get the FLOPs of a detector') 16 | parser.add_argument('config', help='train config file path')
17 | parser.add_argument( 18 | '--shape', 19 | type=int, 20 | nargs='+', 21 | default=[1200, 800], 22 | help='input image size')
23 | parser.add_argument( 24 | '--cfg-options', 25 | nargs='+', 26 | action=DictAction, 27 | help='override some settings in the used config, the key-value pair ' 28 | 'in xxx=yyy format will be merged into config file. If the value to ' 29 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 30 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 31 | 'Note that the quotation marks are necessary and that no white space ' 32 | 'is allowed.')
33 | args = parser.parse_args() 34 | return args 35 | 36 | 
37 | def main(): 38 | 39 | args = parse_args() 40 | 
41 | if len(args.shape) == 1: 42 | input_shape = (3, args.shape[0], args.shape[0]) 43 | elif len(args.shape) == 2: 44 | input_shape = (3, ) + tuple(args.shape) 45 | else: 46 | raise ValueError('invalid input shape') 47 | 
48 | cfg = Config.fromfile(args.config) 49 | if args.cfg_options is not None: 50 | cfg.merge_from_dict(args.cfg_options) 51 | # import modules from string list. 52 | if cfg.get('custom_imports', None): 53 | from mmcv.utils import import_modules_from_strings 54 | import_modules_from_strings(**cfg['custom_imports']) 55 | 
56 | model = build_detector( 57 | cfg.model, 58 | train_cfg=cfg.get('train_cfg'), 59 | test_cfg=cfg.get('test_cfg')) 60 | if torch.cuda.is_available(): 61 | model.cuda() 62 | model.eval() 63 | 
64 | if hasattr(model, 'forward_dummy'): 65 | model.forward = model.forward_dummy 66 | else: 67 | raise NotImplementedError( 68 | 'FLOPs counter is not currently supported with {}'. 69 | format(model.__class__.__name__)) 70 | 
71 | flops, params = get_model_complexity_info(model, input_shape) 72 | split_line = '=' * 30 73 | print(f'{split_line}\nInput shape: {input_shape}\n' 74 | f'Flops: {flops}\nParams: {params}\n{split_line}')
75 | print('!!!Please be cautious if you use the results in papers. ' 76 | 'You may need to check if all ops are supported and verify that the ' 77 | 'flops computation is correct.') 78 | 79 | 
80 | if __name__ == '__main__': 81 | main() 82 | --------------------------------------------------------------------------------
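A typical invocation of the tool above, pointing at one of the panformer configs (paths assumed):

    python tools/ana_tools/get_flops.py configs/panformer/panformer_r50_12e_coco_panoptic.py --shape 1200 800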
/easymd/analysis_tools/anchor_analyze.py: --------------------------------------------------------------------------------
1 | from mmdet.core import build_anchor_generator 2 | import mmdet 3 | import mmcv 4 | import numpy as np 5 | import time 6 | import cv2 as cv 7 | 
8 | def show_anchor(input_shape_hw, stride, anchor_generator_cfg, random_n, select_n): 9 | img = np.zeros(input_shape_hw, np.uint8) 10 | feature_map = [] 11 | for s in stride: 12 | feature_map.append([input_shape_hw[0] // s, input_shape_hw[1] // s])
13 | anchor_generator = build_anchor_generator(anchor_generator_cfg) 14 | anchors = anchor_generator.grid_anchors(feature_map) # anchor coordinates at the original image scale, xyxy format, top-left origin 15 | base_anchors = anchor_generator.base_anchors 16 | 
17 | for i,each in enumerate(base_anchors): 18 | each[:,0:4:2] += input_shape_hw[0]//2 19 | each[:,1:4:2] += input_shape_hw[1]//2
20 | for _ in range(random_n): 21 | disp_img = [] 22 | for i,anchor in enumerate(anchors): 23 | img = np.zeros(input_shape_hw, np.uint8) 24 | anchor = anchor.cpu().numpy() 25 | print(anchor.shape)
26 | index = (anchor[:, 0] > 0) & (anchor[:, 1] > 0) & (anchor[:, 2] < input_shape_hw[1]) & \ 27 | (anchor[:, 3] < input_shape_hw[0]) 28 | anchor = anchor[index] 29 | 
30 | anchor = np.random.permutation(anchor) 31 | img_ = mmcv.imshow_bboxes(img, anchor[:select_n], thickness=1, show=False) 32 | img_ = mmcv.imshow_bboxes(img_, base_anchors[i].cpu().numpy(), thickness=1, colors='red', show=False) 33 | #disp_img.append(img_)
34 | cv.imshow('img',img_) 35 | if cv.waitKey(0) & 0xFF== ord('q'): 36 | exit(0) 37 | #time.sleep(0.3) 38 | 
39 | def demo_retinanet(input_shape_hw): 40 | stride = [8, 16, 32, 64, 128] 41 | anchor_generator_cfg = dict( 42 | type='AnchorGenerator',
43 | octave_base_scale=4, # base anchor scale for every level; enlarging it enlarges all anchors
44 | scales_per_octave=3, # 3 scales per level: 2**0, 2**(1/3), 2**(2/3)
45 | ratios=[0.5, 1.0, 2.0], # 3 aspect ratios per level, so 9 anchors at each location
46 | strides=stride) # output stride of each level, so anchor sizes range from 4x8=32 to 4x128x2**(2/3)~=812.7
47 | random_n = 10 48 | select_n = 100 49 | show_anchor(input_shape_hw, stride, anchor_generator_cfg, random_n, select_n) 50 | 51 | 
52 | def demo_yolov3(input_shape_hw): 53 | stride = [32, 16, 8] 54 | anchor_generator_cfg = dict( 55 | type='YOLOAnchorGenerator', 56 | base_sizes=[[(116, 90), (156, 198), (373, 326)], 57 | [(30, 61), (62, 45), (59, 119)], 58 | [(10, 13), (16, 30), (33, 23)]], 59 | strides=stride) 60 | 
61 | random_n = 10 62 | select_n = 100 63 | show_anchor(input_shape_hw, stride, anchor_generator_cfg, random_n, select_n) 64 | 65 | 
66 | if __name__ == '__main__': 67 | input_shape_hw = (320, 320, 3) 68 | demo_retinanet(input_shape_hw) 69 | #demo_yolov3(input_shape_hw) --------------------------------------------------------------------------------
/easymd/analysis_tools/browse_dataset.py: --------------------------------------------------------------------------------
1 | import argparse 2 | import os 3 | from pathlib import Path 4 | 5 | import mmdet 6 | import mmcv 7 | from mmcv import Config 8 | from mmdet.datasets.builder import build_dataset 9 | import random 10 | import cv2 as cv 11 | import numpy as np 12 | import easymd
13 | def parse_args(): 14 | parser = argparse.ArgumentParser(description='Browse a dataset') 15 | parser.add_argument('config', help='train config file path')
16 | # the following three pipeline steps are excluded to make visualization easier
17 | parser.add_argument( 18 | '--skip-type', 19 | type=str, 20 | nargs='+', 21 | default=['DefaultFormatBundle', 'Normalize', 'Collect'], 22 | help='pipeline steps to skip for visualization')
23 | parser.add_argument( 24 | '--output-dir', 25 | default=None, 26 | type=str, 27 | help='If there is no display interface, you can save the visualizations here')
28 | parser.add_argument('--not-show', default=False, action='store_true') 29 | parser.add_argument( 30 | '--show-interval', 31 | type=int, 32 | default=0, 33 | help='the interval of show (ms)') 34 | args = parser.parse_args() 35 | return args 36 | 37 | 
38 | def retrieve_data_cfg(config_path, skip_type): 39 | cfg = Config.fromfile(config_path) 40 | train_data_cfg = cfg.data.train 41 | if train_data_cfg.get('dataset', None) is not None:
42 | # VOC-style nested dataset config
43 | datasets = train_data_cfg['dataset'] 44 | datasets['pipeline'] = [ 45 | x for x in datasets.pipeline if x['type'] not in skip_type 46 | ]
47 | else: 48 | train_data_cfg['pipeline'] = [ 49 | x for x in train_data_cfg.pipeline if x['type'] not in skip_type 50 | ] 51 | 52 | return cfg 53 | 54 | 
55 | def main(): 56 | args = parse_args() 57 | cfg = retrieve_data_cfg(args.config, args.skip_type) 58 | 59 | dataset = build_dataset(cfg.data.train) 60 | 
61 | progress_bar = mmcv.ProgressBar(len(dataset)) 62 | for item in dataset: 63 | #print(item.keys()) 64 | filename = os.path.join(args.output_dir, 65 | Path(item['filename']).name 66 | ) if args.output_dir is not None else None
67 | img = mmcv.imshow_det_bboxes( 68 | item['img'], 69 | item['gt_bboxes'], 70 | item['gt_labels'], 71 | class_names=dataset.CLASSES, 72 | show=False, 73 | out_file=filename, 74 | wait_time=args.show_interval)
75 | img = img.astype('uint8') 76 | #print(dir(item['gt_masks'][0])) 77 | for each in item['gt_masks']: 78 | color = [random.randint(0,255),random.randint(0,255),random.randint(0,255)] 79 | each_3 = each[...,None] *color 80 | each_3 = each_3.astype('uint8') 81 | img[each==1] = (img[each==1]*0.4 + each_3[each==1]*0.6).astype('uint8') 82 | #np.clip(new_img_with_alpha,0,255)
83 | cv.imshow('img',img) 84 | if cv.waitKey(0) & 0xFF== ord('q'): 85 | exit(0) 86 | progress_bar.update() 87 | 88 | 
89 | if __name__ == '__main__': 90 | main() --------------------------------------------------------------------------------
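browse_dataset.py is run against a training config; it strips DefaultFormatBundle/Normalize/Collect from the pipeline and then draws ground-truth boxes and alpha-blended masks. A typical invocation (output directory optional):

    python easymd/analysis_tools/browse_dataset.py configs/panformer/panformer_r50_12e_coco_panoptic.py --output-dir ./vis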
all_things = 0 14 | all_stuff = 0 15 | with open('./datasets/annotations/panoptic_val2017_detection_format.json','r') as f: 16 | data = json.load(f) 17 | print(len(data['annotations'])) 18 | for each in data['annotations']: 19 | if each['category_id']<=80: 20 | all_things+=1 21 | else: 22 | all_stuff+=1 23 | print(all_things,all_stuff,all_things/(all_things+all_stuff)) 24 | map = np.zeros([400,133]) 25 | things_stuff_list = [] 26 | for i in range(num_query): 27 | with open('./query/{i}.txt'.format(i=i)) as f: 28 | img = torch.ones([500,500,3]).numpy()*255 29 | things = 0 30 | stuff = 0 31 | for line in f.readlines(): 32 | 33 | data = line.strip().split(' ') 34 | t= int (data[0]) 35 | if t<80: 36 | things+=1 37 | else: 38 | stuff+=1 39 | 40 | cx, cy, w, h, bbox_area, mask_area = float(data[1]), float(data[2]), float(data[3]), float(data[4]), data[5],int(data[6]) 41 | bbox_area = float(bbox_area[7:-1]) 42 | cx, cy, w, h = int(500*cx), int(500*cy), int(500*w+0.5), int(500*h+0.5) 43 | #cv.drawKeypoints() 44 | ''' 45 | if w/h>1.5: # bbox_area<=322: 46 | cv.circle(img, (cx,cy), 2, color=(255,0,0), thickness=1) 47 | elif w/h<0.7: #322`_""" 16 | 17 | def __init__(self, 18 | backbone, 19 | neck=None, 20 | bbox_head=None, 21 | train_cfg=None, 22 | test_cfg=None, 23 | pretrained=None, 24 | init_cfg=None): 25 | 26 | super(DETR_plus, self).__init__(backbone, neck, bbox_head, train_cfg, 27 | test_cfg, pretrained, init_cfg) 28 | self.count=0 29 | def simple_test(self, img, img_metas=None, rescale=False): 30 | """Test function without test time augmentation. 31 | 32 | Args: 33 | imgs (list[torch.Tensor]): List of multiple images 34 | img_metas (list[dict]): List of image information. 35 | rescale (bool, optional): Whether to rescale the results. 36 | Defaults to False. 37 | 38 | Returns: 39 | list[list[np.ndarray]]: BBox results of each image and classes. 40 | The outer list corresponds to each image. The inner list 41 | corresponds to each class. 42 | """ 43 | 44 | batch_size = len(img_metas) 45 | assert batch_size == 1, 'Currently only batch_size 1 for inference ' \ 46 | f'mode is supported. Found batch_size {batch_size}.' 
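        # Flow below (descriptive note): extract backbone/neck features, run the
        # bbox head, then let get_bboxes() post-process. Unlike vanilla mmdet
        # detectors, this head returns a dict that may hold 'bbox', 'segm' and
        # 'panoptic' entries, which are unpacked into results_dict.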
47 | x = self.extract_feat(img) 48 | outs = self.bbox_head(x, img_metas) 49 | 50 | results = self.bbox_head.get_bboxes(*outs, img_metas, rescale=rescale) 51 | assert isinstance(results,dict), 'The returned results should be a dict' 52 | 53 | 54 | results_dict = {} 55 | for return_type in results.keys(): 56 | if return_type == 'bbox': 57 | labels = results['labels'] 58 | bbox_list = results['bbox'] 59 | bbox_results = [ 60 | bbox2result(det_bboxes, det_labels, self.bbox_head.num_things_classes) 61 | for det_bboxes, det_labels in zip(bbox_list,labels) 62 | ] 63 | results_dict['bbox'] = bbox_results 64 | elif return_type == 'segm': 65 | seg_list = results['segm'] 66 | labels = results['labels'] 67 | 68 | masks_results = [ 69 | mask2result(det_segm,det_labels,self.bbox_head.num_things_classes) 70 | for det_segm, det_labels in zip(seg_list,labels) 71 | ] 72 | results_dict['segm'] = masks_results 73 | elif return_type == 'panoptic': 74 | results_dict['panoptic'] = results['panoptic'] 75 | 76 | 77 | 78 | 79 | 80 | 81 | return results_dict 82 | -------------------------------------------------------------------------------- /easymd/analysis_tools/atss_anchor.py: -------------------------------------------------------------------------------- 1 | 2 | from mmdet.core import anchor, build_anchor_generator,build_assigner 3 | import mmdet 4 | import mmcv 5 | import numpy as np 6 | import time 7 | import cv2 as cv 8 | import torch 9 | def show_anchor(input_shape_hw, stride, anchor_generator_cfg, random_n, select_n): 10 | img = np.zeros(input_shape_hw, np.uint8) 11 | feature_map = [] 12 | for s in stride: 13 | feature_map.append([input_shape_hw[0] // s, input_shape_hw[1] // s]) 14 | anchor_generator = build_anchor_generator(anchor_generator_cfg) 15 | anchors = anchor_generator.grid_anchors(feature_map) # anchor coordinates at the original image scale, xyxy format with top-left origin 16 | base_anchors = anchor_generator.base_anchors 17 | assigner=dict(type='ATSSAssigner', topk=9) 18 | assigner = build_assigner(assigner) 19 | 20 | #print(anchors[0].shape,anchors[1].shape) 21 | nums_per_level = [len(each) for each in anchors] 22 | #for each in anchors: 23 | # nums_per_level.append(len(each)) 24 | anchors = torch.cat([each for each in anchors],dim=0) 25 | gt_bboxes = torch.tensor([[100,100,300,300],[400,400,600,600]]).to(anchors.device) 26 | gt_labels = torch.tensor([1,2]).to(anchors.device) 27 | #print(anchors.device,gt_bboxes.device) 28 | #print(nums_per_level) 29 | assign_result = assigner.assign(anchors, nums_per_level, gt_bboxes, None, gt_labels) 30 | print((assign_result.gt_inds!=0).nonzero().shape) 31 | anchors = anchors[(assign_result.gt_inds!=0).nonzero().squeeze(1)] 32 | print(anchors) 33 | values,indices = anchors.min(-1) 34 | anchors = anchors[(values>0).nonzero().squeeze(1)].cpu().numpy() 35 | print(anchors) 36 | img_ = mmcv.imshow_bboxes(img, anchors, thickness=1, show=False) 37 | img_ = mmcv.imshow_bboxes(img_,gt_bboxes.cpu().numpy() , thickness=1, colors='red', show=False) 38 | cv.imshow('img',img_) 39 | if cv.waitKey(0) & 0xFF== ord('q'): 40 | exit(0) 41 | ''' 42 | for i,each in enumerate(base_anchors): 43 | each[:,0:4:2] += input_shape_hw[0]//2 44 | each[:,1:4:2] += input_shape_hw[1]//2 45 | for _ in range(random_n): 46 | disp_img = [] 47 | for i,anchor in enumerate(anchors): 48 | img = np.zeros(input_shape_hw, np.uint8) 49 | anchor = anchor.cpu().numpy() 50 | print(anchor.shape) 51 | index = (anchor[:, 0] > 0) & (anchor[:, 1] > 0) & (anchor[:, 2] < input_shape_hw[1]) & \ 52 | (anchor[:, 3] < input_shape_hw[0]) 53 | anchor = anchor[index] 54 
| 55 | anchor = np.random.permutation(anchor) 56 | img_ = mmcv.imshow_bboxes(img, anchor[:select_n], thickness=1, show=False) 57 | img_ = mmcv.imshow_bboxes(img_, base_anchors[i].cpu().numpy(), thickness=1, colors='red', show=False) 58 | #disp_img.append(img_) 59 | 60 | #time.sleep(0.3) 61 | ''' 62 | def demo_atss(input_shape_hw): 63 | stride = [8, 16, 32, 64, 128] 64 | anchor_generator_cfg = dict( 65 | type='AnchorGenerator', 66 | octave_base_scale=8, # base anchor scale per level; increasing it enlarges all anchors 67 | scales_per_octave=1, # a single scale per octave, so one anchor size per level 68 | ratios=[1.0], # a single 1:1 aspect ratio, so ATSS places one anchor per location 69 | strides=stride) # output stride per feature level; anchor sizes span 8x8=64 up to 8x128=1024 70 | random_n = 10 71 | select_n = 100 72 | show_anchor(input_shape_hw, stride, anchor_generator_cfg, random_n, select_n) 73 | 74 | 75 | 76 | 77 | 78 | if __name__ == '__main__': 79 | input_shape_hw = (640, 640, 3) 80 | demo_atss(input_shape_hw) 81 | #demo_yolov3(input_shape_hw) -------------------------------------------------------------------------------- /configs/models/faster_rcnn_r50_caffe_dc5.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='FasterRCNN', 5 | pretrained='open-mmlab://detectron2/resnet50_caffe', 6 | backbone=dict( 7 | type='ResNet', 8 | depth=50, 9 | num_stages=4, 10 | strides=(1, 2, 2, 1), 11 | dilations=(1, 1, 1, 2), 12 | out_indices=(3, ), 13 | frozen_stages=1, 14 | norm_cfg=norm_cfg, 15 | norm_eval=True, 16 | style='caffe'), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=2048, 20 | feat_channels=2048, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | roi_head=dict( 34 | type='StandardRoIHead', 35 | bbox_roi_extractor=dict( 36 | type='SingleRoIExtractor', 37 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 38 | out_channels=2048, 39 | featmap_strides=[16]), 40 | bbox_head=dict( 41 | type='Shared2FCBBoxHead', 42 | in_channels=2048, 43 | fc_out_channels=1024, 44 | roi_feat_size=7, 45 | num_classes=80, 46 | bbox_coder=dict( 47 | type='DeltaXYWHBBoxCoder', 48 | target_means=[0., 0., 0., 0.], 49 | target_stds=[0.1, 0.1, 0.2, 0.2]), 50 | reg_class_agnostic=False, 51 | loss_cls=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 53 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 54 | # model training and testing settings 55 | train_cfg=dict( 56 | rpn=dict( 57 | assigner=dict( 58 | type='MaxIoUAssigner', 59 | pos_iou_thr=0.7, 60 | neg_iou_thr=0.3, 61 | min_pos_iou=0.3, 62 | match_low_quality=True, 63 | ignore_iof_thr=-1), 64 | sampler=dict( 65 | type='RandomSampler', 66 | num=256, 67 | pos_fraction=0.5, 68 | neg_pos_ub=-1, 69 | add_gt_as_proposals=False), 70 | allowed_border=0, 71 | pos_weight=-1, 72 | debug=False), 73 | rpn_proposal=dict( 74 | nms_pre=12000, 75 | max_per_img=2000, 76 | nms=dict(type='nms', iou_threshold=0.7), 77 | min_bbox_size=0), 78 | rcnn=dict( 79 | assigner=dict( 80 | type='MaxIoUAssigner', 81 | pos_iou_thr=0.5, 82 | neg_iou_thr=0.5, 83 | min_pos_iou=0.5, 84 | match_low_quality=False, 85 | ignore_iof_thr=-1), 86 | sampler=dict( 87 
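            # R-CNN stage sampler below: 512 RoIs per image with pos_fraction=0.25,
            # i.e. at most 128 positive samples per image.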
| type='RandomSampler', 88 | num=512, 89 | pos_fraction=0.25, 90 | neg_pos_ub=-1, 91 | add_gt_as_proposals=True), 92 | pos_weight=-1, 93 | debug=False)), 94 | test_cfg=dict( 95 | rpn=dict( 96 | nms=dict(type='nms', iou_threshold=0.7), 97 | nms_pre=6000, 98 | max_per_img=1000, 99 | min_bbox_size=0), 100 | rcnn=dict( 101 | score_thr=0.05, 102 | nms=dict(type='nms', iou_threshold=0.5), 103 | max_per_img=100))) 104 | -------------------------------------------------------------------------------- /configs/_base_/models/faster_rcnn_r50_caffe_dc5.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='FasterRCNN', 5 | pretrained='open-mmlab://detectron2/resnet50_caffe', 6 | backbone=dict( 7 | type='ResNet', 8 | depth=50, 9 | num_stages=4, 10 | strides=(1, 2, 2, 1), 11 | dilations=(1, 1, 1, 2), 12 | out_indices=(3, ), 13 | frozen_stages=1, 14 | norm_cfg=norm_cfg, 15 | norm_eval=True, 16 | style='caffe'), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=2048, 20 | feat_channels=2048, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | roi_head=dict( 34 | type='StandardRoIHead', 35 | bbox_roi_extractor=dict( 36 | type='SingleRoIExtractor', 37 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 38 | out_channels=2048, 39 | featmap_strides=[16]), 40 | bbox_head=dict( 41 | type='Shared2FCBBoxHead', 42 | in_channels=2048, 43 | fc_out_channels=1024, 44 | roi_feat_size=7, 45 | num_classes=80, 46 | bbox_coder=dict( 47 | type='DeltaXYWHBBoxCoder', 48 | target_means=[0., 0., 0., 0.], 49 | target_stds=[0.1, 0.1, 0.2, 0.2]), 50 | reg_class_agnostic=False, 51 | loss_cls=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 53 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 54 | # model training and testing settings 55 | train_cfg=dict( 56 | rpn=dict( 57 | assigner=dict( 58 | type='MaxIoUAssigner', 59 | pos_iou_thr=0.7, 60 | neg_iou_thr=0.3, 61 | min_pos_iou=0.3, 62 | match_low_quality=True, 63 | ignore_iof_thr=-1), 64 | sampler=dict( 65 | type='RandomSampler', 66 | num=256, 67 | pos_fraction=0.5, 68 | neg_pos_ub=-1, 69 | add_gt_as_proposals=False), 70 | allowed_border=0, 71 | pos_weight=-1, 72 | debug=False), 73 | rpn_proposal=dict( 74 | nms_pre=12000, 75 | max_per_img=2000, 76 | nms=dict(type='nms', iou_threshold=0.7), 77 | min_bbox_size=0), 78 | rcnn=dict( 79 | assigner=dict( 80 | type='MaxIoUAssigner', 81 | pos_iou_thr=0.5, 82 | neg_iou_thr=0.5, 83 | min_pos_iou=0.5, 84 | match_low_quality=False, 85 | ignore_iof_thr=-1), 86 | sampler=dict( 87 | type='RandomSampler', 88 | num=512, 89 | pos_fraction=0.25, 90 | neg_pos_ub=-1, 91 | add_gt_as_proposals=True), 92 | pos_weight=-1, 93 | debug=False)), 94 | test_cfg=dict( 95 | rpn=dict( 96 | nms=dict(type='nms', iou_threshold=0.7), 97 | nms_pre=6000, 98 | max_per_img=1000, 99 | min_bbox_size=0), 100 | rcnn=dict( 101 | score_thr=0.05, 102 | nms=dict(type='nms', iou_threshold=0.5), 103 | max_per_img=100))) 104 | -------------------------------------------------------------------------------- /configs/models/faster_rcnn_r50_fpn.py: 
-------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FasterRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | roi_head=dict( 36 | type='StandardRoIHead', 37 | bbox_roi_extractor=dict( 38 | type='SingleRoIExtractor', 39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 40 | out_channels=256, 41 | featmap_strides=[4, 8, 16, 32]), 42 | bbox_head=dict( 43 | type='Shared2FCBBoxHead', 44 | in_channels=256, 45 | fc_out_channels=1024, 46 | roi_feat_size=7, 47 | num_classes=80, 48 | bbox_coder=dict( 49 | type='DeltaXYWHBBoxCoder', 50 | target_means=[0., 0., 0., 0.], 51 | target_stds=[0.1, 0.1, 0.2, 0.2]), 52 | reg_class_agnostic=False, 53 | loss_cls=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 56 | # model training and testing settings 57 | train_cfg=dict( 58 | rpn=dict( 59 | assigner=dict( 60 | type='MaxIoUAssigner', 61 | pos_iou_thr=0.7, 62 | neg_iou_thr=0.3, 63 | min_pos_iou=0.3, 64 | match_low_quality=True, 65 | ignore_iof_thr=-1), 66 | sampler=dict( 67 | type='RandomSampler', 68 | num=256, 69 | pos_fraction=0.5, 70 | neg_pos_ub=-1, 71 | add_gt_as_proposals=False), 72 | allowed_border=-1, 73 | pos_weight=-1, 74 | debug=False), 75 | rpn_proposal=dict( 76 | nms_pre=2000, 77 | max_per_img=1000, 78 | nms=dict(type='nms', iou_threshold=0.7), 79 | min_bbox_size=0), 80 | rcnn=dict( 81 | assigner=dict( 82 | type='MaxIoUAssigner', 83 | pos_iou_thr=0.5, 84 | neg_iou_thr=0.5, 85 | min_pos_iou=0.5, 86 | match_low_quality=False, 87 | ignore_iof_thr=-1), 88 | sampler=dict( 89 | type='RandomSampler', 90 | num=512, 91 | pos_fraction=0.25, 92 | neg_pos_ub=-1, 93 | add_gt_as_proposals=True), 94 | pos_weight=-1, 95 | debug=False)), 96 | test_cfg=dict( 97 | rpn=dict( 98 | nms_pre=1000, 99 | max_per_img=1000, 100 | nms=dict(type='nms', iou_threshold=0.7), 101 | min_bbox_size=0), 102 | rcnn=dict( 103 | score_thr=0.05, 104 | nms=dict(type='nms', iou_threshold=0.5), 105 | max_per_img=100) 106 | # soft-nms is also supported for rcnn testing 107 | # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05) 108 | )) 109 | -------------------------------------------------------------------------------- /configs/_base_/models/faster_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FasterRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | 
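        # norm_eval=True keeps BatchNorm layers in eval mode, i.e. the running
        # statistics stay frozen while fine-tuning with small batch sizes.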
style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | roi_head=dict( 36 | type='StandardRoIHead', 37 | bbox_roi_extractor=dict( 38 | type='SingleRoIExtractor', 39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 40 | out_channels=256, 41 | featmap_strides=[4, 8, 16, 32]), 42 | bbox_head=dict( 43 | type='Shared2FCBBoxHead', 44 | in_channels=256, 45 | fc_out_channels=1024, 46 | roi_feat_size=7, 47 | num_classes=80, 48 | bbox_coder=dict( 49 | type='DeltaXYWHBBoxCoder', 50 | target_means=[0., 0., 0., 0.], 51 | target_stds=[0.1, 0.1, 0.2, 0.2]), 52 | reg_class_agnostic=False, 53 | loss_cls=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 56 | # model training and testing settings 57 | train_cfg=dict( 58 | rpn=dict( 59 | assigner=dict( 60 | type='MaxIoUAssigner', 61 | pos_iou_thr=0.7, 62 | neg_iou_thr=0.3, 63 | min_pos_iou=0.3, 64 | match_low_quality=True, 65 | ignore_iof_thr=-1), 66 | sampler=dict( 67 | type='RandomSampler', 68 | num=256, 69 | pos_fraction=0.5, 70 | neg_pos_ub=-1, 71 | add_gt_as_proposals=False), 72 | allowed_border=-1, 73 | pos_weight=-1, 74 | debug=False), 75 | rpn_proposal=dict( 76 | nms_pre=2000, 77 | max_per_img=1000, 78 | nms=dict(type='nms', iou_threshold=0.7), 79 | min_bbox_size=0), 80 | rcnn=dict( 81 | assigner=dict( 82 | type='MaxIoUAssigner', 83 | pos_iou_thr=0.5, 84 | neg_iou_thr=0.5, 85 | min_pos_iou=0.5, 86 | match_low_quality=False, 87 | ignore_iof_thr=-1), 88 | sampler=dict( 89 | type='RandomSampler', 90 | num=512, 91 | pos_fraction=0.25, 92 | neg_pos_ub=-1, 93 | add_gt_as_proposals=True), 94 | pos_weight=-1, 95 | debug=False)), 96 | test_cfg=dict( 97 | rpn=dict( 98 | nms_pre=1000, 99 | max_per_img=1000, 100 | nms=dict(type='nms', iou_threshold=0.7), 101 | min_bbox_size=0), 102 | rcnn=dict( 103 | score_thr=0.05, 104 | nms=dict(type='nms', iou_threshold=0.5), 105 | max_per_img=100) 106 | # soft-nms is also supported for rcnn testing 107 | # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05) 108 | )) 109 | -------------------------------------------------------------------------------- /configs/models/faster_rcnn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='FasterRCNN', 5 | pretrained='open-mmlab://detectron2/resnet50_caffe', 6 | backbone=dict( 7 | type='ResNet', 8 | depth=50, 9 | num_stages=3, 10 | strides=(1, 2, 2), 11 | dilations=(1, 1, 1), 12 | out_indices=(2, ), 13 | frozen_stages=1, 14 | norm_cfg=norm_cfg, 15 | norm_eval=True, 16 | style='caffe'), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=1024, 20 | feat_channels=1024, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | 
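            # Zero-mean / unit-std delta encoding for the RPN regression targets.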
target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | roi_head=dict( 34 | type='StandardRoIHead', 35 | shared_head=dict( 36 | type='ResLayer', 37 | depth=50, 38 | stage=3, 39 | stride=2, 40 | dilation=1, 41 | style='caffe', 42 | norm_cfg=norm_cfg, 43 | norm_eval=True), 44 | bbox_roi_extractor=dict( 45 | type='SingleRoIExtractor', 46 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 47 | out_channels=1024, 48 | featmap_strides=[16]), 49 | bbox_head=dict( 50 | type='BBoxHead', 51 | with_avg_pool=True, 52 | roi_feat_size=7, 53 | in_channels=2048, 54 | num_classes=80, 55 | bbox_coder=dict( 56 | type='DeltaXYWHBBoxCoder', 57 | target_means=[0., 0., 0., 0.], 58 | target_stds=[0.1, 0.1, 0.2, 0.2]), 59 | reg_class_agnostic=False, 60 | loss_cls=dict( 61 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 62 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 63 | # model training and testing settings 64 | train_cfg=dict( 65 | rpn=dict( 66 | assigner=dict( 67 | type='MaxIoUAssigner', 68 | pos_iou_thr=0.7, 69 | neg_iou_thr=0.3, 70 | min_pos_iou=0.3, 71 | match_low_quality=True, 72 | ignore_iof_thr=-1), 73 | sampler=dict( 74 | type='RandomSampler', 75 | num=256, 76 | pos_fraction=0.5, 77 | neg_pos_ub=-1, 78 | add_gt_as_proposals=False), 79 | allowed_border=0, 80 | pos_weight=-1, 81 | debug=False), 82 | rpn_proposal=dict( 83 | nms_pre=12000, 84 | max_per_img=2000, 85 | nms=dict(type='nms', iou_threshold=0.7), 86 | min_bbox_size=0), 87 | rcnn=dict( 88 | assigner=dict( 89 | type='MaxIoUAssigner', 90 | pos_iou_thr=0.5, 91 | neg_iou_thr=0.5, 92 | min_pos_iou=0.5, 93 | match_low_quality=False, 94 | ignore_iof_thr=-1), 95 | sampler=dict( 96 | type='RandomSampler', 97 | num=512, 98 | pos_fraction=0.25, 99 | neg_pos_ub=-1, 100 | add_gt_as_proposals=True), 101 | pos_weight=-1, 102 | debug=False)), 103 | test_cfg=dict( 104 | rpn=dict( 105 | nms_pre=6000, 106 | max_per_img=1000, 107 | nms=dict(type='nms', iou_threshold=0.7), 108 | min_bbox_size=0), 109 | rcnn=dict( 110 | score_thr=0.05, 111 | nms=dict(type='nms', iou_threshold=0.5), 112 | max_per_img=100))) 113 | -------------------------------------------------------------------------------- /configs/_base_/models/faster_rcnn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='FasterRCNN', 5 | pretrained='open-mmlab://detectron2/resnet50_caffe', 6 | backbone=dict( 7 | type='ResNet', 8 | depth=50, 9 | num_stages=3, 10 | strides=(1, 2, 2), 11 | dilations=(1, 1, 1), 12 | out_indices=(2, ), 13 | frozen_stages=1, 14 | norm_cfg=norm_cfg, 15 | norm_eval=True, 16 | style='caffe'), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=1024, 20 | feat_channels=1024, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | roi_head=dict( 34 | type='StandardRoIHead', 35 | shared_head=dict( 36 | type='ResLayer', 37 | depth=50, 38 | stage=3, 39 | stride=2, 40 | dilation=1, 41 | 
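            # The res5 stage (stage index 3) serves as the shared per-RoI head,
            # the standard C4 layout for Caffe-style Faster R-CNN.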
style='caffe', 42 | norm_cfg=norm_cfg, 43 | norm_eval=True), 44 | bbox_roi_extractor=dict( 45 | type='SingleRoIExtractor', 46 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 47 | out_channels=1024, 48 | featmap_strides=[16]), 49 | bbox_head=dict( 50 | type='BBoxHead', 51 | with_avg_pool=True, 52 | roi_feat_size=7, 53 | in_channels=2048, 54 | num_classes=80, 55 | bbox_coder=dict( 56 | type='DeltaXYWHBBoxCoder', 57 | target_means=[0., 0., 0., 0.], 58 | target_stds=[0.1, 0.1, 0.2, 0.2]), 59 | reg_class_agnostic=False, 60 | loss_cls=dict( 61 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 62 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 63 | # model training and testing settings 64 | train_cfg=dict( 65 | rpn=dict( 66 | assigner=dict( 67 | type='MaxIoUAssigner', 68 | pos_iou_thr=0.7, 69 | neg_iou_thr=0.3, 70 | min_pos_iou=0.3, 71 | match_low_quality=True, 72 | ignore_iof_thr=-1), 73 | sampler=dict( 74 | type='RandomSampler', 75 | num=256, 76 | pos_fraction=0.5, 77 | neg_pos_ub=-1, 78 | add_gt_as_proposals=False), 79 | allowed_border=0, 80 | pos_weight=-1, 81 | debug=False), 82 | rpn_proposal=dict( 83 | nms_pre=12000, 84 | max_per_img=2000, 85 | nms=dict(type='nms', iou_threshold=0.7), 86 | min_bbox_size=0), 87 | rcnn=dict( 88 | assigner=dict( 89 | type='MaxIoUAssigner', 90 | pos_iou_thr=0.5, 91 | neg_iou_thr=0.5, 92 | min_pos_iou=0.5, 93 | match_low_quality=False, 94 | ignore_iof_thr=-1), 95 | sampler=dict( 96 | type='RandomSampler', 97 | num=512, 98 | pos_fraction=0.25, 99 | neg_pos_ub=-1, 100 | add_gt_as_proposals=True), 101 | pos_weight=-1, 102 | debug=False)), 103 | test_cfg=dict( 104 | rpn=dict( 105 | nms_pre=6000, 106 | max_per_img=1000, 107 | nms=dict(type='nms', iou_threshold=0.7), 108 | min_bbox_size=0), 109 | rcnn=dict( 110 | score_thr=0.05, 111 | nms=dict(type='nms', iou_threshold=0.5), 112 | max_per_img=100))) 113 | -------------------------------------------------------------------------------- /easymd/analysis_tools/benchmark.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | 4 | import torch 5 | from mmcv import Config, DictAction 6 | from mmcv.cnn import fuse_conv_bn 7 | from mmcv.parallel import MMDataParallel 8 | from mmcv.runner import load_checkpoint, wrap_fp16_model 9 | 10 | from mmdet.datasets import (build_dataloader, build_dataset, 11 | replace_ImageToTensor) 12 | from mmdet.models import build_detector 13 | 14 | 15 | def parse_args(): 16 | parser = argparse.ArgumentParser(description='MMDet benchmark a model') 17 | parser.add_argument('config', help='test config file path') 18 | parser.add_argument('checkpoint', help='checkpoint file') 19 | parser.add_argument( 20 | '--log-interval', default=50, help='interval of logging') 21 | parser.add_argument( 22 | '--fuse-conv-bn', 23 | action='store_true', 24 | help='Whether to fuse conv and bn, this will slightly increase' 25 | 'the inference speed') 26 | parser.add_argument( 27 | '--cfg-options', 28 | nargs='+', 29 | action=DictAction, 30 | help='override some settings in the used config, the key-value pair ' 31 | 'in xxx=yyy format will be merged into config file. If the value to ' 32 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 33 | 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' 34 | 'Note that the quotation marks are necessary and that no white space ' 35 | 'is allowed.') 36 | args = parser.parse_args() 37 | return args 38 | 39 | 40 | def main(): 41 | args = parse_args() 42 | 43 | cfg = Config.fromfile(args.config) 44 | if args.cfg_options is not None: 45 | cfg.merge_from_dict(args.cfg_options) 46 | # import modules from string list. 47 | if cfg.get('custom_imports', None): 48 | from mmcv.utils import import_modules_from_strings 49 | import_modules_from_strings(**cfg['custom_imports']) 50 | # set cudnn_benchmark 51 | if cfg.get('cudnn_benchmark', False): 52 | torch.backends.cudnn.benchmark = True 53 | cfg.model.pretrained = None 54 | cfg.data.test.test_mode = True 55 | 56 | # build the dataloader 57 | samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1) 58 | if samples_per_gpu > 1: 59 | # Replace 'ImageToTensor' to 'DefaultFormatBundle' 60 | cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline) 61 | dataset = build_dataset(cfg.data.test) 62 | data_loader = build_dataloader( 63 | dataset, 64 | samples_per_gpu=1, 65 | workers_per_gpu=cfg.data.workers_per_gpu, 66 | dist=False, 67 | shuffle=False) 68 | 69 | # build the model and load checkpoint 70 | cfg.model.train_cfg = None 71 | model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg')) 72 | fp16_cfg = cfg.get('fp16', None) 73 | if fp16_cfg is not None: 74 | wrap_fp16_model(model) 75 | load_checkpoint(model, args.checkpoint, map_location='cpu') 76 | if args.fuse_conv_bn: 77 | model = fuse_conv_bn(model) 78 | 79 | model = MMDataParallel(model, device_ids=[0]) 80 | 81 | model.eval() 82 | 83 | # the first several iterations may be very slow so skip them 84 | num_warmup = 5 85 | pure_inf_time = 0 86 | 87 | # benchmark with 2000 image and take the average 88 | for i, data in enumerate(data_loader): 89 | 90 | torch.cuda.synchronize() 91 | start_time = time.perf_counter() 92 | 93 | with torch.no_grad(): 94 | model(return_loss=False, rescale=True, **data) 95 | 96 | torch.cuda.synchronize() 97 | elapsed = time.perf_counter() - start_time 98 | 99 | if i >= num_warmup: 100 | pure_inf_time += elapsed 101 | if (i + 1) % args.log_interval == 0: 102 | fps = (i + 1 - num_warmup) / pure_inf_time 103 | print(f'Done image [{i + 1:<3}/ 2000], fps: {fps:.1f} img / s') 104 | 105 | if (i + 1) == 2000: 106 | pure_inf_time += elapsed 107 | fps = (i + 1 - num_warmup) / pure_inf_time 108 | print(f'Overall fps: {fps:.1f} img / s') 109 | break 110 | 111 | 112 | if __name__ == '__main__': 113 | main() 114 | -------------------------------------------------------------------------------- /tools/ana_tools/benchmark.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | import easymd 4 | import torch 5 | from mmcv import Config, DictAction 6 | from mmcv.cnn import fuse_conv_bn 7 | from mmcv.parallel import MMDataParallel 8 | from mmcv.runner import load_checkpoint, wrap_fp16_model 9 | 10 | from mmdet.datasets import (build_dataloader, build_dataset, 11 | replace_ImageToTensor) 12 | from mmdet.models import build_detector 13 | 14 | 15 | def parse_args(): 16 | parser = argparse.ArgumentParser(description='MMDet benchmark a model') 17 | parser.add_argument('config', help='test config file path') 18 | parser.add_argument('--checkpoint',default=None, help='checkpoint file') 19 | parser.add_argument( 20 | '--log-interval', default=50, help='interval of logging') 21 | parser.add_argument( 22 | '--fuse-conv-bn', 23 | action='store_true', 
24 | help='Whether to fuse conv and bn, this will slightly increase' 25 | 'the inference speed') 26 | parser.add_argument( 27 | '--cfg-options', 28 | nargs='+', 29 | action=DictAction, 30 | help='override some settings in the used config, the key-value pair ' 31 | 'in xxx=yyy format will be merged into config file. If the value to ' 32 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 33 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 34 | 'Note that the quotation marks are necessary and that no white space ' 35 | 'is allowed.') 36 | args = parser.parse_args() 37 | return args 38 | 39 | 40 | def main(): 41 | args = parse_args() 42 | 43 | cfg = Config.fromfile(args.config) 44 | if args.cfg_options is not None: 45 | cfg.merge_from_dict(args.cfg_options) 46 | # import modules from string list. 47 | if cfg.get('custom_imports', None): 48 | from mmcv.utils import import_modules_from_strings 49 | import_modules_from_strings(**cfg['custom_imports']) 50 | # set cudnn_benchmark 51 | if cfg.get('cudnn_benchmark', False): 52 | torch.backends.cudnn.benchmark = True 53 | cfg.model.pretrained = None 54 | cfg.data.test.test_mode = True 55 | 56 | # build the dataloader 57 | samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1) 58 | if samples_per_gpu > 1: 59 | # Replace 'ImageToTensor' to 'DefaultFormatBundle' 60 | cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline) 61 | dataset = build_dataset(cfg.data.test) 62 | data_loader = build_dataloader( 63 | dataset, 64 | samples_per_gpu=1, 65 | workers_per_gpu=cfg.data.workers_per_gpu, 66 | dist=False, 67 | shuffle=False) 68 | 69 | # build the model and load checkpoint 70 | cfg.model.train_cfg = None 71 | model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg')) 72 | fp16_cfg = cfg.get('fp16', None) 73 | if fp16_cfg is not None: 74 | wrap_fp16_model(model) 75 | if args.checkpoint is not None: 76 | load_checkpoint(model, args.checkpoint, map_location='cpu') 77 | if args.fuse_conv_bn: 78 | model = fuse_conv_bn(model) 79 | 80 | model = MMDataParallel(model, device_ids=[0]) 81 | 82 | model.eval() 83 | 84 | # the first several iterations may be very slow so skip them 85 | num_warmup = 5 86 | pure_inf_time = 0 87 | 88 | # benchmark with 2000 image and take the average 89 | for i, data in enumerate(data_loader): 90 | 91 | torch.cuda.synchronize() 92 | start_time = time.perf_counter() 93 | 94 | with torch.no_grad(): 95 | model(return_loss=False, rescale=True, **data) 96 | 97 | torch.cuda.synchronize() 98 | elapsed = time.perf_counter() - start_time 99 | 100 | if i >= num_warmup: 101 | pure_inf_time += elapsed 102 | if (i + 1) % args.log_interval == 0: 103 | fps = (i + 1 - num_warmup) / pure_inf_time 104 | print(f'Done image [{i + 1:<3}/ 2000], fps: {fps:.1f} img / s') 105 | 106 | if (i + 1) == 2000: 107 | pure_inf_time += elapsed 108 | fps = (i + 1 - num_warmup) / pure_inf_time 109 | print(f'Overall fps: {fps:.1f} img / s') 110 | break 111 | 112 | 113 | if __name__ == '__main__': 114 | main() 115 | -------------------------------------------------------------------------------- /configs/models/mask_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='MaskRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | 
style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | roi_head=dict( 36 | type='StandardRoIHead', 37 | bbox_roi_extractor=dict( 38 | type='SingleRoIExtractor', 39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 40 | out_channels=256, 41 | featmap_strides=[4, 8, 16, 32]), 42 | bbox_head=dict( 43 | type='Shared2FCBBoxHead', 44 | in_channels=256, 45 | fc_out_channels=1024, 46 | roi_feat_size=7, 47 | num_classes=80, 48 | bbox_coder=dict( 49 | type='DeltaXYWHBBoxCoder', 50 | target_means=[0., 0., 0., 0.], 51 | target_stds=[0.1, 0.1, 0.2, 0.2]), 52 | reg_class_agnostic=False, 53 | loss_cls=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 56 | mask_roi_extractor=dict( 57 | type='SingleRoIExtractor', 58 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 59 | out_channels=256, 60 | featmap_strides=[4, 8, 16, 32]), 61 | things_mask_head=dict( 62 | type='FCNMaskHead', 63 | num_convs=4, 64 | in_channels=256, 65 | conv_out_channels=256, 66 | num_classes=80, 67 | loss_mask=dict( 68 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 69 | # model training and testing settings 70 | train_cfg=dict( 71 | rpn=dict( 72 | assigner=dict( 73 | type='MaxIoUAssigner', 74 | pos_iou_thr=0.7, 75 | neg_iou_thr=0.3, 76 | min_pos_iou=0.3, 77 | match_low_quality=True, 78 | ignore_iof_thr=-1), 79 | sampler=dict( 80 | type='RandomSampler', 81 | num=256, 82 | pos_fraction=0.5, 83 | neg_pos_ub=-1, 84 | add_gt_as_proposals=False), 85 | allowed_border=-1, 86 | pos_weight=-1, 87 | debug=False), 88 | rpn_proposal=dict( 89 | nms_pre=2000, 90 | max_per_img=1000, 91 | nms=dict(type='nms', iou_threshold=0.7), 92 | min_bbox_size=0), 93 | rcnn=dict( 94 | assigner=dict( 95 | type='MaxIoUAssigner', 96 | pos_iou_thr=0.5, 97 | neg_iou_thr=0.5, 98 | min_pos_iou=0.5, 99 | match_low_quality=True, 100 | ignore_iof_thr=-1), 101 | sampler=dict( 102 | type='RandomSampler', 103 | num=512, 104 | pos_fraction=0.25, 105 | neg_pos_ub=-1, 106 | add_gt_as_proposals=True), 107 | mask_size=28, 108 | pos_weight=-1, 109 | debug=False)), 110 | test_cfg=dict( 111 | rpn=dict( 112 | nms_pre=1000, 113 | max_per_img=1000, 114 | nms=dict(type='nms', iou_threshold=0.7), 115 | min_bbox_size=0), 116 | rcnn=dict( 117 | score_thr=0.05, 118 | nms=dict(type='nms', iou_threshold=0.5), 119 | max_per_img=100, 120 | mask_thr_binary=0.5))) 121 | -------------------------------------------------------------------------------- /configs/_base_/models/mask_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='MaskRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | 
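        # FPN neck below: fuses the four backbone stages (256/512/1024/2048
        # channels) into five 256-channel pyramid levels.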
type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | roi_head=dict( 36 | type='StandardRoIHead', 37 | bbox_roi_extractor=dict( 38 | type='SingleRoIExtractor', 39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 40 | out_channels=256, 41 | featmap_strides=[4, 8, 16, 32]), 42 | bbox_head=dict( 43 | type='Shared2FCBBoxHead', 44 | in_channels=256, 45 | fc_out_channels=1024, 46 | roi_feat_size=7, 47 | num_classes=80, 48 | bbox_coder=dict( 49 | type='DeltaXYWHBBoxCoder', 50 | target_means=[0., 0., 0., 0.], 51 | target_stds=[0.1, 0.1, 0.2, 0.2]), 52 | reg_class_agnostic=False, 53 | loss_cls=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 56 | mask_roi_extractor=dict( 57 | type='SingleRoIExtractor', 58 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 59 | out_channels=256, 60 | featmap_strides=[4, 8, 16, 32]), 61 | things_mask_head=dict( 62 | type='FCNMaskHead', 63 | num_convs=4, 64 | in_channels=256, 65 | conv_out_channels=256, 66 | num_classes=80, 67 | loss_mask=dict( 68 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 69 | # model training and testing settings 70 | train_cfg=dict( 71 | rpn=dict( 72 | assigner=dict( 73 | type='MaxIoUAssigner', 74 | pos_iou_thr=0.7, 75 | neg_iou_thr=0.3, 76 | min_pos_iou=0.3, 77 | match_low_quality=True, 78 | ignore_iof_thr=-1), 79 | sampler=dict( 80 | type='RandomSampler', 81 | num=256, 82 | pos_fraction=0.5, 83 | neg_pos_ub=-1, 84 | add_gt_as_proposals=False), 85 | allowed_border=-1, 86 | pos_weight=-1, 87 | debug=False), 88 | rpn_proposal=dict( 89 | nms_pre=2000, 90 | max_per_img=1000, 91 | nms=dict(type='nms', iou_threshold=0.7), 92 | min_bbox_size=0), 93 | rcnn=dict( 94 | assigner=dict( 95 | type='MaxIoUAssigner', 96 | pos_iou_thr=0.5, 97 | neg_iou_thr=0.5, 98 | min_pos_iou=0.5, 99 | match_low_quality=True, 100 | ignore_iof_thr=-1), 101 | sampler=dict( 102 | type='RandomSampler', 103 | num=512, 104 | pos_fraction=0.25, 105 | neg_pos_ub=-1, 106 | add_gt_as_proposals=True), 107 | mask_size=28, 108 | pos_weight=-1, 109 | debug=False)), 110 | test_cfg=dict( 111 | rpn=dict( 112 | nms_pre=1000, 113 | max_per_img=1000, 114 | nms=dict(type='nms', iou_threshold=0.7), 115 | min_bbox_size=0), 116 | rcnn=dict( 117 | score_thr=0.05, 118 | nms=dict(type='nms', iou_threshold=0.5), 119 | max_per_img=100, 120 | mask_thr_binary=0.5))) 121 | -------------------------------------------------------------------------------- /configs/models/mask_rcnn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='MaskRCNN', 5 | pretrained='open-mmlab://detectron2/resnet50_caffe', 6 | backbone=dict( 7 | type='ResNet', 8 | depth=50, 9 | num_stages=3, 10 | strides=(1, 2, 2), 11 | dilations=(1, 1, 1), 12 | out_indices=(2, ), 13 | frozen_stages=1, 14 | norm_cfg=norm_cfg, 15 | norm_eval=True, 16 
| style='caffe'), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=1024, 20 | feat_channels=1024, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | roi_head=dict( 34 | type='StandardRoIHead', 35 | shared_head=dict( 36 | type='ResLayer', 37 | depth=50, 38 | stage=3, 39 | stride=2, 40 | dilation=1, 41 | style='caffe', 42 | norm_cfg=norm_cfg, 43 | norm_eval=True), 44 | bbox_roi_extractor=dict( 45 | type='SingleRoIExtractor', 46 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 47 | out_channels=1024, 48 | featmap_strides=[16]), 49 | bbox_head=dict( 50 | type='BBoxHead', 51 | with_avg_pool=True, 52 | roi_feat_size=7, 53 | in_channels=2048, 54 | num_classes=80, 55 | bbox_coder=dict( 56 | type='DeltaXYWHBBoxCoder', 57 | target_means=[0., 0., 0., 0.], 58 | target_stds=[0.1, 0.1, 0.2, 0.2]), 59 | reg_class_agnostic=False, 60 | loss_cls=dict( 61 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 62 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 63 | mask_roi_extractor=None, 64 | things_mask_head=dict( 65 | type='FCNMaskHead', 66 | num_convs=0, 67 | in_channels=2048, 68 | conv_out_channels=256, 69 | num_classes=80, 70 | loss_mask=dict( 71 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 72 | # model training and testing settings 73 | train_cfg=dict( 74 | rpn=dict( 75 | assigner=dict( 76 | type='MaxIoUAssigner', 77 | pos_iou_thr=0.7, 78 | neg_iou_thr=0.3, 79 | min_pos_iou=0.3, 80 | match_low_quality=True, 81 | ignore_iof_thr=-1), 82 | sampler=dict( 83 | type='RandomSampler', 84 | num=256, 85 | pos_fraction=0.5, 86 | neg_pos_ub=-1, 87 | add_gt_as_proposals=False), 88 | allowed_border=0, 89 | pos_weight=-1, 90 | debug=False), 91 | rpn_proposal=dict( 92 | nms_pre=12000, 93 | max_per_img=2000, 94 | nms=dict(type='nms', iou_threshold=0.7), 95 | min_bbox_size=0), 96 | rcnn=dict( 97 | assigner=dict( 98 | type='MaxIoUAssigner', 99 | pos_iou_thr=0.5, 100 | neg_iou_thr=0.5, 101 | min_pos_iou=0.5, 102 | match_low_quality=False, 103 | ignore_iof_thr=-1), 104 | sampler=dict( 105 | type='RandomSampler', 106 | num=512, 107 | pos_fraction=0.25, 108 | neg_pos_ub=-1, 109 | add_gt_as_proposals=True), 110 | mask_size=14, 111 | pos_weight=-1, 112 | debug=False)), 113 | test_cfg=dict( 114 | rpn=dict( 115 | nms_pre=6000, 116 | nms=dict(type='nms', iou_threshold=0.7), 117 | max_per_img=1000, 118 | min_bbox_size=0), 119 | rcnn=dict( 120 | score_thr=0.05, 121 | nms=dict(type='nms', iou_threshold=0.5), 122 | max_per_img=100, 123 | mask_thr_binary=0.5))) 124 | -------------------------------------------------------------------------------- /configs/_base_/models/mask_rcnn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='MaskRCNN', 5 | pretrained='open-mmlab://detectron2/resnet50_caffe', 6 | backbone=dict( 7 | type='ResNet', 8 | depth=50, 9 | num_stages=3, 10 | strides=(1, 2, 2), 11 | dilations=(1, 1, 1), 12 | out_indices=(2, ), 13 | frozen_stages=1, 14 | norm_cfg=norm_cfg, 15 | norm_eval=True, 16 | style='caffe'), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | 
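        # The RPN runs on the single stride-16 C4 feature map, hence the
        # 1024-channel input below.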
in_channels=1024, 20 | feat_channels=1024, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | roi_head=dict( 34 | type='StandardRoIHead', 35 | shared_head=dict( 36 | type='ResLayer', 37 | depth=50, 38 | stage=3, 39 | stride=2, 40 | dilation=1, 41 | style='caffe', 42 | norm_cfg=norm_cfg, 43 | norm_eval=True), 44 | bbox_roi_extractor=dict( 45 | type='SingleRoIExtractor', 46 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 47 | out_channels=1024, 48 | featmap_strides=[16]), 49 | bbox_head=dict( 50 | type='BBoxHead', 51 | with_avg_pool=True, 52 | roi_feat_size=7, 53 | in_channels=2048, 54 | num_classes=80, 55 | bbox_coder=dict( 56 | type='DeltaXYWHBBoxCoder', 57 | target_means=[0., 0., 0., 0.], 58 | target_stds=[0.1, 0.1, 0.2, 0.2]), 59 | reg_class_agnostic=False, 60 | loss_cls=dict( 61 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 62 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 63 | mask_roi_extractor=None, 64 | things_mask_head=dict( 65 | type='FCNMaskHead', 66 | num_convs=0, 67 | in_channels=2048, 68 | conv_out_channels=256, 69 | num_classes=80, 70 | loss_mask=dict( 71 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 72 | # model training and testing settings 73 | train_cfg=dict( 74 | rpn=dict( 75 | assigner=dict( 76 | type='MaxIoUAssigner', 77 | pos_iou_thr=0.7, 78 | neg_iou_thr=0.3, 79 | min_pos_iou=0.3, 80 | match_low_quality=True, 81 | ignore_iof_thr=-1), 82 | sampler=dict( 83 | type='RandomSampler', 84 | num=256, 85 | pos_fraction=0.5, 86 | neg_pos_ub=-1, 87 | add_gt_as_proposals=False), 88 | allowed_border=0, 89 | pos_weight=-1, 90 | debug=False), 91 | rpn_proposal=dict( 92 | nms_pre=12000, 93 | max_per_img=2000, 94 | nms=dict(type='nms', iou_threshold=0.7), 95 | min_bbox_size=0), 96 | rcnn=dict( 97 | assigner=dict( 98 | type='MaxIoUAssigner', 99 | pos_iou_thr=0.5, 100 | neg_iou_thr=0.5, 101 | min_pos_iou=0.5, 102 | match_low_quality=False, 103 | ignore_iof_thr=-1), 104 | sampler=dict( 105 | type='RandomSampler', 106 | num=512, 107 | pos_fraction=0.25, 108 | neg_pos_ub=-1, 109 | add_gt_as_proposals=True), 110 | mask_size=14, 111 | pos_weight=-1, 112 | debug=False)), 113 | test_cfg=dict( 114 | rpn=dict( 115 | nms_pre=6000, 116 | nms=dict(type='nms', iou_threshold=0.7), 117 | max_per_img=1000, 118 | min_bbox_size=0), 119 | rcnn=dict( 120 | score_thr=0.05, 121 | nms=dict(type='nms', iou_threshold=0.5), 122 | max_per_img=100, 123 | mask_thr_binary=0.5))) 124 | -------------------------------------------------------------------------------- /easymd/models/losses/dice_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | 4 | import mmcv 5 | import torch 6 | import torch.nn as nn 7 | 8 | from mmdet.core import bbox_overlaps 9 | #from ..builder import LOSSES 10 | #from .utils import weighted_loss 11 | from mmdet.models.losses.utils import weighted_loss 12 | from mmdet.models.builder import LOSSES 13 | from easymd.models.utils.visual import save_tensor 14 | 15 | 16 | 17 | def center_of_mass(bitmasks): 18 | n, h, w = bitmasks.size() 19 | 20 | ys = torch.linspace(0, 1, h, dtype=torch.float32, 
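                        # normalized [0, 1] coordinate grid along the mask height,
                        # used for the first-moment (center-of-mass) computation below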
device=bitmasks.device) 21 | xs = torch.linspace(0, 1, w, dtype=torch.float32, device=bitmasks.device) 22 | 23 | m00 = bitmasks.sum(dim=-1).sum(dim=-1).clamp(min=1e-6) 24 | m10 = (bitmasks * xs).sum(dim=-1).sum(dim=-1) 25 | m01 = (bitmasks * ys[:, None]).sum(dim=-1).sum(dim=-1) 26 | center_x = m10 / m00 27 | center_y = m01 / m00 28 | return torch.stack([center_x, center_y],-1) 29 | #return center_x, center_y 30 | 31 | 32 | 33 | #@mmcv.jit(derivate=True, coderize=True) 34 | @weighted_loss 35 | def dice_loss(input, target,mask=None,eps=0.001): 36 | N,H,W = input.shape 37 | 38 | input = input.contiguous().view(N, H*W) 39 | target = target.contiguous().view(N, H*W).float() 40 | if mask is not None: 41 | mask = mask.contiguous().view(N, H*W).float() 42 | input = input * mask 43 | target = target * mask 44 | a = torch.sum(input * target, 1) 45 | b = torch.sum(input * input, 1) + eps 46 | c = torch.sum(target * target, 1) + eps 47 | d = (2 * a) / (b + c) 48 | #print('1-d max',(1-d).max()) 49 | return 1 - d 50 | 51 | @weighted_loss 52 | def l1_loss(pred, target): 53 | """L1 loss (element-wise; plain L1, not smooth L1). 54 | 55 | Args: 56 | pred (torch.Tensor): The prediction. 57 | target (torch.Tensor): The learning target of the prediction. 58 | beta (float): Unused placeholder kept from the smooth L1 signature; 59 | no piecewise thresholding is applied. 60 | 61 | Returns: 62 | torch.Tensor: Calculated loss 63 | """ 64 | beta=1.0 65 | assert beta > 0 66 | assert pred.size() == target.size() and target.numel() > 0 67 | loss = torch.abs(pred - target) 68 | return loss 69 | 70 | 71 | 72 | 73 | 74 | 75 | @LOSSES.register_module() 76 | class DiceLoss(nn.Module): 77 | 78 | def __init__(self, eps=1e-6, reduction='mean', loss_weight=1.0): 79 | super(DiceLoss, self).__init__() 80 | self.eps = eps 81 | self.reduction = reduction 82 | self.loss_weight = loss_weight 83 | self.count = 0 84 | def forward(self, 85 | pred, 86 | target, 87 | weight=None, 88 | mask=None, 89 | avg_factor=None, 90 | reduction_override=None, 91 | **kwargs): 92 | 93 | assert reduction_override in (None, 'none', 'mean', 'sum') 94 | reduction = ( 95 | reduction_override if reduction_override else self.reduction) 96 | #if weight is not None and weight.dim() > 1: 97 | # TODO: remove this in the future 98 | # reduce the weight of shape (n,w,h) to (n,) to match the 99 | # giou_loss of shape (n,) 100 | #assert weight.shape == pred.shape 101 | #weight = weight.mean((-2,-1)) 102 | loss = self.loss_weight * dice_loss( 103 | pred, 104 | target, 105 | weight, 106 | mask=mask, 107 | eps=self.eps, 108 | reduction=reduction, 109 | avg_factor=avg_factor, 110 | **kwargs) 111 | #print('DiceLoss',loss, avg_factor) 112 | return loss 113 | 114 | 115 | 116 | @LOSSES.register_module() 117 | class BCEFocalLoss(torch.nn.Module): 118 | """ 119 | Binary focal loss with a fixed alpha. 120 | """ 121 | def __init__(self, gamma=2, alpha=0.25, reduction='sum',loss_weight=1.0): 122 | super().__init__() 123 | self.gamma = gamma 124 | self.alpha = alpha 125 | self.reduction = reduction 126 | self.loss_weight = loss_weight 127 | def forward(self, _input, target): 128 | pt = torch.sigmoid(_input) 129 | 130 | #print(pt.shape, target.shape) 131 | alpha = self.alpha 132 | loss = - alpha * (1 - pt) ** self.gamma * target * torch.log(pt) - \ 133 | (1 - alpha) * pt ** self.gamma * (1 - target) * torch.log(1 - pt) 134 | #print('loss_shape',loss.shape) 135 | if self.reduction == 'elementwise_mean': 136 | loss = torch.mean(loss) 137 | elif self.reduction == 'sum': 138 | loss = torch.sum(loss) 139 | 140 | return loss*self.loss_weight/54  # NOTE: 54 is a hard-coded normalization constant
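# Minimal usage sketch (illustrative addition, not part of the original module):
# dice_loss() flattens each (H, W) mask and computes 1 - 2*<p, t> / (|p|^2 + |t|^2)
# per sample, so DiceLoss expects probability maps and binary targets of shape
# (N, H, W). Assumes mmdet/easymd are importable when run as a script.
if __name__ == '__main__':
    pred = torch.rand(2, 8, 8)                     # predicted mask probabilities
    target = (torch.rand(2, 8, 8) > 0.5).float()   # binary ground-truth masks
    criterion = DiceLoss(loss_weight=1.0)
    print('dice loss:', criterion(pred, target).item())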
-------------------------------------------------------------------------------- /converter/panoptic_cityscapes_categories.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": 7, 4 | "name": "road", 5 | "color": [ 6 | 128, 7 | 64, 8 | 128 9 | ], 10 | "supercategory": "flat", 11 | "isthing": 0 12 | }, 13 | { 14 | "id": 8, 15 | "name": "sidewalk", 16 | "color": [ 17 | 244, 18 | 35, 19 | 232 20 | ], 21 | "supercategory": "flat", 22 | "isthing": 0 23 | }, 24 | { 25 | "id": 11, 26 | "name": "building", 27 | "color": [ 28 | 70, 29 | 70, 30 | 70 31 | ], 32 | "supercategory": "construction", 33 | "isthing": 0 34 | }, 35 | { 36 | "id": 12, 37 | "name": "wall", 38 | "color": [ 39 | 102, 40 | 102, 41 | 156 42 | ], 43 | "supercategory": "construction", 44 | "isthing": 0 45 | }, 46 | { 47 | "id": 13, 48 | "name": "fence", 49 | "color": [ 50 | 190, 51 | 153, 52 | 153 53 | ], 54 | "supercategory": "construction", 55 | "isthing": 0 56 | }, 57 | { 58 | "id": 17, 59 | "name": "pole", 60 | "color": [ 61 | 153, 62 | 153, 63 | 153 64 | ], 65 | "supercategory": "object", 66 | "isthing": 0 67 | }, 68 | { 69 | "id": 19, 70 | "name": "traffic light", 71 | "color": [ 72 | 250, 73 | 170, 74 | 30 75 | ], 76 | "supercategory": "object", 77 | "isthing": 0 78 | }, 79 | { 80 | "id": 20, 81 | "name": "traffic sign", 82 | "color": [ 83 | 220, 84 | 220, 85 | 0 86 | ], 87 | "supercategory": "object", 88 | "isthing": 0 89 | }, 90 | { 91 | "id": 21, 92 | "name": "vegetation", 93 | "color": [ 94 | 107, 95 | 142, 96 | 35 97 | ], 98 | "supercategory": "nature", 99 | "isthing": 0 100 | }, 101 | { 102 | "id": 22, 103 | "name": "terrain", 104 | "color": [ 105 | 152, 106 | 251, 107 | 152 108 | ], 109 | "supercategory": "nature", 110 | "isthing": 0 111 | }, 112 | { 113 | "id": 23, 114 | "name": "sky", 115 | "color": [ 116 | 70, 117 | 130, 118 | 180 119 | ], 120 | "supercategory": "sky", 121 | "isthing": 0 122 | }, 123 | { 124 | "id": 24, 125 | "name": "person", 126 | "color": [ 127 | 220, 128 | 20, 129 | 60 130 | ], 131 | "supercategory": "human", 132 | "isthing": 1 133 | }, 134 | { 135 | "id": 25, 136 | "name": "rider", 137 | "color": [ 138 | 255, 139 | 0, 140 | 0 141 | ], 142 | "supercategory": "human", 143 | "isthing": 1 144 | }, 145 | { 146 | "id": 26, 147 | "name": "car", 148 | "color": [ 149 | 0, 150 | 0, 151 | 142 152 | ], 153 | "supercategory": "vehicle", 154 | "isthing": 1 155 | }, 156 | { 157 | "id": 27, 158 | "name": "truck", 159 | "color": [ 160 | 0, 161 | 0, 162 | 70 163 | ], 164 | "supercategory": "vehicle", 165 | "isthing": 1 166 | }, 167 | { 168 | "id": 28, 169 | "name": "bus", 170 | "color": [ 171 | 0, 172 | 60, 173 | 100 174 | ], 175 | "supercategory": "vehicle", 176 | "isthing": 1 177 | }, 178 | { 179 | "id": 31, 180 | "name": "train", 181 | "color": [ 182 | 0, 183 | 80, 184 | 100 185 | ], 186 | "supercategory": "vehicle", 187 | "isthing": 1 188 | }, 189 | { 190 | "id": 32, 191 | "name": "motorcycle", 192 | "color": [ 193 | 0, 194 | 0, 195 | 230 196 | ], 197 | "supercategory": "vehicle", 198 | "isthing": 1 199 | }, 200 | { 201 | "id": 33, 202 | "name": "bicycle", 203 | "color": [ 204 | 119, 205 | 11, 206 | 32 207 | ], 208 | "supercategory": "vehicle", 209 | "isthing": 1 210 | } 211 | ] -------------------------------------------------------------------------------- /easymd/datasets/panopticapi/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 
3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | import functools 6 | import traceback 7 | import json 8 | import numpy as np 9 | import cv2 10 | 11 | # The decorator is used to print an error thrown inside a worker process 12 | def get_traceback(f): 13 | @functools.wraps(f) 14 | def wrapper(*args, **kwargs): 15 | try: 16 | return f(*args, **kwargs) 17 | except Exception as e: 18 | print('Caught exception in worker thread:') 19 | traceback.print_exc() 20 | raise e 21 | 22 | return wrapper 23 | 24 | 25 | class IdGenerator(): 26 | ''' 27 | The class is designed to generate unique IDs that have a meaningful RGB encoding. 28 | Given a semantic category, a unique ID will be generated, and its RGB encoding 29 | will have a color close to the predefined semantic category color. 30 | The RGB encoding used is ID = R + 256 * G + 256 * 256 * B. 31 | The class constructor takes a dictionary {id: category_info}, where all semantic 32 | class ids are present and each category_info record is a dict with fields 33 | 'isthing' and 'color' 34 | ''' 35 | def __init__(self, categories): 36 | self.taken_colors = set([(0, 0, 0)])  # reserve black 37 | self.categories = categories 38 | for category in self.categories.values(): 39 | if category['isthing'] == 0: 40 | self.taken_colors.add(tuple(category['color'])) 41 | 42 | def get_color(self, cat_id): 43 | def random_color(base, max_dist=30): 44 | new_color = base + np.random.randint(low=-max_dist, 45 | high=max_dist+1, 46 | size=3) 47 | return tuple(np.maximum(0, np.minimum(255, new_color))) 48 | 49 | category = self.categories[cat_id] 50 | if category['isthing'] == 0: 51 | return category['color'] 52 | base_color_array = category['color'] 53 | base_color = tuple(base_color_array) 54 | if base_color not in self.taken_colors: 55 | self.taken_colors.add(base_color) 56 | return base_color 57 | else: 58 | while True: 59 | color = random_color(base_color_array) 60 | if color not in self.taken_colors: 61 | self.taken_colors.add(color) 62 | return color 63 | 64 | def get_id(self, cat_id): 65 | color = self.get_color(cat_id) 66 | return rgb2id(color) 67 | 68 | def get_id_and_color(self, cat_id): 69 | color = self.get_color(cat_id) 70 | return rgb2id(color), color 71 | 72 | 73 | def rgb2id(color): 74 | if isinstance(color, np.ndarray) and len(color.shape) == 3: 75 | if color.dtype == np.uint8: 76 | color = color.astype(np.int32) 77 | return color[:, :, 0] + 256 * color[:, :, 1] + 256 * 256 * color[:, :, 2] 78 | return int(color[0] + 256 * color[1] + 256 * 256 * color[2]) 79 | 80 | 81 | def id2rgb(id_map): 82 | if isinstance(id_map, np.ndarray): 83 | id_map_copy = id_map.copy() 84 | rgb_shape = tuple(list(id_map.shape) + [3]) 85 | rgb_map = np.zeros(rgb_shape, dtype=np.uint8) 86 | for i in range(3): 87 | rgb_map[..., i] = id_map_copy % 256 88 | id_map_copy //= 256 89 | return rgb_map 90 | color = [] 91 | for _ in range(3): 92 | color.append(id_map % 256) 93 | id_map //= 256 94 | return color 95 | 96 | 97 | def save_json(d, file): 98 | with open(file, 'w') as f: 99 | json.dump(d, f) 100 | 101 | 102 | # General util function to get the boundary of a binary mask. 103 | def mask_to_boundary(mask, dilation_ratio=0.02): 104 | """ 105 | Convert binary mask to boundary mask. 
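    The boundary is obtained by eroding a zero-padded copy of the mask and
    subtracting the eroded mask from the original, leaving the inner boundary
    band (the G_d intersect G term used by the Boundary IoU metric).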
106 | :param mask (numpy array, uint8): binary mask 107 | :param dilation_ratio (float): ratio to calculate dilation = dilation_ratio * image_diagonal 108 | :return: boundary mask (numpy array) 109 | """ 110 | h, w = mask.shape 111 | img_diag = np.sqrt(h ** 2 + w ** 2) 112 | dilation = int(round(dilation_ratio * img_diag)) 113 | if dilation < 1: 114 | dilation = 1 115 | # Pad image so mask truncated by the image border is also considered as boundary. 116 | new_mask = cv2.copyMakeBorder(mask, 1, 1, 1, 1, cv2.BORDER_CONSTANT, value=0) 117 | kernel = np.ones((3, 3), dtype=np.uint8) 118 | new_mask_erode = cv2.erode(new_mask, kernel, iterations=dilation) 119 | mask_erode = new_mask_erode[1 : h + 1, 1 : w + 1] 120 | # G_d intersects G in the paper. 121 | return mask - mask_erode -------------------------------------------------------------------------------- /easymd/analysis_tools/kmean.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class Kmean(object): 5 | def __init__(self, cluster_number, number_iter=1, name='iou'): 6 | self.cluster_number = cluster_number 7 | self.number_iter = number_iter 8 | self.name = name 9 | 10 | def _get_distance_measure(self, name='iou'): 11 | if name == 'iou': 12 | return self._calc_iou 13 | else: 14 | raise NotImplementedError('not implemented yet') 15 | 16 | def _calc_iou(self, boxes_nx2, clusters_kx2): 17 | """ 18 | calculate the iou between bboxes and clusters 19 | Args: 20 | boxes_nx2(np.ndarray): bboxes' width and height 21 | clusters_kx2(np.ndarray): clusters' width and height 22 | return: 23 | iou_nxk(np.ndarray): iou between bboxes and clusters 24 | """ 25 | n = boxes_nx2.shape[0] 26 | k = self.cluster_number 27 | 28 | box_area = boxes_nx2[:, 0] * boxes_nx2[:, 1] # equivalent to moving every top-left corner to (0, 0) before computing IoU 29 | box_area = box_area.repeat(k) 30 | box_area = np.reshape(box_area, (n, k)) 31 | 32 | cluster_area = clusters_kx2[:, 0] * clusters_kx2[:, 1] 33 | cluster_area = np.tile(cluster_area, [1, n]) 34 | cluster_area = np.reshape(cluster_area, (n, k)) 35 | 36 | box_w_matrix = np.reshape(boxes_nx2[:, 0].repeat(k), (n, k)) 37 | cluster_w_matrix = np.reshape(np.tile(clusters_kx2[:, 0], (1, n)), (n, k)) 38 | min_w_matrix = np.minimum(cluster_w_matrix, box_w_matrix) 39 | 40 | box_h_matrix = np.reshape(boxes_nx2[:, 1].repeat(k), (n, k)) 41 | cluster_h_matrix = np.reshape(np.tile(clusters_kx2[:, 1], (1, n)), (n, k)) 42 | min_h_matrix = np.minimum(cluster_h_matrix, box_h_matrix) 43 | inter_area = np.multiply(min_w_matrix, min_h_matrix) 44 | 45 | iou_nxk = inter_area / (box_area + cluster_area - inter_area) 46 | return iou_nxk 47 | 48 | def _calc_average_measure(self, boxes_nx2, clusters_kx2): 49 | """ 50 | calculate the mean iou between bboxes and clusters 51 | Args: 52 | boxes_nx2(np.ndarray): bboxes' width and height 53 | clusters_kx2(np.ndarray): clusters' width and height 54 | return: 55 | mean_iou(np.ndarray): mean iou between boxes and their corresponding clusters 56 | """ 57 | _distance_measure_fun = self._get_distance_measure(self.name) 58 | accuracy = np.mean([np.max(_distance_measure_fun(boxes_nx2, clusters_kx2), axis=1)]) 59 | return accuracy 60 | 61 | def _kmeans(self, boxes_nx2): 62 | """ 63 | calculate the clusters by kmeans 64 | Args: 65 | boxes_nx2(np.ndarray): bboxes' width and height 66 | uses: 67 | cluster_number 68 | calls: 69 | _calc_iou() 70 | return: 71 | clusters(np.ndarray): the anchors for yolo 72 | """ 73 | k = self.cluster_number 74 | box_number = 
boxes_nx2.shape[0] 75 | last_nearest = np.zeros((box_number,)) 76 | clusters = boxes_nx2[np.random.choice( 77 | box_number, k, replace=False)] # init k clusters 78 | _distance_measure_fun = self._get_distance_measure(self.name) 79 | while True: 80 | # the distance metric is 1 - IoU: the larger the IoU, the closer the box is to the cluster 81 | distances = 1 - _distance_measure_fun(boxes_nx2, clusters) # output shape (N, k) 82 | 83 | current_nearest = np.argmin(distances, axis=1) # index of the nearest cluster center for each box 84 | if (last_nearest == current_nearest).all(): # converged 85 | break # clusters won't change 86 | for cluster in range(k): # update the cluster centers 87 | if len(boxes_nx2[current_nearest == cluster]) == 0: 88 | clusters[cluster] = boxes_nx2[np.random.choice( 89 | box_number, 1, replace=False)] 90 | else: 91 | clusters[cluster] = np.median( # update clusters 92 | boxes_nx2[current_nearest == cluster], axis=0) 93 | 94 | last_nearest = current_nearest 95 | 96 | return clusters 97 | 98 | def clusters(self, wh_data_nx2): 99 | total_acc = -1 100 | total_result = [] 101 | for _ in range(self.number_iter): 102 | result = self._kmeans(wh_data_nx2) # TODO ga+kmean 103 | anchor_area = result[:, 0] * result[:, 1] 104 | area_index = np.argsort(anchor_area) 105 | result = result[area_index] 106 | acc = self._calc_average_measure(wh_data_nx2, result) * 100 107 | if acc > total_acc: 108 | total_acc = acc 109 | total_result = result 110 | 111 | # print("K anchors:\n {}".format(total_result.astype(np.int32))) 112 | print("Accuracy: {:.2f}%".format(total_acc)) 113 | return total_result.astype(np.int32).tolist() -------------------------------------------------------------------------------- /easymd/analysis_tools/center_sample_demo.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import cv2 4 | import mmcv 5 | import matplotlib.pyplot as plt 6 | 7 | 8 | def get_target_mask(gt_bboxes, feature_shape, center_sample_radius, center_sampling): 9 | # build the grid points 10 | xs = torch.arange(0, feature_shape[1]) 11 | ys = torch.arange(0, feature_shape[0]) 12 | y, x = torch.meshgrid(ys, xs) # note: meshgrid always returns y first 13 | y = y.flatten() # hw 14 | x = x.flatten() # hw 15 | # map back to the original image 16 | # points = torch.stack((x.reshape(-1) * stride, y.reshape(-1) * stride), 17 | # dim=-1) + stride // 2 # shift everything by stride // 2 to the cell centers 18 | # here we simply assume the feature map is the original image 19 | points = torch.stack((x.reshape(-1), y.reshape(-1)), dim=-1) 20 | 21 | num_points = points.size(0) # 100x100,2 22 | num_gts = gt_bboxes.size(0) # 1x4 23 | gt_bboxes = gt_bboxes[None].expand(num_points, num_gts, 4) # 100x100,1,4 24 | xs, ys = points[:, 0], points[:, 1] 25 | xs = xs[:, None].expand(num_points, num_gts) # 100x100,1 26 | ys = ys[:, None].expand(num_points, num_gts) 27 | 28 | if center_sampling: 29 | center_xs = (gt_bboxes[..., 0] + gt_bboxes[..., 2]) / 2 30 | center_ys = (gt_bboxes[..., 1] + gt_bboxes[..., 3]) / 2 31 | # center_gts effectively stores the new shrunken bbox coordinates 32 | center_gts = torch.zeros_like(gt_bboxes) 33 | stride = center_xs.new_ones(center_xs.shape) * center_sample_radius 34 | x_mins = center_xs - stride 35 | y_mins = center_ys - stride 36 | x_maxs = center_xs + stride 37 | y_maxs = center_ys + stride 38 | # if stride is small, x_mins is still inside the bbox and nothing needs to be done 39 | # if stride is large, x_mins falls outside the bbox, so we force x_mins = gt_bboxes[..., 0], which effectively disables center_sampling 40 | center_gts[..., 0] = torch.where(x_mins > gt_bboxes[..., 0], 41 | x_mins, gt_bboxes[..., 0]) 42 | center_gts[..., 1] = torch.where(y_mins > gt_bboxes[..., 1], 43 | y_mins, gt_bboxes[..., 1]) 44 | center_gts[..., 2] = torch.where(x_maxs > gt_bboxes[..., 2], 45 | gt_bboxes[..., 2], x_maxs) 46 | 
center_gts[..., 3] = torch.where(y_maxs > gt_bboxes[..., 3], 47 | gt_bboxes[..., 3], y_maxs) 48 | else: 49 | center_gts = gt_bboxes 50 | 51 | # distance from every point on the image to the four sides of the bbox 52 | left = xs - center_gts[..., 0] # distance from a feature-map point to the left edge of the bbox 53 | right = center_gts[..., 2] - xs # note the order of subtraction 54 | top = ys - center_gts[..., 1] 55 | bottom = center_gts[..., 3] - ys 56 | bbox_targets = torch.stack((left, top, right, bottom), -1) # 100x100,1,4 57 | # value, index = bbox_targets.min(-1) 58 | pos_mask = bbox_targets.min(-1)[0] > 0 59 | pos_mask = pos_mask.view(feature_shape[0], feature_shape[1], -1) 60 | return pos_mask, bbox_targets 61 | 62 | 63 | def centerness_target(pos_mask, bbox_targets): 64 | """Compute centerness targets. 65 | Args: 66 | pos_mask (Tensor): mask of positive locations; bbox_targets (Tensor): 67 | bbox targets of all locations, shape (num_points, num_gts, 4) 68 | Returns: 69 | Tensor: Centerness target. 70 | """ 71 | # only calculate pos centerness targets, otherwise there may be nan 72 | pos_mask = pos_mask.view(-1, 1) 73 | bbox_targets = bbox_targets[pos_mask] 74 | left_right = bbox_targets[:, [0, 2]] 75 | top_bottom = bbox_targets[:, [1, 3]] 76 | centerness_targets = (left_right.min(dim=-1)[0] / left_right.max(dim=-1)[0]) * \ 77 | (top_bottom.min(dim=-1)[0] / top_bottom.max(dim=-1)[0]) 78 | targets = torch.sqrt(centerness_targets) 79 | # reshape back into an image and return 80 | img_disp_target = pos_mask.new_zeros(pos_mask.shape, dtype=torch.float32) 81 | img_disp_target[pos_mask] = targets 82 | return img_disp_target 83 | 84 | 85 | if __name__ == '__main__': 86 | # drawback: center sampling cannot reflect changes in h/w, and although it is called a radius, the sampled region is a square rather than a circle 87 | center_sampling = True # whether to use the center sampling strategy 88 | feature_shape = (100, 100, 3) 89 | strides = 4 90 | radius = 3.5 # default 1.5 91 | center_sample_radius = radius * strides # expansion radius; the larger the value, the larger the sampled area 92 | gt_box = [20, 30, 80, 71] # in feature-map coordinates, xyxy 93 | 94 | gt_bbox = torch.as_tensor(gt_box, dtype=torch.float32).view(-1, 4) 95 | pos_mask, bbox_targets = get_target_mask(gt_bbox, feature_shape, center_sample_radius, center_sampling) 96 | 97 | # visualization 98 | pos_mask1 = pos_mask[..., 0].numpy() 99 | gray_img = np.where(pos_mask1 > 0, 255, 0).astype(np.uint8) 100 | # draw the original bbox 101 | img = mmcv.gray2bgr(gray_img) 102 | cv2.rectangle(img, (gt_box[0], gt_box[1]), (gt_box[2], gt_box[3]), color=(255, 0, 0)) 103 | cv2.namedWindow('img', 0) 104 | mmcv.imshow(img, 'img') 105 | 106 | # show the centerness map 107 | centerness_targets = centerness_target(pos_mask, bbox_targets) 108 | centerness_targets = centerness_targets.view(feature_shape[0], feature_shape[1]) 109 | plt.imshow(centerness_targets) 110 | plt.show() -------------------------------------------------------------------------------- /easymd/analysis_tools/visualize_coco.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from pycocotools.coco import COCO 3 | import os 4 | import cv2 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | from matplotlib.collections import PatchCollection 8 | from matplotlib.patches import Polygon 9 | 10 | 11 | def showBBox(coco, anns, label_box=True, is_filling=True): 12 | """ 13 | show bounding boxes of annotations or predictions 14 | anns: loadAnns() annotations or predictions subject to coco results format 15 | label_box: show background of category labels or not 16 | """ 17 | if len(anns) == 0: 18 | return 0 19 | ax = plt.gca() 20 | ax.set_autoscale_on(False) 21 | polygons = [] 22 | color = [] 23 | image2color = dict() 24 | for cat in coco.getCatIds(): 25 | image2color[cat] = (np.random.random((1, 3)) * 0.7 + 0.3).tolist()[0] 26 | for 
ann in anns: 27 | c = image2color[ann['category_id']] 28 | [bbox_x, bbox_y, bbox_w, bbox_h] = ann['bbox'] 29 | poly = [[bbox_x, bbox_y], [bbox_x, bbox_y + bbox_h], [bbox_x + bbox_w, bbox_y + bbox_h], 30 | [bbox_x + bbox_w, bbox_y]] 31 | np_poly = np.array(poly).reshape((4, 2)) 32 | polygons.append(Polygon(np_poly)) 33 | color.append(c) 34 | if label_box: 35 | label_bbox = dict(facecolor=c) 36 | else: 37 | label_bbox = None 38 | if 'score' in ann: 39 | ax.text(bbox_x, bbox_y, '%s: %.2f' % (coco.loadCats(ann['category_id'])[0]['name'], ann['score']), 40 | color='white', bbox=label_bbox) 41 | else: 42 | ax.text(bbox_x, bbox_y, '%s' % (coco.loadCats(ann['category_id'])[0]['name']), color='white', 43 | bbox=label_bbox) 44 | if is_filling: 45 | # option for filling bounding box 46 | p = PatchCollection(polygons, facecolor=color, linewidths=0, alpha=0.4) 47 | ax.add_collection(p) 48 | p = PatchCollection(polygons, facecolor='none', edgecolors=color, linewidths=2) 49 | ax.add_collection(p) 50 | 51 | 52 | # only_bbox: if True, visualize only the bboxes and hide the other labels 53 | # show_all: if True, show all categories; otherwise category_name selects which categories to show 54 | def show_coco(data_root, ann_file, img_prefix, only_bbox=False, show_all=True, category_name='bicycle'): 55 | example_coco = COCO(ann_file) 56 | print('Total number of images: {}'.format(len(example_coco.getImgIds()))) 57 | categories = example_coco.loadCats(example_coco.getCatIds()) 58 | category_names = [category['name'] for category in categories] 59 | print('Custom COCO categories: \n{}\n'.format(' '.join(category_names))) 60 | print(category_names) 61 | 62 | if show_all: 63 | category_ids = [] 64 | else: 65 | category_ids = example_coco.getCatIds(category_name) 66 | image_ids = example_coco.getImgIds(catIds=category_ids) 67 | image_ids = sorted(image_ids) 68 | for i in range(len(image_ids)): 69 | id = image_ids[i] 70 | # id = 285 # debug override: uncomment to always inspect image 285 71 | # e.g. /home/lzq/workspace/easy-mmdet/datasets/coco/val2017/000000000285.jpg 72 | image_data = example_coco.loadImgs(id)[0] 73 | path = os.path.join(data_root, img_prefix, image_data['file_name']) 74 | print(path) 75 | image = cv2.imread(path)[:, :, ::-1] 76 | 77 | 78 | annotation_ids = example_coco.getAnnIds(imgIds=image_data['id'], catIds=category_ids, iscrowd=None) 79 | #if len(annotation_ids)<2: 80 | # continue 81 | plt.figure() 82 | plt.imshow(image) 83 | annotations = example_coco.loadAnns(annotation_ids) 84 | if only_bbox: 85 | showBBox(example_coco, annotations) 86 | else: 87 | example_coco.showAnns(annotations) 88 | plt.title(path) 89 | plt.show() 90 | 91 | 92 | if __name__ == '__main__': 93 | # same settings as in the config (COCO) 94 | data_root = './datasets/coco/' 95 | ann_file = './datasets/annotations/panoptic_val2017_detection_format.json' 96 | #category_name=['banner', 'blanket', 'bridge', 'cardboard', 'counter', 'curtain', 'door-stuff', 'floor-wood', 'flower', 'fruit', 'gravel', 'house', 'light', 'mirror-stuff', 'net', 'pillow', 'platform', 'playingfield', 'railroad', 'river', 'road', 'roof', 'sand', 'sea', 'shelf', 'snow', 'stairs', 'tent', 'towel', 'wall-brick', 'wall-stone', 'wall-tile', 'wall-wood', 'water-other', 'window-blind', 'window-other', 'tree-merged', 'fence-merged', 'ceiling-merged', 'sky-other-merged', 'cabinet-merged', 'table-merged', 'floor-other-merged', 'pavement-merged', 'mountain-merged', 'grass-merged', 'dirt-merged', 'paper-merged', 'food-other-merged', 'building-other-merged', 'rock-merged', 'wall-other-merged', 'rug-merged'] 97 | #category_name = 'tree-merged' 98 | #ann_file='/home/lzq/workspace/easy-mmdet/datasets/coco/annotations/instances_val2017.json' 99 | img_prefix = 
'val2017/' 100 | show_coco(data_root, ann_file, img_prefix, show_all=True, only_bbox=False) 101 | 102 | # display VOC data after converting it to COCO format 103 | #data_root = '/home/pi/dataset/VOCdevkit/' 104 | #ann_file = data_root + 'annotations/voc0712_trainval.json' 105 | #img_prefix = data_root 106 | #show_coco(data_root, ann_file, img_prefix) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Panoptic SegFormer: Delving Deeper into Panoptic Segmentation with Transformers 2 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/panoptic-segformer/panoptic-segmentation-on-coco-minival)](https://paperswithcode.com/sota/panoptic-segmentation-on-coco-minival?p=panoptic-segformer) 3 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/panoptic-segformer/panoptic-segmentation-on-coco-test-dev)](https://paperswithcode.com/sota/panoptic-segmentation-on-coco-test-dev?p=panoptic-segformer) 4 | 
5 | ![Panoptic SegFormer architecture](figs/arch.png) 6 | 
7 | 8 | Panoptic SegFormer was accepted to CVPR 2022, and the latest version of our paper is available on [arXiv](https://arxiv.org/abs/2109.03814) 9 | 10 | 11 | ## Results 12 | 13 | Results on COCO val 14 | 15 | | Backbone | Method | Lr Schd | PQ | Config | Download | 16 | | :---: | :---: | :---: | :---: | :---: | :---: | 17 | | R-50 | Panoptic-SegFormer | 1x | 48.0 | [config](configs/panformer/panformer_r50_12e_coco_panoptic.py) | [model](https://github.com/zhiqi-li/Panoptic-SegFormer/releases/download/v1.0/panoptic_segformer_r50_1x.pth) | 18 | | R-50 | Panoptic-SegFormer | 2x | 49.6 | [config](configs/panformer/panformer_r50_24e_coco_panoptic.py) | [model](https://github.com/zhiqi-li/Panoptic-SegFormer/releases/download/v1.0/panoptic_segformer_r50_2x.pth) | 19 | | R-101 | Panoptic-SegFormer | 2x | 50.6 | [config](configs/panformer/panformer_r101_24e_coco_panoptic.py) | [model](https://github.com/zhiqi-li/Panoptic-SegFormer/releases/download/v1.0/panoptic_segformer_r101_2x.pth) | 20 | | [PVTv2-B5](https://github.com/whai362/PVT) (**much lighter**) | Panoptic-SegFormer | 2x | 55.6 | [config](configs/panformer/panformer_pvtb5_24e_coco_panoptic.py) | [model](https://github.com/zhiqi-li/Panoptic-SegFormer/releases/download/v1.0/panoptic_segformer_pvtv2b5_2x.pth) | 21 | | Swin-L (window size 7) | Panoptic-SegFormer | 2x | 55.8 | [config](configs/panformer/panformer_swinl_24e_coco_panoptic.py) | [model](https://github.com/zhiqi-li/Panoptic-SegFormer/releases/download/v1.0/panoptic_segformer_swinl_2x.pth) | 22 | 23 | 24 | 25 | 26 | ## Install 27 | 28 | ### Prerequisites 29 | 30 | - Linux 31 | - Python 3.6+ 32 | - PyTorch 1.5+ 33 | - torchvision 34 | - CUDA 9.2+ (If you build PyTorch from source, CUDA 9.0 is also compatible) 35 | - GCC 5+ 36 | - [mmcv-full==1.3.4](https://github.com/open-mmlab/mmcv/tree/v1.3.4) 37 | - [mmdet==2.12.0](https://github.com/open-mmlab/mmdetection/tree/v2.12.0) # higher versions may not work 38 | - timm==0.4.5 39 | - einops==0.3.0 40 | - Pillow==8.0.1 41 | - opencv-python==4.5.2 42 | 43 | Note: PyTorch 1.8 has a bug in its [adamw.py](https://github.com/pytorch/pytorch/blob/v1.8.0/torch/optim/adamw.py) that was fixed in PyTorch 1.9 ([see](https://github.com/pytorch/pytorch/blob/master/torch/optim/adamw.py)); you can easily patch it by comparing the two files. 44 | 45 | 46 | ### Install Panoptic SegFormer 47 | 48 | ``` 49 | python setup.py install 50 | ``` 51 | 52 | 53 | ## Datasets 54 | 55 | When this project began, mmdet did not yet support panoptic segmentation officially, so we convert the dataset from panoptic segmentation format to instance segmentation format for convenience. 56 | 57 | ### 1. Prepare data (COCO) 58 | 59 | ``` 60 | cd Panoptic-SegFormer 61 | mkdir datasets 62 | cd datasets 63 | ln -s path_to_coco coco 64 | mkdir annotations/ 65 | cd annotations 66 | wget http://images.cocodataset.org/annotations/panoptic_annotations_trainval2017.zip 67 | unzip panoptic_annotations_trainval2017.zip 68 | ``` 69 | 70 | Then the directory structure should be the following: 71 | 72 | ``` 73 | Panoptic-SegFormer 74 | ├── datasets 75 | │  ├── annotations/ 76 | │  │   ├── panoptic_train2017/ 77 | │   │  ├── panoptic_train2017.json 78 | │  │   ├── panoptic_val2017/ 79 | │   │  └── panoptic_val2017.json 80 | │ └── coco/ 81 | │ 82 | ├── config 83 | ├── checkpoints 84 | ├── easymd 85 | ... 86 | ``` 87 | 88 | ### 2. 
Convert panoptic format to detection format 89 | 90 | ``` 91 | cd Panoptic-SegFormer 92 | ./tools/convert_panoptic_coco.sh coco 93 | ``` 94 | 95 | Then the directory structure should be the following: 96 | 97 | ``` 98 | Panoptic-SegFormer 99 | ├── datasets 100 | │  ├── annotations/ 101 | │  │   ├── panoptic_train2017/ 102 | │  │   ├── panoptic_train2017_detection_format.json 103 | │   │  ├── panoptic_train2017.json 104 | │  │   ├── panoptic_val2017/ 105 | │  │   ├── panoptic_val2017_detection_format.json 106 | │   │  └── panoptic_val2017.json 107 | │ └── coco/ 108 | │ 109 | ├── config 110 | ├── checkpoints 111 | ├── easymd 112 | ... 113 | ``` 114 | 115 | 116 | ## Run (panoptic segmentation) 117 | 118 | ### Train 119 | 120 | Single machine with 8 GPUs: 121 | 122 | ``` 123 | ./tools/dist_train.sh ./configs/panformer/panformer_r50_24e_coco_panoptic.py 8 124 | ``` 125 | 126 | 127 | ### Test 128 | 129 | ``` 130 | ./tools/dist_test.sh ./configs/panformer/panformer_r50_24e_coco_panoptic.py path/to/model.pth 8 131 | ``` 132 | 133 | ## Citing 134 | 135 | If you use Panoptic SegFormer in your research, please use the following BibTeX entry. 136 | 137 | ```BibTeX 138 | @misc{li2021panoptic, 139 | title={Panoptic SegFormer: Delving Deeper into Panoptic Segmentation with Transformers}, 140 | author={Zhiqi Li and Wenhai Wang and Enze Xie and Zhiding Yu and Anima Anandkumar and Jose M. Alvarez and Tong Lu and Ping Luo}, 141 | year={2021}, 142 | eprint={2109.03814}, 143 | archivePrefix={arXiv}, 144 | primaryClass={cs.CV} 145 | } 146 | ``` 147 | 148 | 149 | ## Acknowledgement 150 | 151 | Mainly based on [Deformable DETR](https://github.com/open-mmlab/mmdetection.git) from MMDetection. 152 | 153 | Many thanks to the other open source works: [timm](https://github.com/rwightman/pytorch-image-models), [Panoptic FCN](https://github.com/dvlab-research/PanopticFCN), [MaskFormer](https://github.com/facebookresearch/MaskFormer), [QueryInst](https://github.com/hustvl/QueryInst) 154 | 155 | 156 | -------------------------------------------------------------------------------- /easymd/analysis_tools/visualize_panoptic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | ''' 3 | Visualization demo for panoptic COCO sample_data 4 | 5 | The code shows an example of color generation for panoptic data (with 6 | "generate_new_colors" set to True). For each segment a distinct color is used that is 7 | close to the color of the corresponding semantic class.
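As background for the decoding step this demo relies on, a minimal sketch of reading one panoptic annotation PNG into a segment-id map (the file path is hypothetical; `rgb2id` comes from the panopticapi utils shown earlier in this repo):

```python
import numpy as np
from PIL import Image
from easymd.datasets.panopticapi.utils import rgb2id

# Hypothetical path to one COCO panoptic ground-truth PNG.
png = np.array(Image.open('datasets/annotations/panoptic_val2017/000000000285.png'),
               dtype=np.uint8)
seg_ids = rgb2id(png)       # (H, W) int32 map of segment ids
print(np.unique(seg_ids))   # each nonzero value matches a segments_info['id'] in the JSON
```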
8 | ''' 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | from __future__ import unicode_literals 13 | import os, sys 14 | import numpy as np 15 | import json 16 | 17 | import PIL.Image as Image 18 | import matplotlib.pyplot as plt 19 | from skimage.segmentation import find_boundaries 20 | import mmcv 21 | from panopticapi.utils import IdGenerator, rgb2id 22 | try: 23 | from detectron2.data import MetadataCatalog 24 | except: 25 | print('no detectron2') 26 | #from detectron2.utils.visualizer import Visualizer 27 | import torch 28 | from easymd.models.utils.visual import Visualizer # we modified the Visualizer from detectron2 29 | # whether the colors from the PNG are used or new colors are generated 30 | generate_new_colors = True 31 | 32 | json_file = './datasets/annotations/panoptic_val2017.json' 33 | segmentations_folder = './datasets/annotations/panoptic_val2017' 34 | img_folder = './datasets/coco/val2017' 35 | panoptic_coco_categories = './easymd/datasets/panoptic_coco_categories.json' 36 | 37 | with open(json_file, 'r') as f: 38 | coco_d = json.load(f) 39 | 40 | #ann = np.random.choice(coco_d['annotations']) 41 | #id = 785 42 | #id = 79188 43 | #id = 124975 #two horses 44 | #id = 2592 45 | #id = 26564 46 | 47 | def f(id): 48 | find = False 49 | for each in coco_d['annotations']: 50 | if each['image_id'] == id: 51 | ann = each 52 | find = True 53 | break 54 | if not find: 55 | return 56 | 57 | with open(panoptic_coco_categories, 'r') as f: 58 | categories_list = json.load(f) 59 | categories = {category['id']: category for category in categories_list} 60 | 61 | # find the input img that corresponds to the annotation 62 | img = None 63 | for image_info in coco_d['images']: 64 | if image_info['id'] == ann['image_id']: 65 | try: 66 | img = np.array( 67 | Image.open(os.path.join(img_folder, image_info['file_name'])) 68 | ) 69 | except: 70 | print("Unable to find corresponding input image.") 71 | break 72 | 73 | segmentation = np.array( 74 | Image.open(os.path.join(segmentations_folder, ann['file_name'])), 75 | dtype=np.uint8 76 | ) 77 | segmentation_id = rgb2id(segmentation) 78 | # find segments boundaries 79 | 80 | 81 | if generate_new_colors: 82 | segmentation[:, :, :] = 0 83 | color_generator = IdGenerator(categories) 84 | 85 | i = 0 86 | 87 | for segment_info in ann['segments_info']: 88 | #print(segment_info) 89 | #if segment_info['id']!= 4475732: 90 | # continue 91 | 92 | color = color_generator.get_color(segment_info['category_id']) 93 | mask = segmentation_id == segment_info['id'] 94 | segmentation[mask] = color 95 | #print(dir(segment_info)) 96 | 97 | segment_info.setdefault('isthing', True) 98 | #print(segment_info['category_id'] > 90,segment_info['category_id']) 99 | if segment_info['category_id'] > 90: 100 | segment_info['isthing'] = False 101 | i += 1 102 | if i < 10: # skip images with fewer than 10 segments 103 | return 104 | 105 | boundaries = find_boundaries(rgb2id(segmentation), mode='thick') 106 | #segmentation[boundaries] = [0,255,0] 107 | # depict boundaries 108 | import cv2 as cv 109 | 110 | print(img.shape, segmentation.shape) 111 | #res = cv.add(segmentation,img) 112 | 113 | #im = Image.open(data['img_metas'][0].data[0][0]['filename']) 114 | meta = MetadataCatalog.get("coco_2017_val_panoptic_separated") 115 | im = np.array(img)[:, :, ::-1] 116 | v = Visualizer(im, meta, scale=1.0) 117 | v._default_font_size = 10 118 | v = v.draw_panoptic_seg_predictions(torch.from_numpy(segmentation_id), ann['segments_info'], area_threshold=0) 119 | res = 
v.get_image()[:,:,::-1] 120 | mmcv.imwrite(v.get_image(),'tmp.png') 121 | 122 | 123 | 124 | 125 | if img is None: 126 | plt.figure() 127 | plt.imshow(segmentation) 128 | plt.axis('off') 129 | else: 130 | plt.figure(figsize=(9, 5)) 131 | plt.subplot(231) 132 | plt.imshow(img) 133 | plt.axis('off') 134 | plt.subplot(232) 135 | plt.imshow(segmentation) 136 | plt.axis('off') 137 | plt.subplot(233) 138 | plt.imshow(res) 139 | plt.axis('off') 140 | plt.subplot(234) 141 | msg = np.array( 142 | Image.open(os.path.join('/home/lzq/easy-mmdet/seg_pwm', ann['file_name'])),dtype=np.uint8 143 | ) 144 | plt.imshow(msg) 145 | plt.axis('off') 146 | plt.subplot(235) 147 | pwm = np.array( 148 | Image.open(os.path.join('/home/lzq/easy-mmdet/seg_max', ann['file_name'])),dtype=np.uint8 149 | ) 150 | plt.imshow(pwm) 151 | plt.axis('off') 152 | plt.subplot(236) 153 | hp = np.array( 154 | Image.open(os.path.join('/home/lzq/easy-mmdet/seg_hp', ann['file_name'])), 155 | dtype=np.uint8 156 | ) 157 | plt.imshow(hp) 158 | plt.axis('off') 159 | plt.tight_layout() 160 | plt.show() 161 | #{"mode":"full","isActive":false} 162 | id=165681 163 | #f_id(id) 164 | f(id) 165 | #while True: 166 | # f(id) 167 | # id+=1 168 | 169 | #boundaries = find_boundaries(rgb2id(segmentation), mode='thick') 170 | #mmcv.imwrite(segmentation[:,:,::-1],'gt/'+str(id)+'.png') 171 | #segmentation[boundaries] = [0,255,0] 172 | # depict boundaries 173 | 174 | #for i in range(581781+1): 175 | # f(i) 176 | ''' 177 | import cv2 as cv 178 | 179 | print(img.shape,segmentation.shape) 180 | res = cv.add(segmentation,img) 181 | 182 | 183 | 184 | if img is None: 185 | plt.figure() 186 | plt.imshow(segmentation) 187 | plt.axis('off') 188 | else: 189 | plt.figure(figsize=(9, 5)) 190 | plt.subplot(131) 191 | plt.imshow(img) 192 | plt.axis('off') 193 | plt.subplot(132) 194 | plt.imshow(segmentation) 195 | plt.axis('off') 196 | plt.subplot(133) 197 | plt.imshow(res) 198 | plt.axis('off') 199 | plt.tight_layout() 200 | plt.show() 201 | #{"mode":"full","isActive":false} 202 | ''' 203 | 204 | -------------------------------------------------------------------------------- /easymd/analysis_tools/analyze_logs.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | from collections import defaultdict 4 | 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | import seaborn as sns 8 | 9 | 10 | def cal_train_time(log_dicts, args): 11 | for i, log_dict in enumerate(log_dicts): 12 | print(f'{"-" * 5}Analyze train time of {args.json_logs[i]}{"-" * 5}') 13 | all_times = [] 14 | for epoch in log_dict.keys(): 15 | if args.include_outliers: 16 | all_times.append(log_dict[epoch]['time']) 17 | else: 18 | all_times.append(log_dict[epoch]['time'][1:]) 19 | all_times = np.array(all_times) 20 | epoch_ave_time = all_times.mean(-1) 21 | slowest_epoch = epoch_ave_time.argmax() 22 | fastest_epoch = epoch_ave_time.argmin() 23 | std_over_epoch = epoch_ave_time.std() 24 | print(f'slowest epoch {slowest_epoch + 1}, ' 25 | f'average time is {epoch_ave_time[slowest_epoch]:.4f}') 26 | print(f'fastest epoch {fastest_epoch + 1}, ' 27 | f'average time is {epoch_ave_time[fastest_epoch]:.4f}') 28 | print(f'time std over epochs is {std_over_epoch:.4f}') 29 | print(f'average iter time: {np.mean(all_times):.4f} s/iter') 30 | print() 31 | 32 | 33 | def plot_curve(log_dicts, args): 34 | if args.backend is not None: 35 | plt.switch_backend(args.backend) 36 | sns.set_style(args.style) 37 | # if legend is None, use {filename}_{key} 
as legend 38 | legend = args.legend 39 | if legend is None: 40 | legend = [] 41 | for json_log in args.json_logs: 42 | for metric in args.keys: 43 | legend.append(f'{json_log}_{metric}') 44 | assert len(legend) == (len(args.json_logs) * len(args.keys)) 45 | metrics = args.keys 46 | 47 | num_metrics = len(metrics) 48 | for i, log_dict in enumerate(log_dicts): 49 | epochs = list(log_dict.keys()) 50 | for j, metric in enumerate(metrics): 51 | print(f'plot curve of {args.json_logs[i]}, metric is {metric}') 52 | if metric not in log_dict[epochs[0]]: 53 | raise KeyError( 54 | f'{args.json_logs[i]} does not contain metric {metric}') 55 | 56 | if 'mAP' in metric: 57 | xs = np.arange(1, max(epochs) + 1) 58 | ys = [] 59 | for epoch in epochs: 60 | ys += log_dict[epoch][metric] 61 | ax = plt.gca() 62 | ax.set_xticks(xs) 63 | plt.xlabel('epoch') 64 | plt.plot(xs, ys, label=legend[i * num_metrics + j], marker='o') 65 | else: 66 | xs = [] 67 | ys = [] 68 | num_iters_per_epoch = log_dict[epochs[0]]['iter'][-1] 69 | for epoch in epochs: 70 | iters = log_dict[epoch]['iter'] 71 | if log_dict[epoch]['mode'][-1] == 'val': 72 | iters = iters[:-1] 73 | xs.append( 74 | np.array(iters) + (epoch - 1) * num_iters_per_epoch) 75 | ys.append(np.array(log_dict[epoch][metric][:len(iters)])) 76 | xs = np.concatenate(xs) 77 | ys = np.concatenate(ys) 78 | plt.xlabel('iter') 79 | plt.plot( 80 | xs, ys, label=legend[i * num_metrics + j], linewidth=0.5) 81 | plt.legend() 82 | if args.title is not None: 83 | plt.title(args.title) 84 | if args.out is None: 85 | plt.show() 86 | else: 87 | print(f'save curve to: {args.out}') 88 | plt.savefig(args.out) 89 | plt.cla() 90 | 91 | 92 | def add_plot_parser(subparsers): 93 | parser_plt = subparsers.add_parser( 94 | 'plot_curve', help='parser for plotting curves') 95 | parser_plt.add_argument( 96 | 'json_logs', 97 | type=str, 98 | nargs='+', 99 | help='path of train log in json format') 100 | parser_plt.add_argument( 101 | '--keys', 102 | type=str, 103 | nargs='+', 104 | default=['bbox_mAP'], 105 | help='the metric that you want to plot') 106 | parser_plt.add_argument('--title', type=str, help='title of figure') 107 | parser_plt.add_argument( 108 | '--legend', 109 | type=str, 110 | nargs='+', 111 | default=None, 112 | help='legend of each plot') 113 | parser_plt.add_argument( 114 | '--backend', type=str, default=None, help='backend of plt') 115 | parser_plt.add_argument( 116 | '--style', type=str, default='dark', help='style of plt') 117 | parser_plt.add_argument('--out', type=str, default=None) 118 | 119 | 120 | def add_time_parser(subparsers): 121 | parser_time = subparsers.add_parser( 122 | 'cal_train_time', 123 | help='parser for computing the average time per training iteration') 124 | parser_time.add_argument( 125 | 'json_logs', 126 | type=str, 127 | nargs='+', 128 | help='path of train log in json format') 129 | parser_time.add_argument( 130 | '--include-outliers', 131 | action='store_true', 132 | help='include the first value of every epoch when computing ' 133 | 'the average time') 134 | 135 | 136 | def parse_args(): 137 | parser = argparse.ArgumentParser(description='Analyze Json Log') 138 | # currently only support plot curve and calculate average train time 139 | subparsers = parser.add_subparsers(dest='task', help='task parser') 140 | add_plot_parser(subparsers) 141 | add_time_parser(subparsers) 142 | args = parser.parse_args() 143 | return args 144 | 145 | 146 | def load_json_logs(json_logs): 147 | # load and convert json_logs to log_dict, key is epoch, value is a sub dict 
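To make the parsed structure concrete, here is a small self-contained sketch (the log line below is fabricated, but it follows the one-JSON-object-per-line format that `load_json_logs` consumes):

```python
import json
from collections import defaultdict

# One (fabricated) line of an mmdet training log in JSON format.
line = '{"mode": "train", "epoch": 1, "iter": 50, "loss": 0.42, "time": 0.31}'
log = json.loads(line)

log_dict = {}
epoch = log.pop('epoch')
log_dict.setdefault(epoch, defaultdict(list))
for k, v in log.items():
    log_dict[epoch][k].append(v)

print(log_dict[1]['loss'])   # [0.42] -- one entry per logged iteration
```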
148 | # keys of sub dict is different metrics, e.g. memory, bbox_mAP 149 | # value of sub dict is a list of corresponding values of all iterations 150 | log_dicts = [dict() for _ in json_logs] 151 | for json_log, log_dict in zip(json_logs, log_dicts): 152 | with open(json_log, 'r') as log_file: 153 | for line in log_file: 154 | log = json.loads(line.strip()) 155 | # skip lines without `epoch` field 156 | if 'epoch' not in log: 157 | continue 158 | epoch = log.pop('epoch') 159 | if epoch not in log_dict: 160 | log_dict[epoch] = defaultdict(list) 161 | for k, v in log.items(): 162 | log_dict[epoch][k].append(v) 163 | return log_dicts 164 | 165 | 166 | def main(): 167 | args = parse_args() 168 | 169 | json_logs = args.json_logs 170 | for json_log in json_logs: 171 | assert json_log.endswith('.json') 172 | 173 | log_dicts = load_json_logs(json_logs) 174 | 175 | eval(args.task)(log_dicts, args) 176 | 177 | 178 | if __name__ == '__main__': 179 | main() 180 | -------------------------------------------------------------------------------- /configs/models/cascade_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='CascadeRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), 35 | roi_head=dict( 36 | type='CascadeRoIHead', 37 | num_stages=3, 38 | stage_loss_weights=[1, 0.5, 0.25], 39 | bbox_roi_extractor=dict( 40 | type='SingleRoIExtractor', 41 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 42 | out_channels=256, 43 | featmap_strides=[4, 8, 16, 32]), 44 | bbox_head=[ 45 | dict( 46 | type='Shared2FCBBoxHead', 47 | in_channels=256, 48 | fc_out_channels=1024, 49 | roi_feat_size=7, 50 | num_classes=80, 51 | bbox_coder=dict( 52 | type='DeltaXYWHBBoxCoder', 53 | target_means=[0., 0., 0., 0.], 54 | target_stds=[0.1, 0.1, 0.2, 0.2]), 55 | reg_class_agnostic=True, 56 | loss_cls=dict( 57 | type='CrossEntropyLoss', 58 | use_sigmoid=False, 59 | loss_weight=1.0), 60 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 61 | loss_weight=1.0)), 62 | dict( 63 | type='Shared2FCBBoxHead', 64 | in_channels=256, 65 | fc_out_channels=1024, 66 | roi_feat_size=7, 67 | num_classes=80, 68 | bbox_coder=dict( 69 | type='DeltaXYWHBBoxCoder', 70 | target_means=[0., 0., 0., 0.], 71 | target_stds=[0.05, 0.05, 0.1, 0.1]), 72 | reg_class_agnostic=True, 73 | loss_cls=dict( 74 | type='CrossEntropyLoss', 75 | use_sigmoid=False, 76 | loss_weight=1.0), 77 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 78 | loss_weight=1.0)), 79 | dict( 80 | type='Shared2FCBBoxHead', 81 | in_channels=256, 82 | fc_out_channels=1024, 83 | roi_feat_size=7, 84 | num_classes=80, 85 | bbox_coder=dict( 86 | 
type='DeltaXYWHBBoxCoder', 87 | target_means=[0., 0., 0., 0.], 88 | target_stds=[0.033, 0.033, 0.067, 0.067]), 89 | reg_class_agnostic=True, 90 | loss_cls=dict( 91 | type='CrossEntropyLoss', 92 | use_sigmoid=False, 93 | loss_weight=1.0), 94 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) 95 | ]), 96 | # model training and testing settings 97 | train_cfg=dict( 98 | rpn=dict( 99 | assigner=dict( 100 | type='MaxIoUAssigner', 101 | pos_iou_thr=0.7, 102 | neg_iou_thr=0.3, 103 | min_pos_iou=0.3, 104 | match_low_quality=True, 105 | ignore_iof_thr=-1), 106 | sampler=dict( 107 | type='RandomSampler', 108 | num=256, 109 | pos_fraction=0.5, 110 | neg_pos_ub=-1, 111 | add_gt_as_proposals=False), 112 | allowed_border=0, 113 | pos_weight=-1, 114 | debug=False), 115 | rpn_proposal=dict( 116 | nms_pre=2000, 117 | max_per_img=2000, 118 | nms=dict(type='nms', iou_threshold=0.7), 119 | min_bbox_size=0), 120 | rcnn=[ 121 | dict( 122 | assigner=dict( 123 | type='MaxIoUAssigner', 124 | pos_iou_thr=0.5, 125 | neg_iou_thr=0.5, 126 | min_pos_iou=0.5, 127 | match_low_quality=False, 128 | ignore_iof_thr=-1), 129 | sampler=dict( 130 | type='RandomSampler', 131 | num=512, 132 | pos_fraction=0.25, 133 | neg_pos_ub=-1, 134 | add_gt_as_proposals=True), 135 | pos_weight=-1, 136 | debug=False), 137 | dict( 138 | assigner=dict( 139 | type='MaxIoUAssigner', 140 | pos_iou_thr=0.6, 141 | neg_iou_thr=0.6, 142 | min_pos_iou=0.6, 143 | match_low_quality=False, 144 | ignore_iof_thr=-1), 145 | sampler=dict( 146 | type='RandomSampler', 147 | num=512, 148 | pos_fraction=0.25, 149 | neg_pos_ub=-1, 150 | add_gt_as_proposals=True), 151 | pos_weight=-1, 152 | debug=False), 153 | dict( 154 | assigner=dict( 155 | type='MaxIoUAssigner', 156 | pos_iou_thr=0.7, 157 | neg_iou_thr=0.7, 158 | min_pos_iou=0.7, 159 | match_low_quality=False, 160 | ignore_iof_thr=-1), 161 | sampler=dict( 162 | type='RandomSampler', 163 | num=512, 164 | pos_fraction=0.25, 165 | neg_pos_ub=-1, 166 | add_gt_as_proposals=True), 167 | pos_weight=-1, 168 | debug=False) 169 | ]), 170 | test_cfg=dict( 171 | rpn=dict( 172 | nms_pre=1000, 173 | max_per_img=1000, 174 | nms=dict(type='nms', iou_threshold=0.7), 175 | min_bbox_size=0), 176 | rcnn=dict( 177 | score_thr=0.05, 178 | nms=dict(type='nms', iou_threshold=0.5), 179 | max_per_img=100))) 180 | -------------------------------------------------------------------------------- /configs/_base_/models/cascade_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='CascadeRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), 35 | roi_head=dict( 36 | type='CascadeRoIHead', 37 | num_stages=3, 38 | 
stage_loss_weights=[1, 0.5, 0.25], 39 | bbox_roi_extractor=dict( 40 | type='SingleRoIExtractor', 41 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 42 | out_channels=256, 43 | featmap_strides=[4, 8, 16, 32]), 44 | bbox_head=[ 45 | dict( 46 | type='Shared2FCBBoxHead', 47 | in_channels=256, 48 | fc_out_channels=1024, 49 | roi_feat_size=7, 50 | num_classes=80, 51 | bbox_coder=dict( 52 | type='DeltaXYWHBBoxCoder', 53 | target_means=[0., 0., 0., 0.], 54 | target_stds=[0.1, 0.1, 0.2, 0.2]), 55 | reg_class_agnostic=True, 56 | loss_cls=dict( 57 | type='CrossEntropyLoss', 58 | use_sigmoid=False, 59 | loss_weight=1.0), 60 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 61 | loss_weight=1.0)), 62 | dict( 63 | type='Shared2FCBBoxHead', 64 | in_channels=256, 65 | fc_out_channels=1024, 66 | roi_feat_size=7, 67 | num_classes=80, 68 | bbox_coder=dict( 69 | type='DeltaXYWHBBoxCoder', 70 | target_means=[0., 0., 0., 0.], 71 | target_stds=[0.05, 0.05, 0.1, 0.1]), 72 | reg_class_agnostic=True, 73 | loss_cls=dict( 74 | type='CrossEntropyLoss', 75 | use_sigmoid=False, 76 | loss_weight=1.0), 77 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 78 | loss_weight=1.0)), 79 | dict( 80 | type='Shared2FCBBoxHead', 81 | in_channels=256, 82 | fc_out_channels=1024, 83 | roi_feat_size=7, 84 | num_classes=80, 85 | bbox_coder=dict( 86 | type='DeltaXYWHBBoxCoder', 87 | target_means=[0., 0., 0., 0.], 88 | target_stds=[0.033, 0.033, 0.067, 0.067]), 89 | reg_class_agnostic=True, 90 | loss_cls=dict( 91 | type='CrossEntropyLoss', 92 | use_sigmoid=False, 93 | loss_weight=1.0), 94 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) 95 | ]), 96 | # model training and testing settings 97 | train_cfg=dict( 98 | rpn=dict( 99 | assigner=dict( 100 | type='MaxIoUAssigner', 101 | pos_iou_thr=0.7, 102 | neg_iou_thr=0.3, 103 | min_pos_iou=0.3, 104 | match_low_quality=True, 105 | ignore_iof_thr=-1), 106 | sampler=dict( 107 | type='RandomSampler', 108 | num=256, 109 | pos_fraction=0.5, 110 | neg_pos_ub=-1, 111 | add_gt_as_proposals=False), 112 | allowed_border=0, 113 | pos_weight=-1, 114 | debug=False), 115 | rpn_proposal=dict( 116 | nms_pre=2000, 117 | max_per_img=2000, 118 | nms=dict(type='nms', iou_threshold=0.7), 119 | min_bbox_size=0), 120 | rcnn=[ 121 | dict( 122 | assigner=dict( 123 | type='MaxIoUAssigner', 124 | pos_iou_thr=0.5, 125 | neg_iou_thr=0.5, 126 | min_pos_iou=0.5, 127 | match_low_quality=False, 128 | ignore_iof_thr=-1), 129 | sampler=dict( 130 | type='RandomSampler', 131 | num=512, 132 | pos_fraction=0.25, 133 | neg_pos_ub=-1, 134 | add_gt_as_proposals=True), 135 | pos_weight=-1, 136 | debug=False), 137 | dict( 138 | assigner=dict( 139 | type='MaxIoUAssigner', 140 | pos_iou_thr=0.6, 141 | neg_iou_thr=0.6, 142 | min_pos_iou=0.6, 143 | match_low_quality=False, 144 | ignore_iof_thr=-1), 145 | sampler=dict( 146 | type='RandomSampler', 147 | num=512, 148 | pos_fraction=0.25, 149 | neg_pos_ub=-1, 150 | add_gt_as_proposals=True), 151 | pos_weight=-1, 152 | debug=False), 153 | dict( 154 | assigner=dict( 155 | type='MaxIoUAssigner', 156 | pos_iou_thr=0.7, 157 | neg_iou_thr=0.7, 158 | min_pos_iou=0.7, 159 | match_low_quality=False, 160 | ignore_iof_thr=-1), 161 | sampler=dict( 162 | type='RandomSampler', 163 | num=512, 164 | pos_fraction=0.25, 165 | neg_pos_ub=-1, 166 | add_gt_as_proposals=True), 167 | pos_weight=-1, 168 | debug=False) 169 | ]), 170 | test_cfg=dict( 171 | rpn=dict( 172 | nms_pre=1000, 173 | max_per_img=1000, 174 | nms=dict(type='nms', iou_threshold=0.7), 175 | min_bbox_size=0), 176 | 
rcnn=dict( 177 | score_thr=0.05, 178 | nms=dict(type='nms', iou_threshold=0.5), 179 | max_per_img=100))) 180 | -------------------------------------------------------------------------------- /easymd/core/bbox/match_costs/match_cost.py: -------------------------------------------------------------------------------- 1 | from mmdet.models.losses.utils import weighted_loss 2 | import torch 3 | 4 | from mmdet.core.bbox.iou_calculators import bbox_overlaps 5 | from mmdet.core.bbox.transforms import bbox_cxcywh_to_xyxy, bbox_xyxy_to_cxcywh 6 | #from .builder import MATCH_COST 7 | from mmdet.core.bbox.match_costs.builder import MATCH_COST 8 | import torch.nn.functional as F 9 | import mmcv 10 | #from torchvision.utils import make_grid 11 | from easymd.models.utils.visual import save_tensor 12 | def center_of_mass(bitmasks): 13 | n, h, w = bitmasks.size() 14 | 15 | ys = torch.linspace(0, 1, h, dtype=torch.float32, device=bitmasks.device) 16 | xs = torch.linspace(0, 1, w, dtype=torch.float32, device=bitmasks.device) 17 | 18 | m00 = bitmasks.sum(dim=-1).sum(dim=-1).clamp(min=1e-6) 19 | m10 = (bitmasks * xs).sum(dim=-1).sum(dim=-1) 20 | m01 = (bitmasks * ys[:, None]).sum(dim=-1).sum(dim=-1) 21 | center_x = m10 / m00 22 | center_y = m01 / m00 23 | return torch.stack([center_x, center_y], -1) 24 | #return center_x, center_y 25 | 26 | @weighted_loss 27 | def l1_loss(pred, target): 28 | """Elementwise L1 loss. 29 | 30 | Args: 31 | pred (torch.Tensor): The prediction. 32 | target (torch.Tensor): The learning target of the prediction. 33 | (Unlike smooth L1, this is a plain absolute difference; 34 | there is no beta threshold.) 35 | 36 | Returns: 37 | torch.Tensor: Calculated loss 38 | """ 39 | 40 | #assert pred.size() == target.size() and target.numel() > 0 41 | loss = torch.abs(pred - target) 42 | 43 | return loss 44 | 45 | 46 | 47 | @MATCH_COST.register_module() 48 | class DiceCost(object): 49 | """DiceCost. 50 | 51 | Computes a soft Dice matching cost between predicted masks and 52 | ground-truth masks; the returned cost is (1 - dice) * weight. 53 | 54 | Args: 55 | weight (int | float, optional): loss weight 56 | 57 | Examples: 58 | >>> import torch 59 | >>> self = DiceCost() 60 | >>> pred_masks = torch.rand(2, 4, 4) 61 | >>> gt_masks = torch.randint(0, 2, (3, 4, 4)).float() 62 | >>> self(pred_masks, gt_masks).shape 63 | torch.Size([2, 3]) 64 | """ 65 | 66 | def __init__(self, weight=1.): 67 | self.weight = weight 68 | self.count = 0 69 | def __call__(self, input, target): 70 | """ 71 | Args: 72 | input (Tensor): Predicted masks with values in [0, 1]. 73 | Shape [num_query, H, W]. 74 | target (Tensor): Ground truth binary masks. 75 | Shape [num_gt, H, W].
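As a quick sanity check of the cost this class computes, a tiny worked example with toy flattened masks (a sketch; the implementation below additionally adds a 0.001 smoothing term and multiplies by `weight`):

```python
import torch

pred = torch.tensor([[1., 1., 0., 0.]])   # one predicted mask, flattened
gt   = torch.tensor([[1., 0., 0., 0.]])   # one ground-truth mask, flattened

inter = (pred * gt).sum(-1)                                    # = 1
dice = 2 * inter / (pred.pow(2).sum(-1) + gt.pow(2).sum(-1))   # 2*1 / (2+1) = 0.667
print(1 - dice)                                                # dice cost = 0.333
```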
76 | 77 | Returns: 78 | torch.Tensor: dice cost with weight, shape [num_query, num_gt] 79 | """ 80 | # cost matrix: [num_query, num_gt] 81 | #print('INPUT', input.shape) 82 | #print('target',target.shape) 83 | 84 | N1,H1,W1 = input.shape 85 | N2,H2,W2 = target.shape 86 | 87 | if H1!=H2 or W1!=W2: 88 | target = F.interpolate(target.unsqueeze(0),size=(H1,W1),mode='bilinear').squeeze(0) 89 | 90 | input = input.contiguous().view(N1, -1)[:,None,:] 91 | target = target.contiguous().view(N2, -1)[None,:,:] 92 | 93 | a = torch.sum(input * target, -1) 94 | b = torch.sum(input * input, -1) + 0.001 95 | c = torch.sum(target * target, -1) + 0.001 96 | d = (2 * a) / (b + c) 97 | return (1-d)*self.weight 98 | 99 | 100 | @MATCH_COST.register_module() 101 | class CenterCost(object): 102 | """CenterCost. 103 | 104 | Computes an L1 matching cost between the centers of mass of predicted 105 | masks and ground-truth masks, in normalized [0, 1] coordinates. 106 | 107 | Args: 108 | weight (int | float, optional): loss weight 109 | 110 | Examples: 111 | >>> import torch 112 | >>> self = CenterCost() 113 | >>> pred_masks = torch.rand(2, 4, 4) 114 | >>> gt_masks = torch.randint(0, 2, (3, 4, 4)).float() 115 | >>> self(pred_masks, gt_masks).shape 116 | torch.Size([2, 3]) 117 | """ 118 | 119 | def __init__(self, weight=1.): 120 | self.weight = weight 121 | self.count = 0 122 | def __call__(self, input, target): 123 | """ 124 | Args: 125 | input (Tensor): Predicted masks with values in [0, 1]. 126 | Shape [num_query, H, W]. 127 | target (Tensor): Ground truth binary masks. 128 | Shape [num_gt, H, W]. 129 | 130 | Returns: 131 | torch.Tensor: center cost with weight, shape [num_query, num_gt] 132 | """ 133 | # cost matrix: [num_query, num_gt] 134 | #print('INPUT', input.shape) 135 | #print('target',target.shape) 136 | 137 | N1,H1,W1 = input.shape 138 | N2,H2,W2 = target.shape 139 | if H1!=H2 or W1!=W2: 140 | target = F.interpolate(target.unsqueeze(0),size=(H1,W1),mode='bilinear').squeeze(0) 141 | #save_tensor(input,'{i}.png'.format(i=self.count)) 142 | #self.count +=1 143 | input = center_of_mass(input) 144 | target = center_of_mass(target) 145 | input = input.contiguous().view(N1, 2)[:,None,:] 146 | target = target.contiguous().view(N2,2)[None,:,:] 147 | cost = l1_loss(input, target, reduction='none').sum(-1) # keep the (num_query, num_gt) matrix; the @weighted_loss default reduction would collapse it to a scalar mean 148 | 149 | return cost * self.weight 150 | 151 | 152 | 153 | @MATCH_COST.register_module() 154 | class BBoxL1Cost_center(object): 155 | """BBoxL1Cost_center. 156 | 157 | Like BBoxL1Cost, but compares only the first two coordinates (the box centers for 'xywh', the top-left corners for 'xyxy'). 158 | 159 | Args: 160 | weight (int | float, optional): loss weight 161 | box_format (str, optional): 'xyxy' for DETR, 'xywh' for Sparse_RCNN 162 | 163 | Examples: 164 | >>> import torch 165 | >>> self = BBoxL1Cost_center() 166 | >>> bbox_pred = torch.rand(1, 4) 167 | >>> gt_bboxes = torch.FloatTensor([[0, 0, 2, 4], [1, 2, 3, 4]]) 168 | >>> self(bbox_pred, gt_bboxes).shape 169 | torch.Size([1, 2]) 170 | """ 171 | 172 | def __init__(self, weight=1., box_format='xyxy'): 173 | self.weight = weight 174 | assert box_format in ['xyxy', 'xywh'] 175 | self.box_format = box_format 176 | 177 | def __call__(self, bbox_pred, gt_bboxes): 178 | """ 179 | Args: 180 | bbox_pred (Tensor): Predicted boxes with normalized coordinates 181 | (cx, cy, w, h), which are all in range [0, 1]. Shape 182 | [num_query, 4]. 183 | gt_bboxes (Tensor): Ground truth boxes with normalized 184 | coordinates (x1, y1, x2, y2). Shape [num_gt, 4].
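For reference, the `center_of_mass` helper above reduces to normalized image moments; a small sketch on a toy mask (coordinates are in [0, 1], matching the helper):

```python
import torch

mask = torch.zeros(1, 4, 4)
mask[0, 1:3, 1:3] = 1.0                    # a centered 2x2 blob

ys = torch.linspace(0, 1, 4)
xs = torch.linspace(0, 1, 4)
m00 = mask.sum((-1, -2)).clamp(min=1e-6)   # total mass
m10 = (mask * xs).sum((-1, -2))            # first moment in x
m01 = (mask * ys[:, None]).sum((-1, -2))   # first moment in y
print(m10 / m00, m01 / m00)                # both 0.5: the blob is centered
```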
185 | 186 | Returns: 187 | torch.Tensor: bbox_cost value with weight 188 | """ 189 | if self.box_format == 'xywh': 190 | gt_bboxes = bbox_xyxy_to_cxcywh(gt_bboxes) 191 | elif self.box_format == 'xyxy': 192 | bbox_pred = bbox_cxcywh_to_xyxy(bbox_pred) 193 | bbox_cost = torch.cdist(bbox_pred[:,:2], gt_bboxes[:,:2], p=1) 194 | return bbox_cost * self.weight -------------------------------------------------------------------------------- /configs/models/cascade_mask_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='CascadeRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), 35 | roi_head=dict( 36 | type='CascadeRoIHead', 37 | num_stages=3, 38 | stage_loss_weights=[1, 0.5, 0.25], 39 | bbox_roi_extractor=dict( 40 | type='SingleRoIExtractor', 41 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 42 | out_channels=256, 43 | featmap_strides=[4, 8, 16, 32]), 44 | bbox_head=[ 45 | dict( 46 | type='Shared2FCBBoxHead', 47 | in_channels=256, 48 | fc_out_channels=1024, 49 | roi_feat_size=7, 50 | num_classes=80, 51 | bbox_coder=dict( 52 | type='DeltaXYWHBBoxCoder', 53 | target_means=[0., 0., 0., 0.], 54 | target_stds=[0.1, 0.1, 0.2, 0.2]), 55 | reg_class_agnostic=True, 56 | loss_cls=dict( 57 | type='CrossEntropyLoss', 58 | use_sigmoid=False, 59 | loss_weight=1.0), 60 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 61 | loss_weight=1.0)), 62 | dict( 63 | type='Shared2FCBBoxHead', 64 | in_channels=256, 65 | fc_out_channels=1024, 66 | roi_feat_size=7, 67 | num_classes=80, 68 | bbox_coder=dict( 69 | type='DeltaXYWHBBoxCoder', 70 | target_means=[0., 0., 0., 0.], 71 | target_stds=[0.05, 0.05, 0.1, 0.1]), 72 | reg_class_agnostic=True, 73 | loss_cls=dict( 74 | type='CrossEntropyLoss', 75 | use_sigmoid=False, 76 | loss_weight=1.0), 77 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 78 | loss_weight=1.0)), 79 | dict( 80 | type='Shared2FCBBoxHead', 81 | in_channels=256, 82 | fc_out_channels=1024, 83 | roi_feat_size=7, 84 | num_classes=80, 85 | bbox_coder=dict( 86 | type='DeltaXYWHBBoxCoder', 87 | target_means=[0., 0., 0., 0.], 88 | target_stds=[0.033, 0.033, 0.067, 0.067]), 89 | reg_class_agnostic=True, 90 | loss_cls=dict( 91 | type='CrossEntropyLoss', 92 | use_sigmoid=False, 93 | loss_weight=1.0), 94 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) 95 | ], 96 | mask_roi_extractor=dict( 97 | type='SingleRoIExtractor', 98 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 99 | out_channels=256, 100 | featmap_strides=[4, 8, 16, 32]), 101 | things_mask_head=dict( 102 | type='FCNMaskHead', 103 | num_convs=4, 104 | in_channels=256, 105 | 
conv_out_channels=256, 106 | num_classes=80, 107 | loss_mask=dict( 108 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 109 | # model training and testing settings 110 | train_cfg=dict( 111 | rpn=dict( 112 | assigner=dict( 113 | type='MaxIoUAssigner', 114 | pos_iou_thr=0.7, 115 | neg_iou_thr=0.3, 116 | min_pos_iou=0.3, 117 | match_low_quality=True, 118 | ignore_iof_thr=-1), 119 | sampler=dict( 120 | type='RandomSampler', 121 | num=256, 122 | pos_fraction=0.5, 123 | neg_pos_ub=-1, 124 | add_gt_as_proposals=False), 125 | allowed_border=0, 126 | pos_weight=-1, 127 | debug=False), 128 | rpn_proposal=dict( 129 | nms_pre=2000, 130 | max_per_img=2000, 131 | nms=dict(type='nms', iou_threshold=0.7), 132 | min_bbox_size=0), 133 | rcnn=[ 134 | dict( 135 | assigner=dict( 136 | type='MaxIoUAssigner', 137 | pos_iou_thr=0.5, 138 | neg_iou_thr=0.5, 139 | min_pos_iou=0.5, 140 | match_low_quality=False, 141 | ignore_iof_thr=-1), 142 | sampler=dict( 143 | type='RandomSampler', 144 | num=512, 145 | pos_fraction=0.25, 146 | neg_pos_ub=-1, 147 | add_gt_as_proposals=True), 148 | mask_size=28, 149 | pos_weight=-1, 150 | debug=False), 151 | dict( 152 | assigner=dict( 153 | type='MaxIoUAssigner', 154 | pos_iou_thr=0.6, 155 | neg_iou_thr=0.6, 156 | min_pos_iou=0.6, 157 | match_low_quality=False, 158 | ignore_iof_thr=-1), 159 | sampler=dict( 160 | type='RandomSampler', 161 | num=512, 162 | pos_fraction=0.25, 163 | neg_pos_ub=-1, 164 | add_gt_as_proposals=True), 165 | mask_size=28, 166 | pos_weight=-1, 167 | debug=False), 168 | dict( 169 | assigner=dict( 170 | type='MaxIoUAssigner', 171 | pos_iou_thr=0.7, 172 | neg_iou_thr=0.7, 173 | min_pos_iou=0.7, 174 | match_low_quality=False, 175 | ignore_iof_thr=-1), 176 | sampler=dict( 177 | type='RandomSampler', 178 | num=512, 179 | pos_fraction=0.25, 180 | neg_pos_ub=-1, 181 | add_gt_as_proposals=True), 182 | mask_size=28, 183 | pos_weight=-1, 184 | debug=False) 185 | ]), 186 | test_cfg=dict( 187 | rpn=dict( 188 | nms_pre=1000, 189 | max_per_img=1000, 190 | nms=dict(type='nms', iou_threshold=0.7), 191 | min_bbox_size=0), 192 | rcnn=dict( 193 | score_thr=0.05, 194 | nms=dict(type='nms', iou_threshold=0.5), 195 | max_per_img=100, 196 | mask_thr_binary=0.5))) 197 | -------------------------------------------------------------------------------- /easymd/models/detectors/single_stage_panoptic_detector.py: -------------------------------------------------------------------------------- 1 | from typing import FrozenSet 2 | from mmcv.runner.fp16_utils import auto_fp16 3 | import torch 4 | import torch.nn.functional as F 5 | from mmdet.core import bbox2result 6 | from mmdet.models.detectors.base import BaseDetector 7 | from mmdet.models.detectors.single_stage import SingleStageDetector 8 | from mmdet.models.builder import DETECTORS, build_backbone, build_head, build_neck 9 | #from .base import BaseDetector 10 | import mmcv 11 | from torch.utils.checkpoint import checkpoint 12 | @DETECTORS.register_module() 13 | class SingleStagePanopticDetector(BaseDetector): 14 | """Base class for single-stage detectors. 15 | 16 | Single-stage detectors directly and densely predict bounding boxes on the 17 | output features of the backbone+neck. 
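As context for the constructor that follows, here is a minimal sketch of how an mmdet-style detector of this shape is assembled (the sub-configs below are illustrative stand-ins, not the actual Panoptic SegFormer configs, which live under configs/panformer/):

```python
from mmdet.models.builder import build_backbone, build_neck

# Illustrative sub-configs in mmdet's dict-config style.
backbone_cfg = dict(type='ResNet', depth=50, num_stages=4,
                    out_indices=(0, 1, 2, 3), norm_cfg=dict(type='BN'))
neck_cfg = dict(type='FPN', in_channels=[256, 512, 1024, 2048],
                out_channels=256, num_outs=5)

backbone = build_backbone(backbone_cfg)
neck = build_neck(neck_cfg)
# Features flow backbone -> neck -> bbox_head, as in extract_feat() below.
```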
18 | """ 19 | 20 | def __init__(self, 21 | 22 | backbone, 23 | neck=None, 24 | bbox_head=None, 25 | train_cfg=None, 26 | test_cfg=None, 27 | pretrained=None, 28 | init_cfg=None, 29 | with_checkpoint =False): 30 | 31 | super(SingleStagePanopticDetector, self).__init__(init_cfg) 32 | self.fp16_enabled = False 33 | backbone.pretrained = pretrained 34 | self.backbone = build_backbone(backbone) 35 | if neck is not None: 36 | self.neck = build_neck(neck) 37 | bbox_head.update(train_cfg=train_cfg) 38 | bbox_head.update(test_cfg=test_cfg) 39 | self.bbox_head = build_head(bbox_head) 40 | self.train_cfg = train_cfg 41 | self.test_cfg = test_cfg 42 | self.with_checkpoint = with_checkpoint 43 | def extract_feat(self, img): 44 | """Directly extract features from the backbone+neck.""" 45 | x = self.backbone(img) 46 | if self.with_neck: 47 | x = self.neck(x) 48 | return x 49 | 50 | def forward_dummy(self, img): 51 | """Used for computing network flops. 52 | 53 | See `mmdetection/tools/analysis_tools/get_flops.py` 54 | """ 55 | x = self.extract_feat(img) 56 | outs = self.bbox_head(x) 57 | return outs 58 | @auto_fp16(apply_to=('img',)) 59 | def forward_train(self, 60 | img, 61 | img_metas, 62 | gt_bboxes, 63 | gt_labels, 64 | gt_masks=None, 65 | gt_bboxes_ignore=None, 66 | gt_semantic_seg=None 67 | ): 68 | """ 69 | Args: 70 | img (Tensor): Input images of shape (N, C, H, W). 71 | Typically these should be mean centered and std scaled. 72 | img_metas (list[dict]): A List of image info dict where each dict 73 | has: 'img_shape', 'scale_factor', 'flip', and may also contain 74 | 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. 75 | For details on the values of these keys see 76 | :class:`mmdet.datasets.pipelines.Collect`. 77 | gt_bboxes (list[Tensor]): Each item are the truth boxes for each 78 | image in [tl_x, tl_y, br_x, br_y] format. 79 | gt_labels (list[Tensor]): Class indices corresponding to each box 80 | gt_bboxes_ignore (None | list[Tensor]): Specify which bounding 81 | boxes can be ignored when computing the loss. 82 | 83 | Returns: 84 | dict[str, Tensor]: A dictionary of loss components. 85 | """ 86 | 87 | 88 | #mmcv.imshow(gt_semantic_seg.squeeze(0).squeeze(0).cpu().numpy()) 89 | #mmcv.imshow(img.squeeze(0).permute(1,2,0).cpu().numpy()) 90 | 91 | batch_input_shape = tuple(img[0].size()[-2:]) 92 | for img_meta in img_metas: 93 | img_meta['batch_input_shape'] = batch_input_shape 94 | #img_metas[0]['img'] = img 95 | #super(SingleStagePanopticDetector, self).forward_train(img, img_metas) 96 | if self.with_checkpoint: 97 | img.requires_grad_(True) 98 | x = checkpoint(self.extract_feat,img) 99 | else: 100 | x = self.extract_feat(img) 101 | BS,C,H,W = img.shape 102 | new_gt_masks = [] 103 | for each in gt_masks: 104 | mask =torch.tensor(each.to_ndarray(),device=x[0].device) 105 | _,h,w = mask.shape 106 | padding = ( 107 | 0,W-w, 108 | 0,H-h 109 | ) 110 | mask = F.pad(mask,padding) 111 | new_gt_masks.append(mask) 112 | gt_masks = new_gt_masks 113 | 114 | losses = self.bbox_head.forward_train(x, img_metas, gt_bboxes, 115 | gt_labels, gt_masks,gt_bboxes_ignore,gt_semantic_seg=gt_semantic_seg) 116 | return losses 117 | 118 | def simple_test(self, img, img_metas=None, rescale=False): 119 | """Test function without test time augmentation. 120 | 121 | Args: 122 | imgs (list[torch.Tensor]): List of multiple images 123 | img_metas (list[dict]): List of image information. 124 | rescale (bool, optional): Whether to rescale the results. 125 | Defaults to False. 
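A quick note on the mask-padding step in `forward_train` above: `F.pad` takes the padding as (left, right, top, bottom) for the last two dimensions, so each ground-truth mask is zero-padded up to the batch input shape. A minimal sketch:

```python
import torch
import torch.nn.functional as F

H, W = 8, 8                                   # batch input shape
mask = torch.ones(1, 5, 6)                    # one instance mask of size 5x6
padded = F.pad(mask, (0, W - 6, 0, H - 5))    # pad right and bottom with zeros
print(padded.shape)                           # torch.Size([1, 8, 8])
```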

        Returns:
            list[tuple]: Per-image (bbox_results, segm_results) pairs, where
                bbox_results is a list of per-class detection arrays and
                segm_results is the segmentation output of the head.
        """
        x = self.extract_feat(img)
        outs = self.bbox_head(x)
        # get origin input shape to support onnx dynamic shape
        if torch.onnx.is_in_onnx_export():
            # get shape as tensor
            img_shape = torch._shape_as_tensor(img)[2:]
            img_metas[0]['img_shape_for_onnx'] = img_shape
        bbox_list, seg_list = self.bbox_head.get_bboxes(
            *outs, img_metas, rescale=rescale)
        # skip post-processing when exporting to ONNX
        if torch.onnx.is_in_onnx_export():
            return bbox_list

        bbox_results = [
            bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes)
            for det_bboxes, det_labels in bbox_list
        ]
        return list(zip(bbox_results, seg_list))

    def aug_test(self, imgs, img_metas, rescale=False):
        """Test function with test time augmentation.

        Args:
            imgs (list[Tensor]): The outer list indicates test-time
                augmentations and the inner Tensor should have shape
                NxCxHxW, containing all images in the batch.
            img_metas (list[list[dict]]): The outer list indicates test-time
                augs (multiscale, flip, etc.) and the inner list indicates
                images in a batch. Each dict has image information.
            rescale (bool, optional): Whether to rescale the results.
                Defaults to False.

        Returns:
            list[list[np.ndarray]]: BBox results of each image and classes.
                The outer list corresponds to each image. The inner list
                corresponds to each class.
        """
        assert hasattr(self.bbox_head, 'aug_test'), \
            f'{self.bbox_head.__class__.__name__}' \
            ' does not support test-time augmentation'

        feats = self.extract_feats(imgs)
        return [self.bbox_head.aug_test(feats, img_metas, rescale=rescale)]
-------------------------------------------------------------------------------- /configs/_base_/models/cascade_mask_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='CascadeRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), 35 | roi_head=dict( 36 | type='CascadeRoIHead', 37 | num_stages=3, 38 | stage_loss_weights=[1, 0.5, 0.25], 39 | bbox_roi_extractor=dict( 40 | type='SingleRoIExtractor', 41 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 42 | out_channels=256, 43 | featmap_strides=[4, 8, 16, 32]), 44 | bbox_head=[ 45 | dict( 46 | 
type='Shared2FCBBoxHead', 47 | in_channels=256, 48 | fc_out_channels=1024, 49 | roi_feat_size=7, 50 | num_classes=80, 51 | bbox_coder=dict( 52 | type='DeltaXYWHBBoxCoder', 53 | target_means=[0., 0., 0., 0.], 54 | target_stds=[0.1, 0.1, 0.2, 0.2]), 55 | reg_class_agnostic=True, 56 | loss_cls=dict( 57 | type='CrossEntropyLoss', 58 | use_sigmoid=False, 59 | loss_weight=1.0), 60 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 61 | loss_weight=1.0)), 62 | dict( 63 | type='Shared2FCBBoxHead', 64 | in_channels=256, 65 | fc_out_channels=1024, 66 | roi_feat_size=7, 67 | num_classes=80, 68 | bbox_coder=dict( 69 | type='DeltaXYWHBBoxCoder', 70 | target_means=[0., 0., 0., 0.], 71 | target_stds=[0.05, 0.05, 0.1, 0.1]), 72 | reg_class_agnostic=True, 73 | loss_cls=dict( 74 | type='CrossEntropyLoss', 75 | use_sigmoid=False, 76 | loss_weight=1.0), 77 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 78 | loss_weight=1.0)), 79 | dict( 80 | type='Shared2FCBBoxHead', 81 | in_channels=256, 82 | fc_out_channels=1024, 83 | roi_feat_size=7, 84 | num_classes=80, 85 | bbox_coder=dict( 86 | type='DeltaXYWHBBoxCoder', 87 | target_means=[0., 0., 0., 0.], 88 | target_stds=[0.033, 0.033, 0.067, 0.067]), 89 | reg_class_agnostic=True, 90 | loss_cls=dict( 91 | type='CrossEntropyLoss', 92 | use_sigmoid=False, 93 | loss_weight=1.0), 94 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) 95 | ], 96 | mask_roi_extractor=dict( 97 | type='SingleRoIExtractor', 98 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 99 | out_channels=256, 100 | featmap_strides=[4, 8, 16, 32]), 101 | things_mask_head=dict( 102 | type='FCNMaskHead', 103 | num_convs=4, 104 | in_channels=256, 105 | conv_out_channels=256, 106 | num_classes=80, 107 | loss_mask=dict( 108 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 109 | # model training and testing settings 110 | train_cfg=dict( 111 | rpn=dict( 112 | assigner=dict( 113 | type='MaxIoUAssigner', 114 | pos_iou_thr=0.7, 115 | neg_iou_thr=0.3, 116 | min_pos_iou=0.3, 117 | match_low_quality=True, 118 | ignore_iof_thr=-1), 119 | sampler=dict( 120 | type='RandomSampler', 121 | num=256, 122 | pos_fraction=0.5, 123 | neg_pos_ub=-1, 124 | add_gt_as_proposals=False), 125 | allowed_border=0, 126 | pos_weight=-1, 127 | debug=False), 128 | rpn_proposal=dict( 129 | nms_pre=2000, 130 | max_per_img=2000, 131 | nms=dict(type='nms', iou_threshold=0.7), 132 | min_bbox_size=0), 133 | rcnn=[ 134 | dict( 135 | assigner=dict( 136 | type='MaxIoUAssigner', 137 | pos_iou_thr=0.5, 138 | neg_iou_thr=0.5, 139 | min_pos_iou=0.5, 140 | match_low_quality=False, 141 | ignore_iof_thr=-1), 142 | sampler=dict( 143 | type='RandomSampler', 144 | num=512, 145 | pos_fraction=0.25, 146 | neg_pos_ub=-1, 147 | add_gt_as_proposals=True), 148 | mask_size=28, 149 | pos_weight=-1, 150 | debug=False), 151 | dict( 152 | assigner=dict( 153 | type='MaxIoUAssigner', 154 | pos_iou_thr=0.6, 155 | neg_iou_thr=0.6, 156 | min_pos_iou=0.6, 157 | match_low_quality=False, 158 | ignore_iof_thr=-1), 159 | sampler=dict( 160 | type='RandomSampler', 161 | num=512, 162 | pos_fraction=0.25, 163 | neg_pos_ub=-1, 164 | add_gt_as_proposals=True), 165 | mask_size=28, 166 | pos_weight=-1, 167 | debug=False), 168 | dict( 169 | assigner=dict( 170 | type='MaxIoUAssigner', 171 | pos_iou_thr=0.7, 172 | neg_iou_thr=0.7, 173 | min_pos_iou=0.7, 174 | match_low_quality=False, 175 | ignore_iof_thr=-1), 176 | sampler=dict( 177 | type='RandomSampler', 178 | num=512, 179 | pos_fraction=0.25, 180 | neg_pos_ub=-1, 181 | add_gt_as_proposals=True), 
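# note: mask_size=28 below means that, during training, ground-truth masks are cropped to each sampled RoI and resized to 28x28 before the mask loss is computed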
182 | mask_size=28, 183 | pos_weight=-1, 184 | debug=False) 185 | ]), 186 | test_cfg=dict( 187 | rpn=dict( 188 | nms_pre=1000, 189 | max_per_img=1000, 190 | nms=dict(type='nms', iou_threshold=0.7), 191 | min_bbox_size=0), 192 | rcnn=dict( 193 | score_thr=0.05, 194 | nms=dict(type='nms', iou_threshold=0.5), 195 | max_per_img=100, 196 | mask_thr_binary=0.5))) 197 |
-------------------------------------------------------------------------------- /easymd/analysis_tools/analyze_results.py: --------------------------------------------------------------------------------
import argparse
import os.path as osp

import mmcv
import numpy as np
from mmcv import Config, DictAction

from mmdet.core.evaluation import eval_map
from mmdet.core.visualization import imshow_gt_det_bboxes
from mmdet.datasets import build_dataset, get_loading_pipeline


def bbox_map_eval(det_result, annotation):
    """Evaluate mAP of single image det result.

    Args:
        det_result (list[list]): [[cls1_det, cls2_det, ...], ...].
            The outer list indicates images, and the inner list indicates
            per-class detected bboxes.
        annotation (dict): Ground truth annotations where keys of
            annotations are:

            - bboxes: numpy array of shape (n, 4)
            - labels: numpy array of shape (n, )
            - bboxes_ignore (optional): numpy array of shape (k, 4)
            - labels_ignore (optional): numpy array of shape (k, )

    Returns:
        float: mAP
    """

    # use only bbox det result
    if isinstance(det_result, tuple):
        bbox_det_result = [det_result[0]]
    else:
        bbox_det_result = [det_result]
    # COCO-style mAP: average the AP over IoU thresholds 0.50, 0.55, ..., 0.95
    iou_thrs = np.linspace(
        .5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True)
    mean_aps = []
    for thr in iou_thrs:
        mean_ap, _ = eval_map(
            bbox_det_result, [annotation], iou_thr=thr, logger='silent')
        mean_aps.append(mean_ap)
    return sum(mean_aps) / len(mean_aps)


class ResultVisualizer(object):
    """Display and save evaluation results.

    Args:
        show (bool): Whether to show the image. Default: False
        wait_time (float): Value of waitKey param. Default: 0.
        score_thr (float): Minimum score of bboxes to be shown.
            Default: 0
    """

    def __init__(self, show=False, wait_time=0, score_thr=0):
        self.show = show
        self.wait_time = wait_time
        self.score_thr = score_thr

    def _save_image_gts_results(self, dataset, results, mAPs, out_dir=None):
        mmcv.mkdir_or_exist(out_dir)

        for mAP_info in mAPs:
            index, mAP = mAP_info
            data_info = dataset.prepare_train_img(index)

            # build the output file path; the per-image mAP is encoded in
            # the saved file name
            filename = data_info['filename']
            if data_info['img_prefix'] is not None:
                filename = osp.join(data_info['img_prefix'], filename)
            fname, name = osp.splitext(osp.basename(filename))
            save_filename = fname + '_' + str(round(mAP, 3)) + name
            out_file = osp.join(out_dir, save_filename)
            imshow_gt_det_bboxes(
                data_info['img'],
                data_info,
                results[index],
                dataset.CLASSES,
                show=self.show,
                score_thr=self.score_thr,
                wait_time=self.wait_time,
                out_file=out_file)

    def evaluate_and_show(self,
                          dataset,
                          results,
                          topk=20,
                          show_dir='work_dir',
                          eval_fn=None):
        """Evaluate and show results.

        Args:
            dataset (Dataset): A PyTorch dataset.
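                Typically built via ``build_dataset(cfg.data.test)``, as
                done in ``main()`` below.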
            results (list): Detection results loaded from the test
                results pkl file.
            topk (int): Number of images with the highest and lowest mAP
                to save after sorting. Default: 20
            show_dir (str, optional): The directory where painted images
                will be saved. Default: 'work_dir'
            eval_fn (callable, optional): Per-image evaluation function.
                Defaults to ``bbox_map_eval`` when None.
        """

        assert topk > 0
        if (topk * 2) > len(dataset):
            topk = len(dataset) // 2

        if eval_fn is None:
            eval_fn = bbox_map_eval
        else:
            assert callable(eval_fn)

        prog_bar = mmcv.ProgressBar(len(results))
        _mAPs = {}
        for i, result in enumerate(results):
            # dataset[i] should not be accessed directly, because there is
            # a risk of mismatch between results and dataset indices
            data_info = dataset.prepare_train_img(i)
            mAP = eval_fn(result, data_info['ann_info'])
            _mAPs[i] = mAP
            prog_bar.update()

        # sort by mAP in ascending order, then take topk images at each end
        _mAPs = list(sorted(_mAPs.items(), key=lambda kv: kv[1]))
        good_mAPs = _mAPs[-topk:]
        bad_mAPs = _mAPs[:topk]

        good_dir = osp.abspath(osp.join(show_dir, 'good'))
        bad_dir = osp.abspath(osp.join(show_dir, 'bad'))
        self._save_image_gts_results(dataset, results, good_mAPs, good_dir)
        self._save_image_gts_results(dataset, results, bad_mAPs, bad_dir)


def parse_args():
    parser = argparse.ArgumentParser(
        description='MMDet: evaluate the prediction result of each image')
    parser.add_argument('config', help='test config file path')
    parser.add_argument(
        'prediction_path', help='path to the test results pkl file')
    parser.add_argument(
        'show_dir', help='directory where painted images will be saved')
    parser.add_argument('--show', action='store_true', help='show results')
    parser.add_argument(
        '--wait-time',
        type=float,
        default=0,
        help='the interval of show (s), 0 is block')
    parser.add_argument(
        '--topk',
        default=20,
        type=int,
        help='number of images with the highest and lowest scores '
        'to save after index sorting')
    parser.add_argument(
        '--show-score-thr',
        type=float,
        default=0,
        help='score threshold (default: 0.)')
    parser.add_argument(
        '--cfg-options',
        nargs='+',
        action=DictAction,
        help='override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file. If the value to '
        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
        'Note that the quotation marks are necessary and that no white space '
        'is allowed.')
    args = parser.parse_args()
    return args


def main():
    args = parse_args()

    mmcv.check_file_exist(args.prediction_path)

    cfg = Config.fromfile(args.config)
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)
    cfg.data.test.test_mode = True
    # import modules from string list.
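    # A config may opt in with, e.g. (illustrative snippet):
    #   custom_imports = dict(imports=['easymd'], allow_failed_imports=False)
    # which registers the custom easymd modules before the dataset is built.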
    if cfg.get('custom_imports', None):
        from mmcv.utils import import_modules_from_strings
        import_modules_from_strings(**cfg['custom_imports'])

    cfg.data.test.pop('samples_per_gpu', 0)
    # reuse the loading transforms of the train pipeline so that ground-truth
    # annotations are available for visualization
    cfg.data.test.pipeline = get_loading_pipeline(cfg.data.train.pipeline)
    dataset = build_dataset(cfg.data.test)
    outputs = mmcv.load(args.prediction_path)

    result_visualizer = ResultVisualizer(args.show, args.wait_time,
                                         args.show_score_thr)
    result_visualizer.evaluate_and_show(
        dataset, outputs, topk=args.topk, show_dir=args.show_dir)


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------