├── easymd ├── utils │ └── __init__.py ├── analysis_tools │ ├── __init__.py │ ├── get_flops.py │ ├── anchor_analyze.py │ ├── browse_dataset.py │ ├── eval_metric.py │ ├── atss_anchor.py │ ├── benchmark.py │ ├── kmean.py │ ├── center_sample_demo.py │ ├── visualize_coco.py │ ├── visualize_panoptic.py │ ├── analyze_logs.py │ └── analyze_results.py ├── models │ ├── losses │ │ ├── __init__.py │ │ └── dice_loss.py │ ├── utils │ │ ├── __init__.py │ │ └── transform.py │ ├── detectors │ │ ├── __init__.py │ │ ├── panseg.py │ │ ├── detr_plus.py │ │ └── single_stage_panoptic_detector.py │ ├── __init__.py │ ├── backbones │ │ └── __init__.py │ └── panformer │ │ └── __init__.py ├── apis │ └── __init__.py ├── core │ ├── bbox │ │ ├── match_costs │ │ │ ├── __init__.py │ │ │ └── match_cost.py │ │ └── __init__.py │ ├── __init__.py │ └── evaluation │ │ ├── __init__.py │ │ └── eval_hooks.py ├── runner │ ├── __init__.py │ ├── hooks │ │ ├── __init__.py │ │ └── grad_check.py │ └── checkpoints.py ├── datasets │ ├── panopticapi │ │ ├── __init__.py │ │ ├── README.md │ │ └── utils.py │ └── __init__.py └── __init__.py ├── figs └── arch.png ├── configs ├── panformer │ ├── panformer_r50_12e_coco_panoptic.py │ ├── panformer_r50_24e_coco_panoptic.py │ ├── panformer_pvtb5_24e_coco_panoptic.py │ ├── panformer_r101_24e_coco_panoptic.py │ └── panformer_swinl_24e_coco_panoptic.py ├── _base_ │ ├── schedules │ │ ├── schedule_1x.py │ │ ├── schedule_20e.py │ │ └── schedule_2x.py │ ├── default_runtime.py │ ├── models │ │ ├── ssd300.py │ │ ├── rpn_r50_caffe_c4.py │ │ ├── retinanet_r50_fpn.py │ │ ├── rpn_r50_fpn.py │ │ ├── fast_rcnn_r50_fpn.py │ │ ├── faster_rcnn_r50_caffe_dc5.py │ │ ├── faster_rcnn_r50_fpn.py │ │ ├── faster_rcnn_r50_caffe_c4.py │ │ ├── mask_rcnn_r50_fpn.py │ │ ├── mask_rcnn_r50_caffe_c4.py │ │ ├── cascade_rcnn_r50_fpn.py │ │ └── cascade_mask_rcnn_r50_fpn.py │ └── datasets │ │ └── coco_panoptic_plus.py └── models │ ├── ssd300.py │ ├── rpn_r50_caffe_c4.py │ ├── retinanet_r50_fpn.py │ ├── rpn_r50_fpn.py │ ├── fast_rcnn_r50_fpn.py │ ├── faster_rcnn_r50_caffe_dc5.py │ ├── faster_rcnn_r50_fpn.py │ ├── faster_rcnn_r50_caffe_c4.py │ ├── mask_rcnn_r50_fpn.py │ ├── mask_rcnn_r50_caffe_c4.py │ ├── cascade_rcnn_r50_fpn.py │ └── cascade_mask_rcnn_r50_fpn.py ├── requirements.txt ├── tools ├── dist_train.sh ├── dist_test.sh ├── ana_tools │ ├── dataset_split.py │ ├── get_flops.py │ ├── ana_query.py │ └── benchmark.py ├── bricks │ └── infererce.py └── convert_panoptic_coco.sh ├── setup.py ├── .gitignore ├── converter └── panoptic_cityscapes_categories.json └── README.md /easymd/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /easymd/analysis_tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /easymd/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .dice_loss import * -------------------------------------------------------------------------------- /easymd/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .test import * 2 | from .train import * -------------------------------------------------------------------------------- /easymd/core/bbox/match_costs/__init__.py: -------------------------------------------------------------------------------- 1 
| from .match_cost import * --------------------------------------------------------------------------------
/easymd/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox import * 2 | from .evaluation import * --------------------------------------------------------------------------------
/easymd/core/bbox/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .match_costs import * 3 | 4 | --------------------------------------------------------------------------------
/easymd/runner/__init__.py: -------------------------------------------------------------------------------- 1 | from .hooks import * 2 | from .checkpoints import * 3 | --------------------------------------------------------------------------------
/easymd/runner/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | from .grad_check import GradChecker, CacheCleaner --------------------------------------------------------------------------------
/easymd/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .eval_hooks import EvalHook_plus,DistEvalHook_plus --------------------------------------------------------------------------------
/easymd/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .visual import * 3 | from .transform import * 4 | --------------------------------------------------------------------------------
/figs/arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiqi-li/Panoptic-SegFormer/HEAD/figs/arch.png --------------------------------------------------------------------------------
/easymd/datasets/panopticapi/__init__.py: -------------------------------------------------------------------------------- 1 | from .evaluation import * 2 | from .converter_2cpng2pan import * --------------------------------------------------------------------------------
/easymd/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .panopticapi import * 2 | from .coco_panoptic import CocoDataset_panoptic 3 | --------------------------------------------------------------------------------
/easymd/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .panseg import * 2 | from .single_stage_panoptic_detector import * 3 | from .detr_plus import DETR_plus --------------------------------------------------------------------------------
/easymd/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbones import * 2 | from .utils import * 3 | from .detectors import * 4 | from .losses import * 5 | from .panformer import * --------------------------------------------------------------------------------
/easymd/datasets/panopticapi/README.md: -------------------------------------------------------------------------------- 1 | We copy-pasted [panopticapi](https://github.com/cocodataset/panopticapi) and modified it to make it compatible with our framework. --------------------------------------------------------------------------------
/easymd/__init__.py: -------------------------------------------------------------------------------- 1 | from .analysis_tools import * 2 | 3 | from .models
import * 4 | from .core import * 5 | from .runner import * 6 | from .utils import * 7 | from .datasets import * -------------------------------------------------------------------------------- /configs/panformer/panformer_r50_12e_coco_panoptic.py: -------------------------------------------------------------------------------- 1 | 2 | _base_ = './base.py' 3 | lr_config = dict(policy='step', step=[8]) 4 | runner = dict(type='EpochBasedRunner', max_epochs=12) 5 | -------------------------------------------------------------------------------- /configs/panformer/panformer_r50_24e_coco_panoptic.py: -------------------------------------------------------------------------------- 1 | 2 | _base_ = './base.py' 3 | 4 | lr_config = dict(policy='step', step=[18]) 5 | runner = dict(type='EpochBasedRunner', max_epochs=24) 6 | -------------------------------------------------------------------------------- /easymd/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .pvt import pvt_small_impr1_peg 2 | from .pvt_v2_ap import pvt_v2_b2_ap,pvt_v2_b0_ap 3 | from .pvt_v2 import pvt_v2_b5,pvt_v2_b2, pvt_v2_b0,pvt_v2_b1 4 | from .swin import SwinTransformer 5 | -------------------------------------------------------------------------------- /easymd/models/panformer/__init__.py: -------------------------------------------------------------------------------- 1 | from .detr_head import DETRHeadv2 2 | from .panformer_head import PanformerHead 3 | from .mask_head import MaskHead 4 | from .deformable_detr import Deformable_Transformer 5 | from .tools import * -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | mmcv-full==1.3.4 2 | mmdet==2.12.0 3 | torchvision==0.8.2 4 | timm==0.4.5 5 | pycocotools 6 | einops==0.3.0 7 | Pillow==8.0.1 8 | opencv-python==4.5.1.48 9 | cityscapesscripts 10 | wandb 11 | seaborn 12 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | set -x 4 | 5 | CONFIG=$1 6 | GPUS=$2 7 | PORT=${PORT:-29503} 8 | 9 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 10 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 11 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} --deterministic 12 | -------------------------------------------------------------------------------- /tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29504} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} --eval panoptic 11 | #bbox segm 12 | # -------------------------------------------------------------------------------- /configs/_base_/schedules/schedule_1x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[8, 11]) 11 | runner 
= dict(type='EpochBasedRunner', max_epochs=12) 12 | --------------------------------------------------------------------------------
/configs/_base_/schedules/schedule_20e.py: --------------------------------------------------------------------------------
1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy
5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[16, 19]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=20) 12 | --------------------------------------------------------------------------------
/configs/_base_/schedules/schedule_2x.py: --------------------------------------------------------------------------------
1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy
5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[16, 22]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=24) 12 | --------------------------------------------------------------------------------
/easymd/models/detectors/panseg.py: --------------------------------------------------------------------------------
1 | #from ..builder import DETECTORS 2 | #from .detr import DETR 3 | 4 | 5 | from mmdet.models.detectors.detr import DETR 6 | from mmdet.models.builder import DETECTORS 7 | from easymd.models.detectors.detr_plus import DETR_plus
8 | @DETECTORS.register_module() 9 | class PanSeg(DETR_plus): 10 | 11 | def __init__(self, *args, **kwargs): 12 | super(DETR_plus, self).__init__(*args, **kwargs) 13 | self.count=0 14 | --------------------------------------------------------------------------------
/tools/ana_tools/dataset_split.py: --------------------------------------------------------------------------------
1 | import json 2 | import random 3 | 4 | file_path = './datasets/cityscapes/cityscapes_panoptic_train_detection_format.json' 5 | 
6 | with open(file_path,'r') as f: 7 | data = json.load(f) 8 | images = data['images'] 9 | len_img = len(images) 10 | print(len_img)
11 | random.shuffle(images) 12 | print(images[:len_img//10]) 13 | data['images'] = data['images'][:len_img//10]
14 | out_path = 'partial_' + file_path.split('/')[-1] # assumed output name; the original script breaks off mid-statement here
15 | with open(out_path, 'w') as f: 16 | json.dump(data, f) --------------------------------------------------------------------------------
/configs/_base_/default_runtime.py: --------------------------------------------------------------------------------
1 | checkpoint_config = dict(interval=1) 2 | # yapf:disable 3 | log_config = dict( 4 | interval=50, 5 | hooks=[ 6 | dict(type='TextLoggerHook'), 7 | #dict(type='WandbLoggerHook',init_kwargs=dict(project="Panoptic-Segformer")) 8 | ]) 9 | # yapf:enable
10 | custom_hooks = [dict(type='NumClassCheckHook')] 11 | 12 | dist_params = dict(backend='nccl') 13 | log_level = 'INFO' 14 | load_from = None 15 | resume_from = None 16 | workflow = [('train', 1)] 17 | 18 | --------------------------------------------------------------------------------
/tools/bricks/infererce.py: --------------------------------------------------------------------------------
1 | from mmdet.apis.inference import init_detector, inference_detector
2 | import easymd  # importing easymd registers PanSeg and the custom heads with mmdet
3 | 
4 | config = 'config.py'
5 | #checkpoints = './checkpoints/pseg_r101_r50_latest.pth'
6 | checkpoints = "path/to/pth"
7 | img = '000000322864.jpg'
8 | 
9 | model = init_detector(config, checkpoint=checkpoints)
10 | 
11 | results = inference_detector(model, './datasets/coco/val2017/'+img)
12 | 
--------------------------------------------------------------------------------
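For PanSeg models the inference call above does not return mmdet's usual per-class list: simple_test in easymd/models/detectors/detr_plus.py packs everything into a dict with 'bbox', 'segm' and 'panoptic' keys. A minimal sketch of unpacking it, assuming inference_detector hands that dict back unchanged and using an arbitrary 0.5 score threshold:

    bbox_results = results['bbox'][0]   # one (n, 5) array of [x1, y1, x2, y2, score] per thing class
    segm_results = results['segm'][0]   # one (n, h, w) mask array per thing class
    for cls_id, (bboxes, masks) in enumerate(zip(bbox_results, segm_results)):
        keep = bboxes[:, -1] > 0.5      # illustrative threshold, not from the repo
        print(cls_id, int(keep.sum()), 'detections kept')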
/configs/panformer/panformer_pvtb5_24e_coco_panoptic.py: --------------------------------------------------------------------------------
1 | 2 | _base_ = './base.py' 3 | 4 | model = dict( 5 | # get pvt_v2_b5_22k 6 | # wget https://github.com/whai362/PVT/releases/download/v2/pvt_v2_b5_22k.pth 7 | pretrained='./checkpoints/pvt_v2_b5_22k.pth',
8 | backbone=dict( 9 | type='pvt_v2_b5', 10 | out_indices=(1, 2, 3), 11 | ), 12 | neck=dict( 13 | type='ChannelMapper', 14 | in_channels=[128, 320, 512], 15 | ), 16 | bbox_head=dict( 17 | quality_threshold_things=0.3, 18 | quality_threshold_stuff=0.3, 19 | ) 20 | ) 21 | --------------------------------------------------------------------------------
/easymd/runner/hooks/grad_check.py: --------------------------------------------------------------------------------
1 | from mmcv.runner.hooks.hook import HOOKS, Hook 2 | import torch 3 | 
4 | @HOOKS.register_module() 5 | class GradChecker(Hook): 6 | def __init__(self) -> None: 7 | super().__init__()
8 | def after_train_iter(self,runner): 9 | for key,val in runner.model.named_parameters(): 10 | if val.grad is None and val.requires_grad: 11 | print('WARNING: {key}\'s parameters are not used!'.format(key=key)) 12 | 13 | 
14 | @HOOKS.register_module() 15 | class CacheCleaner(Hook): 16 | def __init__(self) -> None: 17 | super().__init__() 18 | def after_train_epoch(self,runner): 19 | torch.cuda.empty_cache() 20 | --------------------------------------------------------------------------------
/configs/panformer/panformer_r101_24e_coco_panoptic.py: --------------------------------------------------------------------------------
1 | 2 | _base_ = './base.py' 3 | _dim_ = 256 4 | _num_levels_=4 5 | model = dict( 6 | type='PanSeg', 7 | pretrained='torchvision://resnet101',
8 | backbone=dict( 9 | type='ResNet', 10 | depth=101, 11 | num_stages=4, 12 | out_indices=(1, 2, 3), 13 | frozen_stages=1, 14 | norm_cfg=dict(type='BN', requires_grad=False), 15 | norm_eval=True, 16 | style='pytorch'),
17 | neck=dict( 18 | type='ChannelMapper', 19 | in_channels=[512, 1024, 2048], 20 | kernel_size=1, 21 | out_channels=_dim_, 22 | act_cfg=None, 23 | norm_cfg=dict(type='GN', num_groups=32), 24 | num_outs=_num_levels_), 25 | ) --------------------------------------------------------------------------------
/setup.py: --------------------------------------------------------------------------------
1 | from distutils.core import setup 2 | from setuptools import find_packages 3 | 4 | import os 5 | thelibFolder = os.path.dirname(os.path.realpath(__file__)) 6 | requirementPath = thelibFolder + '/requirements.txt'
7 | install_requires = [] # Examples: ["gunicorn", "docutils>=0.3", "lxml==0.5a7"] 8 | if os.path.isfile(requirementPath): 9 | with open(requirementPath) as f: 10 | install_requires = f.read().splitlines() 11 | 
12 | setup( 13 | name='easymd', 14 | version='0.1', 15 | packages=find_packages(), 16 | url='', 17 | license='Apache', 18 | author='Li Zhiqi', 19 | install_requires=install_requires, 20 | author_email='lzq@smail.nju.edu.cn',
21 | description='This package extends the capabilities of MMDetection' 22 | ) 23 | --------------------------------------------------------------------------------
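Since setup.py reads install_requires straight from requirements.txt, the usual way to set up a working environment is an editable install from the repository root (one possible invocation, not mandated by the repo):

    pip install -v -e .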
/easymd/models/utils/transform.py: --------------------------------------------------------------------------------
1 | import torch 2 | import numpy as np 3 | 
4 | def mask2result(seg, labels, num_classes):
5 | """Convert segmentation results to a list of numpy arrays.
6 | 
7 | Args:
8 | seg (torch.Tensor | np.ndarray): shape (n, h, w)
9 | labels (torch.Tensor | np.ndarray): shape (n, )
10 | num_classes (int): class number, including background class
11 | 
12 | Returns:
13 | list(ndarray): mask results of each class
14 | """
15 | 
16 | if seg.shape[0] == 0: 17 | _,h,w = seg.shape 18 | return [np.zeros((0, h, w), dtype=np.float32) for i in range(num_classes)]
19 | else: 20 | if isinstance(seg, torch.Tensor): 21 | seg = seg.detach().cpu().numpy() 22 | labels = labels.detach().cpu().numpy()
23 | return [seg[labels == i, :] for i in range(num_classes)] --------------------------------------------------------------------------------
/configs/panformer/panformer_swinl_24e_coco_panoptic.py: --------------------------------------------------------------------------------
1 | _base_ = './base.py' 2 | _dim_ = 256 3 | _num_levels_=4 4 | model = dict( 5 | type='PanSeg',
6 | # get swin-large 7 | #import os 8 | #import torch 9 | #os.system('wget -O checkpoints/swinl.pth https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window7_224_22k.pth') 10 | #model = torch.load('checkpoints/swinl.pth') 11 | #torch.save(model['model'], 'checkpoints/swinl.pth') 12 | #print('DONE, swin-large was saved as checkpoints/swinl.pth')
13 | pretrained='./checkpoints/swinl.pth', 14 | backbone=dict( 15 | type='SwinTransformer', 16 | embed_dim=192, 17 | depths=[2, 2, 18, 2], 18 | num_heads=[6, 12, 24, 48], 19 | window_size=7, 20 | mlp_ratio=4., 21 | qkv_bias=True, 22 | qk_scale=None, 23 | drop_rate=0., 24 | attn_drop_rate=0., 25 | drop_path_rate=0.3, 26 | ape=False, 27 | patch_norm=True, 28 | out_indices=(1, 2, 3), 29 | use_checkpoint=False),
30 | neck=dict( 31 | type='ChannelMapper', 32 | in_channels=[384, 768, 1536], 33 | kernel_size=1, 34 | out_channels=_dim_, 35 | act_cfg=None, 36 | norm_cfg=dict(type='GN', num_groups=32), 37 | num_outs=_num_levels_),
38 | bbox_head=dict( 39 | quality_threshold_things=0.3, 40 | quality_threshold_stuff=0.3, 41 | ) 42 | ) 43 | --------------------------------------------------------------------------------
/configs/models/ssd300.py: --------------------------------------------------------------------------------
1 | # model settings 2 | input_size = 300 3 | model = dict( 4 | type='SingleStageDetector', 5 | pretrained='open-mmlab://vgg16_caffe',
6 | backbone=dict( 7 | type='SSDVGG', 8 | input_size=input_size, 9 | depth=16, 10 | with_last_pool=False, 11 | ceil_mode=True, 12 | out_indices=(3, 4), 13 | out_feature_indices=(22, 34), 14 | l2_norm_scale=20),
15 | neck=None, 16 | bbox_head=dict( 17 | type='SSDHead', 18 | in_channels=(512, 1024, 512, 256, 256, 256), 19 | num_classes=80,
20 | anchor_generator=dict( 21 | type='SSDAnchorGenerator', 22 | scale_major=False, 23 | input_size=input_size, 24 | basesize_ratio_range=(0.15, 0.9), 25 | strides=[8, 16, 32, 64, 100, 300], 26 | ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]]),
27 | bbox_coder=dict( 28 | type='DeltaXYWHBBoxCoder', 29 | target_means=[.0, .0, .0, .0], 30 | target_stds=[0.1, 0.1, 0.2, 0.2])),
31 | # model training and testing settings 32 | train_cfg=dict( 33 | assigner=dict( 34 | type='MaxIoUAssigner', 35 | pos_iou_thr=0.5, 36 | neg_iou_thr=0.5, 37 | min_pos_iou=0., 38 | ignore_iof_thr=-1, 39 | gt_max_assign_all=False), 40 | smoothl1_beta=1., 41
| allowed_border=-1, 42 | pos_weight=-1, 43 | neg_pos_ratio=3, 44 | debug=False), 45 | test_cfg=dict( 46 | nms_pre=1000, 47 | nms=dict(type='nms', iou_threshold=0.45), 48 | min_bbox_size=0, 49 | score_thr=0.02, 50 | max_per_img=200)) 51 | cudnn_benchmark = True 52 | -------------------------------------------------------------------------------- /configs/_base_/models/ssd300.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | input_size = 300 3 | model = dict( 4 | type='SingleStageDetector', 5 | pretrained='open-mmlab://vgg16_caffe', 6 | backbone=dict( 7 | type='SSDVGG', 8 | input_size=input_size, 9 | depth=16, 10 | with_last_pool=False, 11 | ceil_mode=True, 12 | out_indices=(3, 4), 13 | out_feature_indices=(22, 34), 14 | l2_norm_scale=20), 15 | neck=None, 16 | bbox_head=dict( 17 | type='SSDHead', 18 | in_channels=(512, 1024, 512, 256, 256, 256), 19 | num_classes=80, 20 | anchor_generator=dict( 21 | type='SSDAnchorGenerator', 22 | scale_major=False, 23 | input_size=input_size, 24 | basesize_ratio_range=(0.15, 0.9), 25 | strides=[8, 16, 32, 64, 100, 300], 26 | ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]]), 27 | bbox_coder=dict( 28 | type='DeltaXYWHBBoxCoder', 29 | target_means=[.0, .0, .0, .0], 30 | target_stds=[0.1, 0.1, 0.2, 0.2])), 31 | # model training and testing settings 32 | train_cfg=dict( 33 | assigner=dict( 34 | type='MaxIoUAssigner', 35 | pos_iou_thr=0.5, 36 | neg_iou_thr=0.5, 37 | min_pos_iou=0., 38 | ignore_iof_thr=-1, 39 | gt_max_assign_all=False), 40 | smoothl1_beta=1., 41 | allowed_border=-1, 42 | pos_weight=-1, 43 | neg_pos_ratio=3, 44 | debug=False), 45 | test_cfg=dict( 46 | nms_pre=1000, 47 | nms=dict(type='nms', iou_threshold=0.45), 48 | min_bbox_size=0, 49 | score_thr=0.02, 50 | max_per_img=200)) 51 | cudnn_benchmark = True 52 | -------------------------------------------------------------------------------- /configs/models/rpn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='open-mmlab://detectron2/resnet50_caffe', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=3, 9 | strides=(1, 2, 2), 10 | dilations=(1, 1, 1), 11 | out_indices=(2, ), 12 | frozen_stages=1, 13 | norm_cfg=dict(type='BN', requires_grad=False), 14 | norm_eval=True, 15 | style='caffe'), 16 | neck=None, 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=1024, 20 | feat_channels=1024, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | # model training and testing settings 34 | train_cfg=dict( 35 | rpn=dict( 36 | assigner=dict( 37 | type='MaxIoUAssigner', 38 | pos_iou_thr=0.7, 39 | neg_iou_thr=0.3, 40 | min_pos_iou=0.3, 41 | ignore_iof_thr=-1), 42 | sampler=dict( 43 | type='RandomSampler', 44 | num=256, 45 | pos_fraction=0.5, 46 | neg_pos_ub=-1, 47 | add_gt_as_proposals=False), 48 | allowed_border=0, 49 | pos_weight=-1, 50 | debug=False)), 51 | test_cfg=dict( 52 | rpn=dict( 53 | nms_pre=12000, 54 | max_per_img=2000, 55 | nms=dict(type='nms', iou_threshold=0.7), 56 | min_bbox_size=0))) 57 | 
-------------------------------------------------------------------------------- /configs/_base_/models/rpn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='open-mmlab://detectron2/resnet50_caffe', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=3, 9 | strides=(1, 2, 2), 10 | dilations=(1, 1, 1), 11 | out_indices=(2, ), 12 | frozen_stages=1, 13 | norm_cfg=dict(type='BN', requires_grad=False), 14 | norm_eval=True, 15 | style='caffe'), 16 | neck=None, 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=1024, 20 | feat_channels=1024, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | # model training and testing settings 34 | train_cfg=dict( 35 | rpn=dict( 36 | assigner=dict( 37 | type='MaxIoUAssigner', 38 | pos_iou_thr=0.7, 39 | neg_iou_thr=0.3, 40 | min_pos_iou=0.3, 41 | ignore_iof_thr=-1), 42 | sampler=dict( 43 | type='RandomSampler', 44 | num=256, 45 | pos_fraction=0.5, 46 | neg_pos_ub=-1, 47 | add_gt_as_proposals=False), 48 | allowed_border=0, 49 | pos_weight=-1, 50 | debug=False)), 51 | test_cfg=dict( 52 | rpn=dict( 53 | nms_pre=12000, 54 | max_per_img=2000, 55 | nms=dict(type='nms', iou_threshold=0.7), 56 | min_bbox_size=0))) 57 | -------------------------------------------------------------------------------- /configs/models/retinanet_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | start_level=1, 19 | add_extra_convs='on_input', 20 | num_outs=5), 21 | bbox_head=dict( 22 | type='RetinaHead', 23 | num_classes=80, 24 | in_channels=256, 25 | stacked_convs=4, 26 | feat_channels=256, 27 | anchor_generator=dict( 28 | type='AnchorGenerator', 29 | octave_base_scale=4, 30 | scales_per_octave=3, 31 | ratios=[0.5, 1.0, 2.0], 32 | strides=[8, 16, 32, 64, 128]), 33 | bbox_coder=dict( 34 | type='DeltaXYWHBBoxCoder', 35 | target_means=[.0, .0, .0, .0], 36 | target_stds=[1.0, 1.0, 1.0, 1.0]), 37 | loss_cls=dict( 38 | type='FocalLoss', 39 | use_sigmoid=True, 40 | gamma=2.0, 41 | alpha=0.25, 42 | loss_weight=1.0), 43 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 44 | # model training and testing settings 45 | train_cfg=dict( 46 | assigner=dict( 47 | type='MaxIoUAssigner', 48 | pos_iou_thr=0.5, 49 | neg_iou_thr=0.4, 50 | min_pos_iou=0, 51 | ignore_iof_thr=-1), 52 | allowed_border=-1, 53 | pos_weight=-1, 54 | debug=False), 55 | test_cfg=dict( 56 | nms_pre=1000, 57 | min_bbox_size=0, 58 | score_thr=0.05, 59 | nms=dict(type='nms', iou_threshold=0.5), 60 | max_per_img=100)) 61 | -------------------------------------------------------------------------------- /configs/_base_/models/retinanet_r50_fpn.py: 
-------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | start_level=1, 19 | add_extra_convs='on_input', 20 | num_outs=5), 21 | bbox_head=dict( 22 | type='RetinaHead', 23 | num_classes=80, 24 | in_channels=256, 25 | stacked_convs=4, 26 | feat_channels=256, 27 | anchor_generator=dict( 28 | type='AnchorGenerator', 29 | octave_base_scale=4, 30 | scales_per_octave=3, 31 | ratios=[0.5, 1.0, 2.0], 32 | strides=[8, 16, 32, 64, 128]), 33 | bbox_coder=dict( 34 | type='DeltaXYWHBBoxCoder', 35 | target_means=[.0, .0, .0, .0], 36 | target_stds=[1.0, 1.0, 1.0, 1.0]), 37 | loss_cls=dict( 38 | type='FocalLoss', 39 | use_sigmoid=True, 40 | gamma=2.0, 41 | alpha=0.25, 42 | loss_weight=1.0), 43 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 44 | # model training and testing settings 45 | train_cfg=dict( 46 | assigner=dict( 47 | type='MaxIoUAssigner', 48 | pos_iou_thr=0.5, 49 | neg_iou_thr=0.4, 50 | min_pos_iou=0, 51 | ignore_iof_thr=-1), 52 | allowed_border=-1, 53 | pos_weight=-1, 54 | debug=False), 55 | test_cfg=dict( 56 | nms_pre=1000, 57 | min_bbox_size=0, 58 | score_thr=0.05, 59 | nms=dict(type='nms', iou_threshold=0.5), 60 | max_per_img=100)) 61 | -------------------------------------------------------------------------------- /configs/models/rpn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | # model training and testing settings 36 | train_cfg=dict( 37 | rpn=dict( 38 | assigner=dict( 39 | type='MaxIoUAssigner', 40 | pos_iou_thr=0.7, 41 | neg_iou_thr=0.3, 42 | min_pos_iou=0.3, 43 | ignore_iof_thr=-1), 44 | sampler=dict( 45 | type='RandomSampler', 46 | num=256, 47 | pos_fraction=0.5, 48 | neg_pos_ub=-1, 49 | add_gt_as_proposals=False), 50 | allowed_border=0, 51 | pos_weight=-1, 52 | debug=False)), 53 | test_cfg=dict( 54 | rpn=dict( 55 | nms_pre=2000, 56 | max_per_img=1000, 57 | nms=dict(type='nms', iou_threshold=0.7), 58 | min_bbox_size=0))) 59 | -------------------------------------------------------------------------------- /configs/_base_/models/rpn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | 
pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | # model training and testing settings 36 | train_cfg=dict( 37 | rpn=dict( 38 | assigner=dict( 39 | type='MaxIoUAssigner', 40 | pos_iou_thr=0.7, 41 | neg_iou_thr=0.3, 42 | min_pos_iou=0.3, 43 | ignore_iof_thr=-1), 44 | sampler=dict( 45 | type='RandomSampler', 46 | num=256, 47 | pos_fraction=0.5, 48 | neg_pos_ub=-1, 49 | add_gt_as_proposals=False), 50 | allowed_border=0, 51 | pos_weight=-1, 52 | debug=False)), 53 | test_cfg=dict( 54 | rpn=dict( 55 | nms_pre=2000, 56 | max_per_img=1000, 57 | nms=dict(type='nms', iou_threshold=0.7), 58 | min_bbox_size=0))) 59 | -------------------------------------------------------------------------------- /configs/models/fast_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FastRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | roi_head=dict( 20 | type='StandardRoIHead', 21 | bbox_roi_extractor=dict( 22 | type='SingleRoIExtractor', 23 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 24 | out_channels=256, 25 | featmap_strides=[4, 8, 16, 32]), 26 | bbox_head=dict( 27 | type='Shared2FCBBoxHead', 28 | in_channels=256, 29 | fc_out_channels=1024, 30 | roi_feat_size=7, 31 | num_classes=80, 32 | bbox_coder=dict( 33 | type='DeltaXYWHBBoxCoder', 34 | target_means=[0., 0., 0., 0.], 35 | target_stds=[0.1, 0.1, 0.2, 0.2]), 36 | reg_class_agnostic=False, 37 | loss_cls=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 39 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 40 | # model training and testing settings 41 | train_cfg=dict( 42 | rcnn=dict( 43 | assigner=dict( 44 | type='MaxIoUAssigner', 45 | pos_iou_thr=0.5, 46 | neg_iou_thr=0.5, 47 | min_pos_iou=0.5, 48 | match_low_quality=False, 49 | ignore_iof_thr=-1), 50 | sampler=dict( 51 | type='RandomSampler', 52 | num=512, 53 | pos_fraction=0.25, 54 | neg_pos_ub=-1, 55 | add_gt_as_proposals=True), 56 | pos_weight=-1, 57 | debug=False)), 58 | test_cfg=dict( 59 | rcnn=dict( 60 | score_thr=0.05, 61 | nms=dict(type='nms', iou_threshold=0.5), 62 | max_per_img=100))) 63 | -------------------------------------------------------------------------------- /configs/_base_/models/fast_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = 
dict( 3 | type='FastRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | roi_head=dict( 20 | type='StandardRoIHead', 21 | bbox_roi_extractor=dict( 22 | type='SingleRoIExtractor', 23 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 24 | out_channels=256, 25 | featmap_strides=[4, 8, 16, 32]), 26 | bbox_head=dict( 27 | type='Shared2FCBBoxHead', 28 | in_channels=256, 29 | fc_out_channels=1024, 30 | roi_feat_size=7, 31 | num_classes=80, 32 | bbox_coder=dict( 33 | type='DeltaXYWHBBoxCoder', 34 | target_means=[0., 0., 0., 0.], 35 | target_stds=[0.1, 0.1, 0.2, 0.2]), 36 | reg_class_agnostic=False, 37 | loss_cls=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 39 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 40 | # model training and testing settings 41 | train_cfg=dict( 42 | rcnn=dict( 43 | assigner=dict( 44 | type='MaxIoUAssigner', 45 | pos_iou_thr=0.5, 46 | neg_iou_thr=0.5, 47 | min_pos_iou=0.5, 48 | match_low_quality=False, 49 | ignore_iof_thr=-1), 50 | sampler=dict( 51 | type='RandomSampler', 52 | num=512, 53 | pos_fraction=0.25, 54 | neg_pos_ub=-1, 55 | add_gt_as_proposals=True), 56 | pos_weight=-1, 57 | debug=False)), 58 | test_cfg=dict( 59 | rcnn=dict( 60 | score_thr=0.05, 61 | nms=dict(type='nms', iou_threshold=0.5), 62 | max_per_img=100))) 63 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.txt 6 | !requirements.txt 7 | !id.txt 8 | # C extensions 9 | *.zip 10 | *.so 11 | *.png 12 | *.jpg 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | val_s/ 27 | wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | *.out 33 | *.npy 34 | *.json 35 | !panoptic_coco_categories.json 36 | !panoptic_cityscapes_categories.json 37 | !panoptic_ade20k_categories.json 38 | !panoptic_mapilarry_categories.json 39 | /wandb 40 | /datasets 41 | /checkpoints 42 | # PyInstaller 43 | # Usually these files are written by a python script from a template 44 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
45 | *.manifest 46 | *.spec 47 | # Installer logs 48 | pip-log.txt 49 | pip-delete-this-directory.txt 50 | 51 | # Unit test / coverage reports 52 | htmlcov/ 53 | .tox/ 54 | .coverage 55 | .coverage.* 56 | .cache 57 | nosetests.xml 58 | coverage.xml 59 | *.cover 60 | .hypothesis/ 61 | .pytest_cache/ 62 | 63 | # Translations 64 | *.mo 65 | *.pot 66 | 67 | # Django stuff: 68 | *.log 69 | local_settings.py 70 | db.sqlite3 71 | 72 | # Flask stuff: 73 | instance/ 74 | .webassets-cache 75 | 76 | # Scrapy stuff: 77 | .scrapy 78 | 79 | # Sphinx documentation 80 | docs/_build/ 81 | 82 | # PyBuilder 83 | target/ 84 | query/ 85 | # Jupyter Notebook 86 | .ipynb_checkpoints 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # celery beat schedule file 92 | celerybeat-schedule 93 | 94 | # SageMath parsed files 95 | *.sage.py 96 | 97 | # Environments 98 | .env 99 | .venv 100 | env/ 101 | venv/ 102 | ENV/ 103 | env.bak/ 104 | venv.bak/ 105 | 106 | # Spyder project settings 107 | .spyderproject 108 | .spyproject 109 | 110 | # Rope project settings 111 | .ropeproject 112 | 113 | # mkdocs documentation 114 | /site 115 | 116 | # mypy 117 | .mypy_cache/ 118 | 119 | data/ 120 | data 121 | .vscode 122 | .idea 123 | .DS_Store 124 | 125 | # custom 126 | *.pdf 127 | *.pkl 128 | *.pkl.json 129 | *.log.json 130 | work_dirs/ 131 | arun_log/ 132 | # Pytorch 133 | 134 | *.pth 135 | *.py~ 136 | *.sh~ 137 | -------------------------------------------------------------------------------- /configs/_base_/datasets/coco_panoptic_plus.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CocoDataset_panoptic' 3 | data_root = 'datasets/coco/' 4 | coco_root = 'datasets/' 5 | img_norm_cfg = dict( 6 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True,with_seg=True), 10 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 11 | dict(type='RandomFlip', flip_ratio=0.5), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size_divisor=32), 14 | dict(type='DefaultFormatBundle'), 15 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks','gt_semantic_seg']), 16 | ] 17 | test_pipeline = [ 18 | dict(type='LoadImageFromFile'), 19 | dict( 20 | type='MultiScaleFlipAug', 21 | img_scale=(1333, 800), 22 | flip=False, 23 | transforms=[ 24 | dict(type='Resize', keep_ratio=True), 25 | dict(type='RandomFlip'), 26 | dict(type='Normalize', **img_norm_cfg), 27 | dict(type='Pad', size_divisor=32), 28 | dict(type='ImageToTensor', keys=['img']), 29 | dict(type='Collect', keys=['img']), 30 | ]) 31 | ] 32 | data = dict( 33 | samples_per_gpu=2, 34 | workers_per_gpu=2, 35 | train=dict( 36 | type=dataset_type, 37 | ann_file= './datasets/annotations/panoptic_train2017_detection_format.json', 38 | img_prefix=data_root + 'train2017/', 39 | pipeline=train_pipeline), 40 | val=dict( 41 | 42 | segmentations_folder='./seg', 43 | gt_json = './datasets/annotations/panoptic_val2017.json', 44 | gt_folder = './datasets/annotations/panoptic_val2017', 45 | type=dataset_type, 46 | ann_file=data_root + 'annotations/instances_val2017.json', 47 | img_prefix=data_root + 'val2017/', 48 | pipeline=test_pipeline), 49 | test=dict( 50 | segmentations_folder='./seg', 51 | gt_json = './datasets/annotations/panoptic_val2017.json', 52 | gt_folder = './datasets/annotations/panoptic_val2017', 53 | type=dataset_type, 54 | #ann_file= 
'./datasets/coco/annotations/image_info_test-dev2017.json', 55 | ann_file=data_root + 'annotations/instances_val2017.json', 56 | #img_prefix=data_root + '/test2017/', 57 | img_prefix=data_root + 'val2017/', 58 | pipeline=test_pipeline) 59 | ) 60 | evaluation = dict(metric=['bbox', 'segm', 'panoptic']) 61 | #evaluation = dict(interval=1, metric='bbox') -------------------------------------------------------------------------------- /easymd/core/evaluation/eval_hooks.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | import torch.distributed as dist 4 | from mmcv.runner import DistEvalHook as BaseDistEvalHook 5 | from mmcv.runner import EvalHook as BaseEvalHook 6 | from torch.nn.modules.batchnorm import _BatchNorm 7 | 8 | 9 | class EvalHook_plus(BaseEvalHook): 10 | 11 | def _do_evaluate(self, runner): 12 | """perform evaluation and save ckpt.""" 13 | if not self._should_evaluate(runner): 14 | return 15 | 16 | from easymd.apis import single_gpu_test_plus 17 | results = single_gpu_test_plus(runner.model, self.dataloader, show=False) 18 | runner.log_buffer.output['eval_iter_num'] = len(self.dataloader) 19 | key_score = self.evaluate(runner, results) 20 | if self.save_best: 21 | self._save_ckpt(runner, key_score) 22 | 23 | class DistEvalHook_plus(BaseDistEvalHook): 24 | def __init__(self,segmentations_folder=None,datasets = 'coco',**kwargs): 25 | self.segmentations_folder = segmentations_folder 26 | self.datasets = datasets 27 | super(DistEvalHook_plus,self).__init__(**kwargs) 28 | def _do_evaluate(self, runner): 29 | """perform evaluation and save ckpt.""" 30 | # Synchronization of BatchNorm's buffer (running_mean 31 | # and running_var) is not supported in the DDP of pytorch, 32 | # which may cause the inconsistent performance of models in 33 | # different ranks, so we broadcast BatchNorm's buffers 34 | # of rank 0 to other ranks to avoid this. 35 | if self.broadcast_bn_buffer: 36 | model = runner.model 37 | for name, module in model.named_modules(): 38 | if isinstance(module, 39 | _BatchNorm) and module.track_running_stats: 40 | dist.broadcast(module.running_var, 0) 41 | dist.broadcast(module.running_mean, 0) 42 | 43 | if not self._should_evaluate(runner): 44 | return 45 | 46 | tmpdir = self.tmpdir 47 | if tmpdir is None: 48 | tmpdir = osp.join(runner.work_dir, '.eval_hook') 49 | 50 | from easymd.apis import multi_gpu_test_plus 51 | results = multi_gpu_test_plus( 52 | runner.model, 53 | self.dataloader, 54 | datasets = self.datasets, 55 | segmentations_folder=self.segmentations_folder, 56 | tmpdir=tmpdir, 57 | gpu_collect=self.gpu_collect) 58 | if runner.rank == 0: 59 | print('\n') 60 | runner.log_buffer.output['eval_iter_num'] = len(self.dataloader) 61 | key_score = self.evaluate(runner, results) 62 | 63 | if self.save_best: 64 | self._save_ckpt(runner, key_score) -------------------------------------------------------------------------------- /easymd/runner/checkpoints.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
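# easymd's replacement for mmcv's load_checkpoint: on top of any caller-supplied
# revise_keys it always remaps legacy parameter names (mask_head ->
# things_mask_head, mask_head2 -> stuff_mask_head, cls_branches2 ->
# cls_thing_branches) so that older Panoptic-SegFormer checkpoints keep loading.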
2 | import io 3 | import os 4 | import os.path as osp 5 | import pkgutil 6 | import re 7 | import time 8 | import warnings 9 | from collections import OrderedDict 10 | from importlib import import_module 11 | from tempfile import TemporaryDirectory 12 | 
13 | import torch 14 | import torchvision 15 | from torch.optim import Optimizer 16 | 17 | import mmcv 18 | 19 | 
20 | from mmcv.runner.checkpoint import _load_checkpoint,load_state_dict 21 | 22 | 
23 | def load_checkpoint(model, 24 | filename, 25 | map_location=None, 26 | strict=False, 27 | logger=None, 28 | revise_keys=[(r'^module\.', '')]):
29 | """Load checkpoint from a file or URI. 30 | 
31 | Args: 32 | model (Module): Module to load checkpoint. 33 | filename (str): Accept local filepath, URL, ``torchvision://xxx``, 34 | ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for 35 | details.
36 | map_location (str): Same as :func:`torch.load`. 37 | strict (bool): Whether to allow different params for the model and 38 | checkpoint. 39 | logger (:mod:`logging.Logger` or None): The logger for error message.
40 | revise_keys (list): A list of customized keywords to modify the 41 | state_dict in checkpoint. Each item is a (pattern, replacement) 42 | pair of the regular expression operations. Default: strip 43 | the prefix 'module.' by [(r'^module\\.', '')]. 44 | 
45 | Returns: 46 | dict or OrderedDict: The loaded checkpoint. 47 | """
48 | default_revise_keys = [ 49 | ('\\.mask_head\\.','.things_mask_head.'), 50 | ('\\.mask_head2\\.','.stuff_mask_head.'), 51 | ('\\.cls_branches2\\.', '.cls_thing_branches.'), 52 | 53 | ]
54 | revise_keys = revise_keys + default_revise_keys # concatenate instead of extend() so the mutable default argument is never modified
55 | checkpoint = _load_checkpoint(filename, map_location, logger) 56 | # OrderedDict is a subclass of dict 57 | if not isinstance(checkpoint, dict): 58 | raise RuntimeError( 59 | f'No state_dict found in checkpoint file {filename}')
60 | # get state_dict from checkpoint 61 | if 'state_dict' in checkpoint: 62 | state_dict = checkpoint['state_dict'] 63 | else: 64 | state_dict = checkpoint 65 | 
66 | # strip prefix of state_dict 67 | metadata = getattr(state_dict, '_metadata', OrderedDict()) 68 | for p, r in revise_keys: 69 | state_dict = OrderedDict( 70 | {re.sub(p, r, k): v 71 | for k, v in state_dict.items()})
72 | # Keep metadata in state_dict 73 | state_dict._metadata = metadata 74 | 75 | # load state_dict 76 | load_state_dict(model, state_dict, strict, logger) 77 | return checkpoint 78 | 79 | --------------------------------------------------------------------------------
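The key revision above is a plain re.sub over every state_dict key. A self-contained sketch of what the default pairs do (hypothetical key name, no mmcv required):

    import re

    revise_keys = [(r'^module\.', ''), ('\\.mask_head\\.', '.things_mask_head.')]
    key = 'module.bbox_head.mask_head.0.weight'
    for pattern, repl in revise_keys:
        key = re.sub(pattern, repl, key)
    print(key)  # bbox_head.things_mask_head.0.weight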
/easymd/analysis_tools/get_flops.py: --------------------------------------------------------------------------------
1 | import argparse 2 | 3 | import torch 4 | from mmcv import Config, DictAction 5 | 6 | from mmdet.models import build_detector 7 | 
8 | try: 9 | from mmcv.cnn import get_model_complexity_info 10 | except ImportError: 11 | raise ImportError('Please upgrade mmcv to >0.6.2') 12 | 13 | 
14 | def parse_args(): 15 | parser = argparse.ArgumentParser(description='Get the FLOPs of a detector') 16 | parser.add_argument('config', help='train config file path')
17 | parser.add_argument( 18 | '--shape', 19 | type=int, 20 | nargs='+', 21 | default=[1280, 800], 22 | help='input image size')
23 | parser.add_argument( 24 | '--cfg-options', 25 | nargs='+', 26 | action=DictAction, 27 | help='override some settings in the used config, the key-value pair ' 28 | 'in xxx=yyy format will be merged into config file. If the value to ' 29 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 30 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 31 | 'Note that the quotation marks are necessary and that no white space ' 32 | 'is allowed.')
33 | args = parser.parse_args() 34 | return args 35 | 36 | 
37 | def main(): 38 | 39 | args = parse_args() 40 | 
41 | if len(args.shape) == 1: 42 | input_shape = (3, args.shape[0], args.shape[0]) 43 | elif len(args.shape) == 2: 44 | input_shape = (3, ) + tuple(args.shape) 45 | else: 46 | raise ValueError('invalid input shape') 47 | 
48 | cfg = Config.fromfile(args.config) 49 | if args.cfg_options is not None: 50 | cfg.merge_from_dict(args.cfg_options) 51 | # import modules from string list. 52 | if cfg.get('custom_imports', None): 53 | from mmcv.utils import import_modules_from_strings 54 | import_modules_from_strings(**cfg['custom_imports']) 55 | 
56 | model = build_detector( 57 | cfg.model, 58 | train_cfg=cfg.get('train_cfg'), 59 | test_cfg=cfg.get('test_cfg')) 60 | if torch.cuda.is_available(): 61 | model.cuda() 62 | model.eval() 63 | 
64 | if hasattr(model, 'forward_dummy'): 65 | model.forward = model.forward_dummy 66 | else: 67 | raise NotImplementedError( 68 | 'FLOPs counter is not currently supported with {}'. 69 | format(model.__class__.__name__)) 70 | 
71 | flops, params = get_model_complexity_info(model, input_shape) 72 | split_line = '=' * 30 73 | print(f'{split_line}\nInput shape: {input_shape}\n' 74 | f'Flops: {flops}\nParams: {params}\n{split_line}')
75 | print('!!!Please be cautious if you use the results in papers. ' 76 | 'You may need to check if all ops are supported and verify that the ' 77 | 'flops computation is correct.') 78 | 79 | 
80 | if __name__ == '__main__': 81 | main() 82 | --------------------------------------------------------------------------------
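get_model_complexity_info is not tied to detectors; it works on any nn.Module with a standard forward. A minimal sketch with a torchvision backbone as a stand-in:

    import torchvision
    from mmcv.cnn import get_model_complexity_info

    model = torchvision.models.resnet50()
    model.eval()
    flops, params = get_model_complexity_info(model, (3, 224, 224))
    print(flops, params)  # human-readable strings by default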
/tools/ana_tools/get_flops.py: --------------------------------------------------------------------------------
1 | import argparse 2 | 3 | import torch 4 | from mmcv import Config, DictAction 5 | 6 | from mmdet.models import build_detector 7 | import easymd
8 | try: 9 | from mmcv.cnn import get_model_complexity_info 10 | except ImportError: 11 | raise ImportError('Please upgrade mmcv to >0.6.2') 12 | 13 | 
14 | def parse_args(): 15 | parser = argparse.ArgumentParser(description='Get the FLOPs of a detector') 16 | parser.add_argument('config', help='train config file path')
17 | parser.add_argument( 18 | '--shape', 19 | type=int, 20 | nargs='+', 21 | default=[1200, 800], 22 | help='input image size')
23 | parser.add_argument( 24 | '--cfg-options', 25 | nargs='+', 26 | action=DictAction, 27 | help='override some settings in the used config, the key-value pair ' 28 | 'in xxx=yyy format will be merged into config file. If the value to ' 29 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 30 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 31 | 'Note that the quotation marks are necessary and that no white space ' 32 | 'is allowed.')
33 | args = parser.parse_args() 34 | return args 35 | 36 | 
37 | def main(): 38 | 39 | args = parse_args() 40 | 
41 | if len(args.shape) == 1: 42 | input_shape = (3, args.shape[0], args.shape[0]) 43 | elif len(args.shape) == 2: 44 | input_shape = (3, ) + tuple(args.shape) 45 | else: 46 | raise ValueError('invalid input shape') 47 | 
48 | cfg = Config.fromfile(args.config) 49 | if args.cfg_options is not None: 50 | cfg.merge_from_dict(args.cfg_options) 51 | # import modules from string list. 52 | if cfg.get('custom_imports', None): 53 | from mmcv.utils import import_modules_from_strings 54 | import_modules_from_strings(**cfg['custom_imports']) 55 | 
56 | model = build_detector( 57 | cfg.model, 58 | train_cfg=cfg.get('train_cfg'), 59 | test_cfg=cfg.get('test_cfg')) 60 | if torch.cuda.is_available(): 61 | model.cuda() 62 | model.eval() 63 | 
64 | if hasattr(model, 'forward_dummy'): 65 | model.forward = model.forward_dummy 66 | else: 67 | raise NotImplementedError( 68 | 'FLOPs counter is not currently supported with {}'. 69 | format(model.__class__.__name__)) 70 | 
71 | flops, params = get_model_complexity_info(model, input_shape) 72 | split_line = '=' * 30 73 | print(f'{split_line}\nInput shape: {input_shape}\n' 74 | f'Flops: {flops}\nParams: {params}\n{split_line}')
75 | print('!!!Please be cautious if you use the results in papers. ' 76 | 'You may need to check if all ops are supported and verify that the ' 77 | 'flops computation is correct.') 78 | 79 | 
80 | if __name__ == '__main__': 81 | main() 82 | --------------------------------------------------------------------------------
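A typical invocation of the tool above, pointing at one of the panformer configs (paths assumed):

    python tools/ana_tools/get_flops.py configs/panformer/panformer_r50_12e_coco_panoptic.py --shape 1200 800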
/easymd/analysis_tools/anchor_analyze.py: --------------------------------------------------------------------------------
1 | from mmdet.core import build_anchor_generator 2 | import mmdet 3 | import mmcv 4 | import numpy as np 5 | import time 6 | import cv2 as cv 7 | 
8 | def show_anchor(input_shape_hw, stride, anchor_generator_cfg, random_n, select_n): 9 | img = np.zeros(input_shape_hw, np.uint8) 10 | feature_map = [] 11 | for s in stride: 12 | feature_map.append([input_shape_hw[0] // s, input_shape_hw[1] // s])
13 | anchor_generator = build_anchor_generator(anchor_generator_cfg) 14 | anchors = anchor_generator.grid_anchors(feature_map) # anchor coordinates at the original image scale, xyxy format, top-left origin 15 | base_anchors = anchor_generator.base_anchors 16 | 
17 | for i,each in enumerate(base_anchors): 18 | each[:,0:4:2] += input_shape_hw[0]//2 19 | each[:,1:4:2] += input_shape_hw[1]//2
20 | for _ in range(random_n): 21 | disp_img = [] 22 | for i,anchor in enumerate(anchors): 23 | img = np.zeros(input_shape_hw, np.uint8) 24 | anchor = anchor.cpu().numpy() 25 | print(anchor.shape)
26 | index = (anchor[:, 0] > 0) & (anchor[:, 1] > 0) & (anchor[:, 2] < input_shape_hw[1]) & \ 27 | (anchor[:, 3] < input_shape_hw[0]) 28 | anchor = anchor[index] 29 | 
30 | anchor = np.random.permutation(anchor) 31 | img_ = mmcv.imshow_bboxes(img, anchor[:select_n], thickness=1, show=False) 32 | img_ = mmcv.imshow_bboxes(img_, base_anchors[i].cpu().numpy(), thickness=1, colors='red', show=False) 33 | #disp_img.append(img_)
34 | cv.imshow('img',img_) 35 | if cv.waitKey(0) & 0xFF== ord('q'): 36 | exit(0) 37 | #time.sleep(0.3) 38 | 
39 | def demo_retinanet(input_shape_hw): 40 | stride = [8, 16, 32, 64, 128] 41 | anchor_generator_cfg = dict( 42 | type='AnchorGenerator',
43 | octave_base_scale=4, # base anchor scale for every level; enlarging it enlarges all anchors
44 | scales_per_octave=3, # 3 scales per level: 2**0, 2**(1/3), 2**(2/3)
45 | ratios=[0.5, 1.0, 2.0], # 3 aspect ratios per level, so 9 anchors at each location
46 | strides=stride) # output stride of each level, so anchor sizes range from 4x8=32 to 4x128x2**(2/3)~=812.7
47 | random_n = 10 48 | select_n = 100 49 | show_anchor(input_shape_hw, stride, anchor_generator_cfg, random_n, select_n) 50 | 51 | 
52 | def demo_yolov3(input_shape_hw): 53 | stride = [32, 16, 8] 54 | anchor_generator_cfg = dict( 55 | type='YOLOAnchorGenerator', 56 | base_sizes=[[(116, 90), (156, 198), (373, 326)], 57 | [(30, 61), (62, 45), (59, 119)], 58 | [(10, 13), (16, 30), (33, 23)]], 59 | strides=stride) 60 | 
61 | random_n = 10 62 | select_n = 100 63 | show_anchor(input_shape_hw, stride, anchor_generator_cfg, random_n, select_n) 64 | 65 | 
66 | if __name__ == '__main__': 67 | input_shape_hw = (320, 320, 3) 68 | demo_retinanet(input_shape_hw) 69 | #demo_yolov3(input_shape_hw) --------------------------------------------------------------------------------
/easymd/analysis_tools/browse_dataset.py: --------------------------------------------------------------------------------
1 | import argparse 2 | import os 3 | from pathlib import Path 4 | 5 | import mmdet 6 | import mmcv 7 | from mmcv import Config 8 | from mmdet.datasets.builder import build_dataset 9 | import random 10 | import cv2 as cv 11 | import numpy as np 12 | import easymd
13 | def parse_args(): 14 | parser = argparse.ArgumentParser(description='Browse a dataset') 15 | parser.add_argument('config', help='train config file path')
16 | # the following three pipeline steps are excluded to make visualization easier
17 | parser.add_argument( 18 | '--skip-type', 19 | type=str, 20 | nargs='+', 21 | default=['DefaultFormatBundle', 'Normalize', 'Collect'], 22 | help='pipeline steps to skip for visualization')
23 | parser.add_argument( 24 | '--output-dir', 25 | default=None, 26 | type=str, 27 | help='If there is no display interface, you can save the visualizations here')
28 | parser.add_argument('--not-show', default=False, action='store_true') 29 | parser.add_argument( 30 | '--show-interval', 31 | type=int, 32 | default=0, 33 | help='the interval of show (ms)') 34 | args = parser.parse_args() 35 | return args 36 | 37 | 
38 | def retrieve_data_cfg(config_path, skip_type): 39 | cfg = Config.fromfile(config_path) 40 | train_data_cfg = cfg.data.train 41 | if train_data_cfg.get('dataset', None) is not None:
42 | # VOC-style nested dataset config
43 | datasets = train_data_cfg['dataset'] 44 | datasets['pipeline'] = [ 45 | x for x in datasets.pipeline if x['type'] not in skip_type 46 | ]
47 | else: 48 | train_data_cfg['pipeline'] = [ 49 | x for x in train_data_cfg.pipeline if x['type'] not in skip_type 50 | ] 51 | 52 | return cfg 53 | 54 | 
55 | def main(): 56 | args = parse_args() 57 | cfg = retrieve_data_cfg(args.config, args.skip_type) 58 | 59 | dataset = build_dataset(cfg.data.train) 60 | 
61 | progress_bar = mmcv.ProgressBar(len(dataset)) 62 | for item in dataset: 63 | #print(item.keys()) 64 | filename = os.path.join(args.output_dir, 65 | Path(item['filename']).name 66 | ) if args.output_dir is not None else None
67 | img = mmcv.imshow_det_bboxes( 68 | item['img'], 69 | item['gt_bboxes'], 70 | item['gt_labels'], 71 | class_names=dataset.CLASSES, 72 | show=False, 73 | out_file=filename, 74 | wait_time=args.show_interval)
75 | img = img.astype('uint8') 76 | #print(dir(item['gt_masks'][0])) 77 | for each in item['gt_masks']: 78 | color = [random.randint(0,255),random.randint(0,255),random.randint(0,255)] 79 | each_3 = each[...,None] *color 80 | each_3 = each_3.astype('uint8') 81 | img[each==1] = (img[each==1]*0.4 + each_3[each==1]*0.6).astype('uint8') 82 | #np.clip(new_img_with_alpha,0,255)
83 | cv.imshow('img',img) 84 | if cv.waitKey(0) & 0xFF== ord('q'): 85 | exit(0) 86 | progress_bar.update() 87 | 88 | 
89 | if __name__ == '__main__': 90 | main() --------------------------------------------------------------------------------
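browse_dataset.py is run against a training config; it strips DefaultFormatBundle/Normalize/Collect from the pipeline and then draws ground-truth boxes and alpha-blended masks. A typical invocation (output directory optional):

    python easymd/analysis_tools/browse_dataset.py configs/panformer/panformer_r50_12e_coco_panoptic.py --output-dir ./vis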
all_things = 0 14 | all_stuff = 0 15 | with open('./datasets/annotations/panoptic_val2017_detection_format.json','r') as f: 16 | data = json.load(f) 17 | print(len(data['annotations'])) 18 | for each in data['annotations']: 19 | if each['category_id']<=80: 20 | all_things+=1 21 | else: 22 | all_stuff+=1 23 | print(all_things,all_stuff,all_things/(all_things+all_stuff)) 24 | map = np.zeros([400,133]) 25 | things_stuff_list = [] 26 | for i in range(num_query): 27 | with open('./query/{i}.txt'.format(i=i)) as f: 28 | img = torch.ones([500,500,3]).numpy()*255 29 | things = 0 30 | stuff = 0 31 | for line in f.readlines(): 32 | 33 | data = line.strip().split(' ') 34 | t= int (data[0]) 35 | if t<80: 36 | things+=1 37 | else: 38 | stuff+=1 39 | 40 | cx, cy, w, h, bbox_area, mask_area = float(data[1]), float(data[2]), float(data[3]), float(data[4]), data[5],int(data[6]) 41 | bbox_area = float(bbox_area[7:-1]) 42 | cx, cy, w, h = int(500*cx), int(500*cy), int(500*w+0.5), int(500*h+0.5) 43 | #cv.drawKeypoints() 44 | ''' 45 | if w/h>1.5: # bbox_area<=322: 46 | cv.circle(img, (cx,cy), 2, color=(255,0,0), thickness=1) 47 | elif w/h<0.7: #322`_""" 16 | 17 | def __init__(self, 18 | backbone, 19 | neck=None, 20 | bbox_head=None, 21 | train_cfg=None, 22 | test_cfg=None, 23 | pretrained=None, 24 | init_cfg=None): 25 | 26 | super(DETR_plus, self).__init__(backbone, neck, bbox_head, train_cfg, 27 | test_cfg, pretrained, init_cfg) 28 | self.count=0 29 | def simple_test(self, img, img_metas=None, rescale=False): 30 | """Test function without test time augmentation. 31 | 32 | Args: 33 | imgs (list[torch.Tensor]): List of multiple images 34 | img_metas (list[dict]): List of image information. 35 | rescale (bool, optional): Whether to rescale the results. 36 | Defaults to False. 37 | 38 | Returns: 39 | list[list[np.ndarray]]: BBox results of each image and classes. 40 | The outer list corresponds to each image. The inner list 41 | corresponds to each class. 42 | """ 43 | 44 | batch_size = len(img_metas) 45 | assert batch_size == 1, 'Currently only batch_size 1 for inference ' \ 46 | f'mode is supported. Found batch_size {batch_size}.' 
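        # Flow below (descriptive note): extract backbone/neck features, run the
        # bbox head, then let get_bboxes() post-process. Unlike vanilla mmdet
        # detectors, this head returns a dict that may hold 'bbox', 'segm' and
        # 'panoptic' entries, which are unpacked into results_dict.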
47 | x = self.extract_feat(img) 48 | outs = self.bbox_head(x, img_metas) 49 | 50 | results = self.bbox_head.get_bboxes(*outs, img_metas, rescale=rescale) 51 | assert isinstance(results,dict), 'The returned results should be a dict' 52 | 53 | 54 | results_dict = {} 55 | for return_type in results.keys(): 56 | if return_type == 'bbox': 57 | labels = results['labels'] 58 | bbox_list = results['bbox'] 59 | bbox_results = [ 60 | bbox2result(det_bboxes, det_labels, self.bbox_head.num_things_classes) 61 | for det_bboxes, det_labels in zip(bbox_list,labels) 62 | ] 63 | results_dict['bbox'] = bbox_results 64 | elif return_type == 'segm': 65 | seg_list = results['segm'] 66 | labels = results['labels'] 67 | 68 | masks_results = [ 69 | mask2result(det_segm,det_labels,self.bbox_head.num_things_classes) 70 | for det_segm, det_labels in zip(seg_list,labels) 71 | ] 72 | results_dict['segm'] = masks_results 73 | elif return_type == 'panoptic': 74 | results_dict['panoptic'] = results['panoptic'] 75 | 76 | 77 | 78 | 79 | 80 | 81 | return results_dict 82 | -------------------------------------------------------------------------------- /easymd/analysis_tools/atss_anchor.py: -------------------------------------------------------------------------------- 1 | 2 | from mmdet.core import anchor, build_anchor_generator,build_assigner 3 | import mmdet 4 | import mmcv 5 | import numpy as np 6 | import time 7 | import cv2 as cv 8 | import torch 9 | def show_anchor(input_shape_hw, stride, anchor_generator_cfg, random_n, select_n): 10 | img = np.zeros(input_shape_hw, np.uint8) 11 | feature_map = [] 12 | for s in stride: 13 | feature_map.append([input_shape_hw[0] // s, input_shape_hw[1] // s]) 14 | anchor_generator = build_anchor_generator(anchor_generator_cfg) 15 | anchors = anchor_generator.grid_anchors(feature_map) # anchor coordinates at the original image scale, xyxy format with top-left origin 16 | base_anchors = anchor_generator.base_anchors 17 | assigner=dict(type='ATSSAssigner', topk=9) 18 | assigner = build_assigner(assigner) 19 | 20 | #print(anchors[0].shape,anchors[1].shape) 21 | nums_per_level = [len(each) for each in anchors] 22 | #for each in anchors: 23 | # nums_per_level.append(len(each)) 24 | anchors = torch.cat([each for each in anchors],dim=0) 25 | gt_bboxes = torch.tensor([[100,100,300,300],[400,400,600,600]]).to(anchors.device) 26 | gt_labels = torch.tensor([1,2]).to(anchors.device) 27 | #print(anchors.device,gt_bboxes.device) 28 | #print(nums_per_level) 29 | assign_result = assigner.assign(anchors, nums_per_level, gt_bboxes, None, gt_labels) 30 | print((assign_result.gt_inds!=0).nonzero().shape) 31 | anchors = anchors[(assign_result.gt_inds!=0).nonzero().squeeze(1)] 32 | print(anchors) 33 | values,indices = anchors.min(-1) 34 | anchors = anchors[(values>0).nonzero().squeeze(1)].cpu().numpy() 35 | print(anchors) 36 | img_ = mmcv.imshow_bboxes(img, anchors, thickness=1, show=False) 37 | img_ = mmcv.imshow_bboxes(img_,gt_bboxes.cpu().numpy() , thickness=1, colors='red', show=False) 38 | cv.imshow('img',img_) 39 | if cv.waitKey(0) & 0xFF== ord('q'): 40 | exit(0) 41 | ''' 42 | for i,each in enumerate(base_anchors): 43 | each[:,0:4:2] += input_shape_hw[0]//2 44 | each[:,1:4:2] += input_shape_hw[1]//2 45 | for _ in range(random_n): 46 | disp_img = [] 47 | for i,anchor in enumerate(anchors): 48 | img = np.zeros(input_shape_hw, np.uint8) 49 | anchor = anchor.cpu().numpy() 50 | print(anchor.shape) 51 | index = (anchor[:, 0] > 0) & (anchor[:, 1] > 0) & (anchor[:, 2] < input_shape_hw[1]) & \ 52 | (anchor[:, 3] < input_shape_hw[0]) 53 | anchor = anchor[index] 54 
| 55 | anchor = np.random.permutation(anchor) 56 | img_ = mmcv.imshow_bboxes(img, anchor[:select_n], thickness=1, show=False) 57 | img_ = mmcv.imshow_bboxes(img_, base_anchors[i].cpu().numpy(), thickness=1, colors='red', show=False) 58 | #disp_img.append(img_) 59 | 60 | #time.sleep(0.3) 61 | ''' 62 | def demo_atss(input_shape_hw): 63 | stride = [8, 16, 32, 64, 128] 64 | anchor_generator_cfg = dict( 65 | type='AnchorGenerator', 66 | octave_base_scale=8, # base anchor scale per level; increasing it enlarges all anchors 67 | scales_per_octave=1, # a single scale per octave, so one anchor size per level 68 | ratios=[1.0], # a single 1:1 aspect ratio, so ATSS places one anchor per location 69 | strides=stride) # output stride per feature level; anchor sizes span 8x8=64 up to 8x128=1024 70 | random_n = 10 71 | select_n = 100 72 | show_anchor(input_shape_hw, stride, anchor_generator_cfg, random_n, select_n) 73 | 74 | 75 | 76 | 77 | 78 | if __name__ == '__main__': 79 | input_shape_hw = (640, 640, 3) 80 | demo_atss(input_shape_hw) 81 | #demo_yolov3(input_shape_hw) -------------------------------------------------------------------------------- /configs/models/faster_rcnn_r50_caffe_dc5.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='FasterRCNN', 5 | pretrained='open-mmlab://detectron2/resnet50_caffe', 6 | backbone=dict( 7 | type='ResNet', 8 | depth=50, 9 | num_stages=4, 10 | strides=(1, 2, 2, 1), 11 | dilations=(1, 1, 1, 2), 12 | out_indices=(3, ), 13 | frozen_stages=1, 14 | norm_cfg=norm_cfg, 15 | norm_eval=True, 16 | style='caffe'), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=2048, 20 | feat_channels=2048, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | roi_head=dict( 34 | type='StandardRoIHead', 35 | bbox_roi_extractor=dict( 36 | type='SingleRoIExtractor', 37 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 38 | out_channels=2048, 39 | featmap_strides=[16]), 40 | bbox_head=dict( 41 | type='Shared2FCBBoxHead', 42 | in_channels=2048, 43 | fc_out_channels=1024, 44 | roi_feat_size=7, 45 | num_classes=80, 46 | bbox_coder=dict( 47 | type='DeltaXYWHBBoxCoder', 48 | target_means=[0., 0., 0., 0.], 49 | target_stds=[0.1, 0.1, 0.2, 0.2]), 50 | reg_class_agnostic=False, 51 | loss_cls=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 53 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 54 | # model training and testing settings 55 | train_cfg=dict( 56 | rpn=dict( 57 | assigner=dict( 58 | type='MaxIoUAssigner', 59 | pos_iou_thr=0.7, 60 | neg_iou_thr=0.3, 61 | min_pos_iou=0.3, 62 | match_low_quality=True, 63 | ignore_iof_thr=-1), 64 | sampler=dict( 65 | type='RandomSampler', 66 | num=256, 67 | pos_fraction=0.5, 68 | neg_pos_ub=-1, 69 | add_gt_as_proposals=False), 70 | allowed_border=0, 71 | pos_weight=-1, 72 | debug=False), 73 | rpn_proposal=dict( 74 | nms_pre=12000, 75 | max_per_img=2000, 76 | nms=dict(type='nms', iou_threshold=0.7), 77 | min_bbox_size=0), 78 | rcnn=dict( 79 | assigner=dict( 80 | type='MaxIoUAssigner', 81 | pos_iou_thr=0.5, 82 | neg_iou_thr=0.5, 83 | min_pos_iou=0.5, 84 | match_low_quality=False, 85 | ignore_iof_thr=-1), 86 | sampler=dict( 87 
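            # R-CNN stage sampler below: 512 RoIs per image with pos_fraction=0.25,
            # i.e. at most 128 positive samples per image.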
| type='RandomSampler', 88 | num=512, 89 | pos_fraction=0.25, 90 | neg_pos_ub=-1, 91 | add_gt_as_proposals=True), 92 | pos_weight=-1, 93 | debug=False)), 94 | test_cfg=dict( 95 | rpn=dict( 96 | nms=dict(type='nms', iou_threshold=0.7), 97 | nms_pre=6000, 98 | max_per_img=1000, 99 | min_bbox_size=0), 100 | rcnn=dict( 101 | score_thr=0.05, 102 | nms=dict(type='nms', iou_threshold=0.5), 103 | max_per_img=100))) 104 | -------------------------------------------------------------------------------- /configs/_base_/models/faster_rcnn_r50_caffe_dc5.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='FasterRCNN', 5 | pretrained='open-mmlab://detectron2/resnet50_caffe', 6 | backbone=dict( 7 | type='ResNet', 8 | depth=50, 9 | num_stages=4, 10 | strides=(1, 2, 2, 1), 11 | dilations=(1, 1, 1, 2), 12 | out_indices=(3, ), 13 | frozen_stages=1, 14 | norm_cfg=norm_cfg, 15 | norm_eval=True, 16 | style='caffe'), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=2048, 20 | feat_channels=2048, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | roi_head=dict( 34 | type='StandardRoIHead', 35 | bbox_roi_extractor=dict( 36 | type='SingleRoIExtractor', 37 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 38 | out_channels=2048, 39 | featmap_strides=[16]), 40 | bbox_head=dict( 41 | type='Shared2FCBBoxHead', 42 | in_channels=2048, 43 | fc_out_channels=1024, 44 | roi_feat_size=7, 45 | num_classes=80, 46 | bbox_coder=dict( 47 | type='DeltaXYWHBBoxCoder', 48 | target_means=[0., 0., 0., 0.], 49 | target_stds=[0.1, 0.1, 0.2, 0.2]), 50 | reg_class_agnostic=False, 51 | loss_cls=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 53 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 54 | # model training and testing settings 55 | train_cfg=dict( 56 | rpn=dict( 57 | assigner=dict( 58 | type='MaxIoUAssigner', 59 | pos_iou_thr=0.7, 60 | neg_iou_thr=0.3, 61 | min_pos_iou=0.3, 62 | match_low_quality=True, 63 | ignore_iof_thr=-1), 64 | sampler=dict( 65 | type='RandomSampler', 66 | num=256, 67 | pos_fraction=0.5, 68 | neg_pos_ub=-1, 69 | add_gt_as_proposals=False), 70 | allowed_border=0, 71 | pos_weight=-1, 72 | debug=False), 73 | rpn_proposal=dict( 74 | nms_pre=12000, 75 | max_per_img=2000, 76 | nms=dict(type='nms', iou_threshold=0.7), 77 | min_bbox_size=0), 78 | rcnn=dict( 79 | assigner=dict( 80 | type='MaxIoUAssigner', 81 | pos_iou_thr=0.5, 82 | neg_iou_thr=0.5, 83 | min_pos_iou=0.5, 84 | match_low_quality=False, 85 | ignore_iof_thr=-1), 86 | sampler=dict( 87 | type='RandomSampler', 88 | num=512, 89 | pos_fraction=0.25, 90 | neg_pos_ub=-1, 91 | add_gt_as_proposals=True), 92 | pos_weight=-1, 93 | debug=False)), 94 | test_cfg=dict( 95 | rpn=dict( 96 | nms=dict(type='nms', iou_threshold=0.7), 97 | nms_pre=6000, 98 | max_per_img=1000, 99 | min_bbox_size=0), 100 | rcnn=dict( 101 | score_thr=0.05, 102 | nms=dict(type='nms', iou_threshold=0.5), 103 | max_per_img=100))) 104 | -------------------------------------------------------------------------------- /configs/models/faster_rcnn_r50_fpn.py: 
-------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FasterRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | roi_head=dict( 36 | type='StandardRoIHead', 37 | bbox_roi_extractor=dict( 38 | type='SingleRoIExtractor', 39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 40 | out_channels=256, 41 | featmap_strides=[4, 8, 16, 32]), 42 | bbox_head=dict( 43 | type='Shared2FCBBoxHead', 44 | in_channels=256, 45 | fc_out_channels=1024, 46 | roi_feat_size=7, 47 | num_classes=80, 48 | bbox_coder=dict( 49 | type='DeltaXYWHBBoxCoder', 50 | target_means=[0., 0., 0., 0.], 51 | target_stds=[0.1, 0.1, 0.2, 0.2]), 52 | reg_class_agnostic=False, 53 | loss_cls=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 56 | # model training and testing settings 57 | train_cfg=dict( 58 | rpn=dict( 59 | assigner=dict( 60 | type='MaxIoUAssigner', 61 | pos_iou_thr=0.7, 62 | neg_iou_thr=0.3, 63 | min_pos_iou=0.3, 64 | match_low_quality=True, 65 | ignore_iof_thr=-1), 66 | sampler=dict( 67 | type='RandomSampler', 68 | num=256, 69 | pos_fraction=0.5, 70 | neg_pos_ub=-1, 71 | add_gt_as_proposals=False), 72 | allowed_border=-1, 73 | pos_weight=-1, 74 | debug=False), 75 | rpn_proposal=dict( 76 | nms_pre=2000, 77 | max_per_img=1000, 78 | nms=dict(type='nms', iou_threshold=0.7), 79 | min_bbox_size=0), 80 | rcnn=dict( 81 | assigner=dict( 82 | type='MaxIoUAssigner', 83 | pos_iou_thr=0.5, 84 | neg_iou_thr=0.5, 85 | min_pos_iou=0.5, 86 | match_low_quality=False, 87 | ignore_iof_thr=-1), 88 | sampler=dict( 89 | type='RandomSampler', 90 | num=512, 91 | pos_fraction=0.25, 92 | neg_pos_ub=-1, 93 | add_gt_as_proposals=True), 94 | pos_weight=-1, 95 | debug=False)), 96 | test_cfg=dict( 97 | rpn=dict( 98 | nms_pre=1000, 99 | max_per_img=1000, 100 | nms=dict(type='nms', iou_threshold=0.7), 101 | min_bbox_size=0), 102 | rcnn=dict( 103 | score_thr=0.05, 104 | nms=dict(type='nms', iou_threshold=0.5), 105 | max_per_img=100) 106 | # soft-nms is also supported for rcnn testing 107 | # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05) 108 | )) 109 | -------------------------------------------------------------------------------- /configs/_base_/models/faster_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FasterRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | 
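        # norm_eval=True keeps BatchNorm layers in eval mode, i.e. the running
        # statistics stay frozen while fine-tuning with small batch sizes.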
style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | roi_head=dict( 36 | type='StandardRoIHead', 37 | bbox_roi_extractor=dict( 38 | type='SingleRoIExtractor', 39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 40 | out_channels=256, 41 | featmap_strides=[4, 8, 16, 32]), 42 | bbox_head=dict( 43 | type='Shared2FCBBoxHead', 44 | in_channels=256, 45 | fc_out_channels=1024, 46 | roi_feat_size=7, 47 | num_classes=80, 48 | bbox_coder=dict( 49 | type='DeltaXYWHBBoxCoder', 50 | target_means=[0., 0., 0., 0.], 51 | target_stds=[0.1, 0.1, 0.2, 0.2]), 52 | reg_class_agnostic=False, 53 | loss_cls=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 56 | # model training and testing settings 57 | train_cfg=dict( 58 | rpn=dict( 59 | assigner=dict( 60 | type='MaxIoUAssigner', 61 | pos_iou_thr=0.7, 62 | neg_iou_thr=0.3, 63 | min_pos_iou=0.3, 64 | match_low_quality=True, 65 | ignore_iof_thr=-1), 66 | sampler=dict( 67 | type='RandomSampler', 68 | num=256, 69 | pos_fraction=0.5, 70 | neg_pos_ub=-1, 71 | add_gt_as_proposals=False), 72 | allowed_border=-1, 73 | pos_weight=-1, 74 | debug=False), 75 | rpn_proposal=dict( 76 | nms_pre=2000, 77 | max_per_img=1000, 78 | nms=dict(type='nms', iou_threshold=0.7), 79 | min_bbox_size=0), 80 | rcnn=dict( 81 | assigner=dict( 82 | type='MaxIoUAssigner', 83 | pos_iou_thr=0.5, 84 | neg_iou_thr=0.5, 85 | min_pos_iou=0.5, 86 | match_low_quality=False, 87 | ignore_iof_thr=-1), 88 | sampler=dict( 89 | type='RandomSampler', 90 | num=512, 91 | pos_fraction=0.25, 92 | neg_pos_ub=-1, 93 | add_gt_as_proposals=True), 94 | pos_weight=-1, 95 | debug=False)), 96 | test_cfg=dict( 97 | rpn=dict( 98 | nms_pre=1000, 99 | max_per_img=1000, 100 | nms=dict(type='nms', iou_threshold=0.7), 101 | min_bbox_size=0), 102 | rcnn=dict( 103 | score_thr=0.05, 104 | nms=dict(type='nms', iou_threshold=0.5), 105 | max_per_img=100) 106 | # soft-nms is also supported for rcnn testing 107 | # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05) 108 | )) 109 | -------------------------------------------------------------------------------- /configs/models/faster_rcnn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='FasterRCNN', 5 | pretrained='open-mmlab://detectron2/resnet50_caffe', 6 | backbone=dict( 7 | type='ResNet', 8 | depth=50, 9 | num_stages=3, 10 | strides=(1, 2, 2), 11 | dilations=(1, 1, 1), 12 | out_indices=(2, ), 13 | frozen_stages=1, 14 | norm_cfg=norm_cfg, 15 | norm_eval=True, 16 | style='caffe'), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=1024, 20 | feat_channels=1024, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | 
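            # Zero-mean / unit-std delta encoding for the RPN regression targets.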
target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | roi_head=dict( 34 | type='StandardRoIHead', 35 | shared_head=dict( 36 | type='ResLayer', 37 | depth=50, 38 | stage=3, 39 | stride=2, 40 | dilation=1, 41 | style='caffe', 42 | norm_cfg=norm_cfg, 43 | norm_eval=True), 44 | bbox_roi_extractor=dict( 45 | type='SingleRoIExtractor', 46 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 47 | out_channels=1024, 48 | featmap_strides=[16]), 49 | bbox_head=dict( 50 | type='BBoxHead', 51 | with_avg_pool=True, 52 | roi_feat_size=7, 53 | in_channels=2048, 54 | num_classes=80, 55 | bbox_coder=dict( 56 | type='DeltaXYWHBBoxCoder', 57 | target_means=[0., 0., 0., 0.], 58 | target_stds=[0.1, 0.1, 0.2, 0.2]), 59 | reg_class_agnostic=False, 60 | loss_cls=dict( 61 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 62 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 63 | # model training and testing settings 64 | train_cfg=dict( 65 | rpn=dict( 66 | assigner=dict( 67 | type='MaxIoUAssigner', 68 | pos_iou_thr=0.7, 69 | neg_iou_thr=0.3, 70 | min_pos_iou=0.3, 71 | match_low_quality=True, 72 | ignore_iof_thr=-1), 73 | sampler=dict( 74 | type='RandomSampler', 75 | num=256, 76 | pos_fraction=0.5, 77 | neg_pos_ub=-1, 78 | add_gt_as_proposals=False), 79 | allowed_border=0, 80 | pos_weight=-1, 81 | debug=False), 82 | rpn_proposal=dict( 83 | nms_pre=12000, 84 | max_per_img=2000, 85 | nms=dict(type='nms', iou_threshold=0.7), 86 | min_bbox_size=0), 87 | rcnn=dict( 88 | assigner=dict( 89 | type='MaxIoUAssigner', 90 | pos_iou_thr=0.5, 91 | neg_iou_thr=0.5, 92 | min_pos_iou=0.5, 93 | match_low_quality=False, 94 | ignore_iof_thr=-1), 95 | sampler=dict( 96 | type='RandomSampler', 97 | num=512, 98 | pos_fraction=0.25, 99 | neg_pos_ub=-1, 100 | add_gt_as_proposals=True), 101 | pos_weight=-1, 102 | debug=False)), 103 | test_cfg=dict( 104 | rpn=dict( 105 | nms_pre=6000, 106 | max_per_img=1000, 107 | nms=dict(type='nms', iou_threshold=0.7), 108 | min_bbox_size=0), 109 | rcnn=dict( 110 | score_thr=0.05, 111 | nms=dict(type='nms', iou_threshold=0.5), 112 | max_per_img=100))) 113 | -------------------------------------------------------------------------------- /configs/_base_/models/faster_rcnn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='FasterRCNN', 5 | pretrained='open-mmlab://detectron2/resnet50_caffe', 6 | backbone=dict( 7 | type='ResNet', 8 | depth=50, 9 | num_stages=3, 10 | strides=(1, 2, 2), 11 | dilations=(1, 1, 1), 12 | out_indices=(2, ), 13 | frozen_stages=1, 14 | norm_cfg=norm_cfg, 15 | norm_eval=True, 16 | style='caffe'), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=1024, 20 | feat_channels=1024, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | roi_head=dict( 34 | type='StandardRoIHead', 35 | shared_head=dict( 36 | type='ResLayer', 37 | depth=50, 38 | stage=3, 39 | stride=2, 40 | dilation=1, 41 | 
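            # The res5 stage (stage index 3) serves as the shared per-RoI head,
            # the standard C4 layout for Caffe-style Faster R-CNN.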
style='caffe', 42 | norm_cfg=norm_cfg, 43 | norm_eval=True), 44 | bbox_roi_extractor=dict( 45 | type='SingleRoIExtractor', 46 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 47 | out_channels=1024, 48 | featmap_strides=[16]), 49 | bbox_head=dict( 50 | type='BBoxHead', 51 | with_avg_pool=True, 52 | roi_feat_size=7, 53 | in_channels=2048, 54 | num_classes=80, 55 | bbox_coder=dict( 56 | type='DeltaXYWHBBoxCoder', 57 | target_means=[0., 0., 0., 0.], 58 | target_stds=[0.1, 0.1, 0.2, 0.2]), 59 | reg_class_agnostic=False, 60 | loss_cls=dict( 61 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 62 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 63 | # model training and testing settings 64 | train_cfg=dict( 65 | rpn=dict( 66 | assigner=dict( 67 | type='MaxIoUAssigner', 68 | pos_iou_thr=0.7, 69 | neg_iou_thr=0.3, 70 | min_pos_iou=0.3, 71 | match_low_quality=True, 72 | ignore_iof_thr=-1), 73 | sampler=dict( 74 | type='RandomSampler', 75 | num=256, 76 | pos_fraction=0.5, 77 | neg_pos_ub=-1, 78 | add_gt_as_proposals=False), 79 | allowed_border=0, 80 | pos_weight=-1, 81 | debug=False), 82 | rpn_proposal=dict( 83 | nms_pre=12000, 84 | max_per_img=2000, 85 | nms=dict(type='nms', iou_threshold=0.7), 86 | min_bbox_size=0), 87 | rcnn=dict( 88 | assigner=dict( 89 | type='MaxIoUAssigner', 90 | pos_iou_thr=0.5, 91 | neg_iou_thr=0.5, 92 | min_pos_iou=0.5, 93 | match_low_quality=False, 94 | ignore_iof_thr=-1), 95 | sampler=dict( 96 | type='RandomSampler', 97 | num=512, 98 | pos_fraction=0.25, 99 | neg_pos_ub=-1, 100 | add_gt_as_proposals=True), 101 | pos_weight=-1, 102 | debug=False)), 103 | test_cfg=dict( 104 | rpn=dict( 105 | nms_pre=6000, 106 | max_per_img=1000, 107 | nms=dict(type='nms', iou_threshold=0.7), 108 | min_bbox_size=0), 109 | rcnn=dict( 110 | score_thr=0.05, 111 | nms=dict(type='nms', iou_threshold=0.5), 112 | max_per_img=100))) 113 | -------------------------------------------------------------------------------- /easymd/analysis_tools/benchmark.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | 4 | import torch 5 | from mmcv import Config, DictAction 6 | from mmcv.cnn import fuse_conv_bn 7 | from mmcv.parallel import MMDataParallel 8 | from mmcv.runner import load_checkpoint, wrap_fp16_model 9 | 10 | from mmdet.datasets import (build_dataloader, build_dataset, 11 | replace_ImageToTensor) 12 | from mmdet.models import build_detector 13 | 14 | 15 | def parse_args(): 16 | parser = argparse.ArgumentParser(description='MMDet benchmark a model') 17 | parser.add_argument('config', help='test config file path') 18 | parser.add_argument('checkpoint', help='checkpoint file') 19 | parser.add_argument( 20 | '--log-interval', default=50, help='interval of logging') 21 | parser.add_argument( 22 | '--fuse-conv-bn', 23 | action='store_true', 24 | help='Whether to fuse conv and bn, this will slightly increase' 25 | 'the inference speed') 26 | parser.add_argument( 27 | '--cfg-options', 28 | nargs='+', 29 | action=DictAction, 30 | help='override some settings in the used config, the key-value pair ' 31 | 'in xxx=yyy format will be merged into config file. If the value to ' 32 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 33 | 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' 34 | 'Note that the quotation marks are necessary and that no white space ' 35 | 'is allowed.') 36 | args = parser.parse_args() 37 | return args 38 | 39 | 40 | def main(): 41 | args = parse_args() 42 | 43 | cfg = Config.fromfile(args.config) 44 | if args.cfg_options is not None: 45 | cfg.merge_from_dict(args.cfg_options) 46 | # import modules from string list. 47 | if cfg.get('custom_imports', None): 48 | from mmcv.utils import import_modules_from_strings 49 | import_modules_from_strings(**cfg['custom_imports']) 50 | # set cudnn_benchmark 51 | if cfg.get('cudnn_benchmark', False): 52 | torch.backends.cudnn.benchmark = True 53 | cfg.model.pretrained = None 54 | cfg.data.test.test_mode = True 55 | 56 | # build the dataloader 57 | samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1) 58 | if samples_per_gpu > 1: 59 | # Replace 'ImageToTensor' to 'DefaultFormatBundle' 60 | cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline) 61 | dataset = build_dataset(cfg.data.test) 62 | data_loader = build_dataloader( 63 | dataset, 64 | samples_per_gpu=1, 65 | workers_per_gpu=cfg.data.workers_per_gpu, 66 | dist=False, 67 | shuffle=False) 68 | 69 | # build the model and load checkpoint 70 | cfg.model.train_cfg = None 71 | model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg')) 72 | fp16_cfg = cfg.get('fp16', None) 73 | if fp16_cfg is not None: 74 | wrap_fp16_model(model) 75 | load_checkpoint(model, args.checkpoint, map_location='cpu') 76 | if args.fuse_conv_bn: 77 | model = fuse_conv_bn(model) 78 | 79 | model = MMDataParallel(model, device_ids=[0]) 80 | 81 | model.eval() 82 | 83 | # the first several iterations may be very slow so skip them 84 | num_warmup = 5 85 | pure_inf_time = 0 86 | 87 | # benchmark with 2000 image and take the average 88 | for i, data in enumerate(data_loader): 89 | 90 | torch.cuda.synchronize() 91 | start_time = time.perf_counter() 92 | 93 | with torch.no_grad(): 94 | model(return_loss=False, rescale=True, **data) 95 | 96 | torch.cuda.synchronize() 97 | elapsed = time.perf_counter() - start_time 98 | 99 | if i >= num_warmup: 100 | pure_inf_time += elapsed 101 | if (i + 1) % args.log_interval == 0: 102 | fps = (i + 1 - num_warmup) / pure_inf_time 103 | print(f'Done image [{i + 1:<3}/ 2000], fps: {fps:.1f} img / s') 104 | 105 | if (i + 1) == 2000: 106 | pure_inf_time += elapsed 107 | fps = (i + 1 - num_warmup) / pure_inf_time 108 | print(f'Overall fps: {fps:.1f} img / s') 109 | break 110 | 111 | 112 | if __name__ == '__main__': 113 | main() 114 | -------------------------------------------------------------------------------- /tools/ana_tools/benchmark.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | import easymd 4 | import torch 5 | from mmcv import Config, DictAction 6 | from mmcv.cnn import fuse_conv_bn 7 | from mmcv.parallel import MMDataParallel 8 | from mmcv.runner import load_checkpoint, wrap_fp16_model 9 | 10 | from mmdet.datasets import (build_dataloader, build_dataset, 11 | replace_ImageToTensor) 12 | from mmdet.models import build_detector 13 | 14 | 15 | def parse_args(): 16 | parser = argparse.ArgumentParser(description='MMDet benchmark a model') 17 | parser.add_argument('config', help='test config file path') 18 | parser.add_argument('--checkpoint',default=None, help='checkpoint file') 19 | parser.add_argument( 20 | '--log-interval', default=50, help='interval of logging') 21 | parser.add_argument( 22 | '--fuse-conv-bn', 23 | action='store_true', 
24 | help='Whether to fuse conv and bn, this will slightly increase' 25 | 'the inference speed') 26 | parser.add_argument( 27 | '--cfg-options', 28 | nargs='+', 29 | action=DictAction, 30 | help='override some settings in the used config, the key-value pair ' 31 | 'in xxx=yyy format will be merged into config file. If the value to ' 32 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 33 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 34 | 'Note that the quotation marks are necessary and that no white space ' 35 | 'is allowed.') 36 | args = parser.parse_args() 37 | return args 38 | 39 | 40 | def main(): 41 | args = parse_args() 42 | 43 | cfg = Config.fromfile(args.config) 44 | if args.cfg_options is not None: 45 | cfg.merge_from_dict(args.cfg_options) 46 | # import modules from string list. 47 | if cfg.get('custom_imports', None): 48 | from mmcv.utils import import_modules_from_strings 49 | import_modules_from_strings(**cfg['custom_imports']) 50 | # set cudnn_benchmark 51 | if cfg.get('cudnn_benchmark', False): 52 | torch.backends.cudnn.benchmark = True 53 | cfg.model.pretrained = None 54 | cfg.data.test.test_mode = True 55 | 56 | # build the dataloader 57 | samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1) 58 | if samples_per_gpu > 1: 59 | # Replace 'ImageToTensor' to 'DefaultFormatBundle' 60 | cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline) 61 | dataset = build_dataset(cfg.data.test) 62 | data_loader = build_dataloader( 63 | dataset, 64 | samples_per_gpu=1, 65 | workers_per_gpu=cfg.data.workers_per_gpu, 66 | dist=False, 67 | shuffle=False) 68 | 69 | # build the model and load checkpoint 70 | cfg.model.train_cfg = None 71 | model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg')) 72 | fp16_cfg = cfg.get('fp16', None) 73 | if fp16_cfg is not None: 74 | wrap_fp16_model(model) 75 | if args.checkpoint is not None: 76 | load_checkpoint(model, args.checkpoint, map_location='cpu') 77 | if args.fuse_conv_bn: 78 | model = fuse_conv_bn(model) 79 | 80 | model = MMDataParallel(model, device_ids=[0]) 81 | 82 | model.eval() 83 | 84 | # the first several iterations may be very slow so skip them 85 | num_warmup = 5 86 | pure_inf_time = 0 87 | 88 | # benchmark with 2000 image and take the average 89 | for i, data in enumerate(data_loader): 90 | 91 | torch.cuda.synchronize() 92 | start_time = time.perf_counter() 93 | 94 | with torch.no_grad(): 95 | model(return_loss=False, rescale=True, **data) 96 | 97 | torch.cuda.synchronize() 98 | elapsed = time.perf_counter() - start_time 99 | 100 | if i >= num_warmup: 101 | pure_inf_time += elapsed 102 | if (i + 1) % args.log_interval == 0: 103 | fps = (i + 1 - num_warmup) / pure_inf_time 104 | print(f'Done image [{i + 1:<3}/ 2000], fps: {fps:.1f} img / s') 105 | 106 | if (i + 1) == 2000: 107 | pure_inf_time += elapsed 108 | fps = (i + 1 - num_warmup) / pure_inf_time 109 | print(f'Overall fps: {fps:.1f} img / s') 110 | break 111 | 112 | 113 | if __name__ == '__main__': 114 | main() 115 | -------------------------------------------------------------------------------- /configs/models/mask_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='MaskRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | 
style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | roi_head=dict( 36 | type='StandardRoIHead', 37 | bbox_roi_extractor=dict( 38 | type='SingleRoIExtractor', 39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 40 | out_channels=256, 41 | featmap_strides=[4, 8, 16, 32]), 42 | bbox_head=dict( 43 | type='Shared2FCBBoxHead', 44 | in_channels=256, 45 | fc_out_channels=1024, 46 | roi_feat_size=7, 47 | num_classes=80, 48 | bbox_coder=dict( 49 | type='DeltaXYWHBBoxCoder', 50 | target_means=[0., 0., 0., 0.], 51 | target_stds=[0.1, 0.1, 0.2, 0.2]), 52 | reg_class_agnostic=False, 53 | loss_cls=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 56 | mask_roi_extractor=dict( 57 | type='SingleRoIExtractor', 58 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 59 | out_channels=256, 60 | featmap_strides=[4, 8, 16, 32]), 61 | things_mask_head=dict( 62 | type='FCNMaskHead', 63 | num_convs=4, 64 | in_channels=256, 65 | conv_out_channels=256, 66 | num_classes=80, 67 | loss_mask=dict( 68 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 69 | # model training and testing settings 70 | train_cfg=dict( 71 | rpn=dict( 72 | assigner=dict( 73 | type='MaxIoUAssigner', 74 | pos_iou_thr=0.7, 75 | neg_iou_thr=0.3, 76 | min_pos_iou=0.3, 77 | match_low_quality=True, 78 | ignore_iof_thr=-1), 79 | sampler=dict( 80 | type='RandomSampler', 81 | num=256, 82 | pos_fraction=0.5, 83 | neg_pos_ub=-1, 84 | add_gt_as_proposals=False), 85 | allowed_border=-1, 86 | pos_weight=-1, 87 | debug=False), 88 | rpn_proposal=dict( 89 | nms_pre=2000, 90 | max_per_img=1000, 91 | nms=dict(type='nms', iou_threshold=0.7), 92 | min_bbox_size=0), 93 | rcnn=dict( 94 | assigner=dict( 95 | type='MaxIoUAssigner', 96 | pos_iou_thr=0.5, 97 | neg_iou_thr=0.5, 98 | min_pos_iou=0.5, 99 | match_low_quality=True, 100 | ignore_iof_thr=-1), 101 | sampler=dict( 102 | type='RandomSampler', 103 | num=512, 104 | pos_fraction=0.25, 105 | neg_pos_ub=-1, 106 | add_gt_as_proposals=True), 107 | mask_size=28, 108 | pos_weight=-1, 109 | debug=False)), 110 | test_cfg=dict( 111 | rpn=dict( 112 | nms_pre=1000, 113 | max_per_img=1000, 114 | nms=dict(type='nms', iou_threshold=0.7), 115 | min_bbox_size=0), 116 | rcnn=dict( 117 | score_thr=0.05, 118 | nms=dict(type='nms', iou_threshold=0.5), 119 | max_per_img=100, 120 | mask_thr_binary=0.5))) 121 | -------------------------------------------------------------------------------- /configs/_base_/models/mask_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='MaskRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | 
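        # FPN neck below: fuses the four backbone stages (256/512/1024/2048
        # channels) into five 256-channel pyramid levels.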
type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | roi_head=dict( 36 | type='StandardRoIHead', 37 | bbox_roi_extractor=dict( 38 | type='SingleRoIExtractor', 39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 40 | out_channels=256, 41 | featmap_strides=[4, 8, 16, 32]), 42 | bbox_head=dict( 43 | type='Shared2FCBBoxHead', 44 | in_channels=256, 45 | fc_out_channels=1024, 46 | roi_feat_size=7, 47 | num_classes=80, 48 | bbox_coder=dict( 49 | type='DeltaXYWHBBoxCoder', 50 | target_means=[0., 0., 0., 0.], 51 | target_stds=[0.1, 0.1, 0.2, 0.2]), 52 | reg_class_agnostic=False, 53 | loss_cls=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 56 | mask_roi_extractor=dict( 57 | type='SingleRoIExtractor', 58 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 59 | out_channels=256, 60 | featmap_strides=[4, 8, 16, 32]), 61 | things_mask_head=dict( 62 | type='FCNMaskHead', 63 | num_convs=4, 64 | in_channels=256, 65 | conv_out_channels=256, 66 | num_classes=80, 67 | loss_mask=dict( 68 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 69 | # model training and testing settings 70 | train_cfg=dict( 71 | rpn=dict( 72 | assigner=dict( 73 | type='MaxIoUAssigner', 74 | pos_iou_thr=0.7, 75 | neg_iou_thr=0.3, 76 | min_pos_iou=0.3, 77 | match_low_quality=True, 78 | ignore_iof_thr=-1), 79 | sampler=dict( 80 | type='RandomSampler', 81 | num=256, 82 | pos_fraction=0.5, 83 | neg_pos_ub=-1, 84 | add_gt_as_proposals=False), 85 | allowed_border=-1, 86 | pos_weight=-1, 87 | debug=False), 88 | rpn_proposal=dict( 89 | nms_pre=2000, 90 | max_per_img=1000, 91 | nms=dict(type='nms', iou_threshold=0.7), 92 | min_bbox_size=0), 93 | rcnn=dict( 94 | assigner=dict( 95 | type='MaxIoUAssigner', 96 | pos_iou_thr=0.5, 97 | neg_iou_thr=0.5, 98 | min_pos_iou=0.5, 99 | match_low_quality=True, 100 | ignore_iof_thr=-1), 101 | sampler=dict( 102 | type='RandomSampler', 103 | num=512, 104 | pos_fraction=0.25, 105 | neg_pos_ub=-1, 106 | add_gt_as_proposals=True), 107 | mask_size=28, 108 | pos_weight=-1, 109 | debug=False)), 110 | test_cfg=dict( 111 | rpn=dict( 112 | nms_pre=1000, 113 | max_per_img=1000, 114 | nms=dict(type='nms', iou_threshold=0.7), 115 | min_bbox_size=0), 116 | rcnn=dict( 117 | score_thr=0.05, 118 | nms=dict(type='nms', iou_threshold=0.5), 119 | max_per_img=100, 120 | mask_thr_binary=0.5))) 121 | -------------------------------------------------------------------------------- /configs/models/mask_rcnn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='MaskRCNN', 5 | pretrained='open-mmlab://detectron2/resnet50_caffe', 6 | backbone=dict( 7 | type='ResNet', 8 | depth=50, 9 | num_stages=3, 10 | strides=(1, 2, 2), 11 | dilations=(1, 1, 1), 12 | out_indices=(2, ), 13 | frozen_stages=1, 14 | norm_cfg=norm_cfg, 15 | norm_eval=True, 16 
| style='caffe'), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=1024, 20 | feat_channels=1024, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | roi_head=dict( 34 | type='StandardRoIHead', 35 | shared_head=dict( 36 | type='ResLayer', 37 | depth=50, 38 | stage=3, 39 | stride=2, 40 | dilation=1, 41 | style='caffe', 42 | norm_cfg=norm_cfg, 43 | norm_eval=True), 44 | bbox_roi_extractor=dict( 45 | type='SingleRoIExtractor', 46 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 47 | out_channels=1024, 48 | featmap_strides=[16]), 49 | bbox_head=dict( 50 | type='BBoxHead', 51 | with_avg_pool=True, 52 | roi_feat_size=7, 53 | in_channels=2048, 54 | num_classes=80, 55 | bbox_coder=dict( 56 | type='DeltaXYWHBBoxCoder', 57 | target_means=[0., 0., 0., 0.], 58 | target_stds=[0.1, 0.1, 0.2, 0.2]), 59 | reg_class_agnostic=False, 60 | loss_cls=dict( 61 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 62 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 63 | mask_roi_extractor=None, 64 | things_mask_head=dict( 65 | type='FCNMaskHead', 66 | num_convs=0, 67 | in_channels=2048, 68 | conv_out_channels=256, 69 | num_classes=80, 70 | loss_mask=dict( 71 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 72 | # model training and testing settings 73 | train_cfg=dict( 74 | rpn=dict( 75 | assigner=dict( 76 | type='MaxIoUAssigner', 77 | pos_iou_thr=0.7, 78 | neg_iou_thr=0.3, 79 | min_pos_iou=0.3, 80 | match_low_quality=True, 81 | ignore_iof_thr=-1), 82 | sampler=dict( 83 | type='RandomSampler', 84 | num=256, 85 | pos_fraction=0.5, 86 | neg_pos_ub=-1, 87 | add_gt_as_proposals=False), 88 | allowed_border=0, 89 | pos_weight=-1, 90 | debug=False), 91 | rpn_proposal=dict( 92 | nms_pre=12000, 93 | max_per_img=2000, 94 | nms=dict(type='nms', iou_threshold=0.7), 95 | min_bbox_size=0), 96 | rcnn=dict( 97 | assigner=dict( 98 | type='MaxIoUAssigner', 99 | pos_iou_thr=0.5, 100 | neg_iou_thr=0.5, 101 | min_pos_iou=0.5, 102 | match_low_quality=False, 103 | ignore_iof_thr=-1), 104 | sampler=dict( 105 | type='RandomSampler', 106 | num=512, 107 | pos_fraction=0.25, 108 | neg_pos_ub=-1, 109 | add_gt_as_proposals=True), 110 | mask_size=14, 111 | pos_weight=-1, 112 | debug=False)), 113 | test_cfg=dict( 114 | rpn=dict( 115 | nms_pre=6000, 116 | nms=dict(type='nms', iou_threshold=0.7), 117 | max_per_img=1000, 118 | min_bbox_size=0), 119 | rcnn=dict( 120 | score_thr=0.05, 121 | nms=dict(type='nms', iou_threshold=0.5), 122 | max_per_img=100, 123 | mask_thr_binary=0.5))) 124 | -------------------------------------------------------------------------------- /configs/_base_/models/mask_rcnn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='MaskRCNN', 5 | pretrained='open-mmlab://detectron2/resnet50_caffe', 6 | backbone=dict( 7 | type='ResNet', 8 | depth=50, 9 | num_stages=3, 10 | strides=(1, 2, 2), 11 | dilations=(1, 1, 1), 12 | out_indices=(2, ), 13 | frozen_stages=1, 14 | norm_cfg=norm_cfg, 15 | norm_eval=True, 16 | style='caffe'), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | 
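        # The RPN runs on the single stride-16 C4 feature map, hence the
        # 1024-channel input below.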
in_channels=1024, 20 | feat_channels=1024, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | roi_head=dict( 34 | type='StandardRoIHead', 35 | shared_head=dict( 36 | type='ResLayer', 37 | depth=50, 38 | stage=3, 39 | stride=2, 40 | dilation=1, 41 | style='caffe', 42 | norm_cfg=norm_cfg, 43 | norm_eval=True), 44 | bbox_roi_extractor=dict( 45 | type='SingleRoIExtractor', 46 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 47 | out_channels=1024, 48 | featmap_strides=[16]), 49 | bbox_head=dict( 50 | type='BBoxHead', 51 | with_avg_pool=True, 52 | roi_feat_size=7, 53 | in_channels=2048, 54 | num_classes=80, 55 | bbox_coder=dict( 56 | type='DeltaXYWHBBoxCoder', 57 | target_means=[0., 0., 0., 0.], 58 | target_stds=[0.1, 0.1, 0.2, 0.2]), 59 | reg_class_agnostic=False, 60 | loss_cls=dict( 61 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 62 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 63 | mask_roi_extractor=None, 64 | things_mask_head=dict( 65 | type='FCNMaskHead', 66 | num_convs=0, 67 | in_channels=2048, 68 | conv_out_channels=256, 69 | num_classes=80, 70 | loss_mask=dict( 71 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 72 | # model training and testing settings 73 | train_cfg=dict( 74 | rpn=dict( 75 | assigner=dict( 76 | type='MaxIoUAssigner', 77 | pos_iou_thr=0.7, 78 | neg_iou_thr=0.3, 79 | min_pos_iou=0.3, 80 | match_low_quality=True, 81 | ignore_iof_thr=-1), 82 | sampler=dict( 83 | type='RandomSampler', 84 | num=256, 85 | pos_fraction=0.5, 86 | neg_pos_ub=-1, 87 | add_gt_as_proposals=False), 88 | allowed_border=0, 89 | pos_weight=-1, 90 | debug=False), 91 | rpn_proposal=dict( 92 | nms_pre=12000, 93 | max_per_img=2000, 94 | nms=dict(type='nms', iou_threshold=0.7), 95 | min_bbox_size=0), 96 | rcnn=dict( 97 | assigner=dict( 98 | type='MaxIoUAssigner', 99 | pos_iou_thr=0.5, 100 | neg_iou_thr=0.5, 101 | min_pos_iou=0.5, 102 | match_low_quality=False, 103 | ignore_iof_thr=-1), 104 | sampler=dict( 105 | type='RandomSampler', 106 | num=512, 107 | pos_fraction=0.25, 108 | neg_pos_ub=-1, 109 | add_gt_as_proposals=True), 110 | mask_size=14, 111 | pos_weight=-1, 112 | debug=False)), 113 | test_cfg=dict( 114 | rpn=dict( 115 | nms_pre=6000, 116 | nms=dict(type='nms', iou_threshold=0.7), 117 | max_per_img=1000, 118 | min_bbox_size=0), 119 | rcnn=dict( 120 | score_thr=0.05, 121 | nms=dict(type='nms', iou_threshold=0.5), 122 | max_per_img=100, 123 | mask_thr_binary=0.5))) 124 | -------------------------------------------------------------------------------- /easymd/models/losses/dice_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | 4 | import mmcv 5 | import torch 6 | import torch.nn as nn 7 | 8 | from mmdet.core import bbox_overlaps 9 | #from ..builder import LOSSES 10 | #from .utils import weighted_loss 11 | from mmdet.models.losses.utils import weighted_loss 12 | from mmdet.models.builder import LOSSES 13 | from easymd.models.utils.visual import save_tensor 14 | 15 | 16 | 17 | def center_of_mass(bitmasks): 18 | n, h, w = bitmasks.size() 19 | 20 | ys = torch.linspace(0, 1, h, dtype=torch.float32, 
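                        # normalized [0, 1] coordinate grid along the mask height,
                        # used for the first-moment (center-of-mass) computation below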
device=bitmasks.device) 21 | xs = torch.linspace(0, 1, w, dtype=torch.float32, device=bitmasks.device) 22 | 23 | m00 = bitmasks.sum(dim=-1).sum(dim=-1).clamp(min=1e-6) 24 | m10 = (bitmasks * xs).sum(dim=-1).sum(dim=-1) 25 | m01 = (bitmasks * ys[:, None]).sum(dim=-1).sum(dim=-1) 26 | center_x = m10 / m00 27 | center_y = m01 / m00 28 | return torch.stack([center_x, center_y],-1) 29 | #return center_x, center_y 30 | 31 | 32 | 33 | #@mmcv.jit(derivate=True, coderize=True) 34 | @weighted_loss 35 | def dice_loss(input, target,mask=None,eps=0.001): 36 | N,H,W = input.shape 37 | 38 | input = input.contiguous().view(N, H*W) 39 | target = target.contiguous().view(N, H*W).float() 40 | if mask is not None: 41 | mask = mask.contiguous().view(N, H*W).float() 42 | input = input * mask 43 | target = target * mask 44 | a = torch.sum(input * target, 1) 45 | b = torch.sum(input * input, 1) + eps 46 | c = torch.sum(target * target, 1) + eps 47 | d = (2 * a) / (b + c) 48 | #print('1-d max',(1-d).max()) 49 | return 1 - d 50 | 51 | @weighted_loss 52 | def l1_loss(pred, target): 53 | """L1 loss (element-wise; plain L1, not smooth L1). 54 | 55 | Args: 56 | pred (torch.Tensor): The prediction. 57 | target (torch.Tensor): The learning target of the prediction. 58 | beta (float): Unused placeholder kept from the smooth L1 signature; 59 | no piecewise thresholding is applied. 60 | 61 | Returns: 62 | torch.Tensor: Calculated loss 63 | """ 64 | beta=1.0 65 | assert beta > 0 66 | assert pred.size() == target.size() and target.numel() > 0 67 | loss = torch.abs(pred - target) 68 | return loss 69 | 70 | 71 | 72 | 73 | 74 | 75 | @LOSSES.register_module() 76 | class DiceLoss(nn.Module): 77 | 78 | def __init__(self, eps=1e-6, reduction='mean', loss_weight=1.0): 79 | super(DiceLoss, self).__init__() 80 | self.eps = eps 81 | self.reduction = reduction 82 | self.loss_weight = loss_weight 83 | self.count = 0 84 | def forward(self, 85 | pred, 86 | target, 87 | weight=None, 88 | mask=None, 89 | avg_factor=None, 90 | reduction_override=None, 91 | **kwargs): 92 | 93 | assert reduction_override in (None, 'none', 'mean', 'sum') 94 | reduction = ( 95 | reduction_override if reduction_override else self.reduction) 96 | #if weight is not None and weight.dim() > 1: 97 | # TODO: remove this in the future 98 | # reduce the weight of shape (n,w,h) to (n,) to match the 99 | # giou_loss of shape (n,) 100 | #assert weight.shape == pred.shape 101 | #weight = weight.mean((-2,-1)) 102 | loss = self.loss_weight * dice_loss( 103 | pred, 104 | target, 105 | weight, 106 | mask=mask, 107 | eps=self.eps, 108 | reduction=reduction, 109 | avg_factor=avg_factor, 110 | **kwargs) 111 | #print('DiceLoss',loss, avg_factor) 112 | return loss 113 | 114 | 115 | 116 | @LOSSES.register_module() 117 | class BCEFocalLoss(torch.nn.Module): 118 | """ 119 | Binary focal loss with a fixed alpha. 120 | """ 121 | def __init__(self, gamma=2, alpha=0.25, reduction='sum',loss_weight=1.0): 122 | super().__init__() 123 | self.gamma = gamma 124 | self.alpha = alpha 125 | self.reduction = reduction 126 | self.loss_weight = loss_weight 127 | def forward(self, _input, target): 128 | pt = torch.sigmoid(_input) 129 | 130 | #print(pt.shape, target.shape) 131 | alpha = self.alpha 132 | loss = - alpha * (1 - pt) ** self.gamma * target * torch.log(pt) - \ 133 | (1 - alpha) * pt ** self.gamma * (1 - target) * torch.log(1 - pt) 134 | #print('loss_shape',loss.shape) 135 | if self.reduction == 'elementwise_mean': 136 | loss = torch.mean(loss) 137 | elif self.reduction == 'sum': 138 | loss = torch.sum(loss) 139 | 140 | return loss*self.loss_weight/54  # NOTE: 54 is a hard-coded normalization constant
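# Minimal usage sketch (illustrative addition, not part of the original module):
# dice_loss() flattens each (H, W) mask and computes 1 - 2*<p, t> / (|p|^2 + |t|^2)
# per sample, so DiceLoss expects probability maps and binary targets of shape
# (N, H, W). Assumes mmdet/easymd are importable when run as a script.
if __name__ == '__main__':
    pred = torch.rand(2, 8, 8)                     # predicted mask probabilities
    target = (torch.rand(2, 8, 8) > 0.5).float()   # binary ground-truth masks
    criterion = DiceLoss(loss_weight=1.0)
    print('dice loss:', criterion(pred, target).item())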
-------------------------------------------------------------------------------- /converter/panoptic_cityscapes_categories.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": 7, 4 | "name": "road", 5 | "color": [ 6 | 128, 7 | 64, 8 | 128 9 | ], 10 | "supercategory": "flat", 11 | "isthing": 0 12 | }, 13 | { 14 | "id": 8, 15 | "name": "sidewalk", 16 | "color": [ 17 | 244, 18 | 35, 19 | 232 20 | ], 21 | "supercategory": "flat", 22 | "isthing": 0 23 | }, 24 | { 25 | "id": 11, 26 | "name": "building", 27 | "color": [ 28 | 70, 29 | 70, 30 | 70 31 | ], 32 | "supercategory": "construction", 33 | "isthing": 0 34 | }, 35 | { 36 | "id": 12, 37 | "name": "wall", 38 | "color": [ 39 | 102, 40 | 102, 41 | 156 42 | ], 43 | "supercategory": "construction", 44 | "isthing": 0 45 | }, 46 | { 47 | "id": 13, 48 | "name": "fence", 49 | "color": [ 50 | 190, 51 | 153, 52 | 153 53 | ], 54 | "supercategory": "construction", 55 | "isthing": 0 56 | }, 57 | { 58 | "id": 17, 59 | "name": "pole", 60 | "color": [ 61 | 153, 62 | 153, 63 | 153 64 | ], 65 | "supercategory": "object", 66 | "isthing": 0 67 | }, 68 | { 69 | "id": 19, 70 | "name": "traffic light", 71 | "color": [ 72 | 250, 73 | 170, 74 | 30 75 | ], 76 | "supercategory": "object", 77 | "isthing": 0 78 | }, 79 | { 80 | "id": 20, 81 | "name": "traffic sign", 82 | "color": [ 83 | 220, 84 | 220, 85 | 0 86 | ], 87 | "supercategory": "object", 88 | "isthing": 0 89 | }, 90 | { 91 | "id": 21, 92 | "name": "vegetation", 93 | "color": [ 94 | 107, 95 | 142, 96 | 35 97 | ], 98 | "supercategory": "nature", 99 | "isthing": 0 100 | }, 101 | { 102 | "id": 22, 103 | "name": "terrain", 104 | "color": [ 105 | 152, 106 | 251, 107 | 152 108 | ], 109 | "supercategory": "nature", 110 | "isthing": 0 111 | }, 112 | { 113 | "id": 23, 114 | "name": "sky", 115 | "color": [ 116 | 70, 117 | 130, 118 | 180 119 | ], 120 | "supercategory": "sky", 121 | "isthing": 0 122 | }, 123 | { 124 | "id": 24, 125 | "name": "person", 126 | "color": [ 127 | 220, 128 | 20, 129 | 60 130 | ], 131 | "supercategory": "human", 132 | "isthing": 1 133 | }, 134 | { 135 | "id": 25, 136 | "name": "rider", 137 | "color": [ 138 | 255, 139 | 0, 140 | 0 141 | ], 142 | "supercategory": "human", 143 | "isthing": 1 144 | }, 145 | { 146 | "id": 26, 147 | "name": "car", 148 | "color": [ 149 | 0, 150 | 0, 151 | 142 152 | ], 153 | "supercategory": "vehicle", 154 | "isthing": 1 155 | }, 156 | { 157 | "id": 27, 158 | "name": "truck", 159 | "color": [ 160 | 0, 161 | 0, 162 | 70 163 | ], 164 | "supercategory": "vehicle", 165 | "isthing": 1 166 | }, 167 | { 168 | "id": 28, 169 | "name": "bus", 170 | "color": [ 171 | 0, 172 | 60, 173 | 100 174 | ], 175 | "supercategory": "vehicle", 176 | "isthing": 1 177 | }, 178 | { 179 | "id": 31, 180 | "name": "train", 181 | "color": [ 182 | 0, 183 | 80, 184 | 100 185 | ], 186 | "supercategory": "vehicle", 187 | "isthing": 1 188 | }, 189 | { 190 | "id": 32, 191 | "name": "motorcycle", 192 | "color": [ 193 | 0, 194 | 0, 195 | 230 196 | ], 197 | "supercategory": "vehicle", 198 | "isthing": 1 199 | }, 200 | { 201 | "id": 33, 202 | "name": "bicycle", 203 | "color": [ 204 | 119, 205 | 11, 206 | 32 207 | ], 208 | "supercategory": "vehicle", 209 | "isthing": 1 210 | } 211 | ] -------------------------------------------------------------------------------- /easymd/datasets/panopticapi/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 
3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | import functools 6 | import traceback 7 | import json 8 | import numpy as np 9 | import cv2 10 | 11 | # The decorator is used to print an error thrown inside a worker process 12 | def get_traceback(f): 13 | @functools.wraps(f) 14 | def wrapper(*args, **kwargs): 15 | try: 16 | return f(*args, **kwargs) 17 | except Exception as e: 18 | print('Caught exception in worker thread:') 19 | traceback.print_exc() 20 | raise e 21 | 22 | return wrapper 23 | 24 | 25 | class IdGenerator(): 26 | ''' 27 | The class is designed to generate unique IDs that have a meaningful RGB encoding. 28 | Given a semantic category, a unique ID will be generated, and its RGB encoding 29 | will have a color close to the predefined semantic category color. 30 | The RGB encoding used is ID = R + 256 * G + 256 * 256 * B. 31 | The class constructor takes a dictionary {id: category_info}, where all semantic 32 | class ids are present and each category_info record is a dict with fields 33 | 'isthing' and 'color' 34 | ''' 35 | def __init__(self, categories): 36 | self.taken_colors = set([(0, 0, 0)])  # reserve black 37 | self.categories = categories 38 | for category in self.categories.values(): 39 | if category['isthing'] == 0: 40 | self.taken_colors.add(tuple(category['color'])) 41 | 42 | def get_color(self, cat_id): 43 | def random_color(base, max_dist=30): 44 | new_color = base + np.random.randint(low=-max_dist, 45 | high=max_dist+1, 46 | size=3) 47 | return tuple(np.maximum(0, np.minimum(255, new_color))) 48 | 49 | category = self.categories[cat_id] 50 | if category['isthing'] == 0: 51 | return category['color'] 52 | base_color_array = category['color'] 53 | base_color = tuple(base_color_array) 54 | if base_color not in self.taken_colors: 55 | self.taken_colors.add(base_color) 56 | return base_color 57 | else: 58 | while True: 59 | color = random_color(base_color_array) 60 | if color not in self.taken_colors: 61 | self.taken_colors.add(color) 62 | return color 63 | 64 | def get_id(self, cat_id): 65 | color = self.get_color(cat_id) 66 | return rgb2id(color) 67 | 68 | def get_id_and_color(self, cat_id): 69 | color = self.get_color(cat_id) 70 | return rgb2id(color), color 71 | 72 | 73 | def rgb2id(color): 74 | if isinstance(color, np.ndarray) and len(color.shape) == 3: 75 | if color.dtype == np.uint8: 76 | color = color.astype(np.int32) 77 | return color[:, :, 0] + 256 * color[:, :, 1] + 256 * 256 * color[:, :, 2] 78 | return int(color[0] + 256 * color[1] + 256 * 256 * color[2]) 79 | 80 | 81 | def id2rgb(id_map): 82 | if isinstance(id_map, np.ndarray): 83 | id_map_copy = id_map.copy() 84 | rgb_shape = tuple(list(id_map.shape) + [3]) 85 | rgb_map = np.zeros(rgb_shape, dtype=np.uint8) 86 | for i in range(3): 87 | rgb_map[..., i] = id_map_copy % 256 88 | id_map_copy //= 256 89 | return rgb_map 90 | color = [] 91 | for _ in range(3): 92 | color.append(id_map % 256) 93 | id_map //= 256 94 | return color 95 | 96 | 97 | def save_json(d, file): 98 | with open(file, 'w') as f: 99 | json.dump(d, f) 100 | 101 | 102 | # General util function to get the boundary of a binary mask. 103 | def mask_to_boundary(mask, dilation_ratio=0.02): 104 | """ 105 | Convert binary mask to boundary mask. 
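    The boundary is obtained by eroding a zero-padded copy of the mask and
    subtracting the eroded mask from the original, leaving the inner boundary
    band (the G_d intersect G term used by the Boundary IoU metric).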
106 | :param mask (numpy array, uint8): binary mask 107 | :param dilation_ratio (float): ratio to calculate dilation = dilation_ratio * image_diagonal 108 | :return: boundary mask (numpy array) 109 | """ 110 | h, w = mask.shape 111 | img_diag = np.sqrt(h ** 2 + w ** 2) 112 | dilation = int(round(dilation_ratio * img_diag)) 113 | if dilation < 1: 114 | dilation = 1 115 | # Pad image so mask truncated by the image border is also considered as boundary. 116 | new_mask = cv2.copyMakeBorder(mask, 1, 1, 1, 1, cv2.BORDER_CONSTANT, value=0) 117 | kernel = np.ones((3, 3), dtype=np.uint8) 118 | new_mask_erode = cv2.erode(new_mask, kernel, iterations=dilation) 119 | mask_erode = new_mask_erode[1 : h + 1, 1 : w + 1] 120 | # G_d intersects G in the paper. 121 | return mask - mask_erode -------------------------------------------------------------------------------- /easymd/analysis_tools/kmean.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class Kmean(object): 5 | def __init__(self, cluster_number, number_iter=1, name='iou'): 6 | self.cluster_number = cluster_number 7 | self.number_iter = number_iter 8 | self.name = name 9 | 10 | def _get_distance_measure(self, name='iou'): 11 | if name == 'iou': 12 | return self._calc_iou 13 | else: 14 | raise NotImplementedError('not implemented yet') 15 | 16 | def _calc_iou(self, boxes_nx2, clusters_kx2): 17 | """ 18 | calculate the iou between bboxes and clusters 19 | Args: 20 | boxes_nx2(np.ndarray): bboxes' width and height 21 | clusters_kx2(np.ndarray): clusters' width and height 22 | return: 23 | iou_nxk(np.ndarray): iou between bboxes and clusters 24 | """ 25 | n = boxes_nx2.shape[0] 26 | k = self.cluster_number 27 | 28 | box_area = boxes_nx2[:, 0] * boxes_nx2[:, 1] # equivalent to moving every top-left corner to (0, 0) before computing IoU 29 | box_area = box_area.repeat(k) 30 | box_area = np.reshape(box_area, (n, k)) 31 | 32 | cluster_area = clusters_kx2[:, 0] * clusters_kx2[:, 1] 33 | cluster_area = np.tile(cluster_area, [1, n]) 34 | cluster_area = np.reshape(cluster_area, (n, k)) 35 | 36 | box_w_matrix = np.reshape(boxes_nx2[:, 0].repeat(k), (n, k)) 37 | cluster_w_matrix = np.reshape(np.tile(clusters_kx2[:, 0], (1, n)), (n, k)) 38 | min_w_matrix = np.minimum(cluster_w_matrix, box_w_matrix) 39 | 40 | box_h_matrix = np.reshape(boxes_nx2[:, 1].repeat(k), (n, k)) 41 | cluster_h_matrix = np.reshape(np.tile(clusters_kx2[:, 1], (1, n)), (n, k)) 42 | min_h_matrix = np.minimum(cluster_h_matrix, box_h_matrix) 43 | inter_area = np.multiply(min_w_matrix, min_h_matrix) 44 | 45 | iou_nxk = inter_area / (box_area + cluster_area - inter_area) 46 | return iou_nxk 47 | 48 | def _calc_average_measure(self, boxes_nx2, clusters_kx2): 49 | """ 50 | calculate the mean iou between bboxes and clusters 51 | Args: 52 | boxes_nx2(np.ndarray): bboxes' width and height 53 | clusters_kx2(np.ndarray): clusters' width and height 54 | return: 55 | mean_iou(np.ndarray): mean iou between boxes and their corresponding clusters 56 | """ 57 | _distance_measure_fun = self._get_distance_measure(self.name) 58 | accuracy = np.mean([np.max(_distance_measure_fun(boxes_nx2, clusters_kx2), axis=1)]) 59 | return accuracy 60 | 61 | def _kmeans(self, boxes_nx2): 62 | """ 63 | calculate the clusters by kmeans 64 | Args: 65 | boxes_nx2(np.ndarray): bboxes' width and height 66 | uses: 67 | cluster_number 68 | calls: 69 | _calc_iou() 70 | return: 71 | clusters(np.ndarray): the anchors for yolo 72 | """ 73 | k = self.cluster_number 74 | box_number = 
boxes_nx2.shape[0] 75 | last_nearest = np.zeros((box_number,)) 76 | clusters = boxes_nx2[np.random.choice( 77 | box_number, k, replace=False)] # init k clusters 78 | _distance_measure_fun = self._get_distance_measure(self.name) 79 | while True: 80 | # the distance metric is 1 - IoU: the larger the IoU, the closer the box is to the cluster 81 | distances = 1 - _distance_measure_fun(boxes_nx2, clusters) # output shape (N, k) 82 | 83 | current_nearest = np.argmin(distances, axis=1) # index of the nearest cluster center for each box 84 | if (last_nearest == current_nearest).all(): # converged 85 | break # clusters won't change 86 | for cluster in range(k): # update the cluster centers 87 | if len(boxes_nx2[current_nearest == cluster]) == 0: 88 | clusters[cluster] = boxes_nx2[np.random.choice( 89 | box_number, 1, replace=False)] 90 | else: 91 | clusters[cluster] = np.median( # update clusters 92 | boxes_nx2[current_nearest == cluster], axis=0) 93 | 94 | last_nearest = current_nearest 95 | 96 | return clusters 97 | 98 | def clusters(self, wh_data_nx2): 99 | total_acc = -1 100 | total_result = [] 101 | for _ in range(self.number_iter): 102 | result = self._kmeans(wh_data_nx2) # TODO ga+kmean 103 | anchor_area = result[:, 0] * result[:, 1] 104 | area_index = np.argsort(anchor_area) 105 | result = result[area_index] 106 | acc = self._calc_average_measure(wh_data_nx2, result) * 100 107 | if acc > total_acc: 108 | total_acc = acc 109 | total_result = result 110 | 111 | # print("K anchors:\n {}".format(total_result.astype(np.int32))) 112 | print("Accuracy: {:.2f}%".format(total_acc)) 113 | return total_result.astype(np.int32).tolist() -------------------------------------------------------------------------------- /easymd/analysis_tools/center_sample_demo.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import cv2 4 | import mmcv 5 | import matplotlib.pyplot as plt 6 | 7 | 8 | def get_target_mask(gt_bboxes, feature_shape, center_sample_radius, center_sampling): 9 | # build the grid points 10 | xs = torch.arange(0, feature_shape[1]) 11 | ys = torch.arange(0, feature_shape[0]) 12 | y, x = torch.meshgrid(ys, xs) # note: meshgrid always returns y first 13 | y = y.flatten() # hw 14 | x = x.flatten() # hw 15 | # map back to the original image 16 | # points = torch.stack((x.reshape(-1) * stride, y.reshape(-1) * stride), 17 | # dim=-1) + stride // 2 # shift everything by stride // 2 to the cell centers 18 | # here we simply assume the feature map is the original image 19 | points = torch.stack((x.reshape(-1), y.reshape(-1)), dim=-1) 20 | 21 | num_points = points.size(0) # 100x100,2 22 | num_gts = gt_bboxes.size(0) # 1x4 23 | gt_bboxes = gt_bboxes[None].expand(num_points, num_gts, 4) # 100x100,1,4 24 | xs, ys = points[:, 0], points[:, 1] 25 | xs = xs[:, None].expand(num_points, num_gts) # 100x100,1 26 | ys = ys[:, None].expand(num_points, num_gts) 27 | 28 | if center_sampling: 29 | center_xs = (gt_bboxes[..., 0] + gt_bboxes[..., 2]) / 2 30 | center_ys = (gt_bboxes[..., 1] + gt_bboxes[..., 3]) / 2 31 | # center_gts effectively stores the new shrunken bbox coordinates 32 | center_gts = torch.zeros_like(gt_bboxes) 33 | stride = center_xs.new_ones(center_xs.shape) * center_sample_radius 34 | x_mins = center_xs - stride 35 | y_mins = center_ys - stride 36 | x_maxs = center_xs + stride 37 | y_maxs = center_ys + stride 38 | # if stride is small, x_mins is still inside the bbox and nothing needs to be done 39 | # if stride is large, x_mins falls outside the bbox, so we force x_mins = gt_bboxes[..., 0], which effectively disables center_sampling 40 | center_gts[..., 0] = torch.where(x_mins > gt_bboxes[..., 0], 41 | x_mins, gt_bboxes[..., 0]) 42 | center_gts[..., 1] = torch.where(y_mins > gt_bboxes[..., 1], 43 | y_mins, gt_bboxes[..., 1]) 44 | center_gts[..., 2] = torch.where(x_maxs > gt_bboxes[..., 2], 45 | gt_bboxes[..., 2], x_maxs) 46 | 
center_gts[..., 3] = torch.where(y_maxs > gt_bboxes[..., 3], 47 | gt_bboxes[..., 3], y_maxs) 48 | else: 49 | center_gts = gt_bboxes 50 | 51 | # distance from every point on the image to the four sides of the bbox 52 | left = xs - center_gts[..., 0] # distance from a feature-map point to the left edge of the bbox 53 | right = center_gts[..., 2] - xs # note the order of subtraction 54 | top = ys - center_gts[..., 1] 55 | bottom = center_gts[..., 3] - ys 56 | bbox_targets = torch.stack((left, top, right, bottom), -1) # 100x100,1,4 57 | # value, index = bbox_targets.min(-1) 58 | pos_mask = bbox_targets.min(-1)[0] > 0 59 | pos_mask = pos_mask.view(feature_shape[0], feature_shape[1], -1) 60 | return pos_mask, bbox_targets 61 | 62 | 63 | def centerness_target(pos_mask, bbox_targets): 64 | """Compute centerness targets. 65 | Args: 66 | pos_mask (Tensor): mask of positive locations; bbox_targets (Tensor): 67 | bbox targets of all locations, shape (num_points, num_gts, 4) 68 | Returns: 69 | Tensor: Centerness target. 70 | """ 71 | # only calculate pos centerness targets, otherwise there may be nan 72 | pos_mask = pos_mask.view(-1, 1) 73 | bbox_targets = bbox_targets[pos_mask] 74 | left_right = bbox_targets[:, [0, 2]] 75 | top_bottom = bbox_targets[:, [1, 3]] 76 | centerness_targets = (left_right.min(dim=-1)[0] / left_right.max(dim=-1)[0]) * \ 77 | (top_bottom.min(dim=-1)[0] / top_bottom.max(dim=-1)[0]) 78 | targets = torch.sqrt(centerness_targets) 79 | # reshape back into an image and return 80 | img_disp_target = pos_mask.new_zeros(pos_mask.shape, dtype=torch.float32) 81 | img_disp_target[pos_mask] = targets 82 | return img_disp_target 83 | 84 | 85 | if __name__ == '__main__': 86 | # drawback: center sampling cannot reflect changes in h/w, and although it is called a radius, the sampled region is a square rather than a circle 87 | center_sampling = True # whether to use the center sampling strategy 88 | feature_shape = (100, 100, 3) 89 | strides = 4 90 | radius = 3.5 # default 1.5 91 | center_sample_radius = radius * strides # expansion radius; the larger the value, the larger the sampled area 92 | gt_box = [20, 30, 80, 71] # in feature-map coordinates, xyxy 93 | 94 | gt_bbox = torch.as_tensor(gt_box, dtype=torch.float32).view(-1, 4) 95 | pos_mask, bbox_targets = get_target_mask(gt_bbox, feature_shape, center_sample_radius, center_sampling) 96 | 97 | # visualization 98 | pos_mask1 = pos_mask[..., 0].numpy() 99 | gray_img = np.where(pos_mask1 > 0, 255, 0).astype(np.uint8) 100 | # draw the original bbox 101 | img = mmcv.gray2bgr(gray_img) 102 | cv2.rectangle(img, (gt_box[0], gt_box[1]), (gt_box[2], gt_box[3]), color=(255, 0, 0)) 103 | cv2.namedWindow('img', 0) 104 | mmcv.imshow(img, 'img') 105 | 106 | # show the centerness map 107 | centerness_targets = centerness_target(pos_mask, bbox_targets) 108 | centerness_targets = centerness_targets.view(feature_shape[0], feature_shape[1]) 109 | plt.imshow(centerness_targets) 110 | plt.show() -------------------------------------------------------------------------------- /easymd/analysis_tools/visualize_coco.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from pycocotools.coco import COCO 3 | import os 4 | import cv2 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | from matplotlib.collections import PatchCollection 8 | from matplotlib.patches import Polygon 9 | 10 | 11 | def showBBox(coco, anns, label_box=True, is_filling=True): 12 | """ 13 | show bounding boxes of annotations or predictions 14 | anns: loadAnns() annotations or predictions subject to coco results format 15 | label_box: show background of category labels or not 16 | """ 17 | if len(anns) == 0: 18 | return 0 19 | ax = plt.gca() 20 | ax.set_autoscale_on(False) 21 | polygons = [] 22 | color = [] 23 | image2color = dict() 24 | for cat in coco.getCatIds(): 25 | image2color[cat] = (np.random.random((1, 3)) * 0.7 + 0.3).tolist()[0] 26 | for 
ann in anns: 27 | c = image2color[ann['category_id']] 28 | [bbox_x, bbox_y, bbox_w, bbox_h] = ann['bbox'] 29 | poly = [[bbox_x, bbox_y], [bbox_x, bbox_y + bbox_h], [bbox_x + bbox_w, bbox_y + bbox_h], 30 | [bbox_x + bbox_w, bbox_y]] 31 | np_poly = np.array(poly).reshape((4, 2)) 32 | polygons.append(Polygon(np_poly)) 33 | color.append(c) 34 | if label_box: 35 | label_bbox = dict(facecolor=c) 36 | else: 37 | label_bbox = None 38 | if 'score' in ann: 39 | ax.text(bbox_x, bbox_y, '%s: %.2f' % (coco.loadCats(ann['category_id'])[0]['name'], ann['score']), 40 | color='white', bbox=label_bbox) 41 | else: 42 | ax.text(bbox_x, bbox_y, '%s' % (coco.loadCats(ann['category_id'])[0]['name']), color='white', 43 | bbox=label_bbox) 44 | if is_filling: 45 | # option for filling bounding box 46 | p = PatchCollection(polygons, facecolor=color, linewidths=0, alpha=0.4) 47 | ax.add_collection(p) 48 | p = PatchCollection(polygons, facecolor='none', edgecolors=color, linewidths=2) 49 | ax.add_collection(p) 50 | 51 | 52 | # only_bbox: if True, visualize only the bboxes and hide the other labels 53 | # show_all: if True, show all categories; otherwise category_name selects which categories to show 54 | def show_coco(data_root, ann_file, img_prefix, only_bbox=False, show_all=True, category_name='bicycle'): 55 | example_coco = COCO(ann_file) 56 | print('Total number of images: {}'.format(len(example_coco.getImgIds()))) 57 | categories = example_coco.loadCats(example_coco.getCatIds()) 58 | category_names = [category['name'] for category in categories] 59 | print('Custom COCO categories: \n{}\n'.format(' '.join(category_names))) 60 | print(category_names) 61 | 62 | if show_all: 63 | category_ids = [] 64 | else: 65 | category_ids = example_coco.getCatIds(category_name) 66 | image_ids = example_coco.getImgIds(catIds=category_ids) 67 | image_ids = sorted(image_ids) 68 | for i in range(len(image_ids)): 69 | id = image_ids[i] 70 | # id = 285 # debug override: uncomment to always inspect image 285 71 | # e.g. /home/lzq/workspace/easy-mmdet/datasets/coco/val2017/000000000285.jpg 72 | image_data = example_coco.loadImgs(id)[0] 73 | path = os.path.join(data_root, img_prefix, image_data['file_name']) 74 | print(path) 75 | image = cv2.imread(path)[:, :, ::-1] 76 | 77 | 78 | annotation_ids = example_coco.getAnnIds(imgIds=image_data['id'], catIds=category_ids, iscrowd=None) 79 | #if len(annotation_ids)<2: 80 | # continue 81 | plt.figure() 82 | plt.imshow(image) 83 | annotations = example_coco.loadAnns(annotation_ids) 84 | if only_bbox: 85 | showBBox(example_coco, annotations) 86 | else: 87 | example_coco.showAnns(annotations) 88 | plt.title(path) 89 | plt.show() 90 | 91 | 92 | if __name__ == '__main__': 93 | # same settings as in the config (COCO) 94 | data_root = './datasets/coco/' 95 | ann_file = './datasets/annotations/panoptic_val2017_detection_format.json' 96 | #category_name=['banner', 'blanket', 'bridge', 'cardboard', 'counter', 'curtain', 'door-stuff', 'floor-wood', 'flower', 'fruit', 'gravel', 'house', 'light', 'mirror-stuff', 'net', 'pillow', 'platform', 'playingfield', 'railroad', 'river', 'road', 'roof', 'sand', 'sea', 'shelf', 'snow', 'stairs', 'tent', 'towel', 'wall-brick', 'wall-stone', 'wall-tile', 'wall-wood', 'water-other', 'window-blind', 'window-other', 'tree-merged', 'fence-merged', 'ceiling-merged', 'sky-other-merged', 'cabinet-merged', 'table-merged', 'floor-other-merged', 'pavement-merged', 'mountain-merged', 'grass-merged', 'dirt-merged', 'paper-merged', 'food-other-merged', 'building-other-merged', 'rock-merged', 'wall-other-merged', 'rug-merged'] 97 | #category_name = 'tree-merged' 98 | #ann_file='/home/lzq/workspace/easy-mmdet/datasets/coco/annotations/instances_val2017.json' 99 | img_prefix = 
'val2017/' 100 | show_coco(data_root, ann_file, img_prefix, show_all=True, only_bbox=False) 101 | 102 | # display VOC data after converting it to COCO format 103 | #data_root = '/home/pi/dataset/VOCdevkit/' 104 | #ann_file = data_root + 'annotations/voc0712_trainval.json' 105 | #img_prefix = data_root 106 | #show_coco(data_root, ann_file, img_prefix) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Panoptic SegFormer: Delving Deeper into Panoptic Segmentation with Transformers 2 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/panoptic-segformer/panoptic-segmentation-on-coco-minival)](https://paperswithcode.com/sota/panoptic-segmentation-on-coco-minival?p=panoptic-segformer) 3 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/panoptic-segformer/panoptic-segmentation-on-coco-test-dev)](https://paperswithcode.com/sota/panoptic-segmentation-on-coco-test-dev?p=panoptic-segformer) 4 | 
5 | ![Panoptic SegFormer architecture](figs/arch.png) 6 | 
7 | 8 | Panoptic SegFormer was accepted to CVPR 2022, and the latest version of our paper is available on [arXiv](https://arxiv.org/abs/2109.03814) 9 | 10 | 11 | ## Results 12 | 13 | Results on COCO val 14 | 15 | | Backbone | Method | Lr Schd | PQ | Config | Download | 16 | | :---: | :---: | :---: | :---: | :---: | :---: | 17 | | R-50 | Panoptic-SegFormer | 1x | 48.0 | [config](configs/panformer/panformer_r50_12e_coco_panoptic.py) | [model](https://github.com/zhiqi-li/Panoptic-SegFormer/releases/download/v1.0/panoptic_segformer_r50_1x.pth) | 18 | | R-50 | Panoptic-SegFormer | 2x | 49.6 | [config](configs/panformer/panformer_r50_24e_coco_panoptic.py) | [model](https://github.com/zhiqi-li/Panoptic-SegFormer/releases/download/v1.0/panoptic_segformer_r50_2x.pth) | 19 | | R-101 | Panoptic-SegFormer | 2x | 50.6 | [config](configs/panformer/panformer_r101_24e_coco_panoptic.py) | [model](https://github.com/zhiqi-li/Panoptic-SegFormer/releases/download/v1.0/panoptic_segformer_r101_2x.pth) | 20 | | [PVTv2-B5](https://github.com/whai362/PVT) (**much lighter**) | Panoptic-SegFormer | 2x | 55.6 | [config](configs/panformer/panformer_pvtb5_24e_coco_panoptic.py) | [model](https://github.com/zhiqi-li/Panoptic-SegFormer/releases/download/v1.0/panoptic_segformer_pvtv2b5_2x.pth) | 21 | | Swin-L (window size 7) | Panoptic-SegFormer | 2x | 55.8 | [config](configs/panformer/panformer_swinl_24e_coco_panoptic.py) | [model](https://github.com/zhiqi-li/Panoptic-SegFormer/releases/download/v1.0/panoptic_segformer_swinl_2x.pth) | 22 | 23 | 24 | 25 | 26 | ## Install 27 | 28 | ### Prerequisites 29 | 30 | - Linux 31 | - Python 3.6+ 32 | - PyTorch 1.5+ 33 | - torchvision 34 | - CUDA 9.2+ (If you build PyTorch from source, CUDA 9.0 is also compatible) 35 | - GCC 5+ 36 | - [mmcv-full==1.3.4](https://github.com/open-mmlab/mmcv/tree/v1.3.4) 37 | - [mmdet==2.12.0](https://github.com/open-mmlab/mmdetection/tree/v2.12.0) # higher versions may not work 38 | - timm==0.4.5 39 | - einops==0.3.0 40 | - Pillow==8.0.1 41 | - opencv-python==4.5.2 42 | 43 | Note: PyTorch 1.8 has a bug in its [adamw.py](https://github.com/pytorch/pytorch/blob/v1.8.0/torch/optim/adamw.py) that was fixed in PyTorch 1.9 ([see](https://github.com/pytorch/pytorch/blob/master/torch/optim/adamw.py)); you can easily patch it by comparing the two files. 44 | 45 | 46 | ### Install Panoptic SegFormer 47 | 48 | ``` 49 | python setup.py install 50 | ``` 51 | 52 | 53 | ## Datasets 54 | 55 | When this project began, mmdet did not yet support panoptic segmentation officially, so we convert the dataset from panoptic segmentation format to instance segmentation format for convenience. 56 | 57 | ### 1. Prepare data (COCO) 58 | 59 | ``` 60 | cd Panoptic-SegFormer 61 | mkdir datasets 62 | cd datasets 63 | ln -s path_to_coco coco 64 | mkdir annotations/ 65 | cd annotations 66 | wget http://images.cocodataset.org/annotations/panoptic_annotations_trainval2017.zip 67 | unzip panoptic_annotations_trainval2017.zip 68 | ``` 69 | 70 | Then the directory structure should be the following: 71 | 72 | ``` 73 | Panoptic-SegFormer 74 | ├── datasets 75 | │  ├── annotations/ 76 | │  │   ├── panoptic_train2017/ 77 | │   │  ├── panoptic_train2017.json 78 | │  │   ├── panoptic_val2017/ 79 | │   │  └── panoptic_val2017.json 80 | │ └── coco/ 81 | │ 82 | ├── config 83 | ├── checkpoints 84 | ├── easymd 85 | ... 86 | ``` 87 | 88 | ### 2. 
Convert panoptic format to detection format 89 | 90 | ``` 91 | cd Panoptic-SegFormer 92 | ./tools/convert_panoptic_coco.sh coco 93 | ``` 94 | 95 | Then the directory structure should be the following: 96 | 97 | ``` 98 | Panoptic-SegFormer 99 | ├── datasets 100 | │  ├── annotations/ 101 | │  │   ├── panoptic_train2017/ 102 | │  │   ├── panoptic_train2017_detection_format.json 103 | │   │  ├── panoptic_train2017.json 104 | │  │   ├── panoptic_val2017/ 105 | │  │   ├── panoptic_val2017_detection_format.json 106 | │   │  └── panoptic_val2017.json 107 | │ └── coco/ 108 | │ 109 | ├── config 110 | ├── checkpoints 111 | ├── easymd 112 | ... 113 | ``` 114 | 115 | 116 | ## Run (panoptic segmentation) 117 | 118 | ### Train 119 | 120 | Single machine with 8 GPUs: 121 | 122 | ``` 123 | ./tools/dist_train.sh ./configs/panformer/panformer_r50_24e_coco_panoptic.py 8 124 | ``` 125 | 126 | 127 | ### Test 128 | 129 | ``` 130 | ./tools/dist_test.sh ./configs/panformer/panformer_r50_24e_coco_panoptic.py path/to/model.pth 8 131 | ``` 132 | 133 | ## Citing 134 | 135 | If you use Panoptic SegFormer in your research, please use the following BibTeX entry. 136 | 137 | ```BibTeX 138 | @misc{li2021panoptic, 139 | title={Panoptic SegFormer: Delving Deeper into Panoptic Segmentation with Transformers}, 140 | author={Zhiqi Li and Wenhai Wang and Enze Xie and Zhiding Yu and Anima Anandkumar and Jose M. Alvarez and Tong Lu and Ping Luo}, 141 | year={2021}, 142 | eprint={2109.03814}, 143 | archivePrefix={arXiv}, 144 | primaryClass={cs.CV} 145 | } 146 | ``` 147 | 148 | 149 | ## Acknowledgement 150 | 151 | Mainly based on [Deformable DETR](https://github.com/open-mmlab/mmdetection.git) from MMDetection. 152 | 153 | Many thanks to the other open source works: [timm](https://github.com/rwightman/pytorch-image-models), [Panoptic FCN](https://github.com/dvlab-research/PanopticFCN), [MaskFormer](https://github.com/facebookresearch/MaskFormer), [QueryInst](https://github.com/hustvl/QueryInst) 154 | 155 | 156 | -------------------------------------------------------------------------------- /easymd/analysis_tools/visualize_panoptic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | ''' 3 | Visualization demo for panoptic COCO sample_data 4 | 5 | The code shows an example of color generation for panoptic data (with 6 | "generate_new_colors" set to True). For each segment a distinct color is used that is 7 | close to the color of the corresponding semantic class.
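As background for the decoding step this demo relies on, a minimal sketch of reading one panoptic annotation PNG into a segment-id map (the file path is hypothetical; `rgb2id` comes from the panopticapi utils shown earlier in this repo):

```python
import numpy as np
from PIL import Image
from easymd.datasets.panopticapi.utils import rgb2id

# Hypothetical path to one COCO panoptic ground-truth PNG.
png = np.array(Image.open('datasets/annotations/panoptic_val2017/000000000285.png'),
               dtype=np.uint8)
seg_ids = rgb2id(png)       # (H, W) int32 map of segment ids
print(np.unique(seg_ids))   # each nonzero value matches a segments_info['id'] in the JSON
```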
8 | ''' 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | from __future__ import unicode_literals 13 | import os, sys 14 | import numpy as np 15 | import json 16 | 17 | import PIL.Image as Image 18 | import matplotlib.pyplot as plt 19 | from skimage.segmentation import find_boundaries 20 | import mmcv 21 | from panopticapi.utils import IdGenerator, rgb2id 22 | try: 23 | from detectron2.data import MetadataCatalog 24 | except: 25 | print('no detectron2') 26 | #from detectron2.utils.visualizer import Visualizer 27 | import torch 28 | from easymd.models.utils.visual import Visualizer # we modified the Visualizer from detectron2 29 | # whether the colors from the PNG are used or new colors are generated 30 | generate_new_colors = True 31 | 32 | json_file = './datasets/annotations/panoptic_val2017.json' 33 | segmentations_folder = './datasets/annotations/panoptic_val2017' 34 | img_folder = './datasets/coco/val2017' 35 | panoptic_coco_categories = './easymd/datasets/panoptic_coco_categories.json' 36 | 37 | with open(json_file, 'r') as f: 38 | coco_d = json.load(f) 39 | 40 | #ann = np.random.choice(coco_d['annotations']) 41 | #id = 785 42 | #id = 79188 43 | #id = 124975 #two horses 44 | #id = 2592 45 | #id = 26564 46 | 47 | def f(id): 48 | find = False 49 | for each in coco_d['annotations']: 50 | if each['image_id'] == id: 51 | ann = each 52 | find = True 53 | break 54 | if not find: 55 | return 56 | 57 | with open(panoptic_coco_categories, 'r') as f: 58 | categories_list = json.load(f) 59 | categories = {category['id']: category for category in categories_list} 60 | 61 | # find the input img that corresponds to the annotation 62 | img = None 63 | for image_info in coco_d['images']: 64 | if image_info['id'] == ann['image_id']: 65 | try: 66 | img = np.array( 67 | Image.open(os.path.join(img_folder, image_info['file_name'])) 68 | ) 69 | except: 70 | print("Unable to find corresponding input image.") 71 | break 72 | 73 | segmentation = np.array( 74 | Image.open(os.path.join(segmentations_folder, ann['file_name'])), 75 | dtype=np.uint8 76 | ) 77 | segmentation_id = rgb2id(segmentation) 78 | # find segments boundaries 79 | 80 | 81 | if generate_new_colors: 82 | segmentation[:, :, :] = 0 83 | color_generator = IdGenerator(categories) 84 | 85 | i = 0 86 | 87 | for segment_info in ann['segments_info']: 88 | #print(segment_info) 89 | #if segment_info['id']!= 4475732: 90 | # continue 91 | 92 | color = color_generator.get_color(segment_info['category_id']) 93 | mask = segmentation_id == segment_info['id'] 94 | segmentation[mask] = color 95 | #print(dir(segment_info)) 96 | 97 | segment_info.setdefault('isthing', True) 98 | #print(segment_info['category_id'] > 90,segment_info['category_id']) 99 | if segment_info['category_id'] > 90: 100 | segment_info['isthing'] = False 101 | i += 1 102 | if i < 10: # skip images with fewer than 10 segments 103 | return 104 | 105 | boundaries = find_boundaries(rgb2id(segmentation), mode='thick') 106 | #segmentation[boundaries] = [0,255,0] 107 | # depict boundaries 108 | import cv2 as cv 109 | 110 | print(img.shape, segmentation.shape) 111 | #res = cv.add(segmentation,img) 112 | 113 | #im = Image.open(data['img_metas'][0].data[0][0]['filename']) 114 | meta = MetadataCatalog.get("coco_2017_val_panoptic_separated") 115 | im = np.array(img)[:, :, ::-1] 116 | v = Visualizer(im, meta, scale=1.0) 117 | v._default_font_size = 10 118 | v = v.draw_panoptic_seg_predictions(torch.from_numpy(segmentation_id), ann['segments_info'], area_threshold=0) 119 | res = 
v.get_image()[:,:,::-1] 120 | mmcv.imwrite(v.get_image(),'tmp.png') 121 | 122 | 123 | 124 | 125 | if img is None: 126 | plt.figure() 127 | plt.imshow(segmentation) 128 | plt.axis('off') 129 | else: 130 | plt.figure(figsize=(9, 5)) 131 | plt.subplot(231) 132 | plt.imshow(img) 133 | plt.axis('off') 134 | plt.subplot(232) 135 | plt.imshow(segmentation) 136 | plt.axis('off') 137 | plt.subplot(233) 138 | plt.imshow(res) 139 | plt.axis('off') 140 | plt.subplot(234) 141 | msg = np.array( 142 | Image.open(os.path.join('/home/lzq/easy-mmdet/seg_pwm', ann['file_name'])),dtype=np.uint8 143 | ) 144 | plt.imshow(msg) 145 | plt.axis('off') 146 | plt.subplot(235) 147 | pwm = np.array( 148 | Image.open(os.path.join('/home/lzq/easy-mmdet/seg_max', ann['file_name'])),dtype=np.uint8 149 | ) 150 | plt.imshow(pwm) 151 | plt.axis('off') 152 | plt.subplot(236) 153 | hp = np.array( 154 | Image.open(os.path.join('/home/lzq/easy-mmdet/seg_hp', ann['file_name'])), 155 | dtype=np.uint8 156 | ) 157 | plt.imshow(hp) 158 | plt.axis('off') 159 | plt.tight_layout() 160 | plt.show() 161 | #{"mode":"full","isActive":false} 162 | id=165681 163 | #f_id(id) 164 | f(id) 165 | #while True: 166 | # f(id) 167 | # id+=1 168 | 169 | #boundaries = find_boundaries(rgb2id(segmentation), mode='thick') 170 | #mmcv.imwrite(segmentation[:,:,::-1],'gt/'+str(id)+'.png') 171 | #segmentation[boundaries] = [0,255,0] 172 | # depict boundaries 173 | 174 | #for i in range(581781+1): 175 | # f(i) 176 | ''' 177 | import cv2 as cv 178 | 179 | print(img.shape,segmentation.shape) 180 | res = cv.add(segmentation,img) 181 | 182 | 183 | 184 | if img is None: 185 | plt.figure() 186 | plt.imshow(segmentation) 187 | plt.axis('off') 188 | else: 189 | plt.figure(figsize=(9, 5)) 190 | plt.subplot(131) 191 | plt.imshow(img) 192 | plt.axis('off') 193 | plt.subplot(132) 194 | plt.imshow(segmentation) 195 | plt.axis('off') 196 | plt.subplot(133) 197 | plt.imshow(res) 198 | plt.axis('off') 199 | plt.tight_layout() 200 | plt.show() 201 | #{"mode":"full","isActive":false} 202 | ''' 203 | 204 | -------------------------------------------------------------------------------- /easymd/analysis_tools/analyze_logs.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | from collections import defaultdict 4 | 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | import seaborn as sns 8 | 9 | 10 | def cal_train_time(log_dicts, args): 11 | for i, log_dict in enumerate(log_dicts): 12 | print(f'{"-" * 5}Analyze train time of {args.json_logs[i]}{"-" * 5}') 13 | all_times = [] 14 | for epoch in log_dict.keys(): 15 | if args.include_outliers: 16 | all_times.append(log_dict[epoch]['time']) 17 | else: 18 | all_times.append(log_dict[epoch]['time'][1:]) 19 | all_times = np.array(all_times) 20 | epoch_ave_time = all_times.mean(-1) 21 | slowest_epoch = epoch_ave_time.argmax() 22 | fastest_epoch = epoch_ave_time.argmin() 23 | std_over_epoch = epoch_ave_time.std() 24 | print(f'slowest epoch {slowest_epoch + 1}, ' 25 | f'average time is {epoch_ave_time[slowest_epoch]:.4f}') 26 | print(f'fastest epoch {fastest_epoch + 1}, ' 27 | f'average time is {epoch_ave_time[fastest_epoch]:.4f}') 28 | print(f'time std over epochs is {std_over_epoch:.4f}') 29 | print(f'average iter time: {np.mean(all_times):.4f} s/iter') 30 | print() 31 | 32 | 33 | def plot_curve(log_dicts, args): 34 | if args.backend is not None: 35 | plt.switch_backend(args.backend) 36 | sns.set_style(args.style) 37 | # if legend is None, use {filename}_{key} 
as legend 38 | legend = args.legend 39 | if legend is None: 40 | legend = [] 41 | for json_log in args.json_logs: 42 | for metric in args.keys: 43 | legend.append(f'{json_log}_{metric}') 44 | assert len(legend) == (len(args.json_logs) * len(args.keys)) 45 | metrics = args.keys 46 | 47 | num_metrics = len(metrics) 48 | for i, log_dict in enumerate(log_dicts): 49 | epochs = list(log_dict.keys()) 50 | for j, metric in enumerate(metrics): 51 | print(f'plot curve of {args.json_logs[i]}, metric is {metric}') 52 | if metric not in log_dict[epochs[0]]: 53 | raise KeyError( 54 | f'{args.json_logs[i]} does not contain metric {metric}') 55 | 56 | if 'mAP' in metric: 57 | xs = np.arange(1, max(epochs) + 1) 58 | ys = [] 59 | for epoch in epochs: 60 | ys += log_dict[epoch][metric] 61 | ax = plt.gca() 62 | ax.set_xticks(xs) 63 | plt.xlabel('epoch') 64 | plt.plot(xs, ys, label=legend[i * num_metrics + j], marker='o') 65 | else: 66 | xs = [] 67 | ys = [] 68 | num_iters_per_epoch = log_dict[epochs[0]]['iter'][-1] 69 | for epoch in epochs: 70 | iters = log_dict[epoch]['iter'] 71 | if log_dict[epoch]['mode'][-1] == 'val': 72 | iters = iters[:-1] 73 | xs.append( 74 | np.array(iters) + (epoch - 1) * num_iters_per_epoch) 75 | ys.append(np.array(log_dict[epoch][metric][:len(iters)])) 76 | xs = np.concatenate(xs) 77 | ys = np.concatenate(ys) 78 | plt.xlabel('iter') 79 | plt.plot( 80 | xs, ys, label=legend[i * num_metrics + j], linewidth=0.5) 81 | plt.legend() 82 | if args.title is not None: 83 | plt.title(args.title) 84 | if args.out is None: 85 | plt.show() 86 | else: 87 | print(f'save curve to: {args.out}') 88 | plt.savefig(args.out) 89 | plt.cla() 90 | 91 | 92 | def add_plot_parser(subparsers): 93 | parser_plt = subparsers.add_parser( 94 | 'plot_curve', help='parser for plotting curves') 95 | parser_plt.add_argument( 96 | 'json_logs', 97 | type=str, 98 | nargs='+', 99 | help='path of train log in json format') 100 | parser_plt.add_argument( 101 | '--keys', 102 | type=str, 103 | nargs='+', 104 | default=['bbox_mAP'], 105 | help='the metric that you want to plot') 106 | parser_plt.add_argument('--title', type=str, help='title of figure') 107 | parser_plt.add_argument( 108 | '--legend', 109 | type=str, 110 | nargs='+', 111 | default=None, 112 | help='legend of each plot') 113 | parser_plt.add_argument( 114 | '--backend', type=str, default=None, help='backend of plt') 115 | parser_plt.add_argument( 116 | '--style', type=str, default='dark', help='style of plt') 117 | parser_plt.add_argument('--out', type=str, default=None) 118 | 119 | 120 | def add_time_parser(subparsers): 121 | parser_time = subparsers.add_parser( 122 | 'cal_train_time', 123 | help='parser for computing the average time per training iteration') 124 | parser_time.add_argument( 125 | 'json_logs', 126 | type=str, 127 | nargs='+', 128 | help='path of train log in json format') 129 | parser_time.add_argument( 130 | '--include-outliers', 131 | action='store_true', 132 | help='include the first value of every epoch when computing ' 133 | 'the average time') 134 | 135 | 136 | def parse_args(): 137 | parser = argparse.ArgumentParser(description='Analyze Json Log') 138 | # currently only support plot curve and calculate average train time 139 | subparsers = parser.add_subparsers(dest='task', help='task parser') 140 | add_plot_parser(subparsers) 141 | add_time_parser(subparsers) 142 | args = parser.parse_args() 143 | return args 144 | 145 | 146 | def load_json_logs(json_logs): 147 | # load and convert json_logs to log_dict, key is epoch, value is a sub dict 
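To make the parsed structure concrete, here is a small self-contained sketch (the log line below is fabricated, but it follows the one-JSON-object-per-line format that `load_json_logs` consumes):

```python
import json
from collections import defaultdict

# One (fabricated) line of an mmdet training log in JSON format.
line = '{"mode": "train", "epoch": 1, "iter": 50, "loss": 0.42, "time": 0.31}'
log = json.loads(line)

log_dict = {}
epoch = log.pop('epoch')
log_dict.setdefault(epoch, defaultdict(list))
for k, v in log.items():
    log_dict[epoch][k].append(v)

print(log_dict[1]['loss'])   # [0.42] -- one entry per logged iteration
```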
148 | # keys of sub dict is different metrics, e.g. memory, bbox_mAP 149 | # value of sub dict is a list of corresponding values of all iterations 150 | log_dicts = [dict() for _ in json_logs] 151 | for json_log, log_dict in zip(json_logs, log_dicts): 152 | with open(json_log, 'r') as log_file: 153 | for line in log_file: 154 | log = json.loads(line.strip()) 155 | # skip lines without `epoch` field 156 | if 'epoch' not in log: 157 | continue 158 | epoch = log.pop('epoch') 159 | if epoch not in log_dict: 160 | log_dict[epoch] = defaultdict(list) 161 | for k, v in log.items(): 162 | log_dict[epoch][k].append(v) 163 | return log_dicts 164 | 165 | 166 | def main(): 167 | args = parse_args() 168 | 169 | json_logs = args.json_logs 170 | for json_log in json_logs: 171 | assert json_log.endswith('.json') 172 | 173 | log_dicts = load_json_logs(json_logs) 174 | 175 | eval(args.task)(log_dicts, args) 176 | 177 | 178 | if __name__ == '__main__': 179 | main() 180 | -------------------------------------------------------------------------------- /configs/models/cascade_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='CascadeRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), 35 | roi_head=dict( 36 | type='CascadeRoIHead', 37 | num_stages=3, 38 | stage_loss_weights=[1, 0.5, 0.25], 39 | bbox_roi_extractor=dict( 40 | type='SingleRoIExtractor', 41 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 42 | out_channels=256, 43 | featmap_strides=[4, 8, 16, 32]), 44 | bbox_head=[ 45 | dict( 46 | type='Shared2FCBBoxHead', 47 | in_channels=256, 48 | fc_out_channels=1024, 49 | roi_feat_size=7, 50 | num_classes=80, 51 | bbox_coder=dict( 52 | type='DeltaXYWHBBoxCoder', 53 | target_means=[0., 0., 0., 0.], 54 | target_stds=[0.1, 0.1, 0.2, 0.2]), 55 | reg_class_agnostic=True, 56 | loss_cls=dict( 57 | type='CrossEntropyLoss', 58 | use_sigmoid=False, 59 | loss_weight=1.0), 60 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 61 | loss_weight=1.0)), 62 | dict( 63 | type='Shared2FCBBoxHead', 64 | in_channels=256, 65 | fc_out_channels=1024, 66 | roi_feat_size=7, 67 | num_classes=80, 68 | bbox_coder=dict( 69 | type='DeltaXYWHBBoxCoder', 70 | target_means=[0., 0., 0., 0.], 71 | target_stds=[0.05, 0.05, 0.1, 0.1]), 72 | reg_class_agnostic=True, 73 | loss_cls=dict( 74 | type='CrossEntropyLoss', 75 | use_sigmoid=False, 76 | loss_weight=1.0), 77 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 78 | loss_weight=1.0)), 79 | dict( 80 | type='Shared2FCBBoxHead', 81 | in_channels=256, 82 | fc_out_channels=1024, 83 | roi_feat_size=7, 84 | num_classes=80, 85 | bbox_coder=dict( 86 | 
type='DeltaXYWHBBoxCoder', 87 | target_means=[0., 0., 0., 0.], 88 | target_stds=[0.033, 0.033, 0.067, 0.067]), 89 | reg_class_agnostic=True, 90 | loss_cls=dict( 91 | type='CrossEntropyLoss', 92 | use_sigmoid=False, 93 | loss_weight=1.0), 94 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) 95 | ]), 96 | # model training and testing settings 97 | train_cfg=dict( 98 | rpn=dict( 99 | assigner=dict( 100 | type='MaxIoUAssigner', 101 | pos_iou_thr=0.7, 102 | neg_iou_thr=0.3, 103 | min_pos_iou=0.3, 104 | match_low_quality=True, 105 | ignore_iof_thr=-1), 106 | sampler=dict( 107 | type='RandomSampler', 108 | num=256, 109 | pos_fraction=0.5, 110 | neg_pos_ub=-1, 111 | add_gt_as_proposals=False), 112 | allowed_border=0, 113 | pos_weight=-1, 114 | debug=False), 115 | rpn_proposal=dict( 116 | nms_pre=2000, 117 | max_per_img=2000, 118 | nms=dict(type='nms', iou_threshold=0.7), 119 | min_bbox_size=0), 120 | rcnn=[ 121 | dict( 122 | assigner=dict( 123 | type='MaxIoUAssigner', 124 | pos_iou_thr=0.5, 125 | neg_iou_thr=0.5, 126 | min_pos_iou=0.5, 127 | match_low_quality=False, 128 | ignore_iof_thr=-1), 129 | sampler=dict( 130 | type='RandomSampler', 131 | num=512, 132 | pos_fraction=0.25, 133 | neg_pos_ub=-1, 134 | add_gt_as_proposals=True), 135 | pos_weight=-1, 136 | debug=False), 137 | dict( 138 | assigner=dict( 139 | type='MaxIoUAssigner', 140 | pos_iou_thr=0.6, 141 | neg_iou_thr=0.6, 142 | min_pos_iou=0.6, 143 | match_low_quality=False, 144 | ignore_iof_thr=-1), 145 | sampler=dict( 146 | type='RandomSampler', 147 | num=512, 148 | pos_fraction=0.25, 149 | neg_pos_ub=-1, 150 | add_gt_as_proposals=True), 151 | pos_weight=-1, 152 | debug=False), 153 | dict( 154 | assigner=dict( 155 | type='MaxIoUAssigner', 156 | pos_iou_thr=0.7, 157 | neg_iou_thr=0.7, 158 | min_pos_iou=0.7, 159 | match_low_quality=False, 160 | ignore_iof_thr=-1), 161 | sampler=dict( 162 | type='RandomSampler', 163 | num=512, 164 | pos_fraction=0.25, 165 | neg_pos_ub=-1, 166 | add_gt_as_proposals=True), 167 | pos_weight=-1, 168 | debug=False) 169 | ]), 170 | test_cfg=dict( 171 | rpn=dict( 172 | nms_pre=1000, 173 | max_per_img=1000, 174 | nms=dict(type='nms', iou_threshold=0.7), 175 | min_bbox_size=0), 176 | rcnn=dict( 177 | score_thr=0.05, 178 | nms=dict(type='nms', iou_threshold=0.5), 179 | max_per_img=100))) 180 | -------------------------------------------------------------------------------- /configs/_base_/models/cascade_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='CascadeRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), 35 | roi_head=dict( 36 | type='CascadeRoIHead', 37 | num_stages=3, 38 | 
stage_loss_weights=[1, 0.5, 0.25], 39 | bbox_roi_extractor=dict( 40 | type='SingleRoIExtractor', 41 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 42 | out_channels=256, 43 | featmap_strides=[4, 8, 16, 32]), 44 | bbox_head=[ 45 | dict( 46 | type='Shared2FCBBoxHead', 47 | in_channels=256, 48 | fc_out_channels=1024, 49 | roi_feat_size=7, 50 | num_classes=80, 51 | bbox_coder=dict( 52 | type='DeltaXYWHBBoxCoder', 53 | target_means=[0., 0., 0., 0.], 54 | target_stds=[0.1, 0.1, 0.2, 0.2]), 55 | reg_class_agnostic=True, 56 | loss_cls=dict( 57 | type='CrossEntropyLoss', 58 | use_sigmoid=False, 59 | loss_weight=1.0), 60 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 61 | loss_weight=1.0)), 62 | dict( 63 | type='Shared2FCBBoxHead', 64 | in_channels=256, 65 | fc_out_channels=1024, 66 | roi_feat_size=7, 67 | num_classes=80, 68 | bbox_coder=dict( 69 | type='DeltaXYWHBBoxCoder', 70 | target_means=[0., 0., 0., 0.], 71 | target_stds=[0.05, 0.05, 0.1, 0.1]), 72 | reg_class_agnostic=True, 73 | loss_cls=dict( 74 | type='CrossEntropyLoss', 75 | use_sigmoid=False, 76 | loss_weight=1.0), 77 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 78 | loss_weight=1.0)), 79 | dict( 80 | type='Shared2FCBBoxHead', 81 | in_channels=256, 82 | fc_out_channels=1024, 83 | roi_feat_size=7, 84 | num_classes=80, 85 | bbox_coder=dict( 86 | type='DeltaXYWHBBoxCoder', 87 | target_means=[0., 0., 0., 0.], 88 | target_stds=[0.033, 0.033, 0.067, 0.067]), 89 | reg_class_agnostic=True, 90 | loss_cls=dict( 91 | type='CrossEntropyLoss', 92 | use_sigmoid=False, 93 | loss_weight=1.0), 94 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) 95 | ]), 96 | # model training and testing settings 97 | train_cfg=dict( 98 | rpn=dict( 99 | assigner=dict( 100 | type='MaxIoUAssigner', 101 | pos_iou_thr=0.7, 102 | neg_iou_thr=0.3, 103 | min_pos_iou=0.3, 104 | match_low_quality=True, 105 | ignore_iof_thr=-1), 106 | sampler=dict( 107 | type='RandomSampler', 108 | num=256, 109 | pos_fraction=0.5, 110 | neg_pos_ub=-1, 111 | add_gt_as_proposals=False), 112 | allowed_border=0, 113 | pos_weight=-1, 114 | debug=False), 115 | rpn_proposal=dict( 116 | nms_pre=2000, 117 | max_per_img=2000, 118 | nms=dict(type='nms', iou_threshold=0.7), 119 | min_bbox_size=0), 120 | rcnn=[ 121 | dict( 122 | assigner=dict( 123 | type='MaxIoUAssigner', 124 | pos_iou_thr=0.5, 125 | neg_iou_thr=0.5, 126 | min_pos_iou=0.5, 127 | match_low_quality=False, 128 | ignore_iof_thr=-1), 129 | sampler=dict( 130 | type='RandomSampler', 131 | num=512, 132 | pos_fraction=0.25, 133 | neg_pos_ub=-1, 134 | add_gt_as_proposals=True), 135 | pos_weight=-1, 136 | debug=False), 137 | dict( 138 | assigner=dict( 139 | type='MaxIoUAssigner', 140 | pos_iou_thr=0.6, 141 | neg_iou_thr=0.6, 142 | min_pos_iou=0.6, 143 | match_low_quality=False, 144 | ignore_iof_thr=-1), 145 | sampler=dict( 146 | type='RandomSampler', 147 | num=512, 148 | pos_fraction=0.25, 149 | neg_pos_ub=-1, 150 | add_gt_as_proposals=True), 151 | pos_weight=-1, 152 | debug=False), 153 | dict( 154 | assigner=dict( 155 | type='MaxIoUAssigner', 156 | pos_iou_thr=0.7, 157 | neg_iou_thr=0.7, 158 | min_pos_iou=0.7, 159 | match_low_quality=False, 160 | ignore_iof_thr=-1), 161 | sampler=dict( 162 | type='RandomSampler', 163 | num=512, 164 | pos_fraction=0.25, 165 | neg_pos_ub=-1, 166 | add_gt_as_proposals=True), 167 | pos_weight=-1, 168 | debug=False) 169 | ]), 170 | test_cfg=dict( 171 | rpn=dict( 172 | nms_pre=1000, 173 | max_per_img=1000, 174 | nms=dict(type='nms', iou_threshold=0.7), 175 | min_bbox_size=0), 176 | 
rcnn=dict( 177 | score_thr=0.05, 178 | nms=dict(type='nms', iou_threshold=0.5), 179 | max_per_img=100))) 180 | -------------------------------------------------------------------------------- /easymd/core/bbox/match_costs/match_cost.py: -------------------------------------------------------------------------------- 1 | from mmdet.models.losses.utils import weighted_loss 2 | import torch 3 | 4 | from mmdet.core.bbox.iou_calculators import bbox_overlaps 5 | from mmdet.core.bbox.transforms import bbox_cxcywh_to_xyxy, bbox_xyxy_to_cxcywh 6 | #from .builder import MATCH_COST 7 | from mmdet.core.bbox.match_costs.builder import MATCH_COST 8 | import torch.nn.functional as F 9 | import mmcv 10 | #from torchvision.utils import make_grid 11 | from easymd.models.utils.visual import save_tensor 12 | def center_of_mass(bitmasks): 13 | n, h, w = bitmasks.size() 14 | 15 | ys = torch.linspace(0, 1, h, dtype=torch.float32, device=bitmasks.device) 16 | xs = torch.linspace(0, 1, w, dtype=torch.float32, device=bitmasks.device) 17 | 18 | m00 = bitmasks.sum(dim=-1).sum(dim=-1).clamp(min=1e-6) 19 | m10 = (bitmasks * xs).sum(dim=-1).sum(dim=-1) 20 | m01 = (bitmasks * ys[:, None]).sum(dim=-1).sum(dim=-1) 21 | center_x = m10 / m00 22 | center_y = m01 / m00 23 | return torch.stack([center_x, center_y], -1) 24 | #return center_x, center_y 25 | 26 | @weighted_loss 27 | def l1_loss(pred, target): 28 | """Elementwise L1 loss. 29 | 30 | Args: 31 | pred (torch.Tensor): The prediction. 32 | target (torch.Tensor): The learning target of the prediction. 33 | (Unlike smooth L1, this is a plain absolute difference; 34 | there is no beta threshold.) 35 | 36 | Returns: 37 | torch.Tensor: Calculated loss 38 | """ 39 | 40 | #assert pred.size() == target.size() and target.numel() > 0 41 | loss = torch.abs(pred - target) 42 | 43 | return loss 44 | 45 | 46 | 47 | @MATCH_COST.register_module() 48 | class DiceCost(object): 49 | """DiceCost. 50 | 51 | Computes a soft Dice matching cost between predicted masks and 52 | ground-truth masks; the returned cost is (1 - dice) * weight. 53 | 54 | Args: 55 | weight (int | float, optional): loss weight 56 | 57 | Examples: 58 | >>> import torch 59 | >>> self = DiceCost() 60 | >>> pred_masks = torch.rand(2, 4, 4) 61 | >>> gt_masks = torch.randint(0, 2, (3, 4, 4)).float() 62 | >>> self(pred_masks, gt_masks).shape 63 | torch.Size([2, 3]) 64 | """ 65 | 66 | def __init__(self, weight=1.): 67 | self.weight = weight 68 | self.count = 0 69 | def __call__(self, input, target): 70 | """ 71 | Args: 72 | input (Tensor): Predicted masks with values in [0, 1]. 73 | Shape [num_query, H, W]. 74 | target (Tensor): Ground truth binary masks. 75 | Shape [num_gt, H, W].
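As a quick sanity check of the cost this class computes, a tiny worked example with toy flattened masks (a sketch; the implementation below additionally adds a 0.001 smoothing term and multiplies by `weight`):

```python
import torch

pred = torch.tensor([[1., 1., 0., 0.]])   # one predicted mask, flattened
gt   = torch.tensor([[1., 0., 0., 0.]])   # one ground-truth mask, flattened

inter = (pred * gt).sum(-1)                                    # = 1
dice = 2 * inter / (pred.pow(2).sum(-1) + gt.pow(2).sum(-1))   # 2*1 / (2+1) = 0.667
print(1 - dice)                                                # dice cost = 0.333
```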
76 | 77 | Returns: 78 | torch.Tensor: dice cost with weight, shape [num_query, num_gt] 79 | """ 80 | # cost matrix: [num_query, num_gt] 81 | #print('INPUT', input.shape) 82 | #print('target',target.shape) 83 | 84 | N1,H1,W1 = input.shape 85 | N2,H2,W2 = target.shape 86 | 87 | if H1!=H2 or W1!=W2: 88 | target = F.interpolate(target.unsqueeze(0),size=(H1,W1),mode='bilinear').squeeze(0) 89 | 90 | input = input.contiguous().view(N1, -1)[:,None,:] 91 | target = target.contiguous().view(N2, -1)[None,:,:] 92 | 93 | a = torch.sum(input * target, -1) 94 | b = torch.sum(input * input, -1) + 0.001 95 | c = torch.sum(target * target, -1) + 0.001 96 | d = (2 * a) / (b + c) 97 | return (1-d)*self.weight 98 | 99 | 100 | @MATCH_COST.register_module() 101 | class CenterCost(object): 102 | """CenterCost. 103 | 104 | Computes an L1 matching cost between the centers of mass of predicted 105 | masks and ground-truth masks, in normalized [0, 1] coordinates. 106 | 107 | Args: 108 | weight (int | float, optional): loss weight 109 | 110 | Examples: 111 | >>> import torch 112 | >>> self = CenterCost() 113 | >>> pred_masks = torch.rand(2, 4, 4) 114 | >>> gt_masks = torch.randint(0, 2, (3, 4, 4)).float() 115 | >>> self(pred_masks, gt_masks).shape 116 | torch.Size([2, 3]) 117 | """ 118 | 119 | def __init__(self, weight=1.): 120 | self.weight = weight 121 | self.count = 0 122 | def __call__(self, input, target): 123 | """ 124 | Args: 125 | input (Tensor): Predicted masks with values in [0, 1]. 126 | Shape [num_query, H, W]. 127 | target (Tensor): Ground truth binary masks. 128 | Shape [num_gt, H, W]. 129 | 130 | Returns: 131 | torch.Tensor: center cost with weight, shape [num_query, num_gt] 132 | """ 133 | # cost matrix: [num_query, num_gt] 134 | #print('INPUT', input.shape) 135 | #print('target',target.shape) 136 | 137 | N1,H1,W1 = input.shape 138 | N2,H2,W2 = target.shape 139 | if H1!=H2 or W1!=W2: 140 | target = F.interpolate(target.unsqueeze(0),size=(H1,W1),mode='bilinear').squeeze(0) 141 | #save_tensor(input,'{i}.png'.format(i=self.count)) 142 | #self.count +=1 143 | input = center_of_mass(input) 144 | target = center_of_mass(target) 145 | input = input.contiguous().view(N1, 2)[:,None,:] 146 | target = target.contiguous().view(N2,2)[None,:,:] 147 | cost = l1_loss(input, target, reduction='none').sum(-1) # keep the (num_query, num_gt) matrix; the @weighted_loss default reduction would collapse it to a scalar mean 148 | 149 | return cost * self.weight 150 | 151 | 152 | 153 | @MATCH_COST.register_module() 154 | class BBoxL1Cost_center(object): 155 | """BBoxL1Cost_center. 156 | 157 | Like BBoxL1Cost, but compares only the first two coordinates (the box centers for 'xywh', the top-left corners for 'xyxy'). 158 | 159 | Args: 160 | weight (int | float, optional): loss weight 161 | box_format (str, optional): 'xyxy' for DETR, 'xywh' for Sparse_RCNN 162 | 163 | Examples: 164 | >>> import torch 165 | >>> self = BBoxL1Cost_center() 166 | >>> bbox_pred = torch.rand(1, 4) 167 | >>> gt_bboxes = torch.FloatTensor([[0, 0, 2, 4], [1, 2, 3, 4]]) 168 | >>> self(bbox_pred, gt_bboxes).shape 169 | torch.Size([1, 2]) 170 | """ 171 | 172 | def __init__(self, weight=1., box_format='xyxy'): 173 | self.weight = weight 174 | assert box_format in ['xyxy', 'xywh'] 175 | self.box_format = box_format 176 | 177 | def __call__(self, bbox_pred, gt_bboxes): 178 | """ 179 | Args: 180 | bbox_pred (Tensor): Predicted boxes with normalized coordinates 181 | (cx, cy, w, h), which are all in range [0, 1]. Shape 182 | [num_query, 4]. 183 | gt_bboxes (Tensor): Ground truth boxes with normalized 184 | coordinates (x1, y1, x2, y2). Shape [num_gt, 4].
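For reference, the `center_of_mass` helper above reduces to normalized image moments; a small sketch on a toy mask (coordinates are in [0, 1], matching the helper):

```python
import torch

mask = torch.zeros(1, 4, 4)
mask[0, 1:3, 1:3] = 1.0                    # a centered 2x2 blob

ys = torch.linspace(0, 1, 4)
xs = torch.linspace(0, 1, 4)
m00 = mask.sum((-1, -2)).clamp(min=1e-6)   # total mass
m10 = (mask * xs).sum((-1, -2))            # first moment in x
m01 = (mask * ys[:, None]).sum((-1, -2))   # first moment in y
print(m10 / m00, m01 / m00)                # both 0.5: the blob is centered
```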
185 | 186 | Returns: 187 | torch.Tensor: bbox_cost value with weight 188 | """ 189 | if self.box_format == 'xywh': 190 | gt_bboxes = bbox_xyxy_to_cxcywh(gt_bboxes) 191 | elif self.box_format == 'xyxy': 192 | bbox_pred = bbox_cxcywh_to_xyxy(bbox_pred) 193 | bbox_cost = torch.cdist(bbox_pred[:,:2], gt_bboxes[:,:2], p=1) 194 | return bbox_cost * self.weight -------------------------------------------------------------------------------- /configs/models/cascade_mask_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='CascadeRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), 35 | roi_head=dict( 36 | type='CascadeRoIHead', 37 | num_stages=3, 38 | stage_loss_weights=[1, 0.5, 0.25], 39 | bbox_roi_extractor=dict( 40 | type='SingleRoIExtractor', 41 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 42 | out_channels=256, 43 | featmap_strides=[4, 8, 16, 32]), 44 | bbox_head=[ 45 | dict( 46 | type='Shared2FCBBoxHead', 47 | in_channels=256, 48 | fc_out_channels=1024, 49 | roi_feat_size=7, 50 | num_classes=80, 51 | bbox_coder=dict( 52 | type='DeltaXYWHBBoxCoder', 53 | target_means=[0., 0., 0., 0.], 54 | target_stds=[0.1, 0.1, 0.2, 0.2]), 55 | reg_class_agnostic=True, 56 | loss_cls=dict( 57 | type='CrossEntropyLoss', 58 | use_sigmoid=False, 59 | loss_weight=1.0), 60 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 61 | loss_weight=1.0)), 62 | dict( 63 | type='Shared2FCBBoxHead', 64 | in_channels=256, 65 | fc_out_channels=1024, 66 | roi_feat_size=7, 67 | num_classes=80, 68 | bbox_coder=dict( 69 | type='DeltaXYWHBBoxCoder', 70 | target_means=[0., 0., 0., 0.], 71 | target_stds=[0.05, 0.05, 0.1, 0.1]), 72 | reg_class_agnostic=True, 73 | loss_cls=dict( 74 | type='CrossEntropyLoss', 75 | use_sigmoid=False, 76 | loss_weight=1.0), 77 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 78 | loss_weight=1.0)), 79 | dict( 80 | type='Shared2FCBBoxHead', 81 | in_channels=256, 82 | fc_out_channels=1024, 83 | roi_feat_size=7, 84 | num_classes=80, 85 | bbox_coder=dict( 86 | type='DeltaXYWHBBoxCoder', 87 | target_means=[0., 0., 0., 0.], 88 | target_stds=[0.033, 0.033, 0.067, 0.067]), 89 | reg_class_agnostic=True, 90 | loss_cls=dict( 91 | type='CrossEntropyLoss', 92 | use_sigmoid=False, 93 | loss_weight=1.0), 94 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) 95 | ], 96 | mask_roi_extractor=dict( 97 | type='SingleRoIExtractor', 98 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 99 | out_channels=256, 100 | featmap_strides=[4, 8, 16, 32]), 101 | things_mask_head=dict( 102 | type='FCNMaskHead', 103 | num_convs=4, 104 | in_channels=256, 105 | 
conv_out_channels=256, 106 | num_classes=80, 107 | loss_mask=dict( 108 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 109 | # model training and testing settings 110 | train_cfg=dict( 111 | rpn=dict( 112 | assigner=dict( 113 | type='MaxIoUAssigner', 114 | pos_iou_thr=0.7, 115 | neg_iou_thr=0.3, 116 | min_pos_iou=0.3, 117 | match_low_quality=True, 118 | ignore_iof_thr=-1), 119 | sampler=dict( 120 | type='RandomSampler', 121 | num=256, 122 | pos_fraction=0.5, 123 | neg_pos_ub=-1, 124 | add_gt_as_proposals=False), 125 | allowed_border=0, 126 | pos_weight=-1, 127 | debug=False), 128 | rpn_proposal=dict( 129 | nms_pre=2000, 130 | max_per_img=2000, 131 | nms=dict(type='nms', iou_threshold=0.7), 132 | min_bbox_size=0), 133 | rcnn=[ 134 | dict( 135 | assigner=dict( 136 | type='MaxIoUAssigner', 137 | pos_iou_thr=0.5, 138 | neg_iou_thr=0.5, 139 | min_pos_iou=0.5, 140 | match_low_quality=False, 141 | ignore_iof_thr=-1), 142 | sampler=dict( 143 | type='RandomSampler', 144 | num=512, 145 | pos_fraction=0.25, 146 | neg_pos_ub=-1, 147 | add_gt_as_proposals=True), 148 | mask_size=28, 149 | pos_weight=-1, 150 | debug=False), 151 | dict( 152 | assigner=dict( 153 | type='MaxIoUAssigner', 154 | pos_iou_thr=0.6, 155 | neg_iou_thr=0.6, 156 | min_pos_iou=0.6, 157 | match_low_quality=False, 158 | ignore_iof_thr=-1), 159 | sampler=dict( 160 | type='RandomSampler', 161 | num=512, 162 | pos_fraction=0.25, 163 | neg_pos_ub=-1, 164 | add_gt_as_proposals=True), 165 | mask_size=28, 166 | pos_weight=-1, 167 | debug=False), 168 | dict( 169 | assigner=dict( 170 | type='MaxIoUAssigner', 171 | pos_iou_thr=0.7, 172 | neg_iou_thr=0.7, 173 | min_pos_iou=0.7, 174 | match_low_quality=False, 175 | ignore_iof_thr=-1), 176 | sampler=dict( 177 | type='RandomSampler', 178 | num=512, 179 | pos_fraction=0.25, 180 | neg_pos_ub=-1, 181 | add_gt_as_proposals=True), 182 | mask_size=28, 183 | pos_weight=-1, 184 | debug=False) 185 | ]), 186 | test_cfg=dict( 187 | rpn=dict( 188 | nms_pre=1000, 189 | max_per_img=1000, 190 | nms=dict(type='nms', iou_threshold=0.7), 191 | min_bbox_size=0), 192 | rcnn=dict( 193 | score_thr=0.05, 194 | nms=dict(type='nms', iou_threshold=0.5), 195 | max_per_img=100, 196 | mask_thr_binary=0.5))) 197 | -------------------------------------------------------------------------------- /easymd/models/detectors/single_stage_panoptic_detector.py: -------------------------------------------------------------------------------- 1 | from typing import FrozenSet 2 | from mmcv.runner.fp16_utils import auto_fp16 3 | import torch 4 | import torch.nn.functional as F 5 | from mmdet.core import bbox2result 6 | from mmdet.models.detectors.base import BaseDetector 7 | from mmdet.models.detectors.single_stage import SingleStageDetector 8 | from mmdet.models.builder import DETECTORS, build_backbone, build_head, build_neck 9 | #from .base import BaseDetector 10 | import mmcv 11 | from torch.utils.checkpoint import checkpoint 12 | @DETECTORS.register_module() 13 | class SingleStagePanopticDetector(BaseDetector): 14 | """Base class for single-stage detectors. 15 | 16 | Single-stage detectors directly and densely predict bounding boxes on the 17 | output features of the backbone+neck. 
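As context for the constructor that follows, here is a minimal sketch of how an mmdet-style detector of this shape is assembled (the sub-configs below are illustrative stand-ins, not the actual Panoptic SegFormer configs, which live under configs/panformer/):

```python
from mmdet.models.builder import build_backbone, build_neck

# Illustrative sub-configs in mmdet's dict-config style.
backbone_cfg = dict(type='ResNet', depth=50, num_stages=4,
                    out_indices=(0, 1, 2, 3), norm_cfg=dict(type='BN'))
neck_cfg = dict(type='FPN', in_channels=[256, 512, 1024, 2048],
                out_channels=256, num_outs=5)

backbone = build_backbone(backbone_cfg)
neck = build_neck(neck_cfg)
# Features flow backbone -> neck -> bbox_head, as in extract_feat() below.
```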
18 | """ 19 | 20 | def __init__(self, 21 | 22 | backbone, 23 | neck=None, 24 | bbox_head=None, 25 | train_cfg=None, 26 | test_cfg=None, 27 | pretrained=None, 28 | init_cfg=None, 29 | with_checkpoint =False): 30 | 31 | super(SingleStagePanopticDetector, self).__init__(init_cfg) 32 | self.fp16_enabled = False 33 | backbone.pretrained = pretrained 34 | self.backbone = build_backbone(backbone) 35 | if neck is not None: 36 | self.neck = build_neck(neck) 37 | bbox_head.update(train_cfg=train_cfg) 38 | bbox_head.update(test_cfg=test_cfg) 39 | self.bbox_head = build_head(bbox_head) 40 | self.train_cfg = train_cfg 41 | self.test_cfg = test_cfg 42 | self.with_checkpoint = with_checkpoint 43 | def extract_feat(self, img): 44 | """Directly extract features from the backbone+neck.""" 45 | x = self.backbone(img) 46 | if self.with_neck: 47 | x = self.neck(x) 48 | return x 49 | 50 | def forward_dummy(self, img): 51 | """Used for computing network flops. 52 | 53 | See `mmdetection/tools/analysis_tools/get_flops.py` 54 | """ 55 | x = self.extract_feat(img) 56 | outs = self.bbox_head(x) 57 | return outs 58 | @auto_fp16(apply_to=('img',)) 59 | def forward_train(self, 60 | img, 61 | img_metas, 62 | gt_bboxes, 63 | gt_labels, 64 | gt_masks=None, 65 | gt_bboxes_ignore=None, 66 | gt_semantic_seg=None 67 | ): 68 | """ 69 | Args: 70 | img (Tensor): Input images of shape (N, C, H, W). 71 | Typically these should be mean centered and std scaled. 72 | img_metas (list[dict]): A List of image info dict where each dict 73 | has: 'img_shape', 'scale_factor', 'flip', and may also contain 74 | 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. 75 | For details on the values of these keys see 76 | :class:`mmdet.datasets.pipelines.Collect`. 77 | gt_bboxes (list[Tensor]): Each item are the truth boxes for each 78 | image in [tl_x, tl_y, br_x, br_y] format. 79 | gt_labels (list[Tensor]): Class indices corresponding to each box 80 | gt_bboxes_ignore (None | list[Tensor]): Specify which bounding 81 | boxes can be ignored when computing the loss. 82 | 83 | Returns: 84 | dict[str, Tensor]: A dictionary of loss components. 85 | """ 86 | 87 | 88 | #mmcv.imshow(gt_semantic_seg.squeeze(0).squeeze(0).cpu().numpy()) 89 | #mmcv.imshow(img.squeeze(0).permute(1,2,0).cpu().numpy()) 90 | 91 | batch_input_shape = tuple(img[0].size()[-2:]) 92 | for img_meta in img_metas: 93 | img_meta['batch_input_shape'] = batch_input_shape 94 | #img_metas[0]['img'] = img 95 | #super(SingleStagePanopticDetector, self).forward_train(img, img_metas) 96 | if self.with_checkpoint: 97 | img.requires_grad_(True) 98 | x = checkpoint(self.extract_feat,img) 99 | else: 100 | x = self.extract_feat(img) 101 | BS,C,H,W = img.shape 102 | new_gt_masks = [] 103 | for each in gt_masks: 104 | mask =torch.tensor(each.to_ndarray(),device=x[0].device) 105 | _,h,w = mask.shape 106 | padding = ( 107 | 0,W-w, 108 | 0,H-h 109 | ) 110 | mask = F.pad(mask,padding) 111 | new_gt_masks.append(mask) 112 | gt_masks = new_gt_masks 113 | 114 | losses = self.bbox_head.forward_train(x, img_metas, gt_bboxes, 115 | gt_labels, gt_masks,gt_bboxes_ignore,gt_semantic_seg=gt_semantic_seg) 116 | return losses 117 | 118 | def simple_test(self, img, img_metas=None, rescale=False): 119 | """Test function without test time augmentation. 120 | 121 | Args: 122 | imgs (list[torch.Tensor]): List of multiple images 123 | img_metas (list[dict]): List of image information. 124 | rescale (bool, optional): Whether to rescale the results. 125 | Defaults to False. 
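A quick note on the mask-padding step in `forward_train` above: `F.pad` takes the padding as (left, right, top, bottom) for the last two dimensions, so each ground-truth mask is zero-padded up to the batch input shape. A minimal sketch:

```python
import torch
import torch.nn.functional as F

H, W = 8, 8                                   # batch input shape
mask = torch.ones(1, 5, 6)                    # one instance mask of size 5x6
padded = F.pad(mask, (0, W - 6, 0, H - 5))    # pad right and bottom with zeros
print(padded.shape)                           # torch.Size([1, 8, 8])
```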

        Returns:
            list[tuple]: Per-image (bbox_results, segm_results) pairs, where
                bbox_results is a list of per-class detection arrays and
                segm_results is the segmentation output of the head.
        """
        x = self.extract_feat(img)
        outs = self.bbox_head(x)
        # get origin input shape to support onnx dynamic shape
        if torch.onnx.is_in_onnx_export():
            # get shape as tensor
            img_shape = torch._shape_as_tensor(img)[2:]
            img_metas[0]['img_shape_for_onnx'] = img_shape
        bbox_list, seg_list = self.bbox_head.get_bboxes(
            *outs, img_metas, rescale=rescale)
        # skip post-processing when exporting to ONNX
        if torch.onnx.is_in_onnx_export():
            return bbox_list

        bbox_results = [
            bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes)
            for det_bboxes, det_labels in bbox_list
        ]
        return list(zip(bbox_results, seg_list))

    def aug_test(self, imgs, img_metas, rescale=False):
        """Test function with test time augmentation.

        Args:
            imgs (list[Tensor]): The outer list indicates test-time
                augmentations and the inner Tensor should have shape
                NxCxHxW, containing all images in the batch.
            img_metas (list[list[dict]]): The outer list indicates test-time
                augs (multiscale, flip, etc.) and the inner list indicates
                images in a batch. Each dict has image information.
            rescale (bool, optional): Whether to rescale the results.
                Defaults to False.

        Returns:
            list[list[np.ndarray]]: BBox results of each image and classes.
                The outer list corresponds to each image. The inner list
                corresponds to each class.
        """
        assert hasattr(self.bbox_head, 'aug_test'), \
            f'{self.bbox_head.__class__.__name__}' \
            ' does not support test-time augmentation'

        feats = self.extract_feats(imgs)
        return [self.bbox_head.aug_test(feats, img_metas, rescale=rescale)]
-------------------------------------------------------------------------------- /configs/_base_/models/cascade_mask_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='CascadeRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), 35 | roi_head=dict( 36 | type='CascadeRoIHead', 37 | num_stages=3, 38 | stage_loss_weights=[1, 0.5, 0.25], 39 | bbox_roi_extractor=dict( 40 | type='SingleRoIExtractor', 41 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 42 | out_channels=256, 43 | featmap_strides=[4, 8, 16, 32]), 44 | bbox_head=[ 45 | dict( 46 | 
type='Shared2FCBBoxHead', 47 | in_channels=256, 48 | fc_out_channels=1024, 49 | roi_feat_size=7, 50 | num_classes=80, 51 | bbox_coder=dict( 52 | type='DeltaXYWHBBoxCoder', 53 | target_means=[0., 0., 0., 0.], 54 | target_stds=[0.1, 0.1, 0.2, 0.2]), 55 | reg_class_agnostic=True, 56 | loss_cls=dict( 57 | type='CrossEntropyLoss', 58 | use_sigmoid=False, 59 | loss_weight=1.0), 60 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 61 | loss_weight=1.0)), 62 | dict( 63 | type='Shared2FCBBoxHead', 64 | in_channels=256, 65 | fc_out_channels=1024, 66 | roi_feat_size=7, 67 | num_classes=80, 68 | bbox_coder=dict( 69 | type='DeltaXYWHBBoxCoder', 70 | target_means=[0., 0., 0., 0.], 71 | target_stds=[0.05, 0.05, 0.1, 0.1]), 72 | reg_class_agnostic=True, 73 | loss_cls=dict( 74 | type='CrossEntropyLoss', 75 | use_sigmoid=False, 76 | loss_weight=1.0), 77 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 78 | loss_weight=1.0)), 79 | dict( 80 | type='Shared2FCBBoxHead', 81 | in_channels=256, 82 | fc_out_channels=1024, 83 | roi_feat_size=7, 84 | num_classes=80, 85 | bbox_coder=dict( 86 | type='DeltaXYWHBBoxCoder', 87 | target_means=[0., 0., 0., 0.], 88 | target_stds=[0.033, 0.033, 0.067, 0.067]), 89 | reg_class_agnostic=True, 90 | loss_cls=dict( 91 | type='CrossEntropyLoss', 92 | use_sigmoid=False, 93 | loss_weight=1.0), 94 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) 95 | ], 96 | mask_roi_extractor=dict( 97 | type='SingleRoIExtractor', 98 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 99 | out_channels=256, 100 | featmap_strides=[4, 8, 16, 32]), 101 | things_mask_head=dict( 102 | type='FCNMaskHead', 103 | num_convs=4, 104 | in_channels=256, 105 | conv_out_channels=256, 106 | num_classes=80, 107 | loss_mask=dict( 108 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 109 | # model training and testing settings 110 | train_cfg=dict( 111 | rpn=dict( 112 | assigner=dict( 113 | type='MaxIoUAssigner', 114 | pos_iou_thr=0.7, 115 | neg_iou_thr=0.3, 116 | min_pos_iou=0.3, 117 | match_low_quality=True, 118 | ignore_iof_thr=-1), 119 | sampler=dict( 120 | type='RandomSampler', 121 | num=256, 122 | pos_fraction=0.5, 123 | neg_pos_ub=-1, 124 | add_gt_as_proposals=False), 125 | allowed_border=0, 126 | pos_weight=-1, 127 | debug=False), 128 | rpn_proposal=dict( 129 | nms_pre=2000, 130 | max_per_img=2000, 131 | nms=dict(type='nms', iou_threshold=0.7), 132 | min_bbox_size=0), 133 | rcnn=[ 134 | dict( 135 | assigner=dict( 136 | type='MaxIoUAssigner', 137 | pos_iou_thr=0.5, 138 | neg_iou_thr=0.5, 139 | min_pos_iou=0.5, 140 | match_low_quality=False, 141 | ignore_iof_thr=-1), 142 | sampler=dict( 143 | type='RandomSampler', 144 | num=512, 145 | pos_fraction=0.25, 146 | neg_pos_ub=-1, 147 | add_gt_as_proposals=True), 148 | mask_size=28, 149 | pos_weight=-1, 150 | debug=False), 151 | dict( 152 | assigner=dict( 153 | type='MaxIoUAssigner', 154 | pos_iou_thr=0.6, 155 | neg_iou_thr=0.6, 156 | min_pos_iou=0.6, 157 | match_low_quality=False, 158 | ignore_iof_thr=-1), 159 | sampler=dict( 160 | type='RandomSampler', 161 | num=512, 162 | pos_fraction=0.25, 163 | neg_pos_ub=-1, 164 | add_gt_as_proposals=True), 165 | mask_size=28, 166 | pos_weight=-1, 167 | debug=False), 168 | dict( 169 | assigner=dict( 170 | type='MaxIoUAssigner', 171 | pos_iou_thr=0.7, 172 | neg_iou_thr=0.7, 173 | min_pos_iou=0.7, 174 | match_low_quality=False, 175 | ignore_iof_thr=-1), 176 | sampler=dict( 177 | type='RandomSampler', 178 | num=512, 179 | pos_fraction=0.25, 180 | neg_pos_ub=-1, 181 | add_gt_as_proposals=True), 
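# note: mask_size=28 below means that, during training, ground-truth masks are cropped to each sampled RoI and resized to 28x28 before the mask loss is computed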
182 | mask_size=28, 183 | pos_weight=-1, 184 | debug=False) 185 | ]), 186 | test_cfg=dict( 187 | rpn=dict( 188 | nms_pre=1000, 189 | max_per_img=1000, 190 | nms=dict(type='nms', iou_threshold=0.7), 191 | min_bbox_size=0), 192 | rcnn=dict( 193 | score_thr=0.05, 194 | nms=dict(type='nms', iou_threshold=0.5), 195 | max_per_img=100, 196 | mask_thr_binary=0.5))) 197 |
-------------------------------------------------------------------------------- /easymd/analysis_tools/analyze_results.py: --------------------------------------------------------------------------------
import argparse
import os.path as osp

import mmcv
import numpy as np
from mmcv import Config, DictAction

from mmdet.core.evaluation import eval_map
from mmdet.core.visualization import imshow_gt_det_bboxes
from mmdet.datasets import build_dataset, get_loading_pipeline


def bbox_map_eval(det_result, annotation):
    """Evaluate mAP of single image det result.

    Args:
        det_result (list[list]): [[cls1_det, cls2_det, ...], ...].
            The outer list indicates images, and the inner list indicates
            per-class detected bboxes.
        annotation (dict): Ground truth annotations where keys of
            annotations are:

            - bboxes: numpy array of shape (n, 4)
            - labels: numpy array of shape (n, )
            - bboxes_ignore (optional): numpy array of shape (k, 4)
            - labels_ignore (optional): numpy array of shape (k, )

    Returns:
        float: mAP
    """

    # use only bbox det result
    if isinstance(det_result, tuple):
        bbox_det_result = [det_result[0]]
    else:
        bbox_det_result = [det_result]
    # COCO-style mAP: average the AP over IoU thresholds 0.50, 0.55, ..., 0.95
    iou_thrs = np.linspace(
        .5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True)
    mean_aps = []
    for thr in iou_thrs:
        mean_ap, _ = eval_map(
            bbox_det_result, [annotation], iou_thr=thr, logger='silent')
        mean_aps.append(mean_ap)
    return sum(mean_aps) / len(mean_aps)


class ResultVisualizer(object):
    """Display and save evaluation results.

    Args:
        show (bool): Whether to show the image. Default: False
        wait_time (float): Value of waitKey param. Default: 0.
        score_thr (float): Minimum score of bboxes to be shown.
            Default: 0
    """

    def __init__(self, show=False, wait_time=0, score_thr=0):
        self.show = show
        self.wait_time = wait_time
        self.score_thr = score_thr

    def _save_image_gts_results(self, dataset, results, mAPs, out_dir=None):
        mmcv.mkdir_or_exist(out_dir)

        for mAP_info in mAPs:
            index, mAP = mAP_info
            data_info = dataset.prepare_train_img(index)

            # build the output file path; the per-image mAP is encoded in
            # the saved file name
            filename = data_info['filename']
            if data_info['img_prefix'] is not None:
                filename = osp.join(data_info['img_prefix'], filename)
            fname, name = osp.splitext(osp.basename(filename))
            save_filename = fname + '_' + str(round(mAP, 3)) + name
            out_file = osp.join(out_dir, save_filename)
            imshow_gt_det_bboxes(
                data_info['img'],
                data_info,
                results[index],
                dataset.CLASSES,
                show=self.show,
                score_thr=self.score_thr,
                wait_time=self.wait_time,
                out_file=out_file)

    def evaluate_and_show(self,
                          dataset,
                          results,
                          topk=20,
                          show_dir='work_dir',
                          eval_fn=None):
        """Evaluate and show results.

        Args:
            dataset (Dataset): A PyTorch dataset.
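                Typically built via ``build_dataset(cfg.data.test)``, as
                done in ``main()`` below.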
            results (list): Detection results loaded from the test
                results pkl file.
            topk (int): Number of images with the highest and lowest mAP
                to save after sorting. Default: 20
            show_dir (str, optional): The directory where painted images
                will be saved. Default: 'work_dir'
            eval_fn (callable, optional): Per-image evaluation function.
                Defaults to ``bbox_map_eval`` when None.
        """

        assert topk > 0
        if (topk * 2) > len(dataset):
            topk = len(dataset) // 2

        if eval_fn is None:
            eval_fn = bbox_map_eval
        else:
            assert callable(eval_fn)

        prog_bar = mmcv.ProgressBar(len(results))
        _mAPs = {}
        for i, result in enumerate(results):
            # dataset[i] should not be accessed directly, because there is
            # a risk of mismatch between results and dataset indices
            data_info = dataset.prepare_train_img(i)
            mAP = eval_fn(result, data_info['ann_info'])
            _mAPs[i] = mAP
            prog_bar.update()

        # sort by mAP in ascending order, then take topk images at each end
        _mAPs = list(sorted(_mAPs.items(), key=lambda kv: kv[1]))
        good_mAPs = _mAPs[-topk:]
        bad_mAPs = _mAPs[:topk]

        good_dir = osp.abspath(osp.join(show_dir, 'good'))
        bad_dir = osp.abspath(osp.join(show_dir, 'bad'))
        self._save_image_gts_results(dataset, results, good_mAPs, good_dir)
        self._save_image_gts_results(dataset, results, bad_mAPs, bad_dir)


def parse_args():
    parser = argparse.ArgumentParser(
        description='MMDet: evaluate the prediction result of each image')
    parser.add_argument('config', help='test config file path')
    parser.add_argument(
        'prediction_path', help='path to the test results pkl file')
    parser.add_argument(
        'show_dir', help='directory where painted images will be saved')
    parser.add_argument('--show', action='store_true', help='show results')
    parser.add_argument(
        '--wait-time',
        type=float,
        default=0,
        help='the interval of show (s), 0 is block')
    parser.add_argument(
        '--topk',
        default=20,
        type=int,
        help='number of images with the highest and lowest scores '
        'to save after index sorting')
    parser.add_argument(
        '--show-score-thr',
        type=float,
        default=0,
        help='score threshold (default: 0.)')
    parser.add_argument(
        '--cfg-options',
        nargs='+',
        action=DictAction,
        help='override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file. If the value to '
        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
        'Note that the quotation marks are necessary and that no white space '
        'is allowed.')
    args = parser.parse_args()
    return args


def main():
    args = parse_args()

    mmcv.check_file_exist(args.prediction_path)

    cfg = Config.fromfile(args.config)
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)
    cfg.data.test.test_mode = True
    # import modules from string list.
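    # A config may opt in with, e.g. (illustrative snippet):
    #   custom_imports = dict(imports=['easymd'], allow_failed_imports=False)
    # which registers the custom easymd modules before the dataset is built.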
    if cfg.get('custom_imports', None):
        from mmcv.utils import import_modules_from_strings
        import_modules_from_strings(**cfg['custom_imports'])

    cfg.data.test.pop('samples_per_gpu', 0)
    # reuse the loading transforms of the train pipeline so that ground-truth
    # annotations are available for visualization
    cfg.data.test.pipeline = get_loading_pipeline(cfg.data.train.pipeline)
    dataset = build_dataset(cfg.data.test)
    outputs = mmcv.load(args.prediction_path)

    result_visualizer = ResultVisualizer(args.show, args.wait_time,
                                         args.show_score_thr)
    result_visualizer.evaluate_and_show(
        dataset, outputs, topk=args.topk, show_dir=args.show_dir)


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------