├── README.assets └── image-20240301235034715.png ├── mmseg_custom ├── datasets │ ├── samplers │ │ ├── __init__.py │ │ └── distributed_sampler.py │ ├── pipelines │ │ ├── formating.py │ │ ├── add_noise.py │ │ ├── __init__.py │ │ └── compose.py │ ├── dark_zurich.py │ ├── night_driving.py │ ├── hrf.py │ ├── drive.py │ ├── isprs.py │ ├── stare.py │ ├── chase_db1.py │ ├── potsdam.py │ ├── voc.py │ ├── __init__.py │ ├── isaid.py │ └── loveda.py ├── configs │ ├── upernet │ │ ├── upernet_r101_512x512_80k_ade20k.py │ │ ├── upernet_r101_512x512_160k_ade20k.py │ │ ├── upernet_r101_512x512_20k_voc12aug.py │ │ ├── upernet_r101_512x512_40k_voc12aug.py │ │ ├── upernet_r101_512x1024_40k_cityscapes.py │ │ ├── upernet_r101_512x1024_80k_cityscapes.py │ │ ├── upernet_r101_769x769_40k_cityscapes.py │ │ ├── upernet_r101_769x769_80k_cityscapes.py │ │ ├── upernet_r50_512x1024_40k_cityscapes.py │ │ ├── upernet_r50_512x1024_80k_cityscapes.py │ │ ├── upernet_r50_512x512_160k_ade20k.py │ │ ├── upernet_r50_512x512_80k_ade20k.py │ │ ├── upernet_r50_512x512_20k_voc12aug.py │ │ ├── upernet_r50_512x512_40k_voc12aug.py │ │ ├── upernet_r50_769x769_40k_cityscapes.py │ │ ├── upernet_r50_769x769_80k_cityscapes.py │ │ ├── upernet_r101_512x512_40k_voc12aug_hard_pixel.py │ │ ├── upernet_r50_512x512_80k_ade20k_hard_pixel.py │ │ ├── upernet_r50_512x512_40k_voc12aug_hard_pixel.py │ │ └── upernet_r50_512x512_40k_voc12aug_FADC.py │ └── _base_ │ │ ├── datasets │ │ ├── pascal_voc12_aug.py │ │ ├── cityscapes_768x768.py │ │ ├── cityscapes_769x769.py │ │ ├── cityscapes_832x832.py │ │ ├── cityscapes_1024x1024.py │ │ ├── loveda.py │ │ ├── potsdam.py │ │ ├── vaihingen.py │ │ ├── coco-stuff164k.py │ │ ├── cityscapes.py │ │ ├── ade20k_640x640.py │ │ ├── drive.py │ │ ├── hrf.py │ │ ├── stare.py │ │ ├── coco-stuff10k.py │ │ ├── chase_db1.py │ │ ├── pascal_voc12.py │ │ ├── isaid.py │ │ ├── ade20k.py │ │ ├── pascal_context.py │ │ └── pascal_context_59.py │ │ ├── default_runtime.py │ │ ├── schedules │ │ ├── schedule_320k.py 
│ │ ├── schedule_20k.py │ │ ├── schedule_40k.py │ │ ├── schedule_80k.py │ │ └── schedule_160k.py │ │ └── models │ │ ├── lraspp_m-v3-d8.py │ │ ├── dpt_vit-b16.py │ │ ├── erfnet_fcn.py │ │ ├── segformer_mit-b0.py │ │ ├── fpn_r50.py │ │ ├── cgnet.py │ │ ├── segmenter_vit-b16_mask.py │ │ ├── ccnet_r50-d8.py │ │ ├── danet_r50-d8.py │ │ ├── pspnet_r50-d8.py │ │ ├── deeplabv3_r50-d8.py │ │ ├── fcn_r50-d8.py │ │ ├── isanet_r50-d8.py │ │ ├── dmnet_r50-d8.py │ │ ├── upernet_r50.py │ │ ├── apcnet_r50-d8.py │ │ ├── dnl_r50-d8.py │ │ ├── nonlocal_r50-d8.py │ │ ├── gcnet_r50-d8.py │ │ ├── emanet_r50-d8.py │ │ ├── ann_r50-d8.py │ │ ├── deeplabv3plus_r50-d8.py │ │ ├── deeplabv3plus_r50-d8-AAFS.py │ │ ├── ocrnet_r50-d8.py │ │ ├── psanet_r50-d8.py │ │ ├── twins_pcpvt-s_fpn.py │ │ ├── encnet_r50-d8.py │ │ ├── upernet_mae.py │ │ ├── upernet_beit.py │ │ ├── upernet_convnext.py │ │ ├── pspnet_unet_s5-d16.py │ │ ├── deeplabv3_unet_s5-d16.py │ │ ├── fcn_unet_s5-d16.py │ │ ├── fastfcn_r50-d32_jpu_psp.py │ │ ├── fcn_hr18.py │ │ ├── upernet_swin.py │ │ ├── twins_pcpvt-s_upernet.py │ │ ├── pointrend_r50.py │ │ ├── upernet_vit-b16_ln_mln.py │ │ ├── fast_scnn.py │ │ ├── bisenetv1_r18-d32.py │ │ ├── icnet_r50-d8.py │ │ ├── ocrnet_hr18.py │ │ ├── setr_pup.py │ │ ├── setr_naive.py │ │ ├── upernet_swin_AS.py │ │ ├── bisenetv2.py │ │ ├── stdc.py │ │ └── setr_mla.py ├── models │ ├── backbones │ │ └── __init__.py │ ├── __init__.py │ └── utils │ │ ├── __init__.py │ │ ├── make_divisible.py │ │ ├── se_layer.py │ │ ├── res_layer.py │ │ ├── shape_convert.py │ │ └── up_conv_block.py ├── core │ ├── utils │ │ ├── __init__.py │ │ ├── misc.py │ │ └── dist_util.py │ ├── seg │ │ ├── sampler │ │ │ ├── __init__.py │ │ │ ├── base_pixel_sampler.py │ │ │ └── ohem_pixel_sampler.py │ │ ├── __init__.py │ │ └── builder.py │ ├── optimizers │ │ └── __init__.py │ ├── __init__.py │ ├── evaluation │ │ ├── __init__.py │ │ └── eval_hooks.py │ └── builder.py ├── apis │ ├── __init__.py │ └── inference.py ├── version.py └── 
__init__.py ├── dist_train.sh ├── dist_test.sh ├── slurm_train.sh ├── slurm_test.sh └── get_flops.py /README.assets/image-20240301235034715.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Linwei-Chen/Seg-Aliasing/HEAD/README.assets/image-20240301235034715.png -------------------------------------------------------------------------------- /mmseg_custom/datasets/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .distributed_sampler import DistributedSampler 3 | 4 | __all__ = ['DistributedSampler'] 5 | -------------------------------------------------------------------------------- /mmseg_custom/configs/upernet/upernet_r101_512x512_80k_ade20k.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_512x512_80k_ade20k.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /mmseg_custom/configs/upernet/upernet_r101_512x512_160k_ade20k.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_512x512_160k_ade20k.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /mmseg_custom/configs/upernet/upernet_r101_512x512_20k_voc12aug.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_512x512_20k_voc12aug.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /mmseg_custom/configs/upernet/upernet_r101_512x512_40k_voc12aug.py: 
-------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_512x512_40k_voc12aug.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /mmseg_custom/configs/upernet/upernet_r101_512x1024_40k_cityscapes.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_512x1024_40k_cityscapes.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /mmseg_custom/configs/upernet/upernet_r101_512x1024_80k_cityscapes.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_512x1024_80k_cityscapes.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /mmseg_custom/configs/upernet/upernet_r101_769x769_40k_cityscapes.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_769x769_40k_cityscapes.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /mmseg_custom/configs/upernet/upernet_r101_769x769_80k_cityscapes.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_769x769_80k_cityscapes.py' 2 | model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /mmseg_custom/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .resnet import ResNet, ResNetV1c, ResNetV1d 3 | __all__ = [ 4 | 'ResNet', 'ResNetV1c', 'ResNetV1d', 5 | ] 6 | -------------------------------------------------------------------------------- /mmseg_custom/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .backbones import * # noqa: F401,F403 3 | from .decode_heads import * # noqa: F401,F403 4 | from mmseg.models import * 5 | __all__ = [ 6 | ] 7 | -------------------------------------------------------------------------------- /mmseg_custom/configs/upernet/upernet_r50_512x1024_40k_cityscapes.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' 4 | ] 5 | -------------------------------------------------------------------------------- /mmseg_custom/configs/upernet/upernet_r50_512x1024_80k_cityscapes.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' 4 | ] 5 | -------------------------------------------------------------------------------- /mmseg_custom/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .dist_util import check_dist_init, sync_random_seed 3 | from .misc import add_prefix 4 | 5 | __all__ = ['add_prefix', 'check_dist_init', 'sync_random_seed'] 6 | -------------------------------------------------------------------------------- /mmseg_custom/core/seg/sampler/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. 
All rights reserved. 2 | from .base_pixel_sampler import BasePixelSampler 3 | from .ohem_pixel_sampler import OHEMPixelSampler 4 | 5 | __all__ = ['BasePixelSampler', 'OHEMPixelSampler'] 6 | -------------------------------------------------------------------------------- /mmseg_custom/core/seg/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .builder import build_pixel_sampler 3 | from .sampler import BasePixelSampler, OHEMPixelSampler 4 | 5 | __all__ = ['build_pixel_sampler', 'BasePixelSampler', 'OHEMPixelSampler'] 6 | -------------------------------------------------------------------------------- /mmseg_custom/core/optimizers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .layer_decay_optimizer_constructor import ( 3 | LayerDecayOptimizerConstructor, LearningRateDecayOptimizerConstructor) 4 | 5 | __all__ = [ 6 | 'LearningRateDecayOptimizerConstructor', 'LayerDecayOptimizerConstructor' 7 | ] 8 | -------------------------------------------------------------------------------- /mmseg_custom/configs/upernet/upernet_r50_512x512_160k_ade20k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' 4 | ] 5 | model = dict( 6 | decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) 7 | -------------------------------------------------------------------------------- /mmseg_custom/configs/upernet/upernet_r50_512x512_80k_ade20k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', 
'../_base_/schedules/schedule_80k.py' 4 | ] 5 | model = dict( 6 | decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) 7 | -------------------------------------------------------------------------------- /mmseg_custom/configs/upernet/upernet_r50_512x512_20k_voc12aug.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', 3 | '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_20k.py' 5 | ] 6 | model = dict( 7 | decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) 8 | -------------------------------------------------------------------------------- /mmseg_custom/configs/upernet/upernet_r50_512x512_40k_voc12aug.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_r50.py', 3 | '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', 4 | '../_base_/schedules/schedule_40k.py' 5 | ] 6 | model = dict( 7 | decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) 8 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/datasets/pascal_voc12_aug.py: -------------------------------------------------------------------------------- 1 | _base_ = './pascal_voc12.py' 2 | # dataset settings 3 | data = dict( 4 | train=dict( 5 | ann_dir=['SegmentationClass', 'SegmentationClassAug'], 6 | split=[ 7 | 'ImageSets/Segmentation/train.txt', 8 | 'ImageSets/Segmentation/aug.txt' 9 | ])) 10 | -------------------------------------------------------------------------------- /mmseg_custom/core/seg/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
def build_pixel_sampler(cfg, **default_args):
    """Instantiate a pixel sampler from a config via ``PIXEL_SAMPLERS``.

    Args:
        cfg: Config passed to ``build_from_cfg`` as the object description.
        **default_args: Extra keyword arguments forwarded to
            ``build_from_cfg`` as its default arguments.

    Returns:
        Whatever ``build_from_cfg`` builds from the registry.
    """
    sampler = build_from_cfg(cfg, PIXEL_SAMPLERS, default_args)
    return sampler
class BasePixelSampler(metaclass=ABCMeta):
    """Abstract interface shared by all pixel samplers."""

    def __init__(self, **kwargs):
        """Accept arbitrary keyword arguments; the base class ignores them."""

    @abstractmethod
    def sample(self, seg_logit, seg_label):
        """Placeholder for the sampling routine; subclasses must override.

        Args:
            seg_logit: Segmentation logits handed to the sampler.
            seg_label: Segmentation labels handed to the sampler.
        """
        return None
dict(type='IterBasedRunner', max_iters=320000) 8 | checkpoint_config = dict(by_epoch=False, interval=32000) 9 | evaluation = dict(interval=32000, metric='mIoU') 10 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/schedules/schedule_20k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=20000) 8 | checkpoint_config = dict(by_epoch=False, interval=2000) 9 | evaluation = dict(interval=2000, metric='mIoU', pre_eval=True) 10 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/schedules/schedule_40k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=40000) 8 | checkpoint_config = dict(by_epoch=False, interval=4000) 9 | evaluation = dict(interval=4000, metric='mIoU', pre_eval=True) 10 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/schedules/schedule_80k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=80000) 8 | 
checkpoint_config = dict(by_epoch=False, interval=8000) 9 | evaluation = dict(interval=8000, metric='mIoU', pre_eval=True) 10 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/schedules/schedule_160k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=160000) 8 | checkpoint_config = dict(by_epoch=False, interval=16000) 9 | evaluation = dict(interval=16000, metric='mIoU', pre_eval=True) 10 | -------------------------------------------------------------------------------- /mmseg_custom/core/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .builder import (OPTIMIZER_BUILDERS, build_optimizer, 3 | build_optimizer_constructor) 4 | from .evaluation import * # noqa: F401, F403 5 | from .optimizers import * # noqa: F401, F403 6 | from .seg import * # noqa: F401, F403 7 | from .utils import * # noqa: F401, F403 8 | 9 | __all__ = [ 10 | 'OPTIMIZER_BUILDERS', 'build_optimizer', 'build_optimizer_constructor' 11 | ] 12 | -------------------------------------------------------------------------------- /mmseg_custom/datasets/dark_zurich.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
def add_prefix(inputs, prefix):
    """Return a copy of ``inputs`` whose keys are namespaced by ``prefix``.

    Args:
        inputs (dict): The input dict with str keys.
        prefix (str): The prefix to add.

    Returns:
        dict: A new dict mapping ``'<prefix>.<key>'`` to the original value
        for every item of ``inputs``.
    """
    return {f'{prefix}.{key}': val for key, val in inputs.items()}
#!/usr/bin/env bash
# Launch distributed training through torch.distributed.launch.
#
# Usage: dist_train.sh CONFIG GPUS [extra train.py arguments...]
# Environment overrides: NNODES, NODE_RANK, PORT, MASTER_ADDR.

CONFIG=$1
GPUS=$2
NNODES=${NNODES:-1}
NODE_RANK=${NODE_RANK:-0}
# PORT=${PORT:-29500}
# Default to a random unprivileged port (20000-29999) so concurrent jobs on
# one machine do not collide; ports below 1024 cannot be bound without root.
# For multi-node runs set PORT explicitly so every node uses the same value.
PORT=${PORT:-$((20000 + RANDOM % 10000))}
MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}

# Quote paths and arguments so directories or options containing spaces
# survive word splitting.
PYTHONPATH="$(dirname "$0")/..":$PYTHONPATH \
python -m torch.distributed.launch \
    --nnodes="$NNODES" \
    --node_rank="$NODE_RANK" \
    --master_addr="$MASTER_ADDR" \
    --nproc_per_node="$GPUS" \
    --master_port="$PORT" \
    "$(dirname "$0")/train.py" \
    "$CONFIG" \
    --launcher pytorch "${@:3}"
def parse_version_info(version_str):
    """Convert a dotted version string into a tuple.

    Purely numeric segments become ints. A segment containing ``'rc'`` is
    split on it and contributes the leading number as an int followed by the
    string ``'rc<suffix>'``. Any other segment is skipped.

    Args:
        version_str (str): Version string such as ``'0.25.0'`` or
            ``'1.0.0rc1'``.

    Returns:
        tuple: The parsed components, e.g. ``(0, 25, 0)`` or
        ``(1, 0, 0, 'rc1')``.
    """
    parts = []
    for segment in version_str.split('.'):
        if segment.isdigit():
            parts.append(int(segment))
            continue
        if 'rc' in segment:
            pieces = segment.split('rc')
            parts.extend([int(pieces[0]), f'rc{pieces[1]}'])
    return tuple(parts)
\ 23 | python -u tools/train.py ${CONFIG} --launcher="slurm" ${PY_ARGS} 24 | -------------------------------------------------------------------------------- /slurm_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | CHECKPOINT=$4 9 | GPUS=${GPUS:-4} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-4} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | PY_ARGS=${@:5} 13 | SRUN_ARGS=${SRUN_ARGS:-""} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /mmseg_custom/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
# UPerNet with a ResNet-101-depth 'NyResNet' backbone on the VOC12-aug 40k
# schedule, inheriting everything else from the R-50 config.
_base_ = './upernet_r50_512x512_40k_voc12aug.py'
model = dict(
    pretrained='open-mmlab://resnet101_v1c',
    backbone=dict(
        depth=101,
        # Alternative backbones / blur modes kept for reference:
        # type='ResNetV1c',
        # type='ResNetV1cWithBlur',
        type='NyResNet',
        # blur_type='adafreq',
        # blur_type='blur',
        blur_type='flc',
        freq_thres=0.25 * 1.4,
        # blur_k=7,
        with_cp=True,
        # use_checkpoint=True,
    ),
    decode_head=dict(
        type='UPerHead',
        channels=128,
    ),
)
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=16,
)
checkpoint_config = dict(max_keep_ckpts=2)
# pre_eval must be a real boolean: the string 'True' only worked because any
# non-empty string is truthy (so 'False' would have enabled it as well).
evaluation = dict(save_best='mIoU', pre_eval=True)
@PIPELINES.register_module()
class AddNoisyImg(object):
    """Pipeline step that adds zero-mean Gaussian noise to image fields.

    Every key listed in ``results['img_fields']`` (or just ``'img'`` when
    that entry is absent) is perturbed with independently drawn noise,
    clipped to ``[0, 255]`` and stored back as ``uint8``.

    Args:
        sigma (float): Standard deviation of the noise. Default: 10.
    """

    def __init__(self, sigma=10.):
        self.sigma = sigma

    def __call__(self, results):
        """Add noise to each image field of ``results`` in place.

        Args:
            results (dict): Pipeline dict holding the image array(s).

        Returns:
            dict: The same ``results`` dict with noisy ``uint8`` images.
        """
        # NOTE(review): this prints once per processed sample; kept for
        # backward compatibility, but consider a logger or removal.
        print(f'Add noise: sigma = {self.sigma}')
        for key in results.get('img_fields', ['img']):
            # Operate on the field being iterated. The previous code always
            # read and wrote results['img'], so other fields were never
            # noised and 'img' received noise once per listed field.
            img = results[key]
            noise = np.random.normal(scale=self.sigma, size=img.shape)
            # Widen before adding so the sum cannot wrap around in uint8.
            noisy = np.clip(img.astype(np.int32) + noise, 0, 255)
            results[key] = noisy.astype(np.uint8)

        return results

    def __repr__(self):
        return f'{self.__class__.__name__}(sigma={self.sigma})'
@DATASETS.register_module()
class HRFDataset(CustomDataset):
    """HRF dataset.

    The HRF segmentation maps use 0 for background, and background counts as
    one of the 2 categories, so ``reduce_zero_label`` is fixed to False.
    Both ``img_suffix`` and ``seg_map_suffix`` are fixed to '.png'.
    """

    CLASSES = ('background', 'vessel')

    PALETTE = [[120, 120, 120], [6, 230, 230]]

    def __init__(self, **kwargs):
        # Settings this dataset pins; passing any of them again through
        # ``kwargs`` raises TypeError, exactly as a duplicate keyword would.
        fixed_args = dict(
            img_suffix='.png',
            seg_map_suffix='.png',
            reduce_zero_label=False)
        super().__init__(**fixed_args, **kwargs)
        # The image directory must exist according to the file client.
        assert self.file_client.exists(self.img_dir)
@DATASETS.register_module()
class ISPRSDataset(CustomDataset):
    """ISPRS dataset.

    In segmentation map annotation for ISPRS, 0 is the ignore index, so
    ``reduce_zero_label`` is fixed to True. The ``img_suffix`` and
    ``seg_map_suffix`` are both fixed to '.png'.
    """
    CLASSES = ('impervious_surface', 'building', 'low_vegetation', 'tree',
               'car', 'clutter')

    PALETTE = [[255, 255, 255], [0, 0, 255], [0, 255, 255], [0, 255, 0],
               [255, 255, 0], [255, 0, 0]]

    def __init__(self, **kwargs):
        super(ISPRSDataset, self).__init__(
            img_suffix='.png',
            seg_map_suffix='.png',
            reduce_zero_label=True,
            **kwargs)
@DATASETS.register_module()
class ChaseDB1Dataset(CustomDataset):
    """Chase_db1 dataset.

    The segmentation maps of Chase_db1 use 0 for background, and background
    is counted as one of the 2 categories, so ``reduce_zero_label`` is fixed
    to False. ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is
    fixed to '_1stHO.png'.
    """

    CLASSES = ('background', 'vessel')

    PALETTE = [[120, 120, 120], [6, 230, 230]]

    def __init__(self, **kwargs):
        super().__init__(
            img_suffix='.png',
            seg_map_suffix='_1stHO.png',
            reduce_zero_label=False,
            **kwargs)
        # Fail early when the image directory cannot be found through the
        # configured file client.
        img_dir_found = self.file_client.exists(self.img_dir)
        assert img_dir_found
11 | ``reduce_zero_label`` should be set to True. The ``img_suffix`` and 12 | ``seg_map_suffix`` are both fixed to '.png'. 13 | """ 14 | CLASSES = ('impervious_surface', 'building', 'low_vegetation', 'tree', 15 | 'car', 'clutter') 16 | 17 | PALETTE = [[255, 255, 255], [0, 0, 255], [0, 255, 255], [0, 255, 0], 18 | [255, 255, 0], [255, 0, 0]] 19 | 20 | def __init__(self, **kwargs): 21 | super(PotsdamDataset, self).__init__( 22 | img_suffix='.png', 23 | seg_map_suffix='.png', 24 | reduce_zero_label=True, 25 | **kwargs) 26 | -------------------------------------------------------------------------------- /mmseg_custom/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .compose import Compose 3 | from .formatting import (Collect, ImageToTensor, ToDataContainer, ToTensor, 4 | Transpose, to_tensor) 5 | from .loading import LoadAnnotations, LoadImageFromFile 6 | from .test_time_aug import MultiScaleFlipAug 7 | from .transforms import (CLAHE, AdjustGamma, Normalize, Pad, 8 | PhotoMetricDistortion, RandomCrop, RandomCutOut, 9 | RandomFlip, RandomMosaic, RandomRotate, Rerange, 10 | Resize, RGB2Gray, SegRescale) 11 | from .add_noise import AddNoisyImg 12 | __all__ = [ 13 | 'Compose', 'to_tensor', 'ToTensor', 'ImageToTensor', 'ToDataContainer', 14 | 'Transpose', 'Collect', 'LoadAnnotations', 'LoadImageFromFile', 15 | 'MultiScaleFlipAug', 'Resize', 'RandomFlip', 'Pad', 'RandomCrop', 16 | 'Normalize', 'SegRescale', 'PhotoMetricDistortion', 'RandomRotate', 17 | 'AdjustGamma', 'CLAHE', 'Rerange', 'RGB2Gray', 'RandomCutOut', 18 | 'RandomMosaic', 19 | 'AddNoisyImg' 20 | ] 21 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/models/dpt_vit-b16.py: -------------------------------------------------------------------------------- 1 | norm_cfg = dict(type='SyncBN', requires_grad=True) 2 | model = dict( 3 | 
type='EncoderDecoder', 4 | pretrained='pretrain/vit-b16_p16_224-80ecf9dd.pth', # noqa 5 | backbone=dict( 6 | type='VisionTransformer', 7 | img_size=224, 8 | embed_dims=768, 9 | num_layers=12, 10 | num_heads=12, 11 | out_indices=(2, 5, 8, 11), 12 | final_norm=False, 13 | with_cls_token=True, 14 | output_cls_token=True), 15 | decode_head=dict( 16 | type='DPTHead', 17 | in_channels=(768, 768, 768, 768), 18 | channels=256, 19 | embed_dims=768, 20 | post_process_channels=[96, 192, 384, 768], 21 | num_classes=150, 22 | readout_type='project', 23 | input_transform='multiple_select', 24 | in_index=(0, 1, 2, 3), 25 | norm_cfg=norm_cfg, 26 | loss_decode=dict( 27 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 28 | auxiliary_head=None, 29 | # model training and testing settings 30 | train_cfg=dict(), 31 | test_cfg=dict(mode='whole')) # yapf: disable 32 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/models/erfnet_fcn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained=None, 6 | backbone=dict( 7 | type='ERFNet', 8 | in_channels=3, 9 | enc_downsample_channels=(16, 64, 128), 10 | enc_stage_non_bottlenecks=(5, 8), 11 | enc_non_bottleneck_dilations=(2, 4, 8, 16), 12 | enc_non_bottleneck_channels=(64, 128), 13 | dec_upsample_channels=(64, 16), 14 | dec_stages_non_bottleneck=(2, 2), 15 | dec_non_bottleneck_channels=(64, 16), 16 | dropout_ratio=0.1, 17 | init_cfg=None), 18 | decode_head=dict( 19 | type='FCNHead', 20 | in_channels=16, 21 | channels=128, 22 | num_convs=1, 23 | concat_input=False, 24 | dropout_ratio=0.1, 25 | num_classes=19, 26 | norm_cfg=norm_cfg, 27 | align_corners=False, 28 | loss_decode=dict( 29 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 30 | # model training and testing settings 31 | 
train_cfg=dict(), 32 | test_cfg=dict(mode='whole')) 33 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/models/segformer_mit-b0.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained=None, 6 | backbone=dict( 7 | type='MixVisionTransformer', 8 | in_channels=3, 9 | embed_dims=32, 10 | num_stages=4, 11 | num_layers=[2, 2, 2, 2], 12 | num_heads=[1, 2, 5, 8], 13 | patch_sizes=[7, 3, 3, 3], 14 | sr_ratios=[8, 4, 2, 1], 15 | out_indices=(0, 1, 2, 3), 16 | mlp_ratio=4, 17 | qkv_bias=True, 18 | drop_rate=0.0, 19 | attn_drop_rate=0.0, 20 | drop_path_rate=0.1), 21 | decode_head=dict( 22 | type='SegformerHead', 23 | in_channels=[32, 64, 160, 256], 24 | in_index=[0, 1, 2, 3], 25 | channels=256, 26 | dropout_ratio=0.1, 27 | num_classes=19, 28 | norm_cfg=norm_cfg, 29 | align_corners=False, 30 | loss_decode=dict( 31 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 32 | # model training and testing settings 33 | train_cfg=dict(), 34 | test_cfg=dict(mode='whole')) 35 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/models/fpn_r50.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 1, 1), 12 | strides=(1, 2, 2, 2), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | neck=dict( 18 | type='FPN', 19 | in_channels=[256, 512, 1024, 2048], 20 | out_channels=256, 21 | num_outs=4), 22 | decode_head=dict( 23 | 
type='FPNHead', 24 | in_channels=[256, 256, 256, 256], 25 | in_index=[0, 1, 2, 3], 26 | feature_strides=[4, 8, 16, 32], 27 | channels=128, 28 | dropout_ratio=0.1, 29 | num_classes=19, 30 | norm_cfg=norm_cfg, 31 | align_corners=False, 32 | loss_decode=dict( 33 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 34 | # model training and testing settings 35 | train_cfg=dict(), 36 | test_cfg=dict(mode='whole')) 37 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/models/cgnet.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', eps=1e-03, requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | backbone=dict( 6 | type='CGNet', 7 | norm_cfg=norm_cfg, 8 | in_channels=3, 9 | num_channels=(32, 64, 128), 10 | num_blocks=(3, 21), 11 | dilations=(2, 4), 12 | reductions=(8, 16)), 13 | decode_head=dict( 14 | type='FCNHead', 15 | in_channels=256, 16 | in_index=2, 17 | channels=256, 18 | num_convs=0, 19 | concat_input=False, 20 | dropout_ratio=0, 21 | num_classes=19, 22 | norm_cfg=norm_cfg, 23 | loss_decode=dict( 24 | type='CrossEntropyLoss', 25 | use_sigmoid=False, 26 | loss_weight=1.0, 27 | class_weight=[ 28 | 2.5959933, 6.7415504, 3.5354059, 9.8663225, 9.690899, 9.369352, 29 | 10.289121, 9.953208, 4.3097677, 9.490387, 7.674431, 9.396905, 30 | 10.347791, 6.3927646, 10.226669, 10.241062, 10.280587, 31 | 10.396974, 10.055647 32 | ])), 33 | # model training and testing settings 34 | train_cfg=dict(sampler=None), 35 | test_cfg=dict(mode='whole')) 36 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/models/segmenter_vit-b16_mask.py: -------------------------------------------------------------------------------- 1 | checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segmenter/vit_base_p16_384_20220308-96dfe169.pth' # noqa 2 | 
# model settings 3 | backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True) 4 | model = dict( 5 | type='EncoderDecoder', 6 | pretrained=checkpoint, 7 | backbone=dict( 8 | type='VisionTransformer', 9 | img_size=(512, 512), 10 | patch_size=16, 11 | in_channels=3, 12 | embed_dims=768, 13 | num_layers=12, 14 | num_heads=12, 15 | drop_path_rate=0.1, 16 | attn_drop_rate=0.0, 17 | drop_rate=0.0, 18 | final_norm=True, 19 | norm_cfg=backbone_norm_cfg, 20 | with_cls_token=True, 21 | interpolate_mode='bicubic', 22 | ), 23 | decode_head=dict( 24 | type='SegmenterMaskTransformerHead', 25 | in_channels=768, 26 | channels=768, 27 | num_classes=150, 28 | num_layers=2, 29 | num_heads=12, 30 | embed_dims=768, 31 | dropout_ratio=0.0, 32 | loss_decode=dict( 33 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 34 | ), 35 | test_cfg=dict(mode='slide', crop_size=(512, 512), stride=(480, 480)), 36 | ) 37 | -------------------------------------------------------------------------------- /mmseg_custom/datasets/voc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import os.path as osp 3 | 4 | from .builder import DATASETS 5 | from .custom import CustomDataset 6 | 7 | 8 | @DATASETS.register_module() 9 | class PascalVOCDataset(CustomDataset): 10 | """Pascal VOC dataset. 11 | 12 | Args: 13 | split (str): Split txt file for Pascal VOC. 
14 | """ 15 | 16 | CLASSES = ('background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 17 | 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 18 | 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 19 | 'train', 'tvmonitor') 20 | 21 | PALETTE = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0], [0, 0, 128], 22 | [128, 0, 128], [0, 128, 128], [128, 128, 128], [64, 0, 0], 23 | [192, 0, 0], [64, 128, 0], [192, 128, 0], [64, 0, 128], 24 | [192, 0, 128], [64, 128, 128], [192, 128, 128], [0, 64, 0], 25 | [128, 64, 0], [0, 192, 0], [128, 192, 0], [0, 64, 128]] 26 | 27 | def __init__(self, split, **kwargs): 28 | super(PascalVOCDataset, self).__init__( 29 | img_suffix='.jpg', seg_map_suffix='.png', split=split, **kwargs) 30 | assert osp.exists(self.img_dir) and self.split is not None 31 | -------------------------------------------------------------------------------- /mmseg_custom/core/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
def make_divisible(value, divisor, min_value=None, min_ratio=0.9):
    """Round a channel number to a multiple of ``divisor``.

    The channel number is rounded to the nearest multiple of ``divisor``
    (for positive inputs, exact halves round up) and is never allowed to
    drop below ``min_value``. The approach follows the original TensorFlow
    MobileNet code:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py  # noqa

    Args:
        value (int): The original channel number.
        divisor (int): The divisor the result must be a multiple of.
        min_value (int): Lower bound of the output channel number.
            Default: None, which means the lower bound equals ``divisor``.
        min_ratio (float): The minimum ratio of the rounded channel number
            to the original channel number. Default: 0.9.

    Returns:
        int: The modified output channel number.
    """
    lower_bound = divisor if min_value is None else min_value
    # Shift by half a step before flooring to get round-to-nearest.
    rounded = int(value + divisor / 2) // divisor * divisor
    result = rounded if rounded > lower_bound else lower_bound
    # If rounding shrank the value by more than (1 - min_ratio), move one
    # step up instead.
    if result < min_ratio * value:
        result = result + divisor
    return result
'PascalContextDataset59', 'ChaseDB1Dataset', 'DRIVEDataset', 'HRFDataset', 27 | 'STAREDataset', 'DarkZurichDataset', 'NightDrivingDataset', 28 | 'COCOStuffDataset', 'LoveDADataset', 'MultiImageMixDataset', 29 | 'iSAIDDataset', 'ISPRSDataset', 'PotsdamDataset' 30 | ] 31 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/datasets/cityscapes_768x768.py: -------------------------------------------------------------------------------- 1 | _base_ = './cityscapes.py' 2 | img_norm_cfg = dict( 3 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 4 | crop_size = (768, 768) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations'), 8 | dict(type='Resize', img_scale=(2049, 1025), ratio_range=(0.5, 2.0)), 9 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 10 | dict(type='RandomFlip', prob=0.5), 11 | dict(type='PhotoMetricDistortion'), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 14 | dict(type='DefaultFormatBundle'), 15 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 16 | ] 17 | test_pipeline = [ 18 | dict(type='LoadImageFromFile'), 19 | dict( 20 | type='MultiScaleFlipAug', 21 | img_scale=(2049, 1025), 22 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 23 | flip=False, 24 | transforms=[ 25 | dict(type='Resize', keep_ratio=True), 26 | dict(type='RandomFlip'), 27 | dict(type='Normalize', **img_norm_cfg), 28 | dict(type='ImageToTensor', keys=['img']), 29 | dict(type='Collect', keys=['img']), 30 | ]) 31 | ] 32 | data = dict( 33 | train=dict(pipeline=train_pipeline), 34 | val=dict(pipeline=test_pipeline), 35 | test=dict(pipeline=test_pipeline)) 36 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/datasets/cityscapes_769x769.py: -------------------------------------------------------------------------------- 1 
| _base_ = './cityscapes.py' 2 | img_norm_cfg = dict( 3 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 4 | crop_size = (769, 769) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations'), 8 | dict(type='Resize', img_scale=(2049, 1025), ratio_range=(0.5, 2.0)), 9 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 10 | dict(type='RandomFlip', prob=0.5), 11 | dict(type='PhotoMetricDistortion'), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 14 | dict(type='DefaultFormatBundle'), 15 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 16 | ] 17 | test_pipeline = [ 18 | dict(type='LoadImageFromFile'), 19 | dict( 20 | type='MultiScaleFlipAug', 21 | img_scale=(2049, 1025), 22 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 23 | flip=False, 24 | transforms=[ 25 | dict(type='Resize', keep_ratio=True), 26 | dict(type='RandomFlip'), 27 | dict(type='Normalize', **img_norm_cfg), 28 | dict(type='ImageToTensor', keys=['img']), 29 | dict(type='Collect', keys=['img']), 30 | ]) 31 | ] 32 | data = dict( 33 | train=dict(pipeline=train_pipeline), 34 | val=dict(pipeline=test_pipeline), 35 | test=dict(pipeline=test_pipeline)) 36 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/datasets/cityscapes_832x832.py: -------------------------------------------------------------------------------- 1 | _base_ = './cityscapes.py' 2 | img_norm_cfg = dict( 3 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 4 | crop_size = (832, 832) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations'), 8 | dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), 9 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 10 | dict(type='RandomFlip', prob=0.5), 11 | dict(type='PhotoMetricDistortion'), 12 | 
dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 14 | dict(type='DefaultFormatBundle'), 15 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 16 | ] 17 | test_pipeline = [ 18 | dict(type='LoadImageFromFile'), 19 | dict( 20 | type='MultiScaleFlipAug', 21 | img_scale=(2048, 1024), 22 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 23 | flip=False, 24 | transforms=[ 25 | dict(type='Resize', keep_ratio=True), 26 | dict(type='RandomFlip'), 27 | dict(type='Normalize', **img_norm_cfg), 28 | dict(type='ImageToTensor', keys=['img']), 29 | dict(type='Collect', keys=['img']), 30 | ]) 31 | ] 32 | data = dict( 33 | train=dict(pipeline=train_pipeline), 34 | val=dict(pipeline=test_pipeline), 35 | test=dict(pipeline=test_pipeline)) 36 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/datasets/cityscapes_1024x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = './cityscapes.py' 2 | img_norm_cfg = dict( 3 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 4 | crop_size = (1024, 1024) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations'), 8 | dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), 9 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 10 | dict(type='RandomFlip', prob=0.5), 11 | dict(type='PhotoMetricDistortion'), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 14 | dict(type='DefaultFormatBundle'), 15 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 16 | ] 17 | test_pipeline = [ 18 | dict(type='LoadImageFromFile'), 19 | dict( 20 | type='MultiScaleFlipAug', 21 | img_scale=(2048, 1024), 22 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 23 | flip=False, 24 | transforms=[ 25 | dict(type='Resize', keep_ratio=True), 26 | 
dict(type='RandomFlip'), 27 | dict(type='Normalize', **img_norm_cfg), 28 | dict(type='ImageToTensor', keys=['img']), 29 | dict(type='Collect', keys=['img']), 30 | ]) 31 | ] 32 | data = dict( 33 | train=dict(pipeline=train_pipeline), 34 | val=dict(pipeline=test_pipeline), 35 | test=dict(pipeline=test_pipeline)) 36 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/models/ccnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='CCHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=512, 22 | recurrence=2, 23 | dropout_ratio=0.1, 24 | num_classes=19, 25 | norm_cfg=norm_cfg, 26 | align_corners=False, 27 | loss_decode=dict( 28 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 29 | auxiliary_head=dict( 30 | type='FCNHead', 31 | in_channels=1024, 32 | in_index=2, 33 | channels=256, 34 | num_convs=1, 35 | concat_input=False, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 42 | # model training and testing settings 43 | train_cfg=dict(), 44 | test_cfg=dict(mode='whole')) 45 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/models/danet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | 
model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='DAHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=512, 22 | pam_channels=64, 23 | dropout_ratio=0.1, 24 | num_classes=19, 25 | norm_cfg=norm_cfg, 26 | align_corners=False, 27 | loss_decode=dict( 28 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 29 | auxiliary_head=dict( 30 | type='FCNHead', 31 | in_channels=1024, 32 | in_index=2, 33 | channels=256, 34 | num_convs=1, 35 | concat_input=False, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 42 | # model training and testing settings 43 | train_cfg=dict(), 44 | test_cfg=dict(mode='whole')) 45 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/models/pspnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='PSPHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=512, 22 | pool_scales=(1, 2, 3, 6), 23 | dropout_ratio=0.1, 24 | num_classes=19, 25 | norm_cfg=norm_cfg, 26 | align_corners=False, 27 | loss_decode=dict( 28 | 
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 29 | auxiliary_head=dict( 30 | type='FCNHead', 31 | in_channels=1024, 32 | in_index=2, 33 | channels=256, 34 | num_convs=1, 35 | concat_input=False, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 42 | # model training and testing settings 43 | train_cfg=dict(), 44 | test_cfg=dict(mode='whole')) 45 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/models/deeplabv3_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='ASPPHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=512, 22 | dilations=(1, 12, 24, 36), 23 | dropout_ratio=0.1, 24 | num_classes=19, 25 | norm_cfg=norm_cfg, 26 | align_corners=False, 27 | loss_decode=dict( 28 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 29 | auxiliary_head=dict( 30 | type='FCNHead', 31 | in_channels=1024, 32 | in_index=2, 33 | channels=256, 34 | num_convs=1, 35 | concat_input=False, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 42 | # model training and testing settings 43 | train_cfg=dict(), 44 | test_cfg=dict(mode='whole')) 45 | -------------------------------------------------------------------------------- 
/mmseg_custom/configs/_base_/models/fcn_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='FCNHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=512, 22 | num_convs=2, 23 | concat_input=True, 24 | dropout_ratio=0.1, 25 | num_classes=19, 26 | norm_cfg=norm_cfg, 27 | align_corners=False, 28 | loss_decode=dict( 29 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 30 | auxiliary_head=dict( 31 | type='FCNHead', 32 | in_channels=1024, 33 | in_index=2, 34 | channels=256, 35 | num_convs=1, 36 | concat_input=False, 37 | dropout_ratio=0.1, 38 | num_classes=19, 39 | norm_cfg=norm_cfg, 40 | align_corners=False, 41 | loss_decode=dict( 42 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 43 | # model training and testing settings 44 | train_cfg=dict(), 45 | test_cfg=dict(mode='whole')) 46 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/models/isanet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | 
type='ISAHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=512, 22 | isa_channels=256, 23 | down_factor=(8, 8), 24 | dropout_ratio=0.1, 25 | num_classes=19, 26 | norm_cfg=norm_cfg, 27 | align_corners=False, 28 | loss_decode=dict( 29 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 30 | auxiliary_head=dict( 31 | type='FCNHead', 32 | in_channels=1024, 33 | in_index=2, 34 | channels=256, 35 | num_convs=1, 36 | concat_input=False, 37 | dropout_ratio=0.1, 38 | num_classes=19, 39 | norm_cfg=norm_cfg, 40 | align_corners=False, 41 | loss_decode=dict( 42 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 43 | # model training and testing settings 44 | train_cfg=dict(), 45 | test_cfg=dict(mode='whole')) 46 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/models/dmnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='DMHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=512, 22 | filter_sizes=(1, 3, 5, 7), 23 | dropout_ratio=0.1, 24 | num_classes=19, 25 | norm_cfg=dict(type='SyncBN', requires_grad=True), 26 | align_corners=False, 27 | loss_decode=dict( 28 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 29 | auxiliary_head=dict( 30 | type='FCNHead', 31 | in_channels=1024, 32 | in_index=2, 33 | channels=256, 34 | num_convs=1, 35 | concat_input=False, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | 
loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 42 | # model training and testing settings 43 | train_cfg=dict(), 44 | test_cfg=dict(mode='whole')) 45 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/models/upernet_r50.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 1, 1), 12 | strides=(1, 2, 2, 2), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='UPerHead', 19 | in_channels=[256, 512, 1024, 2048], 20 | in_index=[0, 1, 2, 3], 21 | pool_scales=(1, 2, 3, 6), 22 | channels=512, 23 | dropout_ratio=0.1, 24 | num_classes=19, 25 | norm_cfg=norm_cfg, 26 | align_corners=False, 27 | loss_decode=dict( 28 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 29 | auxiliary_head=dict( 30 | type='FCNHead', 31 | in_channels=1024, 32 | in_index=2, 33 | channels=256, 34 | num_convs=1, 35 | concat_input=False, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 42 | # model training and testing settings 43 | train_cfg=dict(), 44 | test_cfg=dict(mode='whole')) 45 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/models/apcnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 
6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='APCHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=512, 22 | pool_scales=(1, 2, 3, 6), 23 | dropout_ratio=0.1, 24 | num_classes=19, 25 | norm_cfg=dict(type='SyncBN', requires_grad=True), 26 | align_corners=False, 27 | loss_decode=dict( 28 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 29 | auxiliary_head=dict( 30 | type='FCNHead', 31 | in_channels=1024, 32 | in_index=2, 33 | channels=256, 34 | num_convs=1, 35 | concat_input=False, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 42 | # model training and testing settings 43 | train_cfg=dict(), 44 | test_cfg=dict(mode='whole')) 45 | -------------------------------------------------------------------------------- /mmseg_custom/configs/upernet/upernet_r50_512x512_80k_ade20k_hard_pixel.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './upernet_r50_512x512_80k_ade20k.py' 3 | ] 4 | model = dict( 5 | pretrained=None, 6 | # pretrained='open-mmlab://resnet50_v1c', 7 | backbone=dict( 8 | depth=50, 9 | # type='ResNetV1c', 10 | # type='ResNetV1cWithBlur', 11 | # type='NyResNetFreezePretrain', 12 | # frozen_stages=4, 13 | type='NyResNet', 14 | # blur_type='adafreq', 15 | # blur_type='blur', 16 | blur_type='flc', 17 | freq_thres=0.25 * 1.4, 18 | # blur_k=7, 19 | with_cp=True, 20 | # use_checkpoing=True, 21 | init_cfg=dict( 22 | type='Pretrained', 23 | checkpoint='open-mmlab://resnet50_v1c', 24 | # prefix='backbone.' 
25 | ) 26 | ), 27 | decode_head=dict( 28 | type='UPerHead', 29 | channels=128, 30 | ) 31 | ) 32 | data = dict( 33 | samples_per_gpu=16, 34 | workers_per_gpu=8, 35 | ) 36 | optimizer = dict( 37 | paramwise_cfg = dict( 38 | custom_keys={ 39 | # 'FPNDyHPAlign': dict(lr_mult=2.), 40 | # 'FPNFADyHPAlign': dict(lr_mult=2.), 41 | # 'FaPNDyHPAlign': dict(lr_mult=2.), 42 | 'head': dict(lr_mult=2.), 43 | 'att': dict(lr_mult=2.), 44 | # 'comp_conv': dict(lr_mult=2.), 45 | })) 46 | checkpoint_config = dict(max_keep_ckpts=2) 47 | evaluation = dict(save_best='mIoU', pre_eval='True') 48 | -------------------------------------------------------------------------------- /mmseg_custom/configs/upernet/upernet_r50_512x512_40k_voc12aug_hard_pixel.py: -------------------------------------------------------------------------------- 1 | _base_ = './upernet_r50_512x512_40k_voc12aug.py' 2 | model = dict( 3 | pretrained=None, 4 | # pretrained='open-mmlab://resnet50_v1c', 5 | backbone=dict( 6 | depth=50, 7 | # type='ResNetV1c', 8 | # type='ResNetV1cWithBlur', 9 | # type='NyResNetFreezePretrain', 10 | # frozen_stages=4, 11 | type='NyResNet', 12 | # type='ResNetFreqMix', 13 | # blur_type='adafreq', 14 | # blur_type='blur', 15 | blur_type='flc', 16 | freq_thres=0.25 * 1.4, 17 | # blur_k=7, 18 | # with_cp=True, 19 | # use_checkpoing=True, 20 | init_cfg=dict( 21 | type='Pretrained', 22 | checkpoint='open-mmlab://resnet50_v1c', 23 | # prefix='backbone.' 
24 | ) 25 | ), 26 | decode_head=dict( 27 | type='UPerHead', 28 | channels=128,) 29 | ) 30 | data = dict( 31 | samples_per_gpu=16, 32 | workers_per_gpu=16, 33 | ) 34 | checkpoint_config = dict(max_keep_ckpts=2) 35 | optimizer = dict( 36 | paramwise_cfg = dict( 37 | custom_keys={ 38 | # 'FPNDyHPAlign': dict(lr_mult=2.), 39 | # 'FPNFADyHPAlign': dict(lr_mult=2.), 40 | # 'FaPNDyHPAlign': dict(lr_mult=2.), 41 | 'head': dict(lr_mult=2.), 42 | 'att': dict(lr_mult=2.), 43 | # 'comp_conv': dict(lr_mult=2.), 44 | })) 45 | evaluation = dict(save_best='mIoU', pre_eval='True') -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/models/dnl_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='DNLHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=512, 22 | dropout_ratio=0.1, 23 | reduction=2, 24 | use_scale=True, 25 | mode='embedded_gaussian', 26 | num_classes=19, 27 | norm_cfg=norm_cfg, 28 | align_corners=False, 29 | loss_decode=dict( 30 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 31 | auxiliary_head=dict( 32 | type='FCNHead', 33 | in_channels=1024, 34 | in_index=2, 35 | channels=256, 36 | num_convs=1, 37 | concat_input=False, 38 | dropout_ratio=0.1, 39 | num_classes=19, 40 | norm_cfg=norm_cfg, 41 | align_corners=False, 42 | loss_decode=dict( 43 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 44 | # model training and testing settings 45 | train_cfg=dict(), 46 | 
test_cfg=dict(mode='whole')) 47 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/models/nonlocal_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='NLHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=512, 22 | dropout_ratio=0.1, 23 | reduction=2, 24 | use_scale=True, 25 | mode='embedded_gaussian', 26 | num_classes=19, 27 | norm_cfg=norm_cfg, 28 | align_corners=False, 29 | loss_decode=dict( 30 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 31 | auxiliary_head=dict( 32 | type='FCNHead', 33 | in_channels=1024, 34 | in_index=2, 35 | channels=256, 36 | num_convs=1, 37 | concat_input=False, 38 | dropout_ratio=0.1, 39 | num_classes=19, 40 | norm_cfg=norm_cfg, 41 | align_corners=False, 42 | loss_decode=dict( 43 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 44 | # model training and testing settings 45 | train_cfg=dict(), 46 | test_cfg=dict(mode='whole')) 47 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/models/gcnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | 
strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='GCHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=512, 22 | ratio=1 / 4., 23 | pooling_type='att', 24 | fusion_types=('channel_add', ), 25 | dropout_ratio=0.1, 26 | num_classes=19, 27 | norm_cfg=norm_cfg, 28 | align_corners=False, 29 | loss_decode=dict( 30 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 31 | auxiliary_head=dict( 32 | type='FCNHead', 33 | in_channels=1024, 34 | in_index=2, 35 | channels=256, 36 | num_convs=1, 37 | concat_input=False, 38 | dropout_ratio=0.1, 39 | num_classes=19, 40 | norm_cfg=norm_cfg, 41 | align_corners=False, 42 | loss_decode=dict( 43 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 44 | # model training and testing settings 45 | train_cfg=dict(), 46 | test_cfg=dict(mode='whole')) 47 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/models/emanet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='EMAHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=256, 22 | ema_channels=512, 23 | num_bases=64, 24 | num_stages=3, 25 | momentum=0.1, 26 | dropout_ratio=0.1, 27 | num_classes=19, 28 | norm_cfg=norm_cfg, 29 | align_corners=False, 30 | loss_decode=dict( 31 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 32 | auxiliary_head=dict( 33 | type='FCNHead', 34 | 
in_channels=1024, 35 | in_index=2, 36 | channels=256, 37 | num_convs=1, 38 | concat_input=False, 39 | dropout_ratio=0.1, 40 | num_classes=19, 41 | norm_cfg=norm_cfg, 42 | align_corners=False, 43 | loss_decode=dict( 44 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 45 | # model training and testing settings 46 | train_cfg=dict(), 47 | test_cfg=dict(mode='whole')) 48 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/models/ann_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='ANNHead', 19 | in_channels=[1024, 2048], 20 | in_index=[2, 3], 21 | channels=512, 22 | project_channels=256, 23 | query_scales=(1, ), 24 | key_pool_scales=(1, 3, 6, 8), 25 | dropout_ratio=0.1, 26 | num_classes=19, 27 | norm_cfg=norm_cfg, 28 | align_corners=False, 29 | loss_decode=dict( 30 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 31 | auxiliary_head=dict( 32 | type='FCNHead', 33 | in_channels=1024, 34 | in_index=2, 35 | channels=256, 36 | num_convs=1, 37 | concat_input=False, 38 | dropout_ratio=0.1, 39 | num_classes=19, 40 | norm_cfg=norm_cfg, 41 | align_corners=False, 42 | loss_decode=dict( 43 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 44 | # model training and testing settings 45 | train_cfg=dict(), 46 | test_cfg=dict(mode='whole')) 47 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/models/deeplabv3plus_r50-d8.py: 
-------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='DepthwiseSeparableASPPHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=512, 22 | dilations=(1, 12, 24, 36), 23 | c1_in_channels=256, 24 | c1_channels=48, 25 | dropout_ratio=0.1, 26 | num_classes=19, 27 | norm_cfg=norm_cfg, 28 | align_corners=False, 29 | loss_decode=dict( 30 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 31 | auxiliary_head=dict( 32 | type='FCNHead', 33 | in_channels=1024, 34 | in_index=2, 35 | channels=256, 36 | num_convs=1, 37 | concat_input=False, 38 | dropout_ratio=0.1, 39 | num_classes=19, 40 | norm_cfg=norm_cfg, 41 | align_corners=False, 42 | loss_decode=dict( 43 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 44 | # model training and testing settings 45 | train_cfg=dict(), 46 | test_cfg=dict(mode='whole')) 47 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/models/deeplabv3plus_r50-d8-AAFS.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | with_cp=True, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | dilations=(1, 1, 2, 4), 13 | strides=(1, 2, 1, 1), 14 | norm_cfg=norm_cfg, 15 | norm_eval=False, 16 | style='pytorch', 17 | contract_dilation=True), 
18 | decode_head=dict( 19 | type='AADepthwiseSeparableASPPHead', 20 | in_channels=2048, 21 | in_index=3, 22 | channels=512, 23 | dilations=(1, 12, 24, 36), 24 | c1_in_channels=256, 25 | c1_channels=48, 26 | dropout_ratio=0.1, 27 | num_classes=19, 28 | norm_cfg=norm_cfg, 29 | align_corners=False, 30 | loss_decode=dict( 31 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 32 | auxiliary_head=dict( 33 | type='FCNHead', 34 | in_channels=1024, 35 | in_index=2, 36 | channels=256, 37 | num_convs=1, 38 | concat_input=False, 39 | dropout_ratio=0.1, 40 | num_classes=19, 41 | norm_cfg=norm_cfg, 42 | align_corners=False, 43 | loss_decode=dict( 44 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 45 | # model training and testing settings 46 | train_cfg=dict(), 47 | test_cfg=dict(mode='whole')) 48 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/models/ocrnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='CascadeEncoderDecoder', 5 | num_stages=2, 6 | pretrained='open-mmlab://resnet50_v1c', 7 | backbone=dict( 8 | type='ResNetV1c', 9 | depth=50, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | dilations=(1, 1, 2, 4), 13 | strides=(1, 2, 1, 1), 14 | norm_cfg=norm_cfg, 15 | norm_eval=False, 16 | style='pytorch', 17 | contract_dilation=True), 18 | decode_head=[ 19 | dict( 20 | type='FCNHead', 21 | in_channels=1024, 22 | in_index=2, 23 | channels=256, 24 | num_convs=1, 25 | concat_input=False, 26 | dropout_ratio=0.1, 27 | num_classes=19, 28 | norm_cfg=norm_cfg, 29 | align_corners=False, 30 | loss_decode=dict( 31 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 32 | dict( 33 | type='OCRHead', 34 | in_channels=2048, 35 | in_index=3, 36 | channels=512, 37 | ocr_channels=256, 38 | dropout_ratio=0.1, 39 | num_classes=19, 40 | 
norm_cfg=norm_cfg, 41 | align_corners=False, 42 | loss_decode=dict( 43 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) 44 | ], 45 | # model training and testing settings 46 | train_cfg=dict(), 47 | test_cfg=dict(mode='whole')) 48 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/models/psanet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='PSAHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=512, 22 | mask_size=(97, 97), 23 | psa_type='bi-direction', 24 | compact=False, 25 | shrink_factor=2, 26 | normalization_factor=1.0, 27 | psa_softmax=True, 28 | dropout_ratio=0.1, 29 | num_classes=19, 30 | norm_cfg=norm_cfg, 31 | align_corners=False, 32 | loss_decode=dict( 33 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 34 | auxiliary_head=dict( 35 | type='FCNHead', 36 | in_channels=1024, 37 | in_index=2, 38 | channels=256, 39 | num_convs=1, 40 | concat_input=False, 41 | dropout_ratio=0.1, 42 | num_classes=19, 43 | norm_cfg=norm_cfg, 44 | align_corners=False, 45 | loss_decode=dict( 46 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 47 | # model training and testing settings 48 | train_cfg=dict(), 49 | test_cfg=dict(mode='whole')) 50 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/models/twins_pcpvt-s_fpn.py: -------------------------------------------------------------------------------- 1 | 
checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_small_20220308-e638c41c.pth' # noqa 2 | 3 | # model settings 4 | backbone_norm_cfg = dict(type='LN') 5 | norm_cfg = dict(type='SyncBN', requires_grad=True) 6 | model = dict( 7 | type='EncoderDecoder', 8 | backbone=dict( 9 | type='PCPVT', 10 | init_cfg=dict(type='Pretrained', checkpoint=checkpoint), 11 | in_channels=3, 12 | embed_dims=[64, 128, 320, 512], 13 | num_heads=[1, 2, 5, 8], 14 | patch_sizes=[4, 2, 2, 2], 15 | strides=[4, 2, 2, 2], 16 | mlp_ratios=[8, 8, 4, 4], 17 | out_indices=(0, 1, 2, 3), 18 | qkv_bias=True, 19 | norm_cfg=backbone_norm_cfg, 20 | depths=[3, 4, 6, 3], 21 | sr_ratios=[8, 4, 2, 1], 22 | norm_after_stage=False, 23 | drop_rate=0.0, 24 | attn_drop_rate=0., 25 | drop_path_rate=0.2), 26 | neck=dict( 27 | type='FPN', 28 | in_channels=[64, 128, 320, 512], 29 | out_channels=256, 30 | num_outs=4), 31 | decode_head=dict( 32 | type='FPNHead', 33 | in_channels=[256, 256, 256, 256], 34 | in_index=[0, 1, 2, 3], 35 | feature_strides=[4, 8, 16, 32], 36 | channels=128, 37 | dropout_ratio=0.1, 38 | num_classes=150, 39 | norm_cfg=norm_cfg, 40 | align_corners=False, 41 | loss_decode=dict( 42 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 43 | # model training and testing settings 44 | train_cfg=dict(), 45 | test_cfg=dict(mode='whole')) 46 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/models/encnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | 
contract_dilation=True), 17 | decode_head=dict( 18 | type='EncHead', 19 | in_channels=[512, 1024, 2048], 20 | in_index=(1, 2, 3), 21 | channels=512, 22 | num_codes=32, 23 | use_se_loss=True, 24 | add_lateral=False, 25 | dropout_ratio=0.1, 26 | num_classes=19, 27 | norm_cfg=norm_cfg, 28 | align_corners=False, 29 | loss_decode=dict( 30 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 31 | loss_se_decode=dict( 32 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2)), 33 | auxiliary_head=dict( 34 | type='FCNHead', 35 | in_channels=1024, 36 | in_index=2, 37 | channels=256, 38 | num_convs=1, 39 | concat_input=False, 40 | dropout_ratio=0.1, 41 | num_classes=19, 42 | norm_cfg=norm_cfg, 43 | align_corners=False, 44 | loss_decode=dict( 45 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 46 | # model training and testing settings 47 | train_cfg=dict(), 48 | test_cfg=dict(mode='whole')) 49 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/models/upernet_mae.py: -------------------------------------------------------------------------------- 1 | norm_cfg = dict(type='SyncBN', requires_grad=True) 2 | model = dict( 3 | type='EncoderDecoder', 4 | pretrained=None, 5 | backbone=dict( 6 | type='MAE', 7 | img_size=(640, 640), 8 | patch_size=16, 9 | in_channels=3, 10 | embed_dims=768, 11 | num_layers=12, 12 | num_heads=12, 13 | mlp_ratio=4, 14 | out_indices=(3, 5, 7, 11), 15 | attn_drop_rate=0.0, 16 | drop_path_rate=0.1, 17 | norm_cfg=dict(type='LN', eps=1e-6), 18 | act_cfg=dict(type='GELU'), 19 | norm_eval=False, 20 | init_values=0.1), 21 | neck=dict(type='Feature2Pyramid', embed_dim=768, rescales=[4, 2, 1, 0.5]), 22 | decode_head=dict( 23 | type='UPerHead', 24 | in_channels=[384, 384, 384, 384], 25 | in_index=[0, 1, 2, 3], 26 | pool_scales=(1, 2, 3, 6), 27 | channels=512, 28 | dropout_ratio=0.1, 29 | num_classes=19, 30 | norm_cfg=norm_cfg, 31 | align_corners=False, 32 | 
loss_decode=dict( 33 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 34 | auxiliary_head=dict( 35 | type='FCNHead', 36 | in_channels=384, 37 | in_index=2, 38 | channels=256, 39 | num_convs=1, 40 | concat_input=False, 41 | dropout_ratio=0.1, 42 | num_classes=19, 43 | norm_cfg=norm_cfg, 44 | align_corners=False, 45 | loss_decode=dict( 46 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 47 | # model training and testing settings 48 | train_cfg=dict(), 49 | test_cfg=dict(mode='whole')) 50 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/models/upernet_beit.py: -------------------------------------------------------------------------------- 1 | norm_cfg = dict(type='SyncBN', requires_grad=True) 2 | model = dict( 3 | type='EncoderDecoder', 4 | pretrained=None, 5 | backbone=dict( 6 | type='BEiT', 7 | img_size=(640, 640), 8 | patch_size=16, 9 | in_channels=3, 10 | embed_dims=768, 11 | num_layers=12, 12 | num_heads=12, 13 | mlp_ratio=4, 14 | out_indices=(3, 5, 7, 11), 15 | qv_bias=True, 16 | attn_drop_rate=0.0, 17 | drop_path_rate=0.1, 18 | norm_cfg=dict(type='LN', eps=1e-6), 19 | act_cfg=dict(type='GELU'), 20 | norm_eval=False, 21 | init_values=0.1), 22 | neck=dict(type='Feature2Pyramid', embed_dim=768, rescales=[4, 2, 1, 0.5]), 23 | decode_head=dict( 24 | type='UPerHead', 25 | in_channels=[768, 768, 768, 768], 26 | in_index=[0, 1, 2, 3], 27 | pool_scales=(1, 2, 3, 6), 28 | channels=768, 29 | dropout_ratio=0.1, 30 | num_classes=150, 31 | norm_cfg=norm_cfg, 32 | align_corners=False, 33 | loss_decode=dict( 34 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 35 | auxiliary_head=dict( 36 | type='FCNHead', 37 | in_channels=768, 38 | in_index=2, 39 | channels=256, 40 | num_convs=1, 41 | concat_input=False, 42 | dropout_ratio=0.1, 43 | num_classes=150, 44 | norm_cfg=norm_cfg, 45 | align_corners=False, 46 | loss_decode=dict( 47 | type='CrossEntropyLoss', 
use_sigmoid=False, loss_weight=0.4)), 48 | # model training and testing settings 49 | train_cfg=dict(), 50 | test_cfg=dict(mode='whole')) 51 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/models/upernet_convnext.py: -------------------------------------------------------------------------------- 1 | norm_cfg = dict(type='SyncBN', requires_grad=True) 2 | custom_imports = dict(imports='mmcls.models', allow_failed_imports=False) 3 | checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-base_3rdparty_32xb128-noema_in1k_20220301-2a0ee547.pth' # noqa 4 | model = dict( 5 | type='EncoderDecoder', 6 | pretrained=None, 7 | backbone=dict( 8 | type='mmcls.ConvNeXt', 9 | arch='base', 10 | out_indices=[0, 1, 2, 3], 11 | drop_path_rate=0.4, 12 | layer_scale_init_value=1.0, 13 | gap_before_final_norm=False, 14 | init_cfg=dict( 15 | type='Pretrained', checkpoint=checkpoint_file, 16 | prefix='backbone.')), 17 | decode_head=dict( 18 | type='UPerHead', 19 | in_channels=[128, 256, 512, 1024], 20 | in_index=[0, 1, 2, 3], 21 | pool_scales=(1, 2, 3, 6), 22 | channels=512, 23 | dropout_ratio=0.1, 24 | num_classes=19, 25 | norm_cfg=norm_cfg, 26 | align_corners=False, 27 | loss_decode=dict( 28 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 29 | auxiliary_head=dict( 30 | type='FCNHead', 31 | in_channels=384, # NOTE(review): decode_head lists 512 channels for in_index=2 - confirm 384 is overridden by the concrete config 32 | in_index=2, 33 | channels=256, 34 | num_convs=1, 35 | concat_input=False, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 42 | # model training and testing settings 43 | train_cfg=dict(), 44 | test_cfg=dict(mode='whole')) 45 | -------------------------------------------------------------------------------- /mmseg_custom/core/utils/dist_util.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import numpy as np 3 | import torch 4 | import torch.distributed as dist 5 | from mmcv.runner import get_dist_info 6 | 7 | 8 | def check_dist_init(): 9 | return dist.is_available() and dist.is_initialized() # True only when torch.distributed is both available and initialized 10 | 11 | 12 | def sync_random_seed(seed=None, device='cuda'): 13 | """Make sure different ranks share the same seed. All workers must call 14 | this function, otherwise it will deadlock. This method is generally used in 15 | `DistributedSampler`, because the seed should be identical across all 16 | processes in the distributed group. 17 | 18 | In distributed sampling, different ranks should sample non-overlapped 19 | data in the dataset. Therefore, this function is used to make sure that 20 | each rank shuffles the data indices in the same order based 21 | on the same seed. Then different ranks could use different indices 22 | to select non-overlapped data from the same data list. 23 | 24 | Args: 25 | seed (int, Optional): The seed. If None, a random seed is drawn with NumPy. Default to None. 26 | device (str): The device where the seed will be put on. 27 | Default to 'cuda'. 28 | Returns: 29 | int: Seed to be used (the seed held by rank 0 when world_size > 1). 
30 | """ 31 | 32 | if seed is None: 33 | seed = np.random.randint(2**31) # no seed given: draw one locally 34 | assert isinstance(seed, int) 35 | 36 | rank, world_size = get_dist_info() 37 | 38 | if world_size == 1: 39 | return seed # single process: nothing to synchronize 40 | 41 | if rank == 0: 42 | random_num = torch.tensor(seed, dtype=torch.int32, device=device) 43 | else: 44 | random_num = torch.tensor(0, dtype=torch.int32, device=device) # placeholder, overwritten by the broadcast below 45 | dist.broadcast(random_num, src=0) # every rank receives the value held by rank 0 46 | return random_num.item() 47 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/models/pspnet_unet_s5-d16.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained=None, 6 | backbone=dict( 7 | type='UNet', 8 | in_channels=3, 9 | base_channels=64, 10 | num_stages=5, 11 | strides=(1, 1, 1, 1, 1), 12 | enc_num_convs=(2, 2, 2, 2, 2), 13 | dec_num_convs=(2, 2, 2, 2), 14 | downsamples=(True, True, True, True), 15 | enc_dilations=(1, 1, 1, 1, 1), 16 | dec_dilations=(1, 1, 1, 1), 17 | with_cp=False, 18 | conv_cfg=None, 19 | norm_cfg=norm_cfg, 20 | act_cfg=dict(type='ReLU'), 21 | upsample_cfg=dict(type='InterpConv'), 22 | norm_eval=False), 23 | decode_head=dict( 24 | type='PSPHead', 25 | in_channels=64, 26 | in_index=4, 27 | channels=16, 28 | pool_scales=(1, 2, 3, 6), 29 | dropout_ratio=0.1, 30 | num_classes=2, 31 | norm_cfg=norm_cfg, 32 | align_corners=False, 33 | loss_decode=dict( 34 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 35 | auxiliary_head=dict( 36 | type='FCNHead', 37 | in_channels=128, 38 | in_index=3, 39 | channels=64, 40 | num_convs=1, 41 | concat_input=False, 42 | dropout_ratio=0.1, 43 | num_classes=2, 44 | norm_cfg=norm_cfg, 45 | align_corners=False, 46 | loss_decode=dict( 47 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 48 | # model training and testing settings 49 | train_cfg=dict(), 50 | 
test_cfg=dict(mode='slide', crop_size=256, stride=170)) 51 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/models/deeplabv3_unet_s5-d16.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained=None, 6 | backbone=dict( 7 | type='UNet', 8 | in_channels=3, 9 | base_channels=64, 10 | num_stages=5, 11 | strides=(1, 1, 1, 1, 1), 12 | enc_num_convs=(2, 2, 2, 2, 2), 13 | dec_num_convs=(2, 2, 2, 2), 14 | downsamples=(True, True, True, True), 15 | enc_dilations=(1, 1, 1, 1, 1), 16 | dec_dilations=(1, 1, 1, 1), 17 | with_cp=False, 18 | conv_cfg=None, 19 | norm_cfg=norm_cfg, 20 | act_cfg=dict(type='ReLU'), 21 | upsample_cfg=dict(type='InterpConv'), 22 | norm_eval=False), 23 | decode_head=dict( 24 | type='ASPPHead', 25 | in_channels=64, 26 | in_index=4, 27 | channels=16, 28 | dilations=(1, 12, 24, 36), 29 | dropout_ratio=0.1, 30 | num_classes=2, 31 | norm_cfg=norm_cfg, 32 | align_corners=False, 33 | loss_decode=dict( 34 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 35 | auxiliary_head=dict( 36 | type='FCNHead', 37 | in_channels=128, 38 | in_index=3, 39 | channels=64, 40 | num_convs=1, 41 | concat_input=False, 42 | dropout_ratio=0.1, 43 | num_classes=2, 44 | norm_cfg=norm_cfg, 45 | align_corners=False, 46 | loss_decode=dict( 47 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 48 | # model training and testing settings 49 | train_cfg=dict(), 50 | test_cfg=dict(mode='slide', crop_size=256, stride=170)) 51 | -------------------------------------------------------------------------------- /mmseg_custom/datasets/pipelines/compose.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
import collections
import collections.abc

from mmcv.utils import build_from_cfg

from ..builder import PIPELINES


@PIPELINES.register_module()
class Compose(object):
    """Compose multiple transforms sequentially.

    Args:
        transforms (Sequence[dict | callable]): Sequence of transform object or
            config dict to be composed.

    Raises:
        TypeError: If an element of ``transforms`` is neither a dict nor a
            callable.
    """

    def __init__(self, transforms):
        # `collections.abc` is a submodule and is imported explicitly at the
        # top of the file rather than relying on another module having
        # loaded it first.
        assert isinstance(transforms, collections.abc.Sequence)
        self.transforms = []
        for transform in transforms:
            if isinstance(transform, dict):
                # Config dicts are instantiated through the PIPELINES registry.
                transform = build_from_cfg(transform, PIPELINES)
            elif not callable(transform):
                raise TypeError('transform must be callable or a dict')
            self.transforms.append(transform)

    def __call__(self, data):
        """Call function to apply transforms sequentially.

        Args:
            data (dict): A result dict contains the data to transform.

        Returns:
            dict: Transformed data, or None as soon as any transform returns
                None (the remaining transforms are then skipped).
        """

        for t in self.transforms:
            data = t(data)
            if data is None:
                return None
        return data

    def __repr__(self):
        """Return the class name followed by one indented line per transform."""
        lines = [self.__class__.__name__ + '(']
        lines.extend(f'    {t}' for t in self.transforms)
        lines.append(')')
        return '\n'.join(lines)


# ------------------------------------------------------------------------------
# /mmseg_custom/configs/_base_/models/fcn_unet_s5-d16.py:
# ------------------------------------------------------------------------------
# model settings
# EncoderDecoder segmentor: UNet backbone with FCNHead decode and auxiliary
# heads, two output classes.
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained=None,
    backbone=dict(
        type='UNet',
        in_channels=3,
        base_channels=64,
        num_stages=5,
        strides=(1, 1, 1, 1, 1),
        enc_num_convs=(2, 2, 2, 2, 2),
        dec_num_convs=(2, 2, 2, 2),
        downsamples=(True, True, True, True),
        enc_dilations=(1, 1, 1, 1, 1),
        dec_dilations=(1, 1, 1, 1),
        with_cp=False,
        conv_cfg=None,
        norm_cfg=norm_cfg,
        act_cfg=dict(type='ReLU'),
        upsample_cfg=dict(type='InterpConv'),
        norm_eval=False),
    decode_head=dict(
        type='FCNHead',
        in_channels=64,
        in_index=4,
        channels=64,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    # The auxiliary loss is weighted 0.4 against 1.0 for the decode head.
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=128,
        in_index=3,
        channels=64,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='slide', crop_size=256, stride=170))
# ------------------------------------------------------------------------------
# /mmseg_custom/configs/_base_/models/fastfcn_r50-d32_jpu_psp.py:
# ------------------------------------------------------------------------------
# model settings
# EncoderDecoder segmentor: ResNetV1c (depth 50) backbone, JPU neck, PSPHead
# decode head and an FCNHead auxiliary head, 19 output classes.
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 2, 2),
        out_indices=(1, 2, 3),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    neck=dict(
        type='JPU',
        in_channels=(512, 1024, 2048),
        mid_channels=512,
        start_level=0,
        end_level=-1,
        dilations=(1, 2, 4, 8),
        align_corners=False,
        norm_cfg=norm_cfg),
    decode_head=dict(
        type='PSPHead',
        in_channels=2048,
        in_index=2,
        channels=512,
        pool_scales=(1, 2, 3, 6),
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    # The auxiliary loss is weighted 0.4 against 1.0 for the decode head.
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=1,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))

# ------------------------------------------------------------------------------
# /mmseg_custom/configs/_base_/models/fcn_hr18.py:
# ------------------------------------------------------------------------------
# model settings
# EncoderDecoder segmentor: four-stage HRNet backbone feeding a single FCNHead
# that consumes all four branch outputs, 19 output classes. No auxiliary head.
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://msra/hrnetv2_w18',
    backbone=dict(
        type='HRNet',
        norm_cfg=norm_cfg,
        norm_eval=False,
        extra=dict(
            stage1=dict(
                num_modules=1,
                num_branches=1,
                block='BOTTLENECK',
                num_blocks=(4, ),
                num_channels=(64, )),
            stage2=dict(
                num_modules=1,
                num_branches=2,
                block='BASIC',
                num_blocks=(4, 4),
                num_channels=(18, 36)),
            stage3=dict(
                num_modules=4,
                num_branches=3,
                block='BASIC',
                num_blocks=(4, 4, 4),
                num_channels=(18, 36, 72)),
            stage4=dict(
                num_modules=3,
                num_branches=4,
                block='BASIC',
                num_blocks=(4, 4, 4, 4),
                num_channels=(18, 36, 72, 144)))),
    decode_head=dict(
        type='FCNHead',
        # One entry per stage4 branch; `channels` is their sum (270).
        in_channels=[18, 36, 72, 144],
        in_index=(0, 1, 2, 3),
        channels=sum([18, 36, 72, 144]),
        input_transform='resize_concat',
        kernel_size=1,
        num_convs=1,
        concat_input=False,
        dropout_ratio=-1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))

# ------------------------------------------------------------------------------
# /mmseg_custom/configs/_base_/models/upernet_swin.py:
# ------------------------------------------------------------------------------
# model settings
# EncoderDecoder segmentor: SwinTransformer backbone (LN norm), UPerHead decode
# head and an FCNHead auxiliary head (SyncBN norm), 19 output classes.
norm_cfg = dict(type='SyncBN', requires_grad=True)
backbone_norm_cfg = dict(type='LN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained=None,
    backbone=dict(
        type='SwinTransformer',
        pretrain_img_size=224,
        embed_dims=96,
        patch_size=4,
        window_size=7,
        mlp_ratio=4,
        depths=[2, 2, 6, 2],
        num_heads=[3, 6, 12, 24],
        strides=(4, 2, 2, 2),
        out_indices=(0, 1, 2, 3),
        qkv_bias=True,
        qk_scale=None,
        patch_norm=True,
        drop_rate=0.,
        attn_drop_rate=0.,
        drop_path_rate=0.3,
        use_abs_pos_embed=False,
        act_cfg=dict(type='GELU'),
        norm_cfg=backbone_norm_cfg),
    decode_head=dict(
        type='UPerHead',
        # type='UPerHeadASAlign',
        in_channels=[96, 192, 384, 768],
        in_index=[0, 1, 2, 3],
        pool_scales=(1, 2, 3, 6),
        # NOTE(review): 256 is the active value; the 512 alternative is kept
        # commented out below -- confirm which one experiments expect.
        # channels=512,
        channels=256,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=384,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))

# ------------------------------------------------------------------------------
# /mmseg_custom/configs/_base_/models/twins_pcpvt-s_upernet.py:
# ------------------------------------------------------------------------------
# Pretrained backbone weights, passed to the backbone through `init_cfg`.
checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_small_20220308-e638c41c.pth'  # noqa

# model settings
# EncoderDecoder segmentor: PCPVT backbone (LN norm), UPerHead decode head and
# an FCNHead auxiliary head (SyncBN norm), 150 output classes.
backbone_norm_cfg = dict(type='LN')
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    backbone=dict(
        type='PCPVT',
        init_cfg=dict(type='Pretrained', checkpoint=checkpoint),
        in_channels=3,
        embed_dims=[64, 128, 320, 512],
        num_heads=[1, 2, 5, 8],
        patch_sizes=[4, 2, 2, 2],
        strides=[4, 2, 2, 2],
        mlp_ratios=[8, 8, 4, 4],
        out_indices=(0, 1, 2, 3),
        qkv_bias=True,
        norm_cfg=backbone_norm_cfg,
        depths=[3, 4, 6, 3],
        sr_ratios=[8, 4, 2, 1],
        norm_after_stage=False,
        drop_rate=0.0,
        attn_drop_rate=0.,
        drop_path_rate=0.2),
    decode_head=dict(
        type='UPerHead',
        in_channels=[64, 128, 320, 512],
        in_index=[0, 1, 2, 3],
        pool_scales=(1, 2, 3, 6),
        channels=512,
        dropout_ratio=0.1,
        num_classes=150,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=320,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=150,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))

# ------------------------------------------------------------------------------
# /mmseg_custom/configs/_base_/models/pointrend_r50.py:
# ------------------------------------------------------------------------------
# model settings
# Two-stage CascadeEncoderDecoder: ResNetV1c (depth 50) backbone, FPN neck, and
# a list of two decode heads (FPNHead followed by PointHead), 19 output classes.
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='CascadeEncoderDecoder',
    num_stages=2,
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 1, 1),
        strides=(1, 2, 2, 2),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=4),
    decode_head=[
        dict(
            type='FPNHead',
            in_channels=[256, 256, 256, 256],
            in_index=[0, 1, 2, 3],
            feature_strides=[4, 8, 16, 32],
            channels=128,
            dropout_ratio=-1,
            num_classes=19,
            norm_cfg=norm_cfg,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
        dict(
            type='PointHead',
            in_channels=[256],
            in_index=[0],
            channels=256,
            num_fcs=3,
            coarse_pred_each_layer=True,
            dropout_ratio=-1,
            num_classes=19,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))
    ],
    # model training and testing settings
    train_cfg=dict(
        num_points=2048, oversample_ratio=3, importance_sample_ratio=0.75),
    test_cfg=dict(
        mode='whole',
        subdivision_steps=2,
        # NOTE(review): 8196 is not a power of two (8192 is) -- confirm the
        # value is intentional before changing it.
        subdivision_num_points=8196,
        scale_factor=2))

# ------------------------------------------------------------------------------
# /mmseg_custom/configs/_base_/models/upernet_vit-b16_ln_mln.py:
# ------------------------------------------------------------------------------
# model settings
# EncoderDecoder segmentor: VisionTransformer backbone, MultiLevelNeck,
# UPerHead decode head and an FCNHead auxiliary head, 19 output classes.
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    # Relative path to locally stored pretrained weights.
    pretrained='pretrain/jx_vit_base_p16_224-80ecf9dd.pth',
    backbone=dict(
        type='VisionTransformer',
        img_size=(512, 512),
        patch_size=16,
        in_channels=3,
        embed_dims=768,
        num_layers=12,
        num_heads=12,
        mlp_ratio=4,
        out_indices=(2, 5, 8, 11),
        qkv_bias=True,
        drop_rate=0.0,
        attn_drop_rate=0.0,
        drop_path_rate=0.0,
        with_cls_token=True,
        norm_cfg=dict(type='LN', eps=1e-6),
        act_cfg=dict(type='GELU'),
        norm_eval=False,
        interpolate_mode='bicubic'),
    neck=dict(
        type='MultiLevelNeck',
        in_channels=[768, 768, 768, 768],
        out_channels=768,
        scales=[4, 2, 1, 0.5]),
    decode_head=dict(
        type='UPerHead',
        in_channels=[768, 768, 768, 768],
        in_index=[0, 1, 2, 3],
        pool_scales=(1, 2, 3, 6),
        channels=512,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=768,
        in_index=3,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))  # yapf: disable

# ------------------------------------------------------------------------------
# /mmseg_custom/configs/_base_/models/fast_scnn.py:
# ------------------------------------------------------------------------------
# model settings
# EncoderDecoder segmentor: FastSCNN backbone, DepthwiseSeparableFCNHead decode
# head and a list of two FCNHead auxiliary heads, 19 output classes.
# Unlike the other model configs in this group, every loss here sets
# use_sigmoid=True and the norm layer overrides momentum to 0.01.
norm_cfg = dict(type='SyncBN', requires_grad=True, momentum=0.01)
model = dict(
    type='EncoderDecoder',
    backbone=dict(
        type='FastSCNN',
        downsample_dw_channels=(32, 48),
        global_in_channels=64,
        global_block_channels=(64, 96, 128),
        global_block_strides=(2, 2, 1),
        global_out_channels=128,
        higher_in_channels=64,
        lower_in_channels=128,
        fusion_out_channels=128,
        out_indices=(0, 1, 2),
        norm_cfg=norm_cfg,
        align_corners=False),
    decode_head=dict(
        type='DepthwiseSeparableFCNHead',
        in_channels=128,
        channels=128,
        concat_input=False,
        num_classes=19,
        in_index=-1,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1)),
    auxiliary_head=[
        dict(
            type='FCNHead',
            in_channels=128,
            channels=32,
            num_convs=1,
            num_classes=19,
            in_index=-2,
            norm_cfg=norm_cfg,
            concat_input=False,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)),
        dict(
            type='FCNHead',
            in_channels=64,
            channels=32,
            num_convs=1,
            num_classes=19,
            in_index=-3,
            norm_cfg=norm_cfg,
            concat_input=False,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)),
    ],
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))

# ------------------------------------------------------------------------------
# /mmseg_custom/configs/_base_/datasets/loveda.py:
# ------------------------------------------------------------------------------
# dataset settings
# LoveDADataset rooted at data/loveDA, trained on 512x512 crops.
dataset_type = 'LoveDADataset'
data_root = 'data/loveDA'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (512, 512)
# Training: load, randomly rescale, crop, flip, colour-distort, normalize,
# pad to the crop size, then bundle image and label map.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
# Testing: a single scale with flipping disabled; the multi-ratio list is kept
# commented out.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1024, 1024),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# The `test` split points at the same val directories as `val`.
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='img_dir/train',
        ann_dir='ann_dir/train',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='img_dir/val',
        ann_dir='ann_dir/val',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='img_dir/val',
        ann_dir='ann_dir/val',
        pipeline=test_pipeline))

# ------------------------------------------------------------------------------
# /mmseg_custom/configs/_base_/datasets/potsdam.py:
# ------------------------------------------------------------------------------
# dataset settings
# PotsdamDataset rooted at data/potsdam, trained on 512x512 crops.
dataset_type = 'PotsdamDataset'
data_root = 'data/potsdam'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (512, 512)
# Training: load, randomly rescale, crop, flip, colour-distort, normalize,
# pad to the crop size, then bundle image and label map.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(type='Resize', img_scale=(512, 512), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
# Testing: a single scale with flipping disabled; the multi-ratio list is kept
# commented out.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(512, 512),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# The `test` split points at the same val directories as `val`.
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='img_dir/train',
        ann_dir='ann_dir/train',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='img_dir/val',
        ann_dir='ann_dir/val',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='img_dir/val',
        ann_dir='ann_dir/val',
        pipeline=test_pipeline))
# ------------------------------------------------------------------------------
# /mmseg_custom/configs/_base_/datasets/vaihingen.py:
# ------------------------------------------------------------------------------
# dataset settings
# ISPRSDataset rooted at data/vaihingen, trained on 512x512 crops.
dataset_type = 'ISPRSDataset'
data_root = 'data/vaihingen'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (512, 512)
# Training: load, randomly rescale, crop, flip, colour-distort, normalize,
# pad to the crop size, then bundle image and label map.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(type='Resize', img_scale=(512, 512), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
# Testing: a single scale with flipping disabled; the multi-ratio list is kept
# commented out.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(512, 512),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# The `test` split points at the same val directories as `val`.
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='img_dir/train',
        ann_dir='ann_dir/train',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='img_dir/val',
        ann_dir='ann_dir/val',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='img_dir/val',
        ann_dir='ann_dir/val',
        pipeline=test_pipeline))
# ------------------------------------------------------------------------------
# /mmseg_custom/configs/_base_/datasets/coco-stuff164k.py:
# ------------------------------------------------------------------------------
# dataset settings
# COCOStuffDataset rooted at data/coco_stuff164k, trained on 512x512 crops.
dataset_type = 'COCOStuffDataset'
data_root = 'data/coco_stuff164k'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (512, 512)
# Training: load, randomly rescale, crop, flip, colour-distort, normalize,
# pad to the crop size, then bundle image and label map.
# LoadAnnotations is used here without reduce_zero_label.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
# Testing: a single scale with flipping disabled; the multi-ratio list is kept
# commented out.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2048, 512),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# The `test` split points at the same val2017 directories as `val`.
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/train2017',
        ann_dir='annotations/train2017',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/val2017',
        ann_dir='annotations/val2017',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/val2017',
        ann_dir='annotations/val2017',
        pipeline=test_pipeline))
-------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/datasets/cityscapes.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CityscapesDataset' 3 | data_root = '/home/ubuntu/2TB/dataset/cityscapes' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | crop_size = (512, 1024) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations'), 10 | dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(2048, 1024), 24 | # img_scale=(1536, 768), 25 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 26 | flip=False, 27 | transforms=[ 28 | dict(type='Resize', keep_ratio=True), 29 | dict(type='RandomFlip'), 30 | dict(type='Normalize', **img_norm_cfg), 31 | dict(type='ImageToTensor', keys=['img']), 32 | dict(type='Collect', keys=['img']), 33 | ]) 34 | ] 35 | data = dict( 36 | samples_per_gpu=2, 37 | workers_per_gpu=2, 38 | train=dict( 39 | type=dataset_type, 40 | data_root=data_root, 41 | img_dir='leftImg8bit/train', 42 | ann_dir='gtFine/train', 43 | pipeline=train_pipeline), 44 | val=dict( 45 | type=dataset_type, 46 | data_root=data_root, 47 | img_dir='leftImg8bit/val', 48 | ann_dir='gtFine/val', 49 | pipeline=test_pipeline), 50 | test=dict( 51 | type=dataset_type, 52 | data_root=data_root, 53 | img_dir='leftImg8bit/val', 54 | ann_dir='gtFine/val', 55 | 
pipeline=test_pipeline)) 56 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/datasets/ade20k_640x640.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ADE20KDataset' 3 | data_root = 'data/ade/ADEChallengeData2016' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | crop_size = (640, 640) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', reduce_zero_label=True), 10 | dict(type='Resize', img_scale=(2560, 640), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(2560, 640), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | data = dict( 35 | samples_per_gpu=4, 36 | workers_per_gpu=4, 37 | train=dict( 38 | type=dataset_type, 39 | data_root=data_root, 40 | img_dir='images/training', 41 | ann_dir='annotations/training', 42 | pipeline=train_pipeline), 43 | val=dict( 44 | type=dataset_type, 45 | data_root=data_root, 46 | img_dir='images/validation', 47 | ann_dir='annotations/validation', 48 | pipeline=test_pipeline), 49 | test=dict( 50 | type=dataset_type, 51 | data_root=data_root, 52 | img_dir='images/validation', 53 | 
ann_dir='annotations/validation', 54 | pipeline=test_pipeline)) 55 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/datasets/drive.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'DRIVEDataset' 3 | data_root = 'data/DRIVE' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | img_scale = (584, 565) 7 | crop_size = (64, 64) 8 | train_pipeline = [ 9 | dict(type='LoadImageFromFile'), 10 | dict(type='LoadAnnotations'), 11 | dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), 12 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 13 | dict(type='RandomFlip', prob=0.5), 14 | dict(type='PhotoMetricDistortion'), 15 | dict(type='Normalize', **img_norm_cfg), 16 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 17 | dict(type='DefaultFormatBundle'), 18 | dict(type='Collect', keys=['img', 'gt_semantic_seg']) 19 | ] 20 | test_pipeline = [ 21 | dict(type='LoadImageFromFile'), 22 | dict( 23 | type='MultiScaleFlipAug', 24 | img_scale=img_scale, 25 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], 26 | flip=False, 27 | transforms=[ 28 | dict(type='Resize', keep_ratio=True), 29 | dict(type='RandomFlip'), 30 | dict(type='Normalize', **img_norm_cfg), 31 | dict(type='ImageToTensor', keys=['img']), 32 | dict(type='Collect', keys=['img']) 33 | ]) 34 | ] 35 | 36 | data = dict( 37 | samples_per_gpu=4, 38 | workers_per_gpu=4, 39 | train=dict( 40 | type='RepeatDataset', 41 | times=40000, 42 | dataset=dict( 43 | type=dataset_type, 44 | data_root=data_root, 45 | img_dir='images/training', 46 | ann_dir='annotations/training', 47 | pipeline=train_pipeline)), 48 | val=dict( 49 | type=dataset_type, 50 | data_root=data_root, 51 | img_dir='images/validation', 52 | ann_dir='annotations/validation', 53 | pipeline=test_pipeline), 54 | test=dict( 55 | 
type=dataset_type, 56 | data_root=data_root, 57 | img_dir='images/validation', 58 | ann_dir='annotations/validation', 59 | pipeline=test_pipeline)) 60 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/datasets/hrf.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'HRFDataset' 3 | data_root = 'data/HRF' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | img_scale = (2336, 3504) 7 | crop_size = (256, 256) 8 | train_pipeline = [ 9 | dict(type='LoadImageFromFile'), 10 | dict(type='LoadAnnotations'), 11 | dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), 12 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 13 | dict(type='RandomFlip', prob=0.5), 14 | dict(type='PhotoMetricDistortion'), 15 | dict(type='Normalize', **img_norm_cfg), 16 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 17 | dict(type='DefaultFormatBundle'), 18 | dict(type='Collect', keys=['img', 'gt_semantic_seg']) 19 | ] 20 | test_pipeline = [ 21 | dict(type='LoadImageFromFile'), 22 | dict( 23 | type='MultiScaleFlipAug', 24 | img_scale=img_scale, 25 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], 26 | flip=False, 27 | transforms=[ 28 | dict(type='Resize', keep_ratio=True), 29 | dict(type='RandomFlip'), 30 | dict(type='Normalize', **img_norm_cfg), 31 | dict(type='ImageToTensor', keys=['img']), 32 | dict(type='Collect', keys=['img']) 33 | ]) 34 | ] 35 | 36 | data = dict( 37 | samples_per_gpu=4, 38 | workers_per_gpu=4, 39 | train=dict( 40 | type='RepeatDataset', 41 | times=40000, 42 | dataset=dict( 43 | type=dataset_type, 44 | data_root=data_root, 45 | img_dir='images/training', 46 | ann_dir='annotations/training', 47 | pipeline=train_pipeline)), 48 | val=dict( 49 | type=dataset_type, 50 | data_root=data_root, 51 | img_dir='images/validation', 52 | 
ann_dir='annotations/validation', 53 | pipeline=test_pipeline), 54 | test=dict( 55 | type=dataset_type, 56 | data_root=data_root, 57 | img_dir='images/validation', 58 | ann_dir='annotations/validation', 59 | pipeline=test_pipeline)) 60 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/datasets/stare.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'STAREDataset' 3 | data_root = 'data/STARE' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | img_scale = (605, 700) 7 | crop_size = (128, 128) 8 | train_pipeline = [ 9 | dict(type='LoadImageFromFile'), 10 | dict(type='LoadAnnotations'), 11 | dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), 12 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 13 | dict(type='RandomFlip', prob=0.5), 14 | dict(type='PhotoMetricDistortion'), 15 | dict(type='Normalize', **img_norm_cfg), 16 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 17 | dict(type='DefaultFormatBundle'), 18 | dict(type='Collect', keys=['img', 'gt_semantic_seg']) 19 | ] 20 | test_pipeline = [ 21 | dict(type='LoadImageFromFile'), 22 | dict( 23 | type='MultiScaleFlipAug', 24 | img_scale=img_scale, 25 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], 26 | flip=False, 27 | transforms=[ 28 | dict(type='Resize', keep_ratio=True), 29 | dict(type='RandomFlip'), 30 | dict(type='Normalize', **img_norm_cfg), 31 | dict(type='ImageToTensor', keys=['img']), 32 | dict(type='Collect', keys=['img']) 33 | ]) 34 | ] 35 | 36 | data = dict( 37 | samples_per_gpu=4, 38 | workers_per_gpu=4, 39 | train=dict( 40 | type='RepeatDataset', 41 | times=40000, 42 | dataset=dict( 43 | type=dataset_type, 44 | data_root=data_root, 45 | img_dir='images/training', 46 | ann_dir='annotations/training', 47 | pipeline=train_pipeline)), 48 | val=dict( 49 | 
type=dataset_type, 50 | data_root=data_root, 51 | img_dir='images/validation', 52 | ann_dir='annotations/validation', 53 | pipeline=test_pipeline), 54 | test=dict( 55 | type=dataset_type, 56 | data_root=data_root, 57 | img_dir='images/validation', 58 | ann_dir='annotations/validation', 59 | pipeline=test_pipeline)) 60 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/datasets/coco-stuff10k.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'COCOStuffDataset' 3 | data_root = 'data/coco_stuff10k' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | crop_size = (512, 512) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', reduce_zero_label=True), 10 | dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(2048, 512), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | data = dict( 35 | samples_per_gpu=4, 36 | workers_per_gpu=4, 37 | train=dict( 38 | type=dataset_type, 39 | data_root=data_root, 40 | reduce_zero_label=True, 41 | img_dir='images/train2014', 42 | ann_dir='annotations/train2014', 
def digit_version(version_str: str, length: int = 4):
    """Turn a version string into a tuple of integers that sorts correctly.

    The result is the release segment, cut or zero-padded to ``length``
    entries, followed by two extra entries describing the release kind:

    * pre-release: ``(rank, number)`` where ``rank`` is -3 for alpha, -2 for
      beta, -1 for rc and -4 for anything else (including pre-releases that
      carry no ``pre`` segment), so that alpha < beta < rc < final;
    * post-release: ``(1, post_number)``;
    * final release: ``(0, 0)``.

    For example ``'1.3.13'`` gives ``(1, 3, 13, 0, 0, 0)`` and ``'1.6.0rc1'``
    gives ``(1, 6, 0, 0, -1, 1)``.

    Args:
        version_str (str): The version string.
        length (int): The maximum number of release levels kept. Default: 4.

    Returns:
        tuple[int]: The version info in digits (integers).
    """
    parsed = parse(version_str)
    assert parsed.release, f'failed to parse version {version_str}'

    # Keep at most ``length`` release levels and right-pad with zeros; a
    # non-positive repeat count yields an empty list, so no guard is needed.
    digits = list(parsed.release)[:length]
    digits += [0] * (length - len(digits))

    if parsed.is_prerelease:
        pre = parsed.pre
        # ``pre`` can be None even though the version is a pre-release.
        if pre:
            stage_rank = {'a': -3, 'b': -2, 'rc': -1}
            tag = pre[0]
            rank = stage_rank.get(tag)
            if rank is None:
                warnings.warn(f'unknown prerelease version {tag}, '
                              'version checking may go wrong')
                rank = -4
            suffix = [rank, pre[-1]]
        else:
            suffix = [-4, 0]
    elif parsed.is_postrelease:
        suffix = [1, parsed.post]
    else:
        suffix = [0, 0]

    return tuple(digits + suffix)
61 | 62 | __all__ = ['__version__', 'version_info', 'digit_version'] 63 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/datasets/chase_db1.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ChaseDB1Dataset' 3 | data_root = 'data/CHASE_DB1' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | img_scale = (960, 999) 7 | crop_size = (128, 128) 8 | train_pipeline = [ 9 | dict(type='LoadImageFromFile'), 10 | dict(type='LoadAnnotations'), 11 | dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), 12 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 13 | dict(type='RandomFlip', prob=0.5), 14 | dict(type='PhotoMetricDistortion'), 15 | dict(type='Normalize', **img_norm_cfg), 16 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 17 | dict(type='DefaultFormatBundle'), 18 | dict(type='Collect', keys=['img', 'gt_semantic_seg']) 19 | ] 20 | test_pipeline = [ 21 | dict(type='LoadImageFromFile'), 22 | dict( 23 | type='MultiScaleFlipAug', 24 | img_scale=img_scale, 25 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], 26 | flip=False, 27 | transforms=[ 28 | dict(type='Resize', keep_ratio=True), 29 | dict(type='RandomFlip'), 30 | dict(type='Normalize', **img_norm_cfg), 31 | dict(type='ImageToTensor', keys=['img']), 32 | dict(type='Collect', keys=['img']) 33 | ]) 34 | ] 35 | 36 | data = dict( 37 | samples_per_gpu=4, 38 | workers_per_gpu=4, 39 | train=dict( 40 | type='RepeatDataset', 41 | times=40000, 42 | dataset=dict( 43 | type=dataset_type, 44 | data_root=data_root, 45 | img_dir='images/training', 46 | ann_dir='annotations/training', 47 | pipeline=train_pipeline)), 48 | val=dict( 49 | type=dataset_type, 50 | data_root=data_root, 51 | img_dir='images/validation', 52 | ann_dir='annotations/validation', 53 | pipeline=test_pipeline), 54 | test=dict( 
55 | type=dataset_type, 56 | data_root=data_root, 57 | img_dir='images/validation', 58 | ann_dir='annotations/validation', 59 | pipeline=test_pipeline)) 60 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/datasets/pascal_voc12.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'PascalVOCDataset' 3 | data_root = '/home/ubuntu/dataset/VOCdevkit/VOC2012' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | crop_size = (512, 512) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations'), 10 | dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(2048, 512), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | data = dict( 35 | samples_per_gpu=4, 36 | workers_per_gpu=4, 37 | train=dict( 38 | type=dataset_type, 39 | data_root=data_root, 40 | img_dir='JPEGImages', 41 | ann_dir='SegmentationClass', 42 | split='ImageSets/Segmentation/train.txt', 43 | pipeline=train_pipeline), 44 | val=dict( 45 | type=dataset_type, 46 | data_root=data_root, 47 | img_dir='JPEGImages', 48 | ann_dir='SegmentationClass', 49 
| split='ImageSets/Segmentation/val.txt', 50 | pipeline=test_pipeline), 51 | test=dict( 52 | type=dataset_type, 53 | data_root=data_root, 54 | img_dir='JPEGImages', 55 | ann_dir='SegmentationClass', 56 | split='ImageSets/Segmentation/val.txt', 57 | pipeline=test_pipeline)) 58 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/datasets/isaid.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'iSAIDDataset' 3 | data_root = 'data/iSAID' 4 | 5 | img_norm_cfg = dict( 6 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 7 | """ 8 | This crop_size setting is followed by the implementation of 9 | `PointFlow: Flowing Semantics Through Points for Aerial Image 10 | Segmentation `_. 11 | """ 12 | 13 | crop_size = (896, 896) 14 | 15 | train_pipeline = [ 16 | dict(type='LoadImageFromFile'), 17 | dict(type='LoadAnnotations'), 18 | dict(type='Resize', img_scale=(896, 896), ratio_range=(0.5, 2.0)), 19 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 20 | dict(type='RandomFlip', prob=0.5), 21 | dict(type='PhotoMetricDistortion'), 22 | dict(type='Normalize', **img_norm_cfg), 23 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 24 | dict(type='DefaultFormatBundle'), 25 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 26 | ] 27 | test_pipeline = [ 28 | dict(type='LoadImageFromFile'), 29 | dict( 30 | type='MultiScaleFlipAug', 31 | img_scale=(896, 896), 32 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 33 | flip=False, 34 | transforms=[ 35 | dict(type='Resize', keep_ratio=True), 36 | dict(type='RandomFlip'), 37 | dict(type='Normalize', **img_norm_cfg), 38 | dict(type='ImageToTensor', keys=['img']), 39 | dict(type='Collect', keys=['img']), 40 | ]) 41 | ] 42 | data = dict( 43 | samples_per_gpu=4, 44 | workers_per_gpu=4, 45 | train=dict( 46 | type=dataset_type, 47 | data_root=data_root, 
48 | img_dir='img_dir/train', 49 | ann_dir='ann_dir/train', 50 | pipeline=train_pipeline), 51 | val=dict( 52 | type=dataset_type, 53 | data_root=data_root, 54 | img_dir='img_dir/val', 55 | ann_dir='ann_dir/val', 56 | pipeline=test_pipeline), 57 | test=dict( 58 | type=dataset_type, 59 | data_root=data_root, 60 | img_dir='img_dir/val', 61 | ann_dir='ann_dir/val', 62 | pipeline=test_pipeline)) 63 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/datasets/ade20k.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ADE20KDataset' 3 | data_root = '/home/ubuntu/2TB/dataset/ade/ADEChallengeData2016' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | crop_size = (512, 512) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', reduce_zero_label=True), 10 | dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(2048, 512), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | # resize image to multiple of 32, improve SegFormer by 0.5-1.0 mIoU. 
29 | dict(type='ResizeToMultiple', size_divisor=32), 30 | dict(type='RandomFlip'), 31 | dict(type='Normalize', **img_norm_cfg), 32 | dict(type='ImageToTensor', keys=['img']), 33 | dict(type='Collect', keys=['img']), 34 | ]) 35 | ] 36 | data = dict( 37 | samples_per_gpu=4, 38 | workers_per_gpu=4, 39 | train=dict( 40 | type=dataset_type, 41 | data_root=data_root, 42 | img_dir='images/training', 43 | ann_dir='annotations/training', 44 | pipeline=train_pipeline), 45 | val=dict( 46 | type=dataset_type, 47 | data_root=data_root, 48 | img_dir='images/validation', 49 | ann_dir='annotations/validation', 50 | pipeline=test_pipeline), 51 | test=dict( 52 | type=dataset_type, 53 | data_root=data_root, 54 | img_dir='images/validation', 55 | ann_dir='annotations/validation', 56 | pipeline=test_pipeline)) 57 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/datasets/pascal_context.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'PascalContextDataset' 3 | data_root = 'data/VOCdevkit/VOC2010/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | 7 | img_scale = (520, 520) 8 | crop_size = (480, 480) 9 | 10 | train_pipeline = [ 11 | dict(type='LoadImageFromFile'), 12 | dict(type='LoadAnnotations'), 13 | dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), 14 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 15 | dict(type='RandomFlip', prob=0.5), 16 | dict(type='PhotoMetricDistortion'), 17 | dict(type='Normalize', **img_norm_cfg), 18 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 19 | dict(type='DefaultFormatBundle'), 20 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 21 | ] 22 | test_pipeline = [ 23 | dict(type='LoadImageFromFile'), 24 | dict( 25 | type='MultiScaleFlipAug', 26 | img_scale=img_scale, 27 | # img_ratios=[0.5, 0.75, 1.0, 
1.25, 1.5, 1.75], 28 | flip=False, 29 | transforms=[ 30 | dict(type='Resize', keep_ratio=True), 31 | dict(type='RandomFlip'), 32 | dict(type='Normalize', **img_norm_cfg), 33 | dict(type='ImageToTensor', keys=['img']), 34 | dict(type='Collect', keys=['img']), 35 | ]) 36 | ] 37 | data = dict( 38 | samples_per_gpu=4, 39 | workers_per_gpu=4, 40 | train=dict( 41 | type=dataset_type, 42 | data_root=data_root, 43 | img_dir='JPEGImages', 44 | ann_dir='SegmentationClassContext', 45 | split='ImageSets/SegmentationContext/train.txt', 46 | pipeline=train_pipeline), 47 | val=dict( 48 | type=dataset_type, 49 | data_root=data_root, 50 | img_dir='JPEGImages', 51 | ann_dir='SegmentationClassContext', 52 | split='ImageSets/SegmentationContext/val.txt', 53 | pipeline=test_pipeline), 54 | test=dict( 55 | type=dataset_type, 56 | data_root=data_root, 57 | img_dir='JPEGImages', 58 | ann_dir='SegmentationClassContext', 59 | split='ImageSets/SegmentationContext/val.txt', 60 | pipeline=test_pipeline)) 61 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/datasets/pascal_context_59.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'PascalContextDataset59' 3 | data_root = 'data/VOCdevkit/VOC2010/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | 7 | img_scale = (520, 520) 8 | crop_size = (480, 480) 9 | 10 | train_pipeline = [ 11 | dict(type='LoadImageFromFile'), 12 | dict(type='LoadAnnotations', reduce_zero_label=True), 13 | dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), 14 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 15 | dict(type='RandomFlip', prob=0.5), 16 | dict(type='PhotoMetricDistortion'), 17 | dict(type='Normalize', **img_norm_cfg), 18 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 19 | dict(type='DefaultFormatBundle'), 20 | 
dict(type='Collect', keys=['img', 'gt_semantic_seg']), 21 | ] 22 | test_pipeline = [ 23 | dict(type='LoadImageFromFile'), 24 | dict( 25 | type='MultiScaleFlipAug', 26 | img_scale=img_scale, 27 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 28 | flip=False, 29 | transforms=[ 30 | dict(type='Resize', keep_ratio=True), 31 | dict(type='RandomFlip'), 32 | dict(type='Normalize', **img_norm_cfg), 33 | dict(type='ImageToTensor', keys=['img']), 34 | dict(type='Collect', keys=['img']), 35 | ]) 36 | ] 37 | data = dict( 38 | samples_per_gpu=4, 39 | workers_per_gpu=4, 40 | train=dict( 41 | type=dataset_type, 42 | data_root=data_root, 43 | img_dir='JPEGImages', 44 | ann_dir='SegmentationClassContext', 45 | split='ImageSets/SegmentationContext/train.txt', 46 | pipeline=train_pipeline), 47 | val=dict( 48 | type=dataset_type, 49 | data_root=data_root, 50 | img_dir='JPEGImages', 51 | ann_dir='SegmentationClassContext', 52 | split='ImageSets/SegmentationContext/val.txt', 53 | pipeline=test_pipeline), 54 | test=dict( 55 | type=dataset_type, 56 | data_root=data_root, 57 | img_dir='JPEGImages', 58 | ann_dir='SegmentationClassContext', 59 | split='ImageSets/SegmentationContext/val.txt', 60 | pipeline=test_pipeline)) 61 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/models/bisenetv1_r18-d32.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | backbone=dict( 6 | type='BiSeNetV1', 7 | in_channels=3, 8 | context_channels=(128, 256, 512), 9 | spatial_channels=(64, 64, 64, 128), 10 | out_indices=(0, 1, 2), 11 | out_channels=256, 12 | backbone_cfg=dict( 13 | type='ResNet', 14 | in_channels=3, 15 | depth=18, 16 | num_stages=4, 17 | out_indices=(0, 1, 2, 3), 18 | dilations=(1, 1, 1, 1), 19 | strides=(1, 2, 2, 2), 20 | norm_cfg=norm_cfg, 21 | norm_eval=False, 22 | 
class SELayer(nn.Module):
    """Squeeze-and-Excitation block that rescales the channels of its input.

    The input is globally average-pooled to 1x1, passed through two 1x1
    ``ConvModule`` layers (squeeze, then excite) and the result is multiplied
    back onto the input.

    Args:
        channels (int): The input (and output) channels of the SE layer.
        ratio (int): Squeeze ratio. The intermediate channel count is
            ``make_divisible(channels // ratio, 8)``. Default: 16.
        conv_cfg (None or dict): Config dict for the convolution layers,
            forwarded to both ``ConvModule`` instances. Default: None.
        act_cfg (dict or Sequence[dict]): Config dict for the activation
            layers. A single dict is used for both convolutions; a pair of
            dicts configures the first and the second convolution
            respectively. Default: (dict(type='ReLU'),
            dict(type='HSigmoid', bias=3.0, divisor=6.0)).
    """

    def __init__(self,
                 channels,
                 ratio=16,
                 conv_cfg=None,
                 act_cfg=(dict(type='ReLU'),
                          dict(type='HSigmoid', bias=3.0, divisor=6.0))):
        super().__init__()

        # A lone dict means "use the same activation for both convolutions".
        if isinstance(act_cfg, dict):
            act_cfg = (act_cfg, act_cfg)
        assert len(act_cfg) == 2
        assert mmcv.is_tuple_of(act_cfg, dict)
        squeeze_act, excite_act = act_cfg

        # Width of the bottleneck between the two 1x1 convolutions.
        squeezed_channels = make_divisible(channels // ratio, 8)

        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
        self.conv1 = ConvModule(
            in_channels=channels,
            out_channels=squeezed_channels,
            kernel_size=1,
            stride=1,
            conv_cfg=conv_cfg,
            act_cfg=squeeze_act)
        self.conv2 = ConvModule(
            in_channels=squeezed_channels,
            out_channels=channels,
            kernel_size=1,
            stride=1,
            conv_cfg=conv_cfg,
            act_cfg=excite_act)

    def forward(self, x):
        """Return ``x`` multiplied by its per-channel excitation weights."""
        weights = self.conv2(self.conv1(self.global_avgpool(x)))
        return x * weights
| contract_dilation=True), 19 | in_channels=3, 20 | layer_channels=(512, 2048), 21 | light_branch_middle_channels=32, 22 | psp_out_channels=512, 23 | out_channels=(64, 256, 256), 24 | norm_cfg=norm_cfg, 25 | align_corners=False, 26 | ), 27 | neck=dict( 28 | type='ICNeck', 29 | in_channels=(64, 256, 256), 30 | out_channels=128, 31 | norm_cfg=norm_cfg, 32 | align_corners=False), 33 | decode_head=dict( 34 | type='FCNHead', 35 | in_channels=128, 36 | channels=128, 37 | num_convs=1, 38 | in_index=2, 39 | dropout_ratio=0, 40 | num_classes=19, 41 | norm_cfg=norm_cfg, 42 | concat_input=False, 43 | align_corners=False, 44 | loss_decode=dict( 45 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 46 | auxiliary_head=[ 47 | dict( 48 | type='FCNHead', 49 | in_channels=128, 50 | channels=128, 51 | num_convs=1, 52 | num_classes=19, 53 | in_index=0, 54 | norm_cfg=norm_cfg, 55 | concat_input=False, 56 | align_corners=False, 57 | loss_decode=dict( 58 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 59 | dict( 60 | type='FCNHead', 61 | in_channels=128, 62 | channels=128, 63 | num_convs=1, 64 | num_classes=19, 65 | in_index=1, 66 | norm_cfg=norm_cfg, 67 | concat_input=False, 68 | align_corners=False, 69 | loss_decode=dict( 70 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 71 | ], 72 | # model training and testing settings 73 | train_cfg=dict(), 74 | test_cfg=dict(mode='whole')) 75 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/models/ocrnet_hr18.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='CascadeEncoderDecoder', 5 | num_stages=2, 6 | pretrained='open-mmlab://msra/hrnetv2_w18', 7 | backbone=dict( 8 | type='HRNet', 9 | norm_cfg=norm_cfg, 10 | norm_eval=False, 11 | extra=dict( 12 | stage1=dict( 13 | num_modules=1, 14 | num_branches=1, 
15 | block='BOTTLENECK', 16 | num_blocks=(4, ), 17 | num_channels=(64, )), 18 | stage2=dict( 19 | num_modules=1, 20 | num_branches=2, 21 | block='BASIC', 22 | num_blocks=(4, 4), 23 | num_channels=(18, 36)), 24 | stage3=dict( 25 | num_modules=4, 26 | num_branches=3, 27 | block='BASIC', 28 | num_blocks=(4, 4, 4), 29 | num_channels=(18, 36, 72)), 30 | stage4=dict( 31 | num_modules=3, 32 | num_branches=4, 33 | block='BASIC', 34 | num_blocks=(4, 4, 4, 4), 35 | num_channels=(18, 36, 72, 144)))), 36 | decode_head=[ 37 | dict( 38 | type='FCNHead', 39 | in_channels=[18, 36, 72, 144], 40 | channels=sum([18, 36, 72, 144]), 41 | in_index=(0, 1, 2, 3), 42 | input_transform='resize_concat', 43 | kernel_size=1, 44 | num_convs=1, 45 | concat_input=False, 46 | dropout_ratio=-1, 47 | num_classes=19, 48 | norm_cfg=norm_cfg, 49 | align_corners=False, 50 | loss_decode=dict( 51 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 52 | dict( 53 | type='OCRHead', 54 | in_channels=[18, 36, 72, 144], 55 | in_index=(0, 1, 2, 3), 56 | input_transform='resize_concat', 57 | channels=512, 58 | ocr_channels=256, 59 | dropout_ratio=-1, 60 | num_classes=19, 61 | norm_cfg=norm_cfg, 62 | align_corners=False, 63 | loss_decode=dict( 64 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 65 | ], 66 | # model training and testing settings 67 | train_cfg=dict(), 68 | test_cfg=dict(mode='whole')) 69 | -------------------------------------------------------------------------------- /get_flops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
def parse_args():
    """Parse the command line of the FLOPs / latency script.

    Returns:
        argparse.Namespace: ``config`` holds the config file path and
        ``shape`` holds a list of one or more ints (default ``[64, 64]``).
    """
    parser = argparse.ArgumentParser(
        description='Get the FLOPs of a segmentor')
    parser.add_argument('config', help='train config file path')
    parser.add_argument(
        '--shape',
        type=int,
        nargs='+',
        default=[64, 64],
        help='input image size')
    args = parser.parse_args()
    return args


def main():
    """Report FLOPs, parameter count and mean forward latency of a segmentor.

    The model is built from the config given on the command line, moved to
    the GPU and switched to ``forward_dummy``. Complexity is printed first,
    then the average wall-clock time of 1000 forward passes on a random
    input of the requested shape.

    Raises:
        ValueError: If ``--shape`` is given more than two values.
        NotImplementedError: If the model has no ``forward_dummy`` method.
    """
    args = parse_args()

    # One value means a square input; two values are taken as given.
    if len(args.shape) == 1:
        input_shape = (3, args.shape[0], args.shape[0])
    elif len(args.shape) == 2:
        input_shape = (3, ) + tuple(args.shape)
    else:
        raise ValueError('invalid input shape')

    cfg = Config.fromfile(args.config)
    cfg.model.pretrained = None
    model = build_segmentor(
        cfg.model,
        train_cfg=cfg.get('train_cfg'),
        test_cfg=cfg.get('test_cfg')).cuda()
    model.eval()

    if hasattr(model, 'forward_dummy'):
        model.forward = model.forward_dummy
    else:
        raise NotImplementedError(
            'FLOPs counter is currently not currently supported with {}'.
            format(model.__class__.__name__))

    with torch.no_grad():
        flops, params = get_model_complexity_info(
            model, input_shape, as_strings=True)
    split_line = '=' * 30
    print('{0}\nInput shape: {1}\nFlops: {2}\nParams: {3}\n{0}'.format(
        split_line, input_shape, flops, params))
    print('!!!Please be cautious if you use the results in papers. '
          'You may need to check if all ops are supported and verify that the '
          'flops computation is correct.')

    # Build the benchmark input from the normalised shape so that a
    # single-value ``--shape`` works here as well (indexing ``args.shape[-2]``
    # would raise IndexError in that case).
    fake_input = torch.rand(1, *input_shape).cuda()
    time_list = []
    with torch.no_grad():
        for _ in tqdm(range(1000)):
            # CUDA work is queued asynchronously: wait for the device before
            # starting and before stopping the clock, otherwise only the
            # launch overhead is measured.
            torch.cuda.synchronize()
            t0 = time.perf_counter()
            _ = model(fake_input)
            torch.cuda.synchronize()
            time_list.append(time.perf_counter() - t0)
    print(sum(time_list) / len(time_list))


if __name__ == '__main__':
    main()
type='SETRUPHead', 52 | in_channels=1024, 53 | channels=256, 54 | in_index=1, 55 | num_classes=19, 56 | dropout_ratio=0, 57 | norm_cfg=norm_cfg, 58 | num_convs=1, 59 | up_scale=4, 60 | kernel_size=3, 61 | align_corners=False, 62 | loss_decode=dict( 63 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 64 | dict( 65 | type='SETRUPHead', 66 | in_channels=1024, 67 | channels=256, 68 | in_index=2, 69 | num_classes=19, 70 | dropout_ratio=0, 71 | norm_cfg=norm_cfg, 72 | num_convs=1, 73 | up_scale=4, 74 | kernel_size=3, 75 | align_corners=False, 76 | loss_decode=dict( 77 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 78 | ], 79 | train_cfg=dict(), 80 | test_cfg=dict(mode='whole')) 81 | -------------------------------------------------------------------------------- /mmseg_custom/configs/_base_/models/setr_naive.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True) 3 | norm_cfg = dict(type='SyncBN', requires_grad=True) 4 | model = dict( 5 | type='EncoderDecoder', 6 | pretrained='pretrain/jx_vit_large_p16_384-b3be5167.pth', 7 | backbone=dict( 8 | type='VisionTransformer', 9 | img_size=(768, 768), 10 | patch_size=16, 11 | in_channels=3, 12 | embed_dims=1024, 13 | num_layers=24, 14 | num_heads=16, 15 | out_indices=(9, 14, 19, 23), 16 | drop_rate=0.1, 17 | norm_cfg=backbone_norm_cfg, 18 | with_cls_token=True, 19 | interpolate_mode='bilinear', 20 | ), 21 | decode_head=dict( 22 | type='SETRUPHead', 23 | in_channels=1024, 24 | channels=256, 25 | in_index=3, 26 | num_classes=19, 27 | dropout_ratio=0, 28 | norm_cfg=norm_cfg, 29 | num_convs=1, 30 | up_scale=4, 31 | kernel_size=1, 32 | align_corners=False, 33 | loss_decode=dict( 34 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 35 | auxiliary_head=[ 36 | dict( 37 | type='SETRUPHead', 38 | in_channels=1024, 39 | channels=256, 40 | in_index=0, 41 | num_classes=19, 42 
# Model settings for UPerNet on a Swin backbone wrapped with saliency samplers.
# Every top-level name in this file becomes part of the loaded config, so no
# helper variables are introduced here.
norm_cfg = {'type': 'SyncBN', 'requires_grad': True}
backbone_norm_cfg = {'type': 'LN', 'requires_grad': True}
LOG_DIR = None
# LOG_DIR = '/home/ubuntu/code/ResolutionDet/mmseg_exp/log'
ALIGN_CORNERS = False
SAMPLER_NUM = 3

model = {
    # Plain alternative: type='EncoderDecoder'
    'type': 'EncoderDecoderWithSaliencySamplerPixelRelation',
    'edge_loss_weight': 100,
    'pixel_relation': [],
    'pretrained': None,
    'backbone': {
        'use_checkpoint': True,
        'align_corners': ALIGN_CORNERS,
        'log_dir': LOG_DIR,
        # One entry per sampler in each of the sampler_* tuples below.
        'sampler_num': SAMPLER_NUM,
        'sampler_strides': (1, 1, 1),
        'sampler_paddings': (5, 5, 5),
        'sampler_fwhm': (3, 3, 3),
        # Alternative: sampler_mode=('lhpf', 'lhpf', 'lhpf'), psp_ratio=16
        'sampler_mode': (
            'avgpsp_semantic_edge',
            'avgpsp_semantic_edge',
            'avgpsp_semantic_edge',
        ),
        # Plain alternative: type='SwinTransformer'
        'type': 'SwinTransformerSaliencySampler',
        'pretrain_img_size': 224,
        # Wider alternative: embed_dims=192 (pair with the wider in_channels
        # noted in decode_head).
        'embed_dims': 96,
        'patch_size': 4,
        'window_size': 7,
        'mlp_ratio': 4,
        'depths': [2, 2, 6, 2],
        'num_heads': [3, 6, 12, 24],
        'strides': (4, 2, 2, 2),
        'out_indices': (0, 1, 2, 3),
        'qkv_bias': True,
        'qk_scale': None,
        'patch_norm': True,
        'drop_rate': 0.,
        'attn_drop_rate': 0.,
        'drop_path_rate': 0.3,
        'use_abs_pos_embed': False,
        'act_cfg': {'type': 'GELU'},
        'norm_cfg': backbone_norm_cfg,
    },
    'decode_head': {
        # Plain alternative: type='UPerHead'
        'type': 'UPerHeadASAlign',
        # Wider alternative: in_channels=[192, 384, 768, 1536]
        'in_channels': [96, 192, 384, 768],
        'in_index': [0, 1, 2, 3],
        'pool_scales': (1, 2, 3, 6),
        # Alternative: channels=512
        'channels': 256,
        'dropout_ratio': 0.1,
        'num_classes': 19,
        'norm_cfg': norm_cfg,
        'align_corners': False,
        'loss_decode': {
            'type': 'CrossEntropyLoss',
            'use_sigmoid': False,
            'loss_weight': 1.0,
        },
    },
    'auxiliary_head': {
        'type': 'FCNHead',
        'in_channels': 384,
        'in_index': 2,
        'channels': 256,
        'num_convs': 1,
        'concat_input': False,
        'dropout_ratio': 0.1,
        'num_classes': 19,
        'norm_cfg': norm_cfg,
        'align_corners': False,
        'loss_decode': {
            'type': 'CrossEntropyLoss',
            'use_sigmoid': False,
            'loss_weight': 0.4,
        },
    },
    # model training and testing settings
    'train_cfg': {},
    'test_cfg': {'mode': 'whole'},
}
# Model settings for STDC: an STDCNet1 backbone inside a context-path
# network, an FCN decode head, two FCN auxiliary heads and one STDCHead
# trained with a two-class CE + Dice loss.
norm_cfg = {'type': 'BN', 'requires_grad': True}
model = {
    'type': 'EncoderDecoder',
    'pretrained': None,
    'backbone': {
        'type': 'STDCContextPathNet',
        'backbone_cfg': {
            'type': 'STDCNet',
            'stdc_type': 'STDCNet1',
            'in_channels': 3,
            'channels': (32, 64, 256, 512, 1024),
            'bottleneck_type': 'cat',
            'num_convs': 4,
            'norm_cfg': norm_cfg,
            'act_cfg': {'type': 'ReLU'},
            'with_final_conv': False,
        },
        'last_in_channels': (1024, 512),
        'out_channels': 128,
        'ffm_cfg': {
            'in_channels': 384,
            'out_channels': 256,
            'scale_factor': 4,
        },
    },
    'decode_head': {
        'type': 'FCNHead',
        'in_channels': 256,
        'channels': 256,
        'num_convs': 1,
        'num_classes': 19,
        'in_index': 3,
        'concat_input': False,
        'dropout_ratio': 0.1,
        'norm_cfg': norm_cfg,
        'align_corners': True,
        'sampler': {
            'type': 'OHEMPixelSampler',
            'thresh': 0.7,
            'min_kept': 10000,
        },
        'loss_decode': {
            'type': 'CrossEntropyLoss',
            'use_sigmoid': False,
            'loss_weight': 1.0,
        },
    },
    'auxiliary_head': [
        # FCN auxiliary head on backbone output 2.
        {
            'type': 'FCNHead',
            'in_channels': 128,
            'channels': 64,
            'num_convs': 1,
            'num_classes': 19,
            'in_index': 2,
            'norm_cfg': norm_cfg,
            'concat_input': False,
            'align_corners': False,
            'sampler': {
                'type': 'OHEMPixelSampler',
                'thresh': 0.7,
                'min_kept': 10000,
            },
            'loss_decode': {
                'type': 'CrossEntropyLoss',
                'use_sigmoid': False,
                'loss_weight': 1.0,
            },
        },
        # FCN auxiliary head on backbone output 1.
        {
            'type': 'FCNHead',
            'in_channels': 128,
            'channels': 64,
            'num_convs': 1,
            'num_classes': 19,
            'in_index': 1,
            'norm_cfg': norm_cfg,
            'concat_input': False,
            'align_corners': False,
            'sampler': {
                'type': 'OHEMPixelSampler',
                'thresh': 0.7,
                'min_kept': 10000,
            },
            'loss_decode': {
                'type': 'CrossEntropyLoss',
                'use_sigmoid': False,
                'loss_weight': 1.0,
            },
        },
        # Two-class STDCHead on backbone output 0 (sigmoid CE + Dice).
        {
            'type': 'STDCHead',
            'in_channels': 256,
            'channels': 64,
            'num_convs': 1,
            'num_classes': 2,
            'boundary_threshold': 0.1,
            'in_index': 0,
            'norm_cfg': norm_cfg,
            'concat_input': False,
            'align_corners': True,
            'loss_decode': [
                {
                    'type': 'CrossEntropyLoss',
                    'loss_name': 'loss_ce',
                    'use_sigmoid': True,
                    'loss_weight': 1.0,
                },
                {
                    'type': 'DiceLoss',
                    'loss_name': 'loss_dice',
                    'loss_weight': 1.0,
                },
            ],
        },
    ],
    # model training and testing settings
    'train_cfg': {},
    'test_cfg': {'mode': 'whole'},
}
class DistributedSampler(_DistributedSampler):
    """DistributedSampler inheriting from
    `torch.utils.data.DistributedSampler`.

    Compared with the parent class, the shuffle seed is synchronised across
    processes via ``sync_random_seed`` and the index list is padded by
    cycling, so datasets smaller than the required padding are supported.

    Args:
        dataset (Dataset): the dataset will be loaded.
        num_replicas (int, optional): Number of processes participating in
            distributed training. By default, world_size is retrieved from the
            current distributed group.
        rank (int, optional): Rank of the current process within num_replicas.
            By default, rank is retrieved from the current distributed group.
        shuffle (bool): If True (default), sampler will shuffle the indices.
        seed (int): random seed used to shuffle the sampler if
            :attr:`shuffle=True`. This number should be identical across all
            processes in the distributed group. Default: ``0``.
    """

    def __init__(self,
                 dataset: Dataset,
                 num_replicas: Optional[int] = None,
                 rank: Optional[int] = None,
                 shuffle: bool = True,
                 seed: int = 0) -> None:
        super().__init__(
            dataset, num_replicas=num_replicas, rank=rank, shuffle=shuffle)

        # In distributed sampling, different ranks should sample
        # non-overlapped data in the dataset. Therefore, the seed is
        # synchronised so that each rank shuffles the data indices
        # in the same order. Then different ranks can use different
        # slices of the same index list to select non-overlapped data.
        device = get_device()
        self.seed = sync_random_seed(seed, device)

    def __iter__(self) -> Iterator:
        """
        Yields:
            Iterator: iterator of indices for rank.
        """
        # deterministically shuffle based on epoch
        if self.shuffle:
            g = torch.Generator()
            # When :attr:`shuffle=True`, this ensures all replicas
            # use a different random ordering for each epoch.
            # Otherwise, the next iteration of this sampler will
            # yield the same ordering.
            g.manual_seed(self.epoch + self.seed)
            indices = torch.randperm(len(self.dataset), generator=g).tolist()
        else:
            indices = torch.arange(len(self.dataset)).tolist()

        # Add extra samples to make the list evenly divisible. The list is
        # cycled as often as needed: a single ``indices[:padding]`` slice is
        # too short when the dataset holds fewer samples than the padding
        # (e.g. 1 sample shared by 4 replicas).
        num_indices = len(indices)
        if 0 < num_indices < self.total_size:
            repeats = -(-self.total_size // num_indices)  # ceil division
            indices = (indices * repeats)[:self.total_size]
        assert len(indices) == self.total_size

        # subsample
        indices = indices[self.rank:self.total_size:self.num_replicas]
        assert len(indices) == self.num_samples

        return iter(indices)
18 | """ 19 | 20 | CLASSES = ('background', 'ship', 'store_tank', 'baseball_diamond', 21 | 'tennis_court', 'basketball_court', 'Ground_Track_Field', 22 | 'Bridge', 'Large_Vehicle', 'Small_Vehicle', 'Helicopter', 23 | 'Swimming_pool', 'Roundabout', 'Soccer_ball_field', 'plane', 24 | 'Harbor') 25 | 26 | PALETTE = [[0, 0, 0], [0, 0, 63], [0, 63, 63], [0, 63, 0], [0, 63, 127], 27 | [0, 63, 191], [0, 63, 255], [0, 127, 63], [0, 127, 127], 28 | [0, 0, 127], [0, 0, 191], [0, 0, 255], [0, 191, 127], 29 | [0, 127, 191], [0, 127, 255], [0, 100, 155]] 30 | 31 | def __init__(self, **kwargs): 32 | super(iSAIDDataset, self).__init__( 33 | img_suffix='.png', 34 | seg_map_suffix='.png', 35 | ignore_index=255, 36 | **kwargs) 37 | assert self.file_client.exists(self.img_dir) 38 | 39 | def load_annotations(self, 40 | img_dir, 41 | img_suffix, 42 | ann_dir, 43 | seg_map_suffix=None, 44 | split=None): 45 | """Load annotation from directory. 46 | 47 | Args: 48 | img_dir (str): Path to image directory 49 | img_suffix (str): Suffix of images. 50 | ann_dir (str|None): Path to annotation directory. 51 | seg_map_suffix (str|None): Suffix of segmentation maps. 52 | split (str|None): Split txt file. If split is specified, only file 53 | with suffix in the splits will be loaded. Otherwise, all images 54 | in img_dir/ann_dir will be loaded. Default: None 55 | 56 | Returns: 57 | list[dict]: All image info of dataset. 
@DATASETS.register_module()
class LoveDADataset(CustomDataset):
    """LoveDA dataset.

    In segmentation map annotation for LoveDA, 0 is the ignore index, so
    ``reduce_zero_label`` is fixed to True. The ``img_suffix`` and
    ``seg_map_suffix`` are both fixed to '.png'.
    """
    CLASSES = ('background', 'building', 'road', 'water', 'barren', 'forest',
               'agricultural')

    PALETTE = [[255, 255, 255], [255, 0, 0], [255, 255, 0], [0, 0, 255],
               [159, 129, 183], [0, 255, 0], [255, 195, 128]]

    def __init__(self, **kwargs):
        super(LoveDADataset, self).__init__(
            img_suffix='.png',
            seg_map_suffix='.png',
            reduce_zero_label=True,
            **kwargs)

    def results2img(self, results, imgfile_prefix, indices=None):
        """Write the segmentation results to images.

        Args:
            results (list[ndarray]): Testing results of the
                dataset.
            imgfile_prefix (str): Directory the png files are written to.
                It is created if it does not exist; each file is named
                after the basename of the corresponding input image.
            indices (list[int], optional): Indices of input results, if not
                set, all the indices of the dataset will be used.
                Default: None.

        Returns:
            list[str]: Paths of the written png files, in input order.
        """
        # Honour the documented default; ``zip(results, None)`` would raise
        # a TypeError.
        if indices is None:
            indices = list(range(len(self)))

        mmcv.mkdir_or_exist(imgfile_prefix)
        result_files = []
        for result, idx in zip(results, indices):

            filename = self.img_infos[idx]['filename']
            basename = osp.splitext(osp.basename(filename))[0]

            png_filename = osp.join(imgfile_prefix, f'{basename}.png')

            # The index range of official requirement is from 0 to 6.
            output = Image.fromarray(result.astype(np.uint8))
            output.save(png_filename)
            result_files.append(png_filename)

        return result_files

    def format_results(self, results, imgfile_prefix, indices=None):
        """Format the results into dir (standard format for LoveDA evaluation).

        Args:
            results (list): Testing results of the dataset.
            imgfile_prefix (str): Directory the png files are written to
                (forwarded to :meth:`results2img`).
            indices (list[int], optional): Indices of input results,
                if not set, all the indices of the dataset will be used.
                Default: None.

        Returns:
            list[str]: Paths of the written png files.
        """
        if indices is None:
            indices = list(range(len(self)))

        assert isinstance(results, list), 'results must be a list.'
        assert isinstance(indices, list), 'indices must be a list.'

        result_files = self.results2img(results, imgfile_prefix, indices)

        return result_files
Default: None 24 | contract_dilation (bool): Whether contract first dilation of each layer 25 | Default: False 26 | """ 27 | 28 | def __init__(self, 29 | block, 30 | inplanes, 31 | planes, 32 | num_blocks, 33 | stride=1, 34 | dilation=1, 35 | avg_down=False, 36 | conv_cfg=None, 37 | norm_cfg=dict(type='BN'), 38 | multi_grid=None, 39 | contract_dilation=False, 40 | **kwargs): 41 | self.block = block 42 | 43 | downsample = None 44 | if stride != 1 or inplanes != planes * block.expansion: 45 | downsample = [] 46 | conv_stride = stride 47 | if avg_down: 48 | conv_stride = 1 49 | downsample.append( 50 | nn.AvgPool2d( 51 | kernel_size=stride, 52 | stride=stride, 53 | ceil_mode=True, 54 | count_include_pad=False)) 55 | downsample.extend([ 56 | build_conv_layer( 57 | conv_cfg, 58 | inplanes, 59 | planes * block.expansion, 60 | kernel_size=1, 61 | stride=conv_stride, 62 | bias=False), 63 | build_norm_layer(norm_cfg, planes * block.expansion)[1] 64 | ]) 65 | downsample = nn.Sequential(*downsample) 66 | 67 | layers = [] 68 | if multi_grid is None: 69 | if dilation > 1 and contract_dilation: 70 | first_dilation = dilation // 2 71 | else: 72 | first_dilation = dilation 73 | else: 74 | first_dilation = multi_grid[0] 75 | layers.append( 76 | block( 77 | inplanes=inplanes, 78 | planes=planes, 79 | stride=stride, 80 | dilation=first_dilation, 81 | downsample=downsample, 82 | conv_cfg=conv_cfg, 83 | norm_cfg=norm_cfg, 84 | **kwargs)) 85 | inplanes = planes * block.expansion 86 | for i in range(1, num_blocks): 87 | layers.append( 88 | block( 89 | inplanes=inplanes, 90 | planes=planes, 91 | stride=1, 92 | dilation=dilation if multi_grid is None else multi_grid[i], 93 | conv_cfg=conv_cfg, 94 | norm_cfg=norm_cfg, 95 | **kwargs)) 96 | super(ResLayer, self).__init__(*layers) 97 | -------------------------------------------------------------------------------- /mmseg_custom/configs/upernet/upernet_r50_512x512_40k_voc12aug_FADC.py: 
# UPerNet-R50 on VOC12Aug (512x512, 40k iterations) with the ResNet DCN slot
# replaced by `AdaDilatedConv`. Everything not overridden here comes from the
# base config below.
_base_ = './upernet_r50_512x512_40k_voc12aug.py'
model = dict(
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        depth=50,
        # Convolution plugged in through the `dcn` option; it is enabled for
        # the last three stages (see `stage_with_dcn`).
        # Other operator types left as commented-out experiments in the
        # original file: 'DCNv2', 'FreqDecomp_DCNv2', 'AAConv', 'FLCConv'.
        dcn=dict(
            type='AdaDilatedConv',
            offset_freq=None,  # alternative: 'SLP_res'
            deformable_groups=1,
            padding_mode='zero',
            kernel_decompose='both',  # alternative: None
            epsilon=1e-4,
            use_zero_dilation=False,
            pre_fs=False,  # alternative: True
            conv_type='conv',  # alternative: 'multifreqband'
            # Frequency-selection settings; `fs_cfg=None` was the noted
            # alternative.
            fs_cfg={
                'k_list': [2, 4, 8],  # alternative: [3, 5, 7, 9]
                'fs_feat': 'feat',
                'lowfreq_att': False,
                # alternatives: 'freq_eca', 'freq_channel_att', 'freq',
                # 'avgpool'
                'lp_type': 'laplacian',
                'act': 'sigmoid',
                'spatial': 'conv',
                'channel_res': True,
                'spatial_group': 8,
            },
            sp_att=False,
            fallback_on_stride=False),
        stage_with_dcn=(False, True, True, True),
    ),
    decode_head=dict(
        type='UPerHead',
        channels=128,
    ),
)
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=16,
)
checkpoint_config = dict(max_keep_ckpts=2)
# `pre_eval` must be a real bool: the string 'True' is truthy but does not
# compare equal to True, so checks written against the bool miss it.
evaluation = dict(save_best='mIoU', pre_eval=True)
23 | """ 24 | 25 | def __init__(self, context, thresh=None, min_kept=100000): 26 | super(OHEMPixelSampler, self).__init__() 27 | self.context = context 28 | assert min_kept > 1 29 | self.thresh = thresh 30 | self.min_kept = min_kept 31 | 32 | def sample(self, seg_logit, seg_label): 33 | """Sample pixels that have high loss or with low prediction confidence. 34 | 35 | Args: 36 | seg_logit (torch.Tensor): segmentation logits, shape (N, C, H, W) 37 | seg_label (torch.Tensor): segmentation label, shape (N, 1, H, W) 38 | 39 | Returns: 40 | torch.Tensor: segmentation weight, shape (N, H, W) 41 | """ 42 | with torch.no_grad(): 43 | assert seg_logit.shape[2:] == seg_label.shape[2:] 44 | assert seg_label.shape[1] == 1 45 | seg_label = seg_label.squeeze(1).long() 46 | batch_kept = self.min_kept * seg_label.size(0) 47 | valid_mask = seg_label != self.context.ignore_index 48 | seg_weight = seg_logit.new_zeros(size=seg_label.size()) 49 | valid_seg_weight = seg_weight[valid_mask] 50 | if self.thresh is not None: 51 | seg_prob = F.softmax(seg_logit, dim=1) 52 | 53 | tmp_seg_label = seg_label.clone().unsqueeze(1) 54 | tmp_seg_label[tmp_seg_label == self.context.ignore_index] = 0 55 | seg_prob = seg_prob.gather(1, tmp_seg_label).squeeze(1) 56 | sort_prob, sort_indices = seg_prob[valid_mask].sort() 57 | 58 | if sort_prob.numel() > 0: 59 | min_threshold = sort_prob[min(batch_kept, 60 | sort_prob.numel() - 1)] 61 | else: 62 | min_threshold = 0.0 63 | threshold = max(min_threshold, self.thresh) 64 | valid_seg_weight[seg_prob[valid_mask] < threshold] = 1. 
65 | else: 66 | if not isinstance(self.context.loss_decode, nn.ModuleList): 67 | losses_decode = [self.context.loss_decode] 68 | else: 69 | losses_decode = self.context.loss_decode 70 | losses = 0.0 71 | for loss_module in losses_decode: 72 | losses += loss_module( 73 | seg_logit, 74 | seg_label, 75 | weight=None, 76 | ignore_index=self.context.ignore_index, 77 | reduction_override='none') 78 | 79 | # faster than topk according to https://github.com/pytorch/pytorch/issues/22812 # noqa 80 | _, sort_indices = losses[valid_mask].sort(descending=True) 81 | valid_seg_weight[sort_indices[:batch_kept]] = 1. 82 | 83 | seg_weight[valid_mask] = valid_seg_weight 84 | 85 | return seg_weight 86 | -------------------------------------------------------------------------------- /mmseg_custom/models/utils/shape_convert.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | def nlc_to_nchw(x, hw_shape): 3 | """Convert [N, L, C] shape tensor to [N, C, H, W] shape tensor. 4 | 5 | Args: 6 | x (Tensor): The input tensor of shape [N, L, C] before conversion. 7 | hw_shape (Sequence[int]): The height and width of output feature map. 8 | 9 | Returns: 10 | Tensor: The output tensor of shape [N, C, H, W] after conversion. 11 | """ 12 | H, W = hw_shape 13 | assert len(x.shape) == 3 14 | B, L, C = x.shape 15 | assert L == H * W, 'The seq_len doesn\'t match H, W' 16 | return x.transpose(1, 2).reshape(B, C, H, W) 17 | 18 | 19 | def nchw_to_nlc(x): 20 | """Flatten [N, C, H, W] shape tensor to [N, L, C] shape tensor. 21 | 22 | Args: 23 | x (Tensor): The input tensor of shape [N, C, H, W] before conversion. 24 | 25 | Returns: 26 | Tensor: The output tensor of shape [N, L, C] after conversion. 
27 | """ 28 | assert len(x.shape) == 4 29 | return x.flatten(2).transpose(1, 2).contiguous() 30 | 31 | 32 | def nchw2nlc2nchw(module, x, contiguous=False, **kwargs): 33 | """Flatten [N, C, H, W] shape tensor `x` to [N, L, C] shape tensor. Use the 34 | reshaped tensor as the input of `module`, and the convert the output of 35 | `module`, whose shape is. 36 | 37 | [N, L, C], to [N, C, H, W]. 38 | 39 | Args: 40 | module (Callable): A callable object the takes a tensor 41 | with shape [N, L, C] as input. 42 | x (Tensor): The input tensor of shape [N, C, H, W]. 43 | contiguous: 44 | contiguous (Bool): Whether to make the tensor contiguous 45 | after each shape transform. 46 | 47 | Returns: 48 | Tensor: The output tensor of shape [N, C, H, W]. 49 | 50 | Example: 51 | >>> import torch 52 | >>> import torch.nn as nn 53 | >>> norm = nn.LayerNorm(4) 54 | >>> feature_map = torch.rand(4, 4, 5, 5) 55 | >>> output = nchw2nlc2nchw(norm, feature_map) 56 | """ 57 | B, C, H, W = x.shape 58 | if not contiguous: 59 | x = x.flatten(2).transpose(1, 2) 60 | x = module(x, **kwargs) 61 | x = x.transpose(1, 2).reshape(B, C, H, W) 62 | else: 63 | x = x.flatten(2).transpose(1, 2).contiguous() 64 | x = module(x, **kwargs) 65 | x = x.transpose(1, 2).reshape(B, C, H, W).contiguous() 66 | return x 67 | 68 | 69 | def nlc2nchw2nlc(module, x, hw_shape, contiguous=False, **kwargs): 70 | """Convert [N, L, C] shape tensor `x` to [N, C, H, W] shape tensor. Use the 71 | reshaped tensor as the input of `module`, and convert the output of 72 | `module`, whose shape is. 73 | 74 | [N, C, H, W], to [N, L, C]. 75 | 76 | Args: 77 | module (Callable): A callable object the takes a tensor 78 | with shape [N, C, H, W] as input. 79 | x (Tensor): The input tensor of shape [N, L, C]. 80 | hw_shape: (Sequence[int]): The height and width of the 81 | feature map with shape [N, C, H, W]. 82 | contiguous (Bool): Whether to make the tensor contiguous 83 | after each shape transform. 
class UpConvBlock(nn.Module):
    """Upsample convolution block in decoder for UNet.

    The block holds two submodules: an upsample module applied to the
    high-level input ``x``, and a convolution block applied to the
    channel-wise concatenation of the skip feature map and the upsampled
    ``x``.

    Args:
        conv_block (Callable): Factory for the fusion block. It is called with
            the keyword arguments ``in_channels=2 * skip_channels``,
            ``out_channels``, ``num_convs``, ``stride``, ``dilation``,
            ``with_cp``, ``conv_cfg``, ``norm_cfg``, ``act_cfg``, ``dcn=None``
            and ``plugins=None``.
        in_channels (int): Number of channels of the high-level feature map
            ``x`` that is fed to the upsample module.
        skip_channels (int): Number of channels of the skip feature map. The
            upsample module is built to output this many channels, which is
            why the fusion block receives ``2 * skip_channels`` inputs.
        out_channels (int): Number of output channels of the fusion block.
        num_convs (int): Forwarded to ``conv_block``. Default: 2.
        stride (int): Forwarded to ``conv_block``. Default: 1.
        dilation (int): Forwarded to ``conv_block``. Default: 1.
        with_cp (bool): Forwarded to ``conv_block`` and, when
            ``upsample_cfg`` is given, to the upsample layer. Default: False.
        conv_cfg (dict | None): Config dict for convolution layer.
            Default: None.
        norm_cfg (dict | None): Config dict for normalization layer.
            Default: dict(type='BN').
        act_cfg (dict | None): Config dict for activation layer.
            Default: dict(type='ReLU').
        upsample_cfg (dict | None): Config passed to
            ``build_upsample_layer``. When None, a 1x1 ``ConvModule``
            (stride 1, padding 0) is used instead, so the spatial size of
            ``x`` is presumably expected to match the skip feature map
            already. Default: dict(type='InterpConv').
        dcn (None): Must be None; anything else fails an assertion.
        plugins (None): Must be None; anything else fails an assertion.
    """

    def __init__(self,
                 conv_block,
                 in_channels,
                 skip_channels,
                 out_channels,
                 num_convs=2,
                 stride=1,
                 dilation=1,
                 with_cp=False,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU'),
                 upsample_cfg=dict(type='InterpConv'),
                 dcn=None,
                 plugins=None):
        super().__init__()
        assert dcn is None, 'Not implemented yet.'
        assert plugins is None, 'Not implemented yet.'

        # The fusion block is registered before the upsample module so the
        # submodule (and therefore parameter) order stays as it always was.
        fusion_kwargs = dict(
            in_channels=2 * skip_channels,
            out_channels=out_channels,
            num_convs=num_convs,
            stride=stride,
            dilation=dilation,
            with_cp=with_cp,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg,
            dcn=None,
            plugins=None)
        self.conv_block = conv_block(**fusion_kwargs)

        if upsample_cfg is None:
            # No upsample config: only project channels with a 1x1 conv.
            self.upsample = ConvModule(
                in_channels,
                skip_channels,
                kernel_size=1,
                stride=1,
                padding=0,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg)
        else:
            self.upsample = build_upsample_layer(
                cfg=upsample_cfg,
                in_channels=in_channels,
                out_channels=skip_channels,
                with_cp=with_cp,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg)

    def forward(self, skip, x):
        """Upsample ``x``, concatenate it after ``skip`` on dim 1, and fuse.

        Args:
            skip (Tensor): Skip feature map; placed first in the concat.
            x (Tensor): High-level feature map fed to the upsample module.

        Returns:
            Tensor: Output of the fusion convolution block.
        """
        upsampled = self.upsample(x)
        fused = torch.cat([skip, upsampled], dim=1)
        return self.conv_block(fused)
class DistEvalHook(_DistEvalHook):
    """Distributed evaluation hook that runs ``mmseg.apis.multi_gpu_test``.

    Args:
        by_epoch (bool): Forwarded to the parent hook; whether evaluation is
            scheduled by epoch (True) or by iteration (False).
            Default: False.
        efficient_test (bool): Deprecated. A truthy value only triggers a
            warning that points to ``pre_eval``. Default: False.
        pre_eval (bool): Stored on the hook and forwarded to
            ``multi_gpu_test``. Default: False.
    """

    greater_keys = ['mIoU', 'mAcc', 'aAcc']

    def __init__(self,
                 *args,
                 by_epoch=False,
                 efficient_test=False,
                 pre_eval=False,
                 **kwargs):
        super().__init__(*args, by_epoch=by_epoch, **kwargs)
        self.pre_eval = pre_eval
        if not efficient_test:
            return
        warnings.warn(
            'DeprecationWarning: ``efficient_test`` for evaluation hook '
            'is deprecated, the evaluation hook is CPU memory friendly '
            'with ``pre_eval=True`` as argument for ``multi_gpu_test()`` '
            'function')

    def _do_evaluate(self, runner):
        """Run distributed evaluation and, on rank 0, score and save."""
        # DDP does not synchronise BatchNorm running statistics, so ranks can
        # drift apart. Rank 0's buffers are broadcast to every rank first;
        # this happens even when the evaluation itself is skipped below.
        if self.broadcast_bn_buffer:
            for _, module in runner.model.named_modules():
                if not isinstance(module, _BatchNorm):
                    continue
                if not module.track_running_stats:
                    continue
                dist.broadcast(module.running_var, 0)
                dist.broadcast(module.running_mean, 0)

        if not self._should_evaluate(runner):
            return

        # Fall back to a directory inside the work dir when none was set.
        tmpdir = self.tmpdir
        if tmpdir is None:
            tmpdir = osp.join(runner.work_dir, '.eval_hook')

        # Imported lazily, only once an evaluation is actually going to run.
        from mmseg.apis import multi_gpu_test
        results = multi_gpu_test(
            runner.model,
            self.dataloader,
            tmpdir=tmpdir,
            gpu_collect=self.gpu_collect,
            pre_eval=self.pre_eval)

        runner.log_buffer.clear()

        # Only rank 0 scores the results and saves the best checkpoint.
        if runner.rank != 0:
            return
        print('\n')
        runner.log_buffer.output['eval_iter_num'] = len(self.dataloader)
        key_score = self.evaluate(runner, results)

        if self.save_best:
            self._save_ckpt(runner, key_score)
def init_segmentor(config, checkpoint=None, device='cuda:0'):
    """Build a segmentor from a config and put it in eval mode.

    Args:
        config (str or :obj:`mmcv.Config`): Config file path or an already
            loaded config object.
        checkpoint (str, optional): Checkpoint to load. When None, no weights
            are loaded and ``CLASSES`` / ``PALETTE`` are not set on the model.
        device (str, optional): Device the model is moved to.
            Default: 'cuda:0'.

    Returns:
        nn.Module: The constructed segmentor, with ``cfg`` attached.

    Raises:
        TypeError: If ``config`` is neither a str nor an ``mmcv.Config``.
    """
    if not isinstance(config, (str, mmcv.Config)):
        raise TypeError('config must be a filename or Config object, '
                        'but got {}'.format(type(config)))
    if isinstance(config, str):
        config = mmcv.Config.fromfile(config)

    # The model is built for inference only: pretrained and train_cfg are
    # cleared on the config before construction.
    model_cfg = config.model
    model_cfg.pretrained = None
    model_cfg.train_cfg = None
    model = build_segmentor(model_cfg, test_cfg=config.get('test_cfg'))

    if checkpoint is not None:
        meta = load_checkpoint(model, checkpoint, map_location='cpu')['meta']
        model.CLASSES = meta['CLASSES']
        model.PALETTE = meta['PALETTE']

    model.cfg = config  # kept on the model so inference can read the pipeline
    model.to(device)
    model.eval()
    return model


class LoadImage:
    """Pipeline step that reads ``results['img']`` into an image array."""

    def __call__(self, results):
        """Load the image referenced by ``results['img']``.

        Args:
            results (dict): Must contain ``'img'``. A str value is recorded
                as ``filename`` / ``ori_filename``; any other value leaves
                both set to None.

        Returns:
            dict: The same dict, with ``'img'`` replaced by the result of
            ``mmcv.imread`` and ``img_shape`` / ``ori_shape`` set to its
            shape.
        """
        source = results['img']
        name = source if isinstance(source, str) else None
        results['filename'] = name
        results['ori_filename'] = name

        image = mmcv.imread(source)
        results['img'] = image
        results['img_shape'] = image.shape
        results['ori_shape'] = image.shape
        return results


def inference_segmentor(model, img):
    """Run the segmentor on one input.

    Args:
        model (nn.Module): A segmentor carrying a ``cfg`` attribute, as set
            by ``init_segmentor``.
        img: The input handed to ``LoadImage`` as ``results['img']`` -
            presumably an image path or an already loaded image array. It is
            wrapped as a single sample and collated with
            ``samples_per_gpu=1``.

    Returns:
        Whatever ``model(return_loss=False, rescale=True, ...)`` returns -
        presumably the segmentation result.
    """
    cfg = model.cfg
    first_param = next(model.parameters())
    device = first_param.device  # model device

    # The first step of the configured test pipeline is replaced by
    # LoadImage; the remaining steps are used as configured.
    pipeline = Compose([LoadImage()] + cfg.data.test.pipeline[1:])

    sample = pipeline(dict(img=img))
    data = collate([sample], samples_per_gpu=1)
    if first_param.is_cuda:
        # scatter to the GPU the model lives on
        data = scatter(data, [device])[0]
    else:
        data['img_metas'] = [i.data[0] for i in data['img_metas']]

    with torch.no_grad():
        return model(return_loss=False, rescale=True, **data)


def show_result_pyplot(model,
                       img,
                       result,
                       palette=None,
                       fig_size=(15, 10),
                       opacity=0.5,
                       title='',
                       block=True,
                       out_file=None):
    """Draw the segmentation result with pyplot and optionally save it.

    Args:
        model (nn.Module): The loaded segmentor. If it has a ``module``
            attribute, that inner object is used.
        img (str or np.ndarray): Image filename or loaded image.
        result (list): The segmentation result.
        palette (list[list[int]]] | None): Forwarded to
            ``model.show_result``. Default: None.
        fig_size (tuple): Figure size of the pyplot figure.
        opacity (float): Forwarded to ``model.show_result``. Default: 0.5.
        title (str): The title of pyplot figure. Default: ''.
        block (bool): Forwarded to ``plt.show``. Default: True.
        out_file (str or None): Path to write the rendered image to; nothing
            is written when None. Default: None.
    """
    segmentor = model.module if hasattr(model, 'module') else model
    rendered = segmentor.show_result(
        img, result, palette=palette, show=False, opacity=opacity)

    plt.figure(figsize=fig_size)
    plt.imshow(mmcv.bgr2rgb(rendered))
    plt.title(title)
    plt.tight_layout()
    plt.show(block=block)

    # The file is written after the figure has been shown.
    if out_file is None:
        return
    mmcv.imwrite(rendered, out_file)